Rename find_next_newline to find_newline_no_quit.
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
4fb9a543 2
ab422c4d
PE
3Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2013 Free Software
4Foundation, Inc.
ca1d1d23
JB
5
6This file is part of GNU Emacs.
7
9ec0b715 8GNU Emacs is free software: you can redistribute it and/or modify
ca1d1d23 9it under the terms of the GNU General Public License as published by
9ec0b715
GM
10the Free Software Foundation, either version 3 of the License, or
11(at your option) any later version.
ca1d1d23
JB
12
13GNU Emacs is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
9ec0b715 19along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
ca1d1d23
JB
20
21
18160b98 22#include <config.h>
0328b6de 23
ca1d1d23
JB
24#include "lisp.h"
25#include "syntax.h"
5679531d 26#include "category.h"
76eb0881 27#include "character.h"
e5560ff7 28#include "buffer.h"
89ad6492 29#include "charset.h"
9169c321 30#include "region-cache.h"
ca1d1d23 31#include "commands.h"
9ac0d9e0 32#include "blockinput.h"
bf1760bb 33#include "intervals.h"
4746118a 34
ca1d1d23
JB
35#include <sys/types.h>
36#include "regex.h"
37
1d288aef 38#define REGEXP_CACHE_SIZE 20
ca1d1d23 39
487282dc
KH
40/* If the regexp is non-nil, then the buffer contains the compiled form
41 of that regexp, suitable for searching. */
1d288aef
RS
42struct regexp_cache
43{
487282dc 44 struct regexp_cache *next;
ecdb561e 45 Lisp_Object regexp, whitespace_regexp;
b69e3c18 46 /* Syntax table for which the regexp applies. We need this because
58e95211
SM
47 of character classes. If this is t, then the compiled pattern is valid
48 for any syntax-table. */
b69e3c18 49 Lisp_Object syntax_table;
487282dc
KH
50 struct re_pattern_buffer buf;
51 char fastmap[0400];
b819a390
RS
52 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
53 char posix;
487282dc 54};
ca1d1d23 55
487282dc 56/* The instances of that struct. */
989b29ad 57static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 58
487282dc 59/* The head of the linked list; points to the most recently used buffer. */
989b29ad 60static struct regexp_cache *searchbuf_head;
ca1d1d23 61
ca1d1d23 62
4746118a
JB
63/* Every call to re_match, etc., must pass &search_regs as the regs
64 argument unless you can show it is unnecessary (i.e., if re_match
65 is certainly going to be called again before region-around-match
66 can be called).
67
68 Since the registers are now dynamically allocated, we need to make
69 sure not to refer to the Nth register before checking that it has
1113d9db
JB
70 been allocated by checking search_regs.num_regs.
71
72 The regex code keeps track of whether it has allocated the search
487282dc
KH
73 buffer using bits in the re_pattern_buffer. This means that whenever
74 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
75 allocated any registers, and will allocate new registers the next
76 time you call a searching or matching function. Therefore, we need
77 to call re_set_registers after compiling a new pattern or after
78 setting the match registers, so that the regex functions will be
79 able to free or re-allocate it properly. */
ca1d1d23
JB
80static struct re_registers search_regs;
81
daa37602
JB
82/* The buffer in which the last search was performed, or
83 Qt if the last search was done in a string;
84 Qnil if no searching has been done yet. */
85static Lisp_Object last_thing_searched;
ca1d1d23 86
02b16839 87/* Error condition signaled when regexp compile_pattern fails. */
955cbe7b 88static Lisp_Object Qinvalid_regexp;
ca1d1d23 89
02b16839 90/* Error condition used for failing searches. */
955cbe7b 91static Lisp_Object Qsearch_failed;
06f77a8a 92
d311d28c 93static void set_search_regs (ptrdiff_t, ptrdiff_t);
f57e2426 94static void save_search_regs (void);
d311d28c
PE
95static EMACS_INT simple_search (EMACS_INT, unsigned char *, ptrdiff_t,
96 ptrdiff_t, Lisp_Object, ptrdiff_t, ptrdiff_t,
97 ptrdiff_t, ptrdiff_t);
98static EMACS_INT boyer_moore (EMACS_INT, unsigned char *, ptrdiff_t,
99 Lisp_Object, Lisp_Object, ptrdiff_t,
100 ptrdiff_t, int);
101static EMACS_INT search_buffer (Lisp_Object, ptrdiff_t, ptrdiff_t,
102 ptrdiff_t, ptrdiff_t, EMACS_INT, int,
f57e2426 103 Lisp_Object, Lisp_Object, int);
b819a390 104
845ca893 105static _Noreturn void
971de7fb 106matcher_overflow (void)
ca1d1d23
JB
107{
108 error ("Stack overflow in regexp matcher");
109}
110
b819a390
RS
111/* Compile a regexp and signal a Lisp error if anything goes wrong.
112 PATTERN is the pattern to compile.
113 CP is the place to put the result.
facdc750 114 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390 115 POSIX is nonzero if we want full backtracking (POSIX style)
5679531d 116 for this pattern. 0 means backtrack only enough to get a valid match.
bbc73b48
KH
117
118 The behavior also depends on Vsearch_spaces_regexp. */
ca1d1d23 119
487282dc 120static void
cfe0661d 121compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, Lisp_Object translate, int posix)
ca1d1d23 122{
d451e4db 123 char *val;
b819a390 124 reg_syntax_t old;
ca1d1d23 125
487282dc 126 cp->regexp = Qnil;
59fab369 127 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
b819a390 128 cp->posix = posix;
93daa011 129 cp->buf.multibyte = STRING_MULTIBYTE (pattern);
89ad6492 130 cp->buf.charset_unibyte = charset_unibyte;
d8c85250
CY
131 if (STRINGP (Vsearch_spaces_regexp))
132 cp->whitespace_regexp = Vsearch_spaces_regexp;
133 else
134 cp->whitespace_regexp = Qnil;
135
e7b2dd2e
RS
136 /* rms: I think BLOCK_INPUT is not needed here any more,
137 because regex.c defines malloc to call xmalloc.
138 Using BLOCK_INPUT here means the debugger won't run if an error occurs.
139 So let's turn it off. */
140 /* BLOCK_INPUT; */
fb4a568d 141 old = re_set_syntax (RE_SYNTAX_EMACS
b819a390 142 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
d8c85250
CY
143
144 if (STRINGP (Vsearch_spaces_regexp))
42a5b22f 145 re_set_whitespace_regexp (SSDATA (Vsearch_spaces_regexp));
d8c85250
CY
146 else
147 re_set_whitespace_regexp (NULL);
bbc73b48 148
51b59d79 149 val = (char *) re_compile_pattern (SSDATA (pattern),
8f924df7 150 SBYTES (pattern), &cp->buf);
bbc73b48 151
58e95211
SM
152 /* If the compiled pattern hard codes some of the contents of the
153 syntax-table, it can only be reused with *this* syntax table. */
4b4deea2 154 cp->syntax_table = cp->buf.used_syntax ? BVAR (current_buffer, syntax_table) : Qt;
58e95211 155
bbc73b48
KH
156 re_set_whitespace_regexp (NULL);
157
b819a390 158 re_set_syntax (old);
4d7e6e51 159 /* unblock_input (); */
ca1d1d23 160 if (val)
06f77a8a 161 xsignal1 (Qinvalid_regexp, build_string (val));
1113d9db 162
487282dc 163 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
164}
165
6efc7887
RS
166/* Shrink each compiled regexp buffer in the cache
167 to the size actually used right now.
168 This is called from garbage collection. */
169
170void
971de7fb 171shrink_regexp_cache (void)
6efc7887 172{
a968f437 173 struct regexp_cache *cp;
6efc7887
RS
174
175 for (cp = searchbuf_head; cp != 0; cp = cp->next)
176 {
177 cp->buf.allocated = cp->buf.used;
38182d90 178 cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
6efc7887
RS
179 }
180}
181
54dd3310
SM
182/* Clear the regexp cache w.r.t. a particular syntax table,
183 because it was changed.
e5b94d44
CY
184 There is no danger of memory leak here because re_compile_pattern
185 automagically manages the memory in each re_pattern_buffer struct,
186 based on its `allocated' and `buffer' values. */
187void
971de7fb 188clear_regexp_cache (void)
e5b94d44
CY
189{
190 int i;
191
192 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
920fd1fc
EZ
193 /* It's tempting to compare with the syntax-table we've actually changed,
194 but it's not sufficient because char-table inheritance means that
54dd3310
SM
195 modifying one syntax-table can change others at the same time. */
196 if (!EQ (searchbufs[i].syntax_table, Qt))
197 searchbufs[i].regexp = Qnil;
e5b94d44
CY
198}
199
487282dc 200/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
201 the cache.
202 PATTERN is the pattern to compile.
facdc750 203 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
204 REGP is the structure that says where to store the "register"
205 values that will result from matching this pattern.
206 If it is 0, we should compile the pattern not to record any
207 subexpression bounds.
208 POSIX is nonzero if we want full backtracking (POSIX style)
209 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
210
211struct re_pattern_buffer *
0063fdb1
DA
212compile_pattern (Lisp_Object pattern, struct re_registers *regp,
213 Lisp_Object translate, int posix, bool multibyte)
487282dc
KH
214{
215 struct regexp_cache *cp, **cpp;
216
217 for (cpp = &searchbuf_head; ; cpp = &cp->next)
218 {
219 cp = *cpp;
f1b9c7c1
KR
220 /* Entries are initialized to nil, and may be set to nil by
221 compile_pattern_1 if the pattern isn't valid. Don't apply
49a5f770
KR
222 string accessors in those cases. However, compile_pattern_1
223 is only applied to the cache entry we pick here to reuse. So
224 nil should never appear before a non-nil entry. */
7c752c80 225 if (NILP (cp->regexp))
f1b9c7c1 226 goto compile_it;
d5db4077 227 if (SCHARS (cp->regexp) == SCHARS (pattern)
cf69b13e 228 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
1d288aef 229 && !NILP (Fstring_equal (cp->regexp, pattern))
59fab369 230 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
5679531d 231 && cp->posix == posix
58e95211 232 && (EQ (cp->syntax_table, Qt)
4b4deea2 233 || EQ (cp->syntax_table, BVAR (current_buffer, syntax_table)))
89ad6492
KH
234 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
235 && cp->buf.charset_unibyte == charset_unibyte)
487282dc
KH
236 break;
237
f1b9c7c1
KR
238 /* If we're at the end of the cache, compile into the nil cell
239 we found, or the last (least recently used) cell with a
240 string value. */
487282dc
KH
241 if (cp->next == 0)
242 {
f1b9c7c1 243 compile_it:
cfe0661d 244 compile_pattern_1 (cp, pattern, translate, posix);
487282dc
KH
245 break;
246 }
247 }
248
249 /* When we get here, cp (aka *cpp) contains the compiled pattern,
250 either because we found it in the cache or because we just compiled it.
251 Move it to the front of the queue to mark it as most recently used. */
252 *cpp = cp->next;
253 cp->next = searchbuf_head;
254 searchbuf_head = cp;
1113d9db 255
6639708c
RS
256 /* Advise the searching functions about the space we have allocated
257 for register data. */
258 if (regp)
259 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
260
78edd3b7 261 /* The compiled pattern can be used both for multibyte and unibyte
89ad6492
KH
262 target. But, we have to tell which the pattern is used for. */
263 cp->buf.target_multibyte = multibyte;
264
487282dc 265 return &cp->buf;
ca1d1d23
JB
266}
267
ca1d1d23 268\f
b819a390 269static Lisp_Object
971de7fb 270looking_at_1 (Lisp_Object string, int posix)
ca1d1d23
JB
271{
272 Lisp_Object val;
273 unsigned char *p1, *p2;
d311d28c
PE
274 ptrdiff_t s1, s2;
275 register ptrdiff_t i;
487282dc 276 struct re_pattern_buffer *bufp;
ca1d1d23 277
7074fde6
FP
278 if (running_asynch_code)
279 save_search_regs ();
280
910c747a 281 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
34dabdb7 282 set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
a098c930 283 BVAR (current_buffer, case_eqv_table));
910c747a 284
b7826503 285 CHECK_STRING (string);
ef887810
RS
286 bufp = compile_pattern (string,
287 (NILP (Vinhibit_changing_match_data)
288 ? &search_regs : NULL),
4b4deea2
TT
289 (!NILP (BVAR (current_buffer, case_fold_search))
290 ? BVAR (current_buffer, case_canon_table) : Qnil),
0c8533c6 291 posix,
4b4deea2 292 !NILP (BVAR (current_buffer, enable_multibyte_characters)));
ca1d1d23
JB
293
294 immediate_quit = 1;
295 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
296
297 /* Get pointers and sizes of the two strings
298 that make up the visible portion of the buffer. */
299
300 p1 = BEGV_ADDR;
fa8ed3e0 301 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 302 p2 = GAP_END_ADDR;
fa8ed3e0 303 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
304 if (s1 < 0)
305 {
306 p2 = p1;
fa8ed3e0 307 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
308 s1 = 0;
309 }
310 if (s2 < 0)
311 {
fa8ed3e0 312 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
313 s2 = 0;
314 }
8bb43c28
RS
315
316 re_match_object = Qnil;
177c0ea7 317
487282dc 318 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
ef887810
RS
319 PT_BYTE - BEGV_BYTE,
320 (NILP (Vinhibit_changing_match_data)
321 ? &search_regs : NULL),
fa8ed3e0 322 ZV_BYTE - BEGV_BYTE);
de182d70 323 immediate_quit = 0;
177c0ea7 324
ca1d1d23
JB
325 if (i == -2)
326 matcher_overflow ();
327
328 val = (0 <= i ? Qt : Qnil);
ef887810 329 if (NILP (Vinhibit_changing_match_data) && i >= 0)
fa8ed3e0
RS
330 for (i = 0; i < search_regs.num_regs; i++)
331 if (search_regs.start[i] >= 0)
332 {
333 search_regs.start[i]
334 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
335 search_regs.end[i]
336 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
337 }
ef887810
RS
338
339 /* Set last_thing_searched only when match data is changed. */
340 if (NILP (Vinhibit_changing_match_data))
341 XSETBUFFER (last_thing_searched, current_buffer);
342
ca1d1d23
JB
343 return val;
344}
345
b819a390 346DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
8c1a1077
PJ
347 doc: /* Return t if text after point matches regular expression REGEXP.
348This function modifies the match data that `match-beginning',
349`match-end' and `match-data' access; save and restore the match
350data if you want to preserve them. */)
5842a27b 351 (Lisp_Object regexp)
b819a390 352{
94f94972 353 return looking_at_1 (regexp, 0);
b819a390
RS
354}
355
356DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
8c1a1077
PJ
357 doc: /* Return t if text after point matches regular expression REGEXP.
358Find the longest match, in accord with Posix regular expression rules.
359This function modifies the match data that `match-beginning',
360`match-end' and `match-data' access; save and restore the match
361data if you want to preserve them. */)
5842a27b 362 (Lisp_Object regexp)
b819a390 363{
94f94972 364 return looking_at_1 (regexp, 1);
b819a390
RS
365}
366\f
367static Lisp_Object
971de7fb 368string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, int posix)
ca1d1d23 369{
d311d28c 370 ptrdiff_t val;
487282dc 371 struct re_pattern_buffer *bufp;
d311d28c
PE
372 EMACS_INT pos;
373 ptrdiff_t pos_byte, i;
ca1d1d23 374
7074fde6
FP
375 if (running_asynch_code)
376 save_search_regs ();
377
b7826503
PJ
378 CHECK_STRING (regexp);
379 CHECK_STRING (string);
ca1d1d23
JB
380
381 if (NILP (start))
0c8533c6 382 pos = 0, pos_byte = 0;
ca1d1d23
JB
383 else
384 {
d311d28c 385 ptrdiff_t len = SCHARS (string);
ca1d1d23 386
b7826503 387 CHECK_NUMBER (start);
0c8533c6
RS
388 pos = XINT (start);
389 if (pos < 0 && -pos <= len)
390 pos = len + pos;
391 else if (0 > pos || pos > len)
ca1d1d23 392 args_out_of_range (string, start);
0c8533c6 393 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
394 }
395
910c747a 396 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
34dabdb7 397 set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
a098c930 398 BVAR (current_buffer, case_eqv_table));
910c747a 399
ef887810
RS
400 bufp = compile_pattern (regexp,
401 (NILP (Vinhibit_changing_match_data)
402 ? &search_regs : NULL),
4b4deea2
TT
403 (!NILP (BVAR (current_buffer, case_fold_search))
404 ? BVAR (current_buffer, case_canon_table) : Qnil),
0c8533c6
RS
405 posix,
406 STRING_MULTIBYTE (string));
ca1d1d23 407 immediate_quit = 1;
8bb43c28 408 re_match_object = string;
177c0ea7 409
51b59d79 410 val = re_search (bufp, SSDATA (string),
d5db4077
KR
411 SBYTES (string), pos_byte,
412 SBYTES (string) - pos_byte,
ef887810
RS
413 (NILP (Vinhibit_changing_match_data)
414 ? &search_regs : NULL));
ca1d1d23 415 immediate_quit = 0;
ef887810
RS
416
417 /* Set last_thing_searched only when match data is changed. */
418 if (NILP (Vinhibit_changing_match_data))
419 last_thing_searched = Qt;
420
ca1d1d23
JB
421 if (val == -2)
422 matcher_overflow ();
423 if (val < 0) return Qnil;
0c8533c6 424
ef887810
RS
425 if (NILP (Vinhibit_changing_match_data))
426 for (i = 0; i < search_regs.num_regs; i++)
427 if (search_regs.start[i] >= 0)
428 {
429 search_regs.start[i]
430 = string_byte_to_char (string, search_regs.start[i]);
431 search_regs.end[i]
432 = string_byte_to_char (string, search_regs.end[i]);
433 }
0c8533c6
RS
434
435 return make_number (string_byte_to_char (string, val));
ca1d1d23 436}
e59a8453 437
a7ca3326 438DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
8c1a1077 439 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
b85acc4b 440Matching ignores case if `case-fold-search' is non-nil.
8c1a1077
PJ
441If third arg START is non-nil, start search at that index in STRING.
442For index of first char beyond the match, do (match-end 0).
443`match-end' and `match-beginning' also give indices of substrings
2bd2f32d
RS
444matched by parenthesis constructs in the pattern.
445
446You can use the function `match-string' to extract the substrings
447matched by the parenthesis constructions in REGEXP. */)
5842a27b 448 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
b819a390
RS
449{
450 return string_match_1 (regexp, string, start, 0);
451}
452
453DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
8c1a1077
PJ
454 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
455Find the longest match, in accord with Posix regular expression rules.
456Case is ignored if `case-fold-search' is non-nil in the current buffer.
457If third arg START is non-nil, start search at that index in STRING.
458For index of first char beyond the match, do (match-end 0).
459`match-end' and `match-beginning' also give indices of substrings
460matched by parenthesis constructs in the pattern. */)
5842a27b 461 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
b819a390
RS
462{
463 return string_match_1 (regexp, string, start, 1);
464}
465
e59a8453
RS
466/* Match REGEXP against STRING, searching all of STRING,
467 and return the index of the match, or negative on failure.
468 This does not clobber the match data. */
469
d311d28c 470ptrdiff_t
971de7fb 471fast_string_match (Lisp_Object regexp, Lisp_Object string)
e59a8453 472{
d311d28c 473 ptrdiff_t val;
487282dc 474 struct re_pattern_buffer *bufp;
e59a8453 475
facdc750
RS
476 bufp = compile_pattern (regexp, 0, Qnil,
477 0, STRING_MULTIBYTE (string));
e59a8453 478 immediate_quit = 1;
8bb43c28 479 re_match_object = string;
177c0ea7 480
51b59d79 481 val = re_search (bufp, SSDATA (string),
d5db4077
KR
482 SBYTES (string), 0,
483 SBYTES (string), 0);
e59a8453
RS
484 immediate_quit = 0;
485 return val;
486}
5679531d
KH
487
488/* Match REGEXP against STRING, searching all of STRING ignoring case,
489 and return the index of the match, or negative on failure.
0c8533c6
RS
490 This does not clobber the match data.
491 We assume that STRING contains single-byte characters. */
5679531d 492
d311d28c 493ptrdiff_t
d923b542
DA
494fast_c_string_match_ignore_case (Lisp_Object regexp,
495 const char *string, ptrdiff_t len)
5679531d 496{
d311d28c 497 ptrdiff_t val;
5679531d 498 struct re_pattern_buffer *bufp;
5679531d 499
0c8533c6 500 regexp = string_make_unibyte (regexp);
b4577c63 501 re_match_object = Qt;
5679531d 502 bufp = compile_pattern (regexp, 0,
0190922f 503 Vascii_canon_table, 0,
f8bd51c4 504 0);
5679531d
KH
505 immediate_quit = 1;
506 val = re_search (bufp, string, len, 0, len, 0);
507 immediate_quit = 0;
508 return val;
509}
be5f4dfb
KH
510
511/* Like fast_string_match but ignore case. */
512
d311d28c 513ptrdiff_t
971de7fb 514fast_string_match_ignore_case (Lisp_Object regexp, Lisp_Object string)
be5f4dfb 515{
d311d28c 516 ptrdiff_t val;
be5f4dfb
KH
517 struct re_pattern_buffer *bufp;
518
0190922f 519 bufp = compile_pattern (regexp, 0, Vascii_canon_table,
be5f4dfb
KH
520 0, STRING_MULTIBYTE (string));
521 immediate_quit = 1;
522 re_match_object = string;
523
51b59d79 524 val = re_search (bufp, SSDATA (string),
be5f4dfb
KH
525 SBYTES (string), 0,
526 SBYTES (string), 0);
527 immediate_quit = 0;
528 return val;
529}
ca1d1d23 530\f
a80ce213 531/* Match REGEXP against the characters after POS to LIMIT, and return
8b13507a
KH
532 the number of matched characters. If STRING is non-nil, match
533 against the characters in it. In that case, POS and LIMIT are
534 indices into the string. This function doesn't modify the match
535 data. */
536
d311d28c 537ptrdiff_t
0063fdb1
DA
538fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
539 ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string)
8b13507a 540{
0063fdb1 541 bool multibyte;
8b13507a
KH
542 struct re_pattern_buffer *buf;
543 unsigned char *p1, *p2;
d311d28c
PE
544 ptrdiff_t s1, s2;
545 ptrdiff_t len;
78edd3b7 546
8b13507a
KH
547 if (STRINGP (string))
548 {
549 if (pos_byte < 0)
550 pos_byte = string_char_to_byte (string, pos);
551 if (limit_byte < 0)
552 limit_byte = string_char_to_byte (string, limit);
553 p1 = NULL;
554 s1 = 0;
555 p2 = SDATA (string);
556 s2 = SBYTES (string);
557 re_match_object = string;
558 multibyte = STRING_MULTIBYTE (string);
559 }
560 else
561 {
562 if (pos_byte < 0)
563 pos_byte = CHAR_TO_BYTE (pos);
564 if (limit_byte < 0)
565 limit_byte = CHAR_TO_BYTE (limit);
566 pos_byte -= BEGV_BYTE;
567 limit_byte -= BEGV_BYTE;
568 p1 = BEGV_ADDR;
569 s1 = GPT_BYTE - BEGV_BYTE;
570 p2 = GAP_END_ADDR;
571 s2 = ZV_BYTE - GPT_BYTE;
572 if (s1 < 0)
573 {
574 p2 = p1;
575 s2 = ZV_BYTE - BEGV_BYTE;
576 s1 = 0;
577 }
578 if (s2 < 0)
579 {
580 s1 = ZV_BYTE - BEGV_BYTE;
581 s2 = 0;
582 }
583 re_match_object = Qnil;
4b4deea2 584 multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
8b13507a
KH
585 }
586
587 buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
588 immediate_quit = 1;
589 len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
590 pos_byte, NULL, limit_byte);
591 immediate_quit = 0;
592
593 return len;
594}
595
596\f
9169c321
JB
597/* The newline cache: remembering which sections of text have no newlines. */
598
599/* If the user has requested newline caching, make sure it's on.
600 Otherwise, make sure it's off.
601 This is our cheezy way of associating an action with the change of
602 state of a buffer-local variable. */
603static void
971de7fb 604newline_cache_on_off (struct buffer *buf)
9169c321 605{
4b4deea2 606 if (NILP (BVAR (buf, cache_long_line_scans)))
9169c321
JB
607 {
608 /* It should be off. */
609 if (buf->newline_cache)
610 {
611 free_region_cache (buf->newline_cache);
612 buf->newline_cache = 0;
613 }
614 }
615 else
616 {
617 /* It should be on. */
618 if (buf->newline_cache == 0)
619 buf->newline_cache = new_region_cache ();
620 }
621}
622
623\f
a84b7c53 624/* Search for COUNT newlines between START and END.
9169c321
JB
625
626 If COUNT is positive, search forwards; END must be >= START.
627 If COUNT is negative, search backwards for the -COUNTth instance;
628 END must be <= START.
629 If COUNT is zero, do anything you please; run rogue, for all I care.
630
631 If END is zero, use BEGV or ZV instead, as appropriate for the
632 direction indicated by COUNT.
ffd56f97
JB
633
634 If we find COUNT instances, set *SHORTAGE to zero, and return the
a9f2a45f 635 position past the COUNTth match. Note that for reverse motion
5bfe95c9 636 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 637
9169c321 638 If we don't find COUNT instances before reaching END, set *SHORTAGE
a84b7c53 639 to the number of newlines left unfound, and return END.
ffd56f97 640
2a14a4f1
DA
641 If BYTEPOS is not NULL, set *BYTEPOS to the byte position corresponding
642 to the returned character position.
643
9fa1de30 644 If ALLOW_QUIT, set immediate_quit. That's good to do
087a5f81
RS
645 except when inside redisplay. */
646
d311d28c 647ptrdiff_t
2a14a4f1
DA
648find_newline (ptrdiff_t start, ptrdiff_t end, ptrdiff_t count,
649 ptrdiff_t *shortage, ptrdiff_t *bytepos, bool allow_quit)
ca1d1d23 650{
9169c321 651 struct region_cache *newline_cache;
2a14a4f1 652 ptrdiff_t start_byte = -1, end_byte = -1;
177c0ea7 653 int direction;
ffd56f97 654
9169c321
JB
655 if (count > 0)
656 {
657 direction = 1;
8ca30920
DA
658 if (!end)
659 end = ZV, end_byte = ZV_BYTE;
9169c321
JB
660 }
661 else
662 {
663 direction = -1;
a84b7c53 664 if (!end)
8ca30920 665 end = BEGV, end_byte = BEGV_BYTE;
9169c321 666 }
8ca30920
DA
667 if (end_byte == -1)
668 end_byte = CHAR_TO_BYTE (end);
ffd56f97 669
9169c321
JB
670 newline_cache_on_off (current_buffer);
671 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
672
673 if (shortage != 0)
674 *shortage = 0;
675
087a5f81 676 immediate_quit = allow_quit;
ca1d1d23 677
ffd56f97 678 if (count > 0)
9169c321 679 while (start != end)
ca1d1d23 680 {
9169c321
JB
681 /* Our innermost scanning loop is very simple; it doesn't know
682 about gaps, buffer ends, or the newline cache. ceiling is
683 the position of the last character before the next such
684 obstacle --- the last character the dumb search loop should
685 examine. */
2a14a4f1 686 ptrdiff_t tem, ceiling_byte = end_byte - 1;
9169c321
JB
687
688 /* If we're looking for a newline, consult the newline cache
689 to see where we can avoid some scanning. */
a84b7c53 690 if (newline_cache)
9169c321 691 {
0065d054 692 ptrdiff_t next_change;
9169c321
JB
693 immediate_quit = 0;
694 while (region_cache_forward
c8b9f1bc
EZ
695 (current_buffer, newline_cache, start, &next_change))
696 start = next_change;
cbe0db0d 697 immediate_quit = allow_quit;
9169c321 698
c8b9f1bc
EZ
699 start_byte = CHAR_TO_BYTE (start);
700
fa8ed3e0
RS
701 /* START should never be after END. */
702 if (start_byte > ceiling_byte)
703 start_byte = ceiling_byte;
9169c321
JB
704
705 /* Now the text after start is an unknown region, and
706 next_change is the position of the next known region. */
c8b9f1bc 707 ceiling_byte = min (CHAR_TO_BYTE (next_change) - 1, ceiling_byte);
9169c321 708 }
c8b9f1bc
EZ
709 else
710 start_byte = CHAR_TO_BYTE (start);
9169c321
JB
711
712 /* The dumb loop can only scan text stored in contiguous
713 bytes. BUFFER_CEILING_OF returns the last character
714 position that is contiguous, so the ceiling is the
715 position after that. */
67ce527d
KH
716 tem = BUFFER_CEILING_OF (start_byte);
717 ceiling_byte = min (tem, ceiling_byte);
9169c321
JB
718
719 {
177c0ea7 720 /* The termination address of the dumb loop. */
fa8ed3e0
RS
721 register unsigned char *ceiling_addr
722 = BYTE_POS_ADDR (ceiling_byte) + 1;
723 register unsigned char *cursor
724 = BYTE_POS_ADDR (start_byte);
9169c321
JB
725 unsigned char *base = cursor;
726
727 while (cursor < ceiling_addr)
728 {
9169c321 729 /* The dumb loop. */
a84b7c53 730 unsigned char *nl = memchr (cursor, '\n', ceiling_addr - cursor);
9169c321
JB
731
732 /* If we're looking for newlines, cache the fact that
733 the region from start to cursor is free of them. */
a84b7c53
PE
734 if (newline_cache)
735 {
736 unsigned char *low = cursor;
737 unsigned char *lim = nl ? nl : ceiling_addr;
738 know_region_cache (current_buffer, newline_cache,
739 BYTE_TO_CHAR (low - base + start_byte),
740 BYTE_TO_CHAR (lim - base + start_byte));
741 }
742
743 if (! nl)
744 break;
745
746 if (--count == 0)
747 {
748 immediate_quit = 0;
2a14a4f1
DA
749 if (bytepos)
750 *bytepos = nl + 1 - base + start_byte;
a84b7c53
PE
751 return BYTE_TO_CHAR (nl + 1 - base + start_byte);
752 }
753 cursor = nl + 1;
9169c321
JB
754 }
755
2a14a4f1
DA
756 start_byte += ceiling_addr - base;
757 start = BYTE_TO_CHAR (start_byte);
9169c321 758 }
ca1d1d23
JB
759 }
760 else
9169c321
JB
761 while (start > end)
762 {
763 /* The last character to check before the next obstacle. */
2a14a4f1 764 ptrdiff_t tem, ceiling_byte = end_byte;
9169c321
JB
765
766 /* Consult the newline cache, if appropriate. */
a84b7c53 767 if (newline_cache)
9169c321 768 {
0065d054 769 ptrdiff_t next_change;
9169c321
JB
770 immediate_quit = 0;
771 while (region_cache_backward
c8b9f1bc
EZ
772 (current_buffer, newline_cache, start, &next_change))
773 start = next_change;
cbe0db0d 774 immediate_quit = allow_quit;
9169c321 775
c8b9f1bc
EZ
776 start_byte = CHAR_TO_BYTE (start);
777
9169c321 778 /* Start should never be at or before end. */
fa8ed3e0
RS
779 if (start_byte <= ceiling_byte)
780 start_byte = ceiling_byte + 1;
9169c321
JB
781
782 /* Now the text before start is an unknown region, and
783 next_change is the position of the next known region. */
c8b9f1bc 784 ceiling_byte = max (CHAR_TO_BYTE (next_change), ceiling_byte);
9169c321 785 }
c8b9f1bc
EZ
786 else
787 start_byte = CHAR_TO_BYTE (start);
9169c321
JB
788
789 /* Stop scanning before the gap. */
67ce527d
KH
790 tem = BUFFER_FLOOR_OF (start_byte - 1);
791 ceiling_byte = max (tem, ceiling_byte);
9169c321
JB
792
793 {
794 /* The termination address of the dumb loop. */
fa8ed3e0
RS
795 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
796 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
797 unsigned char *base = cursor;
798
799 while (cursor >= ceiling_addr)
800 {
a84b7c53
PE
801 unsigned char *nl = memrchr (ceiling_addr, '\n',
802 cursor + 1 - ceiling_addr);
9169c321
JB
803
804 /* If we're looking for newlines, cache the fact that
805 the region from after the cursor to start is free of them. */
a84b7c53
PE
806 if (newline_cache)
807 {
808 unsigned char *low = nl ? nl : ceiling_addr - 1;
809 unsigned char *lim = cursor;
810 know_region_cache (current_buffer, newline_cache,
811 BYTE_TO_CHAR (low - base + start_byte),
812 BYTE_TO_CHAR (lim - base + start_byte));
813 }
814
815 if (! nl)
816 break;
817
818 if (++count >= 0)
819 {
820 immediate_quit = 0;
2a14a4f1
DA
821 if (bytepos)
822 *bytepos = nl - base + start_byte;
a84b7c53
PE
823 return BYTE_TO_CHAR (nl - base + start_byte);
824 }
825 cursor = nl - 1;
9169c321
JB
826 }
827
2a14a4f1
DA
828 start_byte += ceiling_addr - 1 - base;
829 start = BYTE_TO_CHAR (start_byte);
9169c321
JB
830 }
831 }
832
ca1d1d23 833 immediate_quit = 0;
2a14a4f1 834 if (shortage)
ffd56f97 835 *shortage = count * direction;
2a14a4f1
DA
836 if (bytepos)
837 {
838 *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
839 eassert (*bytepos == CHAR_TO_BYTE (start));
840 }
9169c321 841 return start;
ca1d1d23 842}
fa8ed3e0 843\f
a84b7c53 844/* Search for COUNT instances of a line boundary.
fa8ed3e0
RS
845 Start at START. If COUNT is negative, search backwards.
846
847 We report the resulting position by calling TEMP_SET_PT_BOTH.
848
849 If we find COUNT instances. we position after (always after,
850 even if scanning backwards) the COUNTth match, and return 0.
851
852 If we don't find COUNT instances before reaching the end of the
853 buffer (or the beginning, if scanning backwards), we return
854 the number of line boundaries left unfound, and position at
855 the limit we bumped up against.
856
9fa1de30 857 If ALLOW_QUIT, set immediate_quit. That's good to do
d5d57b92 858 except in special cases. */
ca1d1d23 859
c098fdb8 860EMACS_INT
d311d28c
PE
861scan_newline (ptrdiff_t start, ptrdiff_t start_byte,
862 ptrdiff_t limit, ptrdiff_t limit_byte,
9fa1de30 863 EMACS_INT count, bool allow_quit)
63fa018d 864{
fa8ed3e0
RS
865 int direction = ((count > 0) ? 1 : -1);
866
9fa1de30 867 unsigned char *cursor;
fa8ed3e0
RS
868 unsigned char *base;
869
d311d28c 870 ptrdiff_t ceiling;
9fa1de30 871 unsigned char *ceiling_addr;
fa8ed3e0 872
9fa1de30 873 bool old_immediate_quit = immediate_quit;
d5d57b92 874
d5d57b92
RS
875 if (allow_quit)
876 immediate_quit++;
fa8ed3e0 877
fa8ed3e0
RS
878 if (count > 0)
879 {
880 while (start_byte < limit_byte)
881 {
882 ceiling = BUFFER_CEILING_OF (start_byte);
883 ceiling = min (limit_byte - 1, ceiling);
884 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
885 base = (cursor = BYTE_POS_ADDR (start_byte));
fa8ed3e0 886
a84b7c53
PE
887 do
888 {
889 unsigned char *nl = memchr (cursor, '\n', ceiling_addr - cursor);
890 if (! nl)
891 break;
892 if (--count == 0)
fa8ed3e0 893 {
a84b7c53
PE
894 immediate_quit = old_immediate_quit;
895 start_byte += nl - base + 1;
896 start = BYTE_TO_CHAR (start_byte);
897 TEMP_SET_PT_BOTH (start, start_byte);
898 return 0;
fa8ed3e0 899 }
a84b7c53 900 cursor = nl + 1;
fa8ed3e0 901 }
a84b7c53
PE
902 while (cursor < ceiling_addr);
903
904 start_byte += ceiling_addr - base;
fa8ed3e0
RS
905 }
906 }
907 else
908 {
fa8ed3e0
RS
909 while (start_byte > limit_byte)
910 {
911 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
912 ceiling = max (limit_byte, ceiling);
a84b7c53 913 ceiling_addr = BYTE_POS_ADDR (ceiling);
fa8ed3e0
RS
914 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
915 while (1)
916 {
a84b7c53
PE
917 unsigned char *nl = memrchr (ceiling_addr, '\n',
918 cursor - ceiling_addr);
919 if (! nl)
920 break;
fa8ed3e0 921
a84b7c53 922 if (++count == 0)
fa8ed3e0 923 {
a84b7c53
PE
924 immediate_quit = old_immediate_quit;
925 /* Return the position AFTER the match we found. */
926 start_byte += nl - base + 1;
927 start = BYTE_TO_CHAR (start_byte);
928 TEMP_SET_PT_BOTH (start, start_byte);
929 return 0;
fa8ed3e0 930 }
a84b7c53
PE
931
932 cursor = nl;
fa8ed3e0 933 }
a84b7c53 934 start_byte += ceiling_addr - base;
fa8ed3e0
RS
935 }
936 }
937
938 TEMP_SET_PT_BOTH (limit, limit_byte);
d5d57b92 939 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
940
941 return count * direction;
63fa018d
RS
942}
943
a611149e
EZ
944/* Like find_newline, but doesn't allow QUITting and doesn't return
945 SHORTAGE. */
d311d28c 946ptrdiff_t
a611149e 947find_newline_no_quit (ptrdiff_t from, ptrdiff_t cnt, ptrdiff_t *bytepos)
ca1d1d23 948{
2a14a4f1 949 return find_newline (from, 0, cnt, NULL, bytepos, 0);
9169c321
JB
950}
951
a611149e
EZ
952/* Like find_newline, but returns position before the newline, not
953 after, and only search up to TO.
954 This isn't just find_newline_no_quit (...)-1, because you might hit TO. */
fa8ed3e0 955
d311d28c 956ptrdiff_t
2a14a4f1
DA
957find_before_next_newline (ptrdiff_t from, ptrdiff_t to,
958 ptrdiff_t cnt, ptrdiff_t *bytepos)
9169c321 959{
d311d28c 960 ptrdiff_t shortage;
2a14a4f1 961 ptrdiff_t pos = find_newline (from, to, cnt, &shortage, bytepos, 1);
9169c321
JB
962
963 if (shortage == 0)
2a14a4f1
DA
964 {
965 if (bytepos)
966 DEC_BOTH (pos, *bytepos);
967 else
968 pos--;
969 }
9169c321 970 return pos;
ca1d1d23
JB
971}
972\f
ca1d1d23
JB
973/* Subroutines of Lisp buffer search functions. */
974
975static Lisp_Object
c098fdb8
EZ
976search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror,
977 Lisp_Object count, int direction, int RE, int posix)
ca1d1d23 978{
a53e2e89 979 register EMACS_INT np;
d311d28c
PE
980 EMACS_INT lim;
981 ptrdiff_t lim_byte;
a53e2e89 982 EMACS_INT n = direction;
ca1d1d23
JB
983
984 if (!NILP (count))
985 {
b7826503 986 CHECK_NUMBER (count);
ca1d1d23
JB
987 n *= XINT (count);
988 }
989
b7826503 990 CHECK_STRING (string);
ca1d1d23 991 if (NILP (bound))
9f43ad85
RS
992 {
993 if (n > 0)
994 lim = ZV, lim_byte = ZV_BYTE;
995 else
996 lim = BEGV, lim_byte = BEGV_BYTE;
997 }
ca1d1d23
JB
998 else
999 {
b7826503 1000 CHECK_NUMBER_COERCE_MARKER (bound);
ca1d1d23 1001 lim = XINT (bound);
6ec8bbd2 1002 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
1003 error ("Invalid search bound (wrong side of point)");
1004 if (lim > ZV)
9f43ad85 1005 lim = ZV, lim_byte = ZV_BYTE;
588d2fd5 1006 else if (lim < BEGV)
9f43ad85 1007 lim = BEGV, lim_byte = BEGV_BYTE;
588d2fd5
KH
1008 else
1009 lim_byte = CHAR_TO_BYTE (lim);
ca1d1d23
JB
1010 }
1011
910c747a 1012 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
34dabdb7 1013 set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
a098c930 1014 BVAR (current_buffer, case_eqv_table));
910c747a 1015
9f43ad85 1016 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
4b4deea2
TT
1017 (!NILP (BVAR (current_buffer, case_fold_search))
1018 ? BVAR (current_buffer, case_canon_table)
3135e9fd 1019 : Qnil),
4b4deea2
TT
1020 (!NILP (BVAR (current_buffer, case_fold_search))
1021 ? BVAR (current_buffer, case_eqv_table)
3135e9fd 1022 : Qnil),
b819a390 1023 posix);
ca1d1d23
JB
1024 if (np <= 0)
1025 {
1026 if (NILP (noerror))
06f77a8a
KS
1027 xsignal1 (Qsearch_failed, string);
1028
ca1d1d23
JB
1029 if (!EQ (noerror, Qt))
1030 {
8ca30920 1031 eassert (BEGV <= lim && lim <= ZV);
9f43ad85 1032 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
1033 return Qnil;
1034#if 0 /* This would be clean, but maybe programs depend on
1035 a value of nil here. */
481399bf 1036 np = lim;
a5f217b8 1037#endif
ca1d1d23 1038 }
481399bf
RS
1039 else
1040 return Qnil;
ca1d1d23
JB
1041 }
1042
8ca30920 1043 eassert (BEGV <= np && np <= ZV);
ca1d1d23
JB
1044 SET_PT (np);
1045
1046 return make_number (np);
1047}
1048\f
fa8ed3e0
RS
1049/* Return 1 if REGEXP it matches just one constant string. */
1050
b6d6a51c 1051static int
971de7fb 1052trivial_regexp_p (Lisp_Object regexp)
b6d6a51c 1053{
d311d28c 1054 ptrdiff_t len = SBYTES (regexp);
d5db4077 1055 unsigned char *s = SDATA (regexp);
b6d6a51c
KH
1056 while (--len >= 0)
1057 {
1058 switch (*s++)
1059 {
1060 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1061 return 0;
1062 case '\\':
1063 if (--len < 0)
1064 return 0;
1065 switch (*s++)
1066 {
1067 case '|': case '(': case ')': case '`': case '\'': case 'b':
1068 case 'B': case '<': case '>': case 'w': case 'W': case 's':
29f89fe7 1069 case 'S': case '=': case '{': case '}': case '_':
5679531d 1070 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 1071 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
1072 case '6': case '7': case '8': case '9':
1073 return 0;
1074 }
1075 }
1076 }
1077 return 1;
1078}
1079
ca325161 1080/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 1081 starting at position POS and stopping at position LIM,
b819a390 1082 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
1083 a regular expression if RE is true.
1084
1085 If N is positive, searching is forward and LIM must be greater than POS.
1086 If N is negative, searching is backward and LIM must be less than POS.
1087
facdc750 1088 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 1089 or else the position at the beginning of the Nth occurrence
b819a390
RS
1090 (if searching backward) or the end (if searching forward).
1091
1092 POSIX is nonzero if we want full backtracking (POSIX style)
1093 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 1094
aff2ce94
RS
1095#define TRANSLATE(out, trt, d) \
1096do \
1097 { \
1098 if (! NILP (trt)) \
1099 { \
1100 Lisp_Object temp; \
1101 temp = Faref (trt, make_number (d)); \
1102 if (INTEGERP (temp)) \
1103 out = XINT (temp); \
1104 else \
1105 out = d; \
1106 } \
1107 else \
1108 out = d; \
1109 } \
1110while (0)
facdc750 1111
ef887810
RS
1112/* Only used in search_buffer, to record the end position of the match
1113 when searching regexps and SEARCH_REGS should not be changed
1114 (i.e. Vinhibit_changing_match_data is non-nil). */
1115static struct re_registers search_regs_1;
1116
e7deaab0 1117static EMACS_INT
d311d28c
PE
1118search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1119 ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
d5a3eaaf 1120 int RE, Lisp_Object trt, Lisp_Object inverse_trt, int posix)
ca1d1d23 1121{
d311d28c
PE
1122 ptrdiff_t len = SCHARS (string);
1123 ptrdiff_t len_byte = SBYTES (string);
1124 register ptrdiff_t i;
ca1d1d23 1125
7074fde6
FP
1126 if (running_asynch_code)
1127 save_search_regs ();
1128
a7e4cdde 1129 /* Searching 0 times means don't move. */
ca1d1d23 1130 /* Null string is found at starting position. */
a7e4cdde 1131 if (len == 0 || n == 0)
ca325161 1132 {
0353b28f 1133 set_search_regs (pos_byte, 0);
ca325161
RS
1134 return pos;
1135 }
3f57a499 1136
41a33295 1137 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
ca1d1d23 1138 {
facdc750 1139 unsigned char *p1, *p2;
d311d28c 1140 ptrdiff_t s1, s2;
487282dc
KH
1141 struct re_pattern_buffer *bufp;
1142
ef887810
RS
1143 bufp = compile_pattern (string,
1144 (NILP (Vinhibit_changing_match_data)
1145 ? &search_regs : &search_regs_1),
1146 trt, posix,
4b4deea2 1147 !NILP (BVAR (current_buffer, enable_multibyte_characters)));
ca1d1d23 1148
ca1d1d23
JB
1149 immediate_quit = 1; /* Quit immediately if user types ^G,
1150 because letting this function finish
1151 can take too long. */
1152 QUIT; /* Do a pending quit right away,
1153 to avoid paradoxical behavior */
1154 /* Get pointers and sizes of the two strings
1155 that make up the visible portion of the buffer. */
1156
1157 p1 = BEGV_ADDR;
fa8ed3e0 1158 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1159 p2 = GAP_END_ADDR;
fa8ed3e0 1160 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1161 if (s1 < 0)
1162 {
1163 p2 = p1;
fa8ed3e0 1164 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1165 s1 = 0;
1166 }
1167 if (s2 < 0)
1168 {
fa8ed3e0 1169 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1170 s2 = 0;
1171 }
8bb43c28 1172 re_match_object = Qnil;
177c0ea7 1173
ca1d1d23
JB
1174 while (n < 0)
1175 {
d311d28c 1176 ptrdiff_t val;
52c55cc7 1177
487282dc 1178 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b 1179 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
ef887810
RS
1180 (NILP (Vinhibit_changing_match_data)
1181 ? &search_regs : &search_regs_1),
42db823b 1182 /* Don't allow match past current point */
4996330b 1183 pos_byte - BEGV_BYTE);
ca1d1d23 1184 if (val == -2)
b6d6a51c
KH
1185 {
1186 matcher_overflow ();
1187 }
ca1d1d23
JB
1188 if (val >= 0)
1189 {
ef887810
RS
1190 if (NILP (Vinhibit_changing_match_data))
1191 {
1192 pos_byte = search_regs.start[0] + BEGV_BYTE;
1193 for (i = 0; i < search_regs.num_regs; i++)
1194 if (search_regs.start[i] >= 0)
1195 {
1196 search_regs.start[i]
1197 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1198 search_regs.end[i]
1199 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1200 }
1201 XSETBUFFER (last_thing_searched, current_buffer);
1202 /* Set pos to the new position. */
1203 pos = search_regs.start[0];
1204 }
1205 else
1206 {
1207 pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1208 /* Set pos to the new position. */
1209 pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1210 }
ca1d1d23
JB
1211 }
1212 else
1213 {
1214 immediate_quit = 0;
1215 return (n);
1216 }
1217 n++;
1218 }
1219 while (n > 0)
1220 {
d311d28c 1221 ptrdiff_t val;
52c55cc7 1222
487282dc 1223 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b 1224 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
ef887810
RS
1225 (NILP (Vinhibit_changing_match_data)
1226 ? &search_regs : &search_regs_1),
4996330b 1227 lim_byte - BEGV_BYTE);
ca1d1d23 1228 if (val == -2)
b6d6a51c
KH
1229 {
1230 matcher_overflow ();
1231 }
ca1d1d23
JB
1232 if (val >= 0)
1233 {
ef887810
RS
1234 if (NILP (Vinhibit_changing_match_data))
1235 {
1236 pos_byte = search_regs.end[0] + BEGV_BYTE;
1237 for (i = 0; i < search_regs.num_regs; i++)
1238 if (search_regs.start[i] >= 0)
1239 {
1240 search_regs.start[i]
1241 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1242 search_regs.end[i]
1243 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1244 }
1245 XSETBUFFER (last_thing_searched, current_buffer);
1246 pos = search_regs.end[0];
1247 }
1248 else
1249 {
1250 pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1251 pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1252 }
ca1d1d23
JB
1253 }
1254 else
1255 {
1256 immediate_quit = 0;
1257 return (0 - n);
1258 }
1259 n--;
1260 }
1261 immediate_quit = 0;
1262 return (pos);
1263 }
1264 else /* non-RE case */
1265 {
facdc750 1266 unsigned char *raw_pattern, *pat;
d311d28c
PE
1267 ptrdiff_t raw_pattern_size;
1268 ptrdiff_t raw_pattern_size_byte;
facdc750 1269 unsigned char *patbuf;
0063fdb1 1270 bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
a967ed62 1271 unsigned char *base_pat;
49e44e81
KH
1272 /* Set to positive if we find a non-ASCII char that need
1273 translation. Otherwise set to zero later. */
1274 int char_base = -1;
040272ce 1275 int boyer_moore_ok = 1;
facdc750
RS
1276
1277 /* MULTIBYTE says whether the text to be searched is multibyte.
1278 We must convert PATTERN to match that, or we will not really
1279 find things right. */
1280
1281 if (multibyte == STRING_MULTIBYTE (string))
1282 {
51b59d79 1283 raw_pattern = SDATA (string);
d5db4077
KR
1284 raw_pattern_size = SCHARS (string);
1285 raw_pattern_size_byte = SBYTES (string);
facdc750
RS
1286 }
1287 else if (multibyte)
1288 {
d5db4077 1289 raw_pattern_size = SCHARS (string);
facdc750 1290 raw_pattern_size_byte
d5db4077 1291 = count_size_as_multibyte (SDATA (string),
facdc750 1292 raw_pattern_size);
38182d90 1293 raw_pattern = alloca (raw_pattern_size_byte + 1);
d5db4077
KR
1294 copy_text (SDATA (string), raw_pattern,
1295 SCHARS (string), 0, 1);
facdc750
RS
1296 }
1297 else
1298 {
1299 /* Converting multibyte to single-byte.
1300
1301 ??? Perhaps this conversion should be done in a special way
1302 by subtracting nonascii-insert-offset from each non-ASCII char,
1303 so that only the multibyte chars which really correspond to
1304 the chosen single-byte character set can possibly match. */
d5db4077
KR
1305 raw_pattern_size = SCHARS (string);
1306 raw_pattern_size_byte = SCHARS (string);
38182d90 1307 raw_pattern = alloca (raw_pattern_size + 1);
d5db4077
KR
1308 copy_text (SDATA (string), raw_pattern,
1309 SBYTES (string), 1, 0);
facdc750
RS
1310 }
1311
1312 /* Copy and optionally translate the pattern. */
1313 len = raw_pattern_size;
1314 len_byte = raw_pattern_size_byte;
38182d90 1315 patbuf = alloca (len * MAX_MULTIBYTE_LENGTH);
facdc750
RS
1316 pat = patbuf;
1317 base_pat = raw_pattern;
1318 if (multibyte)
1319 {
a17ae96a
KH
1320 /* Fill patbuf by translated characters in STRING while
1321 checking if we can use boyer-moore search. If TRT is
1322 non-nil, we can use boyer-moore search only if TRT can be
1323 represented by the byte array of 256 elements. For that,
9858f6c3 1324 all non-ASCII case-equivalents of all case-sensitive
e79d60fe 1325 characters in STRING must belong to the same character
68abf867
KH
1326 group (two characters belong to the same group iff their
1327 multibyte forms are the same except for the last byte;
1328 i.e. every 64 characters form a group; U+0000..U+003F,
e79d60fe 1329 U+0040..U+007F, U+0080..U+00BF, ...). */
a17ae96a 1330
facdc750
RS
1331 while (--len >= 0)
1332 {
a17ae96a 1333 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
aff2ce94 1334 int c, translated, inverse;
a17ae96a 1335 int in_charlen, charlen;
facdc750
RS
1336
1337 /* If we got here and the RE flag is set, it's because we're
1338 dealing with a regexp known to be trivial, so the backslash
1339 just quotes the next character. */
1340 if (RE && *base_pat == '\\')
1341 {
1342 len--;
62221f22 1343 raw_pattern_size--;
facdc750
RS
1344 len_byte--;
1345 base_pat++;
1346 }
1347
62a6e103 1348 c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
040272ce 1349
a17ae96a 1350 if (NILP (trt))
facdc750 1351 {
a17ae96a
KH
1352 str = base_pat;
1353 charlen = in_charlen;
1354 }
1355 else
1356 {
1357 /* Translate the character. */
1358 TRANSLATE (translated, trt, c);
1359 charlen = CHAR_STRING (translated, str_base);
1360 str = str_base;
1361
1362 /* Check if C has any other case-equivalents. */
1363 TRANSLATE (inverse, inverse_trt, c);
1364 /* If so, check if we can use boyer-moore. */
1365 if (c != inverse && boyer_moore_ok)
1366 {
1367 /* Check if all equivalents belong to the same
1368 group of characters. Note that the check of C
49e44e81
KH
1369 itself is done by the last iteration. */
1370 int this_char_base = -1;
1371
1372 while (boyer_moore_ok)
a17ae96a 1373 {
49e44e81 1374 if (ASCII_BYTE_P (inverse))
a17ae96a 1375 {
49e44e81
KH
1376 if (this_char_base > 0)
1377 boyer_moore_ok = 0;
1378 else
dafaabd1 1379 this_char_base = 0;
a17ae96a 1380 }
49e44e81
KH
1381 else if (CHAR_BYTE8_P (inverse))
1382 /* Boyer-moore search can't handle a
1383 translation of an eight-bit
1384 character. */
1385 boyer_moore_ok = 0;
1386 else if (this_char_base < 0)
1387 {
1388 this_char_base = inverse & ~0x3F;
1389 if (char_base < 0)
1390 char_base = this_char_base;
dafaabd1 1391 else if (this_char_base != char_base)
49e44e81
KH
1392 boyer_moore_ok = 0;
1393 }
1394 else if ((inverse & ~0x3F) != this_char_base)
1395 boyer_moore_ok = 0;
a17ae96a
KH
1396 if (c == inverse)
1397 break;
1398 TRANSLATE (inverse, inverse_trt, inverse);
1399 }
1400 }
aff2ce94 1401 }
facdc750
RS
1402
1403 /* Store this character into the translated pattern. */
72af86bd 1404 memcpy (pat, str, charlen);
a17ae96a 1405 pat += charlen;
facdc750
RS
1406 base_pat += in_charlen;
1407 len_byte -= in_charlen;
1408 }
dafaabd1
AS
1409
1410 /* If char_base is still negative we didn't find any translated
1411 non-ASCII characters. */
1412 if (char_base < 0)
1413 char_base = 0;
facdc750
RS
1414 }
1415 else
1416 {
040272ce 1417 /* Unibyte buffer. */
a17ae96a 1418 char_base = 0;
facdc750
RS
1419 while (--len >= 0)
1420 {
e8c6e965 1421 int c, translated, inverse;
facdc750
RS
1422
1423 /* If we got here and the RE flag is set, it's because we're
1424 dealing with a regexp known to be trivial, so the backslash
1425 just quotes the next character. */
1426 if (RE && *base_pat == '\\')
1427 {
1428 len--;
0190922f 1429 raw_pattern_size--;
facdc750
RS
1430 base_pat++;
1431 }
1432 c = *base_pat++;
aff2ce94 1433 TRANSLATE (translated, trt, c);
facdc750 1434 *pat++ = translated;
e8c6e965
EZ
1435 /* Check that none of C's equivalents violates the
1436 assumptions of boyer_moore. */
1437 TRANSLATE (inverse, inverse_trt, c);
1438 while (1)
1439 {
1440 if (inverse >= 0200)
1441 {
1442 boyer_moore_ok = 0;
1443 break;
1444 }
1445 if (c == inverse)
1446 break;
1447 TRANSLATE (inverse, inverse_trt, inverse);
1448 }
facdc750
RS
1449 }
1450 }
1451
1452 len_byte = pat - patbuf;
facdc750
RS
1453 pat = base_pat = patbuf;
1454
040272ce 1455 if (boyer_moore_ok)
1db5b1ad
JB
1456 return boyer_moore (n, pat, len_byte, trt, inverse_trt,
1457 pos_byte, lim_byte,
a17ae96a 1458 char_base);
facdc750 1459 else
1db5b1ad 1460 return simple_search (n, pat, raw_pattern_size, len_byte, trt,
facdc750
RS
1461 pos, pos_byte, lim, lim_byte);
1462 }
1463}
1464\f
1465/* Do a simple string search N times for the string PAT,
1466 whose length is LEN/LEN_BYTE,
1467 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1468 TRT is the translation table.
f8bd51c4 1469
facdc750
RS
1470 Return the character position where the match is found.
1471 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1472
facdc750
RS
1473 This kind of search works regardless of what is in PAT and
1474 regardless of what is in TRT. It is used in cases where
1475 boyer_moore cannot work. */
1476
e7deaab0 1477static EMACS_INT
c098fdb8 1478simple_search (EMACS_INT n, unsigned char *pat,
d311d28c
PE
1479 ptrdiff_t len, ptrdiff_t len_byte, Lisp_Object trt,
1480 ptrdiff_t pos, ptrdiff_t pos_byte,
1481 ptrdiff_t lim, ptrdiff_t lim_byte)
facdc750 1482{
0063fdb1
DA
1483 bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1484 bool forward = n > 0;
49e44e81
KH
1485 /* Number of buffer bytes matched. Note that this may be different
1486 from len_byte in a multibyte buffer. */
8ce70ed2 1487 ptrdiff_t match_byte = PTRDIFF_MIN;
facdc750
RS
1488
1489 if (lim > pos && multibyte)
1490 while (n > 0)
1491 {
1492 while (1)
f8bd51c4 1493 {
facdc750 1494 /* Try matching at position POS. */
d311d28c
PE
1495 ptrdiff_t this_pos = pos;
1496 ptrdiff_t this_pos_byte = pos_byte;
1497 ptrdiff_t this_len = len;
facdc750 1498 unsigned char *p = pat;
a9469498 1499 if (pos + len > lim || pos_byte + len_byte > lim_byte)
facdc750
RS
1500 goto stop;
1501
1502 while (this_len > 0)
1503 {
1504 int charlen, buf_charlen;
ab228c24 1505 int pat_ch, buf_ch;
facdc750 1506
62a6e103 1507 pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
facdc750 1508 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
facdc750 1509 buf_charlen);
aff2ce94 1510 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1511
1512 if (buf_ch != pat_ch)
1513 break;
ab228c24 1514
ab228c24
RS
1515 this_len--;
1516 p += charlen;
1517
1518 this_pos_byte += buf_charlen;
1519 this_pos++;
facdc750
RS
1520 }
1521
1522 if (this_len == 0)
1523 {
49e44e81 1524 match_byte = this_pos_byte - pos_byte;
facdc750 1525 pos += len;
49e44e81 1526 pos_byte += match_byte;
facdc750
RS
1527 break;
1528 }
1529
1530 INC_BOTH (pos, pos_byte);
f8bd51c4 1531 }
facdc750
RS
1532
1533 n--;
1534 }
1535 else if (lim > pos)
1536 while (n > 0)
1537 {
1538 while (1)
f8bd51c4 1539 {
facdc750 1540 /* Try matching at position POS. */
d311d28c
PE
1541 ptrdiff_t this_pos = pos;
1542 ptrdiff_t this_len = len;
facdc750
RS
1543 unsigned char *p = pat;
1544
1545 if (pos + len > lim)
1546 goto stop;
1547
1548 while (this_len > 0)
1549 {
1550 int pat_ch = *p++;
1551 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1552 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1553
1554 if (buf_ch != pat_ch)
1555 break;
ab228c24
RS
1556
1557 this_len--;
1558 this_pos++;
facdc750
RS
1559 }
1560
1561 if (this_len == 0)
1562 {
49e44e81 1563 match_byte = len;
facdc750
RS
1564 pos += len;
1565 break;
1566 }
1567
1568 pos++;
f8bd51c4 1569 }
facdc750
RS
1570
1571 n--;
1572 }
1573 /* Backwards search. */
1574 else if (lim < pos && multibyte)
1575 while (n < 0)
1576 {
1577 while (1)
f8bd51c4 1578 {
facdc750 1579 /* Try matching at position POS. */
d311d28c
PE
1580 ptrdiff_t this_pos = pos;
1581 ptrdiff_t this_pos_byte = pos_byte;
1582 ptrdiff_t this_len = len;
c525b3f2 1583 const unsigned char *p = pat + len_byte;
facdc750 1584
8b264ecb 1585 if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
facdc750
RS
1586 goto stop;
1587
1588 while (this_len > 0)
1589 {
ab228c24 1590 int pat_ch, buf_ch;
facdc750 1591
8b264ecb
AS
1592 DEC_BOTH (this_pos, this_pos_byte);
1593 PREV_CHAR_BOUNDARY (p, pat);
1594 pat_ch = STRING_CHAR (p);
1595 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
aff2ce94 1596 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1597
1598 if (buf_ch != pat_ch)
1599 break;
ab228c24 1600
ab228c24 1601 this_len--;
facdc750
RS
1602 }
1603
1604 if (this_len == 0)
1605 {
8b264ecb
AS
1606 match_byte = pos_byte - this_pos_byte;
1607 pos = this_pos;
1608 pos_byte = this_pos_byte;
facdc750
RS
1609 break;
1610 }
1611
1612 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1613 }
1614
facdc750
RS
1615 n++;
1616 }
1617 else if (lim < pos)
1618 while (n < 0)
1619 {
1620 while (1)
b6d6a51c 1621 {
facdc750 1622 /* Try matching at position POS. */
d311d28c
PE
1623 ptrdiff_t this_pos = pos - len;
1624 ptrdiff_t this_len = len;
facdc750
RS
1625 unsigned char *p = pat;
1626
a9469498 1627 if (this_pos < lim)
facdc750
RS
1628 goto stop;
1629
1630 while (this_len > 0)
1631 {
1632 int pat_ch = *p++;
1633 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1634 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1635
1636 if (buf_ch != pat_ch)
1637 break;
ab228c24
RS
1638 this_len--;
1639 this_pos++;
facdc750
RS
1640 }
1641
1642 if (this_len == 0)
b6d6a51c 1643 {
49e44e81 1644 match_byte = len;
facdc750
RS
1645 pos -= len;
1646 break;
b6d6a51c 1647 }
facdc750
RS
1648
1649 pos--;
b6d6a51c 1650 }
facdc750
RS
1651
1652 n++;
b6d6a51c 1653 }
facdc750
RS
1654
1655 stop:
1656 if (n == 0)
aff2ce94 1657 {
8ce70ed2 1658 eassert (match_byte != PTRDIFF_MIN);
ab228c24 1659 if (forward)
49e44e81 1660 set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
ab228c24 1661 else
49e44e81 1662 set_search_regs (multibyte ? pos_byte : pos, match_byte);
aff2ce94
RS
1663
1664 return pos;
1665 }
facdc750
RS
1666 else if (n > 0)
1667 return -n;
1668 else
1669 return n;
1670}
1671\f
0190922f 1672/* Do Boyer-Moore search N times for the string BASE_PAT,
1db5b1ad
JB
1673 whose length is LEN_BYTE,
1674 from buffer position POS_BYTE until LIM_BYTE.
facdc750
RS
1675 DIRECTION says which direction we search in.
1676 TRT and INVERSE_TRT are translation tables.
0190922f 1677 Characters in PAT are already translated by TRT.
facdc750 1678
0190922f
KH
1679 This kind of search works if all the characters in BASE_PAT that
1680 have nontrivial translation are the same aside from the last byte.
1681 This makes it possible to translate just the last byte of a
1682 character, and do so after just a simple test of the context.
b2e6b10f 1683 CHAR_BASE is nonzero if there is such a non-ASCII character.
facdc750
RS
1684
1685 If that criterion is not satisfied, do not call this function. */
1686
e7deaab0 1687static EMACS_INT
c098fdb8 1688boyer_moore (EMACS_INT n, unsigned char *base_pat,
d311d28c 1689 ptrdiff_t len_byte,
d5a3eaaf 1690 Lisp_Object trt, Lisp_Object inverse_trt,
d311d28c 1691 ptrdiff_t pos_byte, ptrdiff_t lim_byte,
1db5b1ad 1692 int char_base)
facdc750
RS
1693{
1694 int direction = ((n > 0) ? 1 : -1);
d311d28c
PE
1695 register ptrdiff_t dirlen;
1696 ptrdiff_t limit;
e7deaab0 1697 int stride_for_teases = 0;
f4646fff 1698 int BM_tab[0400];
177c0ea7 1699 register unsigned char *cursor, *p_limit;
d311d28c 1700 register ptrdiff_t i;
c098fdb8 1701 register int j;
cb6792d2 1702 unsigned char *pat, *pat_end;
0063fdb1 1703 bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
facdc750
RS
1704
1705 unsigned char simple_translate[0400];
0190922f 1706 /* These are set to the preceding bytes of a byte to be translated
49e44e81 1707 if char_base is nonzero. As the maximum byte length of a
a17ae96a 1708 multibyte character is 5, we have to check at most four previous
0190922f
KH
1709 bytes. */
1710 int translate_prev_byte1 = 0;
1711 int translate_prev_byte2 = 0;
1712 int translate_prev_byte3 = 0;
facdc750 1713
f4646fff
AS
1714 /* The general approach is that we are going to maintain that we know
1715 the first (closest to the present position, in whatever direction
1716 we're searching) character that could possibly be the last
1717 (furthest from present position) character of a valid match. We
1718 advance the state of our knowledge by looking at that character
1719 and seeing whether it indeed matches the last character of the
1720 pattern. If it does, we take a closer look. If it does not, we
1721 move our pointer (to putative last characters) as far as is
1722 logically possible. This amount of movement, which I call a
1723 stride, will be the length of the pattern if the actual character
1724 appears nowhere in the pattern, otherwise it will be the distance
1725 from the last occurrence of that character to the end of the
1726 pattern. If the amount is zero we have a possible match. */
1727
1728 /* Here we make a "mickey mouse" BM table. The stride of the search
1729 is determined only by the last character of the putative match.
1730 If that character does not match, we will stride the proper
1731 distance to propose a match that superimposes it on the last
1732 instance of a character that matches it (per trt), or misses
1733 it entirely if there is none. */
facdc750
RS
1734
1735 dirlen = len_byte * direction;
cb6792d2
RS
1736
1737 /* Record position after the end of the pattern. */
1738 pat_end = base_pat + len_byte;
1739 /* BASE_PAT points to a character that we start scanning from.
1740 It is the first character in a forward search,
1741 the last character in a backward search. */
facdc750 1742 if (direction < 0)
cb6792d2
RS
1743 base_pat = pat_end - 1;
1744
f4646fff
AS
1745 /* A character that does not appear in the pattern induces a
1746 stride equal to the pattern length. */
1747 for (i = 0; i < 0400; i++)
1748 BM_tab[i] = dirlen;
facdc750
RS
1749
1750 /* We use this for translation, instead of TRT itself.
1751 We fill this in to handle the characters that actually
1752 occur in the pattern. Others don't matter anyway! */
facdc750
RS
1753 for (i = 0; i < 0400; i++)
1754 simple_translate[i] = i;
1755
a17ae96a 1756 if (char_base)
0190922f 1757 {
a17ae96a 1758 /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE. Only a
0190922f 1759 byte following them are the target of translation. */
0190922f 1760 unsigned char str[MAX_MULTIBYTE_LENGTH];
1f3561e4 1761 int cblen = CHAR_STRING (char_base, str);
0190922f 1762
1f3561e4
PE
1763 translate_prev_byte1 = str[cblen - 2];
1764 if (cblen > 2)
0190922f 1765 {
1f3561e4
PE
1766 translate_prev_byte2 = str[cblen - 3];
1767 if (cblen > 3)
1f1d9321 1768 translate_prev_byte3 = str[cblen - 4];
0190922f
KH
1769 }
1770 }
1771
facdc750 1772 i = 0;
f4646fff 1773 while (i != dirlen)
facdc750 1774 {
cb6792d2 1775 unsigned char *ptr = base_pat + i;
facdc750 1776 i += direction;
facdc750 1777 if (! NILP (trt))
ca1d1d23 1778 {
49e44e81
KH
1779 /* If the byte currently looking at is the last of a
1780 character to check case-equivalents, set CH to that
1781 character. An ASCII character and a non-ASCII character
1782 matching with CHAR_BASE are to be checked. */
a17ae96a
KH
1783 int ch = -1;
1784
1785 if (ASCII_BYTE_P (*ptr) || ! multibyte)
1786 ch = *ptr;
49e44e81 1787 else if (char_base
86dc6ccb 1788 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
ca1d1d23 1789 {
49e44e81
KH
1790 unsigned char *charstart = ptr - 1;
1791
1792 while (! (CHAR_HEAD_P (*charstart)))
1793 charstart--;
62a6e103 1794 ch = STRING_CHAR (charstart);
a17ae96a
KH
1795 if (char_base != (ch & ~0x3F))
1796 ch = -1;
ca1d1d23 1797 }
facdc750 1798
edb7b4dc 1799 if (ch >= 0200 && multibyte)
49e44e81
KH
1800 j = (ch & 0x3F) | 0200;
1801 else
1802 j = *ptr;
1803
f4646fff 1804 if (i == dirlen)
facdc750 1805 stride_for_teases = BM_tab[j];
ab228c24 1806
facdc750 1807 BM_tab[j] = dirlen - i;
1db5b1ad
JB
1808 /* A translation table is accompanied by its inverse -- see
1809 comment following downcase_table for details. */
a17ae96a 1810 if (ch >= 0)
ab228c24
RS
1811 {
1812 int starting_ch = ch;
49e44e81 1813 int starting_j = j;
a17ae96a 1814
ab228c24
RS
1815 while (1)
1816 {
1817 TRANSLATE (ch, inverse_trt, ch);
edb7b4dc 1818 if (ch >= 0200 && multibyte)
49e44e81 1819 j = (ch & 0x3F) | 0200;
ab228c24 1820 else
a17ae96a 1821 j = ch;
ab228c24
RS
1822
1823 /* For all the characters that map into CH,
1824 set up simple_translate to map the last byte
1825 into STARTING_J. */
1826 simple_translate[j] = starting_j;
1827 if (ch == starting_ch)
1828 break;
1829 BM_tab[j] = dirlen - i;
1830 }
1831 }
facdc750
RS
1832 }
1833 else
1834 {
1835 j = *ptr;
1836
f4646fff 1837 if (i == dirlen)
facdc750
RS
1838 stride_for_teases = BM_tab[j];
1839 BM_tab[j] = dirlen - i;
ca1d1d23 1840 }
f4646fff
AS
1841 /* stride_for_teases tells how much to stride if we get a
1842 match on the far character but are subsequently
1843 disappointed, by recording what the stride would have been
1844 for that character if the last character had been
1845 different. */
facdc750 1846 }
facdc750
RS
1847 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1848 /* loop invariant - POS_BYTE points at where last char (first
1849 char if reverse) of pattern would align in a possible match. */
1850 while (n != 0)
1851 {
d311d28c 1852 ptrdiff_t tail_end;
facdc750
RS
1853 unsigned char *tail_end_ptr;
1854
1855 /* It's been reported that some (broken) compiler thinks that
1856 Boolean expressions in an arithmetic context are unsigned.
1857 Using an explicit ?1:0 prevents this. */
1858 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1859 < 0)
1860 return (n * (0 - direction));
1861 /* First we do the part we can by pointers (maybe nothing) */
1862 QUIT;
1863 pat = base_pat;
1864 limit = pos_byte - dirlen + direction;
67ce527d
KH
1865 if (direction > 0)
1866 {
1867 limit = BUFFER_CEILING_OF (limit);
1868 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1869 can take on without hitting edge of buffer or the gap. */
1870 limit = min (limit, pos_byte + 20000);
1871 limit = min (limit, lim_byte - 1);
1872 }
1873 else
1874 {
1875 limit = BUFFER_FLOOR_OF (limit);
1876 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1877 can take on without hitting edge of buffer or the gap. */
1878 limit = max (limit, pos_byte - 20000);
1879 limit = max (limit, lim_byte);
1880 }
facdc750
RS
1881 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1882 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1883
1884 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1885 {
facdc750
RS
1886 unsigned char *p2;
1887
1888 p_limit = BYTE_POS_ADDR (limit);
1889 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
f4646fff 1890 /* In this loop, pos + cursor - p2 is the surrogate for pos. */
facdc750 1891 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1892 {
facdc750 1893 if (direction > 0) /* worth duplicating */
ca1d1d23 1894 {
f4646fff
AS
1895 while (cursor <= p_limit)
1896 {
1897 if (BM_tab[*cursor] == 0)
1898 goto hit;
facdc750 1899 cursor += BM_tab[*cursor];
f4646fff 1900 }
facdc750
RS
1901 }
1902 else
1903 {
f4646fff
AS
1904 while (cursor >= p_limit)
1905 {
1906 if (BM_tab[*cursor] == 0)
1907 goto hit;
facdc750 1908 cursor += BM_tab[*cursor];
f4646fff 1909 }
facdc750 1910 }
f4646fff
AS
1911 /* If you are here, cursor is beyond the end of the
1912 searched region. You fail to match within the
1913 permitted region and would otherwise try a character
1914 beyond that region. */
1915 break;
1916
1917 hit:
facdc750
RS
1918 i = dirlen - direction;
1919 if (! NILP (trt))
1920 {
1921 while ((i -= direction) + direction != 0)
ca1d1d23 1922 {
facdc750
RS
1923 int ch;
1924 cursor -= direction;
1925 /* Translate only the last byte of a character. */
1926 if (! multibyte
1927 || ((cursor == tail_end_ptr
1928 || CHAR_HEAD_P (cursor[1]))
1929 && (CHAR_HEAD_P (cursor[0])
0190922f 1930 /* Check if this is the last byte of
cd1181db 1931 a translatable character. */
0190922f
KH
1932 || (translate_prev_byte1 == cursor[-1]
1933 && (CHAR_HEAD_P (translate_prev_byte1)
1934 || (translate_prev_byte2 == cursor[-2]
1935 && (CHAR_HEAD_P (translate_prev_byte2)
1936 || (translate_prev_byte3 == cursor[-3]))))))))
facdc750
RS
1937 ch = simple_translate[*cursor];
1938 else
1939 ch = *cursor;
1940 if (pat[i] != ch)
1941 break;
ca1d1d23 1942 }
facdc750
RS
1943 }
1944 else
1945 {
1946 while ((i -= direction) + direction != 0)
ca1d1d23 1947 {
facdc750
RS
1948 cursor -= direction;
1949 if (pat[i] != *cursor)
1950 break;
ca1d1d23 1951 }
facdc750
RS
1952 }
1953 cursor += dirlen - i - direction; /* fix cursor */
1954 if (i + direction == 0)
1955 {
d311d28c 1956 ptrdiff_t position, start, end;
0c8533c6 1957
facdc750 1958 cursor -= direction;
1113d9db 1959
facdc750
RS
1960 position = pos_byte + cursor - p2 + ((direction > 0)
1961 ? 1 - len_byte : 0);
1962 set_search_regs (position, len_byte);
ca325161 1963
ef887810
RS
1964 if (NILP (Vinhibit_changing_match_data))
1965 {
1966 start = search_regs.start[0];
1967 end = search_regs.end[0];
1968 }
1969 else
1970 /* If Vinhibit_changing_match_data is non-nil,
1971 search_regs will not be changed. So let's
1972 compute start and end here. */
1973 {
1974 start = BYTE_TO_CHAR (position);
1975 end = BYTE_TO_CHAR (position + len_byte);
1976 }
1977
facdc750
RS
1978 if ((n -= direction) != 0)
1979 cursor += dirlen; /* to resume search */
ca1d1d23 1980 else
ef887810 1981 return direction > 0 ? end : start;
ca1d1d23 1982 }
facdc750
RS
1983 else
1984 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 1985 }
facdc750
RS
1986 pos_byte += cursor - p2;
1987 }
1988 else
f4646fff
AS
1989 /* Now we'll pick up a clump that has to be done the hard
1990 way because it covers a discontinuity. */
facdc750
RS
1991 {
1992 limit = ((direction > 0)
1993 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1994 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1995 limit = ((direction > 0)
1996 ? min (limit + len_byte, lim_byte - 1)
1997 : max (limit - len_byte, lim_byte));
1998 /* LIMIT is now the last value POS_BYTE can have
1999 and still be valid for a possible match. */
2000 while (1)
ca1d1d23 2001 {
f4646fff
AS
2002 /* This loop can be coded for space rather than
2003 speed because it will usually run only once.
2004 (the reach is at most len + 21, and typically
2005 does not exceed len). */
facdc750 2006 while ((limit - pos_byte) * direction >= 0)
f4646fff
AS
2007 {
2008 int ch = FETCH_BYTE (pos_byte);
2009 if (BM_tab[ch] == 0)
2010 goto hit2;
2011 pos_byte += BM_tab[ch];
2012 }
2013 break; /* ran off the end */
2014
2015 hit2:
2016 /* Found what might be a match. */
facdc750
RS
2017 i = dirlen - direction;
2018 while ((i -= direction) + direction != 0)
ca1d1d23 2019 {
facdc750
RS
2020 int ch;
2021 unsigned char *ptr;
2022 pos_byte -= direction;
2023 ptr = BYTE_POS_ADDR (pos_byte);
2024 /* Translate only the last byte of a character. */
2025 if (! multibyte
2026 || ((ptr == tail_end_ptr
2027 || CHAR_HEAD_P (ptr[1]))
2028 && (CHAR_HEAD_P (ptr[0])
0190922f 2029 /* Check if this is the last byte of a
cd1181db 2030 translatable character. */
0190922f
KH
2031 || (translate_prev_byte1 == ptr[-1]
2032 && (CHAR_HEAD_P (translate_prev_byte1)
2033 || (translate_prev_byte2 == ptr[-2]
2034 && (CHAR_HEAD_P (translate_prev_byte2)
2035 || translate_prev_byte3 == ptr[-3])))))))
facdc750
RS
2036 ch = simple_translate[*ptr];
2037 else
2038 ch = *ptr;
2039 if (pat[i] != ch)
2040 break;
2041 }
2042 /* Above loop has moved POS_BYTE part or all the way
2043 back to the first pos (last pos if reverse).
2044 Set it once again at the last (first if reverse) char. */
f4646fff 2045 pos_byte += dirlen - i - direction;
facdc750
RS
2046 if (i + direction == 0)
2047 {
d311d28c 2048 ptrdiff_t position, start, end;
facdc750 2049 pos_byte -= direction;
1113d9db 2050
facdc750 2051 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
facdc750 2052 set_search_regs (position, len_byte);
ca325161 2053
ef887810
RS
2054 if (NILP (Vinhibit_changing_match_data))
2055 {
2056 start = search_regs.start[0];
2057 end = search_regs.end[0];
2058 }
2059 else
2060 /* If Vinhibit_changing_match_data is non-nil,
2061 search_regs will not be changed. So let's
2062 compute start and end here. */
2063 {
2064 start = BYTE_TO_CHAR (position);
2065 end = BYTE_TO_CHAR (position + len_byte);
2066 }
2067
facdc750
RS
2068 if ((n -= direction) != 0)
2069 pos_byte += dirlen; /* to resume search */
ca1d1d23 2070 else
ef887810 2071 return direction > 0 ? end : start;
ca1d1d23 2072 }
facdc750
RS
2073 else
2074 pos_byte += stride_for_teases;
2075 }
2076 }
2077 /* We have done one clump. Can we continue? */
2078 if ((lim_byte - pos_byte) * direction < 0)
2079 return ((0 - n) * direction);
ca1d1d23 2080 }
facdc750 2081 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 2082}
ca325161 2083
fa8ed3e0 2084/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
a7e4cdde
RS
2085 for the overall match just found in the current buffer.
2086 Also clear out the match data for registers 1 and up. */
ca325161
RS
2087
2088static void
d311d28c 2089set_search_regs (ptrdiff_t beg_byte, ptrdiff_t nbytes)
ca325161 2090{
d311d28c 2091 ptrdiff_t i;
a7e4cdde 2092
ef887810
RS
2093 if (!NILP (Vinhibit_changing_match_data))
2094 return;
2095
ca325161
RS
2096 /* Make sure we have registers in which to store
2097 the match position. */
2098 if (search_regs.num_regs == 0)
2099 {
23f86fce
DA
2100 search_regs.start = xmalloc (2 * sizeof (regoff_t));
2101 search_regs.end = xmalloc (2 * sizeof (regoff_t));
487282dc 2102 search_regs.num_regs = 2;
ca325161
RS
2103 }
2104
a7e4cdde
RS
2105 /* Clear out the other registers. */
2106 for (i = 1; i < search_regs.num_regs; i++)
2107 {
2108 search_regs.start[i] = -1;
2109 search_regs.end[i] = -1;
2110 }
2111
fa8ed3e0
RS
2112 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2113 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 2114 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 2115}
ca1d1d23 2116\f
ca1d1d23 2117DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
8c1a1077
PJ
2118 "MSearch backward: ",
2119 doc: /* Search backward from point for STRING.
2120Set point to the beginning of the occurrence found, and return point.
2121An optional second argument bounds the search; it is a buffer position.
2122The match found must not extend before that position.
2123Optional third argument, if t, means if fail just return nil (no error).
2124 If not nil and not t, position at limit of search and return nil.
acc28cb9
CY
2125Optional fourth argument COUNT, if non-nil, means to search for COUNT
2126 successive occurrences. If COUNT is negative, search forward,
2127 instead of backward, for -COUNT occurrences.
8c1a1077
PJ
2128
2129Search case-sensitivity is determined by the value of the variable
2130`case-fold-search', which see.
2131
2132See also the functions `match-beginning', `match-end' and `replace-match'. */)
5842a27b 2133 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
ca1d1d23 2134{
b819a390 2135 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
2136}
2137
6af43974 2138DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
8c1a1077
PJ
2139 doc: /* Search forward from point for STRING.
2140Set point to the end of the occurrence found, and return point.
2141An optional second argument bounds the search; it is a buffer position.
d6aacdcd 2142The match found must not extend after that position. A value of nil is
48740f01 2143 equivalent to (point-max).
8c1a1077
PJ
2144Optional third argument, if t, means if fail just return nil (no error).
2145 If not nil and not t, move to limit of search and return nil.
acc28cb9
CY
2146Optional fourth argument COUNT, if non-nil, means to search for COUNT
2147 successive occurrences. If COUNT is negative, search backward,
2148 instead of forward, for -COUNT occurrences.
8c1a1077
PJ
2149
2150Search case-sensitivity is determined by the value of the variable
2151`case-fold-search', which see.
2152
2153See also the functions `match-beginning', `match-end' and `replace-match'. */)
5842a27b 2154 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
ca1d1d23 2155{
b819a390 2156 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
2157}
2158
ca1d1d23 2159DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
8c1a1077
PJ
2160 "sRE search backward: ",
2161 doc: /* Search backward from point for match for regular expression REGEXP.
2162Set point to the beginning of the match, and return point.
2163The match found is the one starting last in the buffer
2164and yet ending before the origin of the search.
2165An optional second argument bounds the search; it is a buffer position.
2166The match found must start at or after that position.
2167Optional third argument, if t, means if fail just return nil (no error).
2168 If not nil and not t, move to limit of search and return nil.
2169Optional fourth argument is repeat count--search for successive occurrences.
49080b10
LMI
2170
2171Search case-sensitivity is determined by the value of the variable
2172`case-fold-search', which see.
2173
8c1a1077
PJ
2174See also the functions `match-beginning', `match-end', `match-string',
2175and `replace-match'. */)
5842a27b 2176 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
ca1d1d23 2177{
b819a390 2178 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2179}
2180
2181DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
8c1a1077
PJ
2182 "sRE search: ",
2183 doc: /* Search forward from point for regular expression REGEXP.
2184Set point to the end of the occurrence found, and return point.
2185An optional second argument bounds the search; it is a buffer position.
2186The match found must not extend after that position.
2187Optional third argument, if t, means if fail just return nil (no error).
2188 If not nil and not t, move to limit of search and return nil.
2189Optional fourth argument is repeat count--search for successive occurrences.
49080b10
LMI
2190
2191Search case-sensitivity is determined by the value of the variable
2192`case-fold-search', which see.
2193
8c1a1077
PJ
2194See also the functions `match-beginning', `match-end', `match-string',
2195and `replace-match'. */)
5842a27b 2196 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
ca1d1d23 2197{
b819a390
RS
2198 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2199}
2200
2201DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
8c1a1077
PJ
2202 "sPosix search backward: ",
2203 doc: /* Search backward from point for match for regular expression REGEXP.
2204Find the longest match in accord with Posix regular expression rules.
2205Set point to the beginning of the match, and return point.
2206The match found is the one starting last in the buffer
2207and yet ending before the origin of the search.
2208An optional second argument bounds the search; it is a buffer position.
2209The match found must start at or after that position.
2210Optional third argument, if t, means if fail just return nil (no error).
2211 If not nil and not t, move to limit of search and return nil.
2212Optional fourth argument is repeat count--search for successive occurrences.
49080b10
LMI
2213
2214Search case-sensitivity is determined by the value of the variable
2215`case-fold-search', which see.
2216
8c1a1077
PJ
2217See also the functions `match-beginning', `match-end', `match-string',
2218and `replace-match'. */)
5842a27b 2219 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
b819a390
RS
2220{
2221 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2222}
2223
2224DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
8c1a1077
PJ
2225 "sPosix search: ",
2226 doc: /* Search forward from point for regular expression REGEXP.
2227Find the longest match in accord with Posix regular expression rules.
2228Set point to the end of the occurrence found, and return point.
2229An optional second argument bounds the search; it is a buffer position.
2230The match found must not extend after that position.
2231Optional third argument, if t, means if fail just return nil (no error).
2232 If not nil and not t, move to limit of search and return nil.
2233Optional fourth argument is repeat count--search for successive occurrences.
49080b10
LMI
2234
2235Search case-sensitivity is determined by the value of the variable
2236`case-fold-search', which see.
2237
8c1a1077
PJ
2238See also the functions `match-beginning', `match-end', `match-string',
2239and `replace-match'. */)
5842a27b 2240 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
b819a390
RS
2241{
2242 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2243}
2244\f
d7a5ad5f 2245DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
8c1a1077 2246 doc: /* Replace text matched by last search with NEWTEXT.
4dd0c271
RS
2247Leave point at the end of the replacement text.
2248
c88b867f
CY
2249If optional second arg FIXEDCASE is non-nil, do not alter the case of
2250the replacement text. Otherwise, maybe capitalize the whole text, or
2251maybe just word initials, based on the replaced text. If the replaced
2252text has only capital letters and has at least one multiletter word,
2253convert NEWTEXT to all caps. Otherwise if all words are capitalized
2254in the replaced text, capitalize each word in NEWTEXT.
2255
2256If optional third arg LITERAL is non-nil, insert NEWTEXT literally.
8c1a1077
PJ
2257Otherwise treat `\\' as special:
2258 `\\&' in NEWTEXT means substitute original matched text.
2259 `\\N' means substitute what matched the Nth `\\(...\\)'.
2260 If Nth parens didn't match, substitute nothing.
2261 `\\\\' means insert one `\\'.
d32e47af
LM
2262 `\\?' is treated literally
2263 (for compatibility with `query-replace-regexp').
2264 Any other character following `\\' signals an error.
4dd0c271
RS
2265Case conversion does not apply to these substitutions.
2266
c88b867f
CY
2267If optional fourth argument STRING is non-nil, it should be a string
2268to act on; this should be the string on which the previous match was
2269done via `string-match'. In this case, `replace-match' creates and
2270returns a new string, made by copying STRING and replacing the part of
2271STRING that was matched (the original STRING itself is not altered).
8c1a1077
PJ
2272
2273The optional fifth argument SUBEXP specifies a subexpression;
2274it says to replace just that subexpression with NEWTEXT,
2275rather than replacing the entire matched text.
2276This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2277`\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2278NEWTEXT in place of subexp N.
2279This is useful only after a regular expression search or match,
2280since only regular expressions have distinguished subexpressions. */)
5842a27b 2281 (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp)
ca1d1d23
JB
2282{
2283 enum { nochange, all_caps, cap_initial } case_action;
d311d28c 2284 register ptrdiff_t pos, pos_byte;
ca1d1d23 2285 int some_multiletter_word;
97832bd0 2286 int some_lowercase;
73dc8771 2287 int some_uppercase;
208767c3 2288 int some_nonuppercase_initial;
ca1d1d23 2289 register int c, prevc;
a0efffc8 2290 ptrdiff_t sub;
d311d28c 2291 ptrdiff_t opoint, newpoint;
ca1d1d23 2292
b7826503 2293 CHECK_STRING (newtext);
ca1d1d23 2294
080c45fd 2295 if (! NILP (string))
b7826503 2296 CHECK_STRING (string);
080c45fd 2297
ca1d1d23
JB
2298 case_action = nochange; /* We tried an initialization */
2299 /* but some C compilers blew it */
4746118a
JB
2300
2301 if (search_regs.num_regs <= 0)
d72cdbfc 2302 error ("`replace-match' called before any match found");
4746118a 2303
d7a5ad5f
RS
2304 if (NILP (subexp))
2305 sub = 0;
2306 else
2307 {
b7826503 2308 CHECK_NUMBER (subexp);
a0efffc8 2309 if (! (0 <= XINT (subexp) && XINT (subexp) < search_regs.num_regs))
d7a5ad5f 2310 args_out_of_range (subexp, make_number (search_regs.num_regs));
a0efffc8 2311 sub = XINT (subexp);
d7a5ad5f
RS
2312 }
2313
080c45fd
RS
2314 if (NILP (string))
2315 {
d7a5ad5f
RS
2316 if (search_regs.start[sub] < BEGV
2317 || search_regs.start[sub] > search_regs.end[sub]
2318 || search_regs.end[sub] > ZV)
2319 args_out_of_range (make_number (search_regs.start[sub]),
2320 make_number (search_regs.end[sub]));
080c45fd
RS
2321 }
2322 else
2323 {
d7a5ad5f
RS
2324 if (search_regs.start[sub] < 0
2325 || search_regs.start[sub] > search_regs.end[sub]
d5db4077 2326 || search_regs.end[sub] > SCHARS (string))
d7a5ad5f
RS
2327 args_out_of_range (make_number (search_regs.start[sub]),
2328 make_number (search_regs.end[sub]));
080c45fd 2329 }
ca1d1d23
JB
2330
2331 if (NILP (fixedcase))
2332 {
2333 /* Decide how to casify by examining the matched text. */
d311d28c 2334 ptrdiff_t last;
ca1d1d23 2335
ac3b28b1
KH
2336 pos = search_regs.start[sub];
2337 last = search_regs.end[sub];
fa8ed3e0
RS
2338
2339 if (NILP (string))
ac3b28b1 2340 pos_byte = CHAR_TO_BYTE (pos);
fa8ed3e0 2341 else
ac3b28b1 2342 pos_byte = string_char_to_byte (string, pos);
fa8ed3e0 2343
ca1d1d23
JB
2344 prevc = '\n';
2345 case_action = all_caps;
2346
2347 /* some_multiletter_word is set nonzero if any original word
2348 is more than one letter long. */
2349 some_multiletter_word = 0;
97832bd0 2350 some_lowercase = 0;
208767c3 2351 some_nonuppercase_initial = 0;
73dc8771 2352 some_uppercase = 0;
ca1d1d23 2353
ac3b28b1 2354 while (pos < last)
ca1d1d23 2355 {
080c45fd 2356 if (NILP (string))
ac3b28b1 2357 {
93daa011 2358 c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
ac3b28b1
KH
2359 INC_BOTH (pos, pos_byte);
2360 }
080c45fd 2361 else
93daa011 2362 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
080c45fd 2363
5da9919f 2364 if (lowercasep (c))
ca1d1d23
JB
2365 {
2366 /* Cannot be all caps if any original char is lower case */
2367
97832bd0 2368 some_lowercase = 1;
ca1d1d23 2369 if (SYNTAX (prevc) != Sword)
208767c3 2370 some_nonuppercase_initial = 1;
ca1d1d23
JB
2371 else
2372 some_multiletter_word = 1;
2373 }
5da9919f 2374 else if (uppercasep (c))
ca1d1d23 2375 {
73dc8771 2376 some_uppercase = 1;
97832bd0 2377 if (SYNTAX (prevc) != Sword)
c4d460ce 2378 ;
97832bd0 2379 else
ca1d1d23
JB
2380 some_multiletter_word = 1;
2381 }
208767c3
RS
2382 else
2383 {
2384 /* If the initial is a caseless word constituent,
2385 treat that like a lowercase initial. */
2386 if (SYNTAX (prevc) != Sword)
2387 some_nonuppercase_initial = 1;
2388 }
ca1d1d23
JB
2389
2390 prevc = c;
2391 }
2392
97832bd0
RS
2393 /* Convert to all caps if the old text is all caps
2394 and has at least one multiletter word. */
2395 if (! some_lowercase && some_multiletter_word)
2396 case_action = all_caps;
c4d460ce 2397 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2398 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2399 case_action = cap_initial;
208767c3 2400 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2401 /* Should x -> yz, operating on X, give Yz or YZ?
2402 We'll assume the latter. */
2403 case_action = all_caps;
97832bd0
RS
2404 else
2405 case_action = nochange;
ca1d1d23
JB
2406 }
2407
080c45fd
RS
2408 /* Do replacement in a string. */
2409 if (!NILP (string))
2410 {
2411 Lisp_Object before, after;
2412
2413 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2414 make_number (search_regs.start[sub]));
2415 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2416
636a5e28
RS
2417 /* Substitute parts of the match into NEWTEXT
2418 if desired. */
080c45fd
RS
2419 if (NILP (literal))
2420 {
d311d28c
PE
2421 ptrdiff_t lastpos = 0;
2422 ptrdiff_t lastpos_byte = 0;
080c45fd
RS
2423 /* We build up the substituted string in ACCUM. */
2424 Lisp_Object accum;
2425 Lisp_Object middle;
d311d28c 2426 ptrdiff_t length = SBYTES (newtext);
080c45fd
RS
2427
2428 accum = Qnil;
2429
ac3b28b1 2430 for (pos_byte = 0, pos = 0; pos_byte < length;)
080c45fd 2431 {
d311d28c
PE
2432 ptrdiff_t substart = -1;
2433 ptrdiff_t subend = 0;
1e79ec24 2434 int delbackslash = 0;
080c45fd 2435
0c8533c6
RS
2436 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2437
080c45fd
RS
2438 if (c == '\\')
2439 {
0c8533c6 2440 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
177c0ea7 2441
080c45fd
RS
2442 if (c == '&')
2443 {
d7a5ad5f
RS
2444 substart = search_regs.start[sub];
2445 subend = search_regs.end[sub];
080c45fd 2446 }
76fc1ea2 2447 else if (c >= '1' && c <= '9')
080c45fd 2448 {
d311d28c
PE
2449 if (c - '0' < search_regs.num_regs
2450 && 0 <= search_regs.start[c - '0'])
080c45fd
RS
2451 {
2452 substart = search_regs.start[c - '0'];
2453 subend = search_regs.end[c - '0'];
2454 }
76fc1ea2
KH
2455 else
2456 {
2457 /* If that subexp did not match,
2458 replace \\N with nothing. */
2459 substart = 0;
2460 subend = 0;
2461 }
080c45fd 2462 }
1e79ec24
KH
2463 else if (c == '\\')
2464 delbackslash = 1;
d32e47af 2465 else if (c != '?')
636a5e28 2466 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2467 }
2468 if (substart >= 0)
2469 {
d131e79c
RS
2470 if (pos - 2 != lastpos)
2471 middle = substring_both (newtext, lastpos,
2472 lastpos_byte,
2473 pos - 2, pos_byte - 2);
080c45fd
RS
2474 else
2475 middle = Qnil;
2476 accum = concat3 (accum, middle,
0c8533c6
RS
2477 Fsubstring (string,
2478 make_number (substart),
080c45fd
RS
2479 make_number (subend)));
2480 lastpos = pos;
0c8533c6 2481 lastpos_byte = pos_byte;
080c45fd 2482 }
1e79ec24
KH
2483 else if (delbackslash)
2484 {
d131e79c
RS
2485 middle = substring_both (newtext, lastpos,
2486 lastpos_byte,
2487 pos - 1, pos_byte - 1);
0c8533c6 2488
1e79ec24
KH
2489 accum = concat2 (accum, middle);
2490 lastpos = pos;
0c8533c6 2491 lastpos_byte = pos_byte;
1e79ec24 2492 }
080c45fd
RS
2493 }
2494
d131e79c
RS
2495 if (pos != lastpos)
2496 middle = substring_both (newtext, lastpos,
2497 lastpos_byte,
0c8533c6 2498 pos, pos_byte);
080c45fd
RS
2499 else
2500 middle = Qnil;
2501
2502 newtext = concat2 (accum, middle);
2503 }
2504
636a5e28 2505 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2506 if (case_action == all_caps)
2507 newtext = Fupcase (newtext);
2508 else if (case_action == cap_initial)
2b2eead9 2509 newtext = Fupcase_initials (newtext);
080c45fd
RS
2510
2511 return concat3 (before, newtext, after);
2512 }
2513
09c4719e 2514 /* Record point, then move (quietly) to the start of the match. */
9160906f 2515 if (PT >= search_regs.end[sub])
b0eba991 2516 opoint = PT - ZV;
9160906f
RS
2517 else if (PT > search_regs.start[sub])
2518 opoint = search_regs.end[sub] - ZV;
b0eba991
RS
2519 else
2520 opoint = PT;
2521
886ed6ec
RS
2522 /* If we want non-literal replacement,
2523 perform substitution on the replacement string. */
2524 if (NILP (literal))
ca1d1d23 2525 {
0065d054 2526 ptrdiff_t length = SBYTES (newtext);
68e69fbd 2527 unsigned char *substed;
0065d054 2528 ptrdiff_t substed_alloc_size, substed_len;
0063fdb1
DA
2529 bool buf_multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
2530 bool str_multibyte = STRING_MULTIBYTE (newtext);
886ed6ec 2531 int really_changed = 0;
3bc25e52 2532
0065d054
PE
2533 substed_alloc_size = ((STRING_BYTES_BOUND - 100) / 2 < length
2534 ? STRING_BYTES_BOUND
2535 : length * 2 + 100);
23f86fce 2536 substed = xmalloc (substed_alloc_size);
68e69fbd
RS
2537 substed_len = 0;
2538
3bc25e52
KH
2539 /* Go thru NEWTEXT, producing the actual text to insert in
2540 SUBSTED while adjusting multibyteness to that of the current
2541 buffer. */
ca1d1d23 2542
ac3b28b1 2543 for (pos_byte = 0, pos = 0; pos_byte < length;)
ca1d1d23 2544 {
68e69fbd 2545 unsigned char str[MAX_MULTIBYTE_LENGTH];
8ea90aa3 2546 const unsigned char *add_stuff = NULL;
0065d054 2547 ptrdiff_t add_len = 0;
a0efffc8 2548 ptrdiff_t idx = -1;
9a76659d 2549
3bc25e52
KH
2550 if (str_multibyte)
2551 {
eb99a8dd 2552 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
3bc25e52 2553 if (!buf_multibyte)
461c2ab9 2554 c = multibyte_char_to_unibyte (c);
3bc25e52
KH
2555 }
2556 else
2557 {
2558 /* Note that we don't have to increment POS. */
5d69fe10 2559 c = SREF (newtext, pos_byte++);
3bc25e52 2560 if (buf_multibyte)
4c0354d7 2561 MAKE_CHAR_MULTIBYTE (c);
3bc25e52 2562 }
ac3b28b1 2563
68e69fbd
RS
2564 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2565 or set IDX to a match index, which means put that part
2566 of the buffer text into SUBSTED. */
2567
ca1d1d23
JB
2568 if (c == '\\')
2569 {
886ed6ec
RS
2570 really_changed = 1;
2571
3bc25e52
KH
2572 if (str_multibyte)
2573 {
eb99a8dd
KH
2574 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2575 pos, pos_byte);
071ce769 2576 if (!buf_multibyte && !ASCII_CHAR_P (c))
461c2ab9 2577 c = multibyte_char_to_unibyte (c);
3bc25e52
KH
2578 }
2579 else
2580 {
d5db4077 2581 c = SREF (newtext, pos_byte++);
3bc25e52 2582 if (buf_multibyte)
4c0354d7 2583 MAKE_CHAR_MULTIBYTE (c);
3bc25e52
KH
2584 }
2585
ca1d1d23 2586 if (c == '&')
68e69fbd 2587 idx = sub;
d311d28c 2588 else if (c >= '1' && c <= '9' && c - '0' < search_regs.num_regs)
ca1d1d23
JB
2589 {
2590 if (search_regs.start[c - '0'] >= 1)
68e69fbd 2591 idx = c - '0';
ca1d1d23 2592 }
636a5e28 2593 else if (c == '\\')
a7e979a4 2594 add_len = 1, add_stuff = (unsigned char *) "\\";
636a5e28 2595 else
3bc25e52
KH
2596 {
2597 xfree (substed);
2598 error ("Invalid use of `\\' in replacement text");
2599 }
ca1d1d23
JB
2600 }
2601 else
68e69fbd
RS
2602 {
2603 add_len = CHAR_STRING (c, str);
2604 add_stuff = str;
2605 }
2606
2607 /* If we want to copy part of a previous match,
2608 set up ADD_STUFF and ADD_LEN to point to it. */
2609 if (idx >= 0)
2610 {
0065d054 2611 ptrdiff_t begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
68e69fbd
RS
2612 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2613 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
13002885 2614 move_gap_both (search_regs.start[idx], begbyte);
68e69fbd
RS
2615 add_stuff = BYTE_POS_ADDR (begbyte);
2616 }
2617
2618 /* Now the stuff we want to add to SUBSTED
2619 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2620
2621 /* Make sure SUBSTED is big enough. */
0065d054
PE
2622 if (substed_alloc_size - substed_len < add_len)
2623 substed =
2624 xpalloc (substed, &substed_alloc_size,
2625 add_len - (substed_alloc_size - substed_len),
2626 STRING_BYTES_BOUND, 1);
68e69fbd
RS
2627
2628 /* Now add to the end of SUBSTED. */
f8ce8a0d
GM
2629 if (add_stuff)
2630 {
72af86bd 2631 memcpy (substed + substed_len, add_stuff, add_len);
f8ce8a0d
GM
2632 substed_len += add_len;
2633 }
ca1d1d23 2634 }
68e69fbd 2635
886ed6ec 2636 if (really_changed)
76fc1ea2
KH
2637 {
2638 if (buf_multibyte)
2639 {
d311d28c 2640 ptrdiff_t nchars =
c098fdb8 2641 multibyte_chars_in_text (substed, substed_len);
68e69fbd 2642
a7e979a4
PE
2643 newtext = make_multibyte_string ((char *) substed, nchars,
2644 substed_len);
76fc1ea2
KH
2645 }
2646 else
a7e979a4 2647 newtext = make_unibyte_string ((char *) substed, substed_len);
76fc1ea2 2648 }
68e69fbd 2649 xfree (substed);
ca1d1d23
JB
2650 }
2651
886ed6ec
RS
2652 /* Replace the old text with the new in the cleanest possible way. */
2653 replace_range (search_regs.start[sub], search_regs.end[sub],
2654 newtext, 1, 0, 1);
d5db4077 2655 newpoint = search_regs.start[sub] + SCHARS (newtext);
ca1d1d23
JB
2656
2657 if (case_action == all_caps)
886ed6ec
RS
2658 Fupcase_region (make_number (search_regs.start[sub]),
2659 make_number (newpoint));
ca1d1d23 2660 else if (case_action == cap_initial)
886ed6ec
RS
2661 Fupcase_initials_region (make_number (search_regs.start[sub]),
2662 make_number (newpoint));
3e18eecf 2663
98e942e0
RS
2664 /* Adjust search data for this change. */
2665 {
d311d28c
PE
2666 ptrdiff_t oldend = search_regs.end[sub];
2667 ptrdiff_t oldstart = search_regs.start[sub];
2668 ptrdiff_t change = newpoint - search_regs.end[sub];
2669 ptrdiff_t i;
98e942e0
RS
2670
2671 for (i = 0; i < search_regs.num_regs; i++)
2672 {
41c01205 2673 if (search_regs.start[i] >= oldend)
98e942e0 2674 search_regs.start[i] += change;
41c01205
DK
2675 else if (search_regs.start[i] > oldstart)
2676 search_regs.start[i] = oldstart;
2677 if (search_regs.end[i] >= oldend)
98e942e0 2678 search_regs.end[i] += change;
41c01205
DK
2679 else if (search_regs.end[i] > oldstart)
2680 search_regs.end[i] = oldstart;
98e942e0
RS
2681 }
2682 }
2683
b0eba991 2684 /* Put point back where it was in the text. */
8d808a65 2685 if (opoint <= 0)
fa8ed3e0 2686 TEMP_SET_PT (opoint + ZV);
b0eba991 2687 else
fa8ed3e0 2688 TEMP_SET_PT (opoint);
b0eba991
RS
2689
2690 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2691 move_if_not_intangible (newpoint);
177c0ea7 2692
ca1d1d23
JB
2693 return Qnil;
2694}
2695\f
2696static Lisp_Object
971de7fb 2697match_limit (Lisp_Object num, int beginningp)
ca1d1d23 2698{
a0efffc8 2699 EMACS_INT n;
ca1d1d23 2700
b7826503 2701 CHECK_NUMBER (num);
ca1d1d23 2702 n = XINT (num);
f90a5bf5 2703 if (n < 0)
bd2cbd56 2704 args_out_of_range (num, make_number (0));
f90a5bf5
RS
2705 if (search_regs.num_regs <= 0)
2706 error ("No match data, because no search succeeded");
9b9ceb61 2707 if (n >= search_regs.num_regs
4746118a 2708 || search_regs.start[n] < 0)
ca1d1d23
JB
2709 return Qnil;
2710 return (make_number ((beginningp) ? search_regs.start[n]
2711 : search_regs.end[n]));
2712}
2713
a7ca3326 2714DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
8c1a1077
PJ
2715 doc: /* Return position of start of text matched by last search.
2716SUBEXP, a number, specifies which parenthesized expression in the last
2717 regexp.
2718Value is nil if SUBEXPth pair didn't match, or there were less than
2719 SUBEXP pairs.
2720Zero means the entire text matched by the whole regexp or whole string. */)
5842a27b 2721 (Lisp_Object subexp)
ca1d1d23 2722{
5806161b 2723 return match_limit (subexp, 1);
ca1d1d23
JB
2724}
2725
a7ca3326 2726DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
8c1a1077
PJ
2727 doc: /* Return position of end of text matched by last search.
2728SUBEXP, a number, specifies which parenthesized expression in the last
2729 regexp.
2730Value is nil if SUBEXPth pair didn't match, or there were less than
2731 SUBEXP pairs.
2732Zero means the entire text matched by the whole regexp or whole string. */)
5842a27b 2733 (Lisp_Object subexp)
ca1d1d23 2734{
5806161b 2735 return match_limit (subexp, 0);
177c0ea7 2736}
ca1d1d23 2737
a7ca3326 2738DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
8c1a1077
PJ
2739 doc: /* Return a list containing all info on what the last search matched.
2740Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2741All the elements are markers or nil (nil if the Nth pair didn't match)
2742if the last match was on a buffer; integers or nil if a string was matched.
febb8aba 2743Use `set-match-data' to reinstate the data in this list.
8c1a1077 2744
41c01205
DK
2745If INTEGERS (the optional first argument) is non-nil, always use
2746integers \(rather than markers) to represent buffer positions. In
2747this case, and if the last match was in a buffer, the buffer will get
2748stored as one additional element at the end of the list.
2749
abd0071c
KS
2750If REUSE is a list, reuse it as part of the value. If REUSE is long
2751enough to hold all the values, and if INTEGERS is non-nil, no consing
2752is done.
2753
2754If optional third arg RESEAT is non-nil, any previous markers on the
2755REUSE list will be modified to point to nowhere.
2756
140a6b7e 2757Return value is undefined if the last search failed. */)
5842a27b 2758 (Lisp_Object integers, Lisp_Object reuse, Lisp_Object reseat)
ca1d1d23 2759{
56256c2a 2760 Lisp_Object tail, prev;
4746118a 2761 Lisp_Object *data;
d311d28c 2762 ptrdiff_t i, len;
ca1d1d23 2763
abd0071c
KS
2764 if (!NILP (reseat))
2765 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2766 if (MARKERP (XCAR (tail)))
2767 {
51f10faa 2768 unchain_marker (XMARKER (XCAR (tail)));
abd0071c
KS
2769 XSETCAR (tail, Qnil);
2770 }
2771
daa37602 2772 if (NILP (last_thing_searched))
c36bcf1b 2773 return Qnil;
daa37602 2774
6bbd7a29
GM
2775 prev = Qnil;
2776
38182d90 2777 data = alloca ((2 * search_regs.num_regs + 1) * sizeof *data);
4746118a 2778
41c01205 2779 len = 0;
4746118a 2780 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23 2781 {
d311d28c 2782 ptrdiff_t start = search_regs.start[i];
ca1d1d23
JB
2783 if (start >= 0)
2784 {
56256c2a
RS
2785 if (EQ (last_thing_searched, Qt)
2786 || ! NILP (integers))
ca1d1d23 2787 {
c235cce7
KH
2788 XSETFASTINT (data[2 * i], start);
2789 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2790 }
0ed62dc7 2791 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2792 {
2793 data[2 * i] = Fmake_marker ();
daa37602
JB
2794 Fset_marker (data[2 * i],
2795 make_number (start),
2796 last_thing_searched);
ca1d1d23
JB
2797 data[2 * i + 1] = Fmake_marker ();
2798 Fset_marker (data[2 * i + 1],
177c0ea7 2799 make_number (search_regs.end[i]),
daa37602 2800 last_thing_searched);
ca1d1d23 2801 }
daa37602
JB
2802 else
2803 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
1088b922 2804 emacs_abort ();
daa37602 2805
abd0071c 2806 len = 2 * i + 2;
ca1d1d23
JB
2807 }
2808 else
abd0071c 2809 data[2 * i] = data[2 * i + 1] = Qnil;
ca1d1d23 2810 }
56256c2a 2811
bd2cbd56 2812 if (BUFFERP (last_thing_searched) && !NILP (integers))
41c01205 2813 {
bd2cbd56 2814 data[len] = last_thing_searched;
41c01205
DK
2815 len++;
2816 }
2817
56256c2a
RS
2818 /* If REUSE is not usable, cons up the values and return them. */
2819 if (! CONSP (reuse))
41c01205 2820 return Flist (len, data);
56256c2a
RS
2821
2822 /* If REUSE is a list, store as many value elements as will fit
2823 into the elements of REUSE. */
2824 for (i = 0, tail = reuse; CONSP (tail);
c1d497be 2825 i++, tail = XCDR (tail))
56256c2a 2826 {
41c01205 2827 if (i < len)
f3fbd155 2828 XSETCAR (tail, data[i]);
56256c2a 2829 else
f3fbd155 2830 XSETCAR (tail, Qnil);
56256c2a
RS
2831 prev = tail;
2832 }
2833
2834 /* If we couldn't fit all value elements into REUSE,
2835 cons up the rest of them and add them to the end of REUSE. */
41c01205
DK
2836 if (i < len)
2837 XSETCDR (prev, Flist (len - i, data + i));
56256c2a
RS
2838
2839 return reuse;
ca1d1d23
JB
2840}
2841
b51d6c92
SM
2842/* We used to have an internal use variant of `reseat' described as:
2843
2844 If RESEAT is `evaporate', put the markers back on the free list
2845 immediately. No other references to the markers must exist in this
2846 case, so it is used only internally on the unwind stack and
2847 save-match-data from Lisp.
2848
2849 But it was ill-conceived: those supposedly-internal markers get exposed via
2850 the undo-list, so freeing them here is unsafe. */
ca1d1d23 2851
a7ca3326 2852DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
8c1a1077 2853 doc: /* Set internal data on last search match from elements of LIST.
abd0071c
KS
2854LIST should have been created by calling `match-data' previously.
2855
51f10faa 2856If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
5842a27b 2857 (register Lisp_Object list, Lisp_Object reseat)
ca1d1d23 2858{
5f2ab479 2859 ptrdiff_t i;
ca1d1d23
JB
2860 register Lisp_Object marker;
2861
7074fde6
FP
2862 if (running_asynch_code)
2863 save_search_regs ();
2864
29100cea 2865 CHECK_LIST (list);
ca1d1d23 2866
41c01205
DK
2867 /* Unless we find a marker with a buffer or an explicit buffer
2868 in LIST, assume that this match data came from a string. */
daa37602
JB
2869 last_thing_searched = Qt;
2870
4746118a
JB
2871 /* Allocate registers if they don't already exist. */
2872 {
d311d28c 2873 EMACS_INT length = XFASTINT (Flength (list)) / 2;
4746118a
JB
2874
2875 if (length > search_regs.num_regs)
2876 {
0065d054 2877 ptrdiff_t num_regs = search_regs.num_regs;
d311d28c
PE
2878 if (PTRDIFF_MAX < length)
2879 memory_full (SIZE_MAX);
0065d054
PE
2880 search_regs.start =
2881 xpalloc (search_regs.start, &num_regs, length - num_regs,
2882 min (PTRDIFF_MAX, UINT_MAX), sizeof (regoff_t));
2883 search_regs.end =
2884 xrealloc (search_regs.end, num_regs * sizeof (regoff_t));
2885
2886 for (i = search_regs.num_regs; i < num_regs; i++)
e62371e9
KH
2887 search_regs.start[i] = -1;
2888
0065d054 2889 search_regs.num_regs = num_regs;
4746118a 2890 }
ca1d1d23 2891
abd0071c 2892 for (i = 0; CONSP (list); i++)
41c01205 2893 {
abd0071c 2894 marker = XCAR (list);
bd2cbd56 2895 if (BUFFERP (marker))
c3762cbd 2896 {
bd2cbd56 2897 last_thing_searched = marker;
c3762cbd
DK
2898 break;
2899 }
2900 if (i >= length)
2901 break;
41c01205
DK
2902 if (NILP (marker))
2903 {
2904 search_regs.start[i] = -1;
abd0071c 2905 list = XCDR (list);
41c01205
DK
2906 }
2907 else
2908 {
d311d28c 2909 Lisp_Object from;
abd0071c 2910 Lisp_Object m;
e2811828 2911
abd0071c 2912 m = marker;
41c01205
DK
2913 if (MARKERP (marker))
2914 {
2915 if (XMARKER (marker)->buffer == 0)
2916 XSETFASTINT (marker, 0);
2917 else
2918 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2919 }
e2811828 2920
41c01205 2921 CHECK_NUMBER_COERCE_MARKER (marker);
d311d28c 2922 from = marker;
e2811828 2923
abd0071c
KS
2924 if (!NILP (reseat) && MARKERP (m))
2925 {
b51d6c92 2926 unchain_marker (XMARKER (m));
9ad54a7e 2927 XSETCAR (list, Qnil);
abd0071c
KS
2928 }
2929
2930 if ((list = XCDR (list), !CONSP (list)))
2931 break;
2932
2933 m = marker = XCAR (list);
2934
41c01205
DK
2935 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2936 XSETFASTINT (marker, 0);
e2811828 2937
41c01205 2938 CHECK_NUMBER_COERCE_MARKER (marker);
0b1fccc4
PE
2939 if ((XINT (from) < 0
2940 ? TYPE_MINIMUM (regoff_t) <= XINT (from)
2941 : XINT (from) <= TYPE_MAXIMUM (regoff_t))
2942 && (XINT (marker) < 0
2943 ? TYPE_MINIMUM (regoff_t) <= XINT (marker)
2944 : XINT (marker) <= TYPE_MAXIMUM (regoff_t)))
d311d28c
PE
2945 {
2946 search_regs.start[i] = XINT (from);
2947 search_regs.end[i] = XINT (marker);
2948 }
2949 else
2950 {
2951 search_regs.start[i] = -1;
2952 }
abd0071c
KS
2953
2954 if (!NILP (reseat) && MARKERP (m))
2955 {
b51d6c92 2956 unchain_marker (XMARKER (m));
9ad54a7e 2957 XSETCAR (list, Qnil);
abd0071c 2958 }
41c01205 2959 }
abd0071c 2960 list = XCDR (list);
41c01205 2961 }
ca1d1d23 2962
41c01205
DK
2963 for (; i < search_regs.num_regs; i++)
2964 search_regs.start[i] = -1;
2965 }
ca1d1d23 2966
177c0ea7 2967 return Qnil;
ca1d1d23
JB
2968}
2969
7074fde6
FP
2970/* If non-zero the match data have been saved in saved_search_regs
2971 during the execution of a sentinel or filter. */
75ebf74b 2972static int search_regs_saved;
7074fde6 2973static struct re_registers saved_search_regs;
41c01205 2974static Lisp_Object saved_last_thing_searched;
7074fde6
FP
2975
2976/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2977 if asynchronous code (filter or sentinel) is running. */
2978static void
971de7fb 2979save_search_regs (void)
7074fde6
FP
2980{
2981 if (!search_regs_saved)
2982 {
2983 saved_search_regs.num_regs = search_regs.num_regs;
2984 saved_search_regs.start = search_regs.start;
2985 saved_search_regs.end = search_regs.end;
41c01205
DK
2986 saved_last_thing_searched = last_thing_searched;
2987 last_thing_searched = Qnil;
7074fde6 2988 search_regs.num_regs = 0;
2d4a771a
RS
2989 search_regs.start = 0;
2990 search_regs.end = 0;
7074fde6
FP
2991
2992 search_regs_saved = 1;
2993 }
2994}
2995
2996/* Called upon exit from filters and sentinels. */
2997void
971de7fb 2998restore_search_regs (void)
7074fde6
FP
2999{
3000 if (search_regs_saved)
3001 {
3002 if (search_regs.num_regs > 0)
3003 {
3004 xfree (search_regs.start);
3005 xfree (search_regs.end);
3006 }
3007 search_regs.num_regs = saved_search_regs.num_regs;
3008 search_regs.start = saved_search_regs.start;
3009 search_regs.end = saved_search_regs.end;
41c01205
DK
3010 last_thing_searched = saved_last_thing_searched;
3011 saved_last_thing_searched = Qnil;
7074fde6
FP
3012 search_regs_saved = 0;
3013 }
3014}
3015
abd0071c 3016static Lisp_Object
971de7fb 3017unwind_set_match_data (Lisp_Object list)
abd0071c 3018{
b51d6c92
SM
3019 /* It is NOT ALWAYS safe to free (evaporate) the markers immediately. */
3020 return Fset_match_data (list, Qt);
abd0071c
KS
3021}
3022
3023/* Called to unwind protect the match data. */
3024void
971de7fb 3025record_unwind_save_match_data (void)
abd0071c
KS
3026{
3027 record_unwind_protect (unwind_set_match_data,
3028 Fmatch_data (Qnil, Qnil, Qnil));
3029}
3030
e9a452d9 3031/* Quote a string to deactivate reg-expr chars */
ca1d1d23
JB
3032
3033DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
8c1a1077 3034 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
5842a27b 3035 (Lisp_Object string)
ca1d1d23 3036{
a7e979a4
PE
3037 register char *in, *out, *end;
3038 register char *temp;
0c8533c6 3039 int backslashes_added = 0;
ca1d1d23 3040
b7826503 3041 CHECK_STRING (string);
ca1d1d23 3042
38182d90 3043 temp = alloca (SBYTES (string) * 2);
ca1d1d23
JB
3044
3045 /* Now copy the data into the new string, inserting escapes. */
3046
a7e979a4 3047 in = SSDATA (string);
d5db4077 3048 end = in + SBYTES (string);
177c0ea7 3049 out = temp;
ca1d1d23
JB
3050
3051 for (; in != end; in++)
3052 {
66bc6082 3053 if (*in == '['
ca1d1d23
JB
3054 || *in == '*' || *in == '.' || *in == '\\'
3055 || *in == '?' || *in == '+'
3056 || *in == '^' || *in == '$')
0c8533c6 3057 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
3058 *out++ = *in;
3059 }
3060
3f8100f1 3061 return make_specified_string (temp,
d5db4077 3062 SCHARS (string) + backslashes_added,
3f8100f1
RS
3063 out - temp,
3064 STRING_MULTIBYTE (string));
ca1d1d23 3065}
177c0ea7 3066\f
dfcf069d 3067void
971de7fb 3068syms_of_search (void)
ca1d1d23
JB
3069{
3070 register int i;
3071
487282dc
KH
3072 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3073 {
3074 searchbufs[i].buf.allocated = 100;
23f86fce 3075 searchbufs[i].buf.buffer = xmalloc (100);
487282dc
KH
3076 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3077 searchbufs[i].regexp = Qnil;
ecdb561e 3078 searchbufs[i].whitespace_regexp = Qnil;
b69e3c18 3079 searchbufs[i].syntax_table = Qnil;
487282dc 3080 staticpro (&searchbufs[i].regexp);
aa77b5ce 3081 staticpro (&searchbufs[i].whitespace_regexp);
b69e3c18 3082 staticpro (&searchbufs[i].syntax_table);
487282dc
KH
3083 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3084 }
3085 searchbuf_head = &searchbufs[0];
ca1d1d23 3086
cd3520a4
JB
3087 DEFSYM (Qsearch_failed, "search-failed");
3088 DEFSYM (Qinvalid_regexp, "invalid-regexp");
ca1d1d23
JB
3089
3090 Fput (Qsearch_failed, Qerror_conditions,
3438fe21 3091 listn (CONSTYPE_PURE, 2, Qsearch_failed, Qerror));
ca1d1d23 3092 Fput (Qsearch_failed, Qerror_message,
2a0213a6 3093 build_pure_c_string ("Search failed"));
ca1d1d23
JB
3094
3095 Fput (Qinvalid_regexp, Qerror_conditions,
3438fe21 3096 listn (CONSTYPE_PURE, 2, Qinvalid_regexp, Qerror));
ca1d1d23 3097 Fput (Qinvalid_regexp, Qerror_message,
2a0213a6 3098 build_pure_c_string ("Invalid regexp"));
ca1d1d23 3099
daa37602
JB
3100 last_thing_searched = Qnil;
3101 staticpro (&last_thing_searched);
3102
0f6af254
DK
3103 saved_last_thing_searched = Qnil;
3104 staticpro (&saved_last_thing_searched);
3105
29208e82 3106 DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp,
e2811828 3107 doc: /* Regexp to substitute for bunches of spaces in regexp search.
f31a9a68
RS
3108Some commands use this for user-specified regexps.
3109Spaces that occur inside character classes or repetition operators
3110or other such regexp constructs are not replaced with this.
3111A value of nil (which is the normal value) means treat spaces literally. */);
41a33295 3112 Vsearch_spaces_regexp = Qnil;
f31a9a68 3113
29208e82 3114 DEFVAR_LISP ("inhibit-changing-match-data", Vinhibit_changing_match_data,
ef887810 3115 doc: /* Internal use only.
39d0bf74
RS
3116If non-nil, the primitive searching and matching functions
3117such as `looking-at', `string-match', `re-search-forward', etc.,
3118do not set the match data. The proper way to use this variable
3119is to bind it with `let' around a small expression. */);
ef887810
RS
3120 Vinhibit_changing_match_data = Qnil;
3121
ca1d1d23 3122 defsubr (&Slooking_at);
b819a390
RS
3123 defsubr (&Sposix_looking_at);
3124 defsubr (&Sstring_match);
3125 defsubr (&Sposix_string_match);
ca1d1d23
JB
3126 defsubr (&Ssearch_forward);
3127 defsubr (&Ssearch_backward);
ca1d1d23
JB
3128 defsubr (&Sre_search_forward);
3129 defsubr (&Sre_search_backward);
b819a390
RS
3130 defsubr (&Sposix_search_forward);
3131 defsubr (&Sposix_search_backward);
ca1d1d23
JB
3132 defsubr (&Sreplace_match);
3133 defsubr (&Smatch_beginning);
3134 defsubr (&Smatch_end);
3135 defsubr (&Smatch_data);
3f1c005b 3136 defsubr (&Sset_match_data);
ca1d1d23
JB
3137 defsubr (&Sregexp_quote);
3138}