use xmalloc_atomic for many pointerless objects
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
4fb9a543 2
ba318903 3Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2014 Free Software
ab422c4d 4Foundation, Inc.
ca1d1d23
JB
5
6This file is part of GNU Emacs.
7
9ec0b715 8GNU Emacs is free software: you can redistribute it and/or modify
ca1d1d23 9it under the terms of the GNU General Public License as published by
9ec0b715
GM
10the Free Software Foundation, either version 3 of the License, or
11(at your option) any later version.
ca1d1d23
JB
12
13GNU Emacs is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
9ec0b715 19along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
ca1d1d23
JB
20
21
18160b98 22#include <config.h>
0328b6de 23
ca1d1d23 24#include "lisp.h"
5679531d 25#include "category.h"
76eb0881 26#include "character.h"
e5560ff7 27#include "buffer.h"
45b683a1 28#include "syntax.h"
89ad6492 29#include "charset.h"
9169c321 30#include "region-cache.h"
ca1d1d23 31#include "commands.h"
9ac0d9e0 32#include "blockinput.h"
bf1760bb 33#include "intervals.h"
4746118a 34
ca1d1d23
JB
35#include <sys/types.h>
36#include "regex.h"
37
1d288aef 38#define REGEXP_CACHE_SIZE 20
ca1d1d23 39
487282dc
KH
40/* If the regexp is non-nil, then the buffer contains the compiled form
41 of that regexp, suitable for searching. */
1d288aef
RS
42struct regexp_cache
43{
487282dc 44 struct regexp_cache *next;
ecdb561e 45 Lisp_Object regexp, whitespace_regexp;
b69e3c18 46 /* Syntax table for which the regexp applies. We need this because
58e95211
SM
47 of character classes. If this is t, then the compiled pattern is valid
48 for any syntax-table. */
b69e3c18 49 Lisp_Object syntax_table;
487282dc
KH
50 struct re_pattern_buffer buf;
51 char fastmap[0400];
457882c2
PE
52 /* True means regexp was compiled to do full POSIX backtracking. */
53 bool posix;
487282dc 54};
ca1d1d23 55
487282dc 56/* The instances of that struct. */
989b29ad 57static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 58
487282dc 59/* The head of the linked list; points to the most recently used buffer. */
989b29ad 60static struct regexp_cache *searchbuf_head;
ca1d1d23 61
ca1d1d23 62
4746118a
JB
63/* Every call to re_match, etc., must pass &search_regs as the regs
64 argument unless you can show it is unnecessary (i.e., if re_match
65 is certainly going to be called again before region-around-match
66 can be called).
67
68 Since the registers are now dynamically allocated, we need to make
69 sure not to refer to the Nth register before checking that it has
1113d9db
JB
70 been allocated by checking search_regs.num_regs.
71
72 The regex code keeps track of whether it has allocated the search
487282dc
KH
73 buffer using bits in the re_pattern_buffer. This means that whenever
74 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
75 allocated any registers, and will allocate new registers the next
76 time you call a searching or matching function. Therefore, we need
77 to call re_set_registers after compiling a new pattern or after
78 setting the match registers, so that the regex functions will be
79 able to free or re-allocate it properly. */
ca1d1d23
JB
80static struct re_registers search_regs;
81
daa37602
JB
82/* The buffer in which the last search was performed, or
83 Qt if the last search was done in a string;
84 Qnil if no searching has been done yet. */
85static Lisp_Object last_thing_searched;
ca1d1d23 86
02b16839 87/* Error condition signaled when regexp compile_pattern fails. */
955cbe7b 88static Lisp_Object Qinvalid_regexp;
ca1d1d23 89
02b16839 90/* Error condition used for failing searches. */
955cbe7b 91static Lisp_Object Qsearch_failed;
06f77a8a 92
d311d28c 93static void set_search_regs (ptrdiff_t, ptrdiff_t);
f57e2426 94static void save_search_regs (void);
d311d28c
PE
95static EMACS_INT simple_search (EMACS_INT, unsigned char *, ptrdiff_t,
96 ptrdiff_t, Lisp_Object, ptrdiff_t, ptrdiff_t,
97 ptrdiff_t, ptrdiff_t);
98static EMACS_INT boyer_moore (EMACS_INT, unsigned char *, ptrdiff_t,
99 Lisp_Object, Lisp_Object, ptrdiff_t,
100 ptrdiff_t, int);
101static EMACS_INT search_buffer (Lisp_Object, ptrdiff_t, ptrdiff_t,
102 ptrdiff_t, ptrdiff_t, EMACS_INT, int,
457882c2 103 Lisp_Object, Lisp_Object, bool);
b819a390 104
845ca893 105static _Noreturn void
971de7fb 106matcher_overflow (void)
ca1d1d23
JB
107{
108 error ("Stack overflow in regexp matcher");
109}
110
b819a390
RS
111/* Compile a regexp and signal a Lisp error if anything goes wrong.
112 PATTERN is the pattern to compile.
113 CP is the place to put the result.
facdc750 114 TRANSLATE is a translation table for ignoring case, or nil for none.
457882c2
PE
115 POSIX is true if we want full backtracking (POSIX style) for this pattern.
116 False means backtrack only enough to get a valid match.
bbc73b48
KH
117
118 The behavior also depends on Vsearch_spaces_regexp. */
ca1d1d23 119
487282dc 120static void
457882c2
PE
121compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
122 Lisp_Object translate, bool posix)
ca1d1d23 123{
d451e4db 124 char *val;
b819a390 125 reg_syntax_t old;
ca1d1d23 126
487282dc 127 cp->regexp = Qnil;
59fab369 128 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
b819a390 129 cp->posix = posix;
93daa011 130 cp->buf.multibyte = STRING_MULTIBYTE (pattern);
89ad6492 131 cp->buf.charset_unibyte = charset_unibyte;
d8c85250
CY
132 if (STRINGP (Vsearch_spaces_regexp))
133 cp->whitespace_regexp = Vsearch_spaces_regexp;
134 else
135 cp->whitespace_regexp = Qnil;
136
e7b2dd2e
RS
137 /* rms: I think BLOCK_INPUT is not needed here any more,
138 because regex.c defines malloc to call xmalloc.
139 Using BLOCK_INPUT here means the debugger won't run if an error occurs.
140 So let's turn it off. */
141 /* BLOCK_INPUT; */
fb4a568d 142 old = re_set_syntax (RE_SYNTAX_EMACS
b819a390 143 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
d8c85250
CY
144
145 if (STRINGP (Vsearch_spaces_regexp))
42a5b22f 146 re_set_whitespace_regexp (SSDATA (Vsearch_spaces_regexp));
d8c85250
CY
147 else
148 re_set_whitespace_regexp (NULL);
bbc73b48 149
51b59d79 150 val = (char *) re_compile_pattern (SSDATA (pattern),
8f924df7 151 SBYTES (pattern), &cp->buf);
bbc73b48 152
58e95211
SM
153 /* If the compiled pattern hard codes some of the contents of the
154 syntax-table, it can only be reused with *this* syntax table. */
4b4deea2 155 cp->syntax_table = cp->buf.used_syntax ? BVAR (current_buffer, syntax_table) : Qt;
58e95211 156
bbc73b48
KH
157 re_set_whitespace_regexp (NULL);
158
b819a390 159 re_set_syntax (old);
4d7e6e51 160 /* unblock_input (); */
ca1d1d23 161 if (val)
06f77a8a 162 xsignal1 (Qinvalid_regexp, build_string (val));
1113d9db 163
487282dc 164 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
165}
166
6efc7887
RS
167/* Shrink each compiled regexp buffer in the cache
168 to the size actually used right now.
169 This is called from garbage collection. */
170
171void
971de7fb 172shrink_regexp_cache (void)
6efc7887 173{
a968f437 174 struct regexp_cache *cp;
6efc7887
RS
175
176 for (cp = searchbuf_head; cp != 0; cp = cp->next)
177 {
178 cp->buf.allocated = cp->buf.used;
38182d90 179 cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
6efc7887
RS
180 }
181}
182
54dd3310
SM
183/* Clear the regexp cache w.r.t. a particular syntax table,
184 because it was changed.
e5b94d44
CY
185 There is no danger of memory leak here because re_compile_pattern
186 automagically manages the memory in each re_pattern_buffer struct,
187 based on its `allocated' and `buffer' values. */
188void
971de7fb 189clear_regexp_cache (void)
e5b94d44
CY
190{
191 int i;
192
193 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
920fd1fc
EZ
194 /* It's tempting to compare with the syntax-table we've actually changed,
195 but it's not sufficient because char-table inheritance means that
54dd3310
SM
196 modifying one syntax-table can change others at the same time. */
197 if (!EQ (searchbufs[i].syntax_table, Qt))
198 searchbufs[i].regexp = Qnil;
e5b94d44
CY
199}
200
487282dc 201/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
202 the cache.
203 PATTERN is the pattern to compile.
facdc750 204 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
205 REGP is the structure that says where to store the "register"
206 values that will result from matching this pattern.
207 If it is 0, we should compile the pattern not to record any
208 subexpression bounds.
457882c2
PE
209 POSIX is true if we want full backtracking (POSIX style) for this pattern.
210 False means backtrack only enough to get a valid match. */
487282dc
KH
211
212struct re_pattern_buffer *
0063fdb1 213compile_pattern (Lisp_Object pattern, struct re_registers *regp,
457882c2 214 Lisp_Object translate, bool posix, bool multibyte)
487282dc
KH
215{
216 struct regexp_cache *cp, **cpp;
217
218 for (cpp = &searchbuf_head; ; cpp = &cp->next)
219 {
220 cp = *cpp;
f1b9c7c1
KR
221 /* Entries are initialized to nil, and may be set to nil by
222 compile_pattern_1 if the pattern isn't valid. Don't apply
49a5f770
KR
223 string accessors in those cases. However, compile_pattern_1
224 is only applied to the cache entry we pick here to reuse. So
225 nil should never appear before a non-nil entry. */
7c752c80 226 if (NILP (cp->regexp))
f1b9c7c1 227 goto compile_it;
d5db4077 228 if (SCHARS (cp->regexp) == SCHARS (pattern)
cf69b13e 229 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
1d288aef 230 && !NILP (Fstring_equal (cp->regexp, pattern))
59fab369 231 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
5679531d 232 && cp->posix == posix
58e95211 233 && (EQ (cp->syntax_table, Qt)
4b4deea2 234 || EQ (cp->syntax_table, BVAR (current_buffer, syntax_table)))
89ad6492
KH
235 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
236 && cp->buf.charset_unibyte == charset_unibyte)
487282dc
KH
237 break;
238
f1b9c7c1
KR
239 /* If we're at the end of the cache, compile into the nil cell
240 we found, or the last (least recently used) cell with a
241 string value. */
487282dc
KH
242 if (cp->next == 0)
243 {
f1b9c7c1 244 compile_it:
cfe0661d 245 compile_pattern_1 (cp, pattern, translate, posix);
487282dc
KH
246 break;
247 }
248 }
249
250 /* When we get here, cp (aka *cpp) contains the compiled pattern,
251 either because we found it in the cache or because we just compiled it.
252 Move it to the front of the queue to mark it as most recently used. */
253 *cpp = cp->next;
254 cp->next = searchbuf_head;
255 searchbuf_head = cp;
1113d9db 256
6639708c
RS
257 /* Advise the searching functions about the space we have allocated
258 for register data. */
259 if (regp)
260 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
261
78edd3b7 262 /* The compiled pattern can be used both for multibyte and unibyte
89ad6492
KH
263 target. But, we have to tell which the pattern is used for. */
264 cp->buf.target_multibyte = multibyte;
265
487282dc 266 return &cp->buf;
ca1d1d23
JB
267}
268
ca1d1d23 269\f
b819a390 270static Lisp_Object
457882c2 271looking_at_1 (Lisp_Object string, bool posix)
ca1d1d23
JB
272{
273 Lisp_Object val;
274 unsigned char *p1, *p2;
d311d28c
PE
275 ptrdiff_t s1, s2;
276 register ptrdiff_t i;
487282dc 277 struct re_pattern_buffer *bufp;
ca1d1d23 278
7074fde6
FP
279 if (running_asynch_code)
280 save_search_regs ();
281
910c747a 282 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
34dabdb7 283 set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
a098c930 284 BVAR (current_buffer, case_eqv_table));
910c747a 285
b7826503 286 CHECK_STRING (string);
ef887810
RS
287 bufp = compile_pattern (string,
288 (NILP (Vinhibit_changing_match_data)
289 ? &search_regs : NULL),
4b4deea2
TT
290 (!NILP (BVAR (current_buffer, case_fold_search))
291 ? BVAR (current_buffer, case_canon_table) : Qnil),
0c8533c6 292 posix,
4b4deea2 293 !NILP (BVAR (current_buffer, enable_multibyte_characters)));
ca1d1d23
JB
294
295 immediate_quit = 1;
296 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
297
298 /* Get pointers and sizes of the two strings
299 that make up the visible portion of the buffer. */
300
301 p1 = BEGV_ADDR;
fa8ed3e0 302 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 303 p2 = GAP_END_ADDR;
fa8ed3e0 304 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
305 if (s1 < 0)
306 {
307 p2 = p1;
fa8ed3e0 308 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
309 s1 = 0;
310 }
311 if (s2 < 0)
312 {
fa8ed3e0 313 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
314 s2 = 0;
315 }
8bb43c28
RS
316
317 re_match_object = Qnil;
177c0ea7 318
487282dc 319 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
ef887810
RS
320 PT_BYTE - BEGV_BYTE,
321 (NILP (Vinhibit_changing_match_data)
322 ? &search_regs : NULL),
fa8ed3e0 323 ZV_BYTE - BEGV_BYTE);
de182d70 324 immediate_quit = 0;
177c0ea7 325
ca1d1d23
JB
326 if (i == -2)
327 matcher_overflow ();
328
908589fd 329 val = (i >= 0 ? Qt : Qnil);
ef887810 330 if (NILP (Vinhibit_changing_match_data) && i >= 0)
fc30d803 331 {
fa8ed3e0
RS
332 for (i = 0; i < search_regs.num_regs; i++)
333 if (search_regs.start[i] >= 0)
334 {
335 search_regs.start[i]
336 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
fc30d803
SM
337 search_regs.end[i]
338 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
339 }
340 /* Set last_thing_searched only when match data is changed. */
ef887810 341 XSETBUFFER (last_thing_searched, current_buffer);
fc30d803 342 }
ef887810 343
ca1d1d23
JB
344 return val;
345}
346
b819a390 347DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
8c1a1077
PJ
348 doc: /* Return t if text after point matches regular expression REGEXP.
349This function modifies the match data that `match-beginning',
350`match-end' and `match-data' access; save and restore the match
351data if you want to preserve them. */)
5842a27b 352 (Lisp_Object regexp)
b819a390 353{
94f94972 354 return looking_at_1 (regexp, 0);
b819a390
RS
355}
356
357DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
8c1a1077
PJ
358 doc: /* Return t if text after point matches regular expression REGEXP.
359Find the longest match, in accord with Posix regular expression rules.
360This function modifies the match data that `match-beginning',
361`match-end' and `match-data' access; save and restore the match
362data if you want to preserve them. */)
5842a27b 363 (Lisp_Object regexp)
b819a390 364{
94f94972 365 return looking_at_1 (regexp, 1);
b819a390
RS
366}
367\f
368static Lisp_Object
457882c2
PE
369string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
370 bool posix)
ca1d1d23 371{
d311d28c 372 ptrdiff_t val;
487282dc 373 struct re_pattern_buffer *bufp;
d311d28c
PE
374 EMACS_INT pos;
375 ptrdiff_t pos_byte, i;
ca1d1d23 376
7074fde6
FP
377 if (running_asynch_code)
378 save_search_regs ();
379
b7826503
PJ
380 CHECK_STRING (regexp);
381 CHECK_STRING (string);
ca1d1d23
JB
382
383 if (NILP (start))
0c8533c6 384 pos = 0, pos_byte = 0;
ca1d1d23
JB
385 else
386 {
d311d28c 387 ptrdiff_t len = SCHARS (string);
ca1d1d23 388
b7826503 389 CHECK_NUMBER (start);
0c8533c6
RS
390 pos = XINT (start);
391 if (pos < 0 && -pos <= len)
392 pos = len + pos;
393 else if (0 > pos || pos > len)
ca1d1d23 394 args_out_of_range (string, start);
0c8533c6 395 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
396 }
397
910c747a 398 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
34dabdb7 399 set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
a098c930 400 BVAR (current_buffer, case_eqv_table));
910c747a 401
ef887810
RS
402 bufp = compile_pattern (regexp,
403 (NILP (Vinhibit_changing_match_data)
404 ? &search_regs : NULL),
4b4deea2
TT
405 (!NILP (BVAR (current_buffer, case_fold_search))
406 ? BVAR (current_buffer, case_canon_table) : Qnil),
0c8533c6
RS
407 posix,
408 STRING_MULTIBYTE (string));
ca1d1d23 409 immediate_quit = 1;
8bb43c28 410 re_match_object = string;
177c0ea7 411
51b59d79 412 val = re_search (bufp, SSDATA (string),
d5db4077
KR
413 SBYTES (string), pos_byte,
414 SBYTES (string) - pos_byte,
ef887810
RS
415 (NILP (Vinhibit_changing_match_data)
416 ? &search_regs : NULL));
ca1d1d23 417 immediate_quit = 0;
ef887810
RS
418
419 /* Set last_thing_searched only when match data is changed. */
420 if (NILP (Vinhibit_changing_match_data))
421 last_thing_searched = Qt;
422
ca1d1d23
JB
423 if (val == -2)
424 matcher_overflow ();
425 if (val < 0) return Qnil;
0c8533c6 426
ef887810
RS
427 if (NILP (Vinhibit_changing_match_data))
428 for (i = 0; i < search_regs.num_regs; i++)
429 if (search_regs.start[i] >= 0)
430 {
431 search_regs.start[i]
432 = string_byte_to_char (string, search_regs.start[i]);
433 search_regs.end[i]
434 = string_byte_to_char (string, search_regs.end[i]);
435 }
0c8533c6
RS
436
437 return make_number (string_byte_to_char (string, val));
ca1d1d23 438}
e59a8453 439
a7ca3326 440DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
8c1a1077 441 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
b85acc4b 442Matching ignores case if `case-fold-search' is non-nil.
8c1a1077
PJ
443If third arg START is non-nil, start search at that index in STRING.
444For index of first char beyond the match, do (match-end 0).
445`match-end' and `match-beginning' also give indices of substrings
2bd2f32d
RS
446matched by parenthesis constructs in the pattern.
447
448You can use the function `match-string' to extract the substrings
449matched by the parenthesis constructions in REGEXP. */)
5842a27b 450 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
b819a390
RS
451{
452 return string_match_1 (regexp, string, start, 0);
453}
454
455DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
8c1a1077
PJ
456 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
457Find the longest match, in accord with Posix regular expression rules.
458Case is ignored if `case-fold-search' is non-nil in the current buffer.
459If third arg START is non-nil, start search at that index in STRING.
460For index of first char beyond the match, do (match-end 0).
461`match-end' and `match-beginning' also give indices of substrings
462matched by parenthesis constructs in the pattern. */)
5842a27b 463 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
b819a390
RS
464{
465 return string_match_1 (regexp, string, start, 1);
466}
467
e59a8453
RS
468/* Match REGEXP against STRING, searching all of STRING,
469 and return the index of the match, or negative on failure.
470 This does not clobber the match data. */
471
d311d28c 472ptrdiff_t
971de7fb 473fast_string_match (Lisp_Object regexp, Lisp_Object string)
e59a8453 474{
d311d28c 475 ptrdiff_t val;
487282dc 476 struct re_pattern_buffer *bufp;
e59a8453 477
facdc750
RS
478 bufp = compile_pattern (regexp, 0, Qnil,
479 0, STRING_MULTIBYTE (string));
e59a8453 480 immediate_quit = 1;
8bb43c28 481 re_match_object = string;
177c0ea7 482
51b59d79 483 val = re_search (bufp, SSDATA (string),
d5db4077
KR
484 SBYTES (string), 0,
485 SBYTES (string), 0);
e59a8453
RS
486 immediate_quit = 0;
487 return val;
488}
5679531d
KH
489
490/* Match REGEXP against STRING, searching all of STRING ignoring case,
491 and return the index of the match, or negative on failure.
0c8533c6
RS
492 This does not clobber the match data.
493 We assume that STRING contains single-byte characters. */
5679531d 494
d311d28c 495ptrdiff_t
d923b542
DA
496fast_c_string_match_ignore_case (Lisp_Object regexp,
497 const char *string, ptrdiff_t len)
5679531d 498{
d311d28c 499 ptrdiff_t val;
5679531d 500 struct re_pattern_buffer *bufp;
5679531d 501
0c8533c6 502 regexp = string_make_unibyte (regexp);
b4577c63 503 re_match_object = Qt;
5679531d 504 bufp = compile_pattern (regexp, 0,
0190922f 505 Vascii_canon_table, 0,
f8bd51c4 506 0);
5679531d
KH
507 immediate_quit = 1;
508 val = re_search (bufp, string, len, 0, len, 0);
509 immediate_quit = 0;
510 return val;
511}
be5f4dfb
KH
512
513/* Like fast_string_match but ignore case. */
514
d311d28c 515ptrdiff_t
971de7fb 516fast_string_match_ignore_case (Lisp_Object regexp, Lisp_Object string)
be5f4dfb 517{
d311d28c 518 ptrdiff_t val;
be5f4dfb
KH
519 struct re_pattern_buffer *bufp;
520
0190922f 521 bufp = compile_pattern (regexp, 0, Vascii_canon_table,
be5f4dfb
KH
522 0, STRING_MULTIBYTE (string));
523 immediate_quit = 1;
524 re_match_object = string;
525
51b59d79 526 val = re_search (bufp, SSDATA (string),
be5f4dfb
KH
527 SBYTES (string), 0,
528 SBYTES (string), 0);
529 immediate_quit = 0;
530 return val;
531}
ca1d1d23 532\f
a80ce213 533/* Match REGEXP against the characters after POS to LIMIT, and return
8b13507a
KH
534 the number of matched characters. If STRING is non-nil, match
535 against the characters in it. In that case, POS and LIMIT are
536 indices into the string. This function doesn't modify the match
537 data. */
538
d311d28c 539ptrdiff_t
0063fdb1
DA
540fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
541 ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string)
8b13507a 542{
0063fdb1 543 bool multibyte;
8b13507a
KH
544 struct re_pattern_buffer *buf;
545 unsigned char *p1, *p2;
d311d28c
PE
546 ptrdiff_t s1, s2;
547 ptrdiff_t len;
78edd3b7 548
8b13507a
KH
549 if (STRINGP (string))
550 {
551 if (pos_byte < 0)
552 pos_byte = string_char_to_byte (string, pos);
553 if (limit_byte < 0)
554 limit_byte = string_char_to_byte (string, limit);
555 p1 = NULL;
556 s1 = 0;
557 p2 = SDATA (string);
558 s2 = SBYTES (string);
559 re_match_object = string;
560 multibyte = STRING_MULTIBYTE (string);
561 }
562 else
563 {
564 if (pos_byte < 0)
565 pos_byte = CHAR_TO_BYTE (pos);
566 if (limit_byte < 0)
567 limit_byte = CHAR_TO_BYTE (limit);
568 pos_byte -= BEGV_BYTE;
569 limit_byte -= BEGV_BYTE;
570 p1 = BEGV_ADDR;
571 s1 = GPT_BYTE - BEGV_BYTE;
572 p2 = GAP_END_ADDR;
573 s2 = ZV_BYTE - GPT_BYTE;
574 if (s1 < 0)
575 {
576 p2 = p1;
577 s2 = ZV_BYTE - BEGV_BYTE;
578 s1 = 0;
579 }
580 if (s2 < 0)
581 {
582 s1 = ZV_BYTE - BEGV_BYTE;
583 s2 = 0;
584 }
585 re_match_object = Qnil;
4b4deea2 586 multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
8b13507a
KH
587 }
588
589 buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
590 immediate_quit = 1;
591 len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
592 pos_byte, NULL, limit_byte);
593 immediate_quit = 0;
594
595 return len;
596}
597
598\f
9169c321
JB
599/* The newline cache: remembering which sections of text have no newlines. */
600
e30b79c1 601/* If the user has requested the long scans caching, make sure it's on.
9169c321
JB
602 Otherwise, make sure it's off.
603 This is our cheezy way of associating an action with the change of
604 state of a buffer-local variable. */
c10e9ece 605static struct region_cache *
971de7fb 606newline_cache_on_off (struct buffer *buf)
9169c321 607{
c10e9ece
EZ
608 struct buffer *base_buf = buf;
609 bool indirect_p = false;
610
611 if (buf->base_buffer)
612 {
613 base_buf = buf->base_buffer;
614 indirect_p = true;
615 }
616
617 /* Don't turn on or off the cache in the base buffer, if the value
618 of cache-long-scans of the base buffer is inconsistent with that.
619 This is because doing so will just make the cache pure overhead,
620 since if we turn it on via indirect buffer, it will be
621 immediately turned off by its base buffer. */
e30b79c1 622 if (NILP (BVAR (buf, cache_long_scans)))
9169c321 623 {
c10e9ece
EZ
624 if (!indirect_p
625 || NILP (BVAR (base_buf, cache_long_scans)))
626 {
627 /* It should be off. */
628 if (base_buf->newline_cache)
629 {
630 free_region_cache (base_buf->newline_cache);
631 base_buf->newline_cache = 0;
632 }
633 }
634 return NULL;
9169c321
JB
635 }
636 else
637 {
c10e9ece
EZ
638 if (!indirect_p
639 || !NILP (BVAR (base_buf, cache_long_scans)))
640 {
641 /* It should be on. */
642 if (base_buf->newline_cache == 0)
643 base_buf->newline_cache = new_region_cache ();
644 }
645 return base_buf->newline_cache;
9169c321
JB
646 }
647}
648
649\f
b5426561 650/* Search for COUNT newlines between START/START_BYTE and END/END_BYTE.
9169c321
JB
651
652 If COUNT is positive, search forwards; END must be >= START.
653 If COUNT is negative, search backwards for the -COUNTth instance;
654 END must be <= START.
655 If COUNT is zero, do anything you please; run rogue, for all I care.
656
657 If END is zero, use BEGV or ZV instead, as appropriate for the
658 direction indicated by COUNT.
ffd56f97
JB
659
660 If we find COUNT instances, set *SHORTAGE to zero, and return the
a9f2a45f 661 position past the COUNTth match. Note that for reverse motion
5bfe95c9 662 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 663
9169c321 664 If we don't find COUNT instances before reaching END, set *SHORTAGE
a84b7c53 665 to the number of newlines left unfound, and return END.
ffd56f97 666
2a14a4f1
DA
667 If BYTEPOS is not NULL, set *BYTEPOS to the byte position corresponding
668 to the returned character position.
669
9fa1de30 670 If ALLOW_QUIT, set immediate_quit. That's good to do
087a5f81
RS
671 except when inside redisplay. */
672
d311d28c 673ptrdiff_t
b5426561
DA
674find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
675 ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *shortage,
676 ptrdiff_t *bytepos, bool allow_quit)
ca1d1d23 677{
9169c321 678 struct region_cache *newline_cache;
177c0ea7 679 int direction;
c10e9ece 680 struct buffer *cache_buffer;
ffd56f97 681
9169c321
JB
682 if (count > 0)
683 {
684 direction = 1;
8ca30920
DA
685 if (!end)
686 end = ZV, end_byte = ZV_BYTE;
9169c321
JB
687 }
688 else
689 {
690 direction = -1;
a84b7c53 691 if (!end)
8ca30920 692 end = BEGV, end_byte = BEGV_BYTE;
9169c321 693 }
8ca30920
DA
694 if (end_byte == -1)
695 end_byte = CHAR_TO_BYTE (end);
ffd56f97 696
c10e9ece
EZ
697 newline_cache = newline_cache_on_off (current_buffer);
698 if (current_buffer->base_buffer)
699 cache_buffer = current_buffer->base_buffer;
700 else
701 cache_buffer = current_buffer;
ca1d1d23
JB
702
703 if (shortage != 0)
704 *shortage = 0;
705
087a5f81 706 immediate_quit = allow_quit;
ca1d1d23 707
ffd56f97 708 if (count > 0)
9169c321 709 while (start != end)
ca1d1d23 710 {
9169c321
JB
711 /* Our innermost scanning loop is very simple; it doesn't know
712 about gaps, buffer ends, or the newline cache. ceiling is
713 the position of the last character before the next such
714 obstacle --- the last character the dumb search loop should
715 examine. */
2a14a4f1 716 ptrdiff_t tem, ceiling_byte = end_byte - 1;
9169c321 717
2a3d9a06
EZ
718 /* If we're using the newline cache, consult it to see whether
719 we can avoid some scanning. */
a84b7c53 720 if (newline_cache)
9169c321 721 {
0065d054 722 ptrdiff_t next_change;
2a3d9a06
EZ
723 int result = 1;
724
9169c321 725 immediate_quit = 0;
2a3d9a06
EZ
726 while (start < end && result)
727 {
728 ptrdiff_t lim1;
729
730 result = region_cache_forward (cache_buffer, newline_cache,
731 start, &next_change);
732 if (result)
733 {
734 start = next_change;
735 lim1 = next_change = end;
736 }
737 else
738 lim1 = min (next_change, end);
739
740 /* The cache returned zero for this region; see if
741 this is because the region is known and includes
742 only newlines. While at that, count any newlines
743 we bump into, and exit if we found enough off them. */
744 start_byte = CHAR_TO_BYTE (start);
745 while (start < lim1
746 && FETCH_BYTE (start_byte) == '\n')
747 {
748 start_byte++;
749 start++;
750 if (--count == 0)
751 {
752 if (bytepos)
753 *bytepos = start_byte;
754 return start;
755 }
756 }
757 /* If we found a non-newline character before hitting
758 position where the cache will again return non-zero
759 (i.e. no newlines beyond that position), it means
760 this region is not yet known to the cache, and we
761 must resort to the "dumb loop" method. */
762 if (start < next_change && !result)
763 break;
764 result = 1;
765 }
766 if (start >= end)
767 {
768 start = end;
769 start_byte = end_byte;
770 break;
771 }
cbe0db0d 772 immediate_quit = allow_quit;
9169c321 773
fa8ed3e0
RS
774 /* START should never be after END. */
775 if (start_byte > ceiling_byte)
776 start_byte = ceiling_byte;
9169c321
JB
777
778 /* Now the text after start is an unknown region, and
779 next_change is the position of the next known region. */
c8b9f1bc 780 ceiling_byte = min (CHAR_TO_BYTE (next_change) - 1, ceiling_byte);
9169c321 781 }
b5426561 782 else if (start_byte == -1)
c8b9f1bc 783 start_byte = CHAR_TO_BYTE (start);
9169c321
JB
784
785 /* The dumb loop can only scan text stored in contiguous
786 bytes. BUFFER_CEILING_OF returns the last character
787 position that is contiguous, so the ceiling is the
788 position after that. */
67ce527d
KH
789 tem = BUFFER_CEILING_OF (start_byte);
790 ceiling_byte = min (tem, ceiling_byte);
9169c321
JB
791
792 {
177c0ea7 793 /* The termination address of the dumb loop. */
5389db9e
PE
794 unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
795 ptrdiff_t lim_byte = ceiling_byte + 1;
9169c321 796
5389db9e
PE
797 /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
798 of the base, the cursor, and the next line. */
799 ptrdiff_t base = start_byte - lim_byte;
800 ptrdiff_t cursor, next;
801
802 for (cursor = base; cursor < 0; cursor = next)
803 {
9169c321 804 /* The dumb loop. */
5389db9e
PE
805 unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
806 next = nl ? nl - lim_addr : 0;
9169c321 807
2a3d9a06
EZ
808 /* If we're using the newline cache, cache the fact that
809 the region we just traversed is free of newlines. */
810 if (newline_cache && cursor != next)
a84b7c53 811 {
c10e9ece 812 know_region_cache (cache_buffer, newline_cache,
5389db9e
PE
813 BYTE_TO_CHAR (lim_byte + cursor),
814 BYTE_TO_CHAR (lim_byte + next));
815 /* know_region_cache can relocate buffer text. */
816 lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
a84b7c53
PE
817 }
818
819 if (! nl)
820 break;
5389db9e 821 next++;
a84b7c53
PE
822
823 if (--count == 0)
824 {
825 immediate_quit = 0;
2a14a4f1 826 if (bytepos)
5389db9e
PE
827 *bytepos = lim_byte + next;
828 return BYTE_TO_CHAR (lim_byte + next);
85f37d10 829 }
9169c321
JB
830 }
831
5389db9e 832 start_byte = lim_byte;
2a14a4f1 833 start = BYTE_TO_CHAR (start_byte);
9169c321 834 }
ca1d1d23
JB
835 }
836 else
9169c321
JB
837 while (start > end)
838 {
839 /* The last character to check before the next obstacle. */
2a14a4f1 840 ptrdiff_t tem, ceiling_byte = end_byte;
9169c321
JB
841
842 /* Consult the newline cache, if appropriate. */
a84b7c53 843 if (newline_cache)
9169c321 844 {
0065d054 845 ptrdiff_t next_change;
2a3d9a06
EZ
846 int result = 1;
847
9169c321 848 immediate_quit = 0;
2a3d9a06
EZ
849 while (start > end && result)
850 {
851 ptrdiff_t lim1;
852
853 result = region_cache_backward (cache_buffer, newline_cache,
854 start, &next_change);
855 if (result)
856 {
857 start = next_change;
858 lim1 = next_change = end;
859 }
860 else
861 lim1 = max (next_change, end);
862 start_byte = CHAR_TO_BYTE (start);
863 while (start > lim1
864 && FETCH_BYTE (start_byte - 1) == '\n')
865 {
866 if (++count == 0)
867 {
868 if (bytepos)
869 *bytepos = start_byte;
870 return start;
871 }
872 start_byte--;
873 start--;
874 }
875 if (start > next_change && !result)
876 break;
877 result = 1;
878 }
879 if (start <= end)
880 {
881 start = end;
882 start_byte = end_byte;
883 break;
884 }
cbe0db0d 885 immediate_quit = allow_quit;
9169c321
JB
886
887 /* Start should never be at or before end. */
fa8ed3e0
RS
888 if (start_byte <= ceiling_byte)
889 start_byte = ceiling_byte + 1;
9169c321
JB
890
891 /* Now the text before start is an unknown region, and
892 next_change is the position of the next known region. */
c8b9f1bc 893 ceiling_byte = max (CHAR_TO_BYTE (next_change), ceiling_byte);
9169c321 894 }
b5426561 895 else if (start_byte == -1)
c8b9f1bc 896 start_byte = CHAR_TO_BYTE (start);
9169c321
JB
897
898 /* Stop scanning before the gap. */
67ce527d
KH
899 tem = BUFFER_FLOOR_OF (start_byte - 1);
900 ceiling_byte = max (tem, ceiling_byte);
9169c321
JB
901
902 {
903 /* The termination address of the dumb loop. */
5389db9e 904 unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
9169c321 905
5389db9e
PE
906 /* Offsets (relative to CEILING_ADDR and CEILING_BYTE) of
907 the base, the cursor, and the previous line. These
908 offsets are at least -1. */
909 ptrdiff_t base = start_byte - ceiling_byte;
910 ptrdiff_t cursor, prev;
911
912 for (cursor = base; 0 < cursor; cursor = prev)
9169c321 913 {
5389db9e
PE
914 unsigned char *nl = memrchr (ceiling_addr, '\n', cursor);
915 prev = nl ? nl - ceiling_addr : -1;
9169c321
JB
916
917 /* If we're looking for newlines, cache the fact that
5389db9e 918 this line's region is free of them. */
2a3d9a06 919 if (newline_cache && cursor != prev + 1)
a84b7c53 920 {
c10e9ece 921 know_region_cache (cache_buffer, newline_cache,
5389db9e
PE
922 BYTE_TO_CHAR (ceiling_byte + prev + 1),
923 BYTE_TO_CHAR (ceiling_byte + cursor));
924 /* know_region_cache can relocate buffer text. */
925 ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
a84b7c53
PE
926 }
927
928 if (! nl)
929 break;
930
931 if (++count >= 0)
932 {
933 immediate_quit = 0;
2a14a4f1 934 if (bytepos)
5389db9e
PE
935 *bytepos = ceiling_byte + prev + 1;
936 return BYTE_TO_CHAR (ceiling_byte + prev + 1);
85f37d10 937 }
9169c321
JB
938 }
939
5389db9e 940 start_byte = ceiling_byte;
2a14a4f1 941 start = BYTE_TO_CHAR (start_byte);
9169c321
JB
942 }
943 }
944
ca1d1d23 945 immediate_quit = 0;
2a14a4f1 946 if (shortage)
ffd56f97 947 *shortage = count * direction;
2a14a4f1
DA
948 if (bytepos)
949 {
950 *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
951 eassert (*bytepos == CHAR_TO_BYTE (start));
952 }
9169c321 953 return start;
ca1d1d23 954}
fa8ed3e0 955\f
a84b7c53 956/* Search for COUNT instances of a line boundary.
fa8ed3e0
RS
957 Start at START. If COUNT is negative, search backwards.
958
959 We report the resulting position by calling TEMP_SET_PT_BOTH.
960
961 If we find COUNT instances. we position after (always after,
962 even if scanning backwards) the COUNTth match, and return 0.
963
964 If we don't find COUNT instances before reaching the end of the
965 buffer (or the beginning, if scanning backwards), we return
966 the number of line boundaries left unfound, and position at
967 the limit we bumped up against.
968
9fa1de30 969 If ALLOW_QUIT, set immediate_quit. That's good to do
d5d57b92 970 except in special cases. */
ca1d1d23 971
d2b36813 972ptrdiff_t
d311d28c
PE
973scan_newline (ptrdiff_t start, ptrdiff_t start_byte,
974 ptrdiff_t limit, ptrdiff_t limit_byte,
d2b36813 975 ptrdiff_t count, bool allow_quit)
63fa018d 976{
d2b36813 977 ptrdiff_t charpos, bytepos, shortage;
fa8ed3e0 978
d2b36813
DA
979 charpos = find_newline (start, start_byte, limit, limit_byte,
980 count, &shortage, &bytepos, allow_quit);
981 if (shortage)
982 TEMP_SET_PT_BOTH (limit, limit_byte);
fa8ed3e0 983 else
d2b36813
DA
984 TEMP_SET_PT_BOTH (charpos, bytepos);
985 return shortage;
63fa018d
RS
986}
987
a611149e
EZ
988/* Like find_newline, but doesn't allow QUITting and doesn't return
989 SHORTAGE. */
d311d28c 990ptrdiff_t
b5426561
DA
991find_newline_no_quit (ptrdiff_t from, ptrdiff_t frombyte,
992 ptrdiff_t cnt, ptrdiff_t *bytepos)
ca1d1d23 993{
b5426561 994 return find_newline (from, frombyte, 0, -1, cnt, NULL, bytepos, 0);
9169c321
JB
995}
996
a611149e
EZ
997/* Like find_newline, but returns position before the newline, not
998 after, and only search up to TO.
999 This isn't just find_newline_no_quit (...)-1, because you might hit TO. */
fa8ed3e0 1000
d311d28c 1001ptrdiff_t
2a14a4f1
DA
1002find_before_next_newline (ptrdiff_t from, ptrdiff_t to,
1003 ptrdiff_t cnt, ptrdiff_t *bytepos)
9169c321 1004{
d311d28c 1005 ptrdiff_t shortage;
b5426561 1006 ptrdiff_t pos = find_newline (from, -1, to, -1, cnt, &shortage, bytepos, 1);
9169c321
JB
1007
1008 if (shortage == 0)
2a14a4f1
DA
1009 {
1010 if (bytepos)
1011 DEC_BOTH (pos, *bytepos);
1012 else
1013 pos--;
1014 }
9169c321 1015 return pos;
ca1d1d23
JB
1016}
1017\f
ca1d1d23
JB
1018/* Subroutines of Lisp buffer search functions. */
1019
1020static Lisp_Object
c098fdb8 1021search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror,
457882c2 1022 Lisp_Object count, int direction, int RE, bool posix)
ca1d1d23 1023{
457882c2 1024 EMACS_INT np;
d311d28c
PE
1025 EMACS_INT lim;
1026 ptrdiff_t lim_byte;
a53e2e89 1027 EMACS_INT n = direction;
ca1d1d23
JB
1028
1029 if (!NILP (count))
1030 {
b7826503 1031 CHECK_NUMBER (count);
ca1d1d23
JB
1032 n *= XINT (count);
1033 }
1034
b7826503 1035 CHECK_STRING (string);
ca1d1d23 1036 if (NILP (bound))
9f43ad85
RS
1037 {
1038 if (n > 0)
1039 lim = ZV, lim_byte = ZV_BYTE;
1040 else
1041 lim = BEGV, lim_byte = BEGV_BYTE;
1042 }
ca1d1d23
JB
1043 else
1044 {
b7826503 1045 CHECK_NUMBER_COERCE_MARKER (bound);
ca1d1d23 1046 lim = XINT (bound);
6ec8bbd2 1047 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
1048 error ("Invalid search bound (wrong side of point)");
1049 if (lim > ZV)
9f43ad85 1050 lim = ZV, lim_byte = ZV_BYTE;
588d2fd5 1051 else if (lim < BEGV)
9f43ad85 1052 lim = BEGV, lim_byte = BEGV_BYTE;
588d2fd5
KH
1053 else
1054 lim_byte = CHAR_TO_BYTE (lim);
ca1d1d23
JB
1055 }
1056
910c747a 1057 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
34dabdb7 1058 set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
a098c930 1059 BVAR (current_buffer, case_eqv_table));
910c747a 1060
9f43ad85 1061 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
4b4deea2
TT
1062 (!NILP (BVAR (current_buffer, case_fold_search))
1063 ? BVAR (current_buffer, case_canon_table)
3135e9fd 1064 : Qnil),
4b4deea2
TT
1065 (!NILP (BVAR (current_buffer, case_fold_search))
1066 ? BVAR (current_buffer, case_eqv_table)
3135e9fd 1067 : Qnil),
b819a390 1068 posix);
ca1d1d23
JB
1069 if (np <= 0)
1070 {
1071 if (NILP (noerror))
06f77a8a
KS
1072 xsignal1 (Qsearch_failed, string);
1073
ca1d1d23
JB
1074 if (!EQ (noerror, Qt))
1075 {
8ca30920 1076 eassert (BEGV <= lim && lim <= ZV);
9f43ad85 1077 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
1078 return Qnil;
1079#if 0 /* This would be clean, but maybe programs depend on
1080 a value of nil here. */
481399bf 1081 np = lim;
a5f217b8 1082#endif
ca1d1d23 1083 }
481399bf
RS
1084 else
1085 return Qnil;
ca1d1d23
JB
1086 }
1087
8ca30920 1088 eassert (BEGV <= np && np <= ZV);
ca1d1d23
JB
1089 SET_PT (np);
1090
1091 return make_number (np);
1092}
1093\f
457882c2 1094/* Return true if REGEXP it matches just one constant string. */
fa8ed3e0 1095
457882c2 1096static bool
971de7fb 1097trivial_regexp_p (Lisp_Object regexp)
b6d6a51c 1098{
d311d28c 1099 ptrdiff_t len = SBYTES (regexp);
d5db4077 1100 unsigned char *s = SDATA (regexp);
b6d6a51c
KH
1101 while (--len >= 0)
1102 {
1103 switch (*s++)
1104 {
1105 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1106 return 0;
1107 case '\\':
1108 if (--len < 0)
1109 return 0;
1110 switch (*s++)
1111 {
1112 case '|': case '(': case ')': case '`': case '\'': case 'b':
1113 case 'B': case '<': case '>': case 'w': case 'W': case 's':
29f89fe7 1114 case 'S': case '=': case '{': case '}': case '_':
5679531d 1115 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 1116 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
1117 case '6': case '7': case '8': case '9':
1118 return 0;
1119 }
1120 }
1121 }
1122 return 1;
1123}
1124
ca325161 1125/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 1126 starting at position POS and stopping at position LIM,
b819a390 1127 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
1128 a regular expression if RE is true.
1129
1130 If N is positive, searching is forward and LIM must be greater than POS.
1131 If N is negative, searching is backward and LIM must be less than POS.
1132
facdc750 1133 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 1134 or else the position at the beginning of the Nth occurrence
b819a390
RS
1135 (if searching backward) or the end (if searching forward).
1136
1137 POSIX is nonzero if we want full backtracking (POSIX style)
1138 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 1139
aff2ce94
RS
1140#define TRANSLATE(out, trt, d) \
1141do \
1142 { \
1143 if (! NILP (trt)) \
1144 { \
1145 Lisp_Object temp; \
1146 temp = Faref (trt, make_number (d)); \
1147 if (INTEGERP (temp)) \
1148 out = XINT (temp); \
1149 else \
1150 out = d; \
1151 } \
1152 else \
1153 out = d; \
1154 } \
1155while (0)
facdc750 1156
ef887810
RS
1157/* Only used in search_buffer, to record the end position of the match
1158 when searching regexps and SEARCH_REGS should not be changed
1159 (i.e. Vinhibit_changing_match_data is non-nil). */
1160static struct re_registers search_regs_1;
1161
e7deaab0 1162static EMACS_INT
d311d28c
PE
1163search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1164 ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
457882c2 1165 int RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
ca1d1d23 1166{
d311d28c
PE
1167 ptrdiff_t len = SCHARS (string);
1168 ptrdiff_t len_byte = SBYTES (string);
1169 register ptrdiff_t i;
ca1d1d23 1170
7074fde6
FP
1171 if (running_asynch_code)
1172 save_search_regs ();
1173
a7e4cdde 1174 /* Searching 0 times means don't move. */
ca1d1d23 1175 /* Null string is found at starting position. */
a7e4cdde 1176 if (len == 0 || n == 0)
ca325161 1177 {
0353b28f 1178 set_search_regs (pos_byte, 0);
ca325161
RS
1179 return pos;
1180 }
3f57a499 1181
41a33295 1182 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
ca1d1d23 1183 {
facdc750 1184 unsigned char *p1, *p2;
d311d28c 1185 ptrdiff_t s1, s2;
487282dc
KH
1186 struct re_pattern_buffer *bufp;
1187
ef887810
RS
1188 bufp = compile_pattern (string,
1189 (NILP (Vinhibit_changing_match_data)
1190 ? &search_regs : &search_regs_1),
1191 trt, posix,
4b4deea2 1192 !NILP (BVAR (current_buffer, enable_multibyte_characters)));
ca1d1d23 1193
ca1d1d23
JB
1194 immediate_quit = 1; /* Quit immediately if user types ^G,
1195 because letting this function finish
1196 can take too long. */
1197 QUIT; /* Do a pending quit right away,
1198 to avoid paradoxical behavior */
1199 /* Get pointers and sizes of the two strings
1200 that make up the visible portion of the buffer. */
1201
1202 p1 = BEGV_ADDR;
fa8ed3e0 1203 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1204 p2 = GAP_END_ADDR;
fa8ed3e0 1205 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1206 if (s1 < 0)
1207 {
1208 p2 = p1;
fa8ed3e0 1209 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1210 s1 = 0;
1211 }
1212 if (s2 < 0)
1213 {
fa8ed3e0 1214 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1215 s2 = 0;
1216 }
8bb43c28 1217 re_match_object = Qnil;
177c0ea7 1218
ca1d1d23
JB
1219 while (n < 0)
1220 {
d311d28c 1221 ptrdiff_t val;
52c55cc7 1222
487282dc 1223 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b 1224 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
ef887810
RS
1225 (NILP (Vinhibit_changing_match_data)
1226 ? &search_regs : &search_regs_1),
42db823b 1227 /* Don't allow match past current point */
4996330b 1228 pos_byte - BEGV_BYTE);
ca1d1d23 1229 if (val == -2)
b6d6a51c
KH
1230 {
1231 matcher_overflow ();
1232 }
ca1d1d23
JB
1233 if (val >= 0)
1234 {
ef887810
RS
1235 if (NILP (Vinhibit_changing_match_data))
1236 {
1237 pos_byte = search_regs.start[0] + BEGV_BYTE;
1238 for (i = 0; i < search_regs.num_regs; i++)
1239 if (search_regs.start[i] >= 0)
1240 {
1241 search_regs.start[i]
1242 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1243 search_regs.end[i]
1244 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1245 }
1246 XSETBUFFER (last_thing_searched, current_buffer);
1247 /* Set pos to the new position. */
1248 pos = search_regs.start[0];
1249 }
1250 else
1251 {
1252 pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1253 /* Set pos to the new position. */
1254 pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1255 }
ca1d1d23
JB
1256 }
1257 else
1258 {
1259 immediate_quit = 0;
1260 return (n);
1261 }
1262 n++;
1263 }
1264 while (n > 0)
1265 {
d311d28c 1266 ptrdiff_t val;
52c55cc7 1267
487282dc 1268 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b 1269 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
ef887810
RS
1270 (NILP (Vinhibit_changing_match_data)
1271 ? &search_regs : &search_regs_1),
4996330b 1272 lim_byte - BEGV_BYTE);
ca1d1d23 1273 if (val == -2)
b6d6a51c
KH
1274 {
1275 matcher_overflow ();
1276 }
ca1d1d23
JB
1277 if (val >= 0)
1278 {
ef887810
RS
1279 if (NILP (Vinhibit_changing_match_data))
1280 {
1281 pos_byte = search_regs.end[0] + BEGV_BYTE;
1282 for (i = 0; i < search_regs.num_regs; i++)
1283 if (search_regs.start[i] >= 0)
1284 {
1285 search_regs.start[i]
1286 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1287 search_regs.end[i]
1288 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1289 }
1290 XSETBUFFER (last_thing_searched, current_buffer);
1291 pos = search_regs.end[0];
1292 }
1293 else
1294 {
1295 pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1296 pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1297 }
ca1d1d23
JB
1298 }
1299 else
1300 {
1301 immediate_quit = 0;
1302 return (0 - n);
1303 }
1304 n--;
1305 }
1306 immediate_quit = 0;
1307 return (pos);
1308 }
1309 else /* non-RE case */
1310 {
facdc750 1311 unsigned char *raw_pattern, *pat;
d311d28c
PE
1312 ptrdiff_t raw_pattern_size;
1313 ptrdiff_t raw_pattern_size_byte;
facdc750 1314 unsigned char *patbuf;
0063fdb1 1315 bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
a967ed62 1316 unsigned char *base_pat;
49e44e81
KH
1317 /* Set to positive if we find a non-ASCII char that need
1318 translation. Otherwise set to zero later. */
1319 int char_base = -1;
457882c2 1320 bool boyer_moore_ok = 1;
facdc750
RS
1321
1322 /* MULTIBYTE says whether the text to be searched is multibyte.
1323 We must convert PATTERN to match that, or we will not really
1324 find things right. */
1325
1326 if (multibyte == STRING_MULTIBYTE (string))
1327 {
51b59d79 1328 raw_pattern = SDATA (string);
d5db4077
KR
1329 raw_pattern_size = SCHARS (string);
1330 raw_pattern_size_byte = SBYTES (string);
facdc750
RS
1331 }
1332 else if (multibyte)
1333 {
d5db4077 1334 raw_pattern_size = SCHARS (string);
facdc750 1335 raw_pattern_size_byte
d5db4077 1336 = count_size_as_multibyte (SDATA (string),
facdc750 1337 raw_pattern_size);
38182d90 1338 raw_pattern = alloca (raw_pattern_size_byte + 1);
d5db4077
KR
1339 copy_text (SDATA (string), raw_pattern,
1340 SCHARS (string), 0, 1);
facdc750
RS
1341 }
1342 else
1343 {
1344 /* Converting multibyte to single-byte.
1345
1346 ??? Perhaps this conversion should be done in a special way
1347 by subtracting nonascii-insert-offset from each non-ASCII char,
1348 so that only the multibyte chars which really correspond to
1349 the chosen single-byte character set can possibly match. */
d5db4077
KR
1350 raw_pattern_size = SCHARS (string);
1351 raw_pattern_size_byte = SCHARS (string);
38182d90 1352 raw_pattern = alloca (raw_pattern_size + 1);
d5db4077
KR
1353 copy_text (SDATA (string), raw_pattern,
1354 SBYTES (string), 1, 0);
facdc750
RS
1355 }
1356
1357 /* Copy and optionally translate the pattern. */
1358 len = raw_pattern_size;
1359 len_byte = raw_pattern_size_byte;
38182d90 1360 patbuf = alloca (len * MAX_MULTIBYTE_LENGTH);
facdc750
RS
1361 pat = patbuf;
1362 base_pat = raw_pattern;
1363 if (multibyte)
1364 {
a17ae96a
KH
1365 /* Fill patbuf by translated characters in STRING while
1366 checking if we can use boyer-moore search. If TRT is
1367 non-nil, we can use boyer-moore search only if TRT can be
1368 represented by the byte array of 256 elements. For that,
9858f6c3 1369 all non-ASCII case-equivalents of all case-sensitive
e79d60fe 1370 characters in STRING must belong to the same character
68abf867
KH
1371 group (two characters belong to the same group iff their
1372 multibyte forms are the same except for the last byte;
1373 i.e. every 64 characters form a group; U+0000..U+003F,
e79d60fe 1374 U+0040..U+007F, U+0080..U+00BF, ...). */
a17ae96a 1375
facdc750
RS
1376 while (--len >= 0)
1377 {
a17ae96a 1378 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
aff2ce94 1379 int c, translated, inverse;
a17ae96a 1380 int in_charlen, charlen;
facdc750
RS
1381
1382 /* If we got here and the RE flag is set, it's because we're
1383 dealing with a regexp known to be trivial, so the backslash
1384 just quotes the next character. */
1385 if (RE && *base_pat == '\\')
1386 {
1387 len--;
62221f22 1388 raw_pattern_size--;
facdc750
RS
1389 len_byte--;
1390 base_pat++;
1391 }
1392
62a6e103 1393 c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
040272ce 1394
a17ae96a 1395 if (NILP (trt))
facdc750 1396 {
a17ae96a
KH
1397 str = base_pat;
1398 charlen = in_charlen;
1399 }
1400 else
1401 {
1402 /* Translate the character. */
1403 TRANSLATE (translated, trt, c);
1404 charlen = CHAR_STRING (translated, str_base);
1405 str = str_base;
1406
1407 /* Check if C has any other case-equivalents. */
1408 TRANSLATE (inverse, inverse_trt, c);
1409 /* If so, check if we can use boyer-moore. */
1410 if (c != inverse && boyer_moore_ok)
1411 {
1412 /* Check if all equivalents belong to the same
1413 group of characters. Note that the check of C
49e44e81
KH
1414 itself is done by the last iteration. */
1415 int this_char_base = -1;
1416
1417 while (boyer_moore_ok)
a17ae96a 1418 {
200fc949 1419 if (ASCII_CHAR_P (inverse))
a17ae96a 1420 {
49e44e81
KH
1421 if (this_char_base > 0)
1422 boyer_moore_ok = 0;
1423 else
dafaabd1 1424 this_char_base = 0;
a17ae96a 1425 }
49e44e81
KH
1426 else if (CHAR_BYTE8_P (inverse))
1427 /* Boyer-moore search can't handle a
1428 translation of an eight-bit
1429 character. */
1430 boyer_moore_ok = 0;
1431 else if (this_char_base < 0)
1432 {
1433 this_char_base = inverse & ~0x3F;
1434 if (char_base < 0)
1435 char_base = this_char_base;
dafaabd1 1436 else if (this_char_base != char_base)
49e44e81
KH
1437 boyer_moore_ok = 0;
1438 }
1439 else if ((inverse & ~0x3F) != this_char_base)
1440 boyer_moore_ok = 0;
a17ae96a
KH
1441 if (c == inverse)
1442 break;
1443 TRANSLATE (inverse, inverse_trt, inverse);
1444 }
1445 }
aff2ce94 1446 }
facdc750
RS
1447
1448 /* Store this character into the translated pattern. */
72af86bd 1449 memcpy (pat, str, charlen);
a17ae96a 1450 pat += charlen;
facdc750
RS
1451 base_pat += in_charlen;
1452 len_byte -= in_charlen;
1453 }
dafaabd1
AS
1454
1455 /* If char_base is still negative we didn't find any translated
1456 non-ASCII characters. */
1457 if (char_base < 0)
1458 char_base = 0;
facdc750
RS
1459 }
1460 else
1461 {
040272ce 1462 /* Unibyte buffer. */
a17ae96a 1463 char_base = 0;
facdc750
RS
1464 while (--len >= 0)
1465 {
e8c6e965 1466 int c, translated, inverse;
facdc750
RS
1467
1468 /* If we got here and the RE flag is set, it's because we're
1469 dealing with a regexp known to be trivial, so the backslash
1470 just quotes the next character. */
1471 if (RE && *base_pat == '\\')
1472 {
1473 len--;
0190922f 1474 raw_pattern_size--;
facdc750
RS
1475 base_pat++;
1476 }
1477 c = *base_pat++;
aff2ce94 1478 TRANSLATE (translated, trt, c);
facdc750 1479 *pat++ = translated;
e8c6e965
EZ
1480 /* Check that none of C's equivalents violates the
1481 assumptions of boyer_moore. */
1482 TRANSLATE (inverse, inverse_trt, c);
1483 while (1)
1484 {
1485 if (inverse >= 0200)
1486 {
1487 boyer_moore_ok = 0;
1488 break;
1489 }
1490 if (c == inverse)
1491 break;
1492 TRANSLATE (inverse, inverse_trt, inverse);
1493 }
facdc750
RS
1494 }
1495 }
1496
1497 len_byte = pat - patbuf;
facdc750
RS
1498 pat = base_pat = patbuf;
1499
040272ce 1500 if (boyer_moore_ok)
1db5b1ad
JB
1501 return boyer_moore (n, pat, len_byte, trt, inverse_trt,
1502 pos_byte, lim_byte,
a17ae96a 1503 char_base);
facdc750 1504 else
1db5b1ad 1505 return simple_search (n, pat, raw_pattern_size, len_byte, trt,
facdc750
RS
1506 pos, pos_byte, lim, lim_byte);
1507 }
1508}
1509\f
1510/* Do a simple string search N times for the string PAT,
1511 whose length is LEN/LEN_BYTE,
1512 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1513 TRT is the translation table.
f8bd51c4 1514
facdc750
RS
1515 Return the character position where the match is found.
1516 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1517
facdc750
RS
1518 This kind of search works regardless of what is in PAT and
1519 regardless of what is in TRT. It is used in cases where
1520 boyer_moore cannot work. */
1521
e7deaab0 1522static EMACS_INT
c098fdb8 1523simple_search (EMACS_INT n, unsigned char *pat,
d311d28c
PE
1524 ptrdiff_t len, ptrdiff_t len_byte, Lisp_Object trt,
1525 ptrdiff_t pos, ptrdiff_t pos_byte,
1526 ptrdiff_t lim, ptrdiff_t lim_byte)
facdc750 1527{
0063fdb1
DA
1528 bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1529 bool forward = n > 0;
49e44e81
KH
1530 /* Number of buffer bytes matched. Note that this may be different
1531 from len_byte in a multibyte buffer. */
8ce70ed2 1532 ptrdiff_t match_byte = PTRDIFF_MIN;
facdc750
RS
1533
1534 if (lim > pos && multibyte)
1535 while (n > 0)
1536 {
1537 while (1)
f8bd51c4 1538 {
facdc750 1539 /* Try matching at position POS. */
d311d28c
PE
1540 ptrdiff_t this_pos = pos;
1541 ptrdiff_t this_pos_byte = pos_byte;
1542 ptrdiff_t this_len = len;
facdc750 1543 unsigned char *p = pat;
a9469498 1544 if (pos + len > lim || pos_byte + len_byte > lim_byte)
facdc750
RS
1545 goto stop;
1546
1547 while (this_len > 0)
1548 {
1549 int charlen, buf_charlen;
ab228c24 1550 int pat_ch, buf_ch;
facdc750 1551
62a6e103 1552 pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
facdc750 1553 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
facdc750 1554 buf_charlen);
aff2ce94 1555 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1556
1557 if (buf_ch != pat_ch)
1558 break;
ab228c24 1559
ab228c24
RS
1560 this_len--;
1561 p += charlen;
1562
1563 this_pos_byte += buf_charlen;
1564 this_pos++;
facdc750
RS
1565 }
1566
1567 if (this_len == 0)
1568 {
49e44e81 1569 match_byte = this_pos_byte - pos_byte;
facdc750 1570 pos += len;
49e44e81 1571 pos_byte += match_byte;
facdc750
RS
1572 break;
1573 }
1574
1575 INC_BOTH (pos, pos_byte);
f8bd51c4 1576 }
facdc750
RS
1577
1578 n--;
1579 }
1580 else if (lim > pos)
1581 while (n > 0)
1582 {
1583 while (1)
f8bd51c4 1584 {
facdc750 1585 /* Try matching at position POS. */
d311d28c
PE
1586 ptrdiff_t this_pos = pos;
1587 ptrdiff_t this_len = len;
facdc750
RS
1588 unsigned char *p = pat;
1589
1590 if (pos + len > lim)
1591 goto stop;
1592
1593 while (this_len > 0)
1594 {
1595 int pat_ch = *p++;
1596 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1597 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1598
1599 if (buf_ch != pat_ch)
1600 break;
ab228c24
RS
1601
1602 this_len--;
1603 this_pos++;
facdc750
RS
1604 }
1605
1606 if (this_len == 0)
1607 {
49e44e81 1608 match_byte = len;
facdc750
RS
1609 pos += len;
1610 break;
1611 }
1612
1613 pos++;
f8bd51c4 1614 }
facdc750
RS
1615
1616 n--;
1617 }
1618 /* Backwards search. */
1619 else if (lim < pos && multibyte)
1620 while (n < 0)
1621 {
1622 while (1)
f8bd51c4 1623 {
facdc750 1624 /* Try matching at position POS. */
d311d28c
PE
1625 ptrdiff_t this_pos = pos;
1626 ptrdiff_t this_pos_byte = pos_byte;
1627 ptrdiff_t this_len = len;
c525b3f2 1628 const unsigned char *p = pat + len_byte;
facdc750 1629
8b264ecb 1630 if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
facdc750
RS
1631 goto stop;
1632
1633 while (this_len > 0)
1634 {
ab228c24 1635 int pat_ch, buf_ch;
facdc750 1636
8b264ecb
AS
1637 DEC_BOTH (this_pos, this_pos_byte);
1638 PREV_CHAR_BOUNDARY (p, pat);
1639 pat_ch = STRING_CHAR (p);
1640 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
aff2ce94 1641 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1642
1643 if (buf_ch != pat_ch)
1644 break;
ab228c24 1645
ab228c24 1646 this_len--;
facdc750
RS
1647 }
1648
1649 if (this_len == 0)
1650 {
8b264ecb
AS
1651 match_byte = pos_byte - this_pos_byte;
1652 pos = this_pos;
1653 pos_byte = this_pos_byte;
facdc750
RS
1654 break;
1655 }
1656
1657 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1658 }
1659
facdc750
RS
1660 n++;
1661 }
1662 else if (lim < pos)
1663 while (n < 0)
1664 {
1665 while (1)
b6d6a51c 1666 {
facdc750 1667 /* Try matching at position POS. */
d311d28c
PE
1668 ptrdiff_t this_pos = pos - len;
1669 ptrdiff_t this_len = len;
facdc750
RS
1670 unsigned char *p = pat;
1671
a9469498 1672 if (this_pos < lim)
facdc750
RS
1673 goto stop;
1674
1675 while (this_len > 0)
1676 {
1677 int pat_ch = *p++;
1678 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1679 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1680
1681 if (buf_ch != pat_ch)
1682 break;
ab228c24
RS
1683 this_len--;
1684 this_pos++;
facdc750
RS
1685 }
1686
1687 if (this_len == 0)
b6d6a51c 1688 {
49e44e81 1689 match_byte = len;
facdc750
RS
1690 pos -= len;
1691 break;
b6d6a51c 1692 }
facdc750
RS
1693
1694 pos--;
b6d6a51c 1695 }
facdc750
RS
1696
1697 n++;
b6d6a51c 1698 }
facdc750
RS
1699
1700 stop:
1701 if (n == 0)
aff2ce94 1702 {
8ce70ed2 1703 eassert (match_byte != PTRDIFF_MIN);
ab228c24 1704 if (forward)
49e44e81 1705 set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
ab228c24 1706 else
49e44e81 1707 set_search_regs (multibyte ? pos_byte : pos, match_byte);
aff2ce94
RS
1708
1709 return pos;
1710 }
facdc750
RS
1711 else if (n > 0)
1712 return -n;
1713 else
1714 return n;
1715}
1716\f
0190922f 1717/* Do Boyer-Moore search N times for the string BASE_PAT,
1db5b1ad
JB
1718 whose length is LEN_BYTE,
1719 from buffer position POS_BYTE until LIM_BYTE.
facdc750
RS
1720 DIRECTION says which direction we search in.
1721 TRT and INVERSE_TRT are translation tables.
0190922f 1722 Characters in PAT are already translated by TRT.
facdc750 1723
0190922f
KH
1724 This kind of search works if all the characters in BASE_PAT that
1725 have nontrivial translation are the same aside from the last byte.
1726 This makes it possible to translate just the last byte of a
1727 character, and do so after just a simple test of the context.
b2e6b10f 1728 CHAR_BASE is nonzero if there is such a non-ASCII character.
facdc750
RS
1729
1730 If that criterion is not satisfied, do not call this function. */
1731
e7deaab0 1732static EMACS_INT
c098fdb8 1733boyer_moore (EMACS_INT n, unsigned char *base_pat,
d311d28c 1734 ptrdiff_t len_byte,
d5a3eaaf 1735 Lisp_Object trt, Lisp_Object inverse_trt,
d311d28c 1736 ptrdiff_t pos_byte, ptrdiff_t lim_byte,
1db5b1ad 1737 int char_base)
facdc750
RS
1738{
1739 int direction = ((n > 0) ? 1 : -1);
d311d28c
PE
1740 register ptrdiff_t dirlen;
1741 ptrdiff_t limit;
e7deaab0 1742 int stride_for_teases = 0;
f4646fff 1743 int BM_tab[0400];
177c0ea7 1744 register unsigned char *cursor, *p_limit;
d311d28c 1745 register ptrdiff_t i;
c098fdb8 1746 register int j;
cb6792d2 1747 unsigned char *pat, *pat_end;
0063fdb1 1748 bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
facdc750
RS
1749
1750 unsigned char simple_translate[0400];
0190922f 1751 /* These are set to the preceding bytes of a byte to be translated
49e44e81 1752 if char_base is nonzero. As the maximum byte length of a
a17ae96a 1753 multibyte character is 5, we have to check at most four previous
0190922f
KH
1754 bytes. */
1755 int translate_prev_byte1 = 0;
1756 int translate_prev_byte2 = 0;
1757 int translate_prev_byte3 = 0;
facdc750 1758
f4646fff
AS
1759 /* The general approach is that we are going to maintain that we know
1760 the first (closest to the present position, in whatever direction
1761 we're searching) character that could possibly be the last
1762 (furthest from present position) character of a valid match. We
1763 advance the state of our knowledge by looking at that character
1764 and seeing whether it indeed matches the last character of the
1765 pattern. If it does, we take a closer look. If it does not, we
1766 move our pointer (to putative last characters) as far as is
1767 logically possible. This amount of movement, which I call a
1768 stride, will be the length of the pattern if the actual character
1769 appears nowhere in the pattern, otherwise it will be the distance
1770 from the last occurrence of that character to the end of the
1771 pattern. If the amount is zero we have a possible match. */
1772
1773 /* Here we make a "mickey mouse" BM table. The stride of the search
1774 is determined only by the last character of the putative match.
1775 If that character does not match, we will stride the proper
1776 distance to propose a match that superimposes it on the last
1777 instance of a character that matches it (per trt), or misses
1778 it entirely if there is none. */
facdc750
RS
1779
1780 dirlen = len_byte * direction;
cb6792d2
RS
1781
1782 /* Record position after the end of the pattern. */
1783 pat_end = base_pat + len_byte;
1784 /* BASE_PAT points to a character that we start scanning from.
1785 It is the first character in a forward search,
1786 the last character in a backward search. */
facdc750 1787 if (direction < 0)
cb6792d2
RS
1788 base_pat = pat_end - 1;
1789
f4646fff
AS
1790 /* A character that does not appear in the pattern induces a
1791 stride equal to the pattern length. */
1792 for (i = 0; i < 0400; i++)
1793 BM_tab[i] = dirlen;
facdc750
RS
1794
1795 /* We use this for translation, instead of TRT itself.
1796 We fill this in to handle the characters that actually
1797 occur in the pattern. Others don't matter anyway! */
facdc750
RS
1798 for (i = 0; i < 0400; i++)
1799 simple_translate[i] = i;
1800
a17ae96a 1801 if (char_base)
0190922f 1802 {
a17ae96a 1803 /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE. Only a
0190922f 1804 byte following them are the target of translation. */
0190922f 1805 unsigned char str[MAX_MULTIBYTE_LENGTH];
1f3561e4 1806 int cblen = CHAR_STRING (char_base, str);
0190922f 1807
1f3561e4
PE
1808 translate_prev_byte1 = str[cblen - 2];
1809 if (cblen > 2)
0190922f 1810 {
1f3561e4
PE
1811 translate_prev_byte2 = str[cblen - 3];
1812 if (cblen > 3)
1f1d9321 1813 translate_prev_byte3 = str[cblen - 4];
0190922f
KH
1814 }
1815 }
1816
facdc750 1817 i = 0;
f4646fff 1818 while (i != dirlen)
facdc750 1819 {
cb6792d2 1820 unsigned char *ptr = base_pat + i;
facdc750 1821 i += direction;
facdc750 1822 if (! NILP (trt))
ca1d1d23 1823 {
49e44e81
KH
1824 /* If the byte currently looking at is the last of a
1825 character to check case-equivalents, set CH to that
1826 character. An ASCII character and a non-ASCII character
1827 matching with CHAR_BASE are to be checked. */
a17ae96a
KH
1828 int ch = -1;
1829
200fc949 1830 if (ASCII_CHAR_P (*ptr) || ! multibyte)
a17ae96a 1831 ch = *ptr;
49e44e81 1832 else if (char_base
86dc6ccb 1833 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
ca1d1d23 1834 {
49e44e81
KH
1835 unsigned char *charstart = ptr - 1;
1836
1837 while (! (CHAR_HEAD_P (*charstart)))
1838 charstart--;
62a6e103 1839 ch = STRING_CHAR (charstart);
a17ae96a
KH
1840 if (char_base != (ch & ~0x3F))
1841 ch = -1;
ca1d1d23 1842 }
facdc750 1843
edb7b4dc 1844 if (ch >= 0200 && multibyte)
49e44e81
KH
1845 j = (ch & 0x3F) | 0200;
1846 else
1847 j = *ptr;
1848
f4646fff 1849 if (i == dirlen)
facdc750 1850 stride_for_teases = BM_tab[j];
ab228c24 1851
facdc750 1852 BM_tab[j] = dirlen - i;
1db5b1ad
JB
1853 /* A translation table is accompanied by its inverse -- see
1854 comment following downcase_table for details. */
a17ae96a 1855 if (ch >= 0)
ab228c24
RS
1856 {
1857 int starting_ch = ch;
49e44e81 1858 int starting_j = j;
a17ae96a 1859
ab228c24
RS
1860 while (1)
1861 {
1862 TRANSLATE (ch, inverse_trt, ch);
edb7b4dc 1863 if (ch >= 0200 && multibyte)
49e44e81 1864 j = (ch & 0x3F) | 0200;
ab228c24 1865 else
a17ae96a 1866 j = ch;
ab228c24
RS
1867
1868 /* For all the characters that map into CH,
1869 set up simple_translate to map the last byte
1870 into STARTING_J. */
1871 simple_translate[j] = starting_j;
1872 if (ch == starting_ch)
1873 break;
1874 BM_tab[j] = dirlen - i;
1875 }
1876 }
facdc750
RS
1877 }
1878 else
1879 {
1880 j = *ptr;
1881
f4646fff 1882 if (i == dirlen)
facdc750
RS
1883 stride_for_teases = BM_tab[j];
1884 BM_tab[j] = dirlen - i;
ca1d1d23 1885 }
f4646fff
AS
1886 /* stride_for_teases tells how much to stride if we get a
1887 match on the far character but are subsequently
1888 disappointed, by recording what the stride would have been
1889 for that character if the last character had been
1890 different. */
facdc750 1891 }
facdc750
RS
1892 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1893 /* loop invariant - POS_BYTE points at where last char (first
1894 char if reverse) of pattern would align in a possible match. */
1895 while (n != 0)
1896 {
d311d28c 1897 ptrdiff_t tail_end;
facdc750
RS
1898 unsigned char *tail_end_ptr;
1899
1900 /* It's been reported that some (broken) compiler thinks that
1901 Boolean expressions in an arithmetic context are unsigned.
1902 Using an explicit ?1:0 prevents this. */
1903 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1904 < 0)
1905 return (n * (0 - direction));
1906 /* First we do the part we can by pointers (maybe nothing) */
1907 QUIT;
1908 pat = base_pat;
1909 limit = pos_byte - dirlen + direction;
67ce527d
KH
1910 if (direction > 0)
1911 {
1912 limit = BUFFER_CEILING_OF (limit);
1913 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1914 can take on without hitting edge of buffer or the gap. */
1915 limit = min (limit, pos_byte + 20000);
1916 limit = min (limit, lim_byte - 1);
1917 }
1918 else
1919 {
1920 limit = BUFFER_FLOOR_OF (limit);
1921 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1922 can take on without hitting edge of buffer or the gap. */
1923 limit = max (limit, pos_byte - 20000);
1924 limit = max (limit, lim_byte);
1925 }
facdc750
RS
1926 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1927 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1928
1929 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1930 {
facdc750
RS
1931 unsigned char *p2;
1932
1933 p_limit = BYTE_POS_ADDR (limit);
1934 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
f4646fff 1935 /* In this loop, pos + cursor - p2 is the surrogate for pos. */
facdc750 1936 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1937 {
facdc750 1938 if (direction > 0) /* worth duplicating */
ca1d1d23 1939 {
f4646fff
AS
1940 while (cursor <= p_limit)
1941 {
1942 if (BM_tab[*cursor] == 0)
1943 goto hit;
facdc750 1944 cursor += BM_tab[*cursor];
f4646fff 1945 }
facdc750
RS
1946 }
1947 else
1948 {
f4646fff
AS
1949 while (cursor >= p_limit)
1950 {
1951 if (BM_tab[*cursor] == 0)
1952 goto hit;
facdc750 1953 cursor += BM_tab[*cursor];
f4646fff 1954 }
facdc750 1955 }
f4646fff
AS
1956 /* If you are here, cursor is beyond the end of the
1957 searched region. You fail to match within the
1958 permitted region and would otherwise try a character
1959 beyond that region. */
1960 break;
1961
1962 hit:
facdc750
RS
1963 i = dirlen - direction;
1964 if (! NILP (trt))
1965 {
1966 while ((i -= direction) + direction != 0)
ca1d1d23 1967 {
facdc750
RS
1968 int ch;
1969 cursor -= direction;
1970 /* Translate only the last byte of a character. */
1971 if (! multibyte
1972 || ((cursor == tail_end_ptr
1973 || CHAR_HEAD_P (cursor[1]))
1974 && (CHAR_HEAD_P (cursor[0])
0190922f 1975 /* Check if this is the last byte of
cd1181db 1976 a translatable character. */
0190922f
KH
1977 || (translate_prev_byte1 == cursor[-1]
1978 && (CHAR_HEAD_P (translate_prev_byte1)
1979 || (translate_prev_byte2 == cursor[-2]
1980 && (CHAR_HEAD_P (translate_prev_byte2)
1981 || (translate_prev_byte3 == cursor[-3]))))))))
facdc750
RS
1982 ch = simple_translate[*cursor];
1983 else
1984 ch = *cursor;
1985 if (pat[i] != ch)
1986 break;
ca1d1d23 1987 }
facdc750
RS
1988 }
1989 else
1990 {
1991 while ((i -= direction) + direction != 0)
ca1d1d23 1992 {
facdc750
RS
1993 cursor -= direction;
1994 if (pat[i] != *cursor)
1995 break;
ca1d1d23 1996 }
facdc750
RS
1997 }
1998 cursor += dirlen - i - direction; /* fix cursor */
1999 if (i + direction == 0)
2000 {
d311d28c 2001 ptrdiff_t position, start, end;
0c8533c6 2002
facdc750 2003 cursor -= direction;
1113d9db 2004
facdc750
RS
2005 position = pos_byte + cursor - p2 + ((direction > 0)
2006 ? 1 - len_byte : 0);
2007 set_search_regs (position, len_byte);
ca325161 2008
ef887810
RS
2009 if (NILP (Vinhibit_changing_match_data))
2010 {
2011 start = search_regs.start[0];
2012 end = search_regs.end[0];
2013 }
2014 else
2015 /* If Vinhibit_changing_match_data is non-nil,
2016 search_regs will not be changed. So let's
2017 compute start and end here. */
2018 {
2019 start = BYTE_TO_CHAR (position);
2020 end = BYTE_TO_CHAR (position + len_byte);
2021 }
2022
facdc750
RS
2023 if ((n -= direction) != 0)
2024 cursor += dirlen; /* to resume search */
ca1d1d23 2025 else
ef887810 2026 return direction > 0 ? end : start;
ca1d1d23 2027 }
facdc750
RS
2028 else
2029 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 2030 }
facdc750
RS
2031 pos_byte += cursor - p2;
2032 }
2033 else
f4646fff
AS
2034 /* Now we'll pick up a clump that has to be done the hard
2035 way because it covers a discontinuity. */
facdc750
RS
2036 {
2037 limit = ((direction > 0)
2038 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
2039 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
2040 limit = ((direction > 0)
2041 ? min (limit + len_byte, lim_byte - 1)
2042 : max (limit - len_byte, lim_byte));
2043 /* LIMIT is now the last value POS_BYTE can have
2044 and still be valid for a possible match. */
2045 while (1)
ca1d1d23 2046 {
f4646fff
AS
2047 /* This loop can be coded for space rather than
2048 speed because it will usually run only once.
2049 (the reach is at most len + 21, and typically
2050 does not exceed len). */
facdc750 2051 while ((limit - pos_byte) * direction >= 0)
f4646fff
AS
2052 {
2053 int ch = FETCH_BYTE (pos_byte);
2054 if (BM_tab[ch] == 0)
2055 goto hit2;
2056 pos_byte += BM_tab[ch];
2057 }
2058 break; /* ran off the end */
2059
2060 hit2:
2061 /* Found what might be a match. */
facdc750
RS
2062 i = dirlen - direction;
2063 while ((i -= direction) + direction != 0)
ca1d1d23 2064 {
facdc750
RS
2065 int ch;
2066 unsigned char *ptr;
2067 pos_byte -= direction;
2068 ptr = BYTE_POS_ADDR (pos_byte);
2069 /* Translate only the last byte of a character. */
2070 if (! multibyte
2071 || ((ptr == tail_end_ptr
2072 || CHAR_HEAD_P (ptr[1]))
2073 && (CHAR_HEAD_P (ptr[0])
0190922f 2074 /* Check if this is the last byte of a
cd1181db 2075 translatable character. */
0190922f
KH
2076 || (translate_prev_byte1 == ptr[-1]
2077 && (CHAR_HEAD_P (translate_prev_byte1)
2078 || (translate_prev_byte2 == ptr[-2]
2079 && (CHAR_HEAD_P (translate_prev_byte2)
2080 || translate_prev_byte3 == ptr[-3])))))))
facdc750
RS
2081 ch = simple_translate[*ptr];
2082 else
2083 ch = *ptr;
2084 if (pat[i] != ch)
2085 break;
2086 }
2087 /* Above loop has moved POS_BYTE part or all the way
2088 back to the first pos (last pos if reverse).
2089 Set it once again at the last (first if reverse) char. */
f4646fff 2090 pos_byte += dirlen - i - direction;
facdc750
RS
2091 if (i + direction == 0)
2092 {
d311d28c 2093 ptrdiff_t position, start, end;
facdc750 2094 pos_byte -= direction;
1113d9db 2095
facdc750 2096 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
facdc750 2097 set_search_regs (position, len_byte);
ca325161 2098
ef887810
RS
2099 if (NILP (Vinhibit_changing_match_data))
2100 {
2101 start = search_regs.start[0];
2102 end = search_regs.end[0];
2103 }
2104 else
2105 /* If Vinhibit_changing_match_data is non-nil,
2106 search_regs will not be changed. So let's
2107 compute start and end here. */
2108 {
2109 start = BYTE_TO_CHAR (position);
2110 end = BYTE_TO_CHAR (position + len_byte);
2111 }
2112
facdc750
RS
2113 if ((n -= direction) != 0)
2114 pos_byte += dirlen; /* to resume search */
ca1d1d23 2115 else
ef887810 2116 return direction > 0 ? end : start;
ca1d1d23 2117 }
facdc750
RS
2118 else
2119 pos_byte += stride_for_teases;
2120 }
2121 }
2122 /* We have done one clump. Can we continue? */
2123 if ((lim_byte - pos_byte) * direction < 0)
2124 return ((0 - n) * direction);
ca1d1d23 2125 }
facdc750 2126 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 2127}
ca325161 2128
fa8ed3e0 2129/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
a7e4cdde
RS
2130 for the overall match just found in the current buffer.
2131 Also clear out the match data for registers 1 and up. */
ca325161
RS
2132
2133static void
d311d28c 2134set_search_regs (ptrdiff_t beg_byte, ptrdiff_t nbytes)
ca325161 2135{
d311d28c 2136 ptrdiff_t i;
a7e4cdde 2137
ef887810
RS
2138 if (!NILP (Vinhibit_changing_match_data))
2139 return;
2140
ca325161
RS
2141 /* Make sure we have registers in which to store
2142 the match position. */
2143 if (search_regs.num_regs == 0)
2144 {
23f86fce
DA
2145 search_regs.start = xmalloc (2 * sizeof (regoff_t));
2146 search_regs.end = xmalloc (2 * sizeof (regoff_t));
487282dc 2147 search_regs.num_regs = 2;
ca325161
RS
2148 }
2149
a7e4cdde
RS
2150 /* Clear out the other registers. */
2151 for (i = 1; i < search_regs.num_regs; i++)
2152 {
2153 search_regs.start[i] = -1;
2154 search_regs.end[i] = -1;
2155 }
2156
fa8ed3e0
RS
2157 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2158 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 2159 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 2160}
ca1d1d23 2161\f
ca1d1d23 2162DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
8c1a1077
PJ
2163 "MSearch backward: ",
2164 doc: /* Search backward from point for STRING.
2165Set point to the beginning of the occurrence found, and return point.
2166An optional second argument bounds the search; it is a buffer position.
2167The match found must not extend before that position.
2168Optional third argument, if t, means if fail just return nil (no error).
2169 If not nil and not t, position at limit of search and return nil.
acc28cb9
CY
2170Optional fourth argument COUNT, if non-nil, means to search for COUNT
2171 successive occurrences. If COUNT is negative, search forward,
2172 instead of backward, for -COUNT occurrences.
8c1a1077
PJ
2173
2174Search case-sensitivity is determined by the value of the variable
2175`case-fold-search', which see.
2176
2177See also the functions `match-beginning', `match-end' and `replace-match'. */)
5842a27b 2178 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
ca1d1d23 2179{
b819a390 2180 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
2181}
2182
6af43974 2183DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
8c1a1077
PJ
2184 doc: /* Search forward from point for STRING.
2185Set point to the end of the occurrence found, and return point.
2186An optional second argument bounds the search; it is a buffer position.
d6aacdcd 2187The match found must not extend after that position. A value of nil is
48740f01 2188 equivalent to (point-max).
8c1a1077
PJ
2189Optional third argument, if t, means if fail just return nil (no error).
2190 If not nil and not t, move to limit of search and return nil.
acc28cb9
CY
2191Optional fourth argument COUNT, if non-nil, means to search for COUNT
2192 successive occurrences. If COUNT is negative, search backward,
2193 instead of forward, for -COUNT occurrences.
8c1a1077
PJ
2194
2195Search case-sensitivity is determined by the value of the variable
2196`case-fold-search', which see.
2197
2198See also the functions `match-beginning', `match-end' and `replace-match'. */)
5842a27b 2199 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
ca1d1d23 2200{
b819a390 2201 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
2202}
2203
ca1d1d23 2204DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
8c1a1077
PJ
2205 "sRE search backward: ",
2206 doc: /* Search backward from point for match for regular expression REGEXP.
2207Set point to the beginning of the match, and return point.
2208The match found is the one starting last in the buffer
2209and yet ending before the origin of the search.
2210An optional second argument bounds the search; it is a buffer position.
2211The match found must start at or after that position.
2212Optional third argument, if t, means if fail just return nil (no error).
2213 If not nil and not t, move to limit of search and return nil.
2214Optional fourth argument is repeat count--search for successive occurrences.
49080b10
LMI
2215
2216Search case-sensitivity is determined by the value of the variable
2217`case-fold-search', which see.
2218
8c1a1077
PJ
2219See also the functions `match-beginning', `match-end', `match-string',
2220and `replace-match'. */)
5842a27b 2221 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
ca1d1d23 2222{
b819a390 2223 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2224}
2225
2226DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
8c1a1077
PJ
2227 "sRE search: ",
2228 doc: /* Search forward from point for regular expression REGEXP.
2229Set point to the end of the occurrence found, and return point.
2230An optional second argument bounds the search; it is a buffer position.
2231The match found must not extend after that position.
2232Optional third argument, if t, means if fail just return nil (no error).
2233 If not nil and not t, move to limit of search and return nil.
2234Optional fourth argument is repeat count--search for successive occurrences.
49080b10
LMI
2235
2236Search case-sensitivity is determined by the value of the variable
2237`case-fold-search', which see.
2238
8c1a1077
PJ
2239See also the functions `match-beginning', `match-end', `match-string',
2240and `replace-match'. */)
5842a27b 2241 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
ca1d1d23 2242{
b819a390
RS
2243 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2244}
2245
2246DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
8c1a1077
PJ
2247 "sPosix search backward: ",
2248 doc: /* Search backward from point for match for regular expression REGEXP.
2249Find the longest match in accord with Posix regular expression rules.
2250Set point to the beginning of the match, and return point.
2251The match found is the one starting last in the buffer
2252and yet ending before the origin of the search.
2253An optional second argument bounds the search; it is a buffer position.
2254The match found must start at or after that position.
2255Optional third argument, if t, means if fail just return nil (no error).
2256 If not nil and not t, move to limit of search and return nil.
2257Optional fourth argument is repeat count--search for successive occurrences.
49080b10
LMI
2258
2259Search case-sensitivity is determined by the value of the variable
2260`case-fold-search', which see.
2261
8c1a1077
PJ
2262See also the functions `match-beginning', `match-end', `match-string',
2263and `replace-match'. */)
5842a27b 2264 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
b819a390
RS
2265{
2266 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2267}
2268
2269DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
8c1a1077
PJ
2270 "sPosix search: ",
2271 doc: /* Search forward from point for regular expression REGEXP.
2272Find the longest match in accord with Posix regular expression rules.
2273Set point to the end of the occurrence found, and return point.
2274An optional second argument bounds the search; it is a buffer position.
2275The match found must not extend after that position.
2276Optional third argument, if t, means if fail just return nil (no error).
2277 If not nil and not t, move to limit of search and return nil.
2278Optional fourth argument is repeat count--search for successive occurrences.
49080b10
LMI
2279
2280Search case-sensitivity is determined by the value of the variable
2281`case-fold-search', which see.
2282
8c1a1077
PJ
2283See also the functions `match-beginning', `match-end', `match-string',
2284and `replace-match'. */)
5842a27b 2285 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
b819a390
RS
2286{
2287 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2288}
2289\f
d7a5ad5f 2290DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
8c1a1077 2291 doc: /* Replace text matched by last search with NEWTEXT.
4dd0c271
RS
2292Leave point at the end of the replacement text.
2293
c88b867f
CY
2294If optional second arg FIXEDCASE is non-nil, do not alter the case of
2295the replacement text. Otherwise, maybe capitalize the whole text, or
2296maybe just word initials, based on the replaced text. If the replaced
2297text has only capital letters and has at least one multiletter word,
2298convert NEWTEXT to all caps. Otherwise if all words are capitalized
2299in the replaced text, capitalize each word in NEWTEXT.
2300
2301If optional third arg LITERAL is non-nil, insert NEWTEXT literally.
8c1a1077
PJ
2302Otherwise treat `\\' as special:
2303 `\\&' in NEWTEXT means substitute original matched text.
2304 `\\N' means substitute what matched the Nth `\\(...\\)'.
2305 If Nth parens didn't match, substitute nothing.
2306 `\\\\' means insert one `\\'.
d32e47af
LM
2307 `\\?' is treated literally
2308 (for compatibility with `query-replace-regexp').
2309 Any other character following `\\' signals an error.
4dd0c271
RS
2310Case conversion does not apply to these substitutions.
2311
c88b867f
CY
2312If optional fourth argument STRING is non-nil, it should be a string
2313to act on; this should be the string on which the previous match was
2314done via `string-match'. In this case, `replace-match' creates and
2315returns a new string, made by copying STRING and replacing the part of
2316STRING that was matched (the original STRING itself is not altered).
8c1a1077
PJ
2317
2318The optional fifth argument SUBEXP specifies a subexpression;
2319it says to replace just that subexpression with NEWTEXT,
2320rather than replacing the entire matched text.
2321This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2322`\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2323NEWTEXT in place of subexp N.
2324This is useful only after a regular expression search or match,
2325since only regular expressions have distinguished subexpressions. */)
5842a27b 2326 (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp)
ca1d1d23
JB
2327{
2328 enum { nochange, all_caps, cap_initial } case_action;
457882c2
PE
2329 ptrdiff_t pos, pos_byte;
2330 bool some_multiletter_word;
2331 bool some_lowercase;
2332 bool some_uppercase;
2333 bool some_nonuppercase_initial;
2334 int c, prevc;
a0efffc8 2335 ptrdiff_t sub;
d311d28c 2336 ptrdiff_t opoint, newpoint;
ca1d1d23 2337
b7826503 2338 CHECK_STRING (newtext);
ca1d1d23 2339
080c45fd 2340 if (! NILP (string))
b7826503 2341 CHECK_STRING (string);
080c45fd 2342
ca1d1d23
JB
2343 case_action = nochange; /* We tried an initialization */
2344 /* but some C compilers blew it */
4746118a
JB
2345
2346 if (search_regs.num_regs <= 0)
d72cdbfc 2347 error ("`replace-match' called before any match found");
4746118a 2348
d7a5ad5f
RS
2349 if (NILP (subexp))
2350 sub = 0;
2351 else
2352 {
b7826503 2353 CHECK_NUMBER (subexp);
a0efffc8 2354 if (! (0 <= XINT (subexp) && XINT (subexp) < search_regs.num_regs))
d7a5ad5f 2355 args_out_of_range (subexp, make_number (search_regs.num_regs));
a0efffc8 2356 sub = XINT (subexp);
d7a5ad5f
RS
2357 }
2358
080c45fd
RS
2359 if (NILP (string))
2360 {
d7a5ad5f
RS
2361 if (search_regs.start[sub] < BEGV
2362 || search_regs.start[sub] > search_regs.end[sub]
2363 || search_regs.end[sub] > ZV)
2364 args_out_of_range (make_number (search_regs.start[sub]),
2365 make_number (search_regs.end[sub]));
080c45fd
RS
2366 }
2367 else
2368 {
d7a5ad5f
RS
2369 if (search_regs.start[sub] < 0
2370 || search_regs.start[sub] > search_regs.end[sub]
d5db4077 2371 || search_regs.end[sub] > SCHARS (string))
d7a5ad5f
RS
2372 args_out_of_range (make_number (search_regs.start[sub]),
2373 make_number (search_regs.end[sub]));
080c45fd 2374 }
ca1d1d23
JB
2375
2376 if (NILP (fixedcase))
2377 {
2378 /* Decide how to casify by examining the matched text. */
d311d28c 2379 ptrdiff_t last;
ca1d1d23 2380
ac3b28b1
KH
2381 pos = search_regs.start[sub];
2382 last = search_regs.end[sub];
fa8ed3e0
RS
2383
2384 if (NILP (string))
ac3b28b1 2385 pos_byte = CHAR_TO_BYTE (pos);
fa8ed3e0 2386 else
ac3b28b1 2387 pos_byte = string_char_to_byte (string, pos);
fa8ed3e0 2388
ca1d1d23
JB
2389 prevc = '\n';
2390 case_action = all_caps;
2391
2392 /* some_multiletter_word is set nonzero if any original word
2393 is more than one letter long. */
2394 some_multiletter_word = 0;
97832bd0 2395 some_lowercase = 0;
208767c3 2396 some_nonuppercase_initial = 0;
73dc8771 2397 some_uppercase = 0;
ca1d1d23 2398
ac3b28b1 2399 while (pos < last)
ca1d1d23 2400 {
080c45fd 2401 if (NILP (string))
ac3b28b1 2402 {
93daa011 2403 c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
ac3b28b1
KH
2404 INC_BOTH (pos, pos_byte);
2405 }
080c45fd 2406 else
93daa011 2407 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
080c45fd 2408
5da9919f 2409 if (lowercasep (c))
ca1d1d23
JB
2410 {
2411 /* Cannot be all caps if any original char is lower case */
2412
97832bd0 2413 some_lowercase = 1;
ca1d1d23 2414 if (SYNTAX (prevc) != Sword)
208767c3 2415 some_nonuppercase_initial = 1;
ca1d1d23
JB
2416 else
2417 some_multiletter_word = 1;
2418 }
5da9919f 2419 else if (uppercasep (c))
ca1d1d23 2420 {
73dc8771 2421 some_uppercase = 1;
97832bd0 2422 if (SYNTAX (prevc) != Sword)
c4d460ce 2423 ;
97832bd0 2424 else
ca1d1d23
JB
2425 some_multiletter_word = 1;
2426 }
208767c3
RS
2427 else
2428 {
2429 /* If the initial is a caseless word constituent,
2430 treat that like a lowercase initial. */
2431 if (SYNTAX (prevc) != Sword)
2432 some_nonuppercase_initial = 1;
2433 }
ca1d1d23
JB
2434
2435 prevc = c;
2436 }
2437
97832bd0
RS
2438 /* Convert to all caps if the old text is all caps
2439 and has at least one multiletter word. */
2440 if (! some_lowercase && some_multiletter_word)
2441 case_action = all_caps;
c4d460ce 2442 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2443 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2444 case_action = cap_initial;
208767c3 2445 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2446 /* Should x -> yz, operating on X, give Yz or YZ?
2447 We'll assume the latter. */
2448 case_action = all_caps;
97832bd0
RS
2449 else
2450 case_action = nochange;
ca1d1d23
JB
2451 }
2452
080c45fd
RS
2453 /* Do replacement in a string. */
2454 if (!NILP (string))
2455 {
2456 Lisp_Object before, after;
2457
2458 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2459 make_number (search_regs.start[sub]));
2460 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2461
636a5e28
RS
2462 /* Substitute parts of the match into NEWTEXT
2463 if desired. */
080c45fd
RS
2464 if (NILP (literal))
2465 {
d311d28c
PE
2466 ptrdiff_t lastpos = 0;
2467 ptrdiff_t lastpos_byte = 0;
080c45fd
RS
2468 /* We build up the substituted string in ACCUM. */
2469 Lisp_Object accum;
2470 Lisp_Object middle;
d311d28c 2471 ptrdiff_t length = SBYTES (newtext);
080c45fd
RS
2472
2473 accum = Qnil;
2474
ac3b28b1 2475 for (pos_byte = 0, pos = 0; pos_byte < length;)
080c45fd 2476 {
d311d28c
PE
2477 ptrdiff_t substart = -1;
2478 ptrdiff_t subend = 0;
457882c2 2479 bool delbackslash = 0;
080c45fd 2480
0c8533c6
RS
2481 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2482
080c45fd
RS
2483 if (c == '\\')
2484 {
0c8533c6 2485 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
177c0ea7 2486
080c45fd
RS
2487 if (c == '&')
2488 {
d7a5ad5f
RS
2489 substart = search_regs.start[sub];
2490 subend = search_regs.end[sub];
080c45fd 2491 }
76fc1ea2 2492 else if (c >= '1' && c <= '9')
080c45fd 2493 {
d311d28c 2494 if (c - '0' < search_regs.num_regs
908589fd 2495 && search_regs.start[c - '0'] >= 0)
080c45fd
RS
2496 {
2497 substart = search_regs.start[c - '0'];
2498 subend = search_regs.end[c - '0'];
2499 }
76fc1ea2
KH
2500 else
2501 {
2502 /* If that subexp did not match,
2503 replace \\N with nothing. */
2504 substart = 0;
2505 subend = 0;
2506 }
080c45fd 2507 }
1e79ec24
KH
2508 else if (c == '\\')
2509 delbackslash = 1;
d32e47af 2510 else if (c != '?')
636a5e28 2511 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2512 }
2513 if (substart >= 0)
2514 {
d131e79c
RS
2515 if (pos - 2 != lastpos)
2516 middle = substring_both (newtext, lastpos,
2517 lastpos_byte,
2518 pos - 2, pos_byte - 2);
080c45fd
RS
2519 else
2520 middle = Qnil;
2521 accum = concat3 (accum, middle,
0c8533c6
RS
2522 Fsubstring (string,
2523 make_number (substart),
080c45fd
RS
2524 make_number (subend)));
2525 lastpos = pos;
0c8533c6 2526 lastpos_byte = pos_byte;
080c45fd 2527 }
1e79ec24
KH
2528 else if (delbackslash)
2529 {
d131e79c
RS
2530 middle = substring_both (newtext, lastpos,
2531 lastpos_byte,
2532 pos - 1, pos_byte - 1);
0c8533c6 2533
1e79ec24
KH
2534 accum = concat2 (accum, middle);
2535 lastpos = pos;
0c8533c6 2536 lastpos_byte = pos_byte;
1e79ec24 2537 }
080c45fd
RS
2538 }
2539
d131e79c
RS
2540 if (pos != lastpos)
2541 middle = substring_both (newtext, lastpos,
2542 lastpos_byte,
0c8533c6 2543 pos, pos_byte);
080c45fd
RS
2544 else
2545 middle = Qnil;
2546
2547 newtext = concat2 (accum, middle);
2548 }
2549
636a5e28 2550 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2551 if (case_action == all_caps)
2552 newtext = Fupcase (newtext);
2553 else if (case_action == cap_initial)
2b2eead9 2554 newtext = Fupcase_initials (newtext);
080c45fd
RS
2555
2556 return concat3 (before, newtext, after);
2557 }
2558
09c4719e 2559 /* Record point, then move (quietly) to the start of the match. */
9160906f 2560 if (PT >= search_regs.end[sub])
b0eba991 2561 opoint = PT - ZV;
9160906f
RS
2562 else if (PT > search_regs.start[sub])
2563 opoint = search_regs.end[sub] - ZV;
b0eba991
RS
2564 else
2565 opoint = PT;
2566
886ed6ec
RS
2567 /* If we want non-literal replacement,
2568 perform substitution on the replacement string. */
2569 if (NILP (literal))
ca1d1d23 2570 {
0065d054 2571 ptrdiff_t length = SBYTES (newtext);
68e69fbd 2572 unsigned char *substed;
0065d054 2573 ptrdiff_t substed_alloc_size, substed_len;
0063fdb1
DA
2574 bool buf_multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
2575 bool str_multibyte = STRING_MULTIBYTE (newtext);
457882c2 2576 bool really_changed = 0;
3bc25e52 2577
7216e43b
PE
2578 substed_alloc_size = (length <= (STRING_BYTES_BOUND - 100) / 2
2579 ? length * 2 + 100
2580 : STRING_BYTES_BOUND);
c1ade6f7 2581 substed = xmalloc_atomic (substed_alloc_size);
68e69fbd
RS
2582 substed_len = 0;
2583
3bc25e52
KH
2584 /* Go thru NEWTEXT, producing the actual text to insert in
2585 SUBSTED while adjusting multibyteness to that of the current
2586 buffer. */
ca1d1d23 2587
ac3b28b1 2588 for (pos_byte = 0, pos = 0; pos_byte < length;)
ca1d1d23 2589 {
68e69fbd 2590 unsigned char str[MAX_MULTIBYTE_LENGTH];
8ea90aa3 2591 const unsigned char *add_stuff = NULL;
0065d054 2592 ptrdiff_t add_len = 0;
a0efffc8 2593 ptrdiff_t idx = -1;
9a76659d 2594
3bc25e52
KH
2595 if (str_multibyte)
2596 {
eb99a8dd 2597 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
3bc25e52 2598 if (!buf_multibyte)
200fc949 2599 c = CHAR_TO_BYTE8 (c);
3bc25e52
KH
2600 }
2601 else
2602 {
2603 /* Note that we don't have to increment POS. */
5d69fe10 2604 c = SREF (newtext, pos_byte++);
3bc25e52 2605 if (buf_multibyte)
4c0354d7 2606 MAKE_CHAR_MULTIBYTE (c);
3bc25e52 2607 }
ac3b28b1 2608
68e69fbd
RS
2609 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2610 or set IDX to a match index, which means put that part
2611 of the buffer text into SUBSTED. */
2612
ca1d1d23
JB
2613 if (c == '\\')
2614 {
886ed6ec
RS
2615 really_changed = 1;
2616
3bc25e52
KH
2617 if (str_multibyte)
2618 {
eb99a8dd
KH
2619 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2620 pos, pos_byte);
071ce769 2621 if (!buf_multibyte && !ASCII_CHAR_P (c))
200fc949 2622 c = CHAR_TO_BYTE8 (c);
3bc25e52
KH
2623 }
2624 else
2625 {
d5db4077 2626 c = SREF (newtext, pos_byte++);
3bc25e52 2627 if (buf_multibyte)
4c0354d7 2628 MAKE_CHAR_MULTIBYTE (c);
3bc25e52
KH
2629 }
2630
ca1d1d23 2631 if (c == '&')
68e69fbd 2632 idx = sub;
d311d28c 2633 else if (c >= '1' && c <= '9' && c - '0' < search_regs.num_regs)
ca1d1d23
JB
2634 {
2635 if (search_regs.start[c - '0'] >= 1)
68e69fbd 2636 idx = c - '0';
ca1d1d23 2637 }
636a5e28 2638 else if (c == '\\')
a7e979a4 2639 add_len = 1, add_stuff = (unsigned char *) "\\";
636a5e28 2640 else
3bc25e52
KH
2641 {
2642 xfree (substed);
2643 error ("Invalid use of `\\' in replacement text");
2644 }
ca1d1d23
JB
2645 }
2646 else
68e69fbd
RS
2647 {
2648 add_len = CHAR_STRING (c, str);
2649 add_stuff = str;
2650 }
2651
2652 /* If we want to copy part of a previous match,
2653 set up ADD_STUFF and ADD_LEN to point to it. */
2654 if (idx >= 0)
2655 {
0065d054 2656 ptrdiff_t begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
68e69fbd
RS
2657 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2658 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
13002885 2659 move_gap_both (search_regs.start[idx], begbyte);
68e69fbd
RS
2660 add_stuff = BYTE_POS_ADDR (begbyte);
2661 }
2662
2663 /* Now the stuff we want to add to SUBSTED
2664 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2665
2666 /* Make sure SUBSTED is big enough. */
0065d054
PE
2667 if (substed_alloc_size - substed_len < add_len)
2668 substed =
2669 xpalloc (substed, &substed_alloc_size,
2670 add_len - (substed_alloc_size - substed_len),
2671 STRING_BYTES_BOUND, 1);
68e69fbd
RS
2672
2673 /* Now add to the end of SUBSTED. */
f8ce8a0d
GM
2674 if (add_stuff)
2675 {
72af86bd 2676 memcpy (substed + substed_len, add_stuff, add_len);
f8ce8a0d
GM
2677 substed_len += add_len;
2678 }
ca1d1d23 2679 }
68e69fbd 2680
886ed6ec 2681 if (really_changed)
68712e08
DA
2682 newtext = make_specified_string ((const char *) substed, -1,
2683 substed_len, buf_multibyte);
68e69fbd 2684 xfree (substed);
ca1d1d23
JB
2685 }
2686
886ed6ec
RS
2687 /* Replace the old text with the new in the cleanest possible way. */
2688 replace_range (search_regs.start[sub], search_regs.end[sub],
2689 newtext, 1, 0, 1);
d5db4077 2690 newpoint = search_regs.start[sub] + SCHARS (newtext);
ca1d1d23
JB
2691
2692 if (case_action == all_caps)
886ed6ec
RS
2693 Fupcase_region (make_number (search_regs.start[sub]),
2694 make_number (newpoint));
ca1d1d23 2695 else if (case_action == cap_initial)
886ed6ec
RS
2696 Fupcase_initials_region (make_number (search_regs.start[sub]),
2697 make_number (newpoint));
3e18eecf 2698
98e942e0
RS
2699 /* Adjust search data for this change. */
2700 {
d311d28c
PE
2701 ptrdiff_t oldend = search_regs.end[sub];
2702 ptrdiff_t oldstart = search_regs.start[sub];
2703 ptrdiff_t change = newpoint - search_regs.end[sub];
2704 ptrdiff_t i;
98e942e0
RS
2705
2706 for (i = 0; i < search_regs.num_regs; i++)
2707 {
41c01205 2708 if (search_regs.start[i] >= oldend)
98e942e0 2709 search_regs.start[i] += change;
41c01205
DK
2710 else if (search_regs.start[i] > oldstart)
2711 search_regs.start[i] = oldstart;
2712 if (search_regs.end[i] >= oldend)
98e942e0 2713 search_regs.end[i] += change;
41c01205
DK
2714 else if (search_regs.end[i] > oldstart)
2715 search_regs.end[i] = oldstart;
98e942e0
RS
2716 }
2717 }
2718
b0eba991 2719 /* Put point back where it was in the text. */
8d808a65 2720 if (opoint <= 0)
fa8ed3e0 2721 TEMP_SET_PT (opoint + ZV);
b0eba991 2722 else
fa8ed3e0 2723 TEMP_SET_PT (opoint);
b0eba991
RS
2724
2725 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2726 move_if_not_intangible (newpoint);
177c0ea7 2727
ca1d1d23
JB
2728 return Qnil;
2729}
2730\f
2731static Lisp_Object
457882c2 2732match_limit (Lisp_Object num, bool beginningp)
ca1d1d23 2733{
a0efffc8 2734 EMACS_INT n;
ca1d1d23 2735
b7826503 2736 CHECK_NUMBER (num);
ca1d1d23 2737 n = XINT (num);
f90a5bf5 2738 if (n < 0)
bd2cbd56 2739 args_out_of_range (num, make_number (0));
f90a5bf5
RS
2740 if (search_regs.num_regs <= 0)
2741 error ("No match data, because no search succeeded");
9b9ceb61 2742 if (n >= search_regs.num_regs
4746118a 2743 || search_regs.start[n] < 0)
ca1d1d23
JB
2744 return Qnil;
2745 return (make_number ((beginningp) ? search_regs.start[n]
2746 : search_regs.end[n]));
2747}
2748
a7ca3326 2749DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
8c1a1077
PJ
2750 doc: /* Return position of start of text matched by last search.
2751SUBEXP, a number, specifies which parenthesized expression in the last
2752 regexp.
2753Value is nil if SUBEXPth pair didn't match, or there were less than
2754 SUBEXP pairs.
2755Zero means the entire text matched by the whole regexp or whole string. */)
5842a27b 2756 (Lisp_Object subexp)
ca1d1d23 2757{
5806161b 2758 return match_limit (subexp, 1);
ca1d1d23
JB
2759}
2760
a7ca3326 2761DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
8c1a1077
PJ
2762 doc: /* Return position of end of text matched by last search.
2763SUBEXP, a number, specifies which parenthesized expression in the last
2764 regexp.
2765Value is nil if SUBEXPth pair didn't match, or there were less than
2766 SUBEXP pairs.
2767Zero means the entire text matched by the whole regexp or whole string. */)
5842a27b 2768 (Lisp_Object subexp)
ca1d1d23 2769{
5806161b 2770 return match_limit (subexp, 0);
177c0ea7 2771}
ca1d1d23 2772
a7ca3326 2773DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
8c1a1077
PJ
2774 doc: /* Return a list containing all info on what the last search matched.
2775Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2776All the elements are markers or nil (nil if the Nth pair didn't match)
2777if the last match was on a buffer; integers or nil if a string was matched.
febb8aba 2778Use `set-match-data' to reinstate the data in this list.
8c1a1077 2779
41c01205
DK
2780If INTEGERS (the optional first argument) is non-nil, always use
2781integers \(rather than markers) to represent buffer positions. In
2782this case, and if the last match was in a buffer, the buffer will get
2783stored as one additional element at the end of the list.
2784
abd0071c
KS
2785If REUSE is a list, reuse it as part of the value. If REUSE is long
2786enough to hold all the values, and if INTEGERS is non-nil, no consing
2787is done.
2788
2789If optional third arg RESEAT is non-nil, any previous markers on the
2790REUSE list will be modified to point to nowhere.
2791
140a6b7e 2792Return value is undefined if the last search failed. */)
5842a27b 2793 (Lisp_Object integers, Lisp_Object reuse, Lisp_Object reseat)
ca1d1d23 2794{
56256c2a 2795 Lisp_Object tail, prev;
4746118a 2796 Lisp_Object *data;
d311d28c 2797 ptrdiff_t i, len;
ca1d1d23 2798
abd0071c
KS
2799 if (!NILP (reseat))
2800 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2801 if (MARKERP (XCAR (tail)))
2802 {
51f10faa 2803 unchain_marker (XMARKER (XCAR (tail)));
abd0071c
KS
2804 XSETCAR (tail, Qnil);
2805 }
2806
daa37602 2807 if (NILP (last_thing_searched))
c36bcf1b 2808 return Qnil;
daa37602 2809
6bbd7a29
GM
2810 prev = Qnil;
2811
38182d90 2812 data = alloca ((2 * search_regs.num_regs + 1) * sizeof *data);
4746118a 2813
41c01205 2814 len = 0;
4746118a 2815 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23 2816 {
d311d28c 2817 ptrdiff_t start = search_regs.start[i];
ca1d1d23
JB
2818 if (start >= 0)
2819 {
56256c2a
RS
2820 if (EQ (last_thing_searched, Qt)
2821 || ! NILP (integers))
ca1d1d23 2822 {
c235cce7
KH
2823 XSETFASTINT (data[2 * i], start);
2824 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2825 }
0ed62dc7 2826 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2827 {
2828 data[2 * i] = Fmake_marker ();
daa37602
JB
2829 Fset_marker (data[2 * i],
2830 make_number (start),
2831 last_thing_searched);
ca1d1d23
JB
2832 data[2 * i + 1] = Fmake_marker ();
2833 Fset_marker (data[2 * i + 1],
177c0ea7 2834 make_number (search_regs.end[i]),
daa37602 2835 last_thing_searched);
ca1d1d23 2836 }
daa37602
JB
2837 else
2838 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
1088b922 2839 emacs_abort ();
daa37602 2840
abd0071c 2841 len = 2 * i + 2;
ca1d1d23
JB
2842 }
2843 else
abd0071c 2844 data[2 * i] = data[2 * i + 1] = Qnil;
ca1d1d23 2845 }
56256c2a 2846
bd2cbd56 2847 if (BUFFERP (last_thing_searched) && !NILP (integers))
41c01205 2848 {
bd2cbd56 2849 data[len] = last_thing_searched;
41c01205
DK
2850 len++;
2851 }
2852
56256c2a
RS
2853 /* If REUSE is not usable, cons up the values and return them. */
2854 if (! CONSP (reuse))
41c01205 2855 return Flist (len, data);
56256c2a
RS
2856
2857 /* If REUSE is a list, store as many value elements as will fit
2858 into the elements of REUSE. */
2859 for (i = 0, tail = reuse; CONSP (tail);
c1d497be 2860 i++, tail = XCDR (tail))
56256c2a 2861 {
41c01205 2862 if (i < len)
f3fbd155 2863 XSETCAR (tail, data[i]);
56256c2a 2864 else
f3fbd155 2865 XSETCAR (tail, Qnil);
56256c2a
RS
2866 prev = tail;
2867 }
2868
2869 /* If we couldn't fit all value elements into REUSE,
2870 cons up the rest of them and add them to the end of REUSE. */
41c01205
DK
2871 if (i < len)
2872 XSETCDR (prev, Flist (len - i, data + i));
56256c2a
RS
2873
2874 return reuse;
ca1d1d23
JB
2875}
2876
b51d6c92
SM
2877/* We used to have an internal use variant of `reseat' described as:
2878
2879 If RESEAT is `evaporate', put the markers back on the free list
2880 immediately. No other references to the markers must exist in this
2881 case, so it is used only internally on the unwind stack and
2882 save-match-data from Lisp.
2883
2884 But it was ill-conceived: those supposedly-internal markers get exposed via
2885 the undo-list, so freeing them here is unsafe. */
ca1d1d23 2886
a7ca3326 2887DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
8c1a1077 2888 doc: /* Set internal data on last search match from elements of LIST.
abd0071c
KS
2889LIST should have been created by calling `match-data' previously.
2890
51f10faa 2891If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
5842a27b 2892 (register Lisp_Object list, Lisp_Object reseat)
ca1d1d23 2893{
5f2ab479 2894 ptrdiff_t i;
ca1d1d23
JB
2895 register Lisp_Object marker;
2896
7074fde6
FP
2897 if (running_asynch_code)
2898 save_search_regs ();
2899
29100cea 2900 CHECK_LIST (list);
ca1d1d23 2901
41c01205
DK
2902 /* Unless we find a marker with a buffer or an explicit buffer
2903 in LIST, assume that this match data came from a string. */
daa37602
JB
2904 last_thing_searched = Qt;
2905
4746118a
JB
2906 /* Allocate registers if they don't already exist. */
2907 {
d311d28c 2908 EMACS_INT length = XFASTINT (Flength (list)) / 2;
4746118a
JB
2909
2910 if (length > search_regs.num_regs)
2911 {
0065d054 2912 ptrdiff_t num_regs = search_regs.num_regs;
d311d28c
PE
2913 if (PTRDIFF_MAX < length)
2914 memory_full (SIZE_MAX);
0065d054
PE
2915 search_regs.start =
2916 xpalloc (search_regs.start, &num_regs, length - num_regs,
2917 min (PTRDIFF_MAX, UINT_MAX), sizeof (regoff_t));
2918 search_regs.end =
2919 xrealloc (search_regs.end, num_regs * sizeof (regoff_t));
2920
2921 for (i = search_regs.num_regs; i < num_regs; i++)
e62371e9
KH
2922 search_regs.start[i] = -1;
2923
0065d054 2924 search_regs.num_regs = num_regs;
4746118a 2925 }
ca1d1d23 2926
abd0071c 2927 for (i = 0; CONSP (list); i++)
41c01205 2928 {
abd0071c 2929 marker = XCAR (list);
bd2cbd56 2930 if (BUFFERP (marker))
c3762cbd 2931 {
bd2cbd56 2932 last_thing_searched = marker;
c3762cbd
DK
2933 break;
2934 }
2935 if (i >= length)
2936 break;
41c01205
DK
2937 if (NILP (marker))
2938 {
2939 search_regs.start[i] = -1;
abd0071c 2940 list = XCDR (list);
41c01205
DK
2941 }
2942 else
2943 {
d311d28c 2944 Lisp_Object from;
abd0071c 2945 Lisp_Object m;
e2811828 2946
abd0071c 2947 m = marker;
41c01205
DK
2948 if (MARKERP (marker))
2949 {
2950 if (XMARKER (marker)->buffer == 0)
2951 XSETFASTINT (marker, 0);
2952 else
2953 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2954 }
e2811828 2955
41c01205 2956 CHECK_NUMBER_COERCE_MARKER (marker);
d311d28c 2957 from = marker;
e2811828 2958
abd0071c
KS
2959 if (!NILP (reseat) && MARKERP (m))
2960 {
b51d6c92 2961 unchain_marker (XMARKER (m));
9ad54a7e 2962 XSETCAR (list, Qnil);
abd0071c
KS
2963 }
2964
2965 if ((list = XCDR (list), !CONSP (list)))
2966 break;
2967
2968 m = marker = XCAR (list);
2969
41c01205
DK
2970 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2971 XSETFASTINT (marker, 0);
e2811828 2972
41c01205 2973 CHECK_NUMBER_COERCE_MARKER (marker);
0b1fccc4
PE
2974 if ((XINT (from) < 0
2975 ? TYPE_MINIMUM (regoff_t) <= XINT (from)
2976 : XINT (from) <= TYPE_MAXIMUM (regoff_t))
2977 && (XINT (marker) < 0
2978 ? TYPE_MINIMUM (regoff_t) <= XINT (marker)
2979 : XINT (marker) <= TYPE_MAXIMUM (regoff_t)))
d311d28c
PE
2980 {
2981 search_regs.start[i] = XINT (from);
2982 search_regs.end[i] = XINT (marker);
2983 }
2984 else
2985 {
2986 search_regs.start[i] = -1;
2987 }
abd0071c
KS
2988
2989 if (!NILP (reseat) && MARKERP (m))
2990 {
b51d6c92 2991 unchain_marker (XMARKER (m));
9ad54a7e 2992 XSETCAR (list, Qnil);
abd0071c 2993 }
41c01205 2994 }
abd0071c 2995 list = XCDR (list);
41c01205 2996 }
ca1d1d23 2997
41c01205
DK
2998 for (; i < search_regs.num_regs; i++)
2999 search_regs.start[i] = -1;
3000 }
ca1d1d23 3001
177c0ea7 3002 return Qnil;
ca1d1d23
JB
3003}
3004
457882c2 3005/* If true the match data have been saved in saved_search_regs
7074fde6 3006 during the execution of a sentinel or filter. */
457882c2 3007static bool search_regs_saved;
7074fde6 3008static struct re_registers saved_search_regs;
41c01205 3009static Lisp_Object saved_last_thing_searched;
7074fde6
FP
3010
3011/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3012 if asynchronous code (filter or sentinel) is running. */
3013static void
971de7fb 3014save_search_regs (void)
7074fde6
FP
3015{
3016 if (!search_regs_saved)
3017 {
3018 saved_search_regs.num_regs = search_regs.num_regs;
3019 saved_search_regs.start = search_regs.start;
3020 saved_search_regs.end = search_regs.end;
41c01205
DK
3021 saved_last_thing_searched = last_thing_searched;
3022 last_thing_searched = Qnil;
7074fde6 3023 search_regs.num_regs = 0;
2d4a771a
RS
3024 search_regs.start = 0;
3025 search_regs.end = 0;
7074fde6
FP
3026
3027 search_regs_saved = 1;
3028 }
3029}
3030
3031/* Called upon exit from filters and sentinels. */
3032void
971de7fb 3033restore_search_regs (void)
7074fde6
FP
3034{
3035 if (search_regs_saved)
3036 {
3037 if (search_regs.num_regs > 0)
3038 {
3039 xfree (search_regs.start);
3040 xfree (search_regs.end);
3041 }
3042 search_regs.num_regs = saved_search_regs.num_regs;
3043 search_regs.start = saved_search_regs.start;
3044 search_regs.end = saved_search_regs.end;
41c01205
DK
3045 last_thing_searched = saved_last_thing_searched;
3046 saved_last_thing_searched = Qnil;
7074fde6
FP
3047 search_regs_saved = 0;
3048 }
3049}
3050
27e498e6 3051static void
971de7fb 3052unwind_set_match_data (Lisp_Object list)
abd0071c 3053{
b51d6c92 3054 /* It is NOT ALWAYS safe to free (evaporate) the markers immediately. */
27e498e6 3055 Fset_match_data (list, Qt);
abd0071c
KS
3056}
3057
3058/* Called to unwind protect the match data. */
3059void
971de7fb 3060record_unwind_save_match_data (void)
abd0071c
KS
3061{
3062 record_unwind_protect (unwind_set_match_data,
3063 Fmatch_data (Qnil, Qnil, Qnil));
3064}
3065
e9a452d9 3066/* Quote a string to deactivate reg-expr chars */
ca1d1d23
JB
3067
3068DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
8c1a1077 3069 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
5842a27b 3070 (Lisp_Object string)
ca1d1d23 3071{
457882c2
PE
3072 char *in, *out, *end;
3073 char *temp;
3074 ptrdiff_t backslashes_added = 0;
ca1d1d23 3075
b7826503 3076 CHECK_STRING (string);
ca1d1d23 3077
38182d90 3078 temp = alloca (SBYTES (string) * 2);
ca1d1d23
JB
3079
3080 /* Now copy the data into the new string, inserting escapes. */
3081
a7e979a4 3082 in = SSDATA (string);
d5db4077 3083 end = in + SBYTES (string);
177c0ea7 3084 out = temp;
ca1d1d23
JB
3085
3086 for (; in != end; in++)
3087 {
66bc6082 3088 if (*in == '['
ca1d1d23
JB
3089 || *in == '*' || *in == '.' || *in == '\\'
3090 || *in == '?' || *in == '+'
3091 || *in == '^' || *in == '$')
0c8533c6 3092 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
3093 *out++ = *in;
3094 }
3095
3f8100f1 3096 return make_specified_string (temp,
d5db4077 3097 SCHARS (string) + backslashes_added,
3f8100f1
RS
3098 out - temp,
3099 STRING_MULTIBYTE (string));
ca1d1d23 3100}
2f999d52
EZ
3101
3102/* Like find_newline, but doesn't use the cache, and only searches forward. */
3103static ptrdiff_t
3104find_newline1 (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
3105 ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *shortage,
3106 ptrdiff_t *bytepos, bool allow_quit)
3107{
3108 if (count > 0)
3109 {
3110 if (!end)
3111 end = ZV, end_byte = ZV_BYTE;
3112 }
3113 else
3114 {
3115 if (!end)
3116 end = BEGV, end_byte = BEGV_BYTE;
3117 }
3118 if (end_byte == -1)
3119 end_byte = CHAR_TO_BYTE (end);
3120
3121 if (shortage != 0)
3122 *shortage = 0;
3123
3124 immediate_quit = allow_quit;
3125
3126 if (count > 0)
3127 while (start != end)
3128 {
3129 /* Our innermost scanning loop is very simple; it doesn't know
3130 about gaps, buffer ends, or the newline cache. ceiling is
3131 the position of the last character before the next such
3132 obstacle --- the last character the dumb search loop should
3133 examine. */
3134 ptrdiff_t tem, ceiling_byte = end_byte - 1;
3135
3136 if (start_byte == -1)
3137 start_byte = CHAR_TO_BYTE (start);
3138
3139 /* The dumb loop can only scan text stored in contiguous
3140 bytes. BUFFER_CEILING_OF returns the last character
3141 position that is contiguous, so the ceiling is the
3142 position after that. */
3143 tem = BUFFER_CEILING_OF (start_byte);
3144 ceiling_byte = min (tem, ceiling_byte);
3145
3146 {
3147 /* The termination address of the dumb loop. */
3148 unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
3149 ptrdiff_t lim_byte = ceiling_byte + 1;
3150
3151 /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
3152 of the base, the cursor, and the next line. */
3153 ptrdiff_t base = start_byte - lim_byte;
3154 ptrdiff_t cursor, next;
3155
3156 for (cursor = base; cursor < 0; cursor = next)
3157 {
3158 /* The dumb loop. */
3159 unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
3160 next = nl ? nl - lim_addr : 0;
3161
3162 if (! nl)
3163 break;
3164 next++;
3165
3166 if (--count == 0)
3167 {
3168 immediate_quit = 0;
3169 if (bytepos)
3170 *bytepos = lim_byte + next;
3171 return BYTE_TO_CHAR (lim_byte + next);
3172 }
3173 }
3174
3175 start_byte = lim_byte;
3176 start = BYTE_TO_CHAR (start_byte);
3177 }
3178 }
3179
3180 immediate_quit = 0;
3181 if (shortage)
3182 *shortage = count;
3183 if (bytepos)
3184 {
3185 *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
3186 eassert (*bytepos == CHAR_TO_BYTE (start));
3187 }
3188 return start;
3189}
3190
3191DEFUN ("newline-cache-check", Fnewline_cache_check, Snewline_cache_check,
3192 0, 1, 0,
3193 doc: /* Check the newline cache of BUFFER against buffer contents.
3194
3195BUFFER defaults to the current buffer.
3196
3197Value is an array of 2 sub-arrays of buffer positions for newlines,
3198the first based on the cache, the second based on actually scanning
3199the buffer. If the buffer doesn't have a cache, the value is nil. */)
3200 (Lisp_Object buffer)
3201{
80d982d7 3202 struct buffer *buf, *old = NULL;
2f999d52
EZ
3203 ptrdiff_t shortage, nl_count_cache, nl_count_buf;
3204 Lisp_Object cache_newlines, buf_newlines, val;
0fae5004 3205 ptrdiff_t from, found, i;
2f999d52
EZ
3206
3207 if (NILP (buffer))
3208 buf = current_buffer;
3209 else
3210 {
3211 CHECK_BUFFER (buffer);
3212 buf = XBUFFER (buffer);
80d982d7 3213 old = current_buffer;
2f999d52
EZ
3214 }
3215 if (buf->base_buffer)
3216 buf = buf->base_buffer;
3217
3218 /* If the buffer doesn't have a newline cache, return nil. */
3219 if (NILP (BVAR (buf, cache_long_scans))
3220 || buf->newline_cache == NULL)
3221 return Qnil;
3222
80d982d7
EZ
3223 /* find_newline can only work on the current buffer. */
3224 if (old != NULL)
3225 set_buffer_internal_1 (buf);
3226
2f999d52 3227 /* How many newlines are there according to the cache? */
80d982d7 3228 find_newline (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
2f999d52
EZ
3229 TYPE_MAXIMUM (ptrdiff_t), &shortage, NULL, true);
3230 nl_count_cache = TYPE_MAXIMUM (ptrdiff_t) - shortage;
3231
3232 /* Create vector and populate it. */
3233 cache_newlines = make_uninit_vector (nl_count_cache);
80d982d7
EZ
3234
3235 if (nl_count_cache)
2f999d52 3236 {
80d982d7
EZ
3237 for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
3238 {
3239 ptrdiff_t from_byte = CHAR_TO_BYTE (from);
2f999d52 3240
80d982d7
EZ
3241 found = find_newline (from, from_byte, 0, -1, 1, &shortage,
3242 NULL, true);
3243 if (shortage != 0 || i >= nl_count_cache)
3244 break;
3245 ASET (cache_newlines, i, make_number (found - 1));
3246 }
3247 /* Fill the rest of slots with an invalid position. */
3248 for ( ; i < nl_count_cache; i++)
3249 ASET (cache_newlines, i, make_number (-1));
2f999d52
EZ
3250 }
3251
3252 /* Now do the same, but without using the cache. */
80d982d7 3253 find_newline1 (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
2f999d52
EZ
3254 TYPE_MAXIMUM (ptrdiff_t), &shortage, NULL, true);
3255 nl_count_buf = TYPE_MAXIMUM (ptrdiff_t) - shortage;
3256 buf_newlines = make_uninit_vector (nl_count_buf);
80d982d7 3257 if (nl_count_buf)
2f999d52 3258 {
80d982d7
EZ
3259 for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
3260 {
3261 ptrdiff_t from_byte = CHAR_TO_BYTE (from);
2f999d52 3262
80d982d7
EZ
3263 found = find_newline1 (from, from_byte, 0, -1, 1, &shortage,
3264 NULL, true);
3265 if (shortage != 0 || i >= nl_count_buf)
3266 break;
3267 ASET (buf_newlines, i, make_number (found - 1));
3268 }
3269 for ( ; i < nl_count_buf; i++)
3270 ASET (buf_newlines, i, make_number (-1));
2f999d52
EZ
3271 }
3272
3273 /* Construct the value and return it. */
3274 val = make_uninit_vector (2);
3275 ASET (val, 0, cache_newlines);
3276 ASET (val, 1, buf_newlines);
80d982d7
EZ
3277
3278 if (old != NULL)
3279 set_buffer_internal_1 (old);
2f999d52
EZ
3280 return val;
3281}
177c0ea7 3282\f
dfcf069d 3283void
971de7fb 3284syms_of_search (void)
ca1d1d23
JB
3285{
3286 register int i;
3287
487282dc
KH
3288 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3289 {
3290 searchbufs[i].buf.allocated = 100;
c1ade6f7 3291 searchbufs[i].buf.buffer = xmalloc_atomic (100);
487282dc
KH
3292 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3293 searchbufs[i].regexp = Qnil;
ecdb561e 3294 searchbufs[i].whitespace_regexp = Qnil;
b69e3c18 3295 searchbufs[i].syntax_table = Qnil;
487282dc 3296 staticpro (&searchbufs[i].regexp);
aa77b5ce 3297 staticpro (&searchbufs[i].whitespace_regexp);
b69e3c18 3298 staticpro (&searchbufs[i].syntax_table);
487282dc
KH
3299 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3300 }
3301 searchbuf_head = &searchbufs[0];
ca1d1d23 3302
cd3520a4
JB
3303 DEFSYM (Qsearch_failed, "search-failed");
3304 DEFSYM (Qinvalid_regexp, "invalid-regexp");
ca1d1d23
JB
3305
3306 Fput (Qsearch_failed, Qerror_conditions,
3438fe21 3307 listn (CONSTYPE_PURE, 2, Qsearch_failed, Qerror));
ca1d1d23 3308 Fput (Qsearch_failed, Qerror_message,
2a0213a6 3309 build_pure_c_string ("Search failed"));
ca1d1d23
JB
3310
3311 Fput (Qinvalid_regexp, Qerror_conditions,
3438fe21 3312 listn (CONSTYPE_PURE, 2, Qinvalid_regexp, Qerror));
ca1d1d23 3313 Fput (Qinvalid_regexp, Qerror_message,
2a0213a6 3314 build_pure_c_string ("Invalid regexp"));
ca1d1d23 3315
daa37602
JB
3316 last_thing_searched = Qnil;
3317 staticpro (&last_thing_searched);
3318
0f6af254
DK
3319 saved_last_thing_searched = Qnil;
3320 staticpro (&saved_last_thing_searched);
3321
29208e82 3322 DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp,
e2811828 3323 doc: /* Regexp to substitute for bunches of spaces in regexp search.
f31a9a68
RS
3324Some commands use this for user-specified regexps.
3325Spaces that occur inside character classes or repetition operators
3326or other such regexp constructs are not replaced with this.
3327A value of nil (which is the normal value) means treat spaces literally. */);
41a33295 3328 Vsearch_spaces_regexp = Qnil;
f31a9a68 3329
29208e82 3330 DEFVAR_LISP ("inhibit-changing-match-data", Vinhibit_changing_match_data,
ef887810 3331 doc: /* Internal use only.
39d0bf74
RS
3332If non-nil, the primitive searching and matching functions
3333such as `looking-at', `string-match', `re-search-forward', etc.,
3334do not set the match data. The proper way to use this variable
3335is to bind it with `let' around a small expression. */);
ef887810
RS
3336 Vinhibit_changing_match_data = Qnil;
3337
ca1d1d23 3338 defsubr (&Slooking_at);
b819a390
RS
3339 defsubr (&Sposix_looking_at);
3340 defsubr (&Sstring_match);
3341 defsubr (&Sposix_string_match);
ca1d1d23
JB
3342 defsubr (&Ssearch_forward);
3343 defsubr (&Ssearch_backward);
ca1d1d23
JB
3344 defsubr (&Sre_search_forward);
3345 defsubr (&Sre_search_backward);
b819a390
RS
3346 defsubr (&Sposix_search_forward);
3347 defsubr (&Sposix_search_backward);
ca1d1d23
JB
3348 defsubr (&Sreplace_match);
3349 defsubr (&Smatch_beginning);
3350 defsubr (&Smatch_end);
3351 defsubr (&Smatch_data);
3f1c005b 3352 defsubr (&Sset_match_data);
ca1d1d23 3353 defsubr (&Sregexp_quote);
2f999d52 3354 defsubr (&Snewline_cache_check);
ca1d1d23 3355}