Introduce new bytecodes for efficient catch/condition-case in lexbind.
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
4fb9a543 2
ab422c4d
PE
3Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2013 Free Software
4Foundation, Inc.
ca1d1d23
JB
5
6This file is part of GNU Emacs.
7
9ec0b715 8GNU Emacs is free software: you can redistribute it and/or modify
ca1d1d23 9it under the terms of the GNU General Public License as published by
9ec0b715
GM
10the Free Software Foundation, either version 3 of the License, or
11(at your option) any later version.
ca1d1d23
JB
12
13GNU Emacs is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
9ec0b715 19along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
ca1d1d23
JB
20
21
18160b98 22#include <config.h>
0328b6de 23
ca1d1d23 24#include "lisp.h"
5679531d 25#include "category.h"
76eb0881 26#include "character.h"
e5560ff7 27#include "buffer.h"
45b683a1 28#include "syntax.h"
89ad6492 29#include "charset.h"
9169c321 30#include "region-cache.h"
ca1d1d23 31#include "commands.h"
9ac0d9e0 32#include "blockinput.h"
bf1760bb 33#include "intervals.h"
4746118a 34
ca1d1d23
JB
35#include <sys/types.h>
36#include "regex.h"
37
1d288aef 38#define REGEXP_CACHE_SIZE 20
ca1d1d23 39
487282dc
KH
40/* If the regexp is non-nil, then the buffer contains the compiled form
41 of that regexp, suitable for searching. */
1d288aef
RS
42struct regexp_cache
43{
487282dc 44 struct regexp_cache *next;
ecdb561e 45 Lisp_Object regexp, whitespace_regexp;
b69e3c18 46 /* Syntax table for which the regexp applies. We need this because
58e95211
SM
47 of character classes. If this is t, then the compiled pattern is valid
48 for any syntax-table. */
b69e3c18 49 Lisp_Object syntax_table;
487282dc
KH
50 struct re_pattern_buffer buf;
51 char fastmap[0400];
457882c2
PE
52 /* True means regexp was compiled to do full POSIX backtracking. */
53 bool posix;
487282dc 54};
ca1d1d23 55
487282dc 56/* The instances of that struct. */
989b29ad 57static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 58
487282dc 59/* The head of the linked list; points to the most recently used buffer. */
989b29ad 60static struct regexp_cache *searchbuf_head;
ca1d1d23 61
ca1d1d23 62
4746118a
JB
63/* Every call to re_match, etc., must pass &search_regs as the regs
64 argument unless you can show it is unnecessary (i.e., if re_match
65 is certainly going to be called again before region-around-match
66 can be called).
67
68 Since the registers are now dynamically allocated, we need to make
69 sure not to refer to the Nth register before checking that it has
1113d9db
JB
70 been allocated by checking search_regs.num_regs.
71
72 The regex code keeps track of whether it has allocated the search
487282dc
KH
73 buffer using bits in the re_pattern_buffer. This means that whenever
74 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
75 allocated any registers, and will allocate new registers the next
76 time you call a searching or matching function. Therefore, we need
77 to call re_set_registers after compiling a new pattern or after
78 setting the match registers, so that the regex functions will be
79 able to free or re-allocate it properly. */
ca1d1d23
JB
80static struct re_registers search_regs;
81
daa37602
JB
82/* The buffer in which the last search was performed, or
83 Qt if the last search was done in a string;
84 Qnil if no searching has been done yet. */
85static Lisp_Object last_thing_searched;
ca1d1d23 86
02b16839 87/* Error condition signaled when regexp compile_pattern fails. */
955cbe7b 88static Lisp_Object Qinvalid_regexp;
ca1d1d23 89
02b16839 90/* Error condition used for failing searches. */
955cbe7b 91static Lisp_Object Qsearch_failed;
06f77a8a 92
d311d28c 93static void set_search_regs (ptrdiff_t, ptrdiff_t);
f57e2426 94static void save_search_regs (void);
d311d28c
PE
95static EMACS_INT simple_search (EMACS_INT, unsigned char *, ptrdiff_t,
96 ptrdiff_t, Lisp_Object, ptrdiff_t, ptrdiff_t,
97 ptrdiff_t, ptrdiff_t);
98static EMACS_INT boyer_moore (EMACS_INT, unsigned char *, ptrdiff_t,
99 Lisp_Object, Lisp_Object, ptrdiff_t,
100 ptrdiff_t, int);
101static EMACS_INT search_buffer (Lisp_Object, ptrdiff_t, ptrdiff_t,
102 ptrdiff_t, ptrdiff_t, EMACS_INT, int,
457882c2 103 Lisp_Object, Lisp_Object, bool);
b819a390 104
845ca893 105static _Noreturn void
971de7fb 106matcher_overflow (void)
ca1d1d23
JB
107{
108 error ("Stack overflow in regexp matcher");
109}
110
b819a390
RS
111/* Compile a regexp and signal a Lisp error if anything goes wrong.
112 PATTERN is the pattern to compile.
113 CP is the place to put the result.
facdc750 114 TRANSLATE is a translation table for ignoring case, or nil for none.
457882c2
PE
115 POSIX is true if we want full backtracking (POSIX style) for this pattern.
116 False means backtrack only enough to get a valid match.
bbc73b48
KH
117
118 The behavior also depends on Vsearch_spaces_regexp. */
ca1d1d23 119
487282dc 120static void
457882c2
PE
121compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
122 Lisp_Object translate, bool posix)
ca1d1d23 123{
d451e4db 124 char *val;
b819a390 125 reg_syntax_t old;
ca1d1d23 126
487282dc 127 cp->regexp = Qnil;
59fab369 128 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
b819a390 129 cp->posix = posix;
93daa011 130 cp->buf.multibyte = STRING_MULTIBYTE (pattern);
89ad6492 131 cp->buf.charset_unibyte = charset_unibyte;
d8c85250
CY
132 if (STRINGP (Vsearch_spaces_regexp))
133 cp->whitespace_regexp = Vsearch_spaces_regexp;
134 else
135 cp->whitespace_regexp = Qnil;
136
e7b2dd2e
RS
137 /* rms: I think BLOCK_INPUT is not needed here any more,
138 because regex.c defines malloc to call xmalloc.
139 Using BLOCK_INPUT here means the debugger won't run if an error occurs.
140 So let's turn it off. */
141 /* BLOCK_INPUT; */
fb4a568d 142 old = re_set_syntax (RE_SYNTAX_EMACS
b819a390 143 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
d8c85250
CY
144
145 if (STRINGP (Vsearch_spaces_regexp))
42a5b22f 146 re_set_whitespace_regexp (SSDATA (Vsearch_spaces_regexp));
d8c85250
CY
147 else
148 re_set_whitespace_regexp (NULL);
bbc73b48 149
51b59d79 150 val = (char *) re_compile_pattern (SSDATA (pattern),
8f924df7 151 SBYTES (pattern), &cp->buf);
bbc73b48 152
58e95211
SM
153 /* If the compiled pattern hard codes some of the contents of the
154 syntax-table, it can only be reused with *this* syntax table. */
4b4deea2 155 cp->syntax_table = cp->buf.used_syntax ? BVAR (current_buffer, syntax_table) : Qt;
58e95211 156
bbc73b48
KH
157 re_set_whitespace_regexp (NULL);
158
b819a390 159 re_set_syntax (old);
4d7e6e51 160 /* unblock_input (); */
ca1d1d23 161 if (val)
06f77a8a 162 xsignal1 (Qinvalid_regexp, build_string (val));
1113d9db 163
487282dc 164 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
165}
166
6efc7887
RS
167/* Shrink each compiled regexp buffer in the cache
168 to the size actually used right now.
169 This is called from garbage collection. */
170
171void
971de7fb 172shrink_regexp_cache (void)
6efc7887 173{
a968f437 174 struct regexp_cache *cp;
6efc7887
RS
175
176 for (cp = searchbuf_head; cp != 0; cp = cp->next)
177 {
178 cp->buf.allocated = cp->buf.used;
38182d90 179 cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
6efc7887
RS
180 }
181}
182
54dd3310
SM
183/* Clear the regexp cache w.r.t. a particular syntax table,
184 because it was changed.
e5b94d44
CY
185 There is no danger of memory leak here because re_compile_pattern
186 automagically manages the memory in each re_pattern_buffer struct,
187 based on its `allocated' and `buffer' values. */
188void
971de7fb 189clear_regexp_cache (void)
e5b94d44
CY
190{
191 int i;
192
193 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
920fd1fc
EZ
194 /* It's tempting to compare with the syntax-table we've actually changed,
195 but it's not sufficient because char-table inheritance means that
54dd3310
SM
196 modifying one syntax-table can change others at the same time. */
197 if (!EQ (searchbufs[i].syntax_table, Qt))
198 searchbufs[i].regexp = Qnil;
e5b94d44
CY
199}
200
487282dc 201/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
202 the cache.
203 PATTERN is the pattern to compile.
facdc750 204 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
205 REGP is the structure that says where to store the "register"
206 values that will result from matching this pattern.
207 If it is 0, we should compile the pattern not to record any
208 subexpression bounds.
457882c2
PE
209 POSIX is true if we want full backtracking (POSIX style) for this pattern.
210 False means backtrack only enough to get a valid match. */
487282dc
KH
211
212struct re_pattern_buffer *
0063fdb1 213compile_pattern (Lisp_Object pattern, struct re_registers *regp,
457882c2 214 Lisp_Object translate, bool posix, bool multibyte)
487282dc
KH
215{
216 struct regexp_cache *cp, **cpp;
217
218 for (cpp = &searchbuf_head; ; cpp = &cp->next)
219 {
220 cp = *cpp;
f1b9c7c1
KR
221 /* Entries are initialized to nil, and may be set to nil by
222 compile_pattern_1 if the pattern isn't valid. Don't apply
49a5f770
KR
223 string accessors in those cases. However, compile_pattern_1
224 is only applied to the cache entry we pick here to reuse. So
225 nil should never appear before a non-nil entry. */
7c752c80 226 if (NILP (cp->regexp))
f1b9c7c1 227 goto compile_it;
d5db4077 228 if (SCHARS (cp->regexp) == SCHARS (pattern)
cf69b13e 229 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
1d288aef 230 && !NILP (Fstring_equal (cp->regexp, pattern))
59fab369 231 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
5679531d 232 && cp->posix == posix
58e95211 233 && (EQ (cp->syntax_table, Qt)
4b4deea2 234 || EQ (cp->syntax_table, BVAR (current_buffer, syntax_table)))
89ad6492
KH
235 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
236 && cp->buf.charset_unibyte == charset_unibyte)
487282dc
KH
237 break;
238
f1b9c7c1
KR
239 /* If we're at the end of the cache, compile into the nil cell
240 we found, or the last (least recently used) cell with a
241 string value. */
487282dc
KH
242 if (cp->next == 0)
243 {
f1b9c7c1 244 compile_it:
cfe0661d 245 compile_pattern_1 (cp, pattern, translate, posix);
487282dc
KH
246 break;
247 }
248 }
249
250 /* When we get here, cp (aka *cpp) contains the compiled pattern,
251 either because we found it in the cache or because we just compiled it.
252 Move it to the front of the queue to mark it as most recently used. */
253 *cpp = cp->next;
254 cp->next = searchbuf_head;
255 searchbuf_head = cp;
1113d9db 256
6639708c
RS
257 /* Advise the searching functions about the space we have allocated
258 for register data. */
259 if (regp)
260 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
261
78edd3b7 262 /* The compiled pattern can be used both for multibyte and unibyte
89ad6492
KH
263 target. But, we have to tell which the pattern is used for. */
264 cp->buf.target_multibyte = multibyte;
265
487282dc 266 return &cp->buf;
ca1d1d23
JB
267}
268
ca1d1d23 269\f
b819a390 270static Lisp_Object
457882c2 271looking_at_1 (Lisp_Object string, bool posix)
ca1d1d23
JB
272{
273 Lisp_Object val;
274 unsigned char *p1, *p2;
d311d28c
PE
275 ptrdiff_t s1, s2;
276 register ptrdiff_t i;
487282dc 277 struct re_pattern_buffer *bufp;
ca1d1d23 278
7074fde6
FP
279 if (running_asynch_code)
280 save_search_regs ();
281
910c747a 282 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
34dabdb7 283 set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
a098c930 284 BVAR (current_buffer, case_eqv_table));
910c747a 285
b7826503 286 CHECK_STRING (string);
ef887810
RS
287 bufp = compile_pattern (string,
288 (NILP (Vinhibit_changing_match_data)
289 ? &search_regs : NULL),
4b4deea2
TT
290 (!NILP (BVAR (current_buffer, case_fold_search))
291 ? BVAR (current_buffer, case_canon_table) : Qnil),
0c8533c6 292 posix,
4b4deea2 293 !NILP (BVAR (current_buffer, enable_multibyte_characters)));
ca1d1d23
JB
294
295 immediate_quit = 1;
296 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
297
298 /* Get pointers and sizes of the two strings
299 that make up the visible portion of the buffer. */
300
301 p1 = BEGV_ADDR;
fa8ed3e0 302 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 303 p2 = GAP_END_ADDR;
fa8ed3e0 304 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
305 if (s1 < 0)
306 {
307 p2 = p1;
fa8ed3e0 308 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
309 s1 = 0;
310 }
311 if (s2 < 0)
312 {
fa8ed3e0 313 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
314 s2 = 0;
315 }
8bb43c28
RS
316
317 re_match_object = Qnil;
177c0ea7 318
487282dc 319 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
ef887810
RS
320 PT_BYTE - BEGV_BYTE,
321 (NILP (Vinhibit_changing_match_data)
322 ? &search_regs : NULL),
fa8ed3e0 323 ZV_BYTE - BEGV_BYTE);
de182d70 324 immediate_quit = 0;
177c0ea7 325
ca1d1d23
JB
326 if (i == -2)
327 matcher_overflow ();
328
908589fd 329 val = (i >= 0 ? Qt : Qnil);
ef887810 330 if (NILP (Vinhibit_changing_match_data) && i >= 0)
fc30d803 331 {
fa8ed3e0
RS
332 for (i = 0; i < search_regs.num_regs; i++)
333 if (search_regs.start[i] >= 0)
334 {
335 search_regs.start[i]
336 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
fc30d803
SM
337 search_regs.end[i]
338 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
339 }
340 /* Set last_thing_searched only when match data is changed. */
ef887810 341 XSETBUFFER (last_thing_searched, current_buffer);
fc30d803 342 }
ef887810 343
ca1d1d23
JB
344 return val;
345}
346
b819a390 347DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
8c1a1077
PJ
348 doc: /* Return t if text after point matches regular expression REGEXP.
349This function modifies the match data that `match-beginning',
350`match-end' and `match-data' access; save and restore the match
351data if you want to preserve them. */)
5842a27b 352 (Lisp_Object regexp)
b819a390 353{
94f94972 354 return looking_at_1 (regexp, 0);
b819a390
RS
355}
356
357DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
8c1a1077
PJ
358 doc: /* Return t if text after point matches regular expression REGEXP.
359Find the longest match, in accord with Posix regular expression rules.
360This function modifies the match data that `match-beginning',
361`match-end' and `match-data' access; save and restore the match
362data if you want to preserve them. */)
5842a27b 363 (Lisp_Object regexp)
b819a390 364{
94f94972 365 return looking_at_1 (regexp, 1);
b819a390
RS
366}
367\f
368static Lisp_Object
457882c2
PE
369string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
370 bool posix)
ca1d1d23 371{
d311d28c 372 ptrdiff_t val;
487282dc 373 struct re_pattern_buffer *bufp;
d311d28c
PE
374 EMACS_INT pos;
375 ptrdiff_t pos_byte, i;
ca1d1d23 376
7074fde6
FP
377 if (running_asynch_code)
378 save_search_regs ();
379
b7826503
PJ
380 CHECK_STRING (regexp);
381 CHECK_STRING (string);
ca1d1d23
JB
382
383 if (NILP (start))
0c8533c6 384 pos = 0, pos_byte = 0;
ca1d1d23
JB
385 else
386 {
d311d28c 387 ptrdiff_t len = SCHARS (string);
ca1d1d23 388
b7826503 389 CHECK_NUMBER (start);
0c8533c6
RS
390 pos = XINT (start);
391 if (pos < 0 && -pos <= len)
392 pos = len + pos;
393 else if (0 > pos || pos > len)
ca1d1d23 394 args_out_of_range (string, start);
0c8533c6 395 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
396 }
397
910c747a 398 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
34dabdb7 399 set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
a098c930 400 BVAR (current_buffer, case_eqv_table));
910c747a 401
ef887810
RS
402 bufp = compile_pattern (regexp,
403 (NILP (Vinhibit_changing_match_data)
404 ? &search_regs : NULL),
4b4deea2
TT
405 (!NILP (BVAR (current_buffer, case_fold_search))
406 ? BVAR (current_buffer, case_canon_table) : Qnil),
0c8533c6
RS
407 posix,
408 STRING_MULTIBYTE (string));
ca1d1d23 409 immediate_quit = 1;
8bb43c28 410 re_match_object = string;
177c0ea7 411
51b59d79 412 val = re_search (bufp, SSDATA (string),
d5db4077
KR
413 SBYTES (string), pos_byte,
414 SBYTES (string) - pos_byte,
ef887810
RS
415 (NILP (Vinhibit_changing_match_data)
416 ? &search_regs : NULL));
ca1d1d23 417 immediate_quit = 0;
ef887810
RS
418
419 /* Set last_thing_searched only when match data is changed. */
420 if (NILP (Vinhibit_changing_match_data))
421 last_thing_searched = Qt;
422
ca1d1d23
JB
423 if (val == -2)
424 matcher_overflow ();
425 if (val < 0) return Qnil;
0c8533c6 426
ef887810
RS
427 if (NILP (Vinhibit_changing_match_data))
428 for (i = 0; i < search_regs.num_regs; i++)
429 if (search_regs.start[i] >= 0)
430 {
431 search_regs.start[i]
432 = string_byte_to_char (string, search_regs.start[i]);
433 search_regs.end[i]
434 = string_byte_to_char (string, search_regs.end[i]);
435 }
0c8533c6
RS
436
437 return make_number (string_byte_to_char (string, val));
ca1d1d23 438}
e59a8453 439
a7ca3326 440DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
8c1a1077 441 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
b85acc4b 442Matching ignores case if `case-fold-search' is non-nil.
8c1a1077
PJ
443If third arg START is non-nil, start search at that index in STRING.
444For index of first char beyond the match, do (match-end 0).
445`match-end' and `match-beginning' also give indices of substrings
2bd2f32d
RS
446matched by parenthesis constructs in the pattern.
447
448You can use the function `match-string' to extract the substrings
449matched by the parenthesis constructions in REGEXP. */)
5842a27b 450 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
b819a390
RS
451{
452 return string_match_1 (regexp, string, start, 0);
453}
454
455DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
8c1a1077
PJ
456 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
457Find the longest match, in accord with Posix regular expression rules.
458Case is ignored if `case-fold-search' is non-nil in the current buffer.
459If third arg START is non-nil, start search at that index in STRING.
460For index of first char beyond the match, do (match-end 0).
461`match-end' and `match-beginning' also give indices of substrings
462matched by parenthesis constructs in the pattern. */)
5842a27b 463 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
b819a390
RS
464{
465 return string_match_1 (regexp, string, start, 1);
466}
467
e59a8453
RS
468/* Match REGEXP against STRING, searching all of STRING,
469 and return the index of the match, or negative on failure.
470 This does not clobber the match data. */
471
d311d28c 472ptrdiff_t
971de7fb 473fast_string_match (Lisp_Object regexp, Lisp_Object string)
e59a8453 474{
d311d28c 475 ptrdiff_t val;
487282dc 476 struct re_pattern_buffer *bufp;
e59a8453 477
facdc750
RS
478 bufp = compile_pattern (regexp, 0, Qnil,
479 0, STRING_MULTIBYTE (string));
e59a8453 480 immediate_quit = 1;
8bb43c28 481 re_match_object = string;
177c0ea7 482
51b59d79 483 val = re_search (bufp, SSDATA (string),
d5db4077
KR
484 SBYTES (string), 0,
485 SBYTES (string), 0);
e59a8453
RS
486 immediate_quit = 0;
487 return val;
488}
5679531d
KH
489
490/* Match REGEXP against STRING, searching all of STRING ignoring case,
491 and return the index of the match, or negative on failure.
0c8533c6
RS
492 This does not clobber the match data.
493 We assume that STRING contains single-byte characters. */
5679531d 494
d311d28c 495ptrdiff_t
d923b542
DA
496fast_c_string_match_ignore_case (Lisp_Object regexp,
497 const char *string, ptrdiff_t len)
5679531d 498{
d311d28c 499 ptrdiff_t val;
5679531d 500 struct re_pattern_buffer *bufp;
5679531d 501
0c8533c6 502 regexp = string_make_unibyte (regexp);
b4577c63 503 re_match_object = Qt;
5679531d 504 bufp = compile_pattern (regexp, 0,
0190922f 505 Vascii_canon_table, 0,
f8bd51c4 506 0);
5679531d
KH
507 immediate_quit = 1;
508 val = re_search (bufp, string, len, 0, len, 0);
509 immediate_quit = 0;
510 return val;
511}
be5f4dfb
KH
512
513/* Like fast_string_match but ignore case. */
514
d311d28c 515ptrdiff_t
971de7fb 516fast_string_match_ignore_case (Lisp_Object regexp, Lisp_Object string)
be5f4dfb 517{
d311d28c 518 ptrdiff_t val;
be5f4dfb
KH
519 struct re_pattern_buffer *bufp;
520
0190922f 521 bufp = compile_pattern (regexp, 0, Vascii_canon_table,
be5f4dfb
KH
522 0, STRING_MULTIBYTE (string));
523 immediate_quit = 1;
524 re_match_object = string;
525
51b59d79 526 val = re_search (bufp, SSDATA (string),
be5f4dfb
KH
527 SBYTES (string), 0,
528 SBYTES (string), 0);
529 immediate_quit = 0;
530 return val;
531}
ca1d1d23 532\f
a80ce213 533/* Match REGEXP against the characters after POS to LIMIT, and return
8b13507a
KH
534 the number of matched characters. If STRING is non-nil, match
535 against the characters in it. In that case, POS and LIMIT are
536 indices into the string. This function doesn't modify the match
537 data. */
538
d311d28c 539ptrdiff_t
0063fdb1
DA
540fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
541 ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string)
8b13507a 542{
0063fdb1 543 bool multibyte;
8b13507a
KH
544 struct re_pattern_buffer *buf;
545 unsigned char *p1, *p2;
d311d28c
PE
546 ptrdiff_t s1, s2;
547 ptrdiff_t len;
78edd3b7 548
8b13507a
KH
549 if (STRINGP (string))
550 {
551 if (pos_byte < 0)
552 pos_byte = string_char_to_byte (string, pos);
553 if (limit_byte < 0)
554 limit_byte = string_char_to_byte (string, limit);
555 p1 = NULL;
556 s1 = 0;
557 p2 = SDATA (string);
558 s2 = SBYTES (string);
559 re_match_object = string;
560 multibyte = STRING_MULTIBYTE (string);
561 }
562 else
563 {
564 if (pos_byte < 0)
565 pos_byte = CHAR_TO_BYTE (pos);
566 if (limit_byte < 0)
567 limit_byte = CHAR_TO_BYTE (limit);
568 pos_byte -= BEGV_BYTE;
569 limit_byte -= BEGV_BYTE;
570 p1 = BEGV_ADDR;
571 s1 = GPT_BYTE - BEGV_BYTE;
572 p2 = GAP_END_ADDR;
573 s2 = ZV_BYTE - GPT_BYTE;
574 if (s1 < 0)
575 {
576 p2 = p1;
577 s2 = ZV_BYTE - BEGV_BYTE;
578 s1 = 0;
579 }
580 if (s2 < 0)
581 {
582 s1 = ZV_BYTE - BEGV_BYTE;
583 s2 = 0;
584 }
585 re_match_object = Qnil;
4b4deea2 586 multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
8b13507a
KH
587 }
588
589 buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
590 immediate_quit = 1;
591 len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
592 pos_byte, NULL, limit_byte);
593 immediate_quit = 0;
594
595 return len;
596}
597
598\f
9169c321
JB
599/* The newline cache: remembering which sections of text have no newlines. */
600
e30b79c1 601/* If the user has requested the long scans caching, make sure it's on.
9169c321
JB
602 Otherwise, make sure it's off.
603 This is our cheezy way of associating an action with the change of
604 state of a buffer-local variable. */
605static void
971de7fb 606newline_cache_on_off (struct buffer *buf)
9169c321 607{
e30b79c1 608 if (NILP (BVAR (buf, cache_long_scans)))
9169c321
JB
609 {
610 /* It should be off. */
611 if (buf->newline_cache)
612 {
613 free_region_cache (buf->newline_cache);
614 buf->newline_cache = 0;
615 }
616 }
617 else
618 {
619 /* It should be on. */
620 if (buf->newline_cache == 0)
621 buf->newline_cache = new_region_cache ();
622 }
623}
624
625\f
b5426561 626/* Search for COUNT newlines between START/START_BYTE and END/END_BYTE.
9169c321
JB
627
628 If COUNT is positive, search forwards; END must be >= START.
629 If COUNT is negative, search backwards for the -COUNTth instance;
630 END must be <= START.
631 If COUNT is zero, do anything you please; run rogue, for all I care.
632
633 If END is zero, use BEGV or ZV instead, as appropriate for the
634 direction indicated by COUNT.
ffd56f97
JB
635
636 If we find COUNT instances, set *SHORTAGE to zero, and return the
a9f2a45f 637 position past the COUNTth match. Note that for reverse motion
5bfe95c9 638 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 639
9169c321 640 If we don't find COUNT instances before reaching END, set *SHORTAGE
a84b7c53 641 to the number of newlines left unfound, and return END.
ffd56f97 642
2a14a4f1
DA
643 If BYTEPOS is not NULL, set *BYTEPOS to the byte position corresponding
644 to the returned character position.
645
9fa1de30 646 If ALLOW_QUIT, set immediate_quit. That's good to do
087a5f81
RS
647 except when inside redisplay. */
648
d311d28c 649ptrdiff_t
b5426561
DA
650find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
651 ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *shortage,
652 ptrdiff_t *bytepos, bool allow_quit)
ca1d1d23 653{
9169c321 654 struct region_cache *newline_cache;
177c0ea7 655 int direction;
ffd56f97 656
9169c321
JB
657 if (count > 0)
658 {
659 direction = 1;
8ca30920
DA
660 if (!end)
661 end = ZV, end_byte = ZV_BYTE;
9169c321
JB
662 }
663 else
664 {
665 direction = -1;
a84b7c53 666 if (!end)
8ca30920 667 end = BEGV, end_byte = BEGV_BYTE;
9169c321 668 }
8ca30920
DA
669 if (end_byte == -1)
670 end_byte = CHAR_TO_BYTE (end);
ffd56f97 671
9169c321
JB
672 newline_cache_on_off (current_buffer);
673 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
674
675 if (shortage != 0)
676 *shortage = 0;
677
087a5f81 678 immediate_quit = allow_quit;
ca1d1d23 679
ffd56f97 680 if (count > 0)
9169c321 681 while (start != end)
ca1d1d23 682 {
9169c321
JB
683 /* Our innermost scanning loop is very simple; it doesn't know
684 about gaps, buffer ends, or the newline cache. ceiling is
685 the position of the last character before the next such
686 obstacle --- the last character the dumb search loop should
687 examine. */
2a14a4f1 688 ptrdiff_t tem, ceiling_byte = end_byte - 1;
9169c321
JB
689
690 /* If we're looking for a newline, consult the newline cache
691 to see where we can avoid some scanning. */
a84b7c53 692 if (newline_cache)
9169c321 693 {
0065d054 694 ptrdiff_t next_change;
9169c321
JB
695 immediate_quit = 0;
696 while (region_cache_forward
c8b9f1bc
EZ
697 (current_buffer, newline_cache, start, &next_change))
698 start = next_change;
cbe0db0d 699 immediate_quit = allow_quit;
9169c321 700
c8b9f1bc
EZ
701 start_byte = CHAR_TO_BYTE (start);
702
fa8ed3e0
RS
703 /* START should never be after END. */
704 if (start_byte > ceiling_byte)
705 start_byte = ceiling_byte;
9169c321
JB
706
707 /* Now the text after start is an unknown region, and
708 next_change is the position of the next known region. */
c8b9f1bc 709 ceiling_byte = min (CHAR_TO_BYTE (next_change) - 1, ceiling_byte);
9169c321 710 }
b5426561 711 else if (start_byte == -1)
c8b9f1bc 712 start_byte = CHAR_TO_BYTE (start);
9169c321
JB
713
714 /* The dumb loop can only scan text stored in contiguous
715 bytes. BUFFER_CEILING_OF returns the last character
716 position that is contiguous, so the ceiling is the
717 position after that. */
67ce527d
KH
718 tem = BUFFER_CEILING_OF (start_byte);
719 ceiling_byte = min (tem, ceiling_byte);
9169c321
JB
720
721 {
177c0ea7 722 /* The termination address of the dumb loop. */
fa8ed3e0
RS
723 register unsigned char *ceiling_addr
724 = BYTE_POS_ADDR (ceiling_byte) + 1;
725 register unsigned char *cursor
726 = BYTE_POS_ADDR (start_byte);
9169c321
JB
727 unsigned char *base = cursor;
728
729 while (cursor < ceiling_addr)
730 {
9169c321 731 /* The dumb loop. */
a84b7c53 732 unsigned char *nl = memchr (cursor, '\n', ceiling_addr - cursor);
9169c321
JB
733
734 /* If we're looking for newlines, cache the fact that
735 the region from start to cursor is free of them. */
a84b7c53
PE
736 if (newline_cache)
737 {
738 unsigned char *low = cursor;
739 unsigned char *lim = nl ? nl : ceiling_addr;
740 know_region_cache (current_buffer, newline_cache,
741 BYTE_TO_CHAR (low - base + start_byte),
742 BYTE_TO_CHAR (lim - base + start_byte));
743 }
744
745 if (! nl)
746 break;
747
748 if (--count == 0)
749 {
750 immediate_quit = 0;
2a14a4f1
DA
751 if (bytepos)
752 *bytepos = nl + 1 - base + start_byte;
a84b7c53
PE
753 return BYTE_TO_CHAR (nl + 1 - base + start_byte);
754 }
755 cursor = nl + 1;
9169c321
JB
756 }
757
2a14a4f1
DA
758 start_byte += ceiling_addr - base;
759 start = BYTE_TO_CHAR (start_byte);
9169c321 760 }
ca1d1d23
JB
761 }
762 else
9169c321
JB
763 while (start > end)
764 {
765 /* The last character to check before the next obstacle. */
2a14a4f1 766 ptrdiff_t tem, ceiling_byte = end_byte;
9169c321
JB
767
768 /* Consult the newline cache, if appropriate. */
a84b7c53 769 if (newline_cache)
9169c321 770 {
0065d054 771 ptrdiff_t next_change;
9169c321
JB
772 immediate_quit = 0;
773 while (region_cache_backward
c8b9f1bc
EZ
774 (current_buffer, newline_cache, start, &next_change))
775 start = next_change;
cbe0db0d 776 immediate_quit = allow_quit;
9169c321 777
c8b9f1bc
EZ
778 start_byte = CHAR_TO_BYTE (start);
779
9169c321 780 /* Start should never be at or before end. */
fa8ed3e0
RS
781 if (start_byte <= ceiling_byte)
782 start_byte = ceiling_byte + 1;
9169c321
JB
783
784 /* Now the text before start is an unknown region, and
785 next_change is the position of the next known region. */
c8b9f1bc 786 ceiling_byte = max (CHAR_TO_BYTE (next_change), ceiling_byte);
9169c321 787 }
b5426561 788 else if (start_byte == -1)
c8b9f1bc 789 start_byte = CHAR_TO_BYTE (start);
9169c321
JB
790
791 /* Stop scanning before the gap. */
67ce527d
KH
792 tem = BUFFER_FLOOR_OF (start_byte - 1);
793 ceiling_byte = max (tem, ceiling_byte);
9169c321
JB
794
795 {
796 /* The termination address of the dumb loop. */
fa8ed3e0
RS
797 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
798 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
799 unsigned char *base = cursor;
800
801 while (cursor >= ceiling_addr)
802 {
a84b7c53
PE
803 unsigned char *nl = memrchr (ceiling_addr, '\n',
804 cursor + 1 - ceiling_addr);
9169c321
JB
805
806 /* If we're looking for newlines, cache the fact that
807 the region from after the cursor to start is free of them. */
a84b7c53
PE
808 if (newline_cache)
809 {
810 unsigned char *low = nl ? nl : ceiling_addr - 1;
811 unsigned char *lim = cursor;
812 know_region_cache (current_buffer, newline_cache,
813 BYTE_TO_CHAR (low - base + start_byte),
814 BYTE_TO_CHAR (lim - base + start_byte));
815 }
816
817 if (! nl)
818 break;
819
820 if (++count >= 0)
821 {
822 immediate_quit = 0;
2a14a4f1
DA
823 if (bytepos)
824 *bytepos = nl - base + start_byte;
a84b7c53
PE
825 return BYTE_TO_CHAR (nl - base + start_byte);
826 }
827 cursor = nl - 1;
9169c321
JB
828 }
829
2a14a4f1
DA
830 start_byte += ceiling_addr - 1 - base;
831 start = BYTE_TO_CHAR (start_byte);
9169c321
JB
832 }
833 }
834
ca1d1d23 835 immediate_quit = 0;
2a14a4f1 836 if (shortage)
ffd56f97 837 *shortage = count * direction;
2a14a4f1
DA
838 if (bytepos)
839 {
840 *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
841 eassert (*bytepos == CHAR_TO_BYTE (start));
842 }
9169c321 843 return start;
ca1d1d23 844}
fa8ed3e0 845\f
a84b7c53 846/* Search for COUNT instances of a line boundary.
fa8ed3e0
RS
847 Start at START. If COUNT is negative, search backwards.
848
849 We report the resulting position by calling TEMP_SET_PT_BOTH.
850
851 If we find COUNT instances. we position after (always after,
852 even if scanning backwards) the COUNTth match, and return 0.
853
854 If we don't find COUNT instances before reaching the end of the
855 buffer (or the beginning, if scanning backwards), we return
856 the number of line boundaries left unfound, and position at
857 the limit we bumped up against.
858
9fa1de30 859 If ALLOW_QUIT, set immediate_quit. That's good to do
d5d57b92 860 except in special cases. */
ca1d1d23 861
d2b36813 862ptrdiff_t
d311d28c
PE
863scan_newline (ptrdiff_t start, ptrdiff_t start_byte,
864 ptrdiff_t limit, ptrdiff_t limit_byte,
d2b36813 865 ptrdiff_t count, bool allow_quit)
63fa018d 866{
d2b36813 867 ptrdiff_t charpos, bytepos, shortage;
fa8ed3e0 868
d2b36813
DA
869 charpos = find_newline (start, start_byte, limit, limit_byte,
870 count, &shortage, &bytepos, allow_quit);
871 if (shortage)
872 TEMP_SET_PT_BOTH (limit, limit_byte);
fa8ed3e0 873 else
d2b36813
DA
874 TEMP_SET_PT_BOTH (charpos, bytepos);
875 return shortage;
63fa018d
RS
876}
877
a611149e
EZ
878/* Like find_newline, but doesn't allow QUITting and doesn't return
879 SHORTAGE. */
d311d28c 880ptrdiff_t
b5426561
DA
881find_newline_no_quit (ptrdiff_t from, ptrdiff_t frombyte,
882 ptrdiff_t cnt, ptrdiff_t *bytepos)
ca1d1d23 883{
b5426561 884 return find_newline (from, frombyte, 0, -1, cnt, NULL, bytepos, 0);
9169c321
JB
885}
886
a611149e
EZ
887/* Like find_newline, but returns position before the newline, not
888 after, and only search up to TO.
889 This isn't just find_newline_no_quit (...)-1, because you might hit TO. */
fa8ed3e0 890
d311d28c 891ptrdiff_t
2a14a4f1
DA
892find_before_next_newline (ptrdiff_t from, ptrdiff_t to,
893 ptrdiff_t cnt, ptrdiff_t *bytepos)
9169c321 894{
d311d28c 895 ptrdiff_t shortage;
b5426561 896 ptrdiff_t pos = find_newline (from, -1, to, -1, cnt, &shortage, bytepos, 1);
9169c321
JB
897
898 if (shortage == 0)
2a14a4f1
DA
899 {
900 if (bytepos)
901 DEC_BOTH (pos, *bytepos);
902 else
903 pos--;
904 }
9169c321 905 return pos;
ca1d1d23
JB
906}
907\f
ca1d1d23
JB
908/* Subroutines of Lisp buffer search functions. */
909
910static Lisp_Object
c098fdb8 911search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror,
457882c2 912 Lisp_Object count, int direction, int RE, bool posix)
ca1d1d23 913{
457882c2 914 EMACS_INT np;
d311d28c
PE
915 EMACS_INT lim;
916 ptrdiff_t lim_byte;
a53e2e89 917 EMACS_INT n = direction;
ca1d1d23
JB
918
919 if (!NILP (count))
920 {
b7826503 921 CHECK_NUMBER (count);
ca1d1d23
JB
922 n *= XINT (count);
923 }
924
b7826503 925 CHECK_STRING (string);
ca1d1d23 926 if (NILP (bound))
9f43ad85
RS
927 {
928 if (n > 0)
929 lim = ZV, lim_byte = ZV_BYTE;
930 else
931 lim = BEGV, lim_byte = BEGV_BYTE;
932 }
ca1d1d23
JB
933 else
934 {
b7826503 935 CHECK_NUMBER_COERCE_MARKER (bound);
ca1d1d23 936 lim = XINT (bound);
6ec8bbd2 937 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
938 error ("Invalid search bound (wrong side of point)");
939 if (lim > ZV)
9f43ad85 940 lim = ZV, lim_byte = ZV_BYTE;
588d2fd5 941 else if (lim < BEGV)
9f43ad85 942 lim = BEGV, lim_byte = BEGV_BYTE;
588d2fd5
KH
943 else
944 lim_byte = CHAR_TO_BYTE (lim);
ca1d1d23
JB
945 }
946
910c747a 947 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
34dabdb7 948 set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
a098c930 949 BVAR (current_buffer, case_eqv_table));
910c747a 950
9f43ad85 951 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
4b4deea2
TT
952 (!NILP (BVAR (current_buffer, case_fold_search))
953 ? BVAR (current_buffer, case_canon_table)
3135e9fd 954 : Qnil),
4b4deea2
TT
955 (!NILP (BVAR (current_buffer, case_fold_search))
956 ? BVAR (current_buffer, case_eqv_table)
3135e9fd 957 : Qnil),
b819a390 958 posix);
ca1d1d23
JB
959 if (np <= 0)
960 {
961 if (NILP (noerror))
06f77a8a
KS
962 xsignal1 (Qsearch_failed, string);
963
ca1d1d23
JB
964 if (!EQ (noerror, Qt))
965 {
8ca30920 966 eassert (BEGV <= lim && lim <= ZV);
9f43ad85 967 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
968 return Qnil;
969#if 0 /* This would be clean, but maybe programs depend on
970 a value of nil here. */
481399bf 971 np = lim;
a5f217b8 972#endif
ca1d1d23 973 }
481399bf
RS
974 else
975 return Qnil;
ca1d1d23
JB
976 }
977
8ca30920 978 eassert (BEGV <= np && np <= ZV);
ca1d1d23
JB
979 SET_PT (np);
980
981 return make_number (np);
982}
983\f
457882c2 984/* Return true if REGEXP it matches just one constant string. */
fa8ed3e0 985
457882c2 986static bool
971de7fb 987trivial_regexp_p (Lisp_Object regexp)
b6d6a51c 988{
d311d28c 989 ptrdiff_t len = SBYTES (regexp);
d5db4077 990 unsigned char *s = SDATA (regexp);
b6d6a51c
KH
991 while (--len >= 0)
992 {
993 switch (*s++)
994 {
995 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
996 return 0;
997 case '\\':
998 if (--len < 0)
999 return 0;
1000 switch (*s++)
1001 {
1002 case '|': case '(': case ')': case '`': case '\'': case 'b':
1003 case 'B': case '<': case '>': case 'w': case 'W': case 's':
29f89fe7 1004 case 'S': case '=': case '{': case '}': case '_':
5679531d 1005 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 1006 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
1007 case '6': case '7': case '8': case '9':
1008 return 0;
1009 }
1010 }
1011 }
1012 return 1;
1013}
1014
ca325161 1015/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 1016 starting at position POS and stopping at position LIM,
b819a390 1017 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
1018 a regular expression if RE is true.
1019
1020 If N is positive, searching is forward and LIM must be greater than POS.
1021 If N is negative, searching is backward and LIM must be less than POS.
1022
facdc750 1023 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 1024 or else the position at the beginning of the Nth occurrence
b819a390
RS
1025 (if searching backward) or the end (if searching forward).
1026
1027 POSIX is nonzero if we want full backtracking (POSIX style)
1028 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 1029
aff2ce94
RS
1030#define TRANSLATE(out, trt, d) \
1031do \
1032 { \
1033 if (! NILP (trt)) \
1034 { \
1035 Lisp_Object temp; \
1036 temp = Faref (trt, make_number (d)); \
1037 if (INTEGERP (temp)) \
1038 out = XINT (temp); \
1039 else \
1040 out = d; \
1041 } \
1042 else \
1043 out = d; \
1044 } \
1045while (0)
facdc750 1046
ef887810
RS
1047/* Only used in search_buffer, to record the end position of the match
1048 when searching regexps and SEARCH_REGS should not be changed
1049 (i.e. Vinhibit_changing_match_data is non-nil). */
1050static struct re_registers search_regs_1;
1051
e7deaab0 1052static EMACS_INT
d311d28c
PE
1053search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1054 ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
457882c2 1055 int RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
ca1d1d23 1056{
d311d28c
PE
1057 ptrdiff_t len = SCHARS (string);
1058 ptrdiff_t len_byte = SBYTES (string);
1059 register ptrdiff_t i;
ca1d1d23 1060
7074fde6
FP
1061 if (running_asynch_code)
1062 save_search_regs ();
1063
a7e4cdde 1064 /* Searching 0 times means don't move. */
ca1d1d23 1065 /* Null string is found at starting position. */
a7e4cdde 1066 if (len == 0 || n == 0)
ca325161 1067 {
0353b28f 1068 set_search_regs (pos_byte, 0);
ca325161
RS
1069 return pos;
1070 }
3f57a499 1071
41a33295 1072 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
ca1d1d23 1073 {
facdc750 1074 unsigned char *p1, *p2;
d311d28c 1075 ptrdiff_t s1, s2;
487282dc
KH
1076 struct re_pattern_buffer *bufp;
1077
ef887810
RS
1078 bufp = compile_pattern (string,
1079 (NILP (Vinhibit_changing_match_data)
1080 ? &search_regs : &search_regs_1),
1081 trt, posix,
4b4deea2 1082 !NILP (BVAR (current_buffer, enable_multibyte_characters)));
ca1d1d23 1083
ca1d1d23
JB
1084 immediate_quit = 1; /* Quit immediately if user types ^G,
1085 because letting this function finish
1086 can take too long. */
1087 QUIT; /* Do a pending quit right away,
1088 to avoid paradoxical behavior */
1089 /* Get pointers and sizes of the two strings
1090 that make up the visible portion of the buffer. */
1091
1092 p1 = BEGV_ADDR;
fa8ed3e0 1093 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1094 p2 = GAP_END_ADDR;
fa8ed3e0 1095 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1096 if (s1 < 0)
1097 {
1098 p2 = p1;
fa8ed3e0 1099 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1100 s1 = 0;
1101 }
1102 if (s2 < 0)
1103 {
fa8ed3e0 1104 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1105 s2 = 0;
1106 }
8bb43c28 1107 re_match_object = Qnil;
177c0ea7 1108
ca1d1d23
JB
1109 while (n < 0)
1110 {
d311d28c 1111 ptrdiff_t val;
52c55cc7 1112
487282dc 1113 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b 1114 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
ef887810
RS
1115 (NILP (Vinhibit_changing_match_data)
1116 ? &search_regs : &search_regs_1),
42db823b 1117 /* Don't allow match past current point */
4996330b 1118 pos_byte - BEGV_BYTE);
ca1d1d23 1119 if (val == -2)
b6d6a51c
KH
1120 {
1121 matcher_overflow ();
1122 }
ca1d1d23
JB
1123 if (val >= 0)
1124 {
ef887810
RS
1125 if (NILP (Vinhibit_changing_match_data))
1126 {
1127 pos_byte = search_regs.start[0] + BEGV_BYTE;
1128 for (i = 0; i < search_regs.num_regs; i++)
1129 if (search_regs.start[i] >= 0)
1130 {
1131 search_regs.start[i]
1132 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1133 search_regs.end[i]
1134 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1135 }
1136 XSETBUFFER (last_thing_searched, current_buffer);
1137 /* Set pos to the new position. */
1138 pos = search_regs.start[0];
1139 }
1140 else
1141 {
1142 pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1143 /* Set pos to the new position. */
1144 pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1145 }
ca1d1d23
JB
1146 }
1147 else
1148 {
1149 immediate_quit = 0;
1150 return (n);
1151 }
1152 n++;
1153 }
1154 while (n > 0)
1155 {
d311d28c 1156 ptrdiff_t val;
52c55cc7 1157
487282dc 1158 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b 1159 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
ef887810
RS
1160 (NILP (Vinhibit_changing_match_data)
1161 ? &search_regs : &search_regs_1),
4996330b 1162 lim_byte - BEGV_BYTE);
ca1d1d23 1163 if (val == -2)
b6d6a51c
KH
1164 {
1165 matcher_overflow ();
1166 }
ca1d1d23
JB
1167 if (val >= 0)
1168 {
ef887810
RS
1169 if (NILP (Vinhibit_changing_match_data))
1170 {
1171 pos_byte = search_regs.end[0] + BEGV_BYTE;
1172 for (i = 0; i < search_regs.num_regs; i++)
1173 if (search_regs.start[i] >= 0)
1174 {
1175 search_regs.start[i]
1176 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1177 search_regs.end[i]
1178 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1179 }
1180 XSETBUFFER (last_thing_searched, current_buffer);
1181 pos = search_regs.end[0];
1182 }
1183 else
1184 {
1185 pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1186 pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1187 }
ca1d1d23
JB
1188 }
1189 else
1190 {
1191 immediate_quit = 0;
1192 return (0 - n);
1193 }
1194 n--;
1195 }
1196 immediate_quit = 0;
1197 return (pos);
1198 }
1199 else /* non-RE case */
1200 {
facdc750 1201 unsigned char *raw_pattern, *pat;
d311d28c
PE
1202 ptrdiff_t raw_pattern_size;
1203 ptrdiff_t raw_pattern_size_byte;
facdc750 1204 unsigned char *patbuf;
0063fdb1 1205 bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
a967ed62 1206 unsigned char *base_pat;
49e44e81
KH
1207 /* Set to positive if we find a non-ASCII char that need
1208 translation. Otherwise set to zero later. */
1209 int char_base = -1;
457882c2 1210 bool boyer_moore_ok = 1;
facdc750
RS
1211
1212 /* MULTIBYTE says whether the text to be searched is multibyte.
1213 We must convert PATTERN to match that, or we will not really
1214 find things right. */
1215
1216 if (multibyte == STRING_MULTIBYTE (string))
1217 {
51b59d79 1218 raw_pattern = SDATA (string);
d5db4077
KR
1219 raw_pattern_size = SCHARS (string);
1220 raw_pattern_size_byte = SBYTES (string);
facdc750
RS
1221 }
1222 else if (multibyte)
1223 {
d5db4077 1224 raw_pattern_size = SCHARS (string);
facdc750 1225 raw_pattern_size_byte
d5db4077 1226 = count_size_as_multibyte (SDATA (string),
facdc750 1227 raw_pattern_size);
38182d90 1228 raw_pattern = alloca (raw_pattern_size_byte + 1);
d5db4077
KR
1229 copy_text (SDATA (string), raw_pattern,
1230 SCHARS (string), 0, 1);
facdc750
RS
1231 }
1232 else
1233 {
1234 /* Converting multibyte to single-byte.
1235
1236 ??? Perhaps this conversion should be done in a special way
1237 by subtracting nonascii-insert-offset from each non-ASCII char,
1238 so that only the multibyte chars which really correspond to
1239 the chosen single-byte character set can possibly match. */
d5db4077
KR
1240 raw_pattern_size = SCHARS (string);
1241 raw_pattern_size_byte = SCHARS (string);
38182d90 1242 raw_pattern = alloca (raw_pattern_size + 1);
d5db4077
KR
1243 copy_text (SDATA (string), raw_pattern,
1244 SBYTES (string), 1, 0);
facdc750
RS
1245 }
1246
1247 /* Copy and optionally translate the pattern. */
1248 len = raw_pattern_size;
1249 len_byte = raw_pattern_size_byte;
38182d90 1250 patbuf = alloca (len * MAX_MULTIBYTE_LENGTH);
facdc750
RS
1251 pat = patbuf;
1252 base_pat = raw_pattern;
1253 if (multibyte)
1254 {
a17ae96a
KH
1255 /* Fill patbuf by translated characters in STRING while
1256 checking if we can use boyer-moore search. If TRT is
1257 non-nil, we can use boyer-moore search only if TRT can be
1258 represented by the byte array of 256 elements. For that,
9858f6c3 1259 all non-ASCII case-equivalents of all case-sensitive
e79d60fe 1260 characters in STRING must belong to the same character
68abf867
KH
1261 group (two characters belong to the same group iff their
1262 multibyte forms are the same except for the last byte;
1263 i.e. every 64 characters form a group; U+0000..U+003F,
e79d60fe 1264 U+0040..U+007F, U+0080..U+00BF, ...). */
a17ae96a 1265
facdc750
RS
1266 while (--len >= 0)
1267 {
a17ae96a 1268 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
aff2ce94 1269 int c, translated, inverse;
a17ae96a 1270 int in_charlen, charlen;
facdc750
RS
1271
1272 /* If we got here and the RE flag is set, it's because we're
1273 dealing with a regexp known to be trivial, so the backslash
1274 just quotes the next character. */
1275 if (RE && *base_pat == '\\')
1276 {
1277 len--;
62221f22 1278 raw_pattern_size--;
facdc750
RS
1279 len_byte--;
1280 base_pat++;
1281 }
1282
62a6e103 1283 c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
040272ce 1284
a17ae96a 1285 if (NILP (trt))
facdc750 1286 {
a17ae96a
KH
1287 str = base_pat;
1288 charlen = in_charlen;
1289 }
1290 else
1291 {
1292 /* Translate the character. */
1293 TRANSLATE (translated, trt, c);
1294 charlen = CHAR_STRING (translated, str_base);
1295 str = str_base;
1296
1297 /* Check if C has any other case-equivalents. */
1298 TRANSLATE (inverse, inverse_trt, c);
1299 /* If so, check if we can use boyer-moore. */
1300 if (c != inverse && boyer_moore_ok)
1301 {
1302 /* Check if all equivalents belong to the same
1303 group of characters. Note that the check of C
49e44e81
KH
1304 itself is done by the last iteration. */
1305 int this_char_base = -1;
1306
1307 while (boyer_moore_ok)
a17ae96a 1308 {
49e44e81 1309 if (ASCII_BYTE_P (inverse))
a17ae96a 1310 {
49e44e81
KH
1311 if (this_char_base > 0)
1312 boyer_moore_ok = 0;
1313 else
dafaabd1 1314 this_char_base = 0;
a17ae96a 1315 }
49e44e81
KH
1316 else if (CHAR_BYTE8_P (inverse))
1317 /* Boyer-moore search can't handle a
1318 translation of an eight-bit
1319 character. */
1320 boyer_moore_ok = 0;
1321 else if (this_char_base < 0)
1322 {
1323 this_char_base = inverse & ~0x3F;
1324 if (char_base < 0)
1325 char_base = this_char_base;
dafaabd1 1326 else if (this_char_base != char_base)
49e44e81
KH
1327 boyer_moore_ok = 0;
1328 }
1329 else if ((inverse & ~0x3F) != this_char_base)
1330 boyer_moore_ok = 0;
a17ae96a
KH
1331 if (c == inverse)
1332 break;
1333 TRANSLATE (inverse, inverse_trt, inverse);
1334 }
1335 }
aff2ce94 1336 }
facdc750
RS
1337
1338 /* Store this character into the translated pattern. */
72af86bd 1339 memcpy (pat, str, charlen);
a17ae96a 1340 pat += charlen;
facdc750
RS
1341 base_pat += in_charlen;
1342 len_byte -= in_charlen;
1343 }
dafaabd1
AS
1344
1345 /* If char_base is still negative we didn't find any translated
1346 non-ASCII characters. */
1347 if (char_base < 0)
1348 char_base = 0;
facdc750
RS
1349 }
1350 else
1351 {
040272ce 1352 /* Unibyte buffer. */
a17ae96a 1353 char_base = 0;
facdc750
RS
1354 while (--len >= 0)
1355 {
e8c6e965 1356 int c, translated, inverse;
facdc750
RS
1357
1358 /* If we got here and the RE flag is set, it's because we're
1359 dealing with a regexp known to be trivial, so the backslash
1360 just quotes the next character. */
1361 if (RE && *base_pat == '\\')
1362 {
1363 len--;
0190922f 1364 raw_pattern_size--;
facdc750
RS
1365 base_pat++;
1366 }
1367 c = *base_pat++;
aff2ce94 1368 TRANSLATE (translated, trt, c);
facdc750 1369 *pat++ = translated;
e8c6e965
EZ
1370 /* Check that none of C's equivalents violates the
1371 assumptions of boyer_moore. */
1372 TRANSLATE (inverse, inverse_trt, c);
1373 while (1)
1374 {
1375 if (inverse >= 0200)
1376 {
1377 boyer_moore_ok = 0;
1378 break;
1379 }
1380 if (c == inverse)
1381 break;
1382 TRANSLATE (inverse, inverse_trt, inverse);
1383 }
facdc750
RS
1384 }
1385 }
1386
1387 len_byte = pat - patbuf;
facdc750
RS
1388 pat = base_pat = patbuf;
1389
040272ce 1390 if (boyer_moore_ok)
1db5b1ad
JB
1391 return boyer_moore (n, pat, len_byte, trt, inverse_trt,
1392 pos_byte, lim_byte,
a17ae96a 1393 char_base);
facdc750 1394 else
1db5b1ad 1395 return simple_search (n, pat, raw_pattern_size, len_byte, trt,
facdc750
RS
1396 pos, pos_byte, lim, lim_byte);
1397 }
1398}
1399\f
1400/* Do a simple string search N times for the string PAT,
1401 whose length is LEN/LEN_BYTE,
1402 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1403 TRT is the translation table.
f8bd51c4 1404
facdc750
RS
1405 Return the character position where the match is found.
1406 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1407
facdc750
RS
1408 This kind of search works regardless of what is in PAT and
1409 regardless of what is in TRT. It is used in cases where
1410 boyer_moore cannot work. */
1411
e7deaab0 1412static EMACS_INT
c098fdb8 1413simple_search (EMACS_INT n, unsigned char *pat,
d311d28c
PE
1414 ptrdiff_t len, ptrdiff_t len_byte, Lisp_Object trt,
1415 ptrdiff_t pos, ptrdiff_t pos_byte,
1416 ptrdiff_t lim, ptrdiff_t lim_byte)
facdc750 1417{
0063fdb1
DA
1418 bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1419 bool forward = n > 0;
49e44e81
KH
1420 /* Number of buffer bytes matched. Note that this may be different
1421 from len_byte in a multibyte buffer. */
8ce70ed2 1422 ptrdiff_t match_byte = PTRDIFF_MIN;
facdc750
RS
1423
1424 if (lim > pos && multibyte)
1425 while (n > 0)
1426 {
1427 while (1)
f8bd51c4 1428 {
facdc750 1429 /* Try matching at position POS. */
d311d28c
PE
1430 ptrdiff_t this_pos = pos;
1431 ptrdiff_t this_pos_byte = pos_byte;
1432 ptrdiff_t this_len = len;
facdc750 1433 unsigned char *p = pat;
a9469498 1434 if (pos + len > lim || pos_byte + len_byte > lim_byte)
facdc750
RS
1435 goto stop;
1436
1437 while (this_len > 0)
1438 {
1439 int charlen, buf_charlen;
ab228c24 1440 int pat_ch, buf_ch;
facdc750 1441
62a6e103 1442 pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
facdc750 1443 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
facdc750 1444 buf_charlen);
aff2ce94 1445 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1446
1447 if (buf_ch != pat_ch)
1448 break;
ab228c24 1449
ab228c24
RS
1450 this_len--;
1451 p += charlen;
1452
1453 this_pos_byte += buf_charlen;
1454 this_pos++;
facdc750
RS
1455 }
1456
1457 if (this_len == 0)
1458 {
49e44e81 1459 match_byte = this_pos_byte - pos_byte;
facdc750 1460 pos += len;
49e44e81 1461 pos_byte += match_byte;
facdc750
RS
1462 break;
1463 }
1464
1465 INC_BOTH (pos, pos_byte);
f8bd51c4 1466 }
facdc750
RS
1467
1468 n--;
1469 }
1470 else if (lim > pos)
1471 while (n > 0)
1472 {
1473 while (1)
f8bd51c4 1474 {
facdc750 1475 /* Try matching at position POS. */
d311d28c
PE
1476 ptrdiff_t this_pos = pos;
1477 ptrdiff_t this_len = len;
facdc750
RS
1478 unsigned char *p = pat;
1479
1480 if (pos + len > lim)
1481 goto stop;
1482
1483 while (this_len > 0)
1484 {
1485 int pat_ch = *p++;
1486 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1487 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1488
1489 if (buf_ch != pat_ch)
1490 break;
ab228c24
RS
1491
1492 this_len--;
1493 this_pos++;
facdc750
RS
1494 }
1495
1496 if (this_len == 0)
1497 {
49e44e81 1498 match_byte = len;
facdc750
RS
1499 pos += len;
1500 break;
1501 }
1502
1503 pos++;
f8bd51c4 1504 }
facdc750
RS
1505
1506 n--;
1507 }
1508 /* Backwards search. */
1509 else if (lim < pos && multibyte)
1510 while (n < 0)
1511 {
1512 while (1)
f8bd51c4 1513 {
facdc750 1514 /* Try matching at position POS. */
d311d28c
PE
1515 ptrdiff_t this_pos = pos;
1516 ptrdiff_t this_pos_byte = pos_byte;
1517 ptrdiff_t this_len = len;
c525b3f2 1518 const unsigned char *p = pat + len_byte;
facdc750 1519
8b264ecb 1520 if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
facdc750
RS
1521 goto stop;
1522
1523 while (this_len > 0)
1524 {
ab228c24 1525 int pat_ch, buf_ch;
facdc750 1526
8b264ecb
AS
1527 DEC_BOTH (this_pos, this_pos_byte);
1528 PREV_CHAR_BOUNDARY (p, pat);
1529 pat_ch = STRING_CHAR (p);
1530 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
aff2ce94 1531 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1532
1533 if (buf_ch != pat_ch)
1534 break;
ab228c24 1535
ab228c24 1536 this_len--;
facdc750
RS
1537 }
1538
1539 if (this_len == 0)
1540 {
8b264ecb
AS
1541 match_byte = pos_byte - this_pos_byte;
1542 pos = this_pos;
1543 pos_byte = this_pos_byte;
facdc750
RS
1544 break;
1545 }
1546
1547 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1548 }
1549
facdc750
RS
1550 n++;
1551 }
1552 else if (lim < pos)
1553 while (n < 0)
1554 {
1555 while (1)
b6d6a51c 1556 {
facdc750 1557 /* Try matching at position POS. */
d311d28c
PE
1558 ptrdiff_t this_pos = pos - len;
1559 ptrdiff_t this_len = len;
facdc750
RS
1560 unsigned char *p = pat;
1561
a9469498 1562 if (this_pos < lim)
facdc750
RS
1563 goto stop;
1564
1565 while (this_len > 0)
1566 {
1567 int pat_ch = *p++;
1568 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1569 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1570
1571 if (buf_ch != pat_ch)
1572 break;
ab228c24
RS
1573 this_len--;
1574 this_pos++;
facdc750
RS
1575 }
1576
1577 if (this_len == 0)
b6d6a51c 1578 {
49e44e81 1579 match_byte = len;
facdc750
RS
1580 pos -= len;
1581 break;
b6d6a51c 1582 }
facdc750
RS
1583
1584 pos--;
b6d6a51c 1585 }
facdc750
RS
1586
1587 n++;
b6d6a51c 1588 }
facdc750
RS
1589
1590 stop:
1591 if (n == 0)
aff2ce94 1592 {
8ce70ed2 1593 eassert (match_byte != PTRDIFF_MIN);
ab228c24 1594 if (forward)
49e44e81 1595 set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
ab228c24 1596 else
49e44e81 1597 set_search_regs (multibyte ? pos_byte : pos, match_byte);
aff2ce94
RS
1598
1599 return pos;
1600 }
facdc750
RS
1601 else if (n > 0)
1602 return -n;
1603 else
1604 return n;
1605}
1606\f
0190922f 1607/* Do Boyer-Moore search N times for the string BASE_PAT,
1db5b1ad
JB
1608 whose length is LEN_BYTE,
1609 from buffer position POS_BYTE until LIM_BYTE.
facdc750
RS
1610 DIRECTION says which direction we search in.
1611 TRT and INVERSE_TRT are translation tables.
0190922f 1612 Characters in PAT are already translated by TRT.
facdc750 1613
0190922f
KH
1614 This kind of search works if all the characters in BASE_PAT that
1615 have nontrivial translation are the same aside from the last byte.
1616 This makes it possible to translate just the last byte of a
1617 character, and do so after just a simple test of the context.
b2e6b10f 1618 CHAR_BASE is nonzero if there is such a non-ASCII character.
facdc750
RS
1619
1620 If that criterion is not satisfied, do not call this function. */
1621
e7deaab0 1622static EMACS_INT
c098fdb8 1623boyer_moore (EMACS_INT n, unsigned char *base_pat,
d311d28c 1624 ptrdiff_t len_byte,
d5a3eaaf 1625 Lisp_Object trt, Lisp_Object inverse_trt,
d311d28c 1626 ptrdiff_t pos_byte, ptrdiff_t lim_byte,
1db5b1ad 1627 int char_base)
facdc750
RS
1628{
1629 int direction = ((n > 0) ? 1 : -1);
d311d28c
PE
1630 register ptrdiff_t dirlen;
1631 ptrdiff_t limit;
e7deaab0 1632 int stride_for_teases = 0;
f4646fff 1633 int BM_tab[0400];
177c0ea7 1634 register unsigned char *cursor, *p_limit;
d311d28c 1635 register ptrdiff_t i;
c098fdb8 1636 register int j;
cb6792d2 1637 unsigned char *pat, *pat_end;
0063fdb1 1638 bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
facdc750
RS
1639
1640 unsigned char simple_translate[0400];
0190922f 1641 /* These are set to the preceding bytes of a byte to be translated
49e44e81 1642 if char_base is nonzero. As the maximum byte length of a
a17ae96a 1643 multibyte character is 5, we have to check at most four previous
0190922f
KH
1644 bytes. */
1645 int translate_prev_byte1 = 0;
1646 int translate_prev_byte2 = 0;
1647 int translate_prev_byte3 = 0;
facdc750 1648
f4646fff
AS
1649 /* The general approach is that we are going to maintain that we know
1650 the first (closest to the present position, in whatever direction
1651 we're searching) character that could possibly be the last
1652 (furthest from present position) character of a valid match. We
1653 advance the state of our knowledge by looking at that character
1654 and seeing whether it indeed matches the last character of the
1655 pattern. If it does, we take a closer look. If it does not, we
1656 move our pointer (to putative last characters) as far as is
1657 logically possible. This amount of movement, which I call a
1658 stride, will be the length of the pattern if the actual character
1659 appears nowhere in the pattern, otherwise it will be the distance
1660 from the last occurrence of that character to the end of the
1661 pattern. If the amount is zero we have a possible match. */
1662
1663 /* Here we make a "mickey mouse" BM table. The stride of the search
1664 is determined only by the last character of the putative match.
1665 If that character does not match, we will stride the proper
1666 distance to propose a match that superimposes it on the last
1667 instance of a character that matches it (per trt), or misses
1668 it entirely if there is none. */
facdc750
RS
1669
1670 dirlen = len_byte * direction;
cb6792d2
RS
1671
1672 /* Record position after the end of the pattern. */
1673 pat_end = base_pat + len_byte;
1674 /* BASE_PAT points to a character that we start scanning from.
1675 It is the first character in a forward search,
1676 the last character in a backward search. */
facdc750 1677 if (direction < 0)
cb6792d2
RS
1678 base_pat = pat_end - 1;
1679
f4646fff
AS
1680 /* A character that does not appear in the pattern induces a
1681 stride equal to the pattern length. */
1682 for (i = 0; i < 0400; i++)
1683 BM_tab[i] = dirlen;
facdc750
RS
1684
1685 /* We use this for translation, instead of TRT itself.
1686 We fill this in to handle the characters that actually
1687 occur in the pattern. Others don't matter anyway! */
facdc750
RS
1688 for (i = 0; i < 0400; i++)
1689 simple_translate[i] = i;
1690
a17ae96a 1691 if (char_base)
0190922f 1692 {
a17ae96a 1693 /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE. Only a
0190922f 1694 byte following them are the target of translation. */
0190922f 1695 unsigned char str[MAX_MULTIBYTE_LENGTH];
1f3561e4 1696 int cblen = CHAR_STRING (char_base, str);
0190922f 1697
1f3561e4
PE
1698 translate_prev_byte1 = str[cblen - 2];
1699 if (cblen > 2)
0190922f 1700 {
1f3561e4
PE
1701 translate_prev_byte2 = str[cblen - 3];
1702 if (cblen > 3)
1f1d9321 1703 translate_prev_byte3 = str[cblen - 4];
0190922f
KH
1704 }
1705 }
1706
facdc750 1707 i = 0;
f4646fff 1708 while (i != dirlen)
facdc750 1709 {
cb6792d2 1710 unsigned char *ptr = base_pat + i;
facdc750 1711 i += direction;
facdc750 1712 if (! NILP (trt))
ca1d1d23 1713 {
49e44e81
KH
1714 /* If the byte currently looking at is the last of a
1715 character to check case-equivalents, set CH to that
1716 character. An ASCII character and a non-ASCII character
1717 matching with CHAR_BASE are to be checked. */
a17ae96a
KH
1718 int ch = -1;
1719
1720 if (ASCII_BYTE_P (*ptr) || ! multibyte)
1721 ch = *ptr;
49e44e81 1722 else if (char_base
86dc6ccb 1723 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
ca1d1d23 1724 {
49e44e81
KH
1725 unsigned char *charstart = ptr - 1;
1726
1727 while (! (CHAR_HEAD_P (*charstart)))
1728 charstart--;
62a6e103 1729 ch = STRING_CHAR (charstart);
a17ae96a
KH
1730 if (char_base != (ch & ~0x3F))
1731 ch = -1;
ca1d1d23 1732 }
facdc750 1733
edb7b4dc 1734 if (ch >= 0200 && multibyte)
49e44e81
KH
1735 j = (ch & 0x3F) | 0200;
1736 else
1737 j = *ptr;
1738
f4646fff 1739 if (i == dirlen)
facdc750 1740 stride_for_teases = BM_tab[j];
ab228c24 1741
facdc750 1742 BM_tab[j] = dirlen - i;
1db5b1ad
JB
1743 /* A translation table is accompanied by its inverse -- see
1744 comment following downcase_table for details. */
a17ae96a 1745 if (ch >= 0)
ab228c24
RS
1746 {
1747 int starting_ch = ch;
49e44e81 1748 int starting_j = j;
a17ae96a 1749
ab228c24
RS
1750 while (1)
1751 {
1752 TRANSLATE (ch, inverse_trt, ch);
edb7b4dc 1753 if (ch >= 0200 && multibyte)
49e44e81 1754 j = (ch & 0x3F) | 0200;
ab228c24 1755 else
a17ae96a 1756 j = ch;
ab228c24
RS
1757
1758 /* For all the characters that map into CH,
1759 set up simple_translate to map the last byte
1760 into STARTING_J. */
1761 simple_translate[j] = starting_j;
1762 if (ch == starting_ch)
1763 break;
1764 BM_tab[j] = dirlen - i;
1765 }
1766 }
facdc750
RS
1767 }
1768 else
1769 {
1770 j = *ptr;
1771
f4646fff 1772 if (i == dirlen)
facdc750
RS
1773 stride_for_teases = BM_tab[j];
1774 BM_tab[j] = dirlen - i;
ca1d1d23 1775 }
f4646fff
AS
1776 /* stride_for_teases tells how much to stride if we get a
1777 match on the far character but are subsequently
1778 disappointed, by recording what the stride would have been
1779 for that character if the last character had been
1780 different. */
facdc750 1781 }
facdc750
RS
1782 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1783 /* loop invariant - POS_BYTE points at where last char (first
1784 char if reverse) of pattern would align in a possible match. */
1785 while (n != 0)
1786 {
d311d28c 1787 ptrdiff_t tail_end;
facdc750
RS
1788 unsigned char *tail_end_ptr;
1789
1790 /* It's been reported that some (broken) compiler thinks that
1791 Boolean expressions in an arithmetic context are unsigned.
1792 Using an explicit ?1:0 prevents this. */
1793 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1794 < 0)
1795 return (n * (0 - direction));
1796 /* First we do the part we can by pointers (maybe nothing) */
1797 QUIT;
1798 pat = base_pat;
1799 limit = pos_byte - dirlen + direction;
67ce527d
KH
1800 if (direction > 0)
1801 {
1802 limit = BUFFER_CEILING_OF (limit);
1803 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1804 can take on without hitting edge of buffer or the gap. */
1805 limit = min (limit, pos_byte + 20000);
1806 limit = min (limit, lim_byte - 1);
1807 }
1808 else
1809 {
1810 limit = BUFFER_FLOOR_OF (limit);
1811 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1812 can take on without hitting edge of buffer or the gap. */
1813 limit = max (limit, pos_byte - 20000);
1814 limit = max (limit, lim_byte);
1815 }
facdc750
RS
1816 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1817 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1818
1819 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1820 {
facdc750
RS
1821 unsigned char *p2;
1822
1823 p_limit = BYTE_POS_ADDR (limit);
1824 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
f4646fff 1825 /* In this loop, pos + cursor - p2 is the surrogate for pos. */
facdc750 1826 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1827 {
facdc750 1828 if (direction > 0) /* worth duplicating */
ca1d1d23 1829 {
f4646fff
AS
1830 while (cursor <= p_limit)
1831 {
1832 if (BM_tab[*cursor] == 0)
1833 goto hit;
facdc750 1834 cursor += BM_tab[*cursor];
f4646fff 1835 }
facdc750
RS
1836 }
1837 else
1838 {
f4646fff
AS
1839 while (cursor >= p_limit)
1840 {
1841 if (BM_tab[*cursor] == 0)
1842 goto hit;
facdc750 1843 cursor += BM_tab[*cursor];
f4646fff 1844 }
facdc750 1845 }
f4646fff
AS
1846 /* If you are here, cursor is beyond the end of the
1847 searched region. You fail to match within the
1848 permitted region and would otherwise try a character
1849 beyond that region. */
1850 break;
1851
1852 hit:
facdc750
RS
1853 i = dirlen - direction;
1854 if (! NILP (trt))
1855 {
1856 while ((i -= direction) + direction != 0)
ca1d1d23 1857 {
facdc750
RS
1858 int ch;
1859 cursor -= direction;
1860 /* Translate only the last byte of a character. */
1861 if (! multibyte
1862 || ((cursor == tail_end_ptr
1863 || CHAR_HEAD_P (cursor[1]))
1864 && (CHAR_HEAD_P (cursor[0])
0190922f 1865 /* Check if this is the last byte of
cd1181db 1866 a translatable character. */
0190922f
KH
1867 || (translate_prev_byte1 == cursor[-1]
1868 && (CHAR_HEAD_P (translate_prev_byte1)
1869 || (translate_prev_byte2 == cursor[-2]
1870 && (CHAR_HEAD_P (translate_prev_byte2)
1871 || (translate_prev_byte3 == cursor[-3]))))))))
facdc750
RS
1872 ch = simple_translate[*cursor];
1873 else
1874 ch = *cursor;
1875 if (pat[i] != ch)
1876 break;
ca1d1d23 1877 }
facdc750
RS
1878 }
1879 else
1880 {
1881 while ((i -= direction) + direction != 0)
ca1d1d23 1882 {
facdc750
RS
1883 cursor -= direction;
1884 if (pat[i] != *cursor)
1885 break;
ca1d1d23 1886 }
facdc750
RS
1887 }
1888 cursor += dirlen - i - direction; /* fix cursor */
1889 if (i + direction == 0)
1890 {
d311d28c 1891 ptrdiff_t position, start, end;
0c8533c6 1892
facdc750 1893 cursor -= direction;
1113d9db 1894
facdc750
RS
1895 position = pos_byte + cursor - p2 + ((direction > 0)
1896 ? 1 - len_byte : 0);
1897 set_search_regs (position, len_byte);
ca325161 1898
ef887810
RS
1899 if (NILP (Vinhibit_changing_match_data))
1900 {
1901 start = search_regs.start[0];
1902 end = search_regs.end[0];
1903 }
1904 else
1905 /* If Vinhibit_changing_match_data is non-nil,
1906 search_regs will not be changed. So let's
1907 compute start and end here. */
1908 {
1909 start = BYTE_TO_CHAR (position);
1910 end = BYTE_TO_CHAR (position + len_byte);
1911 }
1912
facdc750
RS
1913 if ((n -= direction) != 0)
1914 cursor += dirlen; /* to resume search */
ca1d1d23 1915 else
ef887810 1916 return direction > 0 ? end : start;
ca1d1d23 1917 }
facdc750
RS
1918 else
1919 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 1920 }
facdc750
RS
1921 pos_byte += cursor - p2;
1922 }
1923 else
f4646fff
AS
1924 /* Now we'll pick up a clump that has to be done the hard
1925 way because it covers a discontinuity. */
facdc750
RS
1926 {
1927 limit = ((direction > 0)
1928 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1929 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1930 limit = ((direction > 0)
1931 ? min (limit + len_byte, lim_byte - 1)
1932 : max (limit - len_byte, lim_byte));
1933 /* LIMIT is now the last value POS_BYTE can have
1934 and still be valid for a possible match. */
1935 while (1)
ca1d1d23 1936 {
f4646fff
AS
1937 /* This loop can be coded for space rather than
1938 speed because it will usually run only once.
1939 (the reach is at most len + 21, and typically
1940 does not exceed len). */
facdc750 1941 while ((limit - pos_byte) * direction >= 0)
f4646fff
AS
1942 {
1943 int ch = FETCH_BYTE (pos_byte);
1944 if (BM_tab[ch] == 0)
1945 goto hit2;
1946 pos_byte += BM_tab[ch];
1947 }
1948 break; /* ran off the end */
1949
1950 hit2:
1951 /* Found what might be a match. */
facdc750
RS
1952 i = dirlen - direction;
1953 while ((i -= direction) + direction != 0)
ca1d1d23 1954 {
facdc750
RS
1955 int ch;
1956 unsigned char *ptr;
1957 pos_byte -= direction;
1958 ptr = BYTE_POS_ADDR (pos_byte);
1959 /* Translate only the last byte of a character. */
1960 if (! multibyte
1961 || ((ptr == tail_end_ptr
1962 || CHAR_HEAD_P (ptr[1]))
1963 && (CHAR_HEAD_P (ptr[0])
0190922f 1964 /* Check if this is the last byte of a
cd1181db 1965 translatable character. */
0190922f
KH
1966 || (translate_prev_byte1 == ptr[-1]
1967 && (CHAR_HEAD_P (translate_prev_byte1)
1968 || (translate_prev_byte2 == ptr[-2]
1969 && (CHAR_HEAD_P (translate_prev_byte2)
1970 || translate_prev_byte3 == ptr[-3])))))))
facdc750
RS
1971 ch = simple_translate[*ptr];
1972 else
1973 ch = *ptr;
1974 if (pat[i] != ch)
1975 break;
1976 }
1977 /* Above loop has moved POS_BYTE part or all the way
1978 back to the first pos (last pos if reverse).
1979 Set it once again at the last (first if reverse) char. */
f4646fff 1980 pos_byte += dirlen - i - direction;
facdc750
RS
1981 if (i + direction == 0)
1982 {
d311d28c 1983 ptrdiff_t position, start, end;
facdc750 1984 pos_byte -= direction;
1113d9db 1985
facdc750 1986 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
facdc750 1987 set_search_regs (position, len_byte);
ca325161 1988
ef887810
RS
1989 if (NILP (Vinhibit_changing_match_data))
1990 {
1991 start = search_regs.start[0];
1992 end = search_regs.end[0];
1993 }
1994 else
1995 /* If Vinhibit_changing_match_data is non-nil,
1996 search_regs will not be changed. So let's
1997 compute start and end here. */
1998 {
1999 start = BYTE_TO_CHAR (position);
2000 end = BYTE_TO_CHAR (position + len_byte);
2001 }
2002
facdc750
RS
2003 if ((n -= direction) != 0)
2004 pos_byte += dirlen; /* to resume search */
ca1d1d23 2005 else
ef887810 2006 return direction > 0 ? end : start;
ca1d1d23 2007 }
facdc750
RS
2008 else
2009 pos_byte += stride_for_teases;
2010 }
2011 }
2012 /* We have done one clump. Can we continue? */
2013 if ((lim_byte - pos_byte) * direction < 0)
2014 return ((0 - n) * direction);
ca1d1d23 2015 }
facdc750 2016 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 2017}
ca325161 2018
fa8ed3e0 2019/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
a7e4cdde
RS
2020 for the overall match just found in the current buffer.
2021 Also clear out the match data for registers 1 and up. */
ca325161
RS
2022
2023static void
d311d28c 2024set_search_regs (ptrdiff_t beg_byte, ptrdiff_t nbytes)
ca325161 2025{
d311d28c 2026 ptrdiff_t i;
a7e4cdde 2027
ef887810
RS
2028 if (!NILP (Vinhibit_changing_match_data))
2029 return;
2030
ca325161
RS
2031 /* Make sure we have registers in which to store
2032 the match position. */
2033 if (search_regs.num_regs == 0)
2034 {
23f86fce
DA
2035 search_regs.start = xmalloc (2 * sizeof (regoff_t));
2036 search_regs.end = xmalloc (2 * sizeof (regoff_t));
487282dc 2037 search_regs.num_regs = 2;
ca325161
RS
2038 }
2039
a7e4cdde
RS
2040 /* Clear out the other registers. */
2041 for (i = 1; i < search_regs.num_regs; i++)
2042 {
2043 search_regs.start[i] = -1;
2044 search_regs.end[i] = -1;
2045 }
2046
fa8ed3e0
RS
2047 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2048 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 2049 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 2050}
ca1d1d23 2051\f
ca1d1d23 2052DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
8c1a1077
PJ
2053 "MSearch backward: ",
2054 doc: /* Search backward from point for STRING.
2055Set point to the beginning of the occurrence found, and return point.
2056An optional second argument bounds the search; it is a buffer position.
2057The match found must not extend before that position.
2058Optional third argument, if t, means if fail just return nil (no error).
2059 If not nil and not t, position at limit of search and return nil.
acc28cb9
CY
2060Optional fourth argument COUNT, if non-nil, means to search for COUNT
2061 successive occurrences. If COUNT is negative, search forward,
2062 instead of backward, for -COUNT occurrences.
8c1a1077
PJ
2063
2064Search case-sensitivity is determined by the value of the variable
2065`case-fold-search', which see.
2066
2067See also the functions `match-beginning', `match-end' and `replace-match'. */)
5842a27b 2068 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
ca1d1d23 2069{
b819a390 2070 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
2071}
2072
6af43974 2073DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
8c1a1077
PJ
2074 doc: /* Search forward from point for STRING.
2075Set point to the end of the occurrence found, and return point.
2076An optional second argument bounds the search; it is a buffer position.
d6aacdcd 2077The match found must not extend after that position. A value of nil is
48740f01 2078 equivalent to (point-max).
8c1a1077
PJ
2079Optional third argument, if t, means if fail just return nil (no error).
2080 If not nil and not t, move to limit of search and return nil.
acc28cb9
CY
2081Optional fourth argument COUNT, if non-nil, means to search for COUNT
2082 successive occurrences. If COUNT is negative, search backward,
2083 instead of forward, for -COUNT occurrences.
8c1a1077
PJ
2084
2085Search case-sensitivity is determined by the value of the variable
2086`case-fold-search', which see.
2087
2088See also the functions `match-beginning', `match-end' and `replace-match'. */)
5842a27b 2089 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
ca1d1d23 2090{
b819a390 2091 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
2092}
2093
ca1d1d23 2094DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
8c1a1077
PJ
2095 "sRE search backward: ",
2096 doc: /* Search backward from point for match for regular expression REGEXP.
2097Set point to the beginning of the match, and return point.
2098The match found is the one starting last in the buffer
2099and yet ending before the origin of the search.
2100An optional second argument bounds the search; it is a buffer position.
2101The match found must start at or after that position.
2102Optional third argument, if t, means if fail just return nil (no error).
2103 If not nil and not t, move to limit of search and return nil.
2104Optional fourth argument is repeat count--search for successive occurrences.
49080b10
LMI
2105
2106Search case-sensitivity is determined by the value of the variable
2107`case-fold-search', which see.
2108
8c1a1077
PJ
2109See also the functions `match-beginning', `match-end', `match-string',
2110and `replace-match'. */)
5842a27b 2111 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
ca1d1d23 2112{
b819a390 2113 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2114}
2115
2116DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
8c1a1077
PJ
2117 "sRE search: ",
2118 doc: /* Search forward from point for regular expression REGEXP.
2119Set point to the end of the occurrence found, and return point.
2120An optional second argument bounds the search; it is a buffer position.
2121The match found must not extend after that position.
2122Optional third argument, if t, means if fail just return nil (no error).
2123 If not nil and not t, move to limit of search and return nil.
2124Optional fourth argument is repeat count--search for successive occurrences.
49080b10
LMI
2125
2126Search case-sensitivity is determined by the value of the variable
2127`case-fold-search', which see.
2128
8c1a1077
PJ
2129See also the functions `match-beginning', `match-end', `match-string',
2130and `replace-match'. */)
5842a27b 2131 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
ca1d1d23 2132{
b819a390
RS
2133 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2134}
2135
2136DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
8c1a1077
PJ
2137 "sPosix search backward: ",
2138 doc: /* Search backward from point for match for regular expression REGEXP.
2139Find the longest match in accord with Posix regular expression rules.
2140Set point to the beginning of the match, and return point.
2141The match found is the one starting last in the buffer
2142and yet ending before the origin of the search.
2143An optional second argument bounds the search; it is a buffer position.
2144The match found must start at or after that position.
2145Optional third argument, if t, means if fail just return nil (no error).
2146 If not nil and not t, move to limit of search and return nil.
2147Optional fourth argument is repeat count--search for successive occurrences.
49080b10
LMI
2148
2149Search case-sensitivity is determined by the value of the variable
2150`case-fold-search', which see.
2151
8c1a1077
PJ
2152See also the functions `match-beginning', `match-end', `match-string',
2153and `replace-match'. */)
5842a27b 2154 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
b819a390
RS
2155{
2156 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2157}
2158
2159DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
8c1a1077
PJ
2160 "sPosix search: ",
2161 doc: /* Search forward from point for regular expression REGEXP.
2162Find the longest match in accord with Posix regular expression rules.
2163Set point to the end of the occurrence found, and return point.
2164An optional second argument bounds the search; it is a buffer position.
2165The match found must not extend after that position.
2166Optional third argument, if t, means if fail just return nil (no error).
2167 If not nil and not t, move to limit of search and return nil.
2168Optional fourth argument is repeat count--search for successive occurrences.
49080b10
LMI
2169
2170Search case-sensitivity is determined by the value of the variable
2171`case-fold-search', which see.
2172
8c1a1077
PJ
2173See also the functions `match-beginning', `match-end', `match-string',
2174and `replace-match'. */)
5842a27b 2175 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
b819a390
RS
2176{
2177 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2178}
2179\f
d7a5ad5f 2180DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
8c1a1077 2181 doc: /* Replace text matched by last search with NEWTEXT.
4dd0c271
RS
2182Leave point at the end of the replacement text.
2183
c88b867f
CY
2184If optional second arg FIXEDCASE is non-nil, do not alter the case of
2185the replacement text. Otherwise, maybe capitalize the whole text, or
2186maybe just word initials, based on the replaced text. If the replaced
2187text has only capital letters and has at least one multiletter word,
2188convert NEWTEXT to all caps. Otherwise if all words are capitalized
2189in the replaced text, capitalize each word in NEWTEXT.
2190
2191If optional third arg LITERAL is non-nil, insert NEWTEXT literally.
8c1a1077
PJ
2192Otherwise treat `\\' as special:
2193 `\\&' in NEWTEXT means substitute original matched text.
2194 `\\N' means substitute what matched the Nth `\\(...\\)'.
2195 If Nth parens didn't match, substitute nothing.
2196 `\\\\' means insert one `\\'.
d32e47af
LM
2197 `\\?' is treated literally
2198 (for compatibility with `query-replace-regexp').
2199 Any other character following `\\' signals an error.
4dd0c271
RS
2200Case conversion does not apply to these substitutions.
2201
c88b867f
CY
2202If optional fourth argument STRING is non-nil, it should be a string
2203to act on; this should be the string on which the previous match was
2204done via `string-match'. In this case, `replace-match' creates and
2205returns a new string, made by copying STRING and replacing the part of
2206STRING that was matched (the original STRING itself is not altered).
8c1a1077
PJ
2207
2208The optional fifth argument SUBEXP specifies a subexpression;
2209it says to replace just that subexpression with NEWTEXT,
2210rather than replacing the entire matched text.
2211This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2212`\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2213NEWTEXT in place of subexp N.
2214This is useful only after a regular expression search or match,
2215since only regular expressions have distinguished subexpressions. */)
5842a27b 2216 (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp)
ca1d1d23
JB
2217{
2218 enum { nochange, all_caps, cap_initial } case_action;
457882c2
PE
2219 ptrdiff_t pos, pos_byte;
2220 bool some_multiletter_word;
2221 bool some_lowercase;
2222 bool some_uppercase;
2223 bool some_nonuppercase_initial;
2224 int c, prevc;
a0efffc8 2225 ptrdiff_t sub;
d311d28c 2226 ptrdiff_t opoint, newpoint;
ca1d1d23 2227
b7826503 2228 CHECK_STRING (newtext);
ca1d1d23 2229
080c45fd 2230 if (! NILP (string))
b7826503 2231 CHECK_STRING (string);
080c45fd 2232
ca1d1d23
JB
2233 case_action = nochange; /* We tried an initialization */
2234 /* but some C compilers blew it */
4746118a
JB
2235
2236 if (search_regs.num_regs <= 0)
d72cdbfc 2237 error ("`replace-match' called before any match found");
4746118a 2238
d7a5ad5f
RS
2239 if (NILP (subexp))
2240 sub = 0;
2241 else
2242 {
b7826503 2243 CHECK_NUMBER (subexp);
a0efffc8 2244 if (! (0 <= XINT (subexp) && XINT (subexp) < search_regs.num_regs))
d7a5ad5f 2245 args_out_of_range (subexp, make_number (search_regs.num_regs));
a0efffc8 2246 sub = XINT (subexp);
d7a5ad5f
RS
2247 }
2248
080c45fd
RS
2249 if (NILP (string))
2250 {
d7a5ad5f
RS
2251 if (search_regs.start[sub] < BEGV
2252 || search_regs.start[sub] > search_regs.end[sub]
2253 || search_regs.end[sub] > ZV)
2254 args_out_of_range (make_number (search_regs.start[sub]),
2255 make_number (search_regs.end[sub]));
080c45fd
RS
2256 }
2257 else
2258 {
d7a5ad5f
RS
2259 if (search_regs.start[sub] < 0
2260 || search_regs.start[sub] > search_regs.end[sub]
d5db4077 2261 || search_regs.end[sub] > SCHARS (string))
d7a5ad5f
RS
2262 args_out_of_range (make_number (search_regs.start[sub]),
2263 make_number (search_regs.end[sub]));
080c45fd 2264 }
ca1d1d23
JB
2265
2266 if (NILP (fixedcase))
2267 {
2268 /* Decide how to casify by examining the matched text. */
d311d28c 2269 ptrdiff_t last;
ca1d1d23 2270
ac3b28b1
KH
2271 pos = search_regs.start[sub];
2272 last = search_regs.end[sub];
fa8ed3e0
RS
2273
2274 if (NILP (string))
ac3b28b1 2275 pos_byte = CHAR_TO_BYTE (pos);
fa8ed3e0 2276 else
ac3b28b1 2277 pos_byte = string_char_to_byte (string, pos);
fa8ed3e0 2278
ca1d1d23
JB
2279 prevc = '\n';
2280 case_action = all_caps;
2281
2282 /* some_multiletter_word is set nonzero if any original word
2283 is more than one letter long. */
2284 some_multiletter_word = 0;
97832bd0 2285 some_lowercase = 0;
208767c3 2286 some_nonuppercase_initial = 0;
73dc8771 2287 some_uppercase = 0;
ca1d1d23 2288
ac3b28b1 2289 while (pos < last)
ca1d1d23 2290 {
080c45fd 2291 if (NILP (string))
ac3b28b1 2292 {
93daa011 2293 c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
ac3b28b1
KH
2294 INC_BOTH (pos, pos_byte);
2295 }
080c45fd 2296 else
93daa011 2297 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
080c45fd 2298
5da9919f 2299 if (lowercasep (c))
ca1d1d23
JB
2300 {
2301 /* Cannot be all caps if any original char is lower case */
2302
97832bd0 2303 some_lowercase = 1;
ca1d1d23 2304 if (SYNTAX (prevc) != Sword)
208767c3 2305 some_nonuppercase_initial = 1;
ca1d1d23
JB
2306 else
2307 some_multiletter_word = 1;
2308 }
5da9919f 2309 else if (uppercasep (c))
ca1d1d23 2310 {
73dc8771 2311 some_uppercase = 1;
97832bd0 2312 if (SYNTAX (prevc) != Sword)
c4d460ce 2313 ;
97832bd0 2314 else
ca1d1d23
JB
2315 some_multiletter_word = 1;
2316 }
208767c3
RS
2317 else
2318 {
2319 /* If the initial is a caseless word constituent,
2320 treat that like a lowercase initial. */
2321 if (SYNTAX (prevc) != Sword)
2322 some_nonuppercase_initial = 1;
2323 }
ca1d1d23
JB
2324
2325 prevc = c;
2326 }
2327
97832bd0
RS
2328 /* Convert to all caps if the old text is all caps
2329 and has at least one multiletter word. */
2330 if (! some_lowercase && some_multiletter_word)
2331 case_action = all_caps;
c4d460ce 2332 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2333 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2334 case_action = cap_initial;
208767c3 2335 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2336 /* Should x -> yz, operating on X, give Yz or YZ?
2337 We'll assume the latter. */
2338 case_action = all_caps;
97832bd0
RS
2339 else
2340 case_action = nochange;
ca1d1d23
JB
2341 }
2342
080c45fd
RS
2343 /* Do replacement in a string. */
2344 if (!NILP (string))
2345 {
2346 Lisp_Object before, after;
2347
2348 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2349 make_number (search_regs.start[sub]));
2350 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2351
636a5e28
RS
2352 /* Substitute parts of the match into NEWTEXT
2353 if desired. */
080c45fd
RS
2354 if (NILP (literal))
2355 {
d311d28c
PE
2356 ptrdiff_t lastpos = 0;
2357 ptrdiff_t lastpos_byte = 0;
080c45fd
RS
2358 /* We build up the substituted string in ACCUM. */
2359 Lisp_Object accum;
2360 Lisp_Object middle;
d311d28c 2361 ptrdiff_t length = SBYTES (newtext);
080c45fd
RS
2362
2363 accum = Qnil;
2364
ac3b28b1 2365 for (pos_byte = 0, pos = 0; pos_byte < length;)
080c45fd 2366 {
d311d28c
PE
2367 ptrdiff_t substart = -1;
2368 ptrdiff_t subend = 0;
457882c2 2369 bool delbackslash = 0;
080c45fd 2370
0c8533c6
RS
2371 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2372
080c45fd
RS
2373 if (c == '\\')
2374 {
0c8533c6 2375 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
177c0ea7 2376
080c45fd
RS
2377 if (c == '&')
2378 {
d7a5ad5f
RS
2379 substart = search_regs.start[sub];
2380 subend = search_regs.end[sub];
080c45fd 2381 }
76fc1ea2 2382 else if (c >= '1' && c <= '9')
080c45fd 2383 {
d311d28c 2384 if (c - '0' < search_regs.num_regs
908589fd 2385 && search_regs.start[c - '0'] >= 0)
080c45fd
RS
2386 {
2387 substart = search_regs.start[c - '0'];
2388 subend = search_regs.end[c - '0'];
2389 }
76fc1ea2
KH
2390 else
2391 {
2392 /* If that subexp did not match,
2393 replace \\N with nothing. */
2394 substart = 0;
2395 subend = 0;
2396 }
080c45fd 2397 }
1e79ec24
KH
2398 else if (c == '\\')
2399 delbackslash = 1;
d32e47af 2400 else if (c != '?')
636a5e28 2401 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2402 }
2403 if (substart >= 0)
2404 {
d131e79c
RS
2405 if (pos - 2 != lastpos)
2406 middle = substring_both (newtext, lastpos,
2407 lastpos_byte,
2408 pos - 2, pos_byte - 2);
080c45fd
RS
2409 else
2410 middle = Qnil;
2411 accum = concat3 (accum, middle,
0c8533c6
RS
2412 Fsubstring (string,
2413 make_number (substart),
080c45fd
RS
2414 make_number (subend)));
2415 lastpos = pos;
0c8533c6 2416 lastpos_byte = pos_byte;
080c45fd 2417 }
1e79ec24
KH
2418 else if (delbackslash)
2419 {
d131e79c
RS
2420 middle = substring_both (newtext, lastpos,
2421 lastpos_byte,
2422 pos - 1, pos_byte - 1);
0c8533c6 2423
1e79ec24
KH
2424 accum = concat2 (accum, middle);
2425 lastpos = pos;
0c8533c6 2426 lastpos_byte = pos_byte;
1e79ec24 2427 }
080c45fd
RS
2428 }
2429
d131e79c
RS
2430 if (pos != lastpos)
2431 middle = substring_both (newtext, lastpos,
2432 lastpos_byte,
0c8533c6 2433 pos, pos_byte);
080c45fd
RS
2434 else
2435 middle = Qnil;
2436
2437 newtext = concat2 (accum, middle);
2438 }
2439
636a5e28 2440 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2441 if (case_action == all_caps)
2442 newtext = Fupcase (newtext);
2443 else if (case_action == cap_initial)
2b2eead9 2444 newtext = Fupcase_initials (newtext);
080c45fd
RS
2445
2446 return concat3 (before, newtext, after);
2447 }
2448
09c4719e 2449 /* Record point, then move (quietly) to the start of the match. */
9160906f 2450 if (PT >= search_regs.end[sub])
b0eba991 2451 opoint = PT - ZV;
9160906f
RS
2452 else if (PT > search_regs.start[sub])
2453 opoint = search_regs.end[sub] - ZV;
b0eba991
RS
2454 else
2455 opoint = PT;
2456
886ed6ec
RS
2457 /* If we want non-literal replacement,
2458 perform substitution on the replacement string. */
2459 if (NILP (literal))
ca1d1d23 2460 {
0065d054 2461 ptrdiff_t length = SBYTES (newtext);
68e69fbd 2462 unsigned char *substed;
0065d054 2463 ptrdiff_t substed_alloc_size, substed_len;
0063fdb1
DA
2464 bool buf_multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
2465 bool str_multibyte = STRING_MULTIBYTE (newtext);
457882c2 2466 bool really_changed = 0;
3bc25e52 2467
7216e43b
PE
2468 substed_alloc_size = (length <= (STRING_BYTES_BOUND - 100) / 2
2469 ? length * 2 + 100
2470 : STRING_BYTES_BOUND);
23f86fce 2471 substed = xmalloc (substed_alloc_size);
68e69fbd
RS
2472 substed_len = 0;
2473
3bc25e52
KH
2474 /* Go thru NEWTEXT, producing the actual text to insert in
2475 SUBSTED while adjusting multibyteness to that of the current
2476 buffer. */
ca1d1d23 2477
ac3b28b1 2478 for (pos_byte = 0, pos = 0; pos_byte < length;)
ca1d1d23 2479 {
68e69fbd 2480 unsigned char str[MAX_MULTIBYTE_LENGTH];
8ea90aa3 2481 const unsigned char *add_stuff = NULL;
0065d054 2482 ptrdiff_t add_len = 0;
a0efffc8 2483 ptrdiff_t idx = -1;
9a76659d 2484
3bc25e52
KH
2485 if (str_multibyte)
2486 {
eb99a8dd 2487 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
3bc25e52 2488 if (!buf_multibyte)
461c2ab9 2489 c = multibyte_char_to_unibyte (c);
3bc25e52
KH
2490 }
2491 else
2492 {
2493 /* Note that we don't have to increment POS. */
5d69fe10 2494 c = SREF (newtext, pos_byte++);
3bc25e52 2495 if (buf_multibyte)
4c0354d7 2496 MAKE_CHAR_MULTIBYTE (c);
3bc25e52 2497 }
ac3b28b1 2498
68e69fbd
RS
2499 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2500 or set IDX to a match index, which means put that part
2501 of the buffer text into SUBSTED. */
2502
ca1d1d23
JB
2503 if (c == '\\')
2504 {
886ed6ec
RS
2505 really_changed = 1;
2506
3bc25e52
KH
2507 if (str_multibyte)
2508 {
eb99a8dd
KH
2509 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2510 pos, pos_byte);
071ce769 2511 if (!buf_multibyte && !ASCII_CHAR_P (c))
461c2ab9 2512 c = multibyte_char_to_unibyte (c);
3bc25e52
KH
2513 }
2514 else
2515 {
d5db4077 2516 c = SREF (newtext, pos_byte++);
3bc25e52 2517 if (buf_multibyte)
4c0354d7 2518 MAKE_CHAR_MULTIBYTE (c);
3bc25e52
KH
2519 }
2520
ca1d1d23 2521 if (c == '&')
68e69fbd 2522 idx = sub;
d311d28c 2523 else if (c >= '1' && c <= '9' && c - '0' < search_regs.num_regs)
ca1d1d23
JB
2524 {
2525 if (search_regs.start[c - '0'] >= 1)
68e69fbd 2526 idx = c - '0';
ca1d1d23 2527 }
636a5e28 2528 else if (c == '\\')
a7e979a4 2529 add_len = 1, add_stuff = (unsigned char *) "\\";
636a5e28 2530 else
3bc25e52
KH
2531 {
2532 xfree (substed);
2533 error ("Invalid use of `\\' in replacement text");
2534 }
ca1d1d23
JB
2535 }
2536 else
68e69fbd
RS
2537 {
2538 add_len = CHAR_STRING (c, str);
2539 add_stuff = str;
2540 }
2541
2542 /* If we want to copy part of a previous match,
2543 set up ADD_STUFF and ADD_LEN to point to it. */
2544 if (idx >= 0)
2545 {
0065d054 2546 ptrdiff_t begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
68e69fbd
RS
2547 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2548 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
13002885 2549 move_gap_both (search_regs.start[idx], begbyte);
68e69fbd
RS
2550 add_stuff = BYTE_POS_ADDR (begbyte);
2551 }
2552
2553 /* Now the stuff we want to add to SUBSTED
2554 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2555
2556 /* Make sure SUBSTED is big enough. */
0065d054
PE
2557 if (substed_alloc_size - substed_len < add_len)
2558 substed =
2559 xpalloc (substed, &substed_alloc_size,
2560 add_len - (substed_alloc_size - substed_len),
2561 STRING_BYTES_BOUND, 1);
68e69fbd
RS
2562
2563 /* Now add to the end of SUBSTED. */
f8ce8a0d
GM
2564 if (add_stuff)
2565 {
72af86bd 2566 memcpy (substed + substed_len, add_stuff, add_len);
f8ce8a0d
GM
2567 substed_len += add_len;
2568 }
ca1d1d23 2569 }
68e69fbd 2570
886ed6ec 2571 if (really_changed)
76fc1ea2
KH
2572 {
2573 if (buf_multibyte)
2574 {
d311d28c 2575 ptrdiff_t nchars =
c098fdb8 2576 multibyte_chars_in_text (substed, substed_len);
68e69fbd 2577
a7e979a4
PE
2578 newtext = make_multibyte_string ((char *) substed, nchars,
2579 substed_len);
76fc1ea2
KH
2580 }
2581 else
a7e979a4 2582 newtext = make_unibyte_string ((char *) substed, substed_len);
76fc1ea2 2583 }
68e69fbd 2584 xfree (substed);
ca1d1d23
JB
2585 }
2586
886ed6ec
RS
2587 /* Replace the old text with the new in the cleanest possible way. */
2588 replace_range (search_regs.start[sub], search_regs.end[sub],
2589 newtext, 1, 0, 1);
d5db4077 2590 newpoint = search_regs.start[sub] + SCHARS (newtext);
ca1d1d23
JB
2591
2592 if (case_action == all_caps)
886ed6ec
RS
2593 Fupcase_region (make_number (search_regs.start[sub]),
2594 make_number (newpoint));
ca1d1d23 2595 else if (case_action == cap_initial)
886ed6ec
RS
2596 Fupcase_initials_region (make_number (search_regs.start[sub]),
2597 make_number (newpoint));
3e18eecf 2598
98e942e0
RS
2599 /* Adjust search data for this change. */
2600 {
d311d28c
PE
2601 ptrdiff_t oldend = search_regs.end[sub];
2602 ptrdiff_t oldstart = search_regs.start[sub];
2603 ptrdiff_t change = newpoint - search_regs.end[sub];
2604 ptrdiff_t i;
98e942e0
RS
2605
2606 for (i = 0; i < search_regs.num_regs; i++)
2607 {
41c01205 2608 if (search_regs.start[i] >= oldend)
98e942e0 2609 search_regs.start[i] += change;
41c01205
DK
2610 else if (search_regs.start[i] > oldstart)
2611 search_regs.start[i] = oldstart;
2612 if (search_regs.end[i] >= oldend)
98e942e0 2613 search_regs.end[i] += change;
41c01205
DK
2614 else if (search_regs.end[i] > oldstart)
2615 search_regs.end[i] = oldstart;
98e942e0
RS
2616 }
2617 }
2618
b0eba991 2619 /* Put point back where it was in the text. */
8d808a65 2620 if (opoint <= 0)
fa8ed3e0 2621 TEMP_SET_PT (opoint + ZV);
b0eba991 2622 else
fa8ed3e0 2623 TEMP_SET_PT (opoint);
b0eba991
RS
2624
2625 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2626 move_if_not_intangible (newpoint);
177c0ea7 2627
ca1d1d23
JB
2628 return Qnil;
2629}
2630\f
2631static Lisp_Object
457882c2 2632match_limit (Lisp_Object num, bool beginningp)
ca1d1d23 2633{
a0efffc8 2634 EMACS_INT n;
ca1d1d23 2635
b7826503 2636 CHECK_NUMBER (num);
ca1d1d23 2637 n = XINT (num);
f90a5bf5 2638 if (n < 0)
bd2cbd56 2639 args_out_of_range (num, make_number (0));
f90a5bf5
RS
2640 if (search_regs.num_regs <= 0)
2641 error ("No match data, because no search succeeded");
9b9ceb61 2642 if (n >= search_regs.num_regs
4746118a 2643 || search_regs.start[n] < 0)
ca1d1d23
JB
2644 return Qnil;
2645 return (make_number ((beginningp) ? search_regs.start[n]
2646 : search_regs.end[n]));
2647}
2648
a7ca3326 2649DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
8c1a1077
PJ
2650 doc: /* Return position of start of text matched by last search.
2651SUBEXP, a number, specifies which parenthesized expression in the last
2652 regexp.
2653Value is nil if SUBEXPth pair didn't match, or there were less than
2654 SUBEXP pairs.
2655Zero means the entire text matched by the whole regexp or whole string. */)
5842a27b 2656 (Lisp_Object subexp)
ca1d1d23 2657{
5806161b 2658 return match_limit (subexp, 1);
ca1d1d23
JB
2659}
2660
a7ca3326 2661DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
8c1a1077
PJ
2662 doc: /* Return position of end of text matched by last search.
2663SUBEXP, a number, specifies which parenthesized expression in the last
2664 regexp.
2665Value is nil if SUBEXPth pair didn't match, or there were less than
2666 SUBEXP pairs.
2667Zero means the entire text matched by the whole regexp or whole string. */)
5842a27b 2668 (Lisp_Object subexp)
ca1d1d23 2669{
5806161b 2670 return match_limit (subexp, 0);
177c0ea7 2671}
ca1d1d23 2672
a7ca3326 2673DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
8c1a1077
PJ
2674 doc: /* Return a list containing all info on what the last search matched.
2675Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2676All the elements are markers or nil (nil if the Nth pair didn't match)
2677if the last match was on a buffer; integers or nil if a string was matched.
febb8aba 2678Use `set-match-data' to reinstate the data in this list.
8c1a1077 2679
41c01205
DK
2680If INTEGERS (the optional first argument) is non-nil, always use
2681integers \(rather than markers) to represent buffer positions. In
2682this case, and if the last match was in a buffer, the buffer will get
2683stored as one additional element at the end of the list.
2684
abd0071c
KS
2685If REUSE is a list, reuse it as part of the value. If REUSE is long
2686enough to hold all the values, and if INTEGERS is non-nil, no consing
2687is done.
2688
2689If optional third arg RESEAT is non-nil, any previous markers on the
2690REUSE list will be modified to point to nowhere.
2691
140a6b7e 2692Return value is undefined if the last search failed. */)
5842a27b 2693 (Lisp_Object integers, Lisp_Object reuse, Lisp_Object reseat)
ca1d1d23 2694{
56256c2a 2695 Lisp_Object tail, prev;
4746118a 2696 Lisp_Object *data;
d311d28c 2697 ptrdiff_t i, len;
ca1d1d23 2698
abd0071c
KS
2699 if (!NILP (reseat))
2700 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2701 if (MARKERP (XCAR (tail)))
2702 {
51f10faa 2703 unchain_marker (XMARKER (XCAR (tail)));
abd0071c
KS
2704 XSETCAR (tail, Qnil);
2705 }
2706
daa37602 2707 if (NILP (last_thing_searched))
c36bcf1b 2708 return Qnil;
daa37602 2709
6bbd7a29
GM
2710 prev = Qnil;
2711
38182d90 2712 data = alloca ((2 * search_regs.num_regs + 1) * sizeof *data);
4746118a 2713
41c01205 2714 len = 0;
4746118a 2715 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23 2716 {
d311d28c 2717 ptrdiff_t start = search_regs.start[i];
ca1d1d23
JB
2718 if (start >= 0)
2719 {
56256c2a
RS
2720 if (EQ (last_thing_searched, Qt)
2721 || ! NILP (integers))
ca1d1d23 2722 {
c235cce7
KH
2723 XSETFASTINT (data[2 * i], start);
2724 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2725 }
0ed62dc7 2726 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2727 {
2728 data[2 * i] = Fmake_marker ();
daa37602
JB
2729 Fset_marker (data[2 * i],
2730 make_number (start),
2731 last_thing_searched);
ca1d1d23
JB
2732 data[2 * i + 1] = Fmake_marker ();
2733 Fset_marker (data[2 * i + 1],
177c0ea7 2734 make_number (search_regs.end[i]),
daa37602 2735 last_thing_searched);
ca1d1d23 2736 }
daa37602
JB
2737 else
2738 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
1088b922 2739 emacs_abort ();
daa37602 2740
abd0071c 2741 len = 2 * i + 2;
ca1d1d23
JB
2742 }
2743 else
abd0071c 2744 data[2 * i] = data[2 * i + 1] = Qnil;
ca1d1d23 2745 }
56256c2a 2746
bd2cbd56 2747 if (BUFFERP (last_thing_searched) && !NILP (integers))
41c01205 2748 {
bd2cbd56 2749 data[len] = last_thing_searched;
41c01205
DK
2750 len++;
2751 }
2752
56256c2a
RS
2753 /* If REUSE is not usable, cons up the values and return them. */
2754 if (! CONSP (reuse))
41c01205 2755 return Flist (len, data);
56256c2a
RS
2756
2757 /* If REUSE is a list, store as many value elements as will fit
2758 into the elements of REUSE. */
2759 for (i = 0, tail = reuse; CONSP (tail);
c1d497be 2760 i++, tail = XCDR (tail))
56256c2a 2761 {
41c01205 2762 if (i < len)
f3fbd155 2763 XSETCAR (tail, data[i]);
56256c2a 2764 else
f3fbd155 2765 XSETCAR (tail, Qnil);
56256c2a
RS
2766 prev = tail;
2767 }
2768
2769 /* If we couldn't fit all value elements into REUSE,
2770 cons up the rest of them and add them to the end of REUSE. */
41c01205
DK
2771 if (i < len)
2772 XSETCDR (prev, Flist (len - i, data + i));
56256c2a
RS
2773
2774 return reuse;
ca1d1d23
JB
2775}
2776
b51d6c92
SM
2777/* We used to have an internal use variant of `reseat' described as:
2778
2779 If RESEAT is `evaporate', put the markers back on the free list
2780 immediately. No other references to the markers must exist in this
2781 case, so it is used only internally on the unwind stack and
2782 save-match-data from Lisp.
2783
2784 But it was ill-conceived: those supposedly-internal markers get exposed via
2785 the undo-list, so freeing them here is unsafe. */
ca1d1d23 2786
a7ca3326 2787DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
8c1a1077 2788 doc: /* Set internal data on last search match from elements of LIST.
abd0071c
KS
2789LIST should have been created by calling `match-data' previously.
2790
51f10faa 2791If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
5842a27b 2792 (register Lisp_Object list, Lisp_Object reseat)
ca1d1d23 2793{
5f2ab479 2794 ptrdiff_t i;
ca1d1d23
JB
2795 register Lisp_Object marker;
2796
7074fde6
FP
2797 if (running_asynch_code)
2798 save_search_regs ();
2799
29100cea 2800 CHECK_LIST (list);
ca1d1d23 2801
41c01205
DK
2802 /* Unless we find a marker with a buffer or an explicit buffer
2803 in LIST, assume that this match data came from a string. */
daa37602
JB
2804 last_thing_searched = Qt;
2805
4746118a
JB
2806 /* Allocate registers if they don't already exist. */
2807 {
d311d28c 2808 EMACS_INT length = XFASTINT (Flength (list)) / 2;
4746118a
JB
2809
2810 if (length > search_regs.num_regs)
2811 {
0065d054 2812 ptrdiff_t num_regs = search_regs.num_regs;
d311d28c
PE
2813 if (PTRDIFF_MAX < length)
2814 memory_full (SIZE_MAX);
0065d054
PE
2815 search_regs.start =
2816 xpalloc (search_regs.start, &num_regs, length - num_regs,
2817 min (PTRDIFF_MAX, UINT_MAX), sizeof (regoff_t));
2818 search_regs.end =
2819 xrealloc (search_regs.end, num_regs * sizeof (regoff_t));
2820
2821 for (i = search_regs.num_regs; i < num_regs; i++)
e62371e9
KH
2822 search_regs.start[i] = -1;
2823
0065d054 2824 search_regs.num_regs = num_regs;
4746118a 2825 }
ca1d1d23 2826
abd0071c 2827 for (i = 0; CONSP (list); i++)
41c01205 2828 {
abd0071c 2829 marker = XCAR (list);
bd2cbd56 2830 if (BUFFERP (marker))
c3762cbd 2831 {
bd2cbd56 2832 last_thing_searched = marker;
c3762cbd
DK
2833 break;
2834 }
2835 if (i >= length)
2836 break;
41c01205
DK
2837 if (NILP (marker))
2838 {
2839 search_regs.start[i] = -1;
abd0071c 2840 list = XCDR (list);
41c01205
DK
2841 }
2842 else
2843 {
d311d28c 2844 Lisp_Object from;
abd0071c 2845 Lisp_Object m;
e2811828 2846
abd0071c 2847 m = marker;
41c01205
DK
2848 if (MARKERP (marker))
2849 {
2850 if (XMARKER (marker)->buffer == 0)
2851 XSETFASTINT (marker, 0);
2852 else
2853 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2854 }
e2811828 2855
41c01205 2856 CHECK_NUMBER_COERCE_MARKER (marker);
d311d28c 2857 from = marker;
e2811828 2858
abd0071c
KS
2859 if (!NILP (reseat) && MARKERP (m))
2860 {
b51d6c92 2861 unchain_marker (XMARKER (m));
9ad54a7e 2862 XSETCAR (list, Qnil);
abd0071c
KS
2863 }
2864
2865 if ((list = XCDR (list), !CONSP (list)))
2866 break;
2867
2868 m = marker = XCAR (list);
2869
41c01205
DK
2870 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2871 XSETFASTINT (marker, 0);
e2811828 2872
41c01205 2873 CHECK_NUMBER_COERCE_MARKER (marker);
0b1fccc4
PE
2874 if ((XINT (from) < 0
2875 ? TYPE_MINIMUM (regoff_t) <= XINT (from)
2876 : XINT (from) <= TYPE_MAXIMUM (regoff_t))
2877 && (XINT (marker) < 0
2878 ? TYPE_MINIMUM (regoff_t) <= XINT (marker)
2879 : XINT (marker) <= TYPE_MAXIMUM (regoff_t)))
d311d28c
PE
2880 {
2881 search_regs.start[i] = XINT (from);
2882 search_regs.end[i] = XINT (marker);
2883 }
2884 else
2885 {
2886 search_regs.start[i] = -1;
2887 }
abd0071c
KS
2888
2889 if (!NILP (reseat) && MARKERP (m))
2890 {
b51d6c92 2891 unchain_marker (XMARKER (m));
9ad54a7e 2892 XSETCAR (list, Qnil);
abd0071c 2893 }
41c01205 2894 }
abd0071c 2895 list = XCDR (list);
41c01205 2896 }
ca1d1d23 2897
41c01205
DK
2898 for (; i < search_regs.num_regs; i++)
2899 search_regs.start[i] = -1;
2900 }
ca1d1d23 2901
177c0ea7 2902 return Qnil;
ca1d1d23
JB
2903}
2904
457882c2 2905/* If true the match data have been saved in saved_search_regs
7074fde6 2906 during the execution of a sentinel or filter. */
457882c2 2907static bool search_regs_saved;
7074fde6 2908static struct re_registers saved_search_regs;
41c01205 2909static Lisp_Object saved_last_thing_searched;
7074fde6
FP
2910
2911/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2912 if asynchronous code (filter or sentinel) is running. */
2913static void
971de7fb 2914save_search_regs (void)
7074fde6
FP
2915{
2916 if (!search_regs_saved)
2917 {
2918 saved_search_regs.num_regs = search_regs.num_regs;
2919 saved_search_regs.start = search_regs.start;
2920 saved_search_regs.end = search_regs.end;
41c01205
DK
2921 saved_last_thing_searched = last_thing_searched;
2922 last_thing_searched = Qnil;
7074fde6 2923 search_regs.num_regs = 0;
2d4a771a
RS
2924 search_regs.start = 0;
2925 search_regs.end = 0;
7074fde6
FP
2926
2927 search_regs_saved = 1;
2928 }
2929}
2930
2931/* Called upon exit from filters and sentinels. */
2932void
971de7fb 2933restore_search_regs (void)
7074fde6
FP
2934{
2935 if (search_regs_saved)
2936 {
2937 if (search_regs.num_regs > 0)
2938 {
2939 xfree (search_regs.start);
2940 xfree (search_regs.end);
2941 }
2942 search_regs.num_regs = saved_search_regs.num_regs;
2943 search_regs.start = saved_search_regs.start;
2944 search_regs.end = saved_search_regs.end;
41c01205
DK
2945 last_thing_searched = saved_last_thing_searched;
2946 saved_last_thing_searched = Qnil;
7074fde6
FP
2947 search_regs_saved = 0;
2948 }
2949}
2950
27e498e6 2951static void
971de7fb 2952unwind_set_match_data (Lisp_Object list)
abd0071c 2953{
b51d6c92 2954 /* It is NOT ALWAYS safe to free (evaporate) the markers immediately. */
27e498e6 2955 Fset_match_data (list, Qt);
abd0071c
KS
2956}
2957
2958/* Called to unwind protect the match data. */
2959void
971de7fb 2960record_unwind_save_match_data (void)
abd0071c
KS
2961{
2962 record_unwind_protect (unwind_set_match_data,
2963 Fmatch_data (Qnil, Qnil, Qnil));
2964}
2965
e9a452d9 2966/* Quote a string to deactivate reg-expr chars */
ca1d1d23
JB
2967
2968DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
8c1a1077 2969 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
5842a27b 2970 (Lisp_Object string)
ca1d1d23 2971{
457882c2
PE
2972 char *in, *out, *end;
2973 char *temp;
2974 ptrdiff_t backslashes_added = 0;
ca1d1d23 2975
b7826503 2976 CHECK_STRING (string);
ca1d1d23 2977
38182d90 2978 temp = alloca (SBYTES (string) * 2);
ca1d1d23
JB
2979
2980 /* Now copy the data into the new string, inserting escapes. */
2981
a7e979a4 2982 in = SSDATA (string);
d5db4077 2983 end = in + SBYTES (string);
177c0ea7 2984 out = temp;
ca1d1d23
JB
2985
2986 for (; in != end; in++)
2987 {
66bc6082 2988 if (*in == '['
ca1d1d23
JB
2989 || *in == '*' || *in == '.' || *in == '\\'
2990 || *in == '?' || *in == '+'
2991 || *in == '^' || *in == '$')
0c8533c6 2992 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
2993 *out++ = *in;
2994 }
2995
3f8100f1 2996 return make_specified_string (temp,
d5db4077 2997 SCHARS (string) + backslashes_added,
3f8100f1
RS
2998 out - temp,
2999 STRING_MULTIBYTE (string));
ca1d1d23 3000}
177c0ea7 3001\f
dfcf069d 3002void
971de7fb 3003syms_of_search (void)
ca1d1d23
JB
3004{
3005 register int i;
3006
487282dc
KH
3007 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3008 {
3009 searchbufs[i].buf.allocated = 100;
23f86fce 3010 searchbufs[i].buf.buffer = xmalloc (100);
487282dc
KH
3011 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3012 searchbufs[i].regexp = Qnil;
ecdb561e 3013 searchbufs[i].whitespace_regexp = Qnil;
b69e3c18 3014 searchbufs[i].syntax_table = Qnil;
487282dc 3015 staticpro (&searchbufs[i].regexp);
aa77b5ce 3016 staticpro (&searchbufs[i].whitespace_regexp);
b69e3c18 3017 staticpro (&searchbufs[i].syntax_table);
487282dc
KH
3018 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3019 }
3020 searchbuf_head = &searchbufs[0];
ca1d1d23 3021
cd3520a4
JB
3022 DEFSYM (Qsearch_failed, "search-failed");
3023 DEFSYM (Qinvalid_regexp, "invalid-regexp");
ca1d1d23
JB
3024
3025 Fput (Qsearch_failed, Qerror_conditions,
3438fe21 3026 listn (CONSTYPE_PURE, 2, Qsearch_failed, Qerror));
ca1d1d23 3027 Fput (Qsearch_failed, Qerror_message,
2a0213a6 3028 build_pure_c_string ("Search failed"));
ca1d1d23
JB
3029
3030 Fput (Qinvalid_regexp, Qerror_conditions,
3438fe21 3031 listn (CONSTYPE_PURE, 2, Qinvalid_regexp, Qerror));
ca1d1d23 3032 Fput (Qinvalid_regexp, Qerror_message,
2a0213a6 3033 build_pure_c_string ("Invalid regexp"));
ca1d1d23 3034
daa37602
JB
3035 last_thing_searched = Qnil;
3036 staticpro (&last_thing_searched);
3037
0f6af254
DK
3038 saved_last_thing_searched = Qnil;
3039 staticpro (&saved_last_thing_searched);
3040
29208e82 3041 DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp,
e2811828 3042 doc: /* Regexp to substitute for bunches of spaces in regexp search.
f31a9a68
RS
3043Some commands use this for user-specified regexps.
3044Spaces that occur inside character classes or repetition operators
3045or other such regexp constructs are not replaced with this.
3046A value of nil (which is the normal value) means treat spaces literally. */);
41a33295 3047 Vsearch_spaces_regexp = Qnil;
f31a9a68 3048
29208e82 3049 DEFVAR_LISP ("inhibit-changing-match-data", Vinhibit_changing_match_data,
ef887810 3050 doc: /* Internal use only.
39d0bf74
RS
3051If non-nil, the primitive searching and matching functions
3052such as `looking-at', `string-match', `re-search-forward', etc.,
3053do not set the match data. The proper way to use this variable
3054is to bind it with `let' around a small expression. */);
ef887810
RS
3055 Vinhibit_changing_match_data = Qnil;
3056
ca1d1d23 3057 defsubr (&Slooking_at);
b819a390
RS
3058 defsubr (&Sposix_looking_at);
3059 defsubr (&Sstring_match);
3060 defsubr (&Sposix_string_match);
ca1d1d23
JB
3061 defsubr (&Ssearch_forward);
3062 defsubr (&Ssearch_backward);
ca1d1d23
JB
3063 defsubr (&Sre_search_forward);
3064 defsubr (&Sre_search_backward);
b819a390
RS
3065 defsubr (&Sposix_search_forward);
3066 defsubr (&Sposix_search_backward);
ca1d1d23
JB
3067 defsubr (&Sreplace_match);
3068 defsubr (&Smatch_beginning);
3069 defsubr (&Smatch_end);
3070 defsubr (&Smatch_data);
3f1c005b 3071 defsubr (&Sset_match_data);
ca1d1d23
JB
3072 defsubr (&Sregexp_quote);
3073}