Merge from emacs-23; up to 2010-06-29T18:17:31Z!cyd@stupidchicken.com.
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
acaf905b 2 Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2012
8cabe764 3 Free Software Foundation, Inc.
ca1d1d23
JB
4
5This file is part of GNU Emacs.
6
9ec0b715 7GNU Emacs is free software: you can redistribute it and/or modify
ca1d1d23 8it under the terms of the GNU General Public License as published by
9ec0b715
GM
9the Free Software Foundation, either version 3 of the License, or
10(at your option) any later version.
ca1d1d23
JB
11
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
9ec0b715 18along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
ca1d1d23
JB
19
20
18160b98 21#include <config.h>
d7306fe6 22#include <setjmp.h>
ca1d1d23
JB
23#include "lisp.h"
24#include "syntax.h"
5679531d 25#include "category.h"
ca1d1d23 26#include "buffer.h"
76eb0881 27#include "character.h"
89ad6492 28#include "charset.h"
9169c321 29#include "region-cache.h"
ca1d1d23 30#include "commands.h"
9ac0d9e0 31#include "blockinput.h"
bf1760bb 32#include "intervals.h"
4746118a 33
ca1d1d23
JB
34#include <sys/types.h>
35#include "regex.h"
36
1d288aef 37#define REGEXP_CACHE_SIZE 20
ca1d1d23 38
487282dc
KH
39/* If the regexp is non-nil, then the buffer contains the compiled form
40 of that regexp, suitable for searching. */
1d288aef
RS
41struct regexp_cache
42{
487282dc 43 struct regexp_cache *next;
ecdb561e 44 Lisp_Object regexp, whitespace_regexp;
b69e3c18 45 /* Syntax table for which the regexp applies. We need this because
58e95211
SM
46 of character classes. If this is t, then the compiled pattern is valid
47 for any syntax-table. */
b69e3c18 48 Lisp_Object syntax_table;
487282dc
KH
49 struct re_pattern_buffer buf;
50 char fastmap[0400];
b819a390
RS
51 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
52 char posix;
487282dc 53};
ca1d1d23 54
487282dc 55/* The instances of that struct. */
989b29ad 56static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 57
487282dc 58/* The head of the linked list; points to the most recently used buffer. */
989b29ad 59static struct regexp_cache *searchbuf_head;
ca1d1d23 60
ca1d1d23 61
4746118a
JB
62/* Every call to re_match, etc., must pass &search_regs as the regs
63 argument unless you can show it is unnecessary (i.e., if re_match
64 is certainly going to be called again before region-around-match
65 can be called).
66
67 Since the registers are now dynamically allocated, we need to make
68 sure not to refer to the Nth register before checking that it has
1113d9db
JB
69 been allocated by checking search_regs.num_regs.
70
71 The regex code keeps track of whether it has allocated the search
487282dc
KH
72 buffer using bits in the re_pattern_buffer. This means that whenever
73 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
74 allocated any registers, and will allocate new registers the next
75 time you call a searching or matching function. Therefore, we need
76 to call re_set_registers after compiling a new pattern or after
77 setting the match registers, so that the regex functions will be
78 able to free or re-allocate it properly. */
ca1d1d23
JB
79static struct re_registers search_regs;
80
daa37602
JB
81/* The buffer in which the last search was performed, or
82 Qt if the last search was done in a string;
83 Qnil if no searching has been done yet. */
84static Lisp_Object last_thing_searched;
ca1d1d23 85
02b16839 86/* Error condition signaled when regexp compile_pattern fails. */
955cbe7b 87static Lisp_Object Qinvalid_regexp;
ca1d1d23 88
02b16839 89/* Error condition used for failing searches. */
955cbe7b 90static Lisp_Object Qsearch_failed;
06f77a8a 91
f57e2426
J
92static void set_search_regs (EMACS_INT, EMACS_INT);
93static void save_search_regs (void);
c098fdb8
EZ
94static EMACS_INT simple_search (EMACS_INT, unsigned char *, EMACS_INT,
95 EMACS_INT, Lisp_Object, EMACS_INT, EMACS_INT,
f57e2426 96 EMACS_INT, EMACS_INT);
1db5b1ad
JB
97static EMACS_INT boyer_moore (EMACS_INT, unsigned char *, EMACS_INT,
98 Lisp_Object, Lisp_Object, EMACS_INT,
99 EMACS_INT, int);
f57e2426 100static EMACS_INT search_buffer (Lisp_Object, EMACS_INT, EMACS_INT,
c098fdb8 101 EMACS_INT, EMACS_INT, EMACS_INT, int,
f57e2426 102 Lisp_Object, Lisp_Object, int);
971de7fb 103static void matcher_overflow (void) NO_RETURN;
b819a390 104
ca1d1d23 105static void
971de7fb 106matcher_overflow (void)
ca1d1d23
JB
107{
108 error ("Stack overflow in regexp matcher");
109}
110
b819a390
RS
111/* Compile a regexp and signal a Lisp error if anything goes wrong.
112 PATTERN is the pattern to compile.
113 CP is the place to put the result.
facdc750 114 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390 115 POSIX is nonzero if we want full backtracking (POSIX style)
5679531d 116 for this pattern. 0 means backtrack only enough to get a valid match.
bbc73b48
KH
117
118 The behavior also depends on Vsearch_spaces_regexp. */
ca1d1d23 119
487282dc 120static void
cfe0661d 121compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, Lisp_Object translate, int posix)
ca1d1d23 122{
d451e4db 123 char *val;
b819a390 124 reg_syntax_t old;
ca1d1d23 125
487282dc 126 cp->regexp = Qnil;
59fab369 127 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
b819a390 128 cp->posix = posix;
93daa011 129 cp->buf.multibyte = STRING_MULTIBYTE (pattern);
89ad6492 130 cp->buf.charset_unibyte = charset_unibyte;
d8c85250
CY
131 if (STRINGP (Vsearch_spaces_regexp))
132 cp->whitespace_regexp = Vsearch_spaces_regexp;
133 else
134 cp->whitespace_regexp = Qnil;
135
e7b2dd2e
RS
136 /* rms: I think BLOCK_INPUT is not needed here any more,
137 because regex.c defines malloc to call xmalloc.
138 Using BLOCK_INPUT here means the debugger won't run if an error occurs.
139 So let's turn it off. */
140 /* BLOCK_INPUT; */
fb4a568d 141 old = re_set_syntax (RE_SYNTAX_EMACS
b819a390 142 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
d8c85250
CY
143
144 if (STRINGP (Vsearch_spaces_regexp))
42a5b22f 145 re_set_whitespace_regexp (SSDATA (Vsearch_spaces_regexp));
d8c85250
CY
146 else
147 re_set_whitespace_regexp (NULL);
bbc73b48 148
51b59d79 149 val = (char *) re_compile_pattern (SSDATA (pattern),
8f924df7 150 SBYTES (pattern), &cp->buf);
bbc73b48 151
58e95211
SM
152 /* If the compiled pattern hard codes some of the contents of the
153 syntax-table, it can only be reused with *this* syntax table. */
4b4deea2 154 cp->syntax_table = cp->buf.used_syntax ? BVAR (current_buffer, syntax_table) : Qt;
58e95211 155
bbc73b48
KH
156 re_set_whitespace_regexp (NULL);
157
b819a390 158 re_set_syntax (old);
e7b2dd2e 159 /* UNBLOCK_INPUT; */
ca1d1d23 160 if (val)
06f77a8a 161 xsignal1 (Qinvalid_regexp, build_string (val));
1113d9db 162
487282dc 163 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
164}
165
6efc7887
RS
166/* Shrink each compiled regexp buffer in the cache
167 to the size actually used right now.
168 This is called from garbage collection. */
169
170void
971de7fb 171shrink_regexp_cache (void)
6efc7887 172{
a968f437 173 struct regexp_cache *cp;
6efc7887
RS
174
175 for (cp = searchbuf_head; cp != 0; cp = cp->next)
176 {
177 cp->buf.allocated = cp->buf.used;
178 cp->buf.buffer
b23c0a83 179 = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
6efc7887
RS
180 }
181}
182
54dd3310
SM
183/* Clear the regexp cache w.r.t. a particular syntax table,
184 because it was changed.
e5b94d44
CY
185 There is no danger of memory leak here because re_compile_pattern
186 automagically manages the memory in each re_pattern_buffer struct,
187 based on its `allocated' and `buffer' values. */
188void
971de7fb 189clear_regexp_cache (void)
e5b94d44
CY
190{
191 int i;
192
193 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
920fd1fc
EZ
194 /* It's tempting to compare with the syntax-table we've actually changed,
195 but it's not sufficient because char-table inheritance means that
54dd3310
SM
196 modifying one syntax-table can change others at the same time. */
197 if (!EQ (searchbufs[i].syntax_table, Qt))
198 searchbufs[i].regexp = Qnil;
e5b94d44
CY
199}
200
487282dc 201/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
202 the cache.
203 PATTERN is the pattern to compile.
facdc750 204 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
205 REGP is the structure that says where to store the "register"
206 values that will result from matching this pattern.
207 If it is 0, we should compile the pattern not to record any
208 subexpression bounds.
209 POSIX is nonzero if we want full backtracking (POSIX style)
210 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
211
212struct re_pattern_buffer *
971de7fb 213compile_pattern (Lisp_Object pattern, struct re_registers *regp, Lisp_Object translate, int posix, int multibyte)
487282dc
KH
214{
215 struct regexp_cache *cp, **cpp;
216
217 for (cpp = &searchbuf_head; ; cpp = &cp->next)
218 {
219 cp = *cpp;
f1b9c7c1
KR
220 /* Entries are initialized to nil, and may be set to nil by
221 compile_pattern_1 if the pattern isn't valid. Don't apply
49a5f770
KR
222 string accessors in those cases. However, compile_pattern_1
223 is only applied to the cache entry we pick here to reuse. So
224 nil should never appear before a non-nil entry. */
7c752c80 225 if (NILP (cp->regexp))
f1b9c7c1 226 goto compile_it;
d5db4077 227 if (SCHARS (cp->regexp) == SCHARS (pattern)
cf69b13e 228 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
1d288aef 229 && !NILP (Fstring_equal (cp->regexp, pattern))
59fab369 230 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
5679531d 231 && cp->posix == posix
58e95211 232 && (EQ (cp->syntax_table, Qt)
4b4deea2 233 || EQ (cp->syntax_table, BVAR (current_buffer, syntax_table)))
89ad6492
KH
234 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
235 && cp->buf.charset_unibyte == charset_unibyte)
487282dc
KH
236 break;
237
f1b9c7c1
KR
238 /* If we're at the end of the cache, compile into the nil cell
239 we found, or the last (least recently used) cell with a
240 string value. */
487282dc
KH
241 if (cp->next == 0)
242 {
f1b9c7c1 243 compile_it:
cfe0661d 244 compile_pattern_1 (cp, pattern, translate, posix);
487282dc
KH
245 break;
246 }
247 }
248
249 /* When we get here, cp (aka *cpp) contains the compiled pattern,
250 either because we found it in the cache or because we just compiled it.
251 Move it to the front of the queue to mark it as most recently used. */
252 *cpp = cp->next;
253 cp->next = searchbuf_head;
254 searchbuf_head = cp;
1113d9db 255
6639708c
RS
256 /* Advise the searching functions about the space we have allocated
257 for register data. */
258 if (regp)
259 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
260
78edd3b7 261 /* The compiled pattern can be used both for multibyte and unibyte
89ad6492
KH
262 target. But, we have to tell which the pattern is used for. */
263 cp->buf.target_multibyte = multibyte;
264
487282dc 265 return &cp->buf;
ca1d1d23
JB
266}
267
ca1d1d23 268\f
b819a390 269static Lisp_Object
971de7fb 270looking_at_1 (Lisp_Object string, int posix)
ca1d1d23
JB
271{
272 Lisp_Object val;
273 unsigned char *p1, *p2;
e7deaab0 274 EMACS_INT s1, s2;
c098fdb8 275 register EMACS_INT i;
487282dc 276 struct re_pattern_buffer *bufp;
ca1d1d23 277
7074fde6
FP
278 if (running_asynch_code)
279 save_search_regs ();
280
910c747a 281 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
4b4deea2
TT
282 XCHAR_TABLE (BVAR (current_buffer, case_canon_table))->extras[2]
283 = BVAR (current_buffer, case_eqv_table);
910c747a 284
b7826503 285 CHECK_STRING (string);
ef887810
RS
286 bufp = compile_pattern (string,
287 (NILP (Vinhibit_changing_match_data)
288 ? &search_regs : NULL),
4b4deea2
TT
289 (!NILP (BVAR (current_buffer, case_fold_search))
290 ? BVAR (current_buffer, case_canon_table) : Qnil),
0c8533c6 291 posix,
4b4deea2 292 !NILP (BVAR (current_buffer, enable_multibyte_characters)));
ca1d1d23
JB
293
294 immediate_quit = 1;
295 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
296
297 /* Get pointers and sizes of the two strings
298 that make up the visible portion of the buffer. */
299
300 p1 = BEGV_ADDR;
fa8ed3e0 301 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 302 p2 = GAP_END_ADDR;
fa8ed3e0 303 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
304 if (s1 < 0)
305 {
306 p2 = p1;
fa8ed3e0 307 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
308 s1 = 0;
309 }
310 if (s2 < 0)
311 {
fa8ed3e0 312 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
313 s2 = 0;
314 }
8bb43c28
RS
315
316 re_match_object = Qnil;
177c0ea7 317
487282dc 318 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
ef887810
RS
319 PT_BYTE - BEGV_BYTE,
320 (NILP (Vinhibit_changing_match_data)
321 ? &search_regs : NULL),
fa8ed3e0 322 ZV_BYTE - BEGV_BYTE);
de182d70 323 immediate_quit = 0;
177c0ea7 324
ca1d1d23
JB
325 if (i == -2)
326 matcher_overflow ();
327
328 val = (0 <= i ? Qt : Qnil);
ef887810 329 if (NILP (Vinhibit_changing_match_data) && i >= 0)
fa8ed3e0
RS
330 for (i = 0; i < search_regs.num_regs; i++)
331 if (search_regs.start[i] >= 0)
332 {
333 search_regs.start[i]
334 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
335 search_regs.end[i]
336 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
337 }
ef887810
RS
338
339 /* Set last_thing_searched only when match data is changed. */
340 if (NILP (Vinhibit_changing_match_data))
341 XSETBUFFER (last_thing_searched, current_buffer);
342
ca1d1d23
JB
343 return val;
344}
345
b819a390 346DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
8c1a1077
PJ
347 doc: /* Return t if text after point matches regular expression REGEXP.
348This function modifies the match data that `match-beginning',
349`match-end' and `match-data' access; save and restore the match
350data if you want to preserve them. */)
5842a27b 351 (Lisp_Object regexp)
b819a390 352{
94f94972 353 return looking_at_1 (regexp, 0);
b819a390
RS
354}
355
356DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
8c1a1077
PJ
357 doc: /* Return t if text after point matches regular expression REGEXP.
358Find the longest match, in accord with Posix regular expression rules.
359This function modifies the match data that `match-beginning',
360`match-end' and `match-data' access; save and restore the match
361data if you want to preserve them. */)
5842a27b 362 (Lisp_Object regexp)
b819a390 363{
94f94972 364 return looking_at_1 (regexp, 1);
b819a390
RS
365}
366\f
367static Lisp_Object
971de7fb 368string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, int posix)
ca1d1d23 369{
a53e2e89 370 EMACS_INT val;
487282dc 371 struct re_pattern_buffer *bufp;
e7deaab0 372 EMACS_INT pos, pos_byte;
0c8533c6 373 int i;
ca1d1d23 374
7074fde6
FP
375 if (running_asynch_code)
376 save_search_regs ();
377
b7826503
PJ
378 CHECK_STRING (regexp);
379 CHECK_STRING (string);
ca1d1d23
JB
380
381 if (NILP (start))
0c8533c6 382 pos = 0, pos_byte = 0;
ca1d1d23
JB
383 else
384 {
c098fdb8 385 EMACS_INT len = SCHARS (string);
ca1d1d23 386
b7826503 387 CHECK_NUMBER (start);
0c8533c6
RS
388 pos = XINT (start);
389 if (pos < 0 && -pos <= len)
390 pos = len + pos;
391 else if (0 > pos || pos > len)
ca1d1d23 392 args_out_of_range (string, start);
0c8533c6 393 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
394 }
395
910c747a 396 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
4b4deea2
TT
397 XCHAR_TABLE (BVAR (current_buffer, case_canon_table))->extras[2]
398 = BVAR (current_buffer, case_eqv_table);
910c747a 399
ef887810
RS
400 bufp = compile_pattern (regexp,
401 (NILP (Vinhibit_changing_match_data)
402 ? &search_regs : NULL),
4b4deea2
TT
403 (!NILP (BVAR (current_buffer, case_fold_search))
404 ? BVAR (current_buffer, case_canon_table) : Qnil),
0c8533c6
RS
405 posix,
406 STRING_MULTIBYTE (string));
ca1d1d23 407 immediate_quit = 1;
8bb43c28 408 re_match_object = string;
177c0ea7 409
51b59d79 410 val = re_search (bufp, SSDATA (string),
d5db4077
KR
411 SBYTES (string), pos_byte,
412 SBYTES (string) - pos_byte,
ef887810
RS
413 (NILP (Vinhibit_changing_match_data)
414 ? &search_regs : NULL));
ca1d1d23 415 immediate_quit = 0;
ef887810
RS
416
417 /* Set last_thing_searched only when match data is changed. */
418 if (NILP (Vinhibit_changing_match_data))
419 last_thing_searched = Qt;
420
ca1d1d23
JB
421 if (val == -2)
422 matcher_overflow ();
423 if (val < 0) return Qnil;
0c8533c6 424
ef887810
RS
425 if (NILP (Vinhibit_changing_match_data))
426 for (i = 0; i < search_regs.num_regs; i++)
427 if (search_regs.start[i] >= 0)
428 {
429 search_regs.start[i]
430 = string_byte_to_char (string, search_regs.start[i]);
431 search_regs.end[i]
432 = string_byte_to_char (string, search_regs.end[i]);
433 }
0c8533c6
RS
434
435 return make_number (string_byte_to_char (string, val));
ca1d1d23 436}
e59a8453 437
a7ca3326 438DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
8c1a1077 439 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
b85acc4b 440Matching ignores case if `case-fold-search' is non-nil.
8c1a1077
PJ
441If third arg START is non-nil, start search at that index in STRING.
442For index of first char beyond the match, do (match-end 0).
443`match-end' and `match-beginning' also give indices of substrings
2bd2f32d
RS
444matched by parenthesis constructs in the pattern.
445
446You can use the function `match-string' to extract the substrings
447matched by the parenthesis constructions in REGEXP. */)
5842a27b 448 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
b819a390
RS
449{
450 return string_match_1 (regexp, string, start, 0);
451}
452
453DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
8c1a1077
PJ
454 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
455Find the longest match, in accord with Posix regular expression rules.
456Case is ignored if `case-fold-search' is non-nil in the current buffer.
457If third arg START is non-nil, start search at that index in STRING.
458For index of first char beyond the match, do (match-end 0).
459`match-end' and `match-beginning' also give indices of substrings
460matched by parenthesis constructs in the pattern. */)
5842a27b 461 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
b819a390
RS
462{
463 return string_match_1 (regexp, string, start, 1);
464}
465
e59a8453
RS
466/* Match REGEXP against STRING, searching all of STRING,
467 and return the index of the match, or negative on failure.
468 This does not clobber the match data. */
469
a53e2e89 470EMACS_INT
971de7fb 471fast_string_match (Lisp_Object regexp, Lisp_Object string)
e59a8453 472{
a53e2e89 473 EMACS_INT val;
487282dc 474 struct re_pattern_buffer *bufp;
e59a8453 475
facdc750
RS
476 bufp = compile_pattern (regexp, 0, Qnil,
477 0, STRING_MULTIBYTE (string));
e59a8453 478 immediate_quit = 1;
8bb43c28 479 re_match_object = string;
177c0ea7 480
51b59d79 481 val = re_search (bufp, SSDATA (string),
d5db4077
KR
482 SBYTES (string), 0,
483 SBYTES (string), 0);
e59a8453
RS
484 immediate_quit = 0;
485 return val;
486}
5679531d
KH
487
488/* Match REGEXP against STRING, searching all of STRING ignoring case,
489 and return the index of the match, or negative on failure.
0c8533c6
RS
490 This does not clobber the match data.
491 We assume that STRING contains single-byte characters. */
5679531d 492
a53e2e89 493EMACS_INT
971de7fb 494fast_c_string_match_ignore_case (Lisp_Object regexp, const char *string)
5679531d 495{
a53e2e89 496 EMACS_INT val;
5679531d 497 struct re_pattern_buffer *bufp;
c098fdb8 498 size_t len = strlen (string);
5679531d 499
0c8533c6 500 regexp = string_make_unibyte (regexp);
b4577c63 501 re_match_object = Qt;
5679531d 502 bufp = compile_pattern (regexp, 0,
0190922f 503 Vascii_canon_table, 0,
f8bd51c4 504 0);
5679531d
KH
505 immediate_quit = 1;
506 val = re_search (bufp, string, len, 0, len, 0);
507 immediate_quit = 0;
508 return val;
509}
be5f4dfb
KH
510
511/* Like fast_string_match but ignore case. */
512
a53e2e89 513EMACS_INT
971de7fb 514fast_string_match_ignore_case (Lisp_Object regexp, Lisp_Object string)
be5f4dfb 515{
a53e2e89 516 EMACS_INT val;
be5f4dfb
KH
517 struct re_pattern_buffer *bufp;
518
0190922f 519 bufp = compile_pattern (regexp, 0, Vascii_canon_table,
be5f4dfb
KH
520 0, STRING_MULTIBYTE (string));
521 immediate_quit = 1;
522 re_match_object = string;
523
51b59d79 524 val = re_search (bufp, SSDATA (string),
be5f4dfb
KH
525 SBYTES (string), 0,
526 SBYTES (string), 0);
527 immediate_quit = 0;
528 return val;
529}
ca1d1d23 530\f
a80ce213 531/* Match REGEXP against the characters after POS to LIMIT, and return
8b13507a
KH
532 the number of matched characters. If STRING is non-nil, match
533 against the characters in it. In that case, POS and LIMIT are
534 indices into the string. This function doesn't modify the match
535 data. */
536
537EMACS_INT
971de7fb 538fast_looking_at (Lisp_Object regexp, EMACS_INT pos, EMACS_INT pos_byte, EMACS_INT limit, EMACS_INT limit_byte, Lisp_Object string)
8b13507a
KH
539{
540 int multibyte;
541 struct re_pattern_buffer *buf;
542 unsigned char *p1, *p2;
e7deaab0 543 EMACS_INT s1, s2;
8b13507a 544 EMACS_INT len;
78edd3b7 545
8b13507a
KH
546 if (STRINGP (string))
547 {
548 if (pos_byte < 0)
549 pos_byte = string_char_to_byte (string, pos);
550 if (limit_byte < 0)
551 limit_byte = string_char_to_byte (string, limit);
552 p1 = NULL;
553 s1 = 0;
554 p2 = SDATA (string);
555 s2 = SBYTES (string);
556 re_match_object = string;
557 multibyte = STRING_MULTIBYTE (string);
558 }
559 else
560 {
561 if (pos_byte < 0)
562 pos_byte = CHAR_TO_BYTE (pos);
563 if (limit_byte < 0)
564 limit_byte = CHAR_TO_BYTE (limit);
565 pos_byte -= BEGV_BYTE;
566 limit_byte -= BEGV_BYTE;
567 p1 = BEGV_ADDR;
568 s1 = GPT_BYTE - BEGV_BYTE;
569 p2 = GAP_END_ADDR;
570 s2 = ZV_BYTE - GPT_BYTE;
571 if (s1 < 0)
572 {
573 p2 = p1;
574 s2 = ZV_BYTE - BEGV_BYTE;
575 s1 = 0;
576 }
577 if (s2 < 0)
578 {
579 s1 = ZV_BYTE - BEGV_BYTE;
580 s2 = 0;
581 }
582 re_match_object = Qnil;
4b4deea2 583 multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
8b13507a
KH
584 }
585
586 buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
587 immediate_quit = 1;
588 len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
589 pos_byte, NULL, limit_byte);
590 immediate_quit = 0;
591
592 return len;
593}
594
595\f
9169c321
JB
596/* The newline cache: remembering which sections of text have no newlines. */
597
598/* If the user has requested newline caching, make sure it's on.
599 Otherwise, make sure it's off.
600 This is our cheezy way of associating an action with the change of
601 state of a buffer-local variable. */
602static void
971de7fb 603newline_cache_on_off (struct buffer *buf)
9169c321 604{
4b4deea2 605 if (NILP (BVAR (buf, cache_long_line_scans)))
9169c321
JB
606 {
607 /* It should be off. */
608 if (buf->newline_cache)
609 {
610 free_region_cache (buf->newline_cache);
611 buf->newline_cache = 0;
612 }
613 }
614 else
615 {
616 /* It should be on. */
617 if (buf->newline_cache == 0)
618 buf->newline_cache = new_region_cache ();
619 }
620}
621
622\f
623/* Search for COUNT instances of the character TARGET between START and END.
624
625 If COUNT is positive, search forwards; END must be >= START.
626 If COUNT is negative, search backwards for the -COUNTth instance;
627 END must be <= START.
628 If COUNT is zero, do anything you please; run rogue, for all I care.
629
630 If END is zero, use BEGV or ZV instead, as appropriate for the
631 direction indicated by COUNT.
ffd56f97
JB
632
633 If we find COUNT instances, set *SHORTAGE to zero, and return the
a9f2a45f 634 position past the COUNTth match. Note that for reverse motion
5bfe95c9 635 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 636
9169c321
JB
637 If we don't find COUNT instances before reaching END, set *SHORTAGE
638 to the number of TARGETs left unfound, and return END.
ffd56f97 639
087a5f81
RS
640 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
641 except when inside redisplay. */
642
c098fdb8
EZ
643EMACS_INT
644scan_buffer (register int target, EMACS_INT start, EMACS_INT end,
a53e2e89 645 EMACS_INT count, EMACS_INT *shortage, int allow_quit)
ca1d1d23 646{
9169c321 647 struct region_cache *newline_cache;
177c0ea7 648 int direction;
ffd56f97 649
9169c321
JB
650 if (count > 0)
651 {
652 direction = 1;
653 if (! end) end = ZV;
654 }
655 else
656 {
657 direction = -1;
658 if (! end) end = BEGV;
659 }
ffd56f97 660
9169c321
JB
661 newline_cache_on_off (current_buffer);
662 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
663
664 if (shortage != 0)
665 *shortage = 0;
666
087a5f81 667 immediate_quit = allow_quit;
ca1d1d23 668
ffd56f97 669 if (count > 0)
9169c321 670 while (start != end)
ca1d1d23 671 {
9169c321
JB
672 /* Our innermost scanning loop is very simple; it doesn't know
673 about gaps, buffer ends, or the newline cache. ceiling is
674 the position of the last character before the next such
675 obstacle --- the last character the dumb search loop should
676 examine. */
e7deaab0
AS
677 EMACS_INT ceiling_byte = CHAR_TO_BYTE (end) - 1;
678 EMACS_INT start_byte = CHAR_TO_BYTE (start);
679 EMACS_INT tem;
9169c321
JB
680
681 /* If we're looking for a newline, consult the newline cache
682 to see where we can avoid some scanning. */
683 if (target == '\n' && newline_cache)
684 {
0065d054 685 ptrdiff_t next_change;
9169c321
JB
686 immediate_quit = 0;
687 while (region_cache_forward
fa8ed3e0
RS
688 (current_buffer, newline_cache, start_byte, &next_change))
689 start_byte = next_change;
cbe0db0d 690 immediate_quit = allow_quit;
9169c321 691
fa8ed3e0
RS
692 /* START should never be after END. */
693 if (start_byte > ceiling_byte)
694 start_byte = ceiling_byte;
9169c321
JB
695
696 /* Now the text after start is an unknown region, and
697 next_change is the position of the next known region. */
fa8ed3e0 698 ceiling_byte = min (next_change - 1, ceiling_byte);
9169c321
JB
699 }
700
701 /* The dumb loop can only scan text stored in contiguous
702 bytes. BUFFER_CEILING_OF returns the last character
703 position that is contiguous, so the ceiling is the
704 position after that. */
67ce527d
KH
705 tem = BUFFER_CEILING_OF (start_byte);
706 ceiling_byte = min (tem, ceiling_byte);
9169c321
JB
707
708 {
177c0ea7 709 /* The termination address of the dumb loop. */
fa8ed3e0
RS
710 register unsigned char *ceiling_addr
711 = BYTE_POS_ADDR (ceiling_byte) + 1;
712 register unsigned char *cursor
713 = BYTE_POS_ADDR (start_byte);
9169c321
JB
714 unsigned char *base = cursor;
715
716 while (cursor < ceiling_addr)
717 {
718 unsigned char *scan_start = cursor;
719
720 /* The dumb loop. */
721 while (*cursor != target && ++cursor < ceiling_addr)
722 ;
723
724 /* If we're looking for newlines, cache the fact that
725 the region from start to cursor is free of them. */
726 if (target == '\n' && newline_cache)
727 know_region_cache (current_buffer, newline_cache,
6c1bd3f3
EZ
728 BYTE_TO_CHAR (start_byte + scan_start - base),
729 BYTE_TO_CHAR (start_byte + cursor - base));
9169c321
JB
730
731 /* Did we find the target character? */
732 if (cursor < ceiling_addr)
733 {
734 if (--count == 0)
735 {
736 immediate_quit = 0;
fa8ed3e0 737 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
9169c321
JB
738 }
739 cursor++;
740 }
741 }
742
fa8ed3e0 743 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321 744 }
ca1d1d23
JB
745 }
746 else
9169c321
JB
747 while (start > end)
748 {
749 /* The last character to check before the next obstacle. */
e7deaab0
AS
750 EMACS_INT ceiling_byte = CHAR_TO_BYTE (end);
751 EMACS_INT start_byte = CHAR_TO_BYTE (start);
752 EMACS_INT tem;
9169c321
JB
753
754 /* Consult the newline cache, if appropriate. */
755 if (target == '\n' && newline_cache)
756 {
0065d054 757 ptrdiff_t next_change;
9169c321
JB
758 immediate_quit = 0;
759 while (region_cache_backward
fa8ed3e0
RS
760 (current_buffer, newline_cache, start_byte, &next_change))
761 start_byte = next_change;
cbe0db0d 762 immediate_quit = allow_quit;
9169c321
JB
763
764 /* Start should never be at or before end. */
fa8ed3e0
RS
765 if (start_byte <= ceiling_byte)
766 start_byte = ceiling_byte + 1;
9169c321
JB
767
768 /* Now the text before start is an unknown region, and
769 next_change is the position of the next known region. */
fa8ed3e0 770 ceiling_byte = max (next_change, ceiling_byte);
9169c321
JB
771 }
772
773 /* Stop scanning before the gap. */
67ce527d
KH
774 tem = BUFFER_FLOOR_OF (start_byte - 1);
775 ceiling_byte = max (tem, ceiling_byte);
9169c321
JB
776
777 {
778 /* The termination address of the dumb loop. */
fa8ed3e0
RS
779 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
780 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
781 unsigned char *base = cursor;
782
783 while (cursor >= ceiling_addr)
784 {
785 unsigned char *scan_start = cursor;
786
787 while (*cursor != target && --cursor >= ceiling_addr)
788 ;
789
790 /* If we're looking for newlines, cache the fact that
791 the region from after the cursor to start is free of them. */
792 if (target == '\n' && newline_cache)
793 know_region_cache (current_buffer, newline_cache,
6c1bd3f3
EZ
794 BYTE_TO_CHAR (start_byte + cursor - base),
795 BYTE_TO_CHAR (start_byte + scan_start - base));
9169c321
JB
796
797 /* Did we find the target character? */
798 if (cursor >= ceiling_addr)
799 {
800 if (++count >= 0)
801 {
802 immediate_quit = 0;
fa8ed3e0 803 return BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
804 }
805 cursor--;
806 }
807 }
808
fa8ed3e0 809 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
810 }
811 }
812
ca1d1d23
JB
813 immediate_quit = 0;
814 if (shortage != 0)
ffd56f97 815 *shortage = count * direction;
9169c321 816 return start;
ca1d1d23 817}
fa8ed3e0
RS
818\f
819/* Search for COUNT instances of a line boundary, which means either a
820 newline or (if selective display enabled) a carriage return.
821 Start at START. If COUNT is negative, search backwards.
822
823 We report the resulting position by calling TEMP_SET_PT_BOTH.
824
825 If we find COUNT instances. we position after (always after,
826 even if scanning backwards) the COUNTth match, and return 0.
827
828 If we don't find COUNT instances before reaching the end of the
829 buffer (or the beginning, if scanning backwards), we return
830 the number of line boundaries left unfound, and position at
831 the limit we bumped up against.
832
833 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
d5d57b92 834 except in special cases. */
ca1d1d23 835
c098fdb8
EZ
836EMACS_INT
837scan_newline (EMACS_INT start, EMACS_INT start_byte,
838 EMACS_INT limit, EMACS_INT limit_byte,
839 register EMACS_INT count, int allow_quit)
63fa018d 840{
fa8ed3e0
RS
841 int direction = ((count > 0) ? 1 : -1);
842
843 register unsigned char *cursor;
844 unsigned char *base;
845
e7deaab0 846 EMACS_INT ceiling;
fa8ed3e0
RS
847 register unsigned char *ceiling_addr;
848
d5d57b92
RS
849 int old_immediate_quit = immediate_quit;
850
fa8ed3e0
RS
851 /* The code that follows is like scan_buffer
852 but checks for either newline or carriage return. */
853
d5d57b92
RS
854 if (allow_quit)
855 immediate_quit++;
fa8ed3e0
RS
856
857 start_byte = CHAR_TO_BYTE (start);
858
859 if (count > 0)
860 {
861 while (start_byte < limit_byte)
862 {
863 ceiling = BUFFER_CEILING_OF (start_byte);
864 ceiling = min (limit_byte - 1, ceiling);
865 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
866 base = (cursor = BYTE_POS_ADDR (start_byte));
867 while (1)
868 {
869 while (*cursor != '\n' && ++cursor != ceiling_addr)
870 ;
871
872 if (cursor != ceiling_addr)
873 {
874 if (--count == 0)
875 {
d5d57b92 876 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
877 start_byte = start_byte + cursor - base + 1;
878 start = BYTE_TO_CHAR (start_byte);
879 TEMP_SET_PT_BOTH (start, start_byte);
880 return 0;
881 }
882 else
883 if (++cursor == ceiling_addr)
884 break;
885 }
886 else
887 break;
888 }
889 start_byte += cursor - base;
890 }
891 }
892 else
893 {
fa8ed3e0
RS
894 while (start_byte > limit_byte)
895 {
896 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
897 ceiling = max (limit_byte, ceiling);
898 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
899 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
900 while (1)
901 {
902 while (--cursor != ceiling_addr && *cursor != '\n')
903 ;
904
905 if (cursor != ceiling_addr)
906 {
907 if (++count == 0)
908 {
d5d57b92 909 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
910 /* Return the position AFTER the match we found. */
911 start_byte = start_byte + cursor - base + 1;
912 start = BYTE_TO_CHAR (start_byte);
913 TEMP_SET_PT_BOTH (start, start_byte);
914 return 0;
915 }
916 }
917 else
918 break;
919 }
920 /* Here we add 1 to compensate for the last decrement
921 of CURSOR, which took it past the valid range. */
922 start_byte += cursor - base + 1;
923 }
924 }
925
926 TEMP_SET_PT_BOTH (limit, limit_byte);
d5d57b92 927 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
928
929 return count * direction;
63fa018d
RS
930}
931
c098fdb8
EZ
932EMACS_INT
933find_next_newline_no_quit (EMACS_INT from, EMACS_INT cnt)
ca1d1d23 934{
a53e2e89 935 return scan_buffer ('\n', from, 0, cnt, (EMACS_INT *) 0, 0);
9169c321
JB
936}
937
9169c321
JB
938/* Like find_next_newline, but returns position before the newline,
939 not after, and only search up to TO. This isn't just
940 find_next_newline (...)-1, because you might hit TO. */
fa8ed3e0 941
c098fdb8
EZ
942EMACS_INT
943find_before_next_newline (EMACS_INT from, EMACS_INT to, EMACS_INT cnt)
9169c321 944{
a53e2e89 945 EMACS_INT shortage;
c098fdb8 946 EMACS_INT pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
9169c321
JB
947
948 if (shortage == 0)
949 pos--;
177c0ea7 950
9169c321 951 return pos;
ca1d1d23
JB
952}
953\f
ca1d1d23
JB
954/* Subroutines of Lisp buffer search functions. */
955
956static Lisp_Object
c098fdb8
EZ
957search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror,
958 Lisp_Object count, int direction, int RE, int posix)
ca1d1d23 959{
a53e2e89 960 register EMACS_INT np;
c098fdb8 961 EMACS_INT lim, lim_byte;
a53e2e89 962 EMACS_INT n = direction;
ca1d1d23
JB
963
964 if (!NILP (count))
965 {
b7826503 966 CHECK_NUMBER (count);
ca1d1d23
JB
967 n *= XINT (count);
968 }
969
b7826503 970 CHECK_STRING (string);
ca1d1d23 971 if (NILP (bound))
9f43ad85
RS
972 {
973 if (n > 0)
974 lim = ZV, lim_byte = ZV_BYTE;
975 else
976 lim = BEGV, lim_byte = BEGV_BYTE;
977 }
ca1d1d23
JB
978 else
979 {
b7826503 980 CHECK_NUMBER_COERCE_MARKER (bound);
ca1d1d23 981 lim = XINT (bound);
6ec8bbd2 982 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
983 error ("Invalid search bound (wrong side of point)");
984 if (lim > ZV)
9f43ad85 985 lim = ZV, lim_byte = ZV_BYTE;
588d2fd5 986 else if (lim < BEGV)
9f43ad85 987 lim = BEGV, lim_byte = BEGV_BYTE;
588d2fd5
KH
988 else
989 lim_byte = CHAR_TO_BYTE (lim);
ca1d1d23
JB
990 }
991
910c747a 992 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
4b4deea2
TT
993 XCHAR_TABLE (BVAR (current_buffer, case_canon_table))->extras[2]
994 = BVAR (current_buffer, case_eqv_table);
910c747a 995
9f43ad85 996 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
4b4deea2
TT
997 (!NILP (BVAR (current_buffer, case_fold_search))
998 ? BVAR (current_buffer, case_canon_table)
3135e9fd 999 : Qnil),
4b4deea2
TT
1000 (!NILP (BVAR (current_buffer, case_fold_search))
1001 ? BVAR (current_buffer, case_eqv_table)
3135e9fd 1002 : Qnil),
b819a390 1003 posix);
ca1d1d23
JB
1004 if (np <= 0)
1005 {
1006 if (NILP (noerror))
06f77a8a
KS
1007 xsignal1 (Qsearch_failed, string);
1008
ca1d1d23
JB
1009 if (!EQ (noerror, Qt))
1010 {
1011 if (lim < BEGV || lim > ZV)
1012 abort ();
9f43ad85 1013 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
1014 return Qnil;
1015#if 0 /* This would be clean, but maybe programs depend on
1016 a value of nil here. */
481399bf 1017 np = lim;
a5f217b8 1018#endif
ca1d1d23 1019 }
481399bf
RS
1020 else
1021 return Qnil;
ca1d1d23
JB
1022 }
1023
1024 if (np < BEGV || np > ZV)
1025 abort ();
1026
1027 SET_PT (np);
1028
1029 return make_number (np);
1030}
1031\f
fa8ed3e0
RS
1032/* Return 1 if REGEXP it matches just one constant string. */
1033
b6d6a51c 1034static int
971de7fb 1035trivial_regexp_p (Lisp_Object regexp)
b6d6a51c 1036{
c098fdb8 1037 EMACS_INT len = SBYTES (regexp);
d5db4077 1038 unsigned char *s = SDATA (regexp);
b6d6a51c
KH
1039 while (--len >= 0)
1040 {
1041 switch (*s++)
1042 {
1043 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1044 return 0;
1045 case '\\':
1046 if (--len < 0)
1047 return 0;
1048 switch (*s++)
1049 {
1050 case '|': case '(': case ')': case '`': case '\'': case 'b':
1051 case 'B': case '<': case '>': case 'w': case 'W': case 's':
29f89fe7 1052 case 'S': case '=': case '{': case '}': case '_':
5679531d 1053 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 1054 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
1055 case '6': case '7': case '8': case '9':
1056 return 0;
1057 }
1058 }
1059 }
1060 return 1;
1061}
1062
ca325161 1063/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 1064 starting at position POS and stopping at position LIM,
b819a390 1065 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
1066 a regular expression if RE is true.
1067
1068 If N is positive, searching is forward and LIM must be greater than POS.
1069 If N is negative, searching is backward and LIM must be less than POS.
1070
facdc750 1071 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 1072 or else the position at the beginning of the Nth occurrence
b819a390
RS
1073 (if searching backward) or the end (if searching forward).
1074
1075 POSIX is nonzero if we want full backtracking (POSIX style)
1076 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 1077
aff2ce94
RS
1078#define TRANSLATE(out, trt, d) \
1079do \
1080 { \
1081 if (! NILP (trt)) \
1082 { \
1083 Lisp_Object temp; \
1084 temp = Faref (trt, make_number (d)); \
1085 if (INTEGERP (temp)) \
1086 out = XINT (temp); \
1087 else \
1088 out = d; \
1089 } \
1090 else \
1091 out = d; \
1092 } \
1093while (0)
facdc750 1094
ef887810
RS
1095/* Only used in search_buffer, to record the end position of the match
1096 when searching regexps and SEARCH_REGS should not be changed
1097 (i.e. Vinhibit_changing_match_data is non-nil). */
1098static struct re_registers search_regs_1;
1099
e7deaab0 1100static EMACS_INT
d5a3eaaf 1101search_buffer (Lisp_Object string, EMACS_INT pos, EMACS_INT pos_byte,
c098fdb8 1102 EMACS_INT lim, EMACS_INT lim_byte, EMACS_INT n,
d5a3eaaf 1103 int RE, Lisp_Object trt, Lisp_Object inverse_trt, int posix)
ca1d1d23 1104{
c098fdb8
EZ
1105 EMACS_INT len = SCHARS (string);
1106 EMACS_INT len_byte = SBYTES (string);
facdc750 1107 register int i;
ca1d1d23 1108
7074fde6
FP
1109 if (running_asynch_code)
1110 save_search_regs ();
1111
a7e4cdde 1112 /* Searching 0 times means don't move. */
ca1d1d23 1113 /* Null string is found at starting position. */
a7e4cdde 1114 if (len == 0 || n == 0)
ca325161 1115 {
0353b28f 1116 set_search_regs (pos_byte, 0);
ca325161
RS
1117 return pos;
1118 }
3f57a499 1119
41a33295 1120 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
ca1d1d23 1121 {
facdc750 1122 unsigned char *p1, *p2;
c098fdb8 1123 EMACS_INT s1, s2;
487282dc
KH
1124 struct re_pattern_buffer *bufp;
1125
ef887810
RS
1126 bufp = compile_pattern (string,
1127 (NILP (Vinhibit_changing_match_data)
1128 ? &search_regs : &search_regs_1),
1129 trt, posix,
4b4deea2 1130 !NILP (BVAR (current_buffer, enable_multibyte_characters)));
ca1d1d23 1131
ca1d1d23
JB
1132 immediate_quit = 1; /* Quit immediately if user types ^G,
1133 because letting this function finish
1134 can take too long. */
1135 QUIT; /* Do a pending quit right away,
1136 to avoid paradoxical behavior */
1137 /* Get pointers and sizes of the two strings
1138 that make up the visible portion of the buffer. */
1139
1140 p1 = BEGV_ADDR;
fa8ed3e0 1141 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1142 p2 = GAP_END_ADDR;
fa8ed3e0 1143 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1144 if (s1 < 0)
1145 {
1146 p2 = p1;
fa8ed3e0 1147 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1148 s1 = 0;
1149 }
1150 if (s2 < 0)
1151 {
fa8ed3e0 1152 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1153 s2 = 0;
1154 }
8bb43c28 1155 re_match_object = Qnil;
177c0ea7 1156
ca1d1d23
JB
1157 while (n < 0)
1158 {
c098fdb8 1159 EMACS_INT val;
487282dc 1160 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b 1161 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
ef887810
RS
1162 (NILP (Vinhibit_changing_match_data)
1163 ? &search_regs : &search_regs_1),
42db823b 1164 /* Don't allow match past current point */
4996330b 1165 pos_byte - BEGV_BYTE);
ca1d1d23 1166 if (val == -2)
b6d6a51c
KH
1167 {
1168 matcher_overflow ();
1169 }
ca1d1d23
JB
1170 if (val >= 0)
1171 {
ef887810
RS
1172 if (NILP (Vinhibit_changing_match_data))
1173 {
1174 pos_byte = search_regs.start[0] + BEGV_BYTE;
1175 for (i = 0; i < search_regs.num_regs; i++)
1176 if (search_regs.start[i] >= 0)
1177 {
1178 search_regs.start[i]
1179 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1180 search_regs.end[i]
1181 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1182 }
1183 XSETBUFFER (last_thing_searched, current_buffer);
1184 /* Set pos to the new position. */
1185 pos = search_regs.start[0];
1186 }
1187 else
1188 {
1189 pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1190 /* Set pos to the new position. */
1191 pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1192 }
ca1d1d23
JB
1193 }
1194 else
1195 {
1196 immediate_quit = 0;
1197 return (n);
1198 }
1199 n++;
1200 }
1201 while (n > 0)
1202 {
c098fdb8 1203 EMACS_INT val;
487282dc 1204 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b 1205 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
ef887810
RS
1206 (NILP (Vinhibit_changing_match_data)
1207 ? &search_regs : &search_regs_1),
4996330b 1208 lim_byte - BEGV_BYTE);
ca1d1d23 1209 if (val == -2)
b6d6a51c
KH
1210 {
1211 matcher_overflow ();
1212 }
ca1d1d23
JB
1213 if (val >= 0)
1214 {
ef887810
RS
1215 if (NILP (Vinhibit_changing_match_data))
1216 {
1217 pos_byte = search_regs.end[0] + BEGV_BYTE;
1218 for (i = 0; i < search_regs.num_regs; i++)
1219 if (search_regs.start[i] >= 0)
1220 {
1221 search_regs.start[i]
1222 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1223 search_regs.end[i]
1224 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1225 }
1226 XSETBUFFER (last_thing_searched, current_buffer);
1227 pos = search_regs.end[0];
1228 }
1229 else
1230 {
1231 pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1232 pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1233 }
ca1d1d23
JB
1234 }
1235 else
1236 {
1237 immediate_quit = 0;
1238 return (0 - n);
1239 }
1240 n--;
1241 }
1242 immediate_quit = 0;
1243 return (pos);
1244 }
1245 else /* non-RE case */
1246 {
facdc750 1247 unsigned char *raw_pattern, *pat;
c098fdb8
EZ
1248 EMACS_INT raw_pattern_size;
1249 EMACS_INT raw_pattern_size_byte;
facdc750 1250 unsigned char *patbuf;
4b4deea2 1251 int multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
a967ed62 1252 unsigned char *base_pat;
49e44e81
KH
1253 /* Set to positive if we find a non-ASCII char that need
1254 translation. Otherwise set to zero later. */
1255 int char_base = -1;
040272ce 1256 int boyer_moore_ok = 1;
facdc750
RS
1257
1258 /* MULTIBYTE says whether the text to be searched is multibyte.
1259 We must convert PATTERN to match that, or we will not really
1260 find things right. */
1261
1262 if (multibyte == STRING_MULTIBYTE (string))
1263 {
51b59d79 1264 raw_pattern = SDATA (string);
d5db4077
KR
1265 raw_pattern_size = SCHARS (string);
1266 raw_pattern_size_byte = SBYTES (string);
facdc750
RS
1267 }
1268 else if (multibyte)
1269 {
d5db4077 1270 raw_pattern_size = SCHARS (string);
facdc750 1271 raw_pattern_size_byte
d5db4077 1272 = count_size_as_multibyte (SDATA (string),
facdc750 1273 raw_pattern_size);
7276d3d8 1274 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
d5db4077
KR
1275 copy_text (SDATA (string), raw_pattern,
1276 SCHARS (string), 0, 1);
facdc750
RS
1277 }
1278 else
1279 {
1280 /* Converting multibyte to single-byte.
1281
1282 ??? Perhaps this conversion should be done in a special way
1283 by subtracting nonascii-insert-offset from each non-ASCII char,
1284 so that only the multibyte chars which really correspond to
1285 the chosen single-byte character set can possibly match. */
d5db4077
KR
1286 raw_pattern_size = SCHARS (string);
1287 raw_pattern_size_byte = SCHARS (string);
7276d3d8 1288 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
1289 copy_text (SDATA (string), raw_pattern,
1290 SBYTES (string), 1, 0);
facdc750
RS
1291 }
1292
1293 /* Copy and optionally translate the pattern. */
1294 len = raw_pattern_size;
1295 len_byte = raw_pattern_size_byte;
f5f7578b 1296 patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
facdc750
RS
1297 pat = patbuf;
1298 base_pat = raw_pattern;
1299 if (multibyte)
1300 {
a17ae96a
KH
1301 /* Fill patbuf by translated characters in STRING while
1302 checking if we can use boyer-moore search. If TRT is
1303 non-nil, we can use boyer-moore search only if TRT can be
1304 represented by the byte array of 256 elements. For that,
9858f6c3 1305 all non-ASCII case-equivalents of all case-sensitive
a17ae96a
KH
1306 characters in STRING must belong to the same charset and
1307 row. */
1308
facdc750
RS
1309 while (--len >= 0)
1310 {
a17ae96a 1311 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
aff2ce94 1312 int c, translated, inverse;
a17ae96a 1313 int in_charlen, charlen;
facdc750
RS
1314
1315 /* If we got here and the RE flag is set, it's because we're
1316 dealing with a regexp known to be trivial, so the backslash
1317 just quotes the next character. */
1318 if (RE && *base_pat == '\\')
1319 {
1320 len--;
62221f22 1321 raw_pattern_size--;
facdc750
RS
1322 len_byte--;
1323 base_pat++;
1324 }
1325
62a6e103 1326 c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
040272ce 1327
a17ae96a 1328 if (NILP (trt))
facdc750 1329 {
a17ae96a
KH
1330 str = base_pat;
1331 charlen = in_charlen;
1332 }
1333 else
1334 {
1335 /* Translate the character. */
1336 TRANSLATE (translated, trt, c);
1337 charlen = CHAR_STRING (translated, str_base);
1338 str = str_base;
1339
1340 /* Check if C has any other case-equivalents. */
1341 TRANSLATE (inverse, inverse_trt, c);
1342 /* If so, check if we can use boyer-moore. */
1343 if (c != inverse && boyer_moore_ok)
1344 {
1345 /* Check if all equivalents belong to the same
1346 group of characters. Note that the check of C
49e44e81
KH
1347 itself is done by the last iteration. */
1348 int this_char_base = -1;
1349
1350 while (boyer_moore_ok)
a17ae96a 1351 {
49e44e81 1352 if (ASCII_BYTE_P (inverse))
a17ae96a 1353 {
49e44e81
KH
1354 if (this_char_base > 0)
1355 boyer_moore_ok = 0;
1356 else
dafaabd1 1357 this_char_base = 0;
a17ae96a 1358 }
49e44e81
KH
1359 else if (CHAR_BYTE8_P (inverse))
1360 /* Boyer-moore search can't handle a
1361 translation of an eight-bit
1362 character. */
1363 boyer_moore_ok = 0;
1364 else if (this_char_base < 0)
1365 {
1366 this_char_base = inverse & ~0x3F;
1367 if (char_base < 0)
1368 char_base = this_char_base;
dafaabd1 1369 else if (this_char_base != char_base)
49e44e81
KH
1370 boyer_moore_ok = 0;
1371 }
1372 else if ((inverse & ~0x3F) != this_char_base)
1373 boyer_moore_ok = 0;
a17ae96a
KH
1374 if (c == inverse)
1375 break;
1376 TRANSLATE (inverse, inverse_trt, inverse);
1377 }
1378 }
aff2ce94 1379 }
facdc750
RS
1380
1381 /* Store this character into the translated pattern. */
72af86bd 1382 memcpy (pat, str, charlen);
a17ae96a 1383 pat += charlen;
facdc750
RS
1384 base_pat += in_charlen;
1385 len_byte -= in_charlen;
1386 }
dafaabd1
AS
1387
1388 /* If char_base is still negative we didn't find any translated
1389 non-ASCII characters. */
1390 if (char_base < 0)
1391 char_base = 0;
facdc750
RS
1392 }
1393 else
1394 {
040272ce 1395 /* Unibyte buffer. */
a17ae96a 1396 char_base = 0;
facdc750
RS
1397 while (--len >= 0)
1398 {
040272ce 1399 int c, translated;
facdc750
RS
1400
1401 /* If we got here and the RE flag is set, it's because we're
1402 dealing with a regexp known to be trivial, so the backslash
1403 just quotes the next character. */
1404 if (RE && *base_pat == '\\')
1405 {
1406 len--;
0190922f 1407 raw_pattern_size--;
facdc750
RS
1408 base_pat++;
1409 }
1410 c = *base_pat++;
aff2ce94 1411 TRANSLATE (translated, trt, c);
facdc750
RS
1412 *pat++ = translated;
1413 }
1414 }
1415
1416 len_byte = pat - patbuf;
facdc750
RS
1417 pat = base_pat = patbuf;
1418
040272ce 1419 if (boyer_moore_ok)
1db5b1ad
JB
1420 return boyer_moore (n, pat, len_byte, trt, inverse_trt,
1421 pos_byte, lim_byte,
a17ae96a 1422 char_base);
facdc750 1423 else
1db5b1ad 1424 return simple_search (n, pat, raw_pattern_size, len_byte, trt,
facdc750
RS
1425 pos, pos_byte, lim, lim_byte);
1426 }
1427}
1428\f
1429/* Do a simple string search N times for the string PAT,
1430 whose length is LEN/LEN_BYTE,
1431 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1432 TRT is the translation table.
f8bd51c4 1433
facdc750
RS
1434 Return the character position where the match is found.
1435 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1436
facdc750
RS
1437 This kind of search works regardless of what is in PAT and
1438 regardless of what is in TRT. It is used in cases where
1439 boyer_moore cannot work. */
1440
e7deaab0 1441static EMACS_INT
c098fdb8
EZ
1442simple_search (EMACS_INT n, unsigned char *pat,
1443 EMACS_INT len, EMACS_INT len_byte, Lisp_Object trt,
1444 EMACS_INT pos, EMACS_INT pos_byte,
1445 EMACS_INT lim, EMACS_INT lim_byte)
facdc750 1446{
4b4deea2 1447 int multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
ab228c24 1448 int forward = n > 0;
49e44e81
KH
1449 /* Number of buffer bytes matched. Note that this may be different
1450 from len_byte in a multibyte buffer. */
c098fdb8 1451 EMACS_INT match_byte;
facdc750
RS
1452
1453 if (lim > pos && multibyte)
1454 while (n > 0)
1455 {
1456 while (1)
f8bd51c4 1457 {
facdc750 1458 /* Try matching at position POS. */
e7deaab0
AS
1459 EMACS_INT this_pos = pos;
1460 EMACS_INT this_pos_byte = pos_byte;
c098fdb8 1461 EMACS_INT this_len = len;
facdc750 1462 unsigned char *p = pat;
a9469498 1463 if (pos + len > lim || pos_byte + len_byte > lim_byte)
facdc750
RS
1464 goto stop;
1465
1466 while (this_len > 0)
1467 {
1468 int charlen, buf_charlen;
ab228c24 1469 int pat_ch, buf_ch;
facdc750 1470
62a6e103 1471 pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
facdc750 1472 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
facdc750 1473 buf_charlen);
aff2ce94 1474 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1475
1476 if (buf_ch != pat_ch)
1477 break;
ab228c24 1478
ab228c24
RS
1479 this_len--;
1480 p += charlen;
1481
1482 this_pos_byte += buf_charlen;
1483 this_pos++;
facdc750
RS
1484 }
1485
1486 if (this_len == 0)
1487 {
49e44e81 1488 match_byte = this_pos_byte - pos_byte;
facdc750 1489 pos += len;
49e44e81 1490 pos_byte += match_byte;
facdc750
RS
1491 break;
1492 }
1493
1494 INC_BOTH (pos, pos_byte);
f8bd51c4 1495 }
facdc750
RS
1496
1497 n--;
1498 }
1499 else if (lim > pos)
1500 while (n > 0)
1501 {
1502 while (1)
f8bd51c4 1503 {
facdc750 1504 /* Try matching at position POS. */
e7deaab0 1505 EMACS_INT this_pos = pos;
c098fdb8 1506 EMACS_INT this_len = len;
facdc750
RS
1507 unsigned char *p = pat;
1508
1509 if (pos + len > lim)
1510 goto stop;
1511
1512 while (this_len > 0)
1513 {
1514 int pat_ch = *p++;
1515 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1516 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1517
1518 if (buf_ch != pat_ch)
1519 break;
ab228c24
RS
1520
1521 this_len--;
1522 this_pos++;
facdc750
RS
1523 }
1524
1525 if (this_len == 0)
1526 {
49e44e81 1527 match_byte = len;
facdc750
RS
1528 pos += len;
1529 break;
1530 }
1531
1532 pos++;
f8bd51c4 1533 }
facdc750
RS
1534
1535 n--;
1536 }
1537 /* Backwards search. */
1538 else if (lim < pos && multibyte)
1539 while (n < 0)
1540 {
1541 while (1)
f8bd51c4 1542 {
facdc750 1543 /* Try matching at position POS. */
8b264ecb
AS
1544 EMACS_INT this_pos = pos;
1545 EMACS_INT this_pos_byte = pos_byte;
c098fdb8 1546 EMACS_INT this_len = len;
c525b3f2 1547 const unsigned char *p = pat + len_byte;
facdc750 1548
8b264ecb 1549 if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
facdc750
RS
1550 goto stop;
1551
1552 while (this_len > 0)
1553 {
ab228c24 1554 int pat_ch, buf_ch;
facdc750 1555
8b264ecb
AS
1556 DEC_BOTH (this_pos, this_pos_byte);
1557 PREV_CHAR_BOUNDARY (p, pat);
1558 pat_ch = STRING_CHAR (p);
1559 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
aff2ce94 1560 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1561
1562 if (buf_ch != pat_ch)
1563 break;
ab228c24 1564
ab228c24 1565 this_len--;
facdc750
RS
1566 }
1567
1568 if (this_len == 0)
1569 {
8b264ecb
AS
1570 match_byte = pos_byte - this_pos_byte;
1571 pos = this_pos;
1572 pos_byte = this_pos_byte;
facdc750
RS
1573 break;
1574 }
1575
1576 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1577 }
1578
facdc750
RS
1579 n++;
1580 }
1581 else if (lim < pos)
1582 while (n < 0)
1583 {
1584 while (1)
b6d6a51c 1585 {
facdc750 1586 /* Try matching at position POS. */
e7deaab0 1587 EMACS_INT this_pos = pos - len;
c098fdb8 1588 EMACS_INT this_len = len;
facdc750
RS
1589 unsigned char *p = pat;
1590
a9469498 1591 if (this_pos < lim)
facdc750
RS
1592 goto stop;
1593
1594 while (this_len > 0)
1595 {
1596 int pat_ch = *p++;
1597 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1598 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1599
1600 if (buf_ch != pat_ch)
1601 break;
ab228c24
RS
1602 this_len--;
1603 this_pos++;
facdc750
RS
1604 }
1605
1606 if (this_len == 0)
b6d6a51c 1607 {
49e44e81 1608 match_byte = len;
facdc750
RS
1609 pos -= len;
1610 break;
b6d6a51c 1611 }
facdc750
RS
1612
1613 pos--;
b6d6a51c 1614 }
facdc750
RS
1615
1616 n++;
b6d6a51c 1617 }
facdc750
RS
1618
1619 stop:
1620 if (n == 0)
aff2ce94 1621 {
ab228c24 1622 if (forward)
49e44e81 1623 set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
ab228c24 1624 else
49e44e81 1625 set_search_regs (multibyte ? pos_byte : pos, match_byte);
aff2ce94
RS
1626
1627 return pos;
1628 }
facdc750
RS
1629 else if (n > 0)
1630 return -n;
1631 else
1632 return n;
1633}
1634\f
0190922f 1635/* Do Boyer-Moore search N times for the string BASE_PAT,
1db5b1ad
JB
1636 whose length is LEN_BYTE,
1637 from buffer position POS_BYTE until LIM_BYTE.
facdc750
RS
1638 DIRECTION says which direction we search in.
1639 TRT and INVERSE_TRT are translation tables.
0190922f 1640 Characters in PAT are already translated by TRT.
facdc750 1641
0190922f
KH
1642 This kind of search works if all the characters in BASE_PAT that
1643 have nontrivial translation are the same aside from the last byte.
1644 This makes it possible to translate just the last byte of a
1645 character, and do so after just a simple test of the context.
b2e6b10f 1646 CHAR_BASE is nonzero if there is such a non-ASCII character.
facdc750
RS
1647
1648 If that criterion is not satisfied, do not call this function. */
1649
e7deaab0 1650static EMACS_INT
c098fdb8 1651boyer_moore (EMACS_INT n, unsigned char *base_pat,
1db5b1ad 1652 EMACS_INT len_byte,
d5a3eaaf 1653 Lisp_Object trt, Lisp_Object inverse_trt,
1db5b1ad
JB
1654 EMACS_INT pos_byte, EMACS_INT lim_byte,
1655 int char_base)
facdc750
RS
1656{
1657 int direction = ((n > 0) ? 1 : -1);
c098fdb8 1658 register EMACS_INT dirlen;
e7deaab0
AS
1659 EMACS_INT limit;
1660 int stride_for_teases = 0;
f4646fff 1661 int BM_tab[0400];
177c0ea7 1662 register unsigned char *cursor, *p_limit;
c098fdb8
EZ
1663 register EMACS_INT i;
1664 register int j;
cb6792d2 1665 unsigned char *pat, *pat_end;
4b4deea2 1666 int multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
facdc750
RS
1667
1668 unsigned char simple_translate[0400];
0190922f 1669 /* These are set to the preceding bytes of a byte to be translated
49e44e81 1670 if char_base is nonzero. As the maximum byte length of a
a17ae96a 1671 multibyte character is 5, we have to check at most four previous
0190922f
KH
1672 bytes. */
1673 int translate_prev_byte1 = 0;
1674 int translate_prev_byte2 = 0;
1675 int translate_prev_byte3 = 0;
facdc750 1676
f4646fff
AS
1677 /* The general approach is that we are going to maintain that we know
1678 the first (closest to the present position, in whatever direction
1679 we're searching) character that could possibly be the last
1680 (furthest from present position) character of a valid match. We
1681 advance the state of our knowledge by looking at that character
1682 and seeing whether it indeed matches the last character of the
1683 pattern. If it does, we take a closer look. If it does not, we
1684 move our pointer (to putative last characters) as far as is
1685 logically possible. This amount of movement, which I call a
1686 stride, will be the length of the pattern if the actual character
1687 appears nowhere in the pattern, otherwise it will be the distance
1688 from the last occurrence of that character to the end of the
1689 pattern. If the amount is zero we have a possible match. */
1690
1691 /* Here we make a "mickey mouse" BM table. The stride of the search
1692 is determined only by the last character of the putative match.
1693 If that character does not match, we will stride the proper
1694 distance to propose a match that superimposes it on the last
1695 instance of a character that matches it (per trt), or misses
1696 it entirely if there is none. */
facdc750
RS
1697
1698 dirlen = len_byte * direction;
cb6792d2
RS
1699
1700 /* Record position after the end of the pattern. */
1701 pat_end = base_pat + len_byte;
1702 /* BASE_PAT points to a character that we start scanning from.
1703 It is the first character in a forward search,
1704 the last character in a backward search. */
facdc750 1705 if (direction < 0)
cb6792d2
RS
1706 base_pat = pat_end - 1;
1707
f4646fff
AS
1708 /* A character that does not appear in the pattern induces a
1709 stride equal to the pattern length. */
1710 for (i = 0; i < 0400; i++)
1711 BM_tab[i] = dirlen;
facdc750
RS
1712
1713 /* We use this for translation, instead of TRT itself.
1714 We fill this in to handle the characters that actually
1715 occur in the pattern. Others don't matter anyway! */
facdc750
RS
1716 for (i = 0; i < 0400; i++)
1717 simple_translate[i] = i;
1718
a17ae96a 1719 if (char_base)
0190922f 1720 {
a17ae96a 1721 /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE. Only a
0190922f 1722 byte following them are the target of translation. */
0190922f 1723 unsigned char str[MAX_MULTIBYTE_LENGTH];
1f3561e4 1724 int cblen = CHAR_STRING (char_base, str);
0190922f 1725
1f3561e4
PE
1726 translate_prev_byte1 = str[cblen - 2];
1727 if (cblen > 2)
0190922f 1728 {
1f3561e4
PE
1729 translate_prev_byte2 = str[cblen - 3];
1730 if (cblen > 3)
1f1d9321 1731 translate_prev_byte3 = str[cblen - 4];
0190922f
KH
1732 }
1733 }
1734
facdc750 1735 i = 0;
f4646fff 1736 while (i != dirlen)
facdc750 1737 {
cb6792d2 1738 unsigned char *ptr = base_pat + i;
facdc750 1739 i += direction;
facdc750 1740 if (! NILP (trt))
ca1d1d23 1741 {
49e44e81
KH
1742 /* If the byte currently looking at is the last of a
1743 character to check case-equivalents, set CH to that
1744 character. An ASCII character and a non-ASCII character
1745 matching with CHAR_BASE are to be checked. */
a17ae96a
KH
1746 int ch = -1;
1747
1748 if (ASCII_BYTE_P (*ptr) || ! multibyte)
1749 ch = *ptr;
49e44e81 1750 else if (char_base
86dc6ccb 1751 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
ca1d1d23 1752 {
49e44e81
KH
1753 unsigned char *charstart = ptr - 1;
1754
1755 while (! (CHAR_HEAD_P (*charstart)))
1756 charstart--;
62a6e103 1757 ch = STRING_CHAR (charstart);
a17ae96a
KH
1758 if (char_base != (ch & ~0x3F))
1759 ch = -1;
ca1d1d23 1760 }
facdc750 1761
edb7b4dc 1762 if (ch >= 0200 && multibyte)
49e44e81
KH
1763 j = (ch & 0x3F) | 0200;
1764 else
1765 j = *ptr;
1766
f4646fff 1767 if (i == dirlen)
facdc750 1768 stride_for_teases = BM_tab[j];
ab228c24 1769
facdc750 1770 BM_tab[j] = dirlen - i;
1db5b1ad
JB
1771 /* A translation table is accompanied by its inverse -- see
1772 comment following downcase_table for details. */
a17ae96a 1773 if (ch >= 0)
ab228c24
RS
1774 {
1775 int starting_ch = ch;
49e44e81 1776 int starting_j = j;
a17ae96a 1777
ab228c24
RS
1778 while (1)
1779 {
1780 TRANSLATE (ch, inverse_trt, ch);
edb7b4dc 1781 if (ch >= 0200 && multibyte)
49e44e81 1782 j = (ch & 0x3F) | 0200;
ab228c24 1783 else
a17ae96a 1784 j = ch;
ab228c24
RS
1785
1786 /* For all the characters that map into CH,
1787 set up simple_translate to map the last byte
1788 into STARTING_J. */
1789 simple_translate[j] = starting_j;
1790 if (ch == starting_ch)
1791 break;
1792 BM_tab[j] = dirlen - i;
1793 }
1794 }
facdc750
RS
1795 }
1796 else
1797 {
1798 j = *ptr;
1799
f4646fff 1800 if (i == dirlen)
facdc750
RS
1801 stride_for_teases = BM_tab[j];
1802 BM_tab[j] = dirlen - i;
ca1d1d23 1803 }
f4646fff
AS
1804 /* stride_for_teases tells how much to stride if we get a
1805 match on the far character but are subsequently
1806 disappointed, by recording what the stride would have been
1807 for that character if the last character had been
1808 different. */
facdc750 1809 }
facdc750
RS
1810 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1811 /* loop invariant - POS_BYTE points at where last char (first
1812 char if reverse) of pattern would align in a possible match. */
1813 while (n != 0)
1814 {
e7deaab0 1815 EMACS_INT tail_end;
facdc750
RS
1816 unsigned char *tail_end_ptr;
1817
1818 /* It's been reported that some (broken) compiler thinks that
1819 Boolean expressions in an arithmetic context are unsigned.
1820 Using an explicit ?1:0 prevents this. */
1821 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1822 < 0)
1823 return (n * (0 - direction));
1824 /* First we do the part we can by pointers (maybe nothing) */
1825 QUIT;
1826 pat = base_pat;
1827 limit = pos_byte - dirlen + direction;
67ce527d
KH
1828 if (direction > 0)
1829 {
1830 limit = BUFFER_CEILING_OF (limit);
1831 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1832 can take on without hitting edge of buffer or the gap. */
1833 limit = min (limit, pos_byte + 20000);
1834 limit = min (limit, lim_byte - 1);
1835 }
1836 else
1837 {
1838 limit = BUFFER_FLOOR_OF (limit);
1839 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1840 can take on without hitting edge of buffer or the gap. */
1841 limit = max (limit, pos_byte - 20000);
1842 limit = max (limit, lim_byte);
1843 }
facdc750
RS
1844 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1845 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1846
1847 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1848 {
facdc750
RS
1849 unsigned char *p2;
1850
1851 p_limit = BYTE_POS_ADDR (limit);
1852 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
f4646fff 1853 /* In this loop, pos + cursor - p2 is the surrogate for pos. */
facdc750 1854 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1855 {
facdc750 1856 if (direction > 0) /* worth duplicating */
ca1d1d23 1857 {
f4646fff
AS
1858 while (cursor <= p_limit)
1859 {
1860 if (BM_tab[*cursor] == 0)
1861 goto hit;
facdc750 1862 cursor += BM_tab[*cursor];
f4646fff 1863 }
facdc750
RS
1864 }
1865 else
1866 {
f4646fff
AS
1867 while (cursor >= p_limit)
1868 {
1869 if (BM_tab[*cursor] == 0)
1870 goto hit;
facdc750 1871 cursor += BM_tab[*cursor];
f4646fff 1872 }
facdc750 1873 }
f4646fff
AS
1874 /* If you are here, cursor is beyond the end of the
1875 searched region. You fail to match within the
1876 permitted region and would otherwise try a character
1877 beyond that region. */
1878 break;
1879
1880 hit:
facdc750
RS
1881 i = dirlen - direction;
1882 if (! NILP (trt))
1883 {
1884 while ((i -= direction) + direction != 0)
ca1d1d23 1885 {
facdc750
RS
1886 int ch;
1887 cursor -= direction;
1888 /* Translate only the last byte of a character. */
1889 if (! multibyte
1890 || ((cursor == tail_end_ptr
1891 || CHAR_HEAD_P (cursor[1]))
1892 && (CHAR_HEAD_P (cursor[0])
0190922f 1893 /* Check if this is the last byte of
cd1181db 1894 a translatable character. */
0190922f
KH
1895 || (translate_prev_byte1 == cursor[-1]
1896 && (CHAR_HEAD_P (translate_prev_byte1)
1897 || (translate_prev_byte2 == cursor[-2]
1898 && (CHAR_HEAD_P (translate_prev_byte2)
1899 || (translate_prev_byte3 == cursor[-3]))))))))
facdc750
RS
1900 ch = simple_translate[*cursor];
1901 else
1902 ch = *cursor;
1903 if (pat[i] != ch)
1904 break;
ca1d1d23 1905 }
facdc750
RS
1906 }
1907 else
1908 {
1909 while ((i -= direction) + direction != 0)
ca1d1d23 1910 {
facdc750
RS
1911 cursor -= direction;
1912 if (pat[i] != *cursor)
1913 break;
ca1d1d23 1914 }
facdc750
RS
1915 }
1916 cursor += dirlen - i - direction; /* fix cursor */
1917 if (i + direction == 0)
1918 {
e7deaab0 1919 EMACS_INT position, start, end;
0c8533c6 1920
facdc750 1921 cursor -= direction;
1113d9db 1922
facdc750
RS
1923 position = pos_byte + cursor - p2 + ((direction > 0)
1924 ? 1 - len_byte : 0);
1925 set_search_regs (position, len_byte);
ca325161 1926
ef887810
RS
1927 if (NILP (Vinhibit_changing_match_data))
1928 {
1929 start = search_regs.start[0];
1930 end = search_regs.end[0];
1931 }
1932 else
1933 /* If Vinhibit_changing_match_data is non-nil,
1934 search_regs will not be changed. So let's
1935 compute start and end here. */
1936 {
1937 start = BYTE_TO_CHAR (position);
1938 end = BYTE_TO_CHAR (position + len_byte);
1939 }
1940
facdc750
RS
1941 if ((n -= direction) != 0)
1942 cursor += dirlen; /* to resume search */
ca1d1d23 1943 else
ef887810 1944 return direction > 0 ? end : start;
ca1d1d23 1945 }
facdc750
RS
1946 else
1947 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 1948 }
facdc750
RS
1949 pos_byte += cursor - p2;
1950 }
1951 else
f4646fff
AS
1952 /* Now we'll pick up a clump that has to be done the hard
1953 way because it covers a discontinuity. */
facdc750
RS
1954 {
1955 limit = ((direction > 0)
1956 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1957 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1958 limit = ((direction > 0)
1959 ? min (limit + len_byte, lim_byte - 1)
1960 : max (limit - len_byte, lim_byte));
1961 /* LIMIT is now the last value POS_BYTE can have
1962 and still be valid for a possible match. */
1963 while (1)
ca1d1d23 1964 {
f4646fff
AS
1965 /* This loop can be coded for space rather than
1966 speed because it will usually run only once.
1967 (the reach is at most len + 21, and typically
1968 does not exceed len). */
facdc750 1969 while ((limit - pos_byte) * direction >= 0)
f4646fff
AS
1970 {
1971 int ch = FETCH_BYTE (pos_byte);
1972 if (BM_tab[ch] == 0)
1973 goto hit2;
1974 pos_byte += BM_tab[ch];
1975 }
1976 break; /* ran off the end */
1977
1978 hit2:
1979 /* Found what might be a match. */
facdc750
RS
1980 i = dirlen - direction;
1981 while ((i -= direction) + direction != 0)
ca1d1d23 1982 {
facdc750
RS
1983 int ch;
1984 unsigned char *ptr;
1985 pos_byte -= direction;
1986 ptr = BYTE_POS_ADDR (pos_byte);
1987 /* Translate only the last byte of a character. */
1988 if (! multibyte
1989 || ((ptr == tail_end_ptr
1990 || CHAR_HEAD_P (ptr[1]))
1991 && (CHAR_HEAD_P (ptr[0])
0190922f 1992 /* Check if this is the last byte of a
cd1181db 1993 translatable character. */
0190922f
KH
1994 || (translate_prev_byte1 == ptr[-1]
1995 && (CHAR_HEAD_P (translate_prev_byte1)
1996 || (translate_prev_byte2 == ptr[-2]
1997 && (CHAR_HEAD_P (translate_prev_byte2)
1998 || translate_prev_byte3 == ptr[-3])))))))
facdc750
RS
1999 ch = simple_translate[*ptr];
2000 else
2001 ch = *ptr;
2002 if (pat[i] != ch)
2003 break;
2004 }
2005 /* Above loop has moved POS_BYTE part or all the way
2006 back to the first pos (last pos if reverse).
2007 Set it once again at the last (first if reverse) char. */
f4646fff 2008 pos_byte += dirlen - i - direction;
facdc750
RS
2009 if (i + direction == 0)
2010 {
e7deaab0 2011 EMACS_INT position, start, end;
facdc750 2012 pos_byte -= direction;
1113d9db 2013
facdc750 2014 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
facdc750 2015 set_search_regs (position, len_byte);
ca325161 2016
ef887810
RS
2017 if (NILP (Vinhibit_changing_match_data))
2018 {
2019 start = search_regs.start[0];
2020 end = search_regs.end[0];
2021 }
2022 else
2023 /* If Vinhibit_changing_match_data is non-nil,
2024 search_regs will not be changed. So let's
2025 compute start and end here. */
2026 {
2027 start = BYTE_TO_CHAR (position);
2028 end = BYTE_TO_CHAR (position + len_byte);
2029 }
2030
facdc750
RS
2031 if ((n -= direction) != 0)
2032 pos_byte += dirlen; /* to resume search */
ca1d1d23 2033 else
ef887810 2034 return direction > 0 ? end : start;
ca1d1d23 2035 }
facdc750
RS
2036 else
2037 pos_byte += stride_for_teases;
2038 }
2039 }
2040 /* We have done one clump. Can we continue? */
2041 if ((lim_byte - pos_byte) * direction < 0)
2042 return ((0 - n) * direction);
ca1d1d23 2043 }
facdc750 2044 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 2045}
ca325161 2046
fa8ed3e0 2047/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
a7e4cdde
RS
2048 for the overall match just found in the current buffer.
2049 Also clear out the match data for registers 1 and up. */
ca325161
RS
2050
2051static void
971de7fb 2052set_search_regs (EMACS_INT beg_byte, EMACS_INT nbytes)
ca325161 2053{
a7e4cdde
RS
2054 int i;
2055
ef887810
RS
2056 if (!NILP (Vinhibit_changing_match_data))
2057 return;
2058
ca325161
RS
2059 /* Make sure we have registers in which to store
2060 the match position. */
2061 if (search_regs.num_regs == 0)
2062 {
2d4a771a
RS
2063 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2064 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 2065 search_regs.num_regs = 2;
ca325161
RS
2066 }
2067
a7e4cdde
RS
2068 /* Clear out the other registers. */
2069 for (i = 1; i < search_regs.num_regs; i++)
2070 {
2071 search_regs.start[i] = -1;
2072 search_regs.end[i] = -1;
2073 }
2074
fa8ed3e0
RS
2075 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2076 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 2077 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 2078}
ca1d1d23 2079\f
02b16839
JL
2080DEFUN ("word-search-regexp", Fword_search_regexp, Sword_search_regexp, 1, 2, 0,
2081 doc: /* Return a regexp which matches words, ignoring punctuation.
2082Given STRING, a string of words separated by word delimiters,
2083compute a regexp that matches those exact words separated by
2084arbitrary punctuation. If LAX is non-nil, the end of the string
2085need not match a word boundary unless it ends in whitespace.
2086
2087Used in `word-search-forward', `word-search-backward',
2088`word-search-forward-lax', `word-search-backward-lax'. */)
2089 (Lisp_Object string, Lisp_Object lax)
ca1d1d23 2090{
1f1d9321 2091 register unsigned char *o;
c098fdb8 2092 register EMACS_INT i, i_byte, len, punct_count = 0, word_count = 0;
ca1d1d23 2093 Lisp_Object val;
0c8533c6 2094 int prev_c = 0;
c098fdb8
EZ
2095 EMACS_INT adjust;
2096 int whitespace_at_end;
ca1d1d23 2097
b7826503 2098 CHECK_STRING (string);
d5db4077 2099 len = SCHARS (string);
ca1d1d23 2100
0c8533c6
RS
2101 for (i = 0, i_byte = 0; i < len; )
2102 {
2103 int c;
177c0ea7 2104
93daa011 2105 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
0c8533c6
RS
2106
2107 if (SYNTAX (c) != Sword)
2108 {
2109 punct_count++;
6f076cc7 2110 if (SYNTAX (prev_c) == Sword)
0c8533c6
RS
2111 word_count++;
2112 }
ca1d1d23 2113
0c8533c6
RS
2114 prev_c = c;
2115 }
2116
2117 if (SYNTAX (prev_c) == Sword)
c6e0e1cc
CY
2118 {
2119 word_count++;
2120 whitespace_at_end = 0;
2121 }
2122 else
6f076cc7
PE
2123 {
2124 whitespace_at_end = 1;
2125 if (!word_count)
2126 return empty_unibyte_string;
2127 }
0c8533c6 2128
c6e0e1cc 2129 adjust = - punct_count + 5 * (word_count - 1)
02b16839 2130 + ((!NILP (lax) && !whitespace_at_end) ? 2 : 4);
8a2df937
RS
2131 if (STRING_MULTIBYTE (string))
2132 val = make_uninit_multibyte_string (len + adjust,
d5db4077 2133 SBYTES (string)
8a2df937
RS
2134 + adjust);
2135 else
2136 val = make_uninit_string (len + adjust);
ca1d1d23 2137
d5db4077 2138 o = SDATA (val);
ca1d1d23
JB
2139 *o++ = '\\';
2140 *o++ = 'b';
1e9582d4 2141 prev_c = 0;
ca1d1d23 2142
1e9582d4
RS
2143 for (i = 0, i_byte = 0; i < len; )
2144 {
2145 int c;
c098fdb8 2146 EMACS_INT i_byte_orig = i_byte;
177c0ea7 2147
93daa011 2148 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
1e9582d4
RS
2149
2150 if (SYNTAX (c) == Sword)
2151 {
72af86bd 2152 memcpy (o, SDATA (string) + i_byte_orig, i_byte - i_byte_orig);
1e9582d4
RS
2153 o += i_byte - i_byte_orig;
2154 }
6f076cc7 2155 else if (SYNTAX (prev_c) == Sword && --word_count)
1e9582d4
RS
2156 {
2157 *o++ = '\\';
2158 *o++ = 'W';
2159 *o++ = '\\';
2160 *o++ = 'W';
2161 *o++ = '*';
2162 }
2163
2164 prev_c = c;
2165 }
ca1d1d23 2166
02b16839 2167 if (NILP (lax) || whitespace_at_end)
c6e0e1cc
CY
2168 {
2169 *o++ = '\\';
2170 *o++ = 'b';
2171 }
ca1d1d23
JB
2172
2173 return val;
2174}
2175\f
2176DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
8c1a1077
PJ
2177 "MSearch backward: ",
2178 doc: /* Search backward from point for STRING.
2179Set point to the beginning of the occurrence found, and return point.
2180An optional second argument bounds the search; it is a buffer position.
2181The match found must not extend before that position.
2182Optional third argument, if t, means if fail just return nil (no error).
2183 If not nil and not t, position at limit of search and return nil.
acc28cb9
CY
2184Optional fourth argument COUNT, if non-nil, means to search for COUNT
2185 successive occurrences. If COUNT is negative, search forward,
2186 instead of backward, for -COUNT occurrences.
8c1a1077
PJ
2187
2188Search case-sensitivity is determined by the value of the variable
2189`case-fold-search', which see.
2190
2191See also the functions `match-beginning', `match-end' and `replace-match'. */)
5842a27b 2192 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
ca1d1d23 2193{
b819a390 2194 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
2195}
2196
6af43974 2197DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
8c1a1077
PJ
2198 doc: /* Search forward from point for STRING.
2199Set point to the end of the occurrence found, and return point.
2200An optional second argument bounds the search; it is a buffer position.
d6aacdcd 2201The match found must not extend after that position. A value of nil is
48740f01 2202 equivalent to (point-max).
8c1a1077
PJ
2203Optional third argument, if t, means if fail just return nil (no error).
2204 If not nil and not t, move to limit of search and return nil.
acc28cb9
CY
2205Optional fourth argument COUNT, if non-nil, means to search for COUNT
2206 successive occurrences. If COUNT is negative, search backward,
2207 instead of forward, for -COUNT occurrences.
8c1a1077
PJ
2208
2209Search case-sensitivity is determined by the value of the variable
2210`case-fold-search', which see.
2211
2212See also the functions `match-beginning', `match-end' and `replace-match'. */)
5842a27b 2213 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
ca1d1d23 2214{
b819a390 2215 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
2216}
2217
2218DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
8c1a1077
PJ
2219 "sWord search backward: ",
2220 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2221Set point to the beginning of the occurrence found, and return point.
2222An optional second argument bounds the search; it is a buffer position.
2223The match found must not extend before that position.
2224Optional third argument, if t, means if fail just return nil (no error).
2225 If not nil and not t, move to limit of search and return nil.
02b16839
JL
2226Optional fourth argument is repeat count--search for successive occurrences.
2227
2228Relies on the function `word-search-regexp' to convert a sequence
2229of words in STRING to a regexp used to search words without regard
2230to punctuation. */)
5842a27b 2231 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
ca1d1d23 2232{
02b16839 2233 return search_command (Fword_search_regexp (string, Qnil), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2234}
2235
2236DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
8c1a1077
PJ
2237 "sWord search: ",
2238 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2239Set point to the end of the occurrence found, and return point.
2240An optional second argument bounds the search; it is a buffer position.
2241The match found must not extend after that position.
2242Optional third argument, if t, means if fail just return nil (no error).
2243 If not nil and not t, move to limit of search and return nil.
02b16839
JL
2244Optional fourth argument is repeat count--search for successive occurrences.
2245
2246Relies on the function `word-search-regexp' to convert a sequence
2247of words in STRING to a regexp used to search words without regard
2248to punctuation. */)
5842a27b 2249 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
ca1d1d23 2250{
02b16839 2251 return search_command (Fword_search_regexp (string, Qnil), bound, noerror, count, 1, 1, 0);
c6e0e1cc
CY
2252}
2253
2254DEFUN ("word-search-backward-lax", Fword_search_backward_lax, Sword_search_backward_lax, 1, 4,
2255 "sWord search backward: ",
2256 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2257Set point to the beginning of the occurrence found, and return point.
2258
2259Unlike `word-search-backward', the end of STRING need not match a word
2260boundary unless it ends in whitespace.
2261
2262An optional second argument bounds the search; it is a buffer position.
2263The match found must not extend before that position.
2264Optional third argument, if t, means if fail just return nil (no error).
2265 If not nil and not t, move to limit of search and return nil.
02b16839
JL
2266Optional fourth argument is repeat count--search for successive occurrences.
2267
2268Relies on the function `word-search-regexp' to convert a sequence
2269of words in STRING to a regexp used to search words without regard
2270to punctuation. */)
5842a27b 2271 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
c6e0e1cc 2272{
02b16839 2273 return search_command (Fword_search_regexp (string, Qt), bound, noerror, count, -1, 1, 0);
c6e0e1cc
CY
2274}
2275
2276DEFUN ("word-search-forward-lax", Fword_search_forward_lax, Sword_search_forward_lax, 1, 4,
2277 "sWord search: ",
2278 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2279Set point to the end of the occurrence found, and return point.
2280
2281Unlike `word-search-forward', the end of STRING need not match a word
2282boundary unless it ends in whitespace.
2283
2284An optional second argument bounds the search; it is a buffer position.
2285The match found must not extend after that position.
2286Optional third argument, if t, means if fail just return nil (no error).
2287 If not nil and not t, move to limit of search and return nil.
02b16839
JL
2288Optional fourth argument is repeat count--search for successive occurrences.
2289
2290Relies on the function `word-search-regexp' to convert a sequence
2291of words in STRING to a regexp used to search words without regard
2292to punctuation. */)
5842a27b 2293 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
c6e0e1cc 2294{
02b16839 2295 return search_command (Fword_search_regexp (string, Qt), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
2296}
2297
2298DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
8c1a1077
PJ
2299 "sRE search backward: ",
2300 doc: /* Search backward from point for match for regular expression REGEXP.
2301Set point to the beginning of the match, and return point.
2302The match found is the one starting last in the buffer
2303and yet ending before the origin of the search.
2304An optional second argument bounds the search; it is a buffer position.
2305The match found must start at or after that position.
2306Optional third argument, if t, means if fail just return nil (no error).
2307 If not nil and not t, move to limit of search and return nil.
2308Optional fourth argument is repeat count--search for successive occurrences.
49080b10
LMI
2309
2310Search case-sensitivity is determined by the value of the variable
2311`case-fold-search', which see.
2312
8c1a1077
PJ
2313See also the functions `match-beginning', `match-end', `match-string',
2314and `replace-match'. */)
5842a27b 2315 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
ca1d1d23 2316{
b819a390 2317 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2318}
2319
2320DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
8c1a1077
PJ
2321 "sRE search: ",
2322 doc: /* Search forward from point for regular expression REGEXP.
2323Set point to the end of the occurrence found, and return point.
2324An optional second argument bounds the search; it is a buffer position.
2325The match found must not extend after that position.
2326Optional third argument, if t, means if fail just return nil (no error).
2327 If not nil and not t, move to limit of search and return nil.
2328Optional fourth argument is repeat count--search for successive occurrences.
49080b10
LMI
2329
2330Search case-sensitivity is determined by the value of the variable
2331`case-fold-search', which see.
2332
8c1a1077
PJ
2333See also the functions `match-beginning', `match-end', `match-string',
2334and `replace-match'. */)
5842a27b 2335 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
ca1d1d23 2336{
b819a390
RS
2337 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2338}
2339
2340DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
8c1a1077
PJ
2341 "sPosix search backward: ",
2342 doc: /* Search backward from point for match for regular expression REGEXP.
2343Find the longest match in accord with Posix regular expression rules.
2344Set point to the beginning of the match, and return point.
2345The match found is the one starting last in the buffer
2346and yet ending before the origin of the search.
2347An optional second argument bounds the search; it is a buffer position.
2348The match found must start at or after that position.
2349Optional third argument, if t, means if fail just return nil (no error).
2350 If not nil and not t, move to limit of search and return nil.
2351Optional fourth argument is repeat count--search for successive occurrences.
49080b10
LMI
2352
2353Search case-sensitivity is determined by the value of the variable
2354`case-fold-search', which see.
2355
8c1a1077
PJ
2356See also the functions `match-beginning', `match-end', `match-string',
2357and `replace-match'. */)
5842a27b 2358 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
b819a390
RS
2359{
2360 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2361}
2362
2363DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
8c1a1077
PJ
2364 "sPosix search: ",
2365 doc: /* Search forward from point for regular expression REGEXP.
2366Find the longest match in accord with Posix regular expression rules.
2367Set point to the end of the occurrence found, and return point.
2368An optional second argument bounds the search; it is a buffer position.
2369The match found must not extend after that position.
2370Optional third argument, if t, means if fail just return nil (no error).
2371 If not nil and not t, move to limit of search and return nil.
2372Optional fourth argument is repeat count--search for successive occurrences.
49080b10
LMI
2373
2374Search case-sensitivity is determined by the value of the variable
2375`case-fold-search', which see.
2376
8c1a1077
PJ
2377See also the functions `match-beginning', `match-end', `match-string',
2378and `replace-match'. */)
5842a27b 2379 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
b819a390
RS
2380{
2381 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2382}
2383\f
d7a5ad5f 2384DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
8c1a1077 2385 doc: /* Replace text matched by last search with NEWTEXT.
4dd0c271
RS
2386Leave point at the end of the replacement text.
2387
8c1a1077
PJ
2388If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2389Otherwise maybe capitalize the whole text, or maybe just word initials,
2390based on the replaced text.
2391If the replaced text has only capital letters
2392and has at least one multiletter word, convert NEWTEXT to all caps.
4dd0c271
RS
2393Otherwise if all words are capitalized in the replaced text,
2394capitalize each word in NEWTEXT.
8c1a1077
PJ
2395
2396If third arg LITERAL is non-nil, insert NEWTEXT literally.
2397Otherwise treat `\\' as special:
2398 `\\&' in NEWTEXT means substitute original matched text.
2399 `\\N' means substitute what matched the Nth `\\(...\\)'.
2400 If Nth parens didn't match, substitute nothing.
2401 `\\\\' means insert one `\\'.
4dd0c271
RS
2402Case conversion does not apply to these substitutions.
2403
8c1a1077 2404FIXEDCASE and LITERAL are optional arguments.
8c1a1077
PJ
2405
2406The optional fourth argument STRING can be a string to modify.
2407This is meaningful when the previous match was done against STRING,
2408using `string-match'. When used this way, `replace-match'
2409creates and returns a new string made by copying STRING and replacing
2410the part of STRING that was matched.
2411
2412The optional fifth argument SUBEXP specifies a subexpression;
2413it says to replace just that subexpression with NEWTEXT,
2414rather than replacing the entire matched text.
2415This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2416`\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2417NEWTEXT in place of subexp N.
2418This is useful only after a regular expression search or match,
2419since only regular expressions have distinguished subexpressions. */)
5842a27b 2420 (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp)
ca1d1d23
JB
2421{
2422 enum { nochange, all_caps, cap_initial } case_action;
c098fdb8 2423 register EMACS_INT pos, pos_byte;
ca1d1d23 2424 int some_multiletter_word;
97832bd0 2425 int some_lowercase;
73dc8771 2426 int some_uppercase;
208767c3 2427 int some_nonuppercase_initial;
ca1d1d23 2428 register int c, prevc;
a0efffc8 2429 ptrdiff_t sub;
e7deaab0 2430 EMACS_INT opoint, newpoint;
ca1d1d23 2431
b7826503 2432 CHECK_STRING (newtext);
ca1d1d23 2433
080c45fd 2434 if (! NILP (string))
b7826503 2435 CHECK_STRING (string);
080c45fd 2436
ca1d1d23
JB
2437 case_action = nochange; /* We tried an initialization */
2438 /* but some C compilers blew it */
4746118a
JB
2439
2440 if (search_regs.num_regs <= 0)
d72cdbfc 2441 error ("`replace-match' called before any match found");
4746118a 2442
d7a5ad5f
RS
2443 if (NILP (subexp))
2444 sub = 0;
2445 else
2446 {
b7826503 2447 CHECK_NUMBER (subexp);
a0efffc8 2448 if (! (0 <= XINT (subexp) && XINT (subexp) < search_regs.num_regs))
d7a5ad5f 2449 args_out_of_range (subexp, make_number (search_regs.num_regs));
a0efffc8 2450 sub = XINT (subexp);
d7a5ad5f
RS
2451 }
2452
080c45fd
RS
2453 if (NILP (string))
2454 {
d7a5ad5f
RS
2455 if (search_regs.start[sub] < BEGV
2456 || search_regs.start[sub] > search_regs.end[sub]
2457 || search_regs.end[sub] > ZV)
2458 args_out_of_range (make_number (search_regs.start[sub]),
2459 make_number (search_regs.end[sub]));
080c45fd
RS
2460 }
2461 else
2462 {
d7a5ad5f
RS
2463 if (search_regs.start[sub] < 0
2464 || search_regs.start[sub] > search_regs.end[sub]
d5db4077 2465 || search_regs.end[sub] > SCHARS (string))
d7a5ad5f
RS
2466 args_out_of_range (make_number (search_regs.start[sub]),
2467 make_number (search_regs.end[sub]));
080c45fd 2468 }
ca1d1d23
JB
2469
2470 if (NILP (fixedcase))
2471 {
2472 /* Decide how to casify by examining the matched text. */
e7deaab0 2473 EMACS_INT last;
ca1d1d23 2474
ac3b28b1
KH
2475 pos = search_regs.start[sub];
2476 last = search_regs.end[sub];
fa8ed3e0
RS
2477
2478 if (NILP (string))
ac3b28b1 2479 pos_byte = CHAR_TO_BYTE (pos);
fa8ed3e0 2480 else
ac3b28b1 2481 pos_byte = string_char_to_byte (string, pos);
fa8ed3e0 2482
ca1d1d23
JB
2483 prevc = '\n';
2484 case_action = all_caps;
2485
2486 /* some_multiletter_word is set nonzero if any original word
2487 is more than one letter long. */
2488 some_multiletter_word = 0;
97832bd0 2489 some_lowercase = 0;
208767c3 2490 some_nonuppercase_initial = 0;
73dc8771 2491 some_uppercase = 0;
ca1d1d23 2492
ac3b28b1 2493 while (pos < last)
ca1d1d23 2494 {
080c45fd 2495 if (NILP (string))
ac3b28b1 2496 {
93daa011 2497 c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
ac3b28b1
KH
2498 INC_BOTH (pos, pos_byte);
2499 }
080c45fd 2500 else
93daa011 2501 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
080c45fd 2502
5da9919f 2503 if (lowercasep (c))
ca1d1d23
JB
2504 {
2505 /* Cannot be all caps if any original char is lower case */
2506
97832bd0 2507 some_lowercase = 1;
ca1d1d23 2508 if (SYNTAX (prevc) != Sword)
208767c3 2509 some_nonuppercase_initial = 1;
ca1d1d23
JB
2510 else
2511 some_multiletter_word = 1;
2512 }
5da9919f 2513 else if (uppercasep (c))
ca1d1d23 2514 {
73dc8771 2515 some_uppercase = 1;
97832bd0 2516 if (SYNTAX (prevc) != Sword)
c4d460ce 2517 ;
97832bd0 2518 else
ca1d1d23
JB
2519 some_multiletter_word = 1;
2520 }
208767c3
RS
2521 else
2522 {
2523 /* If the initial is a caseless word constituent,
2524 treat that like a lowercase initial. */
2525 if (SYNTAX (prevc) != Sword)
2526 some_nonuppercase_initial = 1;
2527 }
ca1d1d23
JB
2528
2529 prevc = c;
2530 }
2531
97832bd0
RS
2532 /* Convert to all caps if the old text is all caps
2533 and has at least one multiletter word. */
2534 if (! some_lowercase && some_multiletter_word)
2535 case_action = all_caps;
c4d460ce 2536 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2537 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2538 case_action = cap_initial;
208767c3 2539 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2540 /* Should x -> yz, operating on X, give Yz or YZ?
2541 We'll assume the latter. */
2542 case_action = all_caps;
97832bd0
RS
2543 else
2544 case_action = nochange;
ca1d1d23
JB
2545 }
2546
080c45fd
RS
2547 /* Do replacement in a string. */
2548 if (!NILP (string))
2549 {
2550 Lisp_Object before, after;
2551
2552 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2553 make_number (search_regs.start[sub]));
2554 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2555
636a5e28
RS
2556 /* Substitute parts of the match into NEWTEXT
2557 if desired. */
080c45fd
RS
2558 if (NILP (literal))
2559 {
e7deaab0
AS
2560 EMACS_INT lastpos = 0;
2561 EMACS_INT lastpos_byte = 0;
080c45fd
RS
2562 /* We build up the substituted string in ACCUM. */
2563 Lisp_Object accum;
2564 Lisp_Object middle;
a53e2e89 2565 EMACS_INT length = SBYTES (newtext);
080c45fd
RS
2566
2567 accum = Qnil;
2568
ac3b28b1 2569 for (pos_byte = 0, pos = 0; pos_byte < length;)
080c45fd 2570 {
c098fdb8
EZ
2571 EMACS_INT substart = -1;
2572 EMACS_INT subend = 0;
1e79ec24 2573 int delbackslash = 0;
080c45fd 2574
0c8533c6
RS
2575 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2576
080c45fd
RS
2577 if (c == '\\')
2578 {
0c8533c6 2579 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
177c0ea7 2580
080c45fd
RS
2581 if (c == '&')
2582 {
d7a5ad5f
RS
2583 substart = search_regs.start[sub];
2584 subend = search_regs.end[sub];
080c45fd 2585 }
76fc1ea2 2586 else if (c >= '1' && c <= '9')
080c45fd 2587 {
76fc1ea2
KH
2588 if (search_regs.start[c - '0'] >= 0
2589 && c <= search_regs.num_regs + '0')
080c45fd
RS
2590 {
2591 substart = search_regs.start[c - '0'];
2592 subend = search_regs.end[c - '0'];
2593 }
76fc1ea2
KH
2594 else
2595 {
2596 /* If that subexp did not match,
2597 replace \\N with nothing. */
2598 substart = 0;
2599 subend = 0;
2600 }
080c45fd 2601 }
1e79ec24
KH
2602 else if (c == '\\')
2603 delbackslash = 1;
636a5e28
RS
2604 else
2605 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2606 }
2607 if (substart >= 0)
2608 {
d131e79c
RS
2609 if (pos - 2 != lastpos)
2610 middle = substring_both (newtext, lastpos,
2611 lastpos_byte,
2612 pos - 2, pos_byte - 2);
080c45fd
RS
2613 else
2614 middle = Qnil;
2615 accum = concat3 (accum, middle,
0c8533c6
RS
2616 Fsubstring (string,
2617 make_number (substart),
080c45fd
RS
2618 make_number (subend)));
2619 lastpos = pos;
0c8533c6 2620 lastpos_byte = pos_byte;
080c45fd 2621 }
1e79ec24
KH
2622 else if (delbackslash)
2623 {
d131e79c
RS
2624 middle = substring_both (newtext, lastpos,
2625 lastpos_byte,
2626 pos - 1, pos_byte - 1);
0c8533c6 2627
1e79ec24
KH
2628 accum = concat2 (accum, middle);
2629 lastpos = pos;
0c8533c6 2630 lastpos_byte = pos_byte;
1e79ec24 2631 }
080c45fd
RS
2632 }
2633
d131e79c
RS
2634 if (pos != lastpos)
2635 middle = substring_both (newtext, lastpos,
2636 lastpos_byte,
0c8533c6 2637 pos, pos_byte);
080c45fd
RS
2638 else
2639 middle = Qnil;
2640
2641 newtext = concat2 (accum, middle);
2642 }
2643
636a5e28 2644 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2645 if (case_action == all_caps)
2646 newtext = Fupcase (newtext);
2647 else if (case_action == cap_initial)
2b2eead9 2648 newtext = Fupcase_initials (newtext);
080c45fd
RS
2649
2650 return concat3 (before, newtext, after);
2651 }
2652
09c4719e 2653 /* Record point, then move (quietly) to the start of the match. */
9160906f 2654 if (PT >= search_regs.end[sub])
b0eba991 2655 opoint = PT - ZV;
9160906f
RS
2656 else if (PT > search_regs.start[sub])
2657 opoint = search_regs.end[sub] - ZV;
b0eba991
RS
2658 else
2659 opoint = PT;
2660
886ed6ec
RS
2661 /* If we want non-literal replacement,
2662 perform substitution on the replacement string. */
2663 if (NILP (literal))
ca1d1d23 2664 {
0065d054 2665 ptrdiff_t length = SBYTES (newtext);
68e69fbd 2666 unsigned char *substed;
0065d054 2667 ptrdiff_t substed_alloc_size, substed_len;
4b4deea2 2668 int buf_multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
3bc25e52 2669 int str_multibyte = STRING_MULTIBYTE (newtext);
886ed6ec 2670 int really_changed = 0;
3bc25e52 2671
0065d054
PE
2672 substed_alloc_size = ((STRING_BYTES_BOUND - 100) / 2 < length
2673 ? STRING_BYTES_BOUND
2674 : length * 2 + 100);
2675 substed = (unsigned char *) xmalloc (substed_alloc_size);
68e69fbd
RS
2676 substed_len = 0;
2677
3bc25e52
KH
2678 /* Go thru NEWTEXT, producing the actual text to insert in
2679 SUBSTED while adjusting multibyteness to that of the current
2680 buffer. */
ca1d1d23 2681
ac3b28b1 2682 for (pos_byte = 0, pos = 0; pos_byte < length;)
ca1d1d23 2683 {
68e69fbd 2684 unsigned char str[MAX_MULTIBYTE_LENGTH];
8ea90aa3 2685 const unsigned char *add_stuff = NULL;
0065d054 2686 ptrdiff_t add_len = 0;
a0efffc8 2687 ptrdiff_t idx = -1;
9a76659d 2688
3bc25e52
KH
2689 if (str_multibyte)
2690 {
eb99a8dd 2691 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
3bc25e52 2692 if (!buf_multibyte)
461c2ab9 2693 c = multibyte_char_to_unibyte (c);
3bc25e52
KH
2694 }
2695 else
2696 {
2697 /* Note that we don't have to increment POS. */
5d69fe10 2698 c = SREF (newtext, pos_byte++);
3bc25e52 2699 if (buf_multibyte)
4c0354d7 2700 MAKE_CHAR_MULTIBYTE (c);
3bc25e52 2701 }
ac3b28b1 2702
68e69fbd
RS
2703 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2704 or set IDX to a match index, which means put that part
2705 of the buffer text into SUBSTED. */
2706
ca1d1d23
JB
2707 if (c == '\\')
2708 {
886ed6ec
RS
2709 really_changed = 1;
2710
3bc25e52
KH
2711 if (str_multibyte)
2712 {
eb99a8dd
KH
2713 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2714 pos, pos_byte);
071ce769 2715 if (!buf_multibyte && !ASCII_CHAR_P (c))
461c2ab9 2716 c = multibyte_char_to_unibyte (c);
3bc25e52
KH
2717 }
2718 else
2719 {
d5db4077 2720 c = SREF (newtext, pos_byte++);
3bc25e52 2721 if (buf_multibyte)
4c0354d7 2722 MAKE_CHAR_MULTIBYTE (c);
3bc25e52
KH
2723 }
2724
ca1d1d23 2725 if (c == '&')
68e69fbd 2726 idx = sub;
78445046 2727 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
2728 {
2729 if (search_regs.start[c - '0'] >= 1)
68e69fbd 2730 idx = c - '0';
ca1d1d23 2731 }
636a5e28 2732 else if (c == '\\')
a7e979a4 2733 add_len = 1, add_stuff = (unsigned char *) "\\";
636a5e28 2734 else
3bc25e52
KH
2735 {
2736 xfree (substed);
2737 error ("Invalid use of `\\' in replacement text");
2738 }
ca1d1d23
JB
2739 }
2740 else
68e69fbd
RS
2741 {
2742 add_len = CHAR_STRING (c, str);
2743 add_stuff = str;
2744 }
2745
2746 /* If we want to copy part of a previous match,
2747 set up ADD_STUFF and ADD_LEN to point to it. */
2748 if (idx >= 0)
2749 {
0065d054 2750 ptrdiff_t begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
68e69fbd
RS
2751 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2752 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2753 move_gap (search_regs.start[idx]);
2754 add_stuff = BYTE_POS_ADDR (begbyte);
2755 }
2756
2757 /* Now the stuff we want to add to SUBSTED
2758 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2759
2760 /* Make sure SUBSTED is big enough. */
0065d054
PE
2761 if (substed_alloc_size - substed_len < add_len)
2762 substed =
2763 xpalloc (substed, &substed_alloc_size,
2764 add_len - (substed_alloc_size - substed_len),
2765 STRING_BYTES_BOUND, 1);
68e69fbd
RS
2766
2767 /* Now add to the end of SUBSTED. */
f8ce8a0d
GM
2768 if (add_stuff)
2769 {
72af86bd 2770 memcpy (substed + substed_len, add_stuff, add_len);
f8ce8a0d
GM
2771 substed_len += add_len;
2772 }
ca1d1d23 2773 }
68e69fbd 2774
886ed6ec 2775 if (really_changed)
76fc1ea2
KH
2776 {
2777 if (buf_multibyte)
2778 {
c098fdb8
EZ
2779 EMACS_INT nchars =
2780 multibyte_chars_in_text (substed, substed_len);
68e69fbd 2781
a7e979a4
PE
2782 newtext = make_multibyte_string ((char *) substed, nchars,
2783 substed_len);
76fc1ea2
KH
2784 }
2785 else
a7e979a4 2786 newtext = make_unibyte_string ((char *) substed, substed_len);
76fc1ea2 2787 }
68e69fbd 2788 xfree (substed);
ca1d1d23
JB
2789 }
2790
886ed6ec
RS
2791 /* Replace the old text with the new in the cleanest possible way. */
2792 replace_range (search_regs.start[sub], search_regs.end[sub],
2793 newtext, 1, 0, 1);
d5db4077 2794 newpoint = search_regs.start[sub] + SCHARS (newtext);
ca1d1d23
JB
2795
2796 if (case_action == all_caps)
886ed6ec
RS
2797 Fupcase_region (make_number (search_regs.start[sub]),
2798 make_number (newpoint));
ca1d1d23 2799 else if (case_action == cap_initial)
886ed6ec
RS
2800 Fupcase_initials_region (make_number (search_regs.start[sub]),
2801 make_number (newpoint));
3e18eecf 2802
98e942e0
RS
2803 /* Adjust search data for this change. */
2804 {
e7deaab0
AS
2805 EMACS_INT oldend = search_regs.end[sub];
2806 EMACS_INT oldstart = search_regs.start[sub];
2807 EMACS_INT change = newpoint - search_regs.end[sub];
98e942e0
RS
2808 int i;
2809
2810 for (i = 0; i < search_regs.num_regs; i++)
2811 {
41c01205 2812 if (search_regs.start[i] >= oldend)
98e942e0 2813 search_regs.start[i] += change;
41c01205
DK
2814 else if (search_regs.start[i] > oldstart)
2815 search_regs.start[i] = oldstart;
2816 if (search_regs.end[i] >= oldend)
98e942e0 2817 search_regs.end[i] += change;
41c01205
DK
2818 else if (search_regs.end[i] > oldstart)
2819 search_regs.end[i] = oldstart;
98e942e0
RS
2820 }
2821 }
2822
b0eba991 2823 /* Put point back where it was in the text. */
8d808a65 2824 if (opoint <= 0)
fa8ed3e0 2825 TEMP_SET_PT (opoint + ZV);
b0eba991 2826 else
fa8ed3e0 2827 TEMP_SET_PT (opoint);
b0eba991
RS
2828
2829 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2830 move_if_not_intangible (newpoint);
177c0ea7 2831
ca1d1d23
JB
2832 return Qnil;
2833}
2834\f
2835static Lisp_Object
971de7fb 2836match_limit (Lisp_Object num, int beginningp)
ca1d1d23 2837{
a0efffc8 2838 EMACS_INT n;
ca1d1d23 2839
b7826503 2840 CHECK_NUMBER (num);
ca1d1d23 2841 n = XINT (num);
f90a5bf5 2842 if (n < 0)
bd2cbd56 2843 args_out_of_range (num, make_number (0));
f90a5bf5
RS
2844 if (search_regs.num_regs <= 0)
2845 error ("No match data, because no search succeeded");
9b9ceb61 2846 if (n >= search_regs.num_regs
4746118a 2847 || search_regs.start[n] < 0)
ca1d1d23
JB
2848 return Qnil;
2849 return (make_number ((beginningp) ? search_regs.start[n]
2850 : search_regs.end[n]));
2851}
2852
a7ca3326 2853DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
8c1a1077
PJ
2854 doc: /* Return position of start of text matched by last search.
2855SUBEXP, a number, specifies which parenthesized expression in the last
2856 regexp.
2857Value is nil if SUBEXPth pair didn't match, or there were less than
2858 SUBEXP pairs.
2859Zero means the entire text matched by the whole regexp or whole string. */)
5842a27b 2860 (Lisp_Object subexp)
ca1d1d23 2861{
5806161b 2862 return match_limit (subexp, 1);
ca1d1d23
JB
2863}
2864
a7ca3326 2865DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
8c1a1077
PJ
2866 doc: /* Return position of end of text matched by last search.
2867SUBEXP, a number, specifies which parenthesized expression in the last
2868 regexp.
2869Value is nil if SUBEXPth pair didn't match, or there were less than
2870 SUBEXP pairs.
2871Zero means the entire text matched by the whole regexp or whole string. */)
5842a27b 2872 (Lisp_Object subexp)
ca1d1d23 2873{
5806161b 2874 return match_limit (subexp, 0);
177c0ea7 2875}
ca1d1d23 2876
a7ca3326 2877DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
8c1a1077
PJ
2878 doc: /* Return a list containing all info on what the last search matched.
2879Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2880All the elements are markers or nil (nil if the Nth pair didn't match)
2881if the last match was on a buffer; integers or nil if a string was matched.
febb8aba 2882Use `set-match-data' to reinstate the data in this list.
8c1a1077 2883
41c01205
DK
2884If INTEGERS (the optional first argument) is non-nil, always use
2885integers \(rather than markers) to represent buffer positions. In
2886this case, and if the last match was in a buffer, the buffer will get
2887stored as one additional element at the end of the list.
2888
abd0071c
KS
2889If REUSE is a list, reuse it as part of the value. If REUSE is long
2890enough to hold all the values, and if INTEGERS is non-nil, no consing
2891is done.
2892
2893If optional third arg RESEAT is non-nil, any previous markers on the
2894REUSE list will be modified to point to nowhere.
2895
140a6b7e 2896Return value is undefined if the last search failed. */)
5842a27b 2897 (Lisp_Object integers, Lisp_Object reuse, Lisp_Object reseat)
ca1d1d23 2898{
56256c2a 2899 Lisp_Object tail, prev;
4746118a 2900 Lisp_Object *data;
ca1d1d23
JB
2901 int i, len;
2902
abd0071c
KS
2903 if (!NILP (reseat))
2904 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2905 if (MARKERP (XCAR (tail)))
2906 {
51f10faa 2907 unchain_marker (XMARKER (XCAR (tail)));
abd0071c
KS
2908 XSETCAR (tail, Qnil);
2909 }
2910
daa37602 2911 if (NILP (last_thing_searched))
c36bcf1b 2912 return Qnil;
daa37602 2913
6bbd7a29
GM
2914 prev = Qnil;
2915
41c01205 2916 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
4746118a
JB
2917 * sizeof (Lisp_Object));
2918
41c01205 2919 len = 0;
4746118a 2920 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23 2921 {
a53e2e89 2922 EMACS_INT start = search_regs.start[i];
ca1d1d23
JB
2923 if (start >= 0)
2924 {
56256c2a
RS
2925 if (EQ (last_thing_searched, Qt)
2926 || ! NILP (integers))
ca1d1d23 2927 {
c235cce7
KH
2928 XSETFASTINT (data[2 * i], start);
2929 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2930 }
0ed62dc7 2931 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2932 {
2933 data[2 * i] = Fmake_marker ();
daa37602
JB
2934 Fset_marker (data[2 * i],
2935 make_number (start),
2936 last_thing_searched);
ca1d1d23
JB
2937 data[2 * i + 1] = Fmake_marker ();
2938 Fset_marker (data[2 * i + 1],
177c0ea7 2939 make_number (search_regs.end[i]),
daa37602 2940 last_thing_searched);
ca1d1d23 2941 }
daa37602
JB
2942 else
2943 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2944 abort ();
2945
abd0071c 2946 len = 2 * i + 2;
ca1d1d23
JB
2947 }
2948 else
abd0071c 2949 data[2 * i] = data[2 * i + 1] = Qnil;
ca1d1d23 2950 }
56256c2a 2951
bd2cbd56 2952 if (BUFFERP (last_thing_searched) && !NILP (integers))
41c01205 2953 {
bd2cbd56 2954 data[len] = last_thing_searched;
41c01205
DK
2955 len++;
2956 }
2957
56256c2a
RS
2958 /* If REUSE is not usable, cons up the values and return them. */
2959 if (! CONSP (reuse))
41c01205 2960 return Flist (len, data);
56256c2a
RS
2961
2962 /* If REUSE is a list, store as many value elements as will fit
2963 into the elements of REUSE. */
2964 for (i = 0, tail = reuse; CONSP (tail);
c1d497be 2965 i++, tail = XCDR (tail))
56256c2a 2966 {
41c01205 2967 if (i < len)
f3fbd155 2968 XSETCAR (tail, data[i]);
56256c2a 2969 else
f3fbd155 2970 XSETCAR (tail, Qnil);
56256c2a
RS
2971 prev = tail;
2972 }
2973
2974 /* If we couldn't fit all value elements into REUSE,
2975 cons up the rest of them and add them to the end of REUSE. */
41c01205
DK
2976 if (i < len)
2977 XSETCDR (prev, Flist (len - i, data + i));
56256c2a
RS
2978
2979 return reuse;
ca1d1d23
JB
2980}
2981
b51d6c92
SM
2982/* We used to have an internal use variant of `reseat' described as:
2983
2984 If RESEAT is `evaporate', put the markers back on the free list
2985 immediately. No other references to the markers must exist in this
2986 case, so it is used only internally on the unwind stack and
2987 save-match-data from Lisp.
2988
2989 But it was ill-conceived: those supposedly-internal markers get exposed via
2990 the undo-list, so freeing them here is unsafe. */
ca1d1d23 2991
a7ca3326 2992DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
8c1a1077 2993 doc: /* Set internal data on last search match from elements of LIST.
abd0071c
KS
2994LIST should have been created by calling `match-data' previously.
2995
51f10faa 2996If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
5842a27b 2997 (register Lisp_Object list, Lisp_Object reseat)
ca1d1d23 2998{
5f2ab479 2999 ptrdiff_t i;
ca1d1d23
JB
3000 register Lisp_Object marker;
3001
7074fde6
FP
3002 if (running_asynch_code)
3003 save_search_regs ();
3004
29100cea 3005 CHECK_LIST (list);
ca1d1d23 3006
41c01205
DK
3007 /* Unless we find a marker with a buffer or an explicit buffer
3008 in LIST, assume that this match data came from a string. */
daa37602
JB
3009 last_thing_searched = Qt;
3010
4746118a
JB
3011 /* Allocate registers if they don't already exist. */
3012 {
5f2ab479 3013 ptrdiff_t length = XFASTINT (Flength (list)) / 2;
4746118a
JB
3014
3015 if (length > search_regs.num_regs)
3016 {
0065d054
PE
3017 ptrdiff_t num_regs = search_regs.num_regs;
3018 search_regs.start =
3019 xpalloc (search_regs.start, &num_regs, length - num_regs,
3020 min (PTRDIFF_MAX, UINT_MAX), sizeof (regoff_t));
3021 search_regs.end =
3022 xrealloc (search_regs.end, num_regs * sizeof (regoff_t));
3023
3024 for (i = search_regs.num_regs; i < num_regs; i++)
e62371e9
KH
3025 search_regs.start[i] = -1;
3026
0065d054 3027 search_regs.num_regs = num_regs;
4746118a 3028 }
ca1d1d23 3029
abd0071c 3030 for (i = 0; CONSP (list); i++)
41c01205 3031 {
abd0071c 3032 marker = XCAR (list);
bd2cbd56 3033 if (BUFFERP (marker))
c3762cbd 3034 {
bd2cbd56 3035 last_thing_searched = marker;
c3762cbd
DK
3036 break;
3037 }
3038 if (i >= length)
3039 break;
41c01205
DK
3040 if (NILP (marker))
3041 {
3042 search_regs.start[i] = -1;
abd0071c 3043 list = XCDR (list);
41c01205
DK
3044 }
3045 else
3046 {
e7deaab0 3047 EMACS_INT from;
abd0071c 3048 Lisp_Object m;
e2811828 3049
abd0071c 3050 m = marker;
41c01205
DK
3051 if (MARKERP (marker))
3052 {
3053 if (XMARKER (marker)->buffer == 0)
3054 XSETFASTINT (marker, 0);
3055 else
3056 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3057 }
e2811828 3058
41c01205
DK
3059 CHECK_NUMBER_COERCE_MARKER (marker);
3060 from = XINT (marker);
e2811828 3061
abd0071c
KS
3062 if (!NILP (reseat) && MARKERP (m))
3063 {
b51d6c92 3064 unchain_marker (XMARKER (m));
9ad54a7e 3065 XSETCAR (list, Qnil);
abd0071c
KS
3066 }
3067
3068 if ((list = XCDR (list), !CONSP (list)))
3069 break;
3070
3071 m = marker = XCAR (list);
3072
41c01205
DK
3073 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3074 XSETFASTINT (marker, 0);
e2811828 3075
41c01205
DK
3076 CHECK_NUMBER_COERCE_MARKER (marker);
3077 search_regs.start[i] = from;
3078 search_regs.end[i] = XINT (marker);
abd0071c
KS
3079
3080 if (!NILP (reseat) && MARKERP (m))
3081 {
b51d6c92 3082 unchain_marker (XMARKER (m));
9ad54a7e 3083 XSETCAR (list, Qnil);
abd0071c 3084 }
41c01205 3085 }
abd0071c 3086 list = XCDR (list);
41c01205 3087 }
ca1d1d23 3088
41c01205
DK
3089 for (; i < search_regs.num_regs; i++)
3090 search_regs.start[i] = -1;
3091 }
ca1d1d23 3092
177c0ea7 3093 return Qnil;
ca1d1d23
JB
3094}
3095
7074fde6
FP
3096/* If non-zero the match data have been saved in saved_search_regs
3097 during the execution of a sentinel or filter. */
75ebf74b 3098static int search_regs_saved;
7074fde6 3099static struct re_registers saved_search_regs;
41c01205 3100static Lisp_Object saved_last_thing_searched;
7074fde6
FP
3101
3102/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3103 if asynchronous code (filter or sentinel) is running. */
3104static void
971de7fb 3105save_search_regs (void)
7074fde6
FP
3106{
3107 if (!search_regs_saved)
3108 {
3109 saved_search_regs.num_regs = search_regs.num_regs;
3110 saved_search_regs.start = search_regs.start;
3111 saved_search_regs.end = search_regs.end;
41c01205
DK
3112 saved_last_thing_searched = last_thing_searched;
3113 last_thing_searched = Qnil;
7074fde6 3114 search_regs.num_regs = 0;
2d4a771a
RS
3115 search_regs.start = 0;
3116 search_regs.end = 0;
7074fde6
FP
3117
3118 search_regs_saved = 1;
3119 }
3120}
3121
3122/* Called upon exit from filters and sentinels. */
3123void
971de7fb 3124restore_search_regs (void)
7074fde6
FP
3125{
3126 if (search_regs_saved)
3127 {
3128 if (search_regs.num_regs > 0)
3129 {
3130 xfree (search_regs.start);
3131 xfree (search_regs.end);
3132 }
3133 search_regs.num_regs = saved_search_regs.num_regs;
3134 search_regs.start = saved_search_regs.start;
3135 search_regs.end = saved_search_regs.end;
41c01205
DK
3136 last_thing_searched = saved_last_thing_searched;
3137 saved_last_thing_searched = Qnil;
7074fde6
FP
3138 search_regs_saved = 0;
3139 }
3140}
3141
abd0071c 3142static Lisp_Object
971de7fb 3143unwind_set_match_data (Lisp_Object list)
abd0071c 3144{
b51d6c92
SM
3145 /* It is NOT ALWAYS safe to free (evaporate) the markers immediately. */
3146 return Fset_match_data (list, Qt);
abd0071c
KS
3147}
3148
3149/* Called to unwind protect the match data. */
3150void
971de7fb 3151record_unwind_save_match_data (void)
abd0071c
KS
3152{
3153 record_unwind_protect (unwind_set_match_data,
3154 Fmatch_data (Qnil, Qnil, Qnil));
3155}
3156
e9a452d9 3157/* Quote a string to deactivate reg-expr chars */
ca1d1d23
JB
3158
3159DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
8c1a1077 3160 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
5842a27b 3161 (Lisp_Object string)
ca1d1d23 3162{
a7e979a4
PE
3163 register char *in, *out, *end;
3164 register char *temp;
0c8533c6 3165 int backslashes_added = 0;
ca1d1d23 3166
b7826503 3167 CHECK_STRING (string);
ca1d1d23 3168
a7e979a4 3169 temp = (char *) alloca (SBYTES (string) * 2);
ca1d1d23
JB
3170
3171 /* Now copy the data into the new string, inserting escapes. */
3172
a7e979a4 3173 in = SSDATA (string);
d5db4077 3174 end = in + SBYTES (string);
177c0ea7 3175 out = temp;
ca1d1d23
JB
3176
3177 for (; in != end; in++)
3178 {
66bc6082 3179 if (*in == '['
ca1d1d23
JB
3180 || *in == '*' || *in == '.' || *in == '\\'
3181 || *in == '?' || *in == '+'
3182 || *in == '^' || *in == '$')
0c8533c6 3183 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
3184 *out++ = *in;
3185 }
3186
3f8100f1 3187 return make_specified_string (temp,
d5db4077 3188 SCHARS (string) + backslashes_added,
3f8100f1
RS
3189 out - temp,
3190 STRING_MULTIBYTE (string));
ca1d1d23 3191}
177c0ea7 3192\f
dfcf069d 3193void
971de7fb 3194syms_of_search (void)
ca1d1d23
JB
3195{
3196 register int i;
3197
487282dc
KH
3198 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3199 {
3200 searchbufs[i].buf.allocated = 100;
b23c0a83 3201 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
487282dc
KH
3202 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3203 searchbufs[i].regexp = Qnil;
ecdb561e 3204 searchbufs[i].whitespace_regexp = Qnil;
b69e3c18 3205 searchbufs[i].syntax_table = Qnil;
487282dc 3206 staticpro (&searchbufs[i].regexp);
aa77b5ce 3207 staticpro (&searchbufs[i].whitespace_regexp);
b69e3c18 3208 staticpro (&searchbufs[i].syntax_table);
487282dc
KH
3209 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3210 }
3211 searchbuf_head = &searchbufs[0];
ca1d1d23 3212
cd3520a4
JB
3213 DEFSYM (Qsearch_failed, "search-failed");
3214 DEFSYM (Qinvalid_regexp, "invalid-regexp");
ca1d1d23
JB
3215
3216 Fput (Qsearch_failed, Qerror_conditions,
d67b4f80 3217 pure_cons (Qsearch_failed, pure_cons (Qerror, Qnil)));
ca1d1d23 3218 Fput (Qsearch_failed, Qerror_message,
d67b4f80 3219 make_pure_c_string ("Search failed"));
ca1d1d23
JB
3220
3221 Fput (Qinvalid_regexp, Qerror_conditions,
d67b4f80 3222 pure_cons (Qinvalid_regexp, pure_cons (Qerror, Qnil)));
ca1d1d23 3223 Fput (Qinvalid_regexp, Qerror_message,
d67b4f80 3224 make_pure_c_string ("Invalid regexp"));
ca1d1d23 3225
daa37602
JB
3226 last_thing_searched = Qnil;
3227 staticpro (&last_thing_searched);
3228
0f6af254
DK
3229 saved_last_thing_searched = Qnil;
3230 staticpro (&saved_last_thing_searched);
3231
29208e82 3232 DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp,
e2811828 3233 doc: /* Regexp to substitute for bunches of spaces in regexp search.
f31a9a68
RS
3234Some commands use this for user-specified regexps.
3235Spaces that occur inside character classes or repetition operators
3236or other such regexp constructs are not replaced with this.
3237A value of nil (which is the normal value) means treat spaces literally. */);
41a33295 3238 Vsearch_spaces_regexp = Qnil;
f31a9a68 3239
29208e82 3240 DEFVAR_LISP ("inhibit-changing-match-data", Vinhibit_changing_match_data,
ef887810 3241 doc: /* Internal use only.
39d0bf74
RS
3242If non-nil, the primitive searching and matching functions
3243such as `looking-at', `string-match', `re-search-forward', etc.,
3244do not set the match data. The proper way to use this variable
3245is to bind it with `let' around a small expression. */);
ef887810
RS
3246 Vinhibit_changing_match_data = Qnil;
3247
ca1d1d23 3248 defsubr (&Slooking_at);
b819a390
RS
3249 defsubr (&Sposix_looking_at);
3250 defsubr (&Sstring_match);
3251 defsubr (&Sposix_string_match);
ca1d1d23
JB
3252 defsubr (&Ssearch_forward);
3253 defsubr (&Ssearch_backward);
02b16839 3254 defsubr (&Sword_search_regexp);
ca1d1d23
JB
3255 defsubr (&Sword_search_forward);
3256 defsubr (&Sword_search_backward);
c6e0e1cc
CY
3257 defsubr (&Sword_search_forward_lax);
3258 defsubr (&Sword_search_backward_lax);
ca1d1d23
JB
3259 defsubr (&Sre_search_forward);
3260 defsubr (&Sre_search_backward);
b819a390
RS
3261 defsubr (&Sposix_search_forward);
3262 defsubr (&Sposix_search_backward);
ca1d1d23
JB
3263 defsubr (&Sreplace_match);
3264 defsubr (&Smatch_beginning);
3265 defsubr (&Smatch_end);
3266 defsubr (&Smatch_data);
3f1c005b 3267 defsubr (&Sset_match_data);
ca1d1d23
JB
3268 defsubr (&Sregexp_quote);
3269}