Replace bcopy, bzero, bcmp by memcpy, memmove, memset, memcmp
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
429ab54e 2 Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
114f9c96 3 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
8cabe764 4 Free Software Foundation, Inc.
ca1d1d23
JB
5
6This file is part of GNU Emacs.
7
9ec0b715 8GNU Emacs is free software: you can redistribute it and/or modify
ca1d1d23 9it under the terms of the GNU General Public License as published by
9ec0b715
GM
10the Free Software Foundation, either version 3 of the License, or
11(at your option) any later version.
ca1d1d23
JB
12
13GNU Emacs is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
9ec0b715 19along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
ca1d1d23
JB
20
21
18160b98 22#include <config.h>
d7306fe6 23#include <setjmp.h>
ca1d1d23
JB
24#include "lisp.h"
25#include "syntax.h"
5679531d 26#include "category.h"
ca1d1d23 27#include "buffer.h"
76eb0881 28#include "character.h"
89ad6492 29#include "charset.h"
9169c321 30#include "region-cache.h"
ca1d1d23 31#include "commands.h"
9ac0d9e0 32#include "blockinput.h"
bf1760bb 33#include "intervals.h"
4746118a 34
ca1d1d23
JB
35#include <sys/types.h>
36#include "regex.h"
37
1d288aef 38#define REGEXP_CACHE_SIZE 20
ca1d1d23 39
487282dc
KH
40/* If the regexp is non-nil, then the buffer contains the compiled form
41 of that regexp, suitable for searching. */
1d288aef
RS
42struct regexp_cache
43{
487282dc 44 struct regexp_cache *next;
ecdb561e 45 Lisp_Object regexp, whitespace_regexp;
b69e3c18 46 /* Syntax table for which the regexp applies. We need this because
58e95211
SM
47 of character classes. If this is t, then the compiled pattern is valid
48 for any syntax-table. */
b69e3c18 49 Lisp_Object syntax_table;
487282dc
KH
50 struct re_pattern_buffer buf;
51 char fastmap[0400];
b819a390
RS
52 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
53 char posix;
487282dc 54};
ca1d1d23 55
487282dc
KH
56/* The instances of that struct. */
57struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 58
487282dc
KH
59/* The head of the linked list; points to the most recently used buffer. */
60struct regexp_cache *searchbuf_head;
ca1d1d23 61
ca1d1d23 62
4746118a
JB
63/* Every call to re_match, etc., must pass &search_regs as the regs
64 argument unless you can show it is unnecessary (i.e., if re_match
65 is certainly going to be called again before region-around-match
66 can be called).
67
68 Since the registers are now dynamically allocated, we need to make
69 sure not to refer to the Nth register before checking that it has
1113d9db
JB
70 been allocated by checking search_regs.num_regs.
71
72 The regex code keeps track of whether it has allocated the search
487282dc
KH
73 buffer using bits in the re_pattern_buffer. This means that whenever
74 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
75 allocated any registers, and will allocate new registers the next
76 time you call a searching or matching function. Therefore, we need
77 to call re_set_registers after compiling a new pattern or after
78 setting the match registers, so that the regex functions will be
79 able to free or re-allocate it properly. */
ca1d1d23
JB
80static struct re_registers search_regs;
81
daa37602
JB
82/* The buffer in which the last search was performed, or
83 Qt if the last search was done in a string;
84 Qnil if no searching has been done yet. */
85static Lisp_Object last_thing_searched;
ca1d1d23 86
8e6208c5 87/* error condition signaled when regexp compile_pattern fails */
ca1d1d23
JB
88
89Lisp_Object Qinvalid_regexp;
90
06f77a8a
KS
91/* Error condition used for failing searches */
92Lisp_Object Qsearch_failed;
93
41a33295 94Lisp_Object Vsearch_spaces_regexp;
f31a9a68 95
ef887810
RS
96/* If non-nil, the match data will not be changed during call to
97 searching or matching functions. This variable is for internal use
98 only. */
99Lisp_Object Vinhibit_changing_match_data;
100
f57e2426
J
101static void set_search_regs (EMACS_INT, EMACS_INT);
102static void save_search_regs (void);
103static EMACS_INT simple_search (int, unsigned char *, int, int,
104 Lisp_Object, EMACS_INT, EMACS_INT,
105 EMACS_INT, EMACS_INT);
106static EMACS_INT boyer_moore (int, unsigned char *, int, int,
107 Lisp_Object, Lisp_Object,
108 EMACS_INT, EMACS_INT,
109 EMACS_INT, EMACS_INT, int);
110static EMACS_INT search_buffer (Lisp_Object, EMACS_INT, EMACS_INT,
111 EMACS_INT, EMACS_INT, int, int,
112 Lisp_Object, Lisp_Object, int);
971de7fb 113static void matcher_overflow (void) NO_RETURN;
b819a390 114
ca1d1d23 115static void
971de7fb 116matcher_overflow (void)
ca1d1d23
JB
117{
118 error ("Stack overflow in regexp matcher");
119}
120
b819a390
RS
121/* Compile a regexp and signal a Lisp error if anything goes wrong.
122 PATTERN is the pattern to compile.
123 CP is the place to put the result.
facdc750 124 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
125 REGP is the structure that says where to store the "register"
126 values that will result from matching this pattern.
127 If it is 0, we should compile the pattern not to record any
128 subexpression bounds.
129 POSIX is nonzero if we want full backtracking (POSIX style)
5679531d 130 for this pattern. 0 means backtrack only enough to get a valid match.
bbc73b48
KH
131
132 The behavior also depends on Vsearch_spaces_regexp. */
ca1d1d23 133
487282dc 134static void
971de7fb 135compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, Lisp_Object translate, struct re_registers *regp, int posix)
ca1d1d23 136{
d451e4db 137 char *val;
b819a390 138 reg_syntax_t old;
ca1d1d23 139
487282dc 140 cp->regexp = Qnil;
59fab369 141 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
b819a390 142 cp->posix = posix;
93daa011 143 cp->buf.multibyte = STRING_MULTIBYTE (pattern);
89ad6492 144 cp->buf.charset_unibyte = charset_unibyte;
d8c85250
CY
145 if (STRINGP (Vsearch_spaces_regexp))
146 cp->whitespace_regexp = Vsearch_spaces_regexp;
147 else
148 cp->whitespace_regexp = Qnil;
149
e7b2dd2e
RS
150 /* rms: I think BLOCK_INPUT is not needed here any more,
151 because regex.c defines malloc to call xmalloc.
152 Using BLOCK_INPUT here means the debugger won't run if an error occurs.
153 So let's turn it off. */
154 /* BLOCK_INPUT; */
fb4a568d 155 old = re_set_syntax (RE_SYNTAX_EMACS
b819a390 156 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
d8c85250
CY
157
158 if (STRINGP (Vsearch_spaces_regexp))
159 re_set_whitespace_regexp (SDATA (Vsearch_spaces_regexp));
160 else
161 re_set_whitespace_regexp (NULL);
bbc73b48 162
8f924df7
KH
163 val = (char *) re_compile_pattern ((char *) SDATA (pattern),
164 SBYTES (pattern), &cp->buf);
bbc73b48 165
58e95211
SM
166 /* If the compiled pattern hard codes some of the contents of the
167 syntax-table, it can only be reused with *this* syntax table. */
168 cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
169
bbc73b48
KH
170 re_set_whitespace_regexp (NULL);
171
b819a390 172 re_set_syntax (old);
e7b2dd2e 173 /* UNBLOCK_INPUT; */
ca1d1d23 174 if (val)
06f77a8a 175 xsignal1 (Qinvalid_regexp, build_string (val));
1113d9db 176
487282dc 177 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
178}
179
6efc7887
RS
180/* Shrink each compiled regexp buffer in the cache
181 to the size actually used right now.
182 This is called from garbage collection. */
183
184void
971de7fb 185shrink_regexp_cache (void)
6efc7887 186{
a968f437 187 struct regexp_cache *cp;
6efc7887
RS
188
189 for (cp = searchbuf_head; cp != 0; cp = cp->next)
190 {
191 cp->buf.allocated = cp->buf.used;
192 cp->buf.buffer
b23c0a83 193 = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
6efc7887
RS
194 }
195}
196
54dd3310
SM
197/* Clear the regexp cache w.r.t. a particular syntax table,
198 because it was changed.
e5b94d44
CY
199 There is no danger of memory leak here because re_compile_pattern
200 automagically manages the memory in each re_pattern_buffer struct,
201 based on its `allocated' and `buffer' values. */
202void
971de7fb 203clear_regexp_cache (void)
e5b94d44
CY
204{
205 int i;
206
207 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
920fd1fc
EZ
208 /* It's tempting to compare with the syntax-table we've actually changed,
209 but it's not sufficient because char-table inheritance means that
54dd3310
SM
210 modifying one syntax-table can change others at the same time. */
211 if (!EQ (searchbufs[i].syntax_table, Qt))
212 searchbufs[i].regexp = Qnil;
e5b94d44
CY
213}
214
487282dc 215/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
216 the cache.
217 PATTERN is the pattern to compile.
facdc750 218 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
219 REGP is the structure that says where to store the "register"
220 values that will result from matching this pattern.
221 If it is 0, we should compile the pattern not to record any
222 subexpression bounds.
223 POSIX is nonzero if we want full backtracking (POSIX style)
224 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
225
226struct re_pattern_buffer *
971de7fb 227compile_pattern (Lisp_Object pattern, struct re_registers *regp, Lisp_Object translate, int posix, int multibyte)
487282dc
KH
228{
229 struct regexp_cache *cp, **cpp;
230
231 for (cpp = &searchbuf_head; ; cpp = &cp->next)
232 {
233 cp = *cpp;
f1b9c7c1
KR
234 /* Entries are initialized to nil, and may be set to nil by
235 compile_pattern_1 if the pattern isn't valid. Don't apply
49a5f770
KR
236 string accessors in those cases. However, compile_pattern_1
237 is only applied to the cache entry we pick here to reuse. So
238 nil should never appear before a non-nil entry. */
7c752c80 239 if (NILP (cp->regexp))
f1b9c7c1 240 goto compile_it;
d5db4077 241 if (SCHARS (cp->regexp) == SCHARS (pattern)
cf69b13e 242 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
1d288aef 243 && !NILP (Fstring_equal (cp->regexp, pattern))
59fab369 244 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
5679531d 245 && cp->posix == posix
58e95211
SM
246 && (EQ (cp->syntax_table, Qt)
247 || EQ (cp->syntax_table, current_buffer->syntax_table))
89ad6492
KH
248 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
249 && cp->buf.charset_unibyte == charset_unibyte)
487282dc
KH
250 break;
251
f1b9c7c1
KR
252 /* If we're at the end of the cache, compile into the nil cell
253 we found, or the last (least recently used) cell with a
254 string value. */
487282dc
KH
255 if (cp->next == 0)
256 {
f1b9c7c1 257 compile_it:
89ad6492 258 compile_pattern_1 (cp, pattern, translate, regp, posix);
487282dc
KH
259 break;
260 }
261 }
262
263 /* When we get here, cp (aka *cpp) contains the compiled pattern,
264 either because we found it in the cache or because we just compiled it.
265 Move it to the front of the queue to mark it as most recently used. */
266 *cpp = cp->next;
267 cp->next = searchbuf_head;
268 searchbuf_head = cp;
1113d9db 269
6639708c
RS
270 /* Advise the searching functions about the space we have allocated
271 for register data. */
272 if (regp)
273 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
274
78edd3b7 275 /* The compiled pattern can be used both for multibyte and unibyte
89ad6492
KH
276 target. But, we have to tell which the pattern is used for. */
277 cp->buf.target_multibyte = multibyte;
278
487282dc 279 return &cp->buf;
ca1d1d23
JB
280}
281
ca1d1d23 282\f
b819a390 283static Lisp_Object
971de7fb 284looking_at_1 (Lisp_Object string, int posix)
ca1d1d23
JB
285{
286 Lisp_Object val;
287 unsigned char *p1, *p2;
e7deaab0 288 EMACS_INT s1, s2;
ca1d1d23 289 register int i;
487282dc 290 struct re_pattern_buffer *bufp;
ca1d1d23 291
7074fde6
FP
292 if (running_asynch_code)
293 save_search_regs ();
294
910c747a
RS
295 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
296 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
297 = current_buffer->case_eqv_table;
298
b7826503 299 CHECK_STRING (string);
ef887810
RS
300 bufp = compile_pattern (string,
301 (NILP (Vinhibit_changing_match_data)
302 ? &search_regs : NULL),
487282dc 303 (!NILP (current_buffer->case_fold_search)
0190922f 304 ? current_buffer->case_canon_table : Qnil),
0c8533c6
RS
305 posix,
306 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23
JB
307
308 immediate_quit = 1;
309 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
310
311 /* Get pointers and sizes of the two strings
312 that make up the visible portion of the buffer. */
313
314 p1 = BEGV_ADDR;
fa8ed3e0 315 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 316 p2 = GAP_END_ADDR;
fa8ed3e0 317 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
318 if (s1 < 0)
319 {
320 p2 = p1;
fa8ed3e0 321 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
322 s1 = 0;
323 }
324 if (s2 < 0)
325 {
fa8ed3e0 326 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
327 s2 = 0;
328 }
8bb43c28
RS
329
330 re_match_object = Qnil;
177c0ea7 331
487282dc 332 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
ef887810
RS
333 PT_BYTE - BEGV_BYTE,
334 (NILP (Vinhibit_changing_match_data)
335 ? &search_regs : NULL),
fa8ed3e0 336 ZV_BYTE - BEGV_BYTE);
de182d70 337 immediate_quit = 0;
177c0ea7 338
ca1d1d23
JB
339 if (i == -2)
340 matcher_overflow ();
341
342 val = (0 <= i ? Qt : Qnil);
ef887810 343 if (NILP (Vinhibit_changing_match_data) && i >= 0)
fa8ed3e0
RS
344 for (i = 0; i < search_regs.num_regs; i++)
345 if (search_regs.start[i] >= 0)
346 {
347 search_regs.start[i]
348 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
349 search_regs.end[i]
350 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
351 }
ef887810
RS
352
353 /* Set last_thing_searched only when match data is changed. */
354 if (NILP (Vinhibit_changing_match_data))
355 XSETBUFFER (last_thing_searched, current_buffer);
356
ca1d1d23
JB
357 return val;
358}
359
b819a390 360DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
8c1a1077
PJ
361 doc: /* Return t if text after point matches regular expression REGEXP.
362This function modifies the match data that `match-beginning',
363`match-end' and `match-data' access; save and restore the match
364data if you want to preserve them. */)
365 (regexp)
94f94972 366 Lisp_Object regexp;
b819a390 367{
94f94972 368 return looking_at_1 (regexp, 0);
b819a390
RS
369}
370
371DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
8c1a1077
PJ
372 doc: /* Return t if text after point matches regular expression REGEXP.
373Find the longest match, in accord with Posix regular expression rules.
374This function modifies the match data that `match-beginning',
375`match-end' and `match-data' access; save and restore the match
376data if you want to preserve them. */)
377 (regexp)
94f94972 378 Lisp_Object regexp;
b819a390 379{
94f94972 380 return looking_at_1 (regexp, 1);
b819a390
RS
381}
382\f
383static Lisp_Object
971de7fb 384string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, int posix)
ca1d1d23
JB
385{
386 int val;
487282dc 387 struct re_pattern_buffer *bufp;
e7deaab0 388 EMACS_INT pos, pos_byte;
0c8533c6 389 int i;
ca1d1d23 390
7074fde6
FP
391 if (running_asynch_code)
392 save_search_regs ();
393
b7826503
PJ
394 CHECK_STRING (regexp);
395 CHECK_STRING (string);
ca1d1d23
JB
396
397 if (NILP (start))
0c8533c6 398 pos = 0, pos_byte = 0;
ca1d1d23
JB
399 else
400 {
d5db4077 401 int len = SCHARS (string);
ca1d1d23 402
b7826503 403 CHECK_NUMBER (start);
0c8533c6
RS
404 pos = XINT (start);
405 if (pos < 0 && -pos <= len)
406 pos = len + pos;
407 else if (0 > pos || pos > len)
ca1d1d23 408 args_out_of_range (string, start);
0c8533c6 409 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
410 }
411
910c747a
RS
412 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
413 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
414 = current_buffer->case_eqv_table;
415
ef887810
RS
416 bufp = compile_pattern (regexp,
417 (NILP (Vinhibit_changing_match_data)
418 ? &search_regs : NULL),
487282dc 419 (!NILP (current_buffer->case_fold_search)
0190922f 420 ? current_buffer->case_canon_table : Qnil),
0c8533c6
RS
421 posix,
422 STRING_MULTIBYTE (string));
ca1d1d23 423 immediate_quit = 1;
8bb43c28 424 re_match_object = string;
177c0ea7 425
d5db4077
KR
426 val = re_search (bufp, (char *) SDATA (string),
427 SBYTES (string), pos_byte,
428 SBYTES (string) - pos_byte,
ef887810
RS
429 (NILP (Vinhibit_changing_match_data)
430 ? &search_regs : NULL));
ca1d1d23 431 immediate_quit = 0;
ef887810
RS
432
433 /* Set last_thing_searched only when match data is changed. */
434 if (NILP (Vinhibit_changing_match_data))
435 last_thing_searched = Qt;
436
ca1d1d23
JB
437 if (val == -2)
438 matcher_overflow ();
439 if (val < 0) return Qnil;
0c8533c6 440
ef887810
RS
441 if (NILP (Vinhibit_changing_match_data))
442 for (i = 0; i < search_regs.num_regs; i++)
443 if (search_regs.start[i] >= 0)
444 {
445 search_regs.start[i]
446 = string_byte_to_char (string, search_regs.start[i]);
447 search_regs.end[i]
448 = string_byte_to_char (string, search_regs.end[i]);
449 }
0c8533c6
RS
450
451 return make_number (string_byte_to_char (string, val));
ca1d1d23 452}
e59a8453 453
b819a390 454DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
8c1a1077 455 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
b85acc4b 456Matching ignores case if `case-fold-search' is non-nil.
8c1a1077
PJ
457If third arg START is non-nil, start search at that index in STRING.
458For index of first char beyond the match, do (match-end 0).
459`match-end' and `match-beginning' also give indices of substrings
2bd2f32d
RS
460matched by parenthesis constructs in the pattern.
461
462You can use the function `match-string' to extract the substrings
463matched by the parenthesis constructions in REGEXP. */)
8c1a1077 464 (regexp, string, start)
b819a390
RS
465 Lisp_Object regexp, string, start;
466{
467 return string_match_1 (regexp, string, start, 0);
468}
469
470DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
8c1a1077
PJ
471 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
472Find the longest match, in accord with Posix regular expression rules.
473Case is ignored if `case-fold-search' is non-nil in the current buffer.
474If third arg START is non-nil, start search at that index in STRING.
475For index of first char beyond the match, do (match-end 0).
476`match-end' and `match-beginning' also give indices of substrings
477matched by parenthesis constructs in the pattern. */)
478 (regexp, string, start)
b819a390
RS
479 Lisp_Object regexp, string, start;
480{
481 return string_match_1 (regexp, string, start, 1);
482}
483
e59a8453
RS
484/* Match REGEXP against STRING, searching all of STRING,
485 and return the index of the match, or negative on failure.
486 This does not clobber the match data. */
487
488int
971de7fb 489fast_string_match (Lisp_Object regexp, Lisp_Object string)
e59a8453
RS
490{
491 int val;
487282dc 492 struct re_pattern_buffer *bufp;
e59a8453 493
facdc750
RS
494 bufp = compile_pattern (regexp, 0, Qnil,
495 0, STRING_MULTIBYTE (string));
e59a8453 496 immediate_quit = 1;
8bb43c28 497 re_match_object = string;
177c0ea7 498
d5db4077
KR
499 val = re_search (bufp, (char *) SDATA (string),
500 SBYTES (string), 0,
501 SBYTES (string), 0);
e59a8453
RS
502 immediate_quit = 0;
503 return val;
504}
5679531d
KH
505
506/* Match REGEXP against STRING, searching all of STRING ignoring case,
507 and return the index of the match, or negative on failure.
0c8533c6
RS
508 This does not clobber the match data.
509 We assume that STRING contains single-byte characters. */
5679531d
KH
510
511extern Lisp_Object Vascii_downcase_table;
512
513int
971de7fb 514fast_c_string_match_ignore_case (Lisp_Object regexp, const char *string)
5679531d
KH
515{
516 int val;
517 struct re_pattern_buffer *bufp;
518 int len = strlen (string);
519
0c8533c6 520 regexp = string_make_unibyte (regexp);
b4577c63 521 re_match_object = Qt;
5679531d 522 bufp = compile_pattern (regexp, 0,
0190922f 523 Vascii_canon_table, 0,
f8bd51c4 524 0);
5679531d
KH
525 immediate_quit = 1;
526 val = re_search (bufp, string, len, 0, len, 0);
527 immediate_quit = 0;
528 return val;
529}
be5f4dfb
KH
530
531/* Like fast_string_match but ignore case. */
532
533int
971de7fb 534fast_string_match_ignore_case (Lisp_Object regexp, Lisp_Object string)
be5f4dfb
KH
535{
536 int val;
537 struct re_pattern_buffer *bufp;
538
0190922f 539 bufp = compile_pattern (regexp, 0, Vascii_canon_table,
be5f4dfb
KH
540 0, STRING_MULTIBYTE (string));
541 immediate_quit = 1;
542 re_match_object = string;
543
544 val = re_search (bufp, (char *) SDATA (string),
545 SBYTES (string), 0,
546 SBYTES (string), 0);
547 immediate_quit = 0;
548 return val;
549}
ca1d1d23 550\f
a80ce213 551/* Match REGEXP against the characters after POS to LIMIT, and return
8b13507a
KH
552 the number of matched characters. If STRING is non-nil, match
553 against the characters in it. In that case, POS and LIMIT are
554 indices into the string. This function doesn't modify the match
555 data. */
556
557EMACS_INT
971de7fb 558fast_looking_at (Lisp_Object regexp, EMACS_INT pos, EMACS_INT pos_byte, EMACS_INT limit, EMACS_INT limit_byte, Lisp_Object string)
8b13507a
KH
559{
560 int multibyte;
561 struct re_pattern_buffer *buf;
562 unsigned char *p1, *p2;
e7deaab0 563 EMACS_INT s1, s2;
8b13507a 564 EMACS_INT len;
78edd3b7 565
8b13507a
KH
566 if (STRINGP (string))
567 {
568 if (pos_byte < 0)
569 pos_byte = string_char_to_byte (string, pos);
570 if (limit_byte < 0)
571 limit_byte = string_char_to_byte (string, limit);
572 p1 = NULL;
573 s1 = 0;
574 p2 = SDATA (string);
575 s2 = SBYTES (string);
576 re_match_object = string;
577 multibyte = STRING_MULTIBYTE (string);
578 }
579 else
580 {
581 if (pos_byte < 0)
582 pos_byte = CHAR_TO_BYTE (pos);
583 if (limit_byte < 0)
584 limit_byte = CHAR_TO_BYTE (limit);
585 pos_byte -= BEGV_BYTE;
586 limit_byte -= BEGV_BYTE;
587 p1 = BEGV_ADDR;
588 s1 = GPT_BYTE - BEGV_BYTE;
589 p2 = GAP_END_ADDR;
590 s2 = ZV_BYTE - GPT_BYTE;
591 if (s1 < 0)
592 {
593 p2 = p1;
594 s2 = ZV_BYTE - BEGV_BYTE;
595 s1 = 0;
596 }
597 if (s2 < 0)
598 {
599 s1 = ZV_BYTE - BEGV_BYTE;
600 s2 = 0;
601 }
602 re_match_object = Qnil;
603 multibyte = ! NILP (current_buffer->enable_multibyte_characters);
604 }
605
606 buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
607 immediate_quit = 1;
608 len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
609 pos_byte, NULL, limit_byte);
610 immediate_quit = 0;
611
612 return len;
613}
614
615\f
9169c321
JB
616/* The newline cache: remembering which sections of text have no newlines. */
617
618/* If the user has requested newline caching, make sure it's on.
619 Otherwise, make sure it's off.
620 This is our cheezy way of associating an action with the change of
621 state of a buffer-local variable. */
622static void
971de7fb 623newline_cache_on_off (struct buffer *buf)
9169c321
JB
624{
625 if (NILP (buf->cache_long_line_scans))
626 {
627 /* It should be off. */
628 if (buf->newline_cache)
629 {
630 free_region_cache (buf->newline_cache);
631 buf->newline_cache = 0;
632 }
633 }
634 else
635 {
636 /* It should be on. */
637 if (buf->newline_cache == 0)
638 buf->newline_cache = new_region_cache ();
639 }
640}
641
642\f
643/* Search for COUNT instances of the character TARGET between START and END.
644
645 If COUNT is positive, search forwards; END must be >= START.
646 If COUNT is negative, search backwards for the -COUNTth instance;
647 END must be <= START.
648 If COUNT is zero, do anything you please; run rogue, for all I care.
649
650 If END is zero, use BEGV or ZV instead, as appropriate for the
651 direction indicated by COUNT.
ffd56f97
JB
652
653 If we find COUNT instances, set *SHORTAGE to zero, and return the
a9f2a45f 654 position past the COUNTth match. Note that for reverse motion
5bfe95c9 655 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 656
9169c321
JB
657 If we don't find COUNT instances before reaching END, set *SHORTAGE
658 to the number of TARGETs left unfound, and return END.
ffd56f97 659
087a5f81
RS
660 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
661 except when inside redisplay. */
662
dfcf069d 663int
971de7fb 664scan_buffer (register int target, EMACS_INT start, EMACS_INT end, int count, int *shortage, int allow_quit)
ca1d1d23 665{
9169c321 666 struct region_cache *newline_cache;
177c0ea7 667 int direction;
ffd56f97 668
9169c321
JB
669 if (count > 0)
670 {
671 direction = 1;
672 if (! end) end = ZV;
673 }
674 else
675 {
676 direction = -1;
677 if (! end) end = BEGV;
678 }
ffd56f97 679
9169c321
JB
680 newline_cache_on_off (current_buffer);
681 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
682
683 if (shortage != 0)
684 *shortage = 0;
685
087a5f81 686 immediate_quit = allow_quit;
ca1d1d23 687
ffd56f97 688 if (count > 0)
9169c321 689 while (start != end)
ca1d1d23 690 {
9169c321
JB
691 /* Our innermost scanning loop is very simple; it doesn't know
692 about gaps, buffer ends, or the newline cache. ceiling is
693 the position of the last character before the next such
694 obstacle --- the last character the dumb search loop should
695 examine. */
e7deaab0
AS
696 EMACS_INT ceiling_byte = CHAR_TO_BYTE (end) - 1;
697 EMACS_INT start_byte = CHAR_TO_BYTE (start);
698 EMACS_INT tem;
9169c321
JB
699
700 /* If we're looking for a newline, consult the newline cache
701 to see where we can avoid some scanning. */
702 if (target == '\n' && newline_cache)
703 {
704 int next_change;
705 immediate_quit = 0;
706 while (region_cache_forward
fa8ed3e0
RS
707 (current_buffer, newline_cache, start_byte, &next_change))
708 start_byte = next_change;
cbe0db0d 709 immediate_quit = allow_quit;
9169c321 710
fa8ed3e0
RS
711 /* START should never be after END. */
712 if (start_byte > ceiling_byte)
713 start_byte = ceiling_byte;
9169c321
JB
714
715 /* Now the text after start is an unknown region, and
716 next_change is the position of the next known region. */
fa8ed3e0 717 ceiling_byte = min (next_change - 1, ceiling_byte);
9169c321
JB
718 }
719
720 /* The dumb loop can only scan text stored in contiguous
721 bytes. BUFFER_CEILING_OF returns the last character
722 position that is contiguous, so the ceiling is the
723 position after that. */
67ce527d
KH
724 tem = BUFFER_CEILING_OF (start_byte);
725 ceiling_byte = min (tem, ceiling_byte);
9169c321
JB
726
727 {
177c0ea7 728 /* The termination address of the dumb loop. */
fa8ed3e0
RS
729 register unsigned char *ceiling_addr
730 = BYTE_POS_ADDR (ceiling_byte) + 1;
731 register unsigned char *cursor
732 = BYTE_POS_ADDR (start_byte);
9169c321
JB
733 unsigned char *base = cursor;
734
735 while (cursor < ceiling_addr)
736 {
737 unsigned char *scan_start = cursor;
738
739 /* The dumb loop. */
740 while (*cursor != target && ++cursor < ceiling_addr)
741 ;
742
743 /* If we're looking for newlines, cache the fact that
744 the region from start to cursor is free of them. */
745 if (target == '\n' && newline_cache)
746 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
747 start_byte + scan_start - base,
748 start_byte + cursor - base);
9169c321
JB
749
750 /* Did we find the target character? */
751 if (cursor < ceiling_addr)
752 {
753 if (--count == 0)
754 {
755 immediate_quit = 0;
fa8ed3e0 756 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
9169c321
JB
757 }
758 cursor++;
759 }
760 }
761
fa8ed3e0 762 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321 763 }
ca1d1d23
JB
764 }
765 else
9169c321
JB
766 while (start > end)
767 {
768 /* The last character to check before the next obstacle. */
e7deaab0
AS
769 EMACS_INT ceiling_byte = CHAR_TO_BYTE (end);
770 EMACS_INT start_byte = CHAR_TO_BYTE (start);
771 EMACS_INT tem;
9169c321
JB
772
773 /* Consult the newline cache, if appropriate. */
774 if (target == '\n' && newline_cache)
775 {
776 int next_change;
777 immediate_quit = 0;
778 while (region_cache_backward
fa8ed3e0
RS
779 (current_buffer, newline_cache, start_byte, &next_change))
780 start_byte = next_change;
cbe0db0d 781 immediate_quit = allow_quit;
9169c321
JB
782
783 /* Start should never be at or before end. */
fa8ed3e0
RS
784 if (start_byte <= ceiling_byte)
785 start_byte = ceiling_byte + 1;
9169c321
JB
786
787 /* Now the text before start is an unknown region, and
788 next_change is the position of the next known region. */
fa8ed3e0 789 ceiling_byte = max (next_change, ceiling_byte);
9169c321
JB
790 }
791
792 /* Stop scanning before the gap. */
67ce527d
KH
793 tem = BUFFER_FLOOR_OF (start_byte - 1);
794 ceiling_byte = max (tem, ceiling_byte);
9169c321
JB
795
796 {
797 /* The termination address of the dumb loop. */
fa8ed3e0
RS
798 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
799 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
800 unsigned char *base = cursor;
801
802 while (cursor >= ceiling_addr)
803 {
804 unsigned char *scan_start = cursor;
805
806 while (*cursor != target && --cursor >= ceiling_addr)
807 ;
808
809 /* If we're looking for newlines, cache the fact that
810 the region from after the cursor to start is free of them. */
811 if (target == '\n' && newline_cache)
812 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
813 start_byte + cursor - base,
814 start_byte + scan_start - base);
9169c321
JB
815
816 /* Did we find the target character? */
817 if (cursor >= ceiling_addr)
818 {
819 if (++count >= 0)
820 {
821 immediate_quit = 0;
fa8ed3e0 822 return BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
823 }
824 cursor--;
825 }
826 }
827
fa8ed3e0 828 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
829 }
830 }
831
ca1d1d23
JB
832 immediate_quit = 0;
833 if (shortage != 0)
ffd56f97 834 *shortage = count * direction;
9169c321 835 return start;
ca1d1d23 836}
fa8ed3e0
RS
837\f
838/* Search for COUNT instances of a line boundary, which means either a
839 newline or (if selective display enabled) a carriage return.
840 Start at START. If COUNT is negative, search backwards.
841
842 We report the resulting position by calling TEMP_SET_PT_BOTH.
843
844 If we find COUNT instances. we position after (always after,
845 even if scanning backwards) the COUNTth match, and return 0.
846
847 If we don't find COUNT instances before reaching the end of the
848 buffer (or the beginning, if scanning backwards), we return
849 the number of line boundaries left unfound, and position at
850 the limit we bumped up against.
851
852 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
d5d57b92 853 except in special cases. */
ca1d1d23 854
63fa018d 855int
971de7fb 856scan_newline (EMACS_INT start, EMACS_INT start_byte, EMACS_INT limit, EMACS_INT limit_byte, register int count, int allow_quit)
63fa018d 857{
fa8ed3e0
RS
858 int direction = ((count > 0) ? 1 : -1);
859
860 register unsigned char *cursor;
861 unsigned char *base;
862
e7deaab0 863 EMACS_INT ceiling;
fa8ed3e0
RS
864 register unsigned char *ceiling_addr;
865
d5d57b92
RS
866 int old_immediate_quit = immediate_quit;
867
fa8ed3e0
RS
868 /* The code that follows is like scan_buffer
869 but checks for either newline or carriage return. */
870
d5d57b92
RS
871 if (allow_quit)
872 immediate_quit++;
fa8ed3e0
RS
873
874 start_byte = CHAR_TO_BYTE (start);
875
876 if (count > 0)
877 {
878 while (start_byte < limit_byte)
879 {
880 ceiling = BUFFER_CEILING_OF (start_byte);
881 ceiling = min (limit_byte - 1, ceiling);
882 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
883 base = (cursor = BYTE_POS_ADDR (start_byte));
884 while (1)
885 {
886 while (*cursor != '\n' && ++cursor != ceiling_addr)
887 ;
888
889 if (cursor != ceiling_addr)
890 {
891 if (--count == 0)
892 {
d5d57b92 893 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
894 start_byte = start_byte + cursor - base + 1;
895 start = BYTE_TO_CHAR (start_byte);
896 TEMP_SET_PT_BOTH (start, start_byte);
897 return 0;
898 }
899 else
900 if (++cursor == ceiling_addr)
901 break;
902 }
903 else
904 break;
905 }
906 start_byte += cursor - base;
907 }
908 }
909 else
910 {
fa8ed3e0
RS
911 while (start_byte > limit_byte)
912 {
913 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
914 ceiling = max (limit_byte, ceiling);
915 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
916 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
917 while (1)
918 {
919 while (--cursor != ceiling_addr && *cursor != '\n')
920 ;
921
922 if (cursor != ceiling_addr)
923 {
924 if (++count == 0)
925 {
d5d57b92 926 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
927 /* Return the position AFTER the match we found. */
928 start_byte = start_byte + cursor - base + 1;
929 start = BYTE_TO_CHAR (start_byte);
930 TEMP_SET_PT_BOTH (start, start_byte);
931 return 0;
932 }
933 }
934 else
935 break;
936 }
937 /* Here we add 1 to compensate for the last decrement
938 of CURSOR, which took it past the valid range. */
939 start_byte += cursor - base + 1;
940 }
941 }
942
943 TEMP_SET_PT_BOTH (limit, limit_byte);
d5d57b92 944 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
945
946 return count * direction;
63fa018d
RS
947}
948
ca1d1d23 949int
971de7fb 950find_next_newline_no_quit (EMACS_INT from, int cnt)
ca1d1d23 951{
fa8ed3e0 952 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
9169c321
JB
953}
954
9169c321
JB
955/* Like find_next_newline, but returns position before the newline,
956 not after, and only search up to TO. This isn't just
957 find_next_newline (...)-1, because you might hit TO. */
fa8ed3e0 958
9169c321 959int
971de7fb 960find_before_next_newline (EMACS_INT from, EMACS_INT to, int cnt)
9169c321
JB
961{
962 int shortage;
963 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
964
965 if (shortage == 0)
966 pos--;
177c0ea7 967
9169c321 968 return pos;
ca1d1d23
JB
969}
970\f
ca1d1d23
JB
971/* Subroutines of Lisp buffer search functions. */
972
973static Lisp_Object
971de7fb 974search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count, int direction, int RE, int posix)
ca1d1d23
JB
975{
976 register int np;
9f43ad85 977 int lim, lim_byte;
ca1d1d23
JB
978 int n = direction;
979
980 if (!NILP (count))
981 {
b7826503 982 CHECK_NUMBER (count);
ca1d1d23
JB
983 n *= XINT (count);
984 }
985
b7826503 986 CHECK_STRING (string);
ca1d1d23 987 if (NILP (bound))
9f43ad85
RS
988 {
989 if (n > 0)
990 lim = ZV, lim_byte = ZV_BYTE;
991 else
992 lim = BEGV, lim_byte = BEGV_BYTE;
993 }
ca1d1d23
JB
994 else
995 {
b7826503 996 CHECK_NUMBER_COERCE_MARKER (bound);
ca1d1d23 997 lim = XINT (bound);
6ec8bbd2 998 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
999 error ("Invalid search bound (wrong side of point)");
1000 if (lim > ZV)
9f43ad85 1001 lim = ZV, lim_byte = ZV_BYTE;
588d2fd5 1002 else if (lim < BEGV)
9f43ad85 1003 lim = BEGV, lim_byte = BEGV_BYTE;
588d2fd5
KH
1004 else
1005 lim_byte = CHAR_TO_BYTE (lim);
ca1d1d23
JB
1006 }
1007
910c747a
RS
1008 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
1009 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
1010 = current_buffer->case_eqv_table;
1011
9f43ad85 1012 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
ca1d1d23 1013 (!NILP (current_buffer->case_fold_search)
facdc750 1014 ? current_buffer->case_canon_table
3135e9fd 1015 : Qnil),
ca1d1d23 1016 (!NILP (current_buffer->case_fold_search)
facdc750 1017 ? current_buffer->case_eqv_table
3135e9fd 1018 : Qnil),
b819a390 1019 posix);
ca1d1d23
JB
1020 if (np <= 0)
1021 {
1022 if (NILP (noerror))
06f77a8a
KS
1023 xsignal1 (Qsearch_failed, string);
1024
ca1d1d23
JB
1025 if (!EQ (noerror, Qt))
1026 {
1027 if (lim < BEGV || lim > ZV)
1028 abort ();
9f43ad85 1029 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
1030 return Qnil;
1031#if 0 /* This would be clean, but maybe programs depend on
1032 a value of nil here. */
481399bf 1033 np = lim;
a5f217b8 1034#endif
ca1d1d23 1035 }
481399bf
RS
1036 else
1037 return Qnil;
ca1d1d23
JB
1038 }
1039
1040 if (np < BEGV || np > ZV)
1041 abort ();
1042
1043 SET_PT (np);
1044
1045 return make_number (np);
1046}
1047\f
fa8ed3e0
RS
1048/* Return 1 if REGEXP it matches just one constant string. */
1049
b6d6a51c 1050static int
971de7fb 1051trivial_regexp_p (Lisp_Object regexp)
b6d6a51c 1052{
d5db4077
KR
1053 int len = SBYTES (regexp);
1054 unsigned char *s = SDATA (regexp);
b6d6a51c
KH
1055 while (--len >= 0)
1056 {
1057 switch (*s++)
1058 {
1059 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1060 return 0;
1061 case '\\':
1062 if (--len < 0)
1063 return 0;
1064 switch (*s++)
1065 {
1066 case '|': case '(': case ')': case '`': case '\'': case 'b':
1067 case 'B': case '<': case '>': case 'w': case 'W': case 's':
29f89fe7 1068 case 'S': case '=': case '{': case '}': case '_':
5679531d 1069 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 1070 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
1071 case '6': case '7': case '8': case '9':
1072 return 0;
1073 }
1074 }
1075 }
1076 return 1;
1077}
1078
ca325161 1079/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 1080 starting at position POS and stopping at position LIM,
b819a390 1081 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
1082 a regular expression if RE is true.
1083
1084 If N is positive, searching is forward and LIM must be greater than POS.
1085 If N is negative, searching is backward and LIM must be less than POS.
1086
facdc750 1087 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 1088 or else the position at the beginning of the Nth occurrence
b819a390
RS
1089 (if searching backward) or the end (if searching forward).
1090
1091 POSIX is nonzero if we want full backtracking (POSIX style)
1092 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 1093
aff2ce94
RS
1094#define TRANSLATE(out, trt, d) \
1095do \
1096 { \
1097 if (! NILP (trt)) \
1098 { \
1099 Lisp_Object temp; \
1100 temp = Faref (trt, make_number (d)); \
1101 if (INTEGERP (temp)) \
1102 out = XINT (temp); \
1103 else \
1104 out = d; \
1105 } \
1106 else \
1107 out = d; \
1108 } \
1109while (0)
facdc750 1110
ef887810
RS
1111/* Only used in search_buffer, to record the end position of the match
1112 when searching regexps and SEARCH_REGS should not be changed
1113 (i.e. Vinhibit_changing_match_data is non-nil). */
1114static struct re_registers search_regs_1;
1115
e7deaab0 1116static EMACS_INT
9f43ad85
RS
1117search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1118 RE, trt, inverse_trt, posix)
ca1d1d23 1119 Lisp_Object string;
e7deaab0
AS
1120 EMACS_INT pos;
1121 EMACS_INT pos_byte;
1122 EMACS_INT lim;
1123 EMACS_INT lim_byte;
ca1d1d23
JB
1124 int n;
1125 int RE;
facdc750
RS
1126 Lisp_Object trt;
1127 Lisp_Object inverse_trt;
b819a390 1128 int posix;
ca1d1d23 1129{
d5db4077
KR
1130 int len = SCHARS (string);
1131 int len_byte = SBYTES (string);
facdc750 1132 register int i;
ca1d1d23 1133
7074fde6
FP
1134 if (running_asynch_code)
1135 save_search_regs ();
1136
a7e4cdde 1137 /* Searching 0 times means don't move. */
ca1d1d23 1138 /* Null string is found at starting position. */
a7e4cdde 1139 if (len == 0 || n == 0)
ca325161 1140 {
0353b28f 1141 set_search_regs (pos_byte, 0);
ca325161
RS
1142 return pos;
1143 }
3f57a499 1144
41a33295 1145 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
ca1d1d23 1146 {
facdc750
RS
1147 unsigned char *p1, *p2;
1148 int s1, s2;
487282dc
KH
1149 struct re_pattern_buffer *bufp;
1150
ef887810
RS
1151 bufp = compile_pattern (string,
1152 (NILP (Vinhibit_changing_match_data)
1153 ? &search_regs : &search_regs_1),
1154 trt, posix,
0c8533c6 1155 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23 1156
ca1d1d23
JB
1157 immediate_quit = 1; /* Quit immediately if user types ^G,
1158 because letting this function finish
1159 can take too long. */
1160 QUIT; /* Do a pending quit right away,
1161 to avoid paradoxical behavior */
1162 /* Get pointers and sizes of the two strings
1163 that make up the visible portion of the buffer. */
1164
1165 p1 = BEGV_ADDR;
fa8ed3e0 1166 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1167 p2 = GAP_END_ADDR;
fa8ed3e0 1168 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1169 if (s1 < 0)
1170 {
1171 p2 = p1;
fa8ed3e0 1172 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1173 s1 = 0;
1174 }
1175 if (s2 < 0)
1176 {
fa8ed3e0 1177 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1178 s2 = 0;
1179 }
8bb43c28 1180 re_match_object = Qnil;
177c0ea7 1181
ca1d1d23
JB
1182 while (n < 0)
1183 {
42db823b 1184 int val;
487282dc 1185 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b 1186 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
ef887810
RS
1187 (NILP (Vinhibit_changing_match_data)
1188 ? &search_regs : &search_regs_1),
42db823b 1189 /* Don't allow match past current point */
4996330b 1190 pos_byte - BEGV_BYTE);
ca1d1d23 1191 if (val == -2)
b6d6a51c
KH
1192 {
1193 matcher_overflow ();
1194 }
ca1d1d23
JB
1195 if (val >= 0)
1196 {
ef887810
RS
1197 if (NILP (Vinhibit_changing_match_data))
1198 {
1199 pos_byte = search_regs.start[0] + BEGV_BYTE;
1200 for (i = 0; i < search_regs.num_regs; i++)
1201 if (search_regs.start[i] >= 0)
1202 {
1203 search_regs.start[i]
1204 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1205 search_regs.end[i]
1206 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1207 }
1208 XSETBUFFER (last_thing_searched, current_buffer);
1209 /* Set pos to the new position. */
1210 pos = search_regs.start[0];
1211 }
1212 else
1213 {
1214 pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1215 /* Set pos to the new position. */
1216 pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1217 }
ca1d1d23
JB
1218 }
1219 else
1220 {
1221 immediate_quit = 0;
1222 return (n);
1223 }
1224 n++;
1225 }
1226 while (n > 0)
1227 {
42db823b 1228 int val;
487282dc 1229 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b 1230 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
ef887810
RS
1231 (NILP (Vinhibit_changing_match_data)
1232 ? &search_regs : &search_regs_1),
4996330b 1233 lim_byte - BEGV_BYTE);
ca1d1d23 1234 if (val == -2)
b6d6a51c
KH
1235 {
1236 matcher_overflow ();
1237 }
ca1d1d23
JB
1238 if (val >= 0)
1239 {
ef887810
RS
1240 if (NILP (Vinhibit_changing_match_data))
1241 {
1242 pos_byte = search_regs.end[0] + BEGV_BYTE;
1243 for (i = 0; i < search_regs.num_regs; i++)
1244 if (search_regs.start[i] >= 0)
1245 {
1246 search_regs.start[i]
1247 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1248 search_regs.end[i]
1249 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1250 }
1251 XSETBUFFER (last_thing_searched, current_buffer);
1252 pos = search_regs.end[0];
1253 }
1254 else
1255 {
1256 pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1257 pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1258 }
ca1d1d23
JB
1259 }
1260 else
1261 {
1262 immediate_quit = 0;
1263 return (0 - n);
1264 }
1265 n--;
1266 }
1267 immediate_quit = 0;
1268 return (pos);
1269 }
1270 else /* non-RE case */
1271 {
facdc750
RS
1272 unsigned char *raw_pattern, *pat;
1273 int raw_pattern_size;
1274 int raw_pattern_size_byte;
1275 unsigned char *patbuf;
1276 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
a967ed62 1277 unsigned char *base_pat;
49e44e81
KH
1278 /* Set to positive if we find a non-ASCII char that need
1279 translation. Otherwise set to zero later. */
1280 int char_base = -1;
040272ce 1281 int boyer_moore_ok = 1;
facdc750
RS
1282
1283 /* MULTIBYTE says whether the text to be searched is multibyte.
1284 We must convert PATTERN to match that, or we will not really
1285 find things right. */
1286
1287 if (multibyte == STRING_MULTIBYTE (string))
1288 {
d5db4077
KR
1289 raw_pattern = (unsigned char *) SDATA (string);
1290 raw_pattern_size = SCHARS (string);
1291 raw_pattern_size_byte = SBYTES (string);
facdc750
RS
1292 }
1293 else if (multibyte)
1294 {
d5db4077 1295 raw_pattern_size = SCHARS (string);
facdc750 1296 raw_pattern_size_byte
d5db4077 1297 = count_size_as_multibyte (SDATA (string),
facdc750 1298 raw_pattern_size);
7276d3d8 1299 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
d5db4077
KR
1300 copy_text (SDATA (string), raw_pattern,
1301 SCHARS (string), 0, 1);
facdc750
RS
1302 }
1303 else
1304 {
1305 /* Converting multibyte to single-byte.
1306
1307 ??? Perhaps this conversion should be done in a special way
1308 by subtracting nonascii-insert-offset from each non-ASCII char,
1309 so that only the multibyte chars which really correspond to
1310 the chosen single-byte character set can possibly match. */
d5db4077
KR
1311 raw_pattern_size = SCHARS (string);
1312 raw_pattern_size_byte = SCHARS (string);
7276d3d8 1313 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
1314 copy_text (SDATA (string), raw_pattern,
1315 SBYTES (string), 1, 0);
facdc750
RS
1316 }
1317
1318 /* Copy and optionally translate the pattern. */
1319 len = raw_pattern_size;
1320 len_byte = raw_pattern_size_byte;
f5f7578b 1321 patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
facdc750
RS
1322 pat = patbuf;
1323 base_pat = raw_pattern;
1324 if (multibyte)
1325 {
a17ae96a
KH
1326 /* Fill patbuf by translated characters in STRING while
1327 checking if we can use boyer-moore search. If TRT is
1328 non-nil, we can use boyer-moore search only if TRT can be
1329 represented by the byte array of 256 elements. For that,
1330 all non-ASCII case-equivalents of all case-senstive
1331 characters in STRING must belong to the same charset and
1332 row. */
1333
facdc750
RS
1334 while (--len >= 0)
1335 {
a17ae96a 1336 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
aff2ce94 1337 int c, translated, inverse;
a17ae96a 1338 int in_charlen, charlen;
facdc750
RS
1339
1340 /* If we got here and the RE flag is set, it's because we're
1341 dealing with a regexp known to be trivial, so the backslash
1342 just quotes the next character. */
1343 if (RE && *base_pat == '\\')
1344 {
1345 len--;
62221f22 1346 raw_pattern_size--;
facdc750
RS
1347 len_byte--;
1348 base_pat++;
1349 }
1350
62a6e103 1351 c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
040272ce 1352
a17ae96a 1353 if (NILP (trt))
facdc750 1354 {
a17ae96a
KH
1355 str = base_pat;
1356 charlen = in_charlen;
1357 }
1358 else
1359 {
1360 /* Translate the character. */
1361 TRANSLATE (translated, trt, c);
1362 charlen = CHAR_STRING (translated, str_base);
1363 str = str_base;
1364
1365 /* Check if C has any other case-equivalents. */
1366 TRANSLATE (inverse, inverse_trt, c);
1367 /* If so, check if we can use boyer-moore. */
1368 if (c != inverse && boyer_moore_ok)
1369 {
1370 /* Check if all equivalents belong to the same
1371 group of characters. Note that the check of C
49e44e81
KH
1372 itself is done by the last iteration. */
1373 int this_char_base = -1;
1374
1375 while (boyer_moore_ok)
a17ae96a 1376 {
49e44e81 1377 if (ASCII_BYTE_P (inverse))
a17ae96a 1378 {
49e44e81
KH
1379 if (this_char_base > 0)
1380 boyer_moore_ok = 0;
1381 else
dafaabd1 1382 this_char_base = 0;
a17ae96a 1383 }
49e44e81
KH
1384 else if (CHAR_BYTE8_P (inverse))
1385 /* Boyer-moore search can't handle a
1386 translation of an eight-bit
1387 character. */
1388 boyer_moore_ok = 0;
1389 else if (this_char_base < 0)
1390 {
1391 this_char_base = inverse & ~0x3F;
1392 if (char_base < 0)
1393 char_base = this_char_base;
dafaabd1 1394 else if (this_char_base != char_base)
49e44e81
KH
1395 boyer_moore_ok = 0;
1396 }
1397 else if ((inverse & ~0x3F) != this_char_base)
1398 boyer_moore_ok = 0;
a17ae96a
KH
1399 if (c == inverse)
1400 break;
1401 TRANSLATE (inverse, inverse_trt, inverse);
1402 }
1403 }
aff2ce94 1404 }
facdc750
RS
1405
1406 /* Store this character into the translated pattern. */
72af86bd 1407 memcpy (pat, str, charlen);
a17ae96a 1408 pat += charlen;
facdc750
RS
1409 base_pat += in_charlen;
1410 len_byte -= in_charlen;
1411 }
dafaabd1
AS
1412
1413 /* If char_base is still negative we didn't find any translated
1414 non-ASCII characters. */
1415 if (char_base < 0)
1416 char_base = 0;
facdc750
RS
1417 }
1418 else
1419 {
040272ce 1420 /* Unibyte buffer. */
a17ae96a 1421 char_base = 0;
facdc750
RS
1422 while (--len >= 0)
1423 {
040272ce 1424 int c, translated;
facdc750
RS
1425
1426 /* If we got here and the RE flag is set, it's because we're
1427 dealing with a regexp known to be trivial, so the backslash
1428 just quotes the next character. */
1429 if (RE && *base_pat == '\\')
1430 {
1431 len--;
0190922f 1432 raw_pattern_size--;
facdc750
RS
1433 base_pat++;
1434 }
1435 c = *base_pat++;
aff2ce94 1436 TRANSLATE (translated, trt, c);
facdc750
RS
1437 *pat++ = translated;
1438 }
1439 }
1440
1441 len_byte = pat - patbuf;
1442 len = raw_pattern_size;
1443 pat = base_pat = patbuf;
1444
040272ce 1445 if (boyer_moore_ok)
facdc750 1446 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
aff2ce94 1447 pos, pos_byte, lim, lim_byte,
a17ae96a 1448 char_base);
facdc750
RS
1449 else
1450 return simple_search (n, pat, len, len_byte, trt,
1451 pos, pos_byte, lim, lim_byte);
1452 }
1453}
1454\f
1455/* Do a simple string search N times for the string PAT,
1456 whose length is LEN/LEN_BYTE,
1457 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1458 TRT is the translation table.
f8bd51c4 1459
facdc750
RS
1460 Return the character position where the match is found.
1461 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1462
facdc750
RS
1463 This kind of search works regardless of what is in PAT and
1464 regardless of what is in TRT. It is used in cases where
1465 boyer_moore cannot work. */
1466
e7deaab0 1467static EMACS_INT
971de7fb 1468simple_search (int n, unsigned char *pat, int len, int len_byte, Lisp_Object trt, EMACS_INT pos, EMACS_INT pos_byte, EMACS_INT lim, EMACS_INT lim_byte)
facdc750
RS
1469{
1470 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
ab228c24 1471 int forward = n > 0;
49e44e81
KH
1472 /* Number of buffer bytes matched. Note that this may be different
1473 from len_byte in a multibyte buffer. */
1474 int match_byte;
facdc750
RS
1475
1476 if (lim > pos && multibyte)
1477 while (n > 0)
1478 {
1479 while (1)
f8bd51c4 1480 {
facdc750 1481 /* Try matching at position POS. */
e7deaab0
AS
1482 EMACS_INT this_pos = pos;
1483 EMACS_INT this_pos_byte = pos_byte;
facdc750 1484 int this_len = len;
facdc750 1485 unsigned char *p = pat;
a9469498 1486 if (pos + len > lim || pos_byte + len_byte > lim_byte)
facdc750
RS
1487 goto stop;
1488
1489 while (this_len > 0)
1490 {
1491 int charlen, buf_charlen;
ab228c24 1492 int pat_ch, buf_ch;
facdc750 1493
62a6e103 1494 pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
facdc750 1495 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
facdc750 1496 buf_charlen);
aff2ce94 1497 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1498
1499 if (buf_ch != pat_ch)
1500 break;
ab228c24 1501
ab228c24
RS
1502 this_len--;
1503 p += charlen;
1504
1505 this_pos_byte += buf_charlen;
1506 this_pos++;
facdc750
RS
1507 }
1508
1509 if (this_len == 0)
1510 {
49e44e81 1511 match_byte = this_pos_byte - pos_byte;
facdc750 1512 pos += len;
49e44e81 1513 pos_byte += match_byte;
facdc750
RS
1514 break;
1515 }
1516
1517 INC_BOTH (pos, pos_byte);
f8bd51c4 1518 }
facdc750
RS
1519
1520 n--;
1521 }
1522 else if (lim > pos)
1523 while (n > 0)
1524 {
1525 while (1)
f8bd51c4 1526 {
facdc750 1527 /* Try matching at position POS. */
e7deaab0 1528 EMACS_INT this_pos = pos;
facdc750
RS
1529 int this_len = len;
1530 unsigned char *p = pat;
1531
1532 if (pos + len > lim)
1533 goto stop;
1534
1535 while (this_len > 0)
1536 {
1537 int pat_ch = *p++;
1538 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1539 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1540
1541 if (buf_ch != pat_ch)
1542 break;
ab228c24
RS
1543
1544 this_len--;
1545 this_pos++;
facdc750
RS
1546 }
1547
1548 if (this_len == 0)
1549 {
49e44e81 1550 match_byte = len;
facdc750
RS
1551 pos += len;
1552 break;
1553 }
1554
1555 pos++;
f8bd51c4 1556 }
facdc750
RS
1557
1558 n--;
1559 }
1560 /* Backwards search. */
1561 else if (lim < pos && multibyte)
1562 while (n < 0)
1563 {
1564 while (1)
f8bd51c4 1565 {
facdc750 1566 /* Try matching at position POS. */
8b264ecb
AS
1567 EMACS_INT this_pos = pos;
1568 EMACS_INT this_pos_byte = pos_byte;
facdc750 1569 int this_len = len;
c525b3f2 1570 const unsigned char *p = pat + len_byte;
facdc750 1571
8b264ecb 1572 if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
facdc750
RS
1573 goto stop;
1574
1575 while (this_len > 0)
1576 {
8b264ecb 1577 int charlen;
ab228c24 1578 int pat_ch, buf_ch;
facdc750 1579
8b264ecb
AS
1580 DEC_BOTH (this_pos, this_pos_byte);
1581 PREV_CHAR_BOUNDARY (p, pat);
1582 pat_ch = STRING_CHAR (p);
1583 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
aff2ce94 1584 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1585
1586 if (buf_ch != pat_ch)
1587 break;
ab228c24 1588
ab228c24 1589 this_len--;
facdc750
RS
1590 }
1591
1592 if (this_len == 0)
1593 {
8b264ecb
AS
1594 match_byte = pos_byte - this_pos_byte;
1595 pos = this_pos;
1596 pos_byte = this_pos_byte;
facdc750
RS
1597 break;
1598 }
1599
1600 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1601 }
1602
facdc750
RS
1603 n++;
1604 }
1605 else if (lim < pos)
1606 while (n < 0)
1607 {
1608 while (1)
b6d6a51c 1609 {
facdc750 1610 /* Try matching at position POS. */
e7deaab0 1611 EMACS_INT this_pos = pos - len;
facdc750
RS
1612 int this_len = len;
1613 unsigned char *p = pat;
1614
a9469498 1615 if (this_pos < lim)
facdc750
RS
1616 goto stop;
1617
1618 while (this_len > 0)
1619 {
1620 int pat_ch = *p++;
1621 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1622 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1623
1624 if (buf_ch != pat_ch)
1625 break;
ab228c24
RS
1626 this_len--;
1627 this_pos++;
facdc750
RS
1628 }
1629
1630 if (this_len == 0)
b6d6a51c 1631 {
49e44e81 1632 match_byte = len;
facdc750
RS
1633 pos -= len;
1634 break;
b6d6a51c 1635 }
facdc750
RS
1636
1637 pos--;
b6d6a51c 1638 }
facdc750
RS
1639
1640 n++;
b6d6a51c 1641 }
facdc750
RS
1642
1643 stop:
1644 if (n == 0)
aff2ce94 1645 {
ab228c24 1646 if (forward)
49e44e81 1647 set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
ab228c24 1648 else
49e44e81 1649 set_search_regs (multibyte ? pos_byte : pos, match_byte);
aff2ce94
RS
1650
1651 return pos;
1652 }
facdc750
RS
1653 else if (n > 0)
1654 return -n;
1655 else
1656 return n;
1657}
1658\f
0190922f 1659/* Do Boyer-Moore search N times for the string BASE_PAT,
facdc750
RS
1660 whose length is LEN/LEN_BYTE,
1661 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1662 DIRECTION says which direction we search in.
1663 TRT and INVERSE_TRT are translation tables.
0190922f 1664 Characters in PAT are already translated by TRT.
facdc750 1665
0190922f
KH
1666 This kind of search works if all the characters in BASE_PAT that
1667 have nontrivial translation are the same aside from the last byte.
1668 This makes it possible to translate just the last byte of a
1669 character, and do so after just a simple test of the context.
b2e6b10f 1670 CHAR_BASE is nonzero if there is such a non-ASCII character.
facdc750
RS
1671
1672 If that criterion is not satisfied, do not call this function. */
1673
e7deaab0 1674static EMACS_INT
facdc750 1675boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
a17ae96a 1676 pos, pos_byte, lim, lim_byte, char_base)
facdc750
RS
1677 int n;
1678 unsigned char *base_pat;
1679 int len, len_byte;
1680 Lisp_Object trt;
1681 Lisp_Object inverse_trt;
e7deaab0
AS
1682 EMACS_INT pos, pos_byte;
1683 EMACS_INT lim, lim_byte;
a17ae96a 1684 int char_base;
facdc750
RS
1685{
1686 int direction = ((n > 0) ? 1 : -1);
1687 register int dirlen;
e7deaab0
AS
1688 EMACS_INT limit;
1689 int stride_for_teases = 0;
f4646fff 1690 int BM_tab[0400];
177c0ea7 1691 register unsigned char *cursor, *p_limit;
facdc750 1692 register int i, j;
cb6792d2 1693 unsigned char *pat, *pat_end;
facdc750
RS
1694 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1695
1696 unsigned char simple_translate[0400];
0190922f 1697 /* These are set to the preceding bytes of a byte to be translated
49e44e81 1698 if char_base is nonzero. As the maximum byte length of a
a17ae96a 1699 multibyte character is 5, we have to check at most four previous
0190922f
KH
1700 bytes. */
1701 int translate_prev_byte1 = 0;
1702 int translate_prev_byte2 = 0;
1703 int translate_prev_byte3 = 0;
a17ae96a 1704 int translate_prev_byte4 = 0;
facdc750 1705
f4646fff
AS
1706 /* The general approach is that we are going to maintain that we know
1707 the first (closest to the present position, in whatever direction
1708 we're searching) character that could possibly be the last
1709 (furthest from present position) character of a valid match. We
1710 advance the state of our knowledge by looking at that character
1711 and seeing whether it indeed matches the last character of the
1712 pattern. If it does, we take a closer look. If it does not, we
1713 move our pointer (to putative last characters) as far as is
1714 logically possible. This amount of movement, which I call a
1715 stride, will be the length of the pattern if the actual character
1716 appears nowhere in the pattern, otherwise it will be the distance
1717 from the last occurrence of that character to the end of the
1718 pattern. If the amount is zero we have a possible match. */
1719
1720 /* Here we make a "mickey mouse" BM table. The stride of the search
1721 is determined only by the last character of the putative match.
1722 If that character does not match, we will stride the proper
1723 distance to propose a match that superimposes it on the last
1724 instance of a character that matches it (per trt), or misses
1725 it entirely if there is none. */
facdc750
RS
1726
1727 dirlen = len_byte * direction;
cb6792d2
RS
1728
1729 /* Record position after the end of the pattern. */
1730 pat_end = base_pat + len_byte;
1731 /* BASE_PAT points to a character that we start scanning from.
1732 It is the first character in a forward search,
1733 the last character in a backward search. */
facdc750 1734 if (direction < 0)
cb6792d2
RS
1735 base_pat = pat_end - 1;
1736
f4646fff
AS
1737 /* A character that does not appear in the pattern induces a
1738 stride equal to the pattern length. */
1739 for (i = 0; i < 0400; i++)
1740 BM_tab[i] = dirlen;
facdc750
RS
1741
1742 /* We use this for translation, instead of TRT itself.
1743 We fill this in to handle the characters that actually
1744 occur in the pattern. Others don't matter anyway! */
facdc750
RS
1745 for (i = 0; i < 0400; i++)
1746 simple_translate[i] = i;
1747
a17ae96a 1748 if (char_base)
0190922f 1749 {
a17ae96a 1750 /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE. Only a
0190922f 1751 byte following them are the target of translation. */
0190922f 1752 unsigned char str[MAX_MULTIBYTE_LENGTH];
a17ae96a 1753 int len = CHAR_STRING (char_base, str);
0190922f
KH
1754
1755 translate_prev_byte1 = str[len - 2];
1756 if (len > 2)
1757 {
1758 translate_prev_byte2 = str[len - 3];
1759 if (len > 3)
a17ae96a
KH
1760 {
1761 translate_prev_byte3 = str[len - 4];
1762 if (len > 4)
1763 translate_prev_byte4 = str[len - 5];
1764 }
0190922f
KH
1765 }
1766 }
1767
facdc750 1768 i = 0;
f4646fff 1769 while (i != dirlen)
facdc750 1770 {
cb6792d2 1771 unsigned char *ptr = base_pat + i;
facdc750 1772 i += direction;
facdc750 1773 if (! NILP (trt))
ca1d1d23 1774 {
49e44e81
KH
1775 /* If the byte currently looking at is the last of a
1776 character to check case-equivalents, set CH to that
1777 character. An ASCII character and a non-ASCII character
1778 matching with CHAR_BASE are to be checked. */
a17ae96a
KH
1779 int ch = -1;
1780
1781 if (ASCII_BYTE_P (*ptr) || ! multibyte)
1782 ch = *ptr;
49e44e81 1783 else if (char_base
86dc6ccb 1784 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
ca1d1d23 1785 {
49e44e81
KH
1786 unsigned char *charstart = ptr - 1;
1787
1788 while (! (CHAR_HEAD_P (*charstart)))
1789 charstart--;
62a6e103 1790 ch = STRING_CHAR (charstart);
a17ae96a
KH
1791 if (char_base != (ch & ~0x3F))
1792 ch = -1;
ca1d1d23 1793 }
facdc750 1794
f51995fa 1795 if (ch >= 0200)
49e44e81
KH
1796 j = (ch & 0x3F) | 0200;
1797 else
1798 j = *ptr;
1799
f4646fff 1800 if (i == dirlen)
facdc750 1801 stride_for_teases = BM_tab[j];
ab228c24 1802
facdc750
RS
1803 BM_tab[j] = dirlen - i;
1804 /* A translation table is accompanied by its inverse -- see */
177c0ea7 1805 /* comment following downcase_table for details */
a17ae96a 1806 if (ch >= 0)
ab228c24
RS
1807 {
1808 int starting_ch = ch;
49e44e81 1809 int starting_j = j;
a17ae96a 1810
ab228c24
RS
1811 while (1)
1812 {
1813 TRANSLATE (ch, inverse_trt, ch);
f51995fa 1814 if (ch >= 0200)
49e44e81 1815 j = (ch & 0x3F) | 0200;
ab228c24 1816 else
a17ae96a 1817 j = ch;
ab228c24
RS
1818
1819 /* For all the characters that map into CH,
1820 set up simple_translate to map the last byte
1821 into STARTING_J. */
1822 simple_translate[j] = starting_j;
1823 if (ch == starting_ch)
1824 break;
1825 BM_tab[j] = dirlen - i;
1826 }
1827 }
facdc750
RS
1828 }
1829 else
1830 {
1831 j = *ptr;
1832
f4646fff 1833 if (i == dirlen)
facdc750
RS
1834 stride_for_teases = BM_tab[j];
1835 BM_tab[j] = dirlen - i;
ca1d1d23 1836 }
f4646fff
AS
1837 /* stride_for_teases tells how much to stride if we get a
1838 match on the far character but are subsequently
1839 disappointed, by recording what the stride would have been
1840 for that character if the last character had been
1841 different. */
facdc750 1842 }
facdc750
RS
1843 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1844 /* loop invariant - POS_BYTE points at where last char (first
1845 char if reverse) of pattern would align in a possible match. */
1846 while (n != 0)
1847 {
e7deaab0 1848 EMACS_INT tail_end;
facdc750
RS
1849 unsigned char *tail_end_ptr;
1850
1851 /* It's been reported that some (broken) compiler thinks that
1852 Boolean expressions in an arithmetic context are unsigned.
1853 Using an explicit ?1:0 prevents this. */
1854 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1855 < 0)
1856 return (n * (0 - direction));
1857 /* First we do the part we can by pointers (maybe nothing) */
1858 QUIT;
1859 pat = base_pat;
1860 limit = pos_byte - dirlen + direction;
67ce527d
KH
1861 if (direction > 0)
1862 {
1863 limit = BUFFER_CEILING_OF (limit);
1864 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1865 can take on without hitting edge of buffer or the gap. */
1866 limit = min (limit, pos_byte + 20000);
1867 limit = min (limit, lim_byte - 1);
1868 }
1869 else
1870 {
1871 limit = BUFFER_FLOOR_OF (limit);
1872 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1873 can take on without hitting edge of buffer or the gap. */
1874 limit = max (limit, pos_byte - 20000);
1875 limit = max (limit, lim_byte);
1876 }
facdc750
RS
1877 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1878 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1879
1880 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1881 {
facdc750
RS
1882 unsigned char *p2;
1883
1884 p_limit = BYTE_POS_ADDR (limit);
1885 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
f4646fff 1886 /* In this loop, pos + cursor - p2 is the surrogate for pos. */
facdc750 1887 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1888 {
facdc750 1889 if (direction > 0) /* worth duplicating */
ca1d1d23 1890 {
f4646fff
AS
1891 while (cursor <= p_limit)
1892 {
1893 if (BM_tab[*cursor] == 0)
1894 goto hit;
facdc750 1895 cursor += BM_tab[*cursor];
f4646fff 1896 }
facdc750
RS
1897 }
1898 else
1899 {
f4646fff
AS
1900 while (cursor >= p_limit)
1901 {
1902 if (BM_tab[*cursor] == 0)
1903 goto hit;
facdc750 1904 cursor += BM_tab[*cursor];
f4646fff 1905 }
facdc750 1906 }
f4646fff
AS
1907 /* If you are here, cursor is beyond the end of the
1908 searched region. You fail to match within the
1909 permitted region and would otherwise try a character
1910 beyond that region. */
1911 break;
1912
1913 hit:
facdc750
RS
1914 i = dirlen - direction;
1915 if (! NILP (trt))
1916 {
1917 while ((i -= direction) + direction != 0)
ca1d1d23 1918 {
facdc750
RS
1919 int ch;
1920 cursor -= direction;
1921 /* Translate only the last byte of a character. */
1922 if (! multibyte
1923 || ((cursor == tail_end_ptr
1924 || CHAR_HEAD_P (cursor[1]))
1925 && (CHAR_HEAD_P (cursor[0])
0190922f
KH
1926 /* Check if this is the last byte of
1927 a translable character. */
1928 || (translate_prev_byte1 == cursor[-1]
1929 && (CHAR_HEAD_P (translate_prev_byte1)
1930 || (translate_prev_byte2 == cursor[-2]
1931 && (CHAR_HEAD_P (translate_prev_byte2)
1932 || (translate_prev_byte3 == cursor[-3]))))))))
facdc750
RS
1933 ch = simple_translate[*cursor];
1934 else
1935 ch = *cursor;
1936 if (pat[i] != ch)
1937 break;
ca1d1d23 1938 }
facdc750
RS
1939 }
1940 else
1941 {
1942 while ((i -= direction) + direction != 0)
ca1d1d23 1943 {
facdc750
RS
1944 cursor -= direction;
1945 if (pat[i] != *cursor)
1946 break;
ca1d1d23 1947 }
facdc750
RS
1948 }
1949 cursor += dirlen - i - direction; /* fix cursor */
1950 if (i + direction == 0)
1951 {
e7deaab0 1952 EMACS_INT position, start, end;
0c8533c6 1953
facdc750 1954 cursor -= direction;
1113d9db 1955
facdc750
RS
1956 position = pos_byte + cursor - p2 + ((direction > 0)
1957 ? 1 - len_byte : 0);
1958 set_search_regs (position, len_byte);
ca325161 1959
ef887810
RS
1960 if (NILP (Vinhibit_changing_match_data))
1961 {
1962 start = search_regs.start[0];
1963 end = search_regs.end[0];
1964 }
1965 else
1966 /* If Vinhibit_changing_match_data is non-nil,
1967 search_regs will not be changed. So let's
1968 compute start and end here. */
1969 {
1970 start = BYTE_TO_CHAR (position);
1971 end = BYTE_TO_CHAR (position + len_byte);
1972 }
1973
facdc750
RS
1974 if ((n -= direction) != 0)
1975 cursor += dirlen; /* to resume search */
ca1d1d23 1976 else
ef887810 1977 return direction > 0 ? end : start;
ca1d1d23 1978 }
facdc750
RS
1979 else
1980 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 1981 }
facdc750
RS
1982 pos_byte += cursor - p2;
1983 }
1984 else
f4646fff
AS
1985 /* Now we'll pick up a clump that has to be done the hard
1986 way because it covers a discontinuity. */
facdc750
RS
1987 {
1988 limit = ((direction > 0)
1989 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1990 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1991 limit = ((direction > 0)
1992 ? min (limit + len_byte, lim_byte - 1)
1993 : max (limit - len_byte, lim_byte));
1994 /* LIMIT is now the last value POS_BYTE can have
1995 and still be valid for a possible match. */
1996 while (1)
ca1d1d23 1997 {
f4646fff
AS
1998 /* This loop can be coded for space rather than
1999 speed because it will usually run only once.
2000 (the reach is at most len + 21, and typically
2001 does not exceed len). */
facdc750 2002 while ((limit - pos_byte) * direction >= 0)
f4646fff
AS
2003 {
2004 int ch = FETCH_BYTE (pos_byte);
2005 if (BM_tab[ch] == 0)
2006 goto hit2;
2007 pos_byte += BM_tab[ch];
2008 }
2009 break; /* ran off the end */
2010
2011 hit2:
2012 /* Found what might be a match. */
facdc750
RS
2013 i = dirlen - direction;
2014 while ((i -= direction) + direction != 0)
ca1d1d23 2015 {
facdc750
RS
2016 int ch;
2017 unsigned char *ptr;
2018 pos_byte -= direction;
2019 ptr = BYTE_POS_ADDR (pos_byte);
2020 /* Translate only the last byte of a character. */
2021 if (! multibyte
2022 || ((ptr == tail_end_ptr
2023 || CHAR_HEAD_P (ptr[1]))
2024 && (CHAR_HEAD_P (ptr[0])
0190922f
KH
2025 /* Check if this is the last byte of a
2026 translable character. */
2027 || (translate_prev_byte1 == ptr[-1]
2028 && (CHAR_HEAD_P (translate_prev_byte1)
2029 || (translate_prev_byte2 == ptr[-2]
2030 && (CHAR_HEAD_P (translate_prev_byte2)
2031 || translate_prev_byte3 == ptr[-3])))))))
facdc750
RS
2032 ch = simple_translate[*ptr];
2033 else
2034 ch = *ptr;
2035 if (pat[i] != ch)
2036 break;
2037 }
2038 /* Above loop has moved POS_BYTE part or all the way
2039 back to the first pos (last pos if reverse).
2040 Set it once again at the last (first if reverse) char. */
f4646fff 2041 pos_byte += dirlen - i - direction;
facdc750
RS
2042 if (i + direction == 0)
2043 {
e7deaab0 2044 EMACS_INT position, start, end;
facdc750 2045 pos_byte -= direction;
1113d9db 2046
facdc750 2047 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
facdc750 2048 set_search_regs (position, len_byte);
ca325161 2049
ef887810
RS
2050 if (NILP (Vinhibit_changing_match_data))
2051 {
2052 start = search_regs.start[0];
2053 end = search_regs.end[0];
2054 }
2055 else
2056 /* If Vinhibit_changing_match_data is non-nil,
2057 search_regs will not be changed. So let's
2058 compute start and end here. */
2059 {
2060 start = BYTE_TO_CHAR (position);
2061 end = BYTE_TO_CHAR (position + len_byte);
2062 }
2063
facdc750
RS
2064 if ((n -= direction) != 0)
2065 pos_byte += dirlen; /* to resume search */
ca1d1d23 2066 else
ef887810 2067 return direction > 0 ? end : start;
ca1d1d23 2068 }
facdc750
RS
2069 else
2070 pos_byte += stride_for_teases;
2071 }
2072 }
2073 /* We have done one clump. Can we continue? */
2074 if ((lim_byte - pos_byte) * direction < 0)
2075 return ((0 - n) * direction);
ca1d1d23 2076 }
facdc750 2077 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 2078}
ca325161 2079
fa8ed3e0 2080/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
a7e4cdde
RS
2081 for the overall match just found in the current buffer.
2082 Also clear out the match data for registers 1 and up. */
ca325161
RS
2083
2084static void
971de7fb 2085set_search_regs (EMACS_INT beg_byte, EMACS_INT nbytes)
ca325161 2086{
a7e4cdde
RS
2087 int i;
2088
ef887810
RS
2089 if (!NILP (Vinhibit_changing_match_data))
2090 return;
2091
ca325161
RS
2092 /* Make sure we have registers in which to store
2093 the match position. */
2094 if (search_regs.num_regs == 0)
2095 {
2d4a771a
RS
2096 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2097 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 2098 search_regs.num_regs = 2;
ca325161
RS
2099 }
2100
a7e4cdde
RS
2101 /* Clear out the other registers. */
2102 for (i = 1; i < search_regs.num_regs; i++)
2103 {
2104 search_regs.start[i] = -1;
2105 search_regs.end[i] = -1;
2106 }
2107
fa8ed3e0
RS
2108 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2109 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 2110 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 2111}
ca1d1d23 2112\f
c6e0e1cc
CY
2113/* Given STRING, a string of words separated by word delimiters,
2114 compute a regexp that matches those exact words separated by
2115 arbitrary punctuation. If LAX is nonzero, the end of the string
2116 need not match a word boundary unless it ends in whitespace. */
ca1d1d23
JB
2117
2118static Lisp_Object
971de7fb 2119wordify (Lisp_Object string, int lax)
ca1d1d23
JB
2120{
2121 register unsigned char *p, *o;
0c8533c6 2122 register int i, i_byte, len, punct_count = 0, word_count = 0;
ca1d1d23 2123 Lisp_Object val;
0c8533c6 2124 int prev_c = 0;
c6e0e1cc 2125 int adjust, whitespace_at_end;
ca1d1d23 2126
b7826503 2127 CHECK_STRING (string);
d5db4077
KR
2128 p = SDATA (string);
2129 len = SCHARS (string);
ca1d1d23 2130
0c8533c6
RS
2131 for (i = 0, i_byte = 0; i < len; )
2132 {
2133 int c;
177c0ea7 2134
93daa011 2135 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
0c8533c6
RS
2136
2137 if (SYNTAX (c) != Sword)
2138 {
2139 punct_count++;
2140 if (i > 0 && SYNTAX (prev_c) == Sword)
2141 word_count++;
2142 }
ca1d1d23 2143
0c8533c6
RS
2144 prev_c = c;
2145 }
2146
2147 if (SYNTAX (prev_c) == Sword)
c6e0e1cc
CY
2148 {
2149 word_count++;
2150 whitespace_at_end = 0;
2151 }
2152 else
2153 whitespace_at_end = 1;
2154
0c8533c6 2155 if (!word_count)
c60416e0 2156 return empty_unibyte_string;
0c8533c6 2157
c6e0e1cc
CY
2158 adjust = - punct_count + 5 * (word_count - 1)
2159 + ((lax && !whitespace_at_end) ? 2 : 4);
8a2df937
RS
2160 if (STRING_MULTIBYTE (string))
2161 val = make_uninit_multibyte_string (len + adjust,
d5db4077 2162 SBYTES (string)
8a2df937
RS
2163 + adjust);
2164 else
2165 val = make_uninit_string (len + adjust);
ca1d1d23 2166
d5db4077 2167 o = SDATA (val);
ca1d1d23
JB
2168 *o++ = '\\';
2169 *o++ = 'b';
1e9582d4 2170 prev_c = 0;
ca1d1d23 2171
1e9582d4
RS
2172 for (i = 0, i_byte = 0; i < len; )
2173 {
2174 int c;
2175 int i_byte_orig = i_byte;
177c0ea7 2176
93daa011 2177 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
1e9582d4
RS
2178
2179 if (SYNTAX (c) == Sword)
2180 {
72af86bd 2181 memcpy (o, SDATA (string) + i_byte_orig, i_byte - i_byte_orig);
1e9582d4
RS
2182 o += i_byte - i_byte_orig;
2183 }
2184 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2185 {
2186 *o++ = '\\';
2187 *o++ = 'W';
2188 *o++ = '\\';
2189 *o++ = 'W';
2190 *o++ = '*';
2191 }
2192
2193 prev_c = c;
2194 }
ca1d1d23 2195
c6e0e1cc
CY
2196 if (!lax || whitespace_at_end)
2197 {
2198 *o++ = '\\';
2199 *o++ = 'b';
2200 }
ca1d1d23
JB
2201
2202 return val;
2203}
2204\f
2205DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
8c1a1077
PJ
2206 "MSearch backward: ",
2207 doc: /* Search backward from point for STRING.
2208Set point to the beginning of the occurrence found, and return point.
2209An optional second argument bounds the search; it is a buffer position.
2210The match found must not extend before that position.
2211Optional third argument, if t, means if fail just return nil (no error).
2212 If not nil and not t, position at limit of search and return nil.
2213Optional fourth argument is repeat count--search for successive occurrences.
2214
2215Search case-sensitivity is determined by the value of the variable
2216`case-fold-search', which see.
2217
2218See also the functions `match-beginning', `match-end' and `replace-match'. */)
2219 (string, bound, noerror, count)
ca1d1d23
JB
2220 Lisp_Object string, bound, noerror, count;
2221{
b819a390 2222 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
2223}
2224
6af43974 2225DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
8c1a1077
PJ
2226 doc: /* Search forward from point for STRING.
2227Set point to the end of the occurrence found, and return point.
2228An optional second argument bounds the search; it is a buffer position.
d6aacdcd 2229The match found must not extend after that position. A value of nil is
48740f01 2230 equivalent to (point-max).
8c1a1077
PJ
2231Optional third argument, if t, means if fail just return nil (no error).
2232 If not nil and not t, move to limit of search and return nil.
2233Optional fourth argument is repeat count--search for successive occurrences.
2234
2235Search case-sensitivity is determined by the value of the variable
2236`case-fold-search', which see.
2237
2238See also the functions `match-beginning', `match-end' and `replace-match'. */)
2239 (string, bound, noerror, count)
ca1d1d23
JB
2240 Lisp_Object string, bound, noerror, count;
2241{
b819a390 2242 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
2243}
2244
2245DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
8c1a1077
PJ
2246 "sWord search backward: ",
2247 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2248Set point to the beginning of the occurrence found, and return point.
2249An optional second argument bounds the search; it is a buffer position.
2250The match found must not extend before that position.
2251Optional third argument, if t, means if fail just return nil (no error).
2252 If not nil and not t, move to limit of search and return nil.
2253Optional fourth argument is repeat count--search for successive occurrences. */)
2254 (string, bound, noerror, count)
ca1d1d23
JB
2255 Lisp_Object string, bound, noerror, count;
2256{
c6e0e1cc 2257 return search_command (wordify (string, 0), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2258}
2259
2260DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
8c1a1077
PJ
2261 "sWord search: ",
2262 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2263Set point to the end of the occurrence found, and return point.
2264An optional second argument bounds the search; it is a buffer position.
2265The match found must not extend after that position.
2266Optional third argument, if t, means if fail just return nil (no error).
2267 If not nil and not t, move to limit of search and return nil.
2268Optional fourth argument is repeat count--search for successive occurrences. */)
2269 (string, bound, noerror, count)
ca1d1d23
JB
2270 Lisp_Object string, bound, noerror, count;
2271{
c6e0e1cc
CY
2272 return search_command (wordify (string, 0), bound, noerror, count, 1, 1, 0);
2273}
2274
2275DEFUN ("word-search-backward-lax", Fword_search_backward_lax, Sword_search_backward_lax, 1, 4,
2276 "sWord search backward: ",
2277 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2278Set point to the beginning of the occurrence found, and return point.
2279
2280Unlike `word-search-backward', the end of STRING need not match a word
2281boundary unless it ends in whitespace.
2282
2283An optional second argument bounds the search; it is a buffer position.
2284The match found must not extend before that position.
2285Optional third argument, if t, means if fail just return nil (no error).
2286 If not nil and not t, move to limit of search and return nil.
2287Optional fourth argument is repeat count--search for successive occurrences. */)
2288 (string, bound, noerror, count)
2289 Lisp_Object string, bound, noerror, count;
2290{
2291 return search_command (wordify (string, 1), bound, noerror, count, -1, 1, 0);
2292}
2293
2294DEFUN ("word-search-forward-lax", Fword_search_forward_lax, Sword_search_forward_lax, 1, 4,
2295 "sWord search: ",
2296 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2297Set point to the end of the occurrence found, and return point.
2298
2299Unlike `word-search-forward', the end of STRING need not match a word
2300boundary unless it ends in whitespace.
2301
2302An optional second argument bounds the search; it is a buffer position.
2303The match found must not extend after that position.
2304Optional third argument, if t, means if fail just return nil (no error).
2305 If not nil and not t, move to limit of search and return nil.
2306Optional fourth argument is repeat count--search for successive occurrences. */)
2307 (string, bound, noerror, count)
2308 Lisp_Object string, bound, noerror, count;
2309{
2310 return search_command (wordify (string, 1), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
2311}
2312
2313DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
8c1a1077
PJ
2314 "sRE search backward: ",
2315 doc: /* Search backward from point for match for regular expression REGEXP.
2316Set point to the beginning of the match, and return point.
2317The match found is the one starting last in the buffer
2318and yet ending before the origin of the search.
2319An optional second argument bounds the search; it is a buffer position.
2320The match found must start at or after that position.
2321Optional third argument, if t, means if fail just return nil (no error).
2322 If not nil and not t, move to limit of search and return nil.
2323Optional fourth argument is repeat count--search for successive occurrences.
2324See also the functions `match-beginning', `match-end', `match-string',
2325and `replace-match'. */)
2326 (regexp, bound, noerror, count)
19c0a730 2327 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2328{
b819a390 2329 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2330}
2331
2332DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
8c1a1077
PJ
2333 "sRE search: ",
2334 doc: /* Search forward from point for regular expression REGEXP.
2335Set point to the end of the occurrence found, and return point.
2336An optional second argument bounds the search; it is a buffer position.
2337The match found must not extend after that position.
2338Optional third argument, if t, means if fail just return nil (no error).
2339 If not nil and not t, move to limit of search and return nil.
2340Optional fourth argument is repeat count--search for successive occurrences.
2341See also the functions `match-beginning', `match-end', `match-string',
2342and `replace-match'. */)
2343 (regexp, bound, noerror, count)
19c0a730 2344 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2345{
b819a390
RS
2346 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2347}
2348
2349DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
8c1a1077
PJ
2350 "sPosix search backward: ",
2351 doc: /* Search backward from point for match for regular expression REGEXP.
2352Find the longest match in accord with Posix regular expression rules.
2353Set point to the beginning of the match, and return point.
2354The match found is the one starting last in the buffer
2355and yet ending before the origin of the search.
2356An optional second argument bounds the search; it is a buffer position.
2357The match found must start at or after that position.
2358Optional third argument, if t, means if fail just return nil (no error).
2359 If not nil and not t, move to limit of search and return nil.
2360Optional fourth argument is repeat count--search for successive occurrences.
2361See also the functions `match-beginning', `match-end', `match-string',
2362and `replace-match'. */)
2363 (regexp, bound, noerror, count)
b819a390
RS
2364 Lisp_Object regexp, bound, noerror, count;
2365{
2366 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2367}
2368
2369DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
8c1a1077
PJ
2370 "sPosix search: ",
2371 doc: /* Search forward from point for regular expression REGEXP.
2372Find the longest match in accord with Posix regular expression rules.
2373Set point to the end of the occurrence found, and return point.
2374An optional second argument bounds the search; it is a buffer position.
2375The match found must not extend after that position.
2376Optional third argument, if t, means if fail just return nil (no error).
2377 If not nil and not t, move to limit of search and return nil.
2378Optional fourth argument is repeat count--search for successive occurrences.
2379See also the functions `match-beginning', `match-end', `match-string',
2380and `replace-match'. */)
2381 (regexp, bound, noerror, count)
b819a390
RS
2382 Lisp_Object regexp, bound, noerror, count;
2383{
2384 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2385}
2386\f
d7a5ad5f 2387DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
8c1a1077 2388 doc: /* Replace text matched by last search with NEWTEXT.
4dd0c271
RS
2389Leave point at the end of the replacement text.
2390
8c1a1077
PJ
2391If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2392Otherwise maybe capitalize the whole text, or maybe just word initials,
2393based on the replaced text.
2394If the replaced text has only capital letters
2395and has at least one multiletter word, convert NEWTEXT to all caps.
4dd0c271
RS
2396Otherwise if all words are capitalized in the replaced text,
2397capitalize each word in NEWTEXT.
8c1a1077
PJ
2398
2399If third arg LITERAL is non-nil, insert NEWTEXT literally.
2400Otherwise treat `\\' as special:
2401 `\\&' in NEWTEXT means substitute original matched text.
2402 `\\N' means substitute what matched the Nth `\\(...\\)'.
2403 If Nth parens didn't match, substitute nothing.
2404 `\\\\' means insert one `\\'.
4dd0c271
RS
2405Case conversion does not apply to these substitutions.
2406
8c1a1077 2407FIXEDCASE and LITERAL are optional arguments.
8c1a1077
PJ
2408
2409The optional fourth argument STRING can be a string to modify.
2410This is meaningful when the previous match was done against STRING,
2411using `string-match'. When used this way, `replace-match'
2412creates and returns a new string made by copying STRING and replacing
2413the part of STRING that was matched.
2414
2415The optional fifth argument SUBEXP specifies a subexpression;
2416it says to replace just that subexpression with NEWTEXT,
2417rather than replacing the entire matched text.
2418This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2419`\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2420NEWTEXT in place of subexp N.
2421This is useful only after a regular expression search or match,
2422since only regular expressions have distinguished subexpressions. */)
2423 (newtext, fixedcase, literal, string, subexp)
d7a5ad5f 2424 Lisp_Object newtext, fixedcase, literal, string, subexp;
ca1d1d23
JB
2425{
2426 enum { nochange, all_caps, cap_initial } case_action;
ac3b28b1 2427 register int pos, pos_byte;
ca1d1d23 2428 int some_multiletter_word;
97832bd0 2429 int some_lowercase;
73dc8771 2430 int some_uppercase;
208767c3 2431 int some_nonuppercase_initial;
ca1d1d23 2432 register int c, prevc;
d7a5ad5f 2433 int sub;
e7deaab0 2434 EMACS_INT opoint, newpoint;
ca1d1d23 2435
b7826503 2436 CHECK_STRING (newtext);
ca1d1d23 2437
080c45fd 2438 if (! NILP (string))
b7826503 2439 CHECK_STRING (string);
080c45fd 2440
ca1d1d23
JB
2441 case_action = nochange; /* We tried an initialization */
2442 /* but some C compilers blew it */
4746118a
JB
2443
2444 if (search_regs.num_regs <= 0)
d72cdbfc 2445 error ("`replace-match' called before any match found");
4746118a 2446
d7a5ad5f
RS
2447 if (NILP (subexp))
2448 sub = 0;
2449 else
2450 {
b7826503 2451 CHECK_NUMBER (subexp);
d7a5ad5f
RS
2452 sub = XINT (subexp);
2453 if (sub < 0 || sub >= search_regs.num_regs)
2454 args_out_of_range (subexp, make_number (search_regs.num_regs));
2455 }
2456
080c45fd
RS
2457 if (NILP (string))
2458 {
d7a5ad5f
RS
2459 if (search_regs.start[sub] < BEGV
2460 || search_regs.start[sub] > search_regs.end[sub]
2461 || search_regs.end[sub] > ZV)
2462 args_out_of_range (make_number (search_regs.start[sub]),
2463 make_number (search_regs.end[sub]));
080c45fd
RS
2464 }
2465 else
2466 {
d7a5ad5f
RS
2467 if (search_regs.start[sub] < 0
2468 || search_regs.start[sub] > search_regs.end[sub]
d5db4077 2469 || search_regs.end[sub] > SCHARS (string))
d7a5ad5f
RS
2470 args_out_of_range (make_number (search_regs.start[sub]),
2471 make_number (search_regs.end[sub]));
080c45fd 2472 }
ca1d1d23
JB
2473
2474 if (NILP (fixedcase))
2475 {
2476 /* Decide how to casify by examining the matched text. */
e7deaab0 2477 EMACS_INT last;
ca1d1d23 2478
ac3b28b1
KH
2479 pos = search_regs.start[sub];
2480 last = search_regs.end[sub];
fa8ed3e0
RS
2481
2482 if (NILP (string))
ac3b28b1 2483 pos_byte = CHAR_TO_BYTE (pos);
fa8ed3e0 2484 else
ac3b28b1 2485 pos_byte = string_char_to_byte (string, pos);
fa8ed3e0 2486
ca1d1d23
JB
2487 prevc = '\n';
2488 case_action = all_caps;
2489
2490 /* some_multiletter_word is set nonzero if any original word
2491 is more than one letter long. */
2492 some_multiletter_word = 0;
97832bd0 2493 some_lowercase = 0;
208767c3 2494 some_nonuppercase_initial = 0;
73dc8771 2495 some_uppercase = 0;
ca1d1d23 2496
ac3b28b1 2497 while (pos < last)
ca1d1d23 2498 {
080c45fd 2499 if (NILP (string))
ac3b28b1 2500 {
93daa011 2501 c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
ac3b28b1
KH
2502 INC_BOTH (pos, pos_byte);
2503 }
080c45fd 2504 else
93daa011 2505 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
080c45fd 2506
ca1d1d23
JB
2507 if (LOWERCASEP (c))
2508 {
2509 /* Cannot be all caps if any original char is lower case */
2510
97832bd0 2511 some_lowercase = 1;
ca1d1d23 2512 if (SYNTAX (prevc) != Sword)
208767c3 2513 some_nonuppercase_initial = 1;
ca1d1d23
JB
2514 else
2515 some_multiletter_word = 1;
2516 }
d16c2b66 2517 else if (UPPERCASEP (c))
ca1d1d23 2518 {
73dc8771 2519 some_uppercase = 1;
97832bd0 2520 if (SYNTAX (prevc) != Sword)
c4d460ce 2521 ;
97832bd0 2522 else
ca1d1d23
JB
2523 some_multiletter_word = 1;
2524 }
208767c3
RS
2525 else
2526 {
2527 /* If the initial is a caseless word constituent,
2528 treat that like a lowercase initial. */
2529 if (SYNTAX (prevc) != Sword)
2530 some_nonuppercase_initial = 1;
2531 }
ca1d1d23
JB
2532
2533 prevc = c;
2534 }
2535
97832bd0
RS
2536 /* Convert to all caps if the old text is all caps
2537 and has at least one multiletter word. */
2538 if (! some_lowercase && some_multiletter_word)
2539 case_action = all_caps;
c4d460ce 2540 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2541 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2542 case_action = cap_initial;
208767c3 2543 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2544 /* Should x -> yz, operating on X, give Yz or YZ?
2545 We'll assume the latter. */
2546 case_action = all_caps;
97832bd0
RS
2547 else
2548 case_action = nochange;
ca1d1d23
JB
2549 }
2550
080c45fd
RS
2551 /* Do replacement in a string. */
2552 if (!NILP (string))
2553 {
2554 Lisp_Object before, after;
2555
2556 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2557 make_number (search_regs.start[sub]));
2558 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2559
636a5e28
RS
2560 /* Substitute parts of the match into NEWTEXT
2561 if desired. */
080c45fd
RS
2562 if (NILP (literal))
2563 {
e7deaab0
AS
2564 EMACS_INT lastpos = 0;
2565 EMACS_INT lastpos_byte = 0;
080c45fd
RS
2566 /* We build up the substituted string in ACCUM. */
2567 Lisp_Object accum;
2568 Lisp_Object middle;
d5db4077 2569 int length = SBYTES (newtext);
080c45fd
RS
2570
2571 accum = Qnil;
2572
ac3b28b1 2573 for (pos_byte = 0, pos = 0; pos_byte < length;)
080c45fd
RS
2574 {
2575 int substart = -1;
6bbd7a29 2576 int subend = 0;
1e79ec24 2577 int delbackslash = 0;
080c45fd 2578
0c8533c6
RS
2579 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2580
080c45fd
RS
2581 if (c == '\\')
2582 {
0c8533c6 2583 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
177c0ea7 2584
080c45fd
RS
2585 if (c == '&')
2586 {
d7a5ad5f
RS
2587 substart = search_regs.start[sub];
2588 subend = search_regs.end[sub];
080c45fd 2589 }
76fc1ea2 2590 else if (c >= '1' && c <= '9')
080c45fd 2591 {
76fc1ea2
KH
2592 if (search_regs.start[c - '0'] >= 0
2593 && c <= search_regs.num_regs + '0')
080c45fd
RS
2594 {
2595 substart = search_regs.start[c - '0'];
2596 subend = search_regs.end[c - '0'];
2597 }
76fc1ea2
KH
2598 else
2599 {
2600 /* If that subexp did not match,
2601 replace \\N with nothing. */
2602 substart = 0;
2603 subend = 0;
2604 }
080c45fd 2605 }
1e79ec24
KH
2606 else if (c == '\\')
2607 delbackslash = 1;
636a5e28
RS
2608 else
2609 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2610 }
2611 if (substart >= 0)
2612 {
d131e79c
RS
2613 if (pos - 2 != lastpos)
2614 middle = substring_both (newtext, lastpos,
2615 lastpos_byte,
2616 pos - 2, pos_byte - 2);
080c45fd
RS
2617 else
2618 middle = Qnil;
2619 accum = concat3 (accum, middle,
0c8533c6
RS
2620 Fsubstring (string,
2621 make_number (substart),
080c45fd
RS
2622 make_number (subend)));
2623 lastpos = pos;
0c8533c6 2624 lastpos_byte = pos_byte;
080c45fd 2625 }
1e79ec24
KH
2626 else if (delbackslash)
2627 {
d131e79c
RS
2628 middle = substring_both (newtext, lastpos,
2629 lastpos_byte,
2630 pos - 1, pos_byte - 1);
0c8533c6 2631
1e79ec24
KH
2632 accum = concat2 (accum, middle);
2633 lastpos = pos;
0c8533c6 2634 lastpos_byte = pos_byte;
1e79ec24 2635 }
080c45fd
RS
2636 }
2637
d131e79c
RS
2638 if (pos != lastpos)
2639 middle = substring_both (newtext, lastpos,
2640 lastpos_byte,
0c8533c6 2641 pos, pos_byte);
080c45fd
RS
2642 else
2643 middle = Qnil;
2644
2645 newtext = concat2 (accum, middle);
2646 }
2647
636a5e28 2648 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2649 if (case_action == all_caps)
2650 newtext = Fupcase (newtext);
2651 else if (case_action == cap_initial)
2b2eead9 2652 newtext = Fupcase_initials (newtext);
080c45fd
RS
2653
2654 return concat3 (before, newtext, after);
2655 }
2656
09c4719e 2657 /* Record point, then move (quietly) to the start of the match. */
9160906f 2658 if (PT >= search_regs.end[sub])
b0eba991 2659 opoint = PT - ZV;
9160906f
RS
2660 else if (PT > search_regs.start[sub])
2661 opoint = search_regs.end[sub] - ZV;
b0eba991
RS
2662 else
2663 opoint = PT;
2664
886ed6ec
RS
2665 /* If we want non-literal replacement,
2666 perform substitution on the replacement string. */
2667 if (NILP (literal))
ca1d1d23 2668 {
d5db4077 2669 int length = SBYTES (newtext);
68e69fbd
RS
2670 unsigned char *substed;
2671 int substed_alloc_size, substed_len;
3bc25e52
KH
2672 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2673 int str_multibyte = STRING_MULTIBYTE (newtext);
2674 Lisp_Object rev_tbl;
886ed6ec 2675 int really_changed = 0;
3bc25e52 2676
8f924df7 2677 rev_tbl = Qnil;
ac3b28b1 2678
68e69fbd
RS
2679 substed_alloc_size = length * 2 + 100;
2680 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2681 substed_len = 0;
2682
3bc25e52
KH
2683 /* Go thru NEWTEXT, producing the actual text to insert in
2684 SUBSTED while adjusting multibyteness to that of the current
2685 buffer. */
ca1d1d23 2686
ac3b28b1 2687 for (pos_byte = 0, pos = 0; pos_byte < length;)
ca1d1d23 2688 {
68e69fbd 2689 unsigned char str[MAX_MULTIBYTE_LENGTH];
f8ce8a0d
GM
2690 unsigned char *add_stuff = NULL;
2691 int add_len = 0;
68e69fbd 2692 int idx = -1;
9a76659d 2693
3bc25e52
KH
2694 if (str_multibyte)
2695 {
eb99a8dd 2696 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
3bc25e52
KH
2697 if (!buf_multibyte)
2698 c = multibyte_char_to_unibyte (c, rev_tbl);
2699 }
2700 else
2701 {
2702 /* Note that we don't have to increment POS. */
5d69fe10 2703 c = SREF (newtext, pos_byte++);
3bc25e52 2704 if (buf_multibyte)
4c0354d7 2705 MAKE_CHAR_MULTIBYTE (c);
3bc25e52 2706 }
ac3b28b1 2707
68e69fbd
RS
2708 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2709 or set IDX to a match index, which means put that part
2710 of the buffer text into SUBSTED. */
2711
ca1d1d23
JB
2712 if (c == '\\')
2713 {
886ed6ec
RS
2714 really_changed = 1;
2715
3bc25e52
KH
2716 if (str_multibyte)
2717 {
eb99a8dd
KH
2718 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2719 pos, pos_byte);
071ce769 2720 if (!buf_multibyte && !ASCII_CHAR_P (c))
3bc25e52
KH
2721 c = multibyte_char_to_unibyte (c, rev_tbl);
2722 }
2723 else
2724 {
d5db4077 2725 c = SREF (newtext, pos_byte++);
3bc25e52 2726 if (buf_multibyte)
4c0354d7 2727 MAKE_CHAR_MULTIBYTE (c);
3bc25e52
KH
2728 }
2729
ca1d1d23 2730 if (c == '&')
68e69fbd 2731 idx = sub;
78445046 2732 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
2733 {
2734 if (search_regs.start[c - '0'] >= 1)
68e69fbd 2735 idx = c - '0';
ca1d1d23 2736 }
636a5e28 2737 else if (c == '\\')
68e69fbd 2738 add_len = 1, add_stuff = "\\";
636a5e28 2739 else
3bc25e52
KH
2740 {
2741 xfree (substed);
2742 error ("Invalid use of `\\' in replacement text");
2743 }
ca1d1d23
JB
2744 }
2745 else
68e69fbd
RS
2746 {
2747 add_len = CHAR_STRING (c, str);
2748 add_stuff = str;
2749 }
2750
2751 /* If we want to copy part of a previous match,
2752 set up ADD_STUFF and ADD_LEN to point to it. */
2753 if (idx >= 0)
2754 {
e7deaab0 2755 EMACS_INT begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
68e69fbd
RS
2756 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2757 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2758 move_gap (search_regs.start[idx]);
2759 add_stuff = BYTE_POS_ADDR (begbyte);
2760 }
2761
2762 /* Now the stuff we want to add to SUBSTED
2763 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2764
2765 /* Make sure SUBSTED is big enough. */
2766 if (substed_len + add_len >= substed_alloc_size)
2767 {
2768 substed_alloc_size = substed_len + add_len + 500;
2769 substed = (unsigned char *) xrealloc (substed,
2770 substed_alloc_size + 1);
2771 }
2772
2773 /* Now add to the end of SUBSTED. */
f8ce8a0d
GM
2774 if (add_stuff)
2775 {
72af86bd 2776 memcpy (substed + substed_len, add_stuff, add_len);
f8ce8a0d
GM
2777 substed_len += add_len;
2778 }
ca1d1d23 2779 }
68e69fbd 2780
886ed6ec 2781 if (really_changed)
76fc1ea2
KH
2782 {
2783 if (buf_multibyte)
2784 {
2785 int nchars = multibyte_chars_in_text (substed, substed_len);
68e69fbd 2786
76fc1ea2
KH
2787 newtext = make_multibyte_string (substed, nchars, substed_len);
2788 }
2789 else
2790 newtext = make_unibyte_string (substed, substed_len);
2791 }
68e69fbd 2792 xfree (substed);
ca1d1d23
JB
2793 }
2794
886ed6ec
RS
2795 /* Replace the old text with the new in the cleanest possible way. */
2796 replace_range (search_regs.start[sub], search_regs.end[sub],
2797 newtext, 1, 0, 1);
d5db4077 2798 newpoint = search_regs.start[sub] + SCHARS (newtext);
ca1d1d23
JB
2799
2800 if (case_action == all_caps)
886ed6ec
RS
2801 Fupcase_region (make_number (search_regs.start[sub]),
2802 make_number (newpoint));
ca1d1d23 2803 else if (case_action == cap_initial)
886ed6ec
RS
2804 Fupcase_initials_region (make_number (search_regs.start[sub]),
2805 make_number (newpoint));
3e18eecf 2806
98e942e0
RS
2807 /* Adjust search data for this change. */
2808 {
e7deaab0
AS
2809 EMACS_INT oldend = search_regs.end[sub];
2810 EMACS_INT oldstart = search_regs.start[sub];
2811 EMACS_INT change = newpoint - search_regs.end[sub];
98e942e0
RS
2812 int i;
2813
2814 for (i = 0; i < search_regs.num_regs; i++)
2815 {
41c01205 2816 if (search_regs.start[i] >= oldend)
98e942e0 2817 search_regs.start[i] += change;
41c01205
DK
2818 else if (search_regs.start[i] > oldstart)
2819 search_regs.start[i] = oldstart;
2820 if (search_regs.end[i] >= oldend)
98e942e0 2821 search_regs.end[i] += change;
41c01205
DK
2822 else if (search_regs.end[i] > oldstart)
2823 search_regs.end[i] = oldstart;
98e942e0
RS
2824 }
2825 }
2826
b0eba991 2827 /* Put point back where it was in the text. */
8d808a65 2828 if (opoint <= 0)
fa8ed3e0 2829 TEMP_SET_PT (opoint + ZV);
b0eba991 2830 else
fa8ed3e0 2831 TEMP_SET_PT (opoint);
b0eba991
RS
2832
2833 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2834 move_if_not_intangible (newpoint);
177c0ea7 2835
ca1d1d23
JB
2836 return Qnil;
2837}
2838\f
2839static Lisp_Object
971de7fb 2840match_limit (Lisp_Object num, int beginningp)
ca1d1d23
JB
2841{
2842 register int n;
2843
b7826503 2844 CHECK_NUMBER (num);
ca1d1d23 2845 n = XINT (num);
f90a5bf5 2846 if (n < 0)
bd2cbd56 2847 args_out_of_range (num, make_number (0));
f90a5bf5
RS
2848 if (search_regs.num_regs <= 0)
2849 error ("No match data, because no search succeeded");
9b9ceb61 2850 if (n >= search_regs.num_regs
4746118a 2851 || search_regs.start[n] < 0)
ca1d1d23
JB
2852 return Qnil;
2853 return (make_number ((beginningp) ? search_regs.start[n]
2854 : search_regs.end[n]));
2855}
2856
2857DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
8c1a1077
PJ
2858 doc: /* Return position of start of text matched by last search.
2859SUBEXP, a number, specifies which parenthesized expression in the last
2860 regexp.
2861Value is nil if SUBEXPth pair didn't match, or there were less than
2862 SUBEXP pairs.
2863Zero means the entire text matched by the whole regexp or whole string. */)
2864 (subexp)
5806161b 2865 Lisp_Object subexp;
ca1d1d23 2866{
5806161b 2867 return match_limit (subexp, 1);
ca1d1d23
JB
2868}
2869
2870DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
8c1a1077
PJ
2871 doc: /* Return position of end of text matched by last search.
2872SUBEXP, a number, specifies which parenthesized expression in the last
2873 regexp.
2874Value is nil if SUBEXPth pair didn't match, or there were less than
2875 SUBEXP pairs.
2876Zero means the entire text matched by the whole regexp or whole string. */)
2877 (subexp)
5806161b 2878 Lisp_Object subexp;
ca1d1d23 2879{
5806161b 2880 return match_limit (subexp, 0);
177c0ea7 2881}
ca1d1d23 2882
abd0071c 2883DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
8c1a1077
PJ
2884 doc: /* Return a list containing all info on what the last search matched.
2885Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2886All the elements are markers or nil (nil if the Nth pair didn't match)
2887if the last match was on a buffer; integers or nil if a string was matched.
febb8aba 2888Use `set-match-data' to reinstate the data in this list.
8c1a1077 2889
41c01205
DK
2890If INTEGERS (the optional first argument) is non-nil, always use
2891integers \(rather than markers) to represent buffer positions. In
2892this case, and if the last match was in a buffer, the buffer will get
2893stored as one additional element at the end of the list.
2894
abd0071c
KS
2895If REUSE is a list, reuse it as part of the value. If REUSE is long
2896enough to hold all the values, and if INTEGERS is non-nil, no consing
2897is done.
2898
2899If optional third arg RESEAT is non-nil, any previous markers on the
2900REUSE list will be modified to point to nowhere.
2901
140a6b7e 2902Return value is undefined if the last search failed. */)
abd0071c
KS
2903 (integers, reuse, reseat)
2904 Lisp_Object integers, reuse, reseat;
ca1d1d23 2905{
56256c2a 2906 Lisp_Object tail, prev;
4746118a 2907 Lisp_Object *data;
ca1d1d23
JB
2908 int i, len;
2909
abd0071c
KS
2910 if (!NILP (reseat))
2911 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2912 if (MARKERP (XCAR (tail)))
2913 {
51f10faa 2914 unchain_marker (XMARKER (XCAR (tail)));
abd0071c
KS
2915 XSETCAR (tail, Qnil);
2916 }
2917
daa37602 2918 if (NILP (last_thing_searched))
c36bcf1b 2919 return Qnil;
daa37602 2920
6bbd7a29
GM
2921 prev = Qnil;
2922
41c01205 2923 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
4746118a
JB
2924 * sizeof (Lisp_Object));
2925
41c01205 2926 len = 0;
4746118a 2927 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2928 {
2929 int start = search_regs.start[i];
2930 if (start >= 0)
2931 {
56256c2a
RS
2932 if (EQ (last_thing_searched, Qt)
2933 || ! NILP (integers))
ca1d1d23 2934 {
c235cce7
KH
2935 XSETFASTINT (data[2 * i], start);
2936 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2937 }
0ed62dc7 2938 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2939 {
2940 data[2 * i] = Fmake_marker ();
daa37602
JB
2941 Fset_marker (data[2 * i],
2942 make_number (start),
2943 last_thing_searched);
ca1d1d23
JB
2944 data[2 * i + 1] = Fmake_marker ();
2945 Fset_marker (data[2 * i + 1],
177c0ea7 2946 make_number (search_regs.end[i]),
daa37602 2947 last_thing_searched);
ca1d1d23 2948 }
daa37602
JB
2949 else
2950 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2951 abort ();
2952
abd0071c 2953 len = 2 * i + 2;
ca1d1d23
JB
2954 }
2955 else
abd0071c 2956 data[2 * i] = data[2 * i + 1] = Qnil;
ca1d1d23 2957 }
56256c2a 2958
bd2cbd56 2959 if (BUFFERP (last_thing_searched) && !NILP (integers))
41c01205 2960 {
bd2cbd56 2961 data[len] = last_thing_searched;
41c01205
DK
2962 len++;
2963 }
2964
56256c2a
RS
2965 /* If REUSE is not usable, cons up the values and return them. */
2966 if (! CONSP (reuse))
41c01205 2967 return Flist (len, data);
56256c2a
RS
2968
2969 /* If REUSE is a list, store as many value elements as will fit
2970 into the elements of REUSE. */
2971 for (i = 0, tail = reuse; CONSP (tail);
c1d497be 2972 i++, tail = XCDR (tail))
56256c2a 2973 {
41c01205 2974 if (i < len)
f3fbd155 2975 XSETCAR (tail, data[i]);
56256c2a 2976 else
f3fbd155 2977 XSETCAR (tail, Qnil);
56256c2a
RS
2978 prev = tail;
2979 }
2980
2981 /* If we couldn't fit all value elements into REUSE,
2982 cons up the rest of them and add them to the end of REUSE. */
41c01205
DK
2983 if (i < len)
2984 XSETCDR (prev, Flist (len - i, data + i));
56256c2a
RS
2985
2986 return reuse;
ca1d1d23
JB
2987}
2988
b51d6c92
SM
2989/* We used to have an internal use variant of `reseat' described as:
2990
2991 If RESEAT is `evaporate', put the markers back on the free list
2992 immediately. No other references to the markers must exist in this
2993 case, so it is used only internally on the unwind stack and
2994 save-match-data from Lisp.
2995
2996 But it was ill-conceived: those supposedly-internal markers get exposed via
2997 the undo-list, so freeing them here is unsafe. */
ca1d1d23 2998
abd0071c 2999DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
8c1a1077 3000 doc: /* Set internal data on last search match from elements of LIST.
abd0071c
KS
3001LIST should have been created by calling `match-data' previously.
3002
51f10faa 3003If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
abd0071c
KS
3004 (list, reseat)
3005 register Lisp_Object list, reseat;
ca1d1d23
JB
3006{
3007 register int i;
3008 register Lisp_Object marker;
3009
7074fde6
FP
3010 if (running_asynch_code)
3011 save_search_regs ();
3012
29100cea 3013 CHECK_LIST (list);
ca1d1d23 3014
41c01205
DK
3015 /* Unless we find a marker with a buffer or an explicit buffer
3016 in LIST, assume that this match data came from a string. */
daa37602
JB
3017 last_thing_searched = Qt;
3018
4746118a
JB
3019 /* Allocate registers if they don't already exist. */
3020 {
d084e942 3021 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
3022
3023 if (length > search_regs.num_regs)
3024 {
1113d9db
JB
3025 if (search_regs.num_regs == 0)
3026 {
3027 search_regs.start
3028 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3029 search_regs.end
3030 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3031 }
4746118a 3032 else
1113d9db
JB
3033 {
3034 search_regs.start
3035 = (regoff_t *) xrealloc (search_regs.start,
3036 length * sizeof (regoff_t));
3037 search_regs.end
3038 = (regoff_t *) xrealloc (search_regs.end,
3039 length * sizeof (regoff_t));
3040 }
4746118a 3041
e62371e9
KH
3042 for (i = search_regs.num_regs; i < length; i++)
3043 search_regs.start[i] = -1;
3044
487282dc 3045 search_regs.num_regs = length;
4746118a 3046 }
ca1d1d23 3047
abd0071c 3048 for (i = 0; CONSP (list); i++)
41c01205 3049 {
abd0071c 3050 marker = XCAR (list);
bd2cbd56 3051 if (BUFFERP (marker))
c3762cbd 3052 {
bd2cbd56 3053 last_thing_searched = marker;
c3762cbd
DK
3054 break;
3055 }
3056 if (i >= length)
3057 break;
41c01205
DK
3058 if (NILP (marker))
3059 {
3060 search_regs.start[i] = -1;
abd0071c 3061 list = XCDR (list);
41c01205
DK
3062 }
3063 else
3064 {
e7deaab0 3065 EMACS_INT from;
abd0071c 3066 Lisp_Object m;
e2811828 3067
abd0071c 3068 m = marker;
41c01205
DK
3069 if (MARKERP (marker))
3070 {
3071 if (XMARKER (marker)->buffer == 0)
3072 XSETFASTINT (marker, 0);
3073 else
3074 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3075 }
e2811828 3076
41c01205
DK
3077 CHECK_NUMBER_COERCE_MARKER (marker);
3078 from = XINT (marker);
e2811828 3079
abd0071c
KS
3080 if (!NILP (reseat) && MARKERP (m))
3081 {
b51d6c92 3082 unchain_marker (XMARKER (m));
9ad54a7e 3083 XSETCAR (list, Qnil);
abd0071c
KS
3084 }
3085
3086 if ((list = XCDR (list), !CONSP (list)))
3087 break;
3088
3089 m = marker = XCAR (list);
3090
41c01205
DK
3091 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3092 XSETFASTINT (marker, 0);
e2811828 3093
41c01205
DK
3094 CHECK_NUMBER_COERCE_MARKER (marker);
3095 search_regs.start[i] = from;
3096 search_regs.end[i] = XINT (marker);
abd0071c
KS
3097
3098 if (!NILP (reseat) && MARKERP (m))
3099 {
b51d6c92 3100 unchain_marker (XMARKER (m));
9ad54a7e 3101 XSETCAR (list, Qnil);
abd0071c 3102 }
41c01205 3103 }
abd0071c 3104 list = XCDR (list);
41c01205 3105 }
ca1d1d23 3106
41c01205
DK
3107 for (; i < search_regs.num_regs; i++)
3108 search_regs.start[i] = -1;
3109 }
ca1d1d23 3110
177c0ea7 3111 return Qnil;
ca1d1d23
JB
3112}
3113
7074fde6
FP
3114/* If non-zero the match data have been saved in saved_search_regs
3115 during the execution of a sentinel or filter. */
75ebf74b 3116static int search_regs_saved;
7074fde6 3117static struct re_registers saved_search_regs;
41c01205 3118static Lisp_Object saved_last_thing_searched;
7074fde6
FP
3119
3120/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3121 if asynchronous code (filter or sentinel) is running. */
3122static void
971de7fb 3123save_search_regs (void)
7074fde6
FP
3124{
3125 if (!search_regs_saved)
3126 {
3127 saved_search_regs.num_regs = search_regs.num_regs;
3128 saved_search_regs.start = search_regs.start;
3129 saved_search_regs.end = search_regs.end;
41c01205
DK
3130 saved_last_thing_searched = last_thing_searched;
3131 last_thing_searched = Qnil;
7074fde6 3132 search_regs.num_regs = 0;
2d4a771a
RS
3133 search_regs.start = 0;
3134 search_regs.end = 0;
7074fde6
FP
3135
3136 search_regs_saved = 1;
3137 }
3138}
3139
3140/* Called upon exit from filters and sentinels. */
3141void
971de7fb 3142restore_search_regs (void)
7074fde6
FP
3143{
3144 if (search_regs_saved)
3145 {
3146 if (search_regs.num_regs > 0)
3147 {
3148 xfree (search_regs.start);
3149 xfree (search_regs.end);
3150 }
3151 search_regs.num_regs = saved_search_regs.num_regs;
3152 search_regs.start = saved_search_regs.start;
3153 search_regs.end = saved_search_regs.end;
41c01205
DK
3154 last_thing_searched = saved_last_thing_searched;
3155 saved_last_thing_searched = Qnil;
7074fde6
FP
3156 search_regs_saved = 0;
3157 }
3158}
3159
abd0071c 3160static Lisp_Object
971de7fb 3161unwind_set_match_data (Lisp_Object list)
abd0071c 3162{
b51d6c92
SM
3163 /* It is NOT ALWAYS safe to free (evaporate) the markers immediately. */
3164 return Fset_match_data (list, Qt);
abd0071c
KS
3165}
3166
3167/* Called to unwind protect the match data. */
3168void
971de7fb 3169record_unwind_save_match_data (void)
abd0071c
KS
3170{
3171 record_unwind_protect (unwind_set_match_data,
3172 Fmatch_data (Qnil, Qnil, Qnil));
3173}
3174
ca1d1d23
JB
3175/* Quote a string to inactivate reg-expr chars */
3176
3177DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
8c1a1077
PJ
3178 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
3179 (string)
5806161b 3180 Lisp_Object string;
ca1d1d23
JB
3181{
3182 register unsigned char *in, *out, *end;
3183 register unsigned char *temp;
0c8533c6 3184 int backslashes_added = 0;
ca1d1d23 3185
b7826503 3186 CHECK_STRING (string);
ca1d1d23 3187
d5db4077 3188 temp = (unsigned char *) alloca (SBYTES (string) * 2);
ca1d1d23
JB
3189
3190 /* Now copy the data into the new string, inserting escapes. */
3191
d5db4077
KR
3192 in = SDATA (string);
3193 end = in + SBYTES (string);
177c0ea7 3194 out = temp;
ca1d1d23
JB
3195
3196 for (; in != end; in++)
3197 {
66bc6082 3198 if (*in == '['
ca1d1d23
JB
3199 || *in == '*' || *in == '.' || *in == '\\'
3200 || *in == '?' || *in == '+'
3201 || *in == '^' || *in == '$')
0c8533c6 3202 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
3203 *out++ = *in;
3204 }
3205
3f8100f1 3206 return make_specified_string (temp,
d5db4077 3207 SCHARS (string) + backslashes_added,
3f8100f1
RS
3208 out - temp,
3209 STRING_MULTIBYTE (string));
ca1d1d23 3210}
177c0ea7 3211\f
dfcf069d 3212void
971de7fb 3213syms_of_search (void)
ca1d1d23
JB
3214{
3215 register int i;
3216
487282dc
KH
3217 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3218 {
3219 searchbufs[i].buf.allocated = 100;
b23c0a83 3220 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
487282dc
KH
3221 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3222 searchbufs[i].regexp = Qnil;
ecdb561e 3223 searchbufs[i].whitespace_regexp = Qnil;
b69e3c18 3224 searchbufs[i].syntax_table = Qnil;
487282dc 3225 staticpro (&searchbufs[i].regexp);
aa77b5ce 3226 staticpro (&searchbufs[i].whitespace_regexp);
b69e3c18 3227 staticpro (&searchbufs[i].syntax_table);
487282dc
KH
3228 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3229 }
3230 searchbuf_head = &searchbufs[0];
ca1d1d23 3231
d67b4f80 3232 Qsearch_failed = intern_c_string ("search-failed");
ca1d1d23 3233 staticpro (&Qsearch_failed);
d67b4f80 3234 Qinvalid_regexp = intern_c_string ("invalid-regexp");
ca1d1d23
JB
3235 staticpro (&Qinvalid_regexp);
3236
3237 Fput (Qsearch_failed, Qerror_conditions,
d67b4f80 3238 pure_cons (Qsearch_failed, pure_cons (Qerror, Qnil)));
ca1d1d23 3239 Fput (Qsearch_failed, Qerror_message,
d67b4f80 3240 make_pure_c_string ("Search failed"));
ca1d1d23
JB
3241
3242 Fput (Qinvalid_regexp, Qerror_conditions,
d67b4f80 3243 pure_cons (Qinvalid_regexp, pure_cons (Qerror, Qnil)));
ca1d1d23 3244 Fput (Qinvalid_regexp, Qerror_message,
d67b4f80 3245 make_pure_c_string ("Invalid regexp"));
ca1d1d23 3246
daa37602
JB
3247 last_thing_searched = Qnil;
3248 staticpro (&last_thing_searched);
3249
0f6af254
DK
3250 saved_last_thing_searched = Qnil;
3251 staticpro (&saved_last_thing_searched);
3252
41a33295 3253 DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
e2811828 3254 doc: /* Regexp to substitute for bunches of spaces in regexp search.
f31a9a68
RS
3255Some commands use this for user-specified regexps.
3256Spaces that occur inside character classes or repetition operators
3257or other such regexp constructs are not replaced with this.
3258A value of nil (which is the normal value) means treat spaces literally. */);
41a33295 3259 Vsearch_spaces_regexp = Qnil;
f31a9a68 3260
ef887810
RS
3261 DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data,
3262 doc: /* Internal use only.
39d0bf74
RS
3263If non-nil, the primitive searching and matching functions
3264such as `looking-at', `string-match', `re-search-forward', etc.,
3265do not set the match data. The proper way to use this variable
3266is to bind it with `let' around a small expression. */);
ef887810
RS
3267 Vinhibit_changing_match_data = Qnil;
3268
ca1d1d23 3269 defsubr (&Slooking_at);
b819a390
RS
3270 defsubr (&Sposix_looking_at);
3271 defsubr (&Sstring_match);
3272 defsubr (&Sposix_string_match);
ca1d1d23
JB
3273 defsubr (&Ssearch_forward);
3274 defsubr (&Ssearch_backward);
3275 defsubr (&Sword_search_forward);
3276 defsubr (&Sword_search_backward);
c6e0e1cc
CY
3277 defsubr (&Sword_search_forward_lax);
3278 defsubr (&Sword_search_backward_lax);
ca1d1d23
JB
3279 defsubr (&Sre_search_forward);
3280 defsubr (&Sre_search_backward);
b819a390
RS
3281 defsubr (&Sposix_search_forward);
3282 defsubr (&Sposix_search_backward);
ca1d1d23
JB
3283 defsubr (&Sreplace_match);
3284 defsubr (&Smatch_beginning);
3285 defsubr (&Smatch_end);
3286 defsubr (&Smatch_data);
3f1c005b 3287 defsubr (&Sset_match_data);
ca1d1d23
JB
3288 defsubr (&Sregexp_quote);
3289}
76fc1ea2
KH
3290
3291/* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3292 (do not change this comment) */