* icomplete.el (icomplete-simple-completing-p): Use the correct
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
0b5538bd 2 Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2002, 2003,
aaef169d 3 2004, 2005, 2006 Free Software Foundation, Inc.
ca1d1d23
JB
4
5This file is part of GNU Emacs.
6
7GNU Emacs is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
7c938215 9the Free Software Foundation; either version 2, or (at your option)
ca1d1d23
JB
10any later version.
11
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU Emacs; see the file COPYING. If not, write to
4fc5845f
LK
19the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20Boston, MA 02110-1301, USA. */
ca1d1d23
JB
21
22
18160b98 23#include <config.h>
ca1d1d23
JB
24#include "lisp.h"
25#include "syntax.h"
5679531d 26#include "category.h"
ca1d1d23 27#include "buffer.h"
5679531d 28#include "charset.h"
9169c321 29#include "region-cache.h"
ca1d1d23 30#include "commands.h"
9ac0d9e0 31#include "blockinput.h"
bf1760bb 32#include "intervals.h"
4746118a 33
ca1d1d23
JB
34#include <sys/types.h>
35#include "regex.h"
36
1d288aef 37#define REGEXP_CACHE_SIZE 20
ca1d1d23 38
487282dc
KH
39/* If the regexp is non-nil, then the buffer contains the compiled form
40 of that regexp, suitable for searching. */
1d288aef
RS
41struct regexp_cache
42{
487282dc 43 struct regexp_cache *next;
ecdb561e 44 Lisp_Object regexp, whitespace_regexp;
b69e3c18
CY
45 /* Syntax table for which the regexp applies. We need this because
46 of character classes. */
47 Lisp_Object syntax_table;
487282dc
KH
48 struct re_pattern_buffer buf;
49 char fastmap[0400];
b819a390
RS
50 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
51 char posix;
487282dc 52};
ca1d1d23 53
487282dc
KH
54/* The instances of that struct. */
55struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 56
487282dc
KH
57/* The head of the linked list; points to the most recently used buffer. */
58struct regexp_cache *searchbuf_head;
ca1d1d23 59
ca1d1d23 60
4746118a
JB
61/* Every call to re_match, etc., must pass &search_regs as the regs
62 argument unless you can show it is unnecessary (i.e., if re_match
63 is certainly going to be called again before region-around-match
64 can be called).
65
66 Since the registers are now dynamically allocated, we need to make
67 sure not to refer to the Nth register before checking that it has
1113d9db
JB
68 been allocated by checking search_regs.num_regs.
69
70 The regex code keeps track of whether it has allocated the search
487282dc
KH
71 buffer using bits in the re_pattern_buffer. This means that whenever
72 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
73 allocated any registers, and will allocate new registers the next
74 time you call a searching or matching function. Therefore, we need
75 to call re_set_registers after compiling a new pattern or after
76 setting the match registers, so that the regex functions will be
77 able to free or re-allocate it properly. */
ca1d1d23
JB
78static struct re_registers search_regs;
79
daa37602
JB
80/* The buffer in which the last search was performed, or
81 Qt if the last search was done in a string;
82 Qnil if no searching has been done yet. */
83static Lisp_Object last_thing_searched;
ca1d1d23 84
8e6208c5 85/* error condition signaled when regexp compile_pattern fails */
ca1d1d23
JB
86
87Lisp_Object Qinvalid_regexp;
88
06f77a8a
KS
89/* Error condition used for failing searches */
90Lisp_Object Qsearch_failed;
91
41a33295 92Lisp_Object Vsearch_spaces_regexp;
f31a9a68 93
ca325161 94static void set_search_regs ();
044f81f1 95static void save_search_regs ();
facdc750
RS
96static int simple_search ();
97static int boyer_moore ();
b819a390 98static int search_buffer ();
01d09305 99static void matcher_overflow () NO_RETURN;
b819a390 100
ca1d1d23
JB
101static void
102matcher_overflow ()
103{
104 error ("Stack overflow in regexp matcher");
105}
106
b819a390
RS
107/* Compile a regexp and signal a Lisp error if anything goes wrong.
108 PATTERN is the pattern to compile.
109 CP is the place to put the result.
facdc750 110 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
111 REGP is the structure that says where to store the "register"
112 values that will result from matching this pattern.
113 If it is 0, we should compile the pattern not to record any
114 subexpression bounds.
115 POSIX is nonzero if we want full backtracking (POSIX style)
5679531d
KH
116 for this pattern. 0 means backtrack only enough to get a valid match.
117 MULTIBYTE is nonzero if we want to handle multibyte characters in
118 PATTERN. 0 means all multibyte characters are recognized just as
ecdb561e
RS
119 sequences of binary data.
120
41a33295 121 The behavior also depends on Vsearch_spaces_regexp. */
ca1d1d23 122
487282dc 123static void
5679531d 124compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
487282dc 125 struct regexp_cache *cp;
ca1d1d23 126 Lisp_Object pattern;
facdc750 127 Lisp_Object translate;
487282dc 128 struct re_registers *regp;
b819a390 129 int posix;
5679531d 130 int multibyte;
ca1d1d23 131{
7276d3d8 132 unsigned char *raw_pattern;
f8bd51c4 133 int raw_pattern_size;
d451e4db 134 char *val;
b819a390 135 reg_syntax_t old;
ca1d1d23 136
f8bd51c4
KH
137 /* MULTIBYTE says whether the text to be searched is multibyte.
138 We must convert PATTERN to match that, or we will not really
139 find things right. */
140
141 if (multibyte == STRING_MULTIBYTE (pattern))
142 {
d5db4077
KR
143 raw_pattern = (unsigned char *) SDATA (pattern);
144 raw_pattern_size = SBYTES (pattern);
f8bd51c4
KH
145 }
146 else if (multibyte)
147 {
d5db4077
KR
148 raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
149 SCHARS (pattern));
7276d3d8 150 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
151 copy_text (SDATA (pattern), raw_pattern,
152 SCHARS (pattern), 0, 1);
f8bd51c4
KH
153 }
154 else
155 {
156 /* Converting multibyte to single-byte.
157
158 ??? Perhaps this conversion should be done in a special way
159 by subtracting nonascii-insert-offset from each non-ASCII char,
160 so that only the multibyte chars which really correspond to
161 the chosen single-byte character set can possibly match. */
d5db4077 162 raw_pattern_size = SCHARS (pattern);
7276d3d8 163 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
164 copy_text (SDATA (pattern), raw_pattern,
165 SBYTES (pattern), 1, 0);
f8bd51c4
KH
166 }
167
487282dc 168 cp->regexp = Qnil;
59fab369 169 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
b819a390 170 cp->posix = posix;
5679531d 171 cp->buf.multibyte = multibyte;
41a33295 172 cp->whitespace_regexp = Vsearch_spaces_regexp;
b69e3c18 173 cp->syntax_table = current_buffer->syntax_table;
910c747a
RS
174 /* Doing BLOCK_INPUT here has the effect that
175 the debugger won't run if an error occurs.
176 Why is BLOCK_INPUT needed here? */
9ac0d9e0 177 BLOCK_INPUT;
fb4a568d 178 old = re_set_syntax (RE_SYNTAX_EMACS
b819a390 179 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
f31a9a68 180
41a33295
RS
181 re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
182 : SDATA (Vsearch_spaces_regexp));
f31a9a68 183
7276d3d8
RS
184 val = (char *) re_compile_pattern ((char *)raw_pattern,
185 raw_pattern_size, &cp->buf);
f31a9a68
RS
186
187 re_set_whitespace_regexp (NULL);
188
b819a390 189 re_set_syntax (old);
9ac0d9e0 190 UNBLOCK_INPUT;
ca1d1d23 191 if (val)
06f77a8a 192 xsignal1 (Qinvalid_regexp, build_string (val));
1113d9db 193
487282dc 194 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
195}
196
6efc7887
RS
197/* Shrink each compiled regexp buffer in the cache
198 to the size actually used right now.
199 This is called from garbage collection. */
200
201void
202shrink_regexp_cache ()
203{
a968f437 204 struct regexp_cache *cp;
6efc7887
RS
205
206 for (cp = searchbuf_head; cp != 0; cp = cp->next)
207 {
208 cp->buf.allocated = cp->buf.used;
209 cp->buf.buffer
b23c0a83 210 = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
6efc7887
RS
211 }
212}
213
e5b94d44
CY
214/* Clear the regexp cache.
215 There is no danger of memory leak here because re_compile_pattern
216 automagically manages the memory in each re_pattern_buffer struct,
217 based on its `allocated' and `buffer' values. */
218void
219clear_regexp_cache ()
220{
221 int i;
222
223 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
224 searchbufs[i].regexp = Qnil;
225}
226
487282dc 227/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
228 the cache.
229 PATTERN is the pattern to compile.
facdc750 230 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
231 REGP is the structure that says where to store the "register"
232 values that will result from matching this pattern.
233 If it is 0, we should compile the pattern not to record any
234 subexpression bounds.
235 POSIX is nonzero if we want full backtracking (POSIX style)
236 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
237
238struct re_pattern_buffer *
0c8533c6 239compile_pattern (pattern, regp, translate, posix, multibyte)
487282dc
KH
240 Lisp_Object pattern;
241 struct re_registers *regp;
facdc750 242 Lisp_Object translate;
0c8533c6 243 int posix, multibyte;
487282dc
KH
244{
245 struct regexp_cache *cp, **cpp;
246
247 for (cpp = &searchbuf_head; ; cpp = &cp->next)
248 {
249 cp = *cpp;
f1b9c7c1
KR
250 /* Entries are initialized to nil, and may be set to nil by
251 compile_pattern_1 if the pattern isn't valid. Don't apply
49a5f770
KR
252 string accessors in those cases. However, compile_pattern_1
253 is only applied to the cache entry we pick here to reuse. So
254 nil should never appear before a non-nil entry. */
7c752c80 255 if (NILP (cp->regexp))
f1b9c7c1 256 goto compile_it;
d5db4077 257 if (SCHARS (cp->regexp) == SCHARS (pattern)
cf69b13e 258 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
1d288aef 259 && !NILP (Fstring_equal (cp->regexp, pattern))
59fab369 260 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
5679531d 261 && cp->posix == posix
ecdb561e 262 && cp->buf.multibyte == multibyte
b69e3c18
CY
263 /* TODO: Strictly speaking, we only need to match syntax
264 tables when a character class like [[:space:]] occurs in
265 the pattern. -- cyd*/
266 && EQ (cp->syntax_table, current_buffer->syntax_table)
41a33295 267 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp)))
487282dc
KH
268 break;
269
f1b9c7c1
KR
270 /* If we're at the end of the cache, compile into the nil cell
271 we found, or the last (least recently used) cell with a
272 string value. */
487282dc
KH
273 if (cp->next == 0)
274 {
f1b9c7c1 275 compile_it:
5679531d 276 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
487282dc
KH
277 break;
278 }
279 }
280
281 /* When we get here, cp (aka *cpp) contains the compiled pattern,
282 either because we found it in the cache or because we just compiled it.
283 Move it to the front of the queue to mark it as most recently used. */
284 *cpp = cp->next;
285 cp->next = searchbuf_head;
286 searchbuf_head = cp;
1113d9db 287
6639708c
RS
288 /* Advise the searching functions about the space we have allocated
289 for register data. */
290 if (regp)
291 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
292
487282dc 293 return &cp->buf;
ca1d1d23
JB
294}
295
ca1d1d23 296\f
b819a390
RS
297static Lisp_Object
298looking_at_1 (string, posix)
ca1d1d23 299 Lisp_Object string;
b819a390 300 int posix;
ca1d1d23
JB
301{
302 Lisp_Object val;
303 unsigned char *p1, *p2;
304 int s1, s2;
305 register int i;
487282dc 306 struct re_pattern_buffer *bufp;
ca1d1d23 307
7074fde6
FP
308 if (running_asynch_code)
309 save_search_regs ();
310
910c747a
RS
311 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
312 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
313 = current_buffer->case_eqv_table;
314
b7826503 315 CHECK_STRING (string);
487282dc
KH
316 bufp = compile_pattern (string, &search_regs,
317 (!NILP (current_buffer->case_fold_search)
0190922f 318 ? current_buffer->case_canon_table : Qnil),
0c8533c6
RS
319 posix,
320 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23
JB
321
322 immediate_quit = 1;
323 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
324
325 /* Get pointers and sizes of the two strings
326 that make up the visible portion of the buffer. */
327
328 p1 = BEGV_ADDR;
fa8ed3e0 329 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 330 p2 = GAP_END_ADDR;
fa8ed3e0 331 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
332 if (s1 < 0)
333 {
334 p2 = p1;
fa8ed3e0 335 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
336 s1 = 0;
337 }
338 if (s2 < 0)
339 {
fa8ed3e0 340 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
341 s2 = 0;
342 }
8bb43c28
RS
343
344 re_match_object = Qnil;
177c0ea7 345
487282dc 346 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
fa8ed3e0
RS
347 PT_BYTE - BEGV_BYTE, &search_regs,
348 ZV_BYTE - BEGV_BYTE);
de182d70 349 immediate_quit = 0;
177c0ea7 350
ca1d1d23
JB
351 if (i == -2)
352 matcher_overflow ();
353
354 val = (0 <= i ? Qt : Qnil);
fa8ed3e0
RS
355 if (i >= 0)
356 for (i = 0; i < search_regs.num_regs; i++)
357 if (search_regs.start[i] >= 0)
358 {
359 search_regs.start[i]
360 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
361 search_regs.end[i]
362 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
363 }
a3668d92 364 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
365 return val;
366}
367
b819a390 368DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
8c1a1077
PJ
369 doc: /* Return t if text after point matches regular expression REGEXP.
370This function modifies the match data that `match-beginning',
371`match-end' and `match-data' access; save and restore the match
372data if you want to preserve them. */)
373 (regexp)
94f94972 374 Lisp_Object regexp;
b819a390 375{
94f94972 376 return looking_at_1 (regexp, 0);
b819a390
RS
377}
378
379DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
8c1a1077
PJ
380 doc: /* Return t if text after point matches regular expression REGEXP.
381Find the longest match, in accord with Posix regular expression rules.
382This function modifies the match data that `match-beginning',
383`match-end' and `match-data' access; save and restore the match
384data if you want to preserve them. */)
385 (regexp)
94f94972 386 Lisp_Object regexp;
b819a390 387{
94f94972 388 return looking_at_1 (regexp, 1);
b819a390
RS
389}
390\f
391static Lisp_Object
392string_match_1 (regexp, string, start, posix)
ca1d1d23 393 Lisp_Object regexp, string, start;
b819a390 394 int posix;
ca1d1d23
JB
395{
396 int val;
487282dc 397 struct re_pattern_buffer *bufp;
0c8533c6
RS
398 int pos, pos_byte;
399 int i;
ca1d1d23 400
7074fde6
FP
401 if (running_asynch_code)
402 save_search_regs ();
403
b7826503
PJ
404 CHECK_STRING (regexp);
405 CHECK_STRING (string);
ca1d1d23
JB
406
407 if (NILP (start))
0c8533c6 408 pos = 0, pos_byte = 0;
ca1d1d23
JB
409 else
410 {
d5db4077 411 int len = SCHARS (string);
ca1d1d23 412
b7826503 413 CHECK_NUMBER (start);
0c8533c6
RS
414 pos = XINT (start);
415 if (pos < 0 && -pos <= len)
416 pos = len + pos;
417 else if (0 > pos || pos > len)
ca1d1d23 418 args_out_of_range (string, start);
0c8533c6 419 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
420 }
421
910c747a
RS
422 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
423 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
424 = current_buffer->case_eqv_table;
425
487282dc
KH
426 bufp = compile_pattern (regexp, &search_regs,
427 (!NILP (current_buffer->case_fold_search)
0190922f 428 ? current_buffer->case_canon_table : Qnil),
0c8533c6
RS
429 posix,
430 STRING_MULTIBYTE (string));
ca1d1d23 431 immediate_quit = 1;
8bb43c28 432 re_match_object = string;
177c0ea7 433
d5db4077
KR
434 val = re_search (bufp, (char *) SDATA (string),
435 SBYTES (string), pos_byte,
436 SBYTES (string) - pos_byte,
ca1d1d23
JB
437 &search_regs);
438 immediate_quit = 0;
daa37602 439 last_thing_searched = Qt;
ca1d1d23
JB
440 if (val == -2)
441 matcher_overflow ();
442 if (val < 0) return Qnil;
0c8533c6
RS
443
444 for (i = 0; i < search_regs.num_regs; i++)
445 if (search_regs.start[i] >= 0)
446 {
447 search_regs.start[i]
448 = string_byte_to_char (string, search_regs.start[i]);
449 search_regs.end[i]
450 = string_byte_to_char (string, search_regs.end[i]);
451 }
452
453 return make_number (string_byte_to_char (string, val));
ca1d1d23 454}
e59a8453 455
b819a390 456DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
8c1a1077 457 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
b85acc4b 458Matching ignores case if `case-fold-search' is non-nil.
8c1a1077
PJ
459If third arg START is non-nil, start search at that index in STRING.
460For index of first char beyond the match, do (match-end 0).
461`match-end' and `match-beginning' also give indices of substrings
2bd2f32d
RS
462matched by parenthesis constructs in the pattern.
463
464You can use the function `match-string' to extract the substrings
465matched by the parenthesis constructions in REGEXP. */)
8c1a1077 466 (regexp, string, start)
b819a390
RS
467 Lisp_Object regexp, string, start;
468{
469 return string_match_1 (regexp, string, start, 0);
470}
471
472DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
8c1a1077
PJ
473 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
474Find the longest match, in accord with Posix regular expression rules.
475Case is ignored if `case-fold-search' is non-nil in the current buffer.
476If third arg START is non-nil, start search at that index in STRING.
477For index of first char beyond the match, do (match-end 0).
478`match-end' and `match-beginning' also give indices of substrings
479matched by parenthesis constructs in the pattern. */)
480 (regexp, string, start)
b819a390
RS
481 Lisp_Object regexp, string, start;
482{
483 return string_match_1 (regexp, string, start, 1);
484}
485
e59a8453
RS
486/* Match REGEXP against STRING, searching all of STRING,
487 and return the index of the match, or negative on failure.
488 This does not clobber the match data. */
489
490int
491fast_string_match (regexp, string)
492 Lisp_Object regexp, string;
493{
494 int val;
487282dc 495 struct re_pattern_buffer *bufp;
e59a8453 496
facdc750
RS
497 bufp = compile_pattern (regexp, 0, Qnil,
498 0, STRING_MULTIBYTE (string));
e59a8453 499 immediate_quit = 1;
8bb43c28 500 re_match_object = string;
177c0ea7 501
d5db4077
KR
502 val = re_search (bufp, (char *) SDATA (string),
503 SBYTES (string), 0,
504 SBYTES (string), 0);
e59a8453
RS
505 immediate_quit = 0;
506 return val;
507}
5679531d
KH
508
509/* Match REGEXP against STRING, searching all of STRING ignoring case,
510 and return the index of the match, or negative on failure.
0c8533c6
RS
511 This does not clobber the match data.
512 We assume that STRING contains single-byte characters. */
5679531d
KH
513
514extern Lisp_Object Vascii_downcase_table;
515
516int
b4577c63 517fast_c_string_match_ignore_case (regexp, string)
5679531d 518 Lisp_Object regexp;
96b80561 519 const char *string;
5679531d
KH
520{
521 int val;
522 struct re_pattern_buffer *bufp;
523 int len = strlen (string);
524
0c8533c6 525 regexp = string_make_unibyte (regexp);
b4577c63 526 re_match_object = Qt;
5679531d 527 bufp = compile_pattern (regexp, 0,
0190922f 528 Vascii_canon_table, 0,
f8bd51c4 529 0);
5679531d
KH
530 immediate_quit = 1;
531 val = re_search (bufp, string, len, 0, len, 0);
532 immediate_quit = 0;
533 return val;
534}
be5f4dfb
KH
535
536/* Like fast_string_match but ignore case. */
537
538int
539fast_string_match_ignore_case (regexp, string)
540 Lisp_Object regexp, string;
541{
542 int val;
543 struct re_pattern_buffer *bufp;
544
0190922f 545 bufp = compile_pattern (regexp, 0, Vascii_canon_table,
be5f4dfb
KH
546 0, STRING_MULTIBYTE (string));
547 immediate_quit = 1;
548 re_match_object = string;
549
550 val = re_search (bufp, (char *) SDATA (string),
551 SBYTES (string), 0,
552 SBYTES (string), 0);
553 immediate_quit = 0;
554 return val;
555}
ca1d1d23 556\f
9169c321
JB
557/* The newline cache: remembering which sections of text have no newlines. */
558
559/* If the user has requested newline caching, make sure it's on.
560 Otherwise, make sure it's off.
561 This is our cheezy way of associating an action with the change of
562 state of a buffer-local variable. */
563static void
564newline_cache_on_off (buf)
565 struct buffer *buf;
566{
567 if (NILP (buf->cache_long_line_scans))
568 {
569 /* It should be off. */
570 if (buf->newline_cache)
571 {
572 free_region_cache (buf->newline_cache);
573 buf->newline_cache = 0;
574 }
575 }
576 else
577 {
578 /* It should be on. */
579 if (buf->newline_cache == 0)
580 buf->newline_cache = new_region_cache ();
581 }
582}
583
584\f
585/* Search for COUNT instances of the character TARGET between START and END.
586
587 If COUNT is positive, search forwards; END must be >= START.
588 If COUNT is negative, search backwards for the -COUNTth instance;
589 END must be <= START.
590 If COUNT is zero, do anything you please; run rogue, for all I care.
591
592 If END is zero, use BEGV or ZV instead, as appropriate for the
593 direction indicated by COUNT.
ffd56f97
JB
594
595 If we find COUNT instances, set *SHORTAGE to zero, and return the
a9f2a45f 596 position past the COUNTth match. Note that for reverse motion
5bfe95c9 597 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 598
9169c321
JB
599 If we don't find COUNT instances before reaching END, set *SHORTAGE
600 to the number of TARGETs left unfound, and return END.
ffd56f97 601
087a5f81
RS
602 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
603 except when inside redisplay. */
604
dfcf069d 605int
9169c321
JB
606scan_buffer (target, start, end, count, shortage, allow_quit)
607 register int target;
608 int start, end;
609 int count;
610 int *shortage;
087a5f81 611 int allow_quit;
ca1d1d23 612{
9169c321 613 struct region_cache *newline_cache;
177c0ea7 614 int direction;
ffd56f97 615
9169c321
JB
616 if (count > 0)
617 {
618 direction = 1;
619 if (! end) end = ZV;
620 }
621 else
622 {
623 direction = -1;
624 if (! end) end = BEGV;
625 }
ffd56f97 626
9169c321
JB
627 newline_cache_on_off (current_buffer);
628 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
629
630 if (shortage != 0)
631 *shortage = 0;
632
087a5f81 633 immediate_quit = allow_quit;
ca1d1d23 634
ffd56f97 635 if (count > 0)
9169c321 636 while (start != end)
ca1d1d23 637 {
9169c321
JB
638 /* Our innermost scanning loop is very simple; it doesn't know
639 about gaps, buffer ends, or the newline cache. ceiling is
640 the position of the last character before the next such
641 obstacle --- the last character the dumb search loop should
642 examine. */
fa8ed3e0
RS
643 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
644 int start_byte = CHAR_TO_BYTE (start);
67ce527d 645 int tem;
9169c321
JB
646
647 /* If we're looking for a newline, consult the newline cache
648 to see where we can avoid some scanning. */
649 if (target == '\n' && newline_cache)
650 {
651 int next_change;
652 immediate_quit = 0;
653 while (region_cache_forward
fa8ed3e0
RS
654 (current_buffer, newline_cache, start_byte, &next_change))
655 start_byte = next_change;
cbe0db0d 656 immediate_quit = allow_quit;
9169c321 657
fa8ed3e0
RS
658 /* START should never be after END. */
659 if (start_byte > ceiling_byte)
660 start_byte = ceiling_byte;
9169c321
JB
661
662 /* Now the text after start is an unknown region, and
663 next_change is the position of the next known region. */
fa8ed3e0 664 ceiling_byte = min (next_change - 1, ceiling_byte);
9169c321
JB
665 }
666
667 /* The dumb loop can only scan text stored in contiguous
668 bytes. BUFFER_CEILING_OF returns the last character
669 position that is contiguous, so the ceiling is the
670 position after that. */
67ce527d
KH
671 tem = BUFFER_CEILING_OF (start_byte);
672 ceiling_byte = min (tem, ceiling_byte);
9169c321
JB
673
674 {
177c0ea7 675 /* The termination address of the dumb loop. */
fa8ed3e0
RS
676 register unsigned char *ceiling_addr
677 = BYTE_POS_ADDR (ceiling_byte) + 1;
678 register unsigned char *cursor
679 = BYTE_POS_ADDR (start_byte);
9169c321
JB
680 unsigned char *base = cursor;
681
682 while (cursor < ceiling_addr)
683 {
684 unsigned char *scan_start = cursor;
685
686 /* The dumb loop. */
687 while (*cursor != target && ++cursor < ceiling_addr)
688 ;
689
690 /* If we're looking for newlines, cache the fact that
691 the region from start to cursor is free of them. */
692 if (target == '\n' && newline_cache)
693 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
694 start_byte + scan_start - base,
695 start_byte + cursor - base);
9169c321
JB
696
697 /* Did we find the target character? */
698 if (cursor < ceiling_addr)
699 {
700 if (--count == 0)
701 {
702 immediate_quit = 0;
fa8ed3e0 703 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
9169c321
JB
704 }
705 cursor++;
706 }
707 }
708
fa8ed3e0 709 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321 710 }
ca1d1d23
JB
711 }
712 else
9169c321
JB
713 while (start > end)
714 {
715 /* The last character to check before the next obstacle. */
fa8ed3e0
RS
716 int ceiling_byte = CHAR_TO_BYTE (end);
717 int start_byte = CHAR_TO_BYTE (start);
67ce527d 718 int tem;
9169c321
JB
719
720 /* Consult the newline cache, if appropriate. */
721 if (target == '\n' && newline_cache)
722 {
723 int next_change;
724 immediate_quit = 0;
725 while (region_cache_backward
fa8ed3e0
RS
726 (current_buffer, newline_cache, start_byte, &next_change))
727 start_byte = next_change;
cbe0db0d 728 immediate_quit = allow_quit;
9169c321
JB
729
730 /* Start should never be at or before end. */
fa8ed3e0
RS
731 if (start_byte <= ceiling_byte)
732 start_byte = ceiling_byte + 1;
9169c321
JB
733
734 /* Now the text before start is an unknown region, and
735 next_change is the position of the next known region. */
fa8ed3e0 736 ceiling_byte = max (next_change, ceiling_byte);
9169c321
JB
737 }
738
739 /* Stop scanning before the gap. */
67ce527d
KH
740 tem = BUFFER_FLOOR_OF (start_byte - 1);
741 ceiling_byte = max (tem, ceiling_byte);
9169c321
JB
742
743 {
744 /* The termination address of the dumb loop. */
fa8ed3e0
RS
745 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
746 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
747 unsigned char *base = cursor;
748
749 while (cursor >= ceiling_addr)
750 {
751 unsigned char *scan_start = cursor;
752
753 while (*cursor != target && --cursor >= ceiling_addr)
754 ;
755
756 /* If we're looking for newlines, cache the fact that
757 the region from after the cursor to start is free of them. */
758 if (target == '\n' && newline_cache)
759 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
760 start_byte + cursor - base,
761 start_byte + scan_start - base);
9169c321
JB
762
763 /* Did we find the target character? */
764 if (cursor >= ceiling_addr)
765 {
766 if (++count >= 0)
767 {
768 immediate_quit = 0;
fa8ed3e0 769 return BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
770 }
771 cursor--;
772 }
773 }
774
fa8ed3e0 775 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
776 }
777 }
778
ca1d1d23
JB
779 immediate_quit = 0;
780 if (shortage != 0)
ffd56f97 781 *shortage = count * direction;
9169c321 782 return start;
ca1d1d23 783}
fa8ed3e0
RS
784\f
785/* Search for COUNT instances of a line boundary, which means either a
786 newline or (if selective display enabled) a carriage return.
787 Start at START. If COUNT is negative, search backwards.
788
789 We report the resulting position by calling TEMP_SET_PT_BOTH.
790
791 If we find COUNT instances. we position after (always after,
792 even if scanning backwards) the COUNTth match, and return 0.
793
794 If we don't find COUNT instances before reaching the end of the
795 buffer (or the beginning, if scanning backwards), we return
796 the number of line boundaries left unfound, and position at
797 the limit we bumped up against.
798
799 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
d5d57b92 800 except in special cases. */
ca1d1d23 801
63fa018d 802int
fa8ed3e0
RS
803scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
804 int start, start_byte;
805 int limit, limit_byte;
806 register int count;
807 int allow_quit;
63fa018d 808{
fa8ed3e0
RS
809 int direction = ((count > 0) ? 1 : -1);
810
811 register unsigned char *cursor;
812 unsigned char *base;
813
814 register int ceiling;
815 register unsigned char *ceiling_addr;
816
d5d57b92
RS
817 int old_immediate_quit = immediate_quit;
818
fa8ed3e0
RS
819 /* The code that follows is like scan_buffer
820 but checks for either newline or carriage return. */
821
d5d57b92
RS
822 if (allow_quit)
823 immediate_quit++;
fa8ed3e0
RS
824
825 start_byte = CHAR_TO_BYTE (start);
826
827 if (count > 0)
828 {
829 while (start_byte < limit_byte)
830 {
831 ceiling = BUFFER_CEILING_OF (start_byte);
832 ceiling = min (limit_byte - 1, ceiling);
833 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
834 base = (cursor = BYTE_POS_ADDR (start_byte));
835 while (1)
836 {
837 while (*cursor != '\n' && ++cursor != ceiling_addr)
838 ;
839
840 if (cursor != ceiling_addr)
841 {
842 if (--count == 0)
843 {
d5d57b92 844 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
845 start_byte = start_byte + cursor - base + 1;
846 start = BYTE_TO_CHAR (start_byte);
847 TEMP_SET_PT_BOTH (start, start_byte);
848 return 0;
849 }
850 else
851 if (++cursor == ceiling_addr)
852 break;
853 }
854 else
855 break;
856 }
857 start_byte += cursor - base;
858 }
859 }
860 else
861 {
fa8ed3e0
RS
862 while (start_byte > limit_byte)
863 {
864 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
865 ceiling = max (limit_byte, ceiling);
866 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
867 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
868 while (1)
869 {
870 while (--cursor != ceiling_addr && *cursor != '\n')
871 ;
872
873 if (cursor != ceiling_addr)
874 {
875 if (++count == 0)
876 {
d5d57b92 877 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
878 /* Return the position AFTER the match we found. */
879 start_byte = start_byte + cursor - base + 1;
880 start = BYTE_TO_CHAR (start_byte);
881 TEMP_SET_PT_BOTH (start, start_byte);
882 return 0;
883 }
884 }
885 else
886 break;
887 }
888 /* Here we add 1 to compensate for the last decrement
889 of CURSOR, which took it past the valid range. */
890 start_byte += cursor - base + 1;
891 }
892 }
893
894 TEMP_SET_PT_BOTH (limit, limit_byte);
d5d57b92 895 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
896
897 return count * direction;
63fa018d
RS
898}
899
ca1d1d23 900int
fa8ed3e0 901find_next_newline_no_quit (from, cnt)
ca1d1d23
JB
902 register int from, cnt;
903{
fa8ed3e0 904 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
9169c321
JB
905}
906
9169c321
JB
907/* Like find_next_newline, but returns position before the newline,
908 not after, and only search up to TO. This isn't just
909 find_next_newline (...)-1, because you might hit TO. */
fa8ed3e0 910
9169c321
JB
911int
912find_before_next_newline (from, to, cnt)
cbe0db0d 913 int from, to, cnt;
9169c321
JB
914{
915 int shortage;
916 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
917
918 if (shortage == 0)
919 pos--;
177c0ea7 920
9169c321 921 return pos;
ca1d1d23
JB
922}
923\f
ca1d1d23
JB
924/* Subroutines of Lisp buffer search functions. */
925
926static Lisp_Object
b819a390 927search_command (string, bound, noerror, count, direction, RE, posix)
ca1d1d23
JB
928 Lisp_Object string, bound, noerror, count;
929 int direction;
930 int RE;
b819a390 931 int posix;
ca1d1d23
JB
932{
933 register int np;
9f43ad85 934 int lim, lim_byte;
ca1d1d23
JB
935 int n = direction;
936
937 if (!NILP (count))
938 {
b7826503 939 CHECK_NUMBER (count);
ca1d1d23
JB
940 n *= XINT (count);
941 }
942
b7826503 943 CHECK_STRING (string);
ca1d1d23 944 if (NILP (bound))
9f43ad85
RS
945 {
946 if (n > 0)
947 lim = ZV, lim_byte = ZV_BYTE;
948 else
949 lim = BEGV, lim_byte = BEGV_BYTE;
950 }
ca1d1d23
JB
951 else
952 {
b7826503 953 CHECK_NUMBER_COERCE_MARKER (bound);
ca1d1d23 954 lim = XINT (bound);
6ec8bbd2 955 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
956 error ("Invalid search bound (wrong side of point)");
957 if (lim > ZV)
9f43ad85 958 lim = ZV, lim_byte = ZV_BYTE;
588d2fd5 959 else if (lim < BEGV)
9f43ad85 960 lim = BEGV, lim_byte = BEGV_BYTE;
588d2fd5
KH
961 else
962 lim_byte = CHAR_TO_BYTE (lim);
ca1d1d23
JB
963 }
964
910c747a
RS
965 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
966 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
967 = current_buffer->case_eqv_table;
968
9f43ad85 969 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
ca1d1d23 970 (!NILP (current_buffer->case_fold_search)
facdc750 971 ? current_buffer->case_canon_table
3135e9fd 972 : Qnil),
ca1d1d23 973 (!NILP (current_buffer->case_fold_search)
facdc750 974 ? current_buffer->case_eqv_table
3135e9fd 975 : Qnil),
b819a390 976 posix);
ca1d1d23
JB
977 if (np <= 0)
978 {
979 if (NILP (noerror))
06f77a8a
KS
980 xsignal1 (Qsearch_failed, string);
981
ca1d1d23
JB
982 if (!EQ (noerror, Qt))
983 {
984 if (lim < BEGV || lim > ZV)
985 abort ();
9f43ad85 986 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
987 return Qnil;
988#if 0 /* This would be clean, but maybe programs depend on
989 a value of nil here. */
481399bf 990 np = lim;
a5f217b8 991#endif
ca1d1d23 992 }
481399bf
RS
993 else
994 return Qnil;
ca1d1d23
JB
995 }
996
997 if (np < BEGV || np > ZV)
998 abort ();
999
1000 SET_PT (np);
1001
1002 return make_number (np);
1003}
1004\f
fa8ed3e0
RS
1005/* Return 1 if REGEXP it matches just one constant string. */
1006
b6d6a51c
KH
1007static int
1008trivial_regexp_p (regexp)
1009 Lisp_Object regexp;
1010{
d5db4077
KR
1011 int len = SBYTES (regexp);
1012 unsigned char *s = SDATA (regexp);
b6d6a51c
KH
1013 while (--len >= 0)
1014 {
1015 switch (*s++)
1016 {
1017 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1018 return 0;
1019 case '\\':
1020 if (--len < 0)
1021 return 0;
1022 switch (*s++)
1023 {
1024 case '|': case '(': case ')': case '`': case '\'': case 'b':
1025 case 'B': case '<': case '>': case 'w': case 'W': case 's':
29f89fe7 1026 case 'S': case '=': case '{': case '}': case '_':
5679531d 1027 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 1028 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
1029 case '6': case '7': case '8': case '9':
1030 return 0;
1031 }
1032 }
1033 }
1034 return 1;
1035}
1036
ca325161 1037/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 1038 starting at position POS and stopping at position LIM,
b819a390 1039 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
1040 a regular expression if RE is true.
1041
1042 If N is positive, searching is forward and LIM must be greater than POS.
1043 If N is negative, searching is backward and LIM must be less than POS.
1044
facdc750 1045 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 1046 or else the position at the beginning of the Nth occurrence
b819a390
RS
1047 (if searching backward) or the end (if searching forward).
1048
1049 POSIX is nonzero if we want full backtracking (POSIX style)
1050 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 1051
aff2ce94
RS
1052#define TRANSLATE(out, trt, d) \
1053do \
1054 { \
1055 if (! NILP (trt)) \
1056 { \
1057 Lisp_Object temp; \
1058 temp = Faref (trt, make_number (d)); \
1059 if (INTEGERP (temp)) \
1060 out = XINT (temp); \
1061 else \
1062 out = d; \
1063 } \
1064 else \
1065 out = d; \
1066 } \
1067while (0)
facdc750 1068
b819a390 1069static int
9f43ad85
RS
1070search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1071 RE, trt, inverse_trt, posix)
ca1d1d23
JB
1072 Lisp_Object string;
1073 int pos;
9f43ad85 1074 int pos_byte;
ca1d1d23 1075 int lim;
9f43ad85 1076 int lim_byte;
ca1d1d23
JB
1077 int n;
1078 int RE;
facdc750
RS
1079 Lisp_Object trt;
1080 Lisp_Object inverse_trt;
b819a390 1081 int posix;
ca1d1d23 1082{
d5db4077
KR
1083 int len = SCHARS (string);
1084 int len_byte = SBYTES (string);
facdc750 1085 register int i;
ca1d1d23 1086
7074fde6
FP
1087 if (running_asynch_code)
1088 save_search_regs ();
1089
a7e4cdde 1090 /* Searching 0 times means don't move. */
ca1d1d23 1091 /* Null string is found at starting position. */
a7e4cdde 1092 if (len == 0 || n == 0)
ca325161 1093 {
0353b28f 1094 set_search_regs (pos_byte, 0);
ca325161
RS
1095 return pos;
1096 }
3f57a499 1097
41a33295 1098 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
ca1d1d23 1099 {
facdc750
RS
1100 unsigned char *p1, *p2;
1101 int s1, s2;
487282dc
KH
1102 struct re_pattern_buffer *bufp;
1103
0c8533c6
RS
1104 bufp = compile_pattern (string, &search_regs, trt, posix,
1105 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23 1106
ca1d1d23
JB
1107 immediate_quit = 1; /* Quit immediately if user types ^G,
1108 because letting this function finish
1109 can take too long. */
1110 QUIT; /* Do a pending quit right away,
1111 to avoid paradoxical behavior */
1112 /* Get pointers and sizes of the two strings
1113 that make up the visible portion of the buffer. */
1114
1115 p1 = BEGV_ADDR;
fa8ed3e0 1116 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1117 p2 = GAP_END_ADDR;
fa8ed3e0 1118 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1119 if (s1 < 0)
1120 {
1121 p2 = p1;
fa8ed3e0 1122 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1123 s1 = 0;
1124 }
1125 if (s2 < 0)
1126 {
fa8ed3e0 1127 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1128 s2 = 0;
1129 }
8bb43c28 1130 re_match_object = Qnil;
177c0ea7 1131
ca1d1d23
JB
1132 while (n < 0)
1133 {
42db823b 1134 int val;
487282dc 1135 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1136 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1137 &search_regs,
42db823b 1138 /* Don't allow match past current point */
4996330b 1139 pos_byte - BEGV_BYTE);
ca1d1d23 1140 if (val == -2)
b6d6a51c
KH
1141 {
1142 matcher_overflow ();
1143 }
ca1d1d23
JB
1144 if (val >= 0)
1145 {
26aff150 1146 pos_byte = search_regs.start[0] + BEGV_BYTE;
4746118a 1147 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1148 if (search_regs.start[i] >= 0)
1149 {
fa8ed3e0
RS
1150 search_regs.start[i]
1151 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1152 search_regs.end[i]
1153 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1154 }
a3668d92 1155 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1156 /* Set pos to the new position. */
1157 pos = search_regs.start[0];
1158 }
1159 else
1160 {
1161 immediate_quit = 0;
1162 return (n);
1163 }
1164 n++;
1165 }
1166 while (n > 0)
1167 {
42db823b 1168 int val;
487282dc 1169 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1170 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1171 &search_regs,
1172 lim_byte - BEGV_BYTE);
ca1d1d23 1173 if (val == -2)
b6d6a51c
KH
1174 {
1175 matcher_overflow ();
1176 }
ca1d1d23
JB
1177 if (val >= 0)
1178 {
26aff150 1179 pos_byte = search_regs.end[0] + BEGV_BYTE;
4746118a 1180 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1181 if (search_regs.start[i] >= 0)
1182 {
fa8ed3e0
RS
1183 search_regs.start[i]
1184 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1185 search_regs.end[i]
1186 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1187 }
a3668d92 1188 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1189 pos = search_regs.end[0];
1190 }
1191 else
1192 {
1193 immediate_quit = 0;
1194 return (0 - n);
1195 }
1196 n--;
1197 }
1198 immediate_quit = 0;
1199 return (pos);
1200 }
1201 else /* non-RE case */
1202 {
facdc750
RS
1203 unsigned char *raw_pattern, *pat;
1204 int raw_pattern_size;
1205 int raw_pattern_size_byte;
1206 unsigned char *patbuf;
1207 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
a967ed62 1208 unsigned char *base_pat;
fed91c38
KH
1209 /* Set to positive if we find a non-ASCII char that need
1210 translation. Otherwise set to zero later. */
1211 int charset_base = -1;
040272ce 1212 int boyer_moore_ok = 1;
facdc750
RS
1213
1214 /* MULTIBYTE says whether the text to be searched is multibyte.
1215 We must convert PATTERN to match that, or we will not really
1216 find things right. */
1217
1218 if (multibyte == STRING_MULTIBYTE (string))
1219 {
d5db4077
KR
1220 raw_pattern = (unsigned char *) SDATA (string);
1221 raw_pattern_size = SCHARS (string);
1222 raw_pattern_size_byte = SBYTES (string);
facdc750
RS
1223 }
1224 else if (multibyte)
1225 {
d5db4077 1226 raw_pattern_size = SCHARS (string);
facdc750 1227 raw_pattern_size_byte
d5db4077 1228 = count_size_as_multibyte (SDATA (string),
facdc750 1229 raw_pattern_size);
7276d3d8 1230 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
d5db4077
KR
1231 copy_text (SDATA (string), raw_pattern,
1232 SCHARS (string), 0, 1);
facdc750
RS
1233 }
1234 else
1235 {
1236 /* Converting multibyte to single-byte.
1237
1238 ??? Perhaps this conversion should be done in a special way
1239 by subtracting nonascii-insert-offset from each non-ASCII char,
1240 so that only the multibyte chars which really correspond to
1241 the chosen single-byte character set can possibly match. */
d5db4077
KR
1242 raw_pattern_size = SCHARS (string);
1243 raw_pattern_size_byte = SCHARS (string);
7276d3d8 1244 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
1245 copy_text (SDATA (string), raw_pattern,
1246 SBYTES (string), 1, 0);
facdc750
RS
1247 }
1248
1249 /* Copy and optionally translate the pattern. */
1250 len = raw_pattern_size;
1251 len_byte = raw_pattern_size_byte;
1252 patbuf = (unsigned char *) alloca (len_byte);
1253 pat = patbuf;
1254 base_pat = raw_pattern;
1255 if (multibyte)
1256 {
0190922f
KH
1257 /* Fill patbuf by translated characters in STRING while
1258 checking if we can use boyer-moore search. If TRT is
1259 non-nil, we can use boyer-moore search only if TRT can be
1260 represented by the byte array of 256 elements. For that,
1261 all non-ASCII case-equivalents of all case-senstive
1262 characters in STRING must belong to the same charset and
1263 row. */
1264
facdc750
RS
1265 while (--len >= 0)
1266 {
0190922f 1267 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
aff2ce94 1268 int c, translated, inverse;
facdc750
RS
1269 int in_charlen, charlen;
1270
1271 /* If we got here and the RE flag is set, it's because we're
1272 dealing with a regexp known to be trivial, so the backslash
1273 just quotes the next character. */
1274 if (RE && *base_pat == '\\')
1275 {
1276 len--;
0190922f 1277 raw_pattern_size--;
facdc750
RS
1278 len_byte--;
1279 base_pat++;
1280 }
1281
1282 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
040272ce 1283
0190922f 1284 if (NILP (trt))
facdc750 1285 {
0190922f
KH
1286 str = base_pat;
1287 charlen = in_charlen;
facdc750 1288 }
0190922f 1289 else
facdc750 1290 {
0190922f
KH
1291 /* Translate the character. */
1292 TRANSLATE (translated, trt, c);
1293 charlen = CHAR_STRING (translated, str_base);
1294 str = str_base;
1295
1296 /* Check if C has any other case-equivalents. */
1297 TRANSLATE (inverse, inverse_trt, c);
1298 /* If so, check if we can use boyer-moore. */
1299 if (c != inverse && boyer_moore_ok)
1300 {
1301 /* Check if all equivalents belong to the same
1302 charset & row. Note that the check of C
1303 itself is done by the last iteration. Note
1304 also that we don't have to check ASCII
1305 characters because boyer-moore search can
1306 always handle their translation. */
1307 while (1)
1308 {
fed91c38 1309 if (ASCII_BYTE_P (inverse))
0190922f 1310 {
fed91c38 1311 if (charset_base > 0)
0190922f
KH
1312 {
1313 boyer_moore_ok = 0;
1314 break;
1315 }
fed91c38
KH
1316 charset_base = 0;
1317 }
1318 else if (SINGLE_BYTE_CHAR_P (inverse))
1319 {
1320 /* Boyer-moore search can't handle a
1321 translation of an eight-bit
1322 character. */
1323 boyer_moore_ok = 0;
1324 break;
1325 }
1326 else if (charset_base < 0)
1327 charset_base = inverse & ~CHAR_FIELD3_MASK;
1328 else if ((inverse & ~CHAR_FIELD3_MASK)
1329 != charset_base)
1330 {
1331 boyer_moore_ok = 0;
1332 break;
0190922f
KH
1333 }
1334 if (c == inverse)
1335 break;
1336 TRANSLATE (inverse, inverse_trt, inverse);
1337 }
1338 }
aff2ce94 1339 }
fed91c38
KH
1340 if (charset_base < 0)
1341 charset_base = 0;
facdc750
RS
1342
1343 /* Store this character into the translated pattern. */
1344 bcopy (str, pat, charlen);
1345 pat += charlen;
1346 base_pat += in_charlen;
1347 len_byte -= in_charlen;
1348 }
1349 }
1350 else
1351 {
040272ce
KH
1352 /* Unibyte buffer. */
1353 charset_base = 0;
facdc750
RS
1354 while (--len >= 0)
1355 {
040272ce 1356 int c, translated;
facdc750
RS
1357
1358 /* If we got here and the RE flag is set, it's because we're
1359 dealing with a regexp known to be trivial, so the backslash
1360 just quotes the next character. */
1361 if (RE && *base_pat == '\\')
1362 {
1363 len--;
0190922f 1364 raw_pattern_size--;
facdc750
RS
1365 base_pat++;
1366 }
1367 c = *base_pat++;
aff2ce94 1368 TRANSLATE (translated, trt, c);
facdc750
RS
1369 *pat++ = translated;
1370 }
1371 }
1372
1373 len_byte = pat - patbuf;
1374 len = raw_pattern_size;
1375 pat = base_pat = patbuf;
1376
040272ce 1377 if (boyer_moore_ok)
facdc750 1378 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
aff2ce94
RS
1379 pos, pos_byte, lim, lim_byte,
1380 charset_base);
facdc750
RS
1381 else
1382 return simple_search (n, pat, len, len_byte, trt,
1383 pos, pos_byte, lim, lim_byte);
1384 }
1385}
1386\f
1387/* Do a simple string search N times for the string PAT,
1388 whose length is LEN/LEN_BYTE,
1389 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1390 TRT is the translation table.
f8bd51c4 1391
facdc750
RS
1392 Return the character position where the match is found.
1393 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1394
facdc750
RS
1395 This kind of search works regardless of what is in PAT and
1396 regardless of what is in TRT. It is used in cases where
1397 boyer_moore cannot work. */
1398
1399static int
1400simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1401 int n;
1402 unsigned char *pat;
1403 int len, len_byte;
1404 Lisp_Object trt;
1405 int pos, pos_byte;
1406 int lim, lim_byte;
1407{
1408 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
ab228c24 1409 int forward = n > 0;
facdc750
RS
1410
1411 if (lim > pos && multibyte)
1412 while (n > 0)
1413 {
1414 while (1)
f8bd51c4 1415 {
facdc750
RS
1416 /* Try matching at position POS. */
1417 int this_pos = pos;
1418 int this_pos_byte = pos_byte;
1419 int this_len = len;
1420 int this_len_byte = len_byte;
1421 unsigned char *p = pat;
1422 if (pos + len > lim)
1423 goto stop;
1424
1425 while (this_len > 0)
1426 {
1427 int charlen, buf_charlen;
ab228c24 1428 int pat_ch, buf_ch;
facdc750 1429
ab228c24 1430 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1431 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1432 ZV_BYTE - this_pos_byte,
1433 buf_charlen);
aff2ce94 1434 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1435
1436 if (buf_ch != pat_ch)
1437 break;
ab228c24
RS
1438
1439 this_len_byte -= charlen;
1440 this_len--;
1441 p += charlen;
1442
1443 this_pos_byte += buf_charlen;
1444 this_pos++;
facdc750
RS
1445 }
1446
1447 if (this_len == 0)
1448 {
1449 pos += len;
1450 pos_byte += len_byte;
1451 break;
1452 }
1453
1454 INC_BOTH (pos, pos_byte);
f8bd51c4 1455 }
facdc750
RS
1456
1457 n--;
1458 }
1459 else if (lim > pos)
1460 while (n > 0)
1461 {
1462 while (1)
f8bd51c4 1463 {
facdc750
RS
1464 /* Try matching at position POS. */
1465 int this_pos = pos;
1466 int this_len = len;
1467 unsigned char *p = pat;
1468
1469 if (pos + len > lim)
1470 goto stop;
1471
1472 while (this_len > 0)
1473 {
1474 int pat_ch = *p++;
1475 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1476 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1477
1478 if (buf_ch != pat_ch)
1479 break;
ab228c24
RS
1480
1481 this_len--;
1482 this_pos++;
facdc750
RS
1483 }
1484
1485 if (this_len == 0)
1486 {
1487 pos += len;
1488 break;
1489 }
1490
1491 pos++;
f8bd51c4 1492 }
facdc750
RS
1493
1494 n--;
1495 }
1496 /* Backwards search. */
1497 else if (lim < pos && multibyte)
1498 while (n < 0)
1499 {
1500 while (1)
f8bd51c4 1501 {
facdc750
RS
1502 /* Try matching at position POS. */
1503 int this_pos = pos - len;
1504 int this_pos_byte = pos_byte - len_byte;
1505 int this_len = len;
1506 int this_len_byte = len_byte;
1507 unsigned char *p = pat;
1508
1509 if (pos - len < lim)
1510 goto stop;
1511
1512 while (this_len > 0)
1513 {
1514 int charlen, buf_charlen;
ab228c24 1515 int pat_ch, buf_ch;
facdc750 1516
ab228c24 1517 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1518 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1519 ZV_BYTE - this_pos_byte,
1520 buf_charlen);
aff2ce94 1521 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1522
1523 if (buf_ch != pat_ch)
1524 break;
ab228c24
RS
1525
1526 this_len_byte -= charlen;
1527 this_len--;
1528 p += charlen;
1529 this_pos_byte += buf_charlen;
1530 this_pos++;
facdc750
RS
1531 }
1532
1533 if (this_len == 0)
1534 {
1535 pos -= len;
1536 pos_byte -= len_byte;
1537 break;
1538 }
1539
1540 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1541 }
1542
facdc750
RS
1543 n++;
1544 }
1545 else if (lim < pos)
1546 while (n < 0)
1547 {
1548 while (1)
b6d6a51c 1549 {
facdc750
RS
1550 /* Try matching at position POS. */
1551 int this_pos = pos - len;
1552 int this_len = len;
1553 unsigned char *p = pat;
1554
1555 if (pos - len < lim)
1556 goto stop;
1557
1558 while (this_len > 0)
1559 {
1560 int pat_ch = *p++;
1561 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1562 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1563
1564 if (buf_ch != pat_ch)
1565 break;
ab228c24
RS
1566 this_len--;
1567 this_pos++;
facdc750
RS
1568 }
1569
1570 if (this_len == 0)
b6d6a51c 1571 {
facdc750
RS
1572 pos -= len;
1573 break;
b6d6a51c 1574 }
facdc750
RS
1575
1576 pos--;
b6d6a51c 1577 }
facdc750
RS
1578
1579 n++;
b6d6a51c 1580 }
facdc750
RS
1581
1582 stop:
1583 if (n == 0)
aff2ce94 1584 {
ab228c24
RS
1585 if (forward)
1586 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1587 else
1588 set_search_regs (multibyte ? pos_byte : pos, len_byte);
aff2ce94
RS
1589
1590 return pos;
1591 }
facdc750
RS
1592 else if (n > 0)
1593 return -n;
1594 else
1595 return n;
1596}
1597\f
0190922f 1598/* Do Boyer-Moore search N times for the string BASE_PAT,
facdc750
RS
1599 whose length is LEN/LEN_BYTE,
1600 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1601 DIRECTION says which direction we search in.
1602 TRT and INVERSE_TRT are translation tables.
0190922f 1603 Characters in PAT are already translated by TRT.
facdc750 1604
0190922f
KH
1605 This kind of search works if all the characters in BASE_PAT that
1606 have nontrivial translation are the same aside from the last byte.
1607 This makes it possible to translate just the last byte of a
1608 character, and do so after just a simple test of the context.
1609 CHARSET_BASE is nonzero iff there is such a non-ASCII character.
facdc750
RS
1610
1611 If that criterion is not satisfied, do not call this function. */
1612
1613static int
1614boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
aff2ce94 1615 pos, pos_byte, lim, lim_byte, charset_base)
facdc750
RS
1616 int n;
1617 unsigned char *base_pat;
1618 int len, len_byte;
1619 Lisp_Object trt;
1620 Lisp_Object inverse_trt;
1621 int pos, pos_byte;
1622 int lim, lim_byte;
aff2ce94 1623 int charset_base;
facdc750
RS
1624{
1625 int direction = ((n > 0) ? 1 : -1);
1626 register int dirlen;
a968f437 1627 int infinity, limit, stride_for_teases = 0;
facdc750
RS
1628 register int *BM_tab;
1629 int *BM_tab_base;
177c0ea7 1630 register unsigned char *cursor, *p_limit;
facdc750 1631 register int i, j;
cb6792d2 1632 unsigned char *pat, *pat_end;
facdc750
RS
1633 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1634
1635 unsigned char simple_translate[0400];
0190922f
KH
1636 /* These are set to the preceding bytes of a byte to be translated
1637 if charset_base is nonzero. As the maximum byte length of a
1638 multibyte character is 4, we have to check at most three previous
1639 bytes. */
1640 int translate_prev_byte1 = 0;
1641 int translate_prev_byte2 = 0;
1642 int translate_prev_byte3 = 0;
facdc750
RS
1643
1644#ifdef C_ALLOCA
1645 int BM_tab_space[0400];
1646 BM_tab = &BM_tab_space[0];
1647#else
1648 BM_tab = (int *) alloca (0400 * sizeof (int));
1649#endif
1650 /* The general approach is that we are going to maintain that we know */
1651 /* the first (closest to the present position, in whatever direction */
1652 /* we're searching) character that could possibly be the last */
1653 /* (furthest from present position) character of a valid match. We */
1654 /* advance the state of our knowledge by looking at that character */
1655 /* and seeing whether it indeed matches the last character of the */
1656 /* pattern. If it does, we take a closer look. If it does not, we */
1657 /* move our pointer (to putative last characters) as far as is */
1658 /* logically possible. This amount of movement, which I call a */
1659 /* stride, will be the length of the pattern if the actual character */
1660 /* appears nowhere in the pattern, otherwise it will be the distance */
1661 /* from the last occurrence of that character to the end of the */
1662 /* pattern. */
1663 /* As a coding trick, an enormous stride is coded into the table for */
1664 /* characters that match the last character. This allows use of only */
1665 /* a single test, a test for having gone past the end of the */
1666 /* permissible match region, to test for both possible matches (when */
1667 /* the stride goes past the end immediately) and failure to */
177c0ea7 1668 /* match (where you get nudged past the end one stride at a time). */
facdc750
RS
1669
1670 /* Here we make a "mickey mouse" BM table. The stride of the search */
1671 /* is determined only by the last character of the putative match. */
1672 /* If that character does not match, we will stride the proper */
1673 /* distance to propose a match that superimposes it on the last */
1674 /* instance of a character that matches it (per trt), or misses */
177c0ea7 1675 /* it entirely if there is none. */
facdc750
RS
1676
1677 dirlen = len_byte * direction;
1678 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
cb6792d2
RS
1679
1680 /* Record position after the end of the pattern. */
1681 pat_end = base_pat + len_byte;
1682 /* BASE_PAT points to a character that we start scanning from.
1683 It is the first character in a forward search,
1684 the last character in a backward search. */
facdc750 1685 if (direction < 0)
cb6792d2
RS
1686 base_pat = pat_end - 1;
1687
facdc750
RS
1688 BM_tab_base = BM_tab;
1689 BM_tab += 0400;
1690 j = dirlen; /* to get it in a register */
1691 /* A character that does not appear in the pattern induces a */
1692 /* stride equal to the pattern length. */
1693 while (BM_tab_base != BM_tab)
1694 {
1695 *--BM_tab = j;
1696 *--BM_tab = j;
1697 *--BM_tab = j;
1698 *--BM_tab = j;
1699 }
1700
1701 /* We use this for translation, instead of TRT itself.
1702 We fill this in to handle the characters that actually
1703 occur in the pattern. Others don't matter anyway! */
1704 bzero (simple_translate, sizeof simple_translate);
1705 for (i = 0; i < 0400; i++)
1706 simple_translate[i] = i;
1707
0190922f
KH
1708 if (charset_base)
1709 {
1710 /* Setup translate_prev_byte1/2/3 from CHARSET_BASE. Only a
1711 byte following them are the target of translation. */
1712 int sample_char = charset_base | 0x20;
1713 unsigned char str[MAX_MULTIBYTE_LENGTH];
1714 int len = CHAR_STRING (sample_char, str);
1715
1716 translate_prev_byte1 = str[len - 2];
1717 if (len > 2)
1718 {
1719 translate_prev_byte2 = str[len - 3];
1720 if (len > 3)
1721 translate_prev_byte3 = str[len - 4];
1722 }
1723 }
1724
facdc750
RS
1725 i = 0;
1726 while (i != infinity)
1727 {
cb6792d2 1728 unsigned char *ptr = base_pat + i;
facdc750
RS
1729 i += direction;
1730 if (i == dirlen)
1731 i = infinity;
1732 if (! NILP (trt))
ca1d1d23 1733 {
620a5178
KH
1734 /* If the byte currently looking at is the last of a
1735 character to check case-equivalents, set CH to that
1736 character. An ASCII character and a non-ASCII character
1737 matching with CHARSET_BASE are to be checked. */
0190922f
KH
1738 int ch = -1;
1739
1740 if (ASCII_BYTE_P (*ptr) || ! multibyte)
1741 ch = *ptr;
620a5178 1742 else if (charset_base
86dc6ccb 1743 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
ca1d1d23 1744 {
620a5178
KH
1745 unsigned char *charstart = ptr - 1;
1746
1747 while (! (CHAR_HEAD_P (*charstart)))
1748 charstart--;
1749 ch = STRING_CHAR (charstart, ptr - charstart + 1);
0190922f
KH
1750 if (charset_base != (ch & ~CHAR_FIELD3_MASK))
1751 ch = -1;
ca1d1d23 1752 }
facdc750 1753
6786a107 1754 if (ch >= 0400)
620a5178
KH
1755 j = ((unsigned char) ch) | 0200;
1756 else
1757 j = *ptr;
1758
facdc750
RS
1759 if (i == infinity)
1760 stride_for_teases = BM_tab[j];
ab228c24 1761
facdc750
RS
1762 BM_tab[j] = dirlen - i;
1763 /* A translation table is accompanied by its inverse -- see */
177c0ea7 1764 /* comment following downcase_table for details */
0190922f 1765 if (ch >= 0)
ab228c24
RS
1766 {
1767 int starting_ch = ch;
620a5178 1768 int starting_j = j;
0190922f 1769
ab228c24
RS
1770 while (1)
1771 {
1772 TRANSLATE (ch, inverse_trt, ch);
6786a107 1773 if (ch >= 0400)
ab228c24
RS
1774 j = ((unsigned char) ch) | 0200;
1775 else
1776 j = (unsigned char) ch;
1777
1778 /* For all the characters that map into CH,
1779 set up simple_translate to map the last byte
1780 into STARTING_J. */
1781 simple_translate[j] = starting_j;
1782 if (ch == starting_ch)
1783 break;
1784 BM_tab[j] = dirlen - i;
1785 }
1786 }
facdc750
RS
1787 }
1788 else
1789 {
1790 j = *ptr;
1791
1792 if (i == infinity)
1793 stride_for_teases = BM_tab[j];
1794 BM_tab[j] = dirlen - i;
ca1d1d23 1795 }
facdc750
RS
1796 /* stride_for_teases tells how much to stride if we get a */
1797 /* match on the far character but are subsequently */
1798 /* disappointed, by recording what the stride would have been */
1799 /* for that character if the last character had been */
1800 /* different. */
1801 }
1802 infinity = dirlen - infinity;
1803 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1804 /* loop invariant - POS_BYTE points at where last char (first
1805 char if reverse) of pattern would align in a possible match. */
1806 while (n != 0)
1807 {
1808 int tail_end;
1809 unsigned char *tail_end_ptr;
1810
1811 /* It's been reported that some (broken) compiler thinks that
1812 Boolean expressions in an arithmetic context are unsigned.
1813 Using an explicit ?1:0 prevents this. */
1814 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1815 < 0)
1816 return (n * (0 - direction));
1817 /* First we do the part we can by pointers (maybe nothing) */
1818 QUIT;
1819 pat = base_pat;
1820 limit = pos_byte - dirlen + direction;
67ce527d
KH
1821 if (direction > 0)
1822 {
1823 limit = BUFFER_CEILING_OF (limit);
1824 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1825 can take on without hitting edge of buffer or the gap. */
1826 limit = min (limit, pos_byte + 20000);
1827 limit = min (limit, lim_byte - 1);
1828 }
1829 else
1830 {
1831 limit = BUFFER_FLOOR_OF (limit);
1832 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1833 can take on without hitting edge of buffer or the gap. */
1834 limit = max (limit, pos_byte - 20000);
1835 limit = max (limit, lim_byte);
1836 }
facdc750
RS
1837 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1838 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1839
1840 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1841 {
facdc750
RS
1842 unsigned char *p2;
1843
1844 p_limit = BYTE_POS_ADDR (limit);
1845 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1846 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1847 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1848 {
facdc750 1849 if (direction > 0) /* worth duplicating */
ca1d1d23 1850 {
facdc750
RS
1851 /* Use signed comparison if appropriate
1852 to make cursor+infinity sure to be > p_limit.
1853 Assuming that the buffer lies in a range of addresses
1854 that are all "positive" (as ints) or all "negative",
1855 either kind of comparison will work as long
1856 as we don't step by infinity. So pick the kind
1857 that works when we do step by infinity. */
1858 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1859 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1860 cursor += BM_tab[*cursor];
ca1d1d23 1861 else
facdc750
RS
1862 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1863 cursor += BM_tab[*cursor];
1864 }
1865 else
1866 {
1867 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1868 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1869 cursor += BM_tab[*cursor];
1870 else
1871 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1872 cursor += BM_tab[*cursor];
1873 }
ca1d1d23 1874/* If you are here, cursor is beyond the end of the searched region. */
facdc750
RS
1875/* This can happen if you match on the far character of the pattern, */
1876/* because the "stride" of that character is infinity, a number able */
1877/* to throw you well beyond the end of the search. It can also */
1878/* happen if you fail to match within the permitted region and would */
1879/* otherwise try a character beyond that region */
1880 if ((cursor - p_limit) * direction <= len_byte)
1881 break; /* a small overrun is genuine */
1882 cursor -= infinity; /* large overrun = hit */
1883 i = dirlen - direction;
1884 if (! NILP (trt))
1885 {
1886 while ((i -= direction) + direction != 0)
ca1d1d23 1887 {
facdc750
RS
1888 int ch;
1889 cursor -= direction;
1890 /* Translate only the last byte of a character. */
1891 if (! multibyte
1892 || ((cursor == tail_end_ptr
1893 || CHAR_HEAD_P (cursor[1]))
1894 && (CHAR_HEAD_P (cursor[0])
0190922f
KH
1895 /* Check if this is the last byte of
1896 a translable character. */
1897 || (translate_prev_byte1 == cursor[-1]
1898 && (CHAR_HEAD_P (translate_prev_byte1)
1899 || (translate_prev_byte2 == cursor[-2]
1900 && (CHAR_HEAD_P (translate_prev_byte2)
1901 || (translate_prev_byte3 == cursor[-3]))))))))
facdc750
RS
1902 ch = simple_translate[*cursor];
1903 else
1904 ch = *cursor;
1905 if (pat[i] != ch)
1906 break;
ca1d1d23 1907 }
facdc750
RS
1908 }
1909 else
1910 {
1911 while ((i -= direction) + direction != 0)
ca1d1d23 1912 {
facdc750
RS
1913 cursor -= direction;
1914 if (pat[i] != *cursor)
1915 break;
ca1d1d23 1916 }
facdc750
RS
1917 }
1918 cursor += dirlen - i - direction; /* fix cursor */
1919 if (i + direction == 0)
1920 {
1921 int position;
0c8533c6 1922
facdc750 1923 cursor -= direction;
1113d9db 1924
facdc750
RS
1925 position = pos_byte + cursor - p2 + ((direction > 0)
1926 ? 1 - len_byte : 0);
1927 set_search_regs (position, len_byte);
ca325161 1928
facdc750
RS
1929 if ((n -= direction) != 0)
1930 cursor += dirlen; /* to resume search */
ca1d1d23 1931 else
facdc750
RS
1932 return ((direction > 0)
1933 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1934 }
facdc750
RS
1935 else
1936 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 1937 }
facdc750
RS
1938 pos_byte += cursor - p2;
1939 }
1940 else
1941 /* Now we'll pick up a clump that has to be done the hard */
1942 /* way because it covers a discontinuity */
1943 {
1944 limit = ((direction > 0)
1945 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1946 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1947 limit = ((direction > 0)
1948 ? min (limit + len_byte, lim_byte - 1)
1949 : max (limit - len_byte, lim_byte));
1950 /* LIMIT is now the last value POS_BYTE can have
1951 and still be valid for a possible match. */
1952 while (1)
ca1d1d23 1953 {
facdc750
RS
1954 /* This loop can be coded for space rather than */
1955 /* speed because it will usually run only once. */
1956 /* (the reach is at most len + 21, and typically */
177c0ea7 1957 /* does not exceed len) */
facdc750
RS
1958 while ((limit - pos_byte) * direction >= 0)
1959 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1960 /* now run the same tests to distinguish going off the */
1961 /* end, a match or a phony match. */
1962 if ((pos_byte - limit) * direction <= len_byte)
1963 break; /* ran off the end */
1964 /* Found what might be a match.
1965 Set POS_BYTE back to last (first if reverse) pos. */
1966 pos_byte -= infinity;
1967 i = dirlen - direction;
1968 while ((i -= direction) + direction != 0)
ca1d1d23 1969 {
facdc750
RS
1970 int ch;
1971 unsigned char *ptr;
1972 pos_byte -= direction;
1973 ptr = BYTE_POS_ADDR (pos_byte);
1974 /* Translate only the last byte of a character. */
1975 if (! multibyte
1976 || ((ptr == tail_end_ptr
1977 || CHAR_HEAD_P (ptr[1]))
1978 && (CHAR_HEAD_P (ptr[0])
0190922f
KH
1979 /* Check if this is the last byte of a
1980 translable character. */
1981 || (translate_prev_byte1 == ptr[-1]
1982 && (CHAR_HEAD_P (translate_prev_byte1)
1983 || (translate_prev_byte2 == ptr[-2]
1984 && (CHAR_HEAD_P (translate_prev_byte2)
1985 || translate_prev_byte3 == ptr[-3])))))))
facdc750
RS
1986 ch = simple_translate[*ptr];
1987 else
1988 ch = *ptr;
1989 if (pat[i] != ch)
1990 break;
1991 }
1992 /* Above loop has moved POS_BYTE part or all the way
1993 back to the first pos (last pos if reverse).
1994 Set it once again at the last (first if reverse) char. */
1995 pos_byte += dirlen - i- direction;
1996 if (i + direction == 0)
1997 {
1998 int position;
1999 pos_byte -= direction;
1113d9db 2000
facdc750 2001 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
0c8533c6 2002
facdc750 2003 set_search_regs (position, len_byte);
ca325161 2004
facdc750
RS
2005 if ((n -= direction) != 0)
2006 pos_byte += dirlen; /* to resume search */
ca1d1d23 2007 else
facdc750
RS
2008 return ((direction > 0)
2009 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 2010 }
facdc750
RS
2011 else
2012 pos_byte += stride_for_teases;
2013 }
2014 }
2015 /* We have done one clump. Can we continue? */
2016 if ((lim_byte - pos_byte) * direction < 0)
2017 return ((0 - n) * direction);
ca1d1d23 2018 }
facdc750 2019 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 2020}
ca325161 2021
fa8ed3e0 2022/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
a7e4cdde
RS
2023 for the overall match just found in the current buffer.
2024 Also clear out the match data for registers 1 and up. */
ca325161
RS
2025
2026static void
fa8ed3e0
RS
2027set_search_regs (beg_byte, nbytes)
2028 int beg_byte, nbytes;
ca325161 2029{
a7e4cdde
RS
2030 int i;
2031
ca325161
RS
2032 /* Make sure we have registers in which to store
2033 the match position. */
2034 if (search_regs.num_regs == 0)
2035 {
2d4a771a
RS
2036 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2037 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 2038 search_regs.num_regs = 2;
ca325161
RS
2039 }
2040
a7e4cdde
RS
2041 /* Clear out the other registers. */
2042 for (i = 1; i < search_regs.num_regs; i++)
2043 {
2044 search_regs.start[i] = -1;
2045 search_regs.end[i] = -1;
2046 }
2047
fa8ed3e0
RS
2048 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2049 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 2050 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 2051}
ca1d1d23
JB
2052\f
2053/* Given a string of words separated by word delimiters,
2054 compute a regexp that matches those exact words
2055 separated by arbitrary punctuation. */
2056
2057static Lisp_Object
2058wordify (string)
2059 Lisp_Object string;
2060{
2061 register unsigned char *p, *o;
0c8533c6 2062 register int i, i_byte, len, punct_count = 0, word_count = 0;
ca1d1d23 2063 Lisp_Object val;
0c8533c6
RS
2064 int prev_c = 0;
2065 int adjust;
ca1d1d23 2066
b7826503 2067 CHECK_STRING (string);
d5db4077
KR
2068 p = SDATA (string);
2069 len = SCHARS (string);
ca1d1d23 2070
0c8533c6
RS
2071 for (i = 0, i_byte = 0; i < len; )
2072 {
2073 int c;
177c0ea7 2074
eb99a8dd 2075 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
0c8533c6
RS
2076
2077 if (SYNTAX (c) != Sword)
2078 {
2079 punct_count++;
2080 if (i > 0 && SYNTAX (prev_c) == Sword)
2081 word_count++;
2082 }
ca1d1d23 2083
0c8533c6
RS
2084 prev_c = c;
2085 }
2086
2087 if (SYNTAX (prev_c) == Sword)
2088 word_count++;
2089 if (!word_count)
b07b65aa 2090 return empty_string;
0c8533c6
RS
2091
2092 adjust = - punct_count + 5 * (word_count - 1) + 4;
8a2df937
RS
2093 if (STRING_MULTIBYTE (string))
2094 val = make_uninit_multibyte_string (len + adjust,
d5db4077 2095 SBYTES (string)
8a2df937
RS
2096 + adjust);
2097 else
2098 val = make_uninit_string (len + adjust);
ca1d1d23 2099
d5db4077 2100 o = SDATA (val);
ca1d1d23
JB
2101 *o++ = '\\';
2102 *o++ = 'b';
1e9582d4 2103 prev_c = 0;
ca1d1d23 2104
1e9582d4
RS
2105 for (i = 0, i_byte = 0; i < len; )
2106 {
2107 int c;
2108 int i_byte_orig = i_byte;
177c0ea7 2109
eb99a8dd 2110 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1e9582d4
RS
2111
2112 if (SYNTAX (c) == Sword)
2113 {
5d69fe10 2114 bcopy (SDATA (string) + i_byte_orig, o,
1e9582d4
RS
2115 i_byte - i_byte_orig);
2116 o += i_byte - i_byte_orig;
2117 }
2118 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2119 {
2120 *o++ = '\\';
2121 *o++ = 'W';
2122 *o++ = '\\';
2123 *o++ = 'W';
2124 *o++ = '*';
2125 }
2126
2127 prev_c = c;
2128 }
ca1d1d23
JB
2129
2130 *o++ = '\\';
2131 *o++ = 'b';
2132
2133 return val;
2134}
2135\f
2136DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
8c1a1077
PJ
2137 "MSearch backward: ",
2138 doc: /* Search backward from point for STRING.
2139Set point to the beginning of the occurrence found, and return point.
2140An optional second argument bounds the search; it is a buffer position.
2141The match found must not extend before that position.
2142Optional third argument, if t, means if fail just return nil (no error).
2143 If not nil and not t, position at limit of search and return nil.
2144Optional fourth argument is repeat count--search for successive occurrences.
2145
2146Search case-sensitivity is determined by the value of the variable
2147`case-fold-search', which see.
2148
2149See also the functions `match-beginning', `match-end' and `replace-match'. */)
2150 (string, bound, noerror, count)
ca1d1d23
JB
2151 Lisp_Object string, bound, noerror, count;
2152{
b819a390 2153 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
2154}
2155
6af43974 2156DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
8c1a1077
PJ
2157 doc: /* Search forward from point for STRING.
2158Set point to the end of the occurrence found, and return point.
2159An optional second argument bounds the search; it is a buffer position.
2160The match found must not extend after that position. nil is equivalent
2161 to (point-max).
2162Optional third argument, if t, means if fail just return nil (no error).
2163 If not nil and not t, move to limit of search and return nil.
2164Optional fourth argument is repeat count--search for successive occurrences.
2165
2166Search case-sensitivity is determined by the value of the variable
2167`case-fold-search', which see.
2168
2169See also the functions `match-beginning', `match-end' and `replace-match'. */)
2170 (string, bound, noerror, count)
ca1d1d23
JB
2171 Lisp_Object string, bound, noerror, count;
2172{
b819a390 2173 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
2174}
2175
2176DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
8c1a1077
PJ
2177 "sWord search backward: ",
2178 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2179Set point to the beginning of the occurrence found, and return point.
2180An optional second argument bounds the search; it is a buffer position.
2181The match found must not extend before that position.
2182Optional third argument, if t, means if fail just return nil (no error).
2183 If not nil and not t, move to limit of search and return nil.
2184Optional fourth argument is repeat count--search for successive occurrences. */)
2185 (string, bound, noerror, count)
ca1d1d23
JB
2186 Lisp_Object string, bound, noerror, count;
2187{
b819a390 2188 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2189}
2190
2191DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
8c1a1077
PJ
2192 "sWord search: ",
2193 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2194Set point to the end of the occurrence found, and return point.
2195An optional second argument bounds the search; it is a buffer position.
2196The match found must not extend after that position.
2197Optional third argument, if t, means if fail just return nil (no error).
2198 If not nil and not t, move to limit of search and return nil.
2199Optional fourth argument is repeat count--search for successive occurrences. */)
2200 (string, bound, noerror, count)
ca1d1d23
JB
2201 Lisp_Object string, bound, noerror, count;
2202{
b819a390 2203 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
2204}
2205
2206DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
8c1a1077
PJ
2207 "sRE search backward: ",
2208 doc: /* Search backward from point for match for regular expression REGEXP.
2209Set point to the beginning of the match, and return point.
2210The match found is the one starting last in the buffer
2211and yet ending before the origin of the search.
2212An optional second argument bounds the search; it is a buffer position.
2213The match found must start at or after that position.
2214Optional third argument, if t, means if fail just return nil (no error).
2215 If not nil and not t, move to limit of search and return nil.
2216Optional fourth argument is repeat count--search for successive occurrences.
2217See also the functions `match-beginning', `match-end', `match-string',
2218and `replace-match'. */)
2219 (regexp, bound, noerror, count)
19c0a730 2220 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2221{
b819a390 2222 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2223}
2224
2225DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
8c1a1077
PJ
2226 "sRE search: ",
2227 doc: /* Search forward from point for regular expression REGEXP.
2228Set point to the end of the occurrence found, and return point.
2229An optional second argument bounds the search; it is a buffer position.
2230The match found must not extend after that position.
2231Optional third argument, if t, means if fail just return nil (no error).
2232 If not nil and not t, move to limit of search and return nil.
2233Optional fourth argument is repeat count--search for successive occurrences.
2234See also the functions `match-beginning', `match-end', `match-string',
2235and `replace-match'. */)
2236 (regexp, bound, noerror, count)
19c0a730 2237 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2238{
b819a390
RS
2239 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2240}
2241
2242DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
8c1a1077
PJ
2243 "sPosix search backward: ",
2244 doc: /* Search backward from point for match for regular expression REGEXP.
2245Find the longest match in accord with Posix regular expression rules.
2246Set point to the beginning of the match, and return point.
2247The match found is the one starting last in the buffer
2248and yet ending before the origin of the search.
2249An optional second argument bounds the search; it is a buffer position.
2250The match found must start at or after that position.
2251Optional third argument, if t, means if fail just return nil (no error).
2252 If not nil and not t, move to limit of search and return nil.
2253Optional fourth argument is repeat count--search for successive occurrences.
2254See also the functions `match-beginning', `match-end', `match-string',
2255and `replace-match'. */)
2256 (regexp, bound, noerror, count)
b819a390
RS
2257 Lisp_Object regexp, bound, noerror, count;
2258{
2259 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2260}
2261
2262DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
8c1a1077
PJ
2263 "sPosix search: ",
2264 doc: /* Search forward from point for regular expression REGEXP.
2265Find the longest match in accord with Posix regular expression rules.
2266Set point to the end of the occurrence found, and return point.
2267An optional second argument bounds the search; it is a buffer position.
2268The match found must not extend after that position.
2269Optional third argument, if t, means if fail just return nil (no error).
2270 If not nil and not t, move to limit of search and return nil.
2271Optional fourth argument is repeat count--search for successive occurrences.
2272See also the functions `match-beginning', `match-end', `match-string',
2273and `replace-match'. */)
2274 (regexp, bound, noerror, count)
b819a390
RS
2275 Lisp_Object regexp, bound, noerror, count;
2276{
2277 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2278}
2279\f
d7a5ad5f 2280DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
8c1a1077 2281 doc: /* Replace text matched by last search with NEWTEXT.
4dd0c271
RS
2282Leave point at the end of the replacement text.
2283
8c1a1077
PJ
2284If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2285Otherwise maybe capitalize the whole text, or maybe just word initials,
2286based on the replaced text.
2287If the replaced text has only capital letters
2288and has at least one multiletter word, convert NEWTEXT to all caps.
4dd0c271
RS
2289Otherwise if all words are capitalized in the replaced text,
2290capitalize each word in NEWTEXT.
8c1a1077
PJ
2291
2292If third arg LITERAL is non-nil, insert NEWTEXT literally.
2293Otherwise treat `\\' as special:
2294 `\\&' in NEWTEXT means substitute original matched text.
2295 `\\N' means substitute what matched the Nth `\\(...\\)'.
2296 If Nth parens didn't match, substitute nothing.
2297 `\\\\' means insert one `\\'.
4dd0c271
RS
2298Case conversion does not apply to these substitutions.
2299
8c1a1077 2300FIXEDCASE and LITERAL are optional arguments.
8c1a1077
PJ
2301
2302The optional fourth argument STRING can be a string to modify.
2303This is meaningful when the previous match was done against STRING,
2304using `string-match'. When used this way, `replace-match'
2305creates and returns a new string made by copying STRING and replacing
2306the part of STRING that was matched.
2307
2308The optional fifth argument SUBEXP specifies a subexpression;
2309it says to replace just that subexpression with NEWTEXT,
2310rather than replacing the entire matched text.
2311This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2312`\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2313NEWTEXT in place of subexp N.
2314This is useful only after a regular expression search or match,
2315since only regular expressions have distinguished subexpressions. */)
2316 (newtext, fixedcase, literal, string, subexp)
d7a5ad5f 2317 Lisp_Object newtext, fixedcase, literal, string, subexp;
ca1d1d23
JB
2318{
2319 enum { nochange, all_caps, cap_initial } case_action;
ac3b28b1 2320 register int pos, pos_byte;
ca1d1d23 2321 int some_multiletter_word;
97832bd0 2322 int some_lowercase;
73dc8771 2323 int some_uppercase;
208767c3 2324 int some_nonuppercase_initial;
ca1d1d23 2325 register int c, prevc;
d7a5ad5f 2326 int sub;
3e18eecf 2327 int opoint, newpoint;
ca1d1d23 2328
b7826503 2329 CHECK_STRING (newtext);
ca1d1d23 2330
080c45fd 2331 if (! NILP (string))
b7826503 2332 CHECK_STRING (string);
080c45fd 2333
ca1d1d23
JB
2334 case_action = nochange; /* We tried an initialization */
2335 /* but some C compilers blew it */
4746118a
JB
2336
2337 if (search_regs.num_regs <= 0)
d72cdbfc 2338 error ("`replace-match' called before any match found");
4746118a 2339
d7a5ad5f
RS
2340 if (NILP (subexp))
2341 sub = 0;
2342 else
2343 {
b7826503 2344 CHECK_NUMBER (subexp);
d7a5ad5f
RS
2345 sub = XINT (subexp);
2346 if (sub < 0 || sub >= search_regs.num_regs)
2347 args_out_of_range (subexp, make_number (search_regs.num_regs));
2348 }
2349
080c45fd
RS
2350 if (NILP (string))
2351 {
d7a5ad5f
RS
2352 if (search_regs.start[sub] < BEGV
2353 || search_regs.start[sub] > search_regs.end[sub]
2354 || search_regs.end[sub] > ZV)
2355 args_out_of_range (make_number (search_regs.start[sub]),
2356 make_number (search_regs.end[sub]));
080c45fd
RS
2357 }
2358 else
2359 {
d7a5ad5f
RS
2360 if (search_regs.start[sub] < 0
2361 || search_regs.start[sub] > search_regs.end[sub]
d5db4077 2362 || search_regs.end[sub] > SCHARS (string))
d7a5ad5f
RS
2363 args_out_of_range (make_number (search_regs.start[sub]),
2364 make_number (search_regs.end[sub]));
080c45fd 2365 }
ca1d1d23
JB
2366
2367 if (NILP (fixedcase))
2368 {
2369 /* Decide how to casify by examining the matched text. */
ac3b28b1 2370 int last;
ca1d1d23 2371
ac3b28b1
KH
2372 pos = search_regs.start[sub];
2373 last = search_regs.end[sub];
fa8ed3e0
RS
2374
2375 if (NILP (string))
ac3b28b1 2376 pos_byte = CHAR_TO_BYTE (pos);
fa8ed3e0 2377 else
ac3b28b1 2378 pos_byte = string_char_to_byte (string, pos);
fa8ed3e0 2379
ca1d1d23
JB
2380 prevc = '\n';
2381 case_action = all_caps;
2382
2383 /* some_multiletter_word is set nonzero if any original word
2384 is more than one letter long. */
2385 some_multiletter_word = 0;
97832bd0 2386 some_lowercase = 0;
208767c3 2387 some_nonuppercase_initial = 0;
73dc8771 2388 some_uppercase = 0;
ca1d1d23 2389
ac3b28b1 2390 while (pos < last)
ca1d1d23 2391 {
080c45fd 2392 if (NILP (string))
ac3b28b1
KH
2393 {
2394 c = FETCH_CHAR (pos_byte);
2395 INC_BOTH (pos, pos_byte);
2396 }
080c45fd 2397 else
ac3b28b1 2398 FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
080c45fd 2399
ca1d1d23
JB
2400 if (LOWERCASEP (c))
2401 {
2402 /* Cannot be all caps if any original char is lower case */
2403
97832bd0 2404 some_lowercase = 1;
ca1d1d23 2405 if (SYNTAX (prevc) != Sword)
208767c3 2406 some_nonuppercase_initial = 1;
ca1d1d23
JB
2407 else
2408 some_multiletter_word = 1;
2409 }
d16c2b66 2410 else if (UPPERCASEP (c))
ca1d1d23 2411 {
73dc8771 2412 some_uppercase = 1;
97832bd0 2413 if (SYNTAX (prevc) != Sword)
c4d460ce 2414 ;
97832bd0 2415 else
ca1d1d23
JB
2416 some_multiletter_word = 1;
2417 }
208767c3
RS
2418 else
2419 {
2420 /* If the initial is a caseless word constituent,
2421 treat that like a lowercase initial. */
2422 if (SYNTAX (prevc) != Sword)
2423 some_nonuppercase_initial = 1;
2424 }
ca1d1d23
JB
2425
2426 prevc = c;
2427 }
2428
97832bd0
RS
2429 /* Convert to all caps if the old text is all caps
2430 and has at least one multiletter word. */
2431 if (! some_lowercase && some_multiletter_word)
2432 case_action = all_caps;
c4d460ce 2433 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2434 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2435 case_action = cap_initial;
208767c3 2436 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2437 /* Should x -> yz, operating on X, give Yz or YZ?
2438 We'll assume the latter. */
2439 case_action = all_caps;
97832bd0
RS
2440 else
2441 case_action = nochange;
ca1d1d23
JB
2442 }
2443
080c45fd
RS
2444 /* Do replacement in a string. */
2445 if (!NILP (string))
2446 {
2447 Lisp_Object before, after;
2448
2449 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2450 make_number (search_regs.start[sub]));
2451 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2452
636a5e28
RS
2453 /* Substitute parts of the match into NEWTEXT
2454 if desired. */
080c45fd
RS
2455 if (NILP (literal))
2456 {
d131e79c
RS
2457 int lastpos = 0;
2458 int lastpos_byte = 0;
080c45fd
RS
2459 /* We build up the substituted string in ACCUM. */
2460 Lisp_Object accum;
2461 Lisp_Object middle;
d5db4077 2462 int length = SBYTES (newtext);
080c45fd
RS
2463
2464 accum = Qnil;
2465
ac3b28b1 2466 for (pos_byte = 0, pos = 0; pos_byte < length;)
080c45fd
RS
2467 {
2468 int substart = -1;
6bbd7a29 2469 int subend = 0;
1e79ec24 2470 int delbackslash = 0;
080c45fd 2471
0c8533c6
RS
2472 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2473
080c45fd
RS
2474 if (c == '\\')
2475 {
0c8533c6 2476 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
177c0ea7 2477
080c45fd
RS
2478 if (c == '&')
2479 {
d7a5ad5f
RS
2480 substart = search_regs.start[sub];
2481 subend = search_regs.end[sub];
080c45fd 2482 }
5fbbc83b 2483 else if (c >= '1' && c <= '9')
080c45fd 2484 {
5fbbc83b
RS
2485 if (search_regs.start[c - '0'] >= 0
2486 && c <= search_regs.num_regs + '0')
080c45fd
RS
2487 {
2488 substart = search_regs.start[c - '0'];
2489 subend = search_regs.end[c - '0'];
2490 }
5fbbc83b
RS
2491 else
2492 {
2493 /* If that subexp did not match,
2494 replace \\N with nothing. */
2495 substart = 0;
2496 subend = 0;
2497 }
080c45fd 2498 }
1e79ec24
KH
2499 else if (c == '\\')
2500 delbackslash = 1;
636a5e28
RS
2501 else
2502 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2503 }
2504 if (substart >= 0)
2505 {
d131e79c
RS
2506 if (pos - 2 != lastpos)
2507 middle = substring_both (newtext, lastpos,
2508 lastpos_byte,
2509 pos - 2, pos_byte - 2);
080c45fd
RS
2510 else
2511 middle = Qnil;
2512 accum = concat3 (accum, middle,
0c8533c6
RS
2513 Fsubstring (string,
2514 make_number (substart),
080c45fd
RS
2515 make_number (subend)));
2516 lastpos = pos;
0c8533c6 2517 lastpos_byte = pos_byte;
080c45fd 2518 }
1e79ec24
KH
2519 else if (delbackslash)
2520 {
d131e79c
RS
2521 middle = substring_both (newtext, lastpos,
2522 lastpos_byte,
2523 pos - 1, pos_byte - 1);
0c8533c6 2524
1e79ec24
KH
2525 accum = concat2 (accum, middle);
2526 lastpos = pos;
0c8533c6 2527 lastpos_byte = pos_byte;
1e79ec24 2528 }
080c45fd
RS
2529 }
2530
d131e79c
RS
2531 if (pos != lastpos)
2532 middle = substring_both (newtext, lastpos,
2533 lastpos_byte,
0c8533c6 2534 pos, pos_byte);
080c45fd
RS
2535 else
2536 middle = Qnil;
2537
2538 newtext = concat2 (accum, middle);
2539 }
2540
636a5e28 2541 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2542 if (case_action == all_caps)
2543 newtext = Fupcase (newtext);
2544 else if (case_action == cap_initial)
2b2eead9 2545 newtext = Fupcase_initials (newtext);
080c45fd
RS
2546
2547 return concat3 (before, newtext, after);
2548 }
2549
09c4719e 2550 /* Record point, then move (quietly) to the start of the match. */
9160906f 2551 if (PT >= search_regs.end[sub])
b0eba991 2552 opoint = PT - ZV;
9160906f
RS
2553 else if (PT > search_regs.start[sub])
2554 opoint = search_regs.end[sub] - ZV;
b0eba991
RS
2555 else
2556 opoint = PT;
2557
886ed6ec
RS
2558 /* If we want non-literal replacement,
2559 perform substitution on the replacement string. */
2560 if (NILP (literal))
ca1d1d23 2561 {
d5db4077 2562 int length = SBYTES (newtext);
68e69fbd
RS
2563 unsigned char *substed;
2564 int substed_alloc_size, substed_len;
3bc25e52
KH
2565 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2566 int str_multibyte = STRING_MULTIBYTE (newtext);
2567 Lisp_Object rev_tbl;
886ed6ec 2568 int really_changed = 0;
3bc25e52
KH
2569
2570 rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2571 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2572 make_number (0))
2573 : Qnil);
ac3b28b1 2574
68e69fbd
RS
2575 substed_alloc_size = length * 2 + 100;
2576 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2577 substed_len = 0;
2578
3bc25e52
KH
2579 /* Go thru NEWTEXT, producing the actual text to insert in
2580 SUBSTED while adjusting multibyteness to that of the current
2581 buffer. */
ca1d1d23 2582
ac3b28b1 2583 for (pos_byte = 0, pos = 0; pos_byte < length;)
ca1d1d23 2584 {
68e69fbd 2585 unsigned char str[MAX_MULTIBYTE_LENGTH];
f8ce8a0d
GM
2586 unsigned char *add_stuff = NULL;
2587 int add_len = 0;
68e69fbd 2588 int idx = -1;
9a76659d 2589
3bc25e52
KH
2590 if (str_multibyte)
2591 {
eb99a8dd 2592 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
3bc25e52
KH
2593 if (!buf_multibyte)
2594 c = multibyte_char_to_unibyte (c, rev_tbl);
2595 }
2596 else
2597 {
2598 /* Note that we don't have to increment POS. */
5d69fe10 2599 c = SREF (newtext, pos_byte++);
3bc25e52
KH
2600 if (buf_multibyte)
2601 c = unibyte_char_to_multibyte (c);
2602 }
ac3b28b1 2603
68e69fbd
RS
2604 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2605 or set IDX to a match index, which means put that part
2606 of the buffer text into SUBSTED. */
2607
ca1d1d23
JB
2608 if (c == '\\')
2609 {
886ed6ec
RS
2610 really_changed = 1;
2611
3bc25e52
KH
2612 if (str_multibyte)
2613 {
eb99a8dd
KH
2614 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2615 pos, pos_byte);
3bc25e52
KH
2616 if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2617 c = multibyte_char_to_unibyte (c, rev_tbl);
2618 }
2619 else
2620 {
d5db4077 2621 c = SREF (newtext, pos_byte++);
3bc25e52
KH
2622 if (buf_multibyte)
2623 c = unibyte_char_to_multibyte (c);
2624 }
2625
ca1d1d23 2626 if (c == '&')
68e69fbd 2627 idx = sub;
78445046 2628 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
2629 {
2630 if (search_regs.start[c - '0'] >= 1)
68e69fbd 2631 idx = c - '0';
ca1d1d23 2632 }
636a5e28 2633 else if (c == '\\')
68e69fbd 2634 add_len = 1, add_stuff = "\\";
636a5e28 2635 else
3bc25e52
KH
2636 {
2637 xfree (substed);
2638 error ("Invalid use of `\\' in replacement text");
2639 }
ca1d1d23
JB
2640 }
2641 else
68e69fbd
RS
2642 {
2643 add_len = CHAR_STRING (c, str);
2644 add_stuff = str;
2645 }
2646
2647 /* If we want to copy part of a previous match,
2648 set up ADD_STUFF and ADD_LEN to point to it. */
2649 if (idx >= 0)
2650 {
2651 int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2652 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2653 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2654 move_gap (search_regs.start[idx]);
2655 add_stuff = BYTE_POS_ADDR (begbyte);
2656 }
2657
2658 /* Now the stuff we want to add to SUBSTED
2659 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2660
2661 /* Make sure SUBSTED is big enough. */
2662 if (substed_len + add_len >= substed_alloc_size)
2663 {
2664 substed_alloc_size = substed_len + add_len + 500;
2665 substed = (unsigned char *) xrealloc (substed,
2666 substed_alloc_size + 1);
2667 }
2668
2669 /* Now add to the end of SUBSTED. */
f8ce8a0d
GM
2670 if (add_stuff)
2671 {
2672 bcopy (add_stuff, substed + substed_len, add_len);
2673 substed_len += add_len;
2674 }
ca1d1d23 2675 }
68e69fbd 2676
886ed6ec 2677 if (really_changed)
80460525
KH
2678 {
2679 if (buf_multibyte)
2680 {
2681 int nchars = multibyte_chars_in_text (substed, substed_len);
68e69fbd 2682
80460525
KH
2683 newtext = make_multibyte_string (substed, nchars, substed_len);
2684 }
2685 else
2686 newtext = make_unibyte_string (substed, substed_len);
2687 }
68e69fbd 2688 xfree (substed);
ca1d1d23
JB
2689 }
2690
886ed6ec
RS
2691 /* Replace the old text with the new in the cleanest possible way. */
2692 replace_range (search_regs.start[sub], search_regs.end[sub],
2693 newtext, 1, 0, 1);
d5db4077 2694 newpoint = search_regs.start[sub] + SCHARS (newtext);
ca1d1d23
JB
2695
2696 if (case_action == all_caps)
886ed6ec
RS
2697 Fupcase_region (make_number (search_regs.start[sub]),
2698 make_number (newpoint));
ca1d1d23 2699 else if (case_action == cap_initial)
886ed6ec
RS
2700 Fupcase_initials_region (make_number (search_regs.start[sub]),
2701 make_number (newpoint));
3e18eecf 2702
98e942e0
RS
2703 /* Adjust search data for this change. */
2704 {
5b88a2c5 2705 int oldend = search_regs.end[sub];
41c01205 2706 int oldstart = search_regs.start[sub];
98e942e0
RS
2707 int change = newpoint - search_regs.end[sub];
2708 int i;
2709
2710 for (i = 0; i < search_regs.num_regs; i++)
2711 {
41c01205 2712 if (search_regs.start[i] >= oldend)
98e942e0 2713 search_regs.start[i] += change;
41c01205
DK
2714 else if (search_regs.start[i] > oldstart)
2715 search_regs.start[i] = oldstart;
2716 if (search_regs.end[i] >= oldend)
98e942e0 2717 search_regs.end[i] += change;
41c01205
DK
2718 else if (search_regs.end[i] > oldstart)
2719 search_regs.end[i] = oldstart;
98e942e0
RS
2720 }
2721 }
2722
b0eba991 2723 /* Put point back where it was in the text. */
8d808a65 2724 if (opoint <= 0)
fa8ed3e0 2725 TEMP_SET_PT (opoint + ZV);
b0eba991 2726 else
fa8ed3e0 2727 TEMP_SET_PT (opoint);
b0eba991
RS
2728
2729 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2730 move_if_not_intangible (newpoint);
177c0ea7 2731
ca1d1d23
JB
2732 return Qnil;
2733}
2734\f
2735static Lisp_Object
2736match_limit (num, beginningp)
2737 Lisp_Object num;
2738 int beginningp;
2739{
2740 register int n;
2741
b7826503 2742 CHECK_NUMBER (num);
ca1d1d23 2743 n = XINT (num);
f90a5bf5 2744 if (n < 0)
bd2cbd56 2745 args_out_of_range (num, make_number (0));
f90a5bf5
RS
2746 if (search_regs.num_regs <= 0)
2747 error ("No match data, because no search succeeded");
9b9ceb61 2748 if (n >= search_regs.num_regs
4746118a 2749 || search_regs.start[n] < 0)
ca1d1d23
JB
2750 return Qnil;
2751 return (make_number ((beginningp) ? search_regs.start[n]
2752 : search_regs.end[n]));
2753}
2754
2755DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
8c1a1077
PJ
2756 doc: /* Return position of start of text matched by last search.
2757SUBEXP, a number, specifies which parenthesized expression in the last
2758 regexp.
2759Value is nil if SUBEXPth pair didn't match, or there were less than
2760 SUBEXP pairs.
2761Zero means the entire text matched by the whole regexp or whole string. */)
2762 (subexp)
5806161b 2763 Lisp_Object subexp;
ca1d1d23 2764{
5806161b 2765 return match_limit (subexp, 1);
ca1d1d23
JB
2766}
2767
2768DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
8c1a1077
PJ
2769 doc: /* Return position of end of text matched by last search.
2770SUBEXP, a number, specifies which parenthesized expression in the last
2771 regexp.
2772Value is nil if SUBEXPth pair didn't match, or there were less than
2773 SUBEXP pairs.
2774Zero means the entire text matched by the whole regexp or whole string. */)
2775 (subexp)
5806161b 2776 Lisp_Object subexp;
ca1d1d23 2777{
5806161b 2778 return match_limit (subexp, 0);
177c0ea7 2779}
ca1d1d23 2780
abd0071c 2781DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
8c1a1077
PJ
2782 doc: /* Return a list containing all info on what the last search matched.
2783Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2784All the elements are markers or nil (nil if the Nth pair didn't match)
2785if the last match was on a buffer; integers or nil if a string was matched.
2786Use `store-match-data' to reinstate the data in this list.
2787
41c01205
DK
2788If INTEGERS (the optional first argument) is non-nil, always use
2789integers \(rather than markers) to represent buffer positions. In
2790this case, and if the last match was in a buffer, the buffer will get
2791stored as one additional element at the end of the list.
2792
abd0071c
KS
2793If REUSE is a list, reuse it as part of the value. If REUSE is long
2794enough to hold all the values, and if INTEGERS is non-nil, no consing
2795is done.
2796
2797If optional third arg RESEAT is non-nil, any previous markers on the
2798REUSE list will be modified to point to nowhere.
2799
140a6b7e 2800Return value is undefined if the last search failed. */)
abd0071c
KS
2801 (integers, reuse, reseat)
2802 Lisp_Object integers, reuse, reseat;
ca1d1d23 2803{
56256c2a 2804 Lisp_Object tail, prev;
4746118a 2805 Lisp_Object *data;
ca1d1d23
JB
2806 int i, len;
2807
abd0071c
KS
2808 if (!NILP (reseat))
2809 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2810 if (MARKERP (XCAR (tail)))
2811 {
51f10faa 2812 unchain_marker (XMARKER (XCAR (tail)));
abd0071c
KS
2813 XSETCAR (tail, Qnil);
2814 }
2815
daa37602 2816 if (NILP (last_thing_searched))
c36bcf1b 2817 return Qnil;
daa37602 2818
6bbd7a29
GM
2819 prev = Qnil;
2820
41c01205 2821 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
4746118a
JB
2822 * sizeof (Lisp_Object));
2823
41c01205 2824 len = 0;
4746118a 2825 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2826 {
2827 int start = search_regs.start[i];
2828 if (start >= 0)
2829 {
56256c2a
RS
2830 if (EQ (last_thing_searched, Qt)
2831 || ! NILP (integers))
ca1d1d23 2832 {
c235cce7
KH
2833 XSETFASTINT (data[2 * i], start);
2834 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2835 }
0ed62dc7 2836 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2837 {
2838 data[2 * i] = Fmake_marker ();
daa37602
JB
2839 Fset_marker (data[2 * i],
2840 make_number (start),
2841 last_thing_searched);
ca1d1d23
JB
2842 data[2 * i + 1] = Fmake_marker ();
2843 Fset_marker (data[2 * i + 1],
177c0ea7 2844 make_number (search_regs.end[i]),
daa37602 2845 last_thing_searched);
ca1d1d23 2846 }
daa37602
JB
2847 else
2848 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2849 abort ();
2850
abd0071c 2851 len = 2 * i + 2;
ca1d1d23
JB
2852 }
2853 else
abd0071c 2854 data[2 * i] = data[2 * i + 1] = Qnil;
ca1d1d23 2855 }
56256c2a 2856
bd2cbd56 2857 if (BUFFERP (last_thing_searched) && !NILP (integers))
41c01205 2858 {
bd2cbd56 2859 data[len] = last_thing_searched;
41c01205
DK
2860 len++;
2861 }
2862
56256c2a
RS
2863 /* If REUSE is not usable, cons up the values and return them. */
2864 if (! CONSP (reuse))
41c01205 2865 return Flist (len, data);
56256c2a
RS
2866
2867 /* If REUSE is a list, store as many value elements as will fit
2868 into the elements of REUSE. */
2869 for (i = 0, tail = reuse; CONSP (tail);
c1d497be 2870 i++, tail = XCDR (tail))
56256c2a 2871 {
41c01205 2872 if (i < len)
f3fbd155 2873 XSETCAR (tail, data[i]);
56256c2a 2874 else
f3fbd155 2875 XSETCAR (tail, Qnil);
56256c2a
RS
2876 prev = tail;
2877 }
2878
2879 /* If we couldn't fit all value elements into REUSE,
2880 cons up the rest of them and add them to the end of REUSE. */
41c01205
DK
2881 if (i < len)
2882 XSETCDR (prev, Flist (len - i, data + i));
56256c2a
RS
2883
2884 return reuse;
ca1d1d23
JB
2885}
2886
51f10faa
KS
2887/* Internal usage only:
2888 If RESEAT is `evaporate', put the markers back on the free list
2889 immediately. No other references to the markers must exist in this case,
2890 so it is used only internally on the unwind stack and save-match-data from
2891 Lisp. */
ca1d1d23 2892
abd0071c 2893DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
8c1a1077 2894 doc: /* Set internal data on last search match from elements of LIST.
abd0071c
KS
2895LIST should have been created by calling `match-data' previously.
2896
51f10faa 2897If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
abd0071c
KS
2898 (list, reseat)
2899 register Lisp_Object list, reseat;
ca1d1d23
JB
2900{
2901 register int i;
2902 register Lisp_Object marker;
2903
7074fde6
FP
2904 if (running_asynch_code)
2905 save_search_regs ();
2906
29100cea 2907 CHECK_LIST (list);
ca1d1d23 2908
41c01205
DK
2909 /* Unless we find a marker with a buffer or an explicit buffer
2910 in LIST, assume that this match data came from a string. */
daa37602
JB
2911 last_thing_searched = Qt;
2912
4746118a
JB
2913 /* Allocate registers if they don't already exist. */
2914 {
d084e942 2915 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
2916
2917 if (length > search_regs.num_regs)
2918 {
1113d9db
JB
2919 if (search_regs.num_regs == 0)
2920 {
2921 search_regs.start
2922 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2923 search_regs.end
2924 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2925 }
4746118a 2926 else
1113d9db
JB
2927 {
2928 search_regs.start
2929 = (regoff_t *) xrealloc (search_regs.start,
2930 length * sizeof (regoff_t));
2931 search_regs.end
2932 = (regoff_t *) xrealloc (search_regs.end,
2933 length * sizeof (regoff_t));
2934 }
4746118a 2935
e62371e9
KH
2936 for (i = search_regs.num_regs; i < length; i++)
2937 search_regs.start[i] = -1;
2938
487282dc 2939 search_regs.num_regs = length;
4746118a 2940 }
ca1d1d23 2941
abd0071c 2942 for (i = 0; CONSP (list); i++)
41c01205 2943 {
abd0071c 2944 marker = XCAR (list);
bd2cbd56 2945 if (BUFFERP (marker))
c3762cbd 2946 {
bd2cbd56 2947 last_thing_searched = marker;
c3762cbd
DK
2948 break;
2949 }
2950 if (i >= length)
2951 break;
41c01205
DK
2952 if (NILP (marker))
2953 {
2954 search_regs.start[i] = -1;
abd0071c 2955 list = XCDR (list);
41c01205
DK
2956 }
2957 else
2958 {
2959 int from;
abd0071c 2960 Lisp_Object m;
e2811828 2961
abd0071c 2962 m = marker;
41c01205
DK
2963 if (MARKERP (marker))
2964 {
2965 if (XMARKER (marker)->buffer == 0)
2966 XSETFASTINT (marker, 0);
2967 else
2968 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2969 }
e2811828 2970
41c01205
DK
2971 CHECK_NUMBER_COERCE_MARKER (marker);
2972 from = XINT (marker);
e2811828 2973
abd0071c
KS
2974 if (!NILP (reseat) && MARKERP (m))
2975 {
2976 if (EQ (reseat, Qevaporate))
2977 free_marker (m);
2978 else
2979 unchain_marker (XMARKER (m));
9ad54a7e 2980 XSETCAR (list, Qnil);
abd0071c
KS
2981 }
2982
2983 if ((list = XCDR (list), !CONSP (list)))
2984 break;
2985
2986 m = marker = XCAR (list);
2987
41c01205
DK
2988 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2989 XSETFASTINT (marker, 0);
e2811828 2990
41c01205
DK
2991 CHECK_NUMBER_COERCE_MARKER (marker);
2992 search_regs.start[i] = from;
2993 search_regs.end[i] = XINT (marker);
abd0071c
KS
2994
2995 if (!NILP (reseat) && MARKERP (m))
2996 {
2997 if (EQ (reseat, Qevaporate))
2998 free_marker (m);
2999 else
3000 unchain_marker (XMARKER (m));
9ad54a7e 3001 XSETCAR (list, Qnil);
abd0071c 3002 }
41c01205 3003 }
abd0071c 3004 list = XCDR (list);
41c01205 3005 }
ca1d1d23 3006
41c01205
DK
3007 for (; i < search_regs.num_regs; i++)
3008 search_regs.start[i] = -1;
3009 }
ca1d1d23 3010
177c0ea7 3011 return Qnil;
ca1d1d23
JB
3012}
3013
7074fde6
FP
3014/* If non-zero the match data have been saved in saved_search_regs
3015 during the execution of a sentinel or filter. */
75ebf74b 3016static int search_regs_saved;
7074fde6 3017static struct re_registers saved_search_regs;
41c01205 3018static Lisp_Object saved_last_thing_searched;
7074fde6
FP
3019
3020/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3021 if asynchronous code (filter or sentinel) is running. */
3022static void
3023save_search_regs ()
3024{
3025 if (!search_regs_saved)
3026 {
3027 saved_search_regs.num_regs = search_regs.num_regs;
3028 saved_search_regs.start = search_regs.start;
3029 saved_search_regs.end = search_regs.end;
41c01205
DK
3030 saved_last_thing_searched = last_thing_searched;
3031 last_thing_searched = Qnil;
7074fde6 3032 search_regs.num_regs = 0;
2d4a771a
RS
3033 search_regs.start = 0;
3034 search_regs.end = 0;
7074fde6
FP
3035
3036 search_regs_saved = 1;
3037 }
3038}
3039
3040/* Called upon exit from filters and sentinels. */
3041void
abd0071c 3042restore_search_regs ()
7074fde6
FP
3043{
3044 if (search_regs_saved)
3045 {
3046 if (search_regs.num_regs > 0)
3047 {
3048 xfree (search_regs.start);
3049 xfree (search_regs.end);
3050 }
3051 search_regs.num_regs = saved_search_regs.num_regs;
3052 search_regs.start = saved_search_regs.start;
3053 search_regs.end = saved_search_regs.end;
41c01205
DK
3054 last_thing_searched = saved_last_thing_searched;
3055 saved_last_thing_searched = Qnil;
7074fde6
FP
3056 search_regs_saved = 0;
3057 }
3058}
3059
abd0071c
KS
3060static Lisp_Object
3061unwind_set_match_data (list)
3062 Lisp_Object list;
3063{
51f10faa 3064 /* It is safe to free (evaporate) the markers immediately. */
abd0071c
KS
3065 return Fset_match_data (list, Qevaporate);
3066}
3067
3068/* Called to unwind protect the match data. */
3069void
3070record_unwind_save_match_data ()
3071{
3072 record_unwind_protect (unwind_set_match_data,
3073 Fmatch_data (Qnil, Qnil, Qnil));
3074}
3075
ca1d1d23
JB
3076/* Quote a string to inactivate reg-expr chars */
3077
3078DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
8c1a1077
PJ
3079 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
3080 (string)
5806161b 3081 Lisp_Object string;
ca1d1d23
JB
3082{
3083 register unsigned char *in, *out, *end;
3084 register unsigned char *temp;
0c8533c6 3085 int backslashes_added = 0;
ca1d1d23 3086
b7826503 3087 CHECK_STRING (string);
ca1d1d23 3088
d5db4077 3089 temp = (unsigned char *) alloca (SBYTES (string) * 2);
ca1d1d23
JB
3090
3091 /* Now copy the data into the new string, inserting escapes. */
3092
d5db4077
KR
3093 in = SDATA (string);
3094 end = in + SBYTES (string);
177c0ea7 3095 out = temp;
ca1d1d23
JB
3096
3097 for (; in != end; in++)
3098 {
66bc6082 3099 if (*in == '['
ca1d1d23
JB
3100 || *in == '*' || *in == '.' || *in == '\\'
3101 || *in == '?' || *in == '+'
3102 || *in == '^' || *in == '$')
0c8533c6 3103 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
3104 *out++ = *in;
3105 }
3106
3f8100f1 3107 return make_specified_string (temp,
d5db4077 3108 SCHARS (string) + backslashes_added,
3f8100f1
RS
3109 out - temp,
3110 STRING_MULTIBYTE (string));
ca1d1d23 3111}
177c0ea7 3112\f
dfcf069d 3113void
ca1d1d23
JB
3114syms_of_search ()
3115{
3116 register int i;
3117
487282dc
KH
3118 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3119 {
3120 searchbufs[i].buf.allocated = 100;
b23c0a83 3121 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
487282dc
KH
3122 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3123 searchbufs[i].regexp = Qnil;
ecdb561e 3124 searchbufs[i].whitespace_regexp = Qnil;
b69e3c18 3125 searchbufs[i].syntax_table = Qnil;
487282dc 3126 staticpro (&searchbufs[i].regexp);
aa77b5ce 3127 staticpro (&searchbufs[i].whitespace_regexp);
b69e3c18 3128 staticpro (&searchbufs[i].syntax_table);
487282dc
KH
3129 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3130 }
3131 searchbuf_head = &searchbufs[0];
ca1d1d23
JB
3132
3133 Qsearch_failed = intern ("search-failed");
3134 staticpro (&Qsearch_failed);
3135 Qinvalid_regexp = intern ("invalid-regexp");
3136 staticpro (&Qinvalid_regexp);
3137
3138 Fput (Qsearch_failed, Qerror_conditions,
3139 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3140 Fput (Qsearch_failed, Qerror_message,
3141 build_string ("Search failed"));
3142
3143 Fput (Qinvalid_regexp, Qerror_conditions,
3144 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3145 Fput (Qinvalid_regexp, Qerror_message,
3146 build_string ("Invalid regexp"));
3147
daa37602
JB
3148 last_thing_searched = Qnil;
3149 staticpro (&last_thing_searched);
3150
0f6af254
DK
3151 saved_last_thing_searched = Qnil;
3152 staticpro (&saved_last_thing_searched);
3153
41a33295 3154 DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
e2811828 3155 doc: /* Regexp to substitute for bunches of spaces in regexp search.
f31a9a68
RS
3156Some commands use this for user-specified regexps.
3157Spaces that occur inside character classes or repetition operators
3158or other such regexp constructs are not replaced with this.
3159A value of nil (which is the normal value) means treat spaces literally. */);
41a33295 3160 Vsearch_spaces_regexp = Qnil;
f31a9a68 3161
ca1d1d23 3162 defsubr (&Slooking_at);
b819a390
RS
3163 defsubr (&Sposix_looking_at);
3164 defsubr (&Sstring_match);
3165 defsubr (&Sposix_string_match);
ca1d1d23
JB
3166 defsubr (&Ssearch_forward);
3167 defsubr (&Ssearch_backward);
3168 defsubr (&Sword_search_forward);
3169 defsubr (&Sword_search_backward);
3170 defsubr (&Sre_search_forward);
3171 defsubr (&Sre_search_backward);
b819a390
RS
3172 defsubr (&Sposix_search_forward);
3173 defsubr (&Sposix_search_backward);
ca1d1d23
JB
3174 defsubr (&Sreplace_match);
3175 defsubr (&Smatch_beginning);
3176 defsubr (&Smatch_end);
3177 defsubr (&Smatch_data);
3f1c005b 3178 defsubr (&Sset_match_data);
ca1d1d23
JB
3179 defsubr (&Sregexp_quote);
3180}
ab5796a9
MB
3181
3182/* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3183 (do not change this comment) */