(regex_compile): Set the new `used_syntax' bit.
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
0b5538bd 2 Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2002, 2003,
aaef169d 3 2004, 2005, 2006 Free Software Foundation, Inc.
ca1d1d23
JB
4
5This file is part of GNU Emacs.
6
7GNU Emacs is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
7c938215 9the Free Software Foundation; either version 2, or (at your option)
ca1d1d23
JB
10any later version.
11
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU Emacs; see the file COPYING. If not, write to
4fc5845f
LK
19the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20Boston, MA 02110-1301, USA. */
ca1d1d23
JB
21
22
18160b98 23#include <config.h>
ca1d1d23
JB
24#include "lisp.h"
25#include "syntax.h"
5679531d 26#include "category.h"
ca1d1d23 27#include "buffer.h"
5679531d 28#include "charset.h"
9169c321 29#include "region-cache.h"
ca1d1d23 30#include "commands.h"
9ac0d9e0 31#include "blockinput.h"
bf1760bb 32#include "intervals.h"
4746118a 33
ca1d1d23
JB
34#include <sys/types.h>
35#include "regex.h"
36
1d288aef 37#define REGEXP_CACHE_SIZE 20
ca1d1d23 38
487282dc
KH
39/* If the regexp is non-nil, then the buffer contains the compiled form
40 of that regexp, suitable for searching. */
1d288aef
RS
41struct regexp_cache
42{
487282dc 43 struct regexp_cache *next;
ecdb561e 44 Lisp_Object regexp, whitespace_regexp;
b69e3c18
CY
45 /* Syntax table for which the regexp applies. We need this because
46 of character classes. */
47 Lisp_Object syntax_table;
487282dc
KH
48 struct re_pattern_buffer buf;
49 char fastmap[0400];
b819a390
RS
50 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
51 char posix;
487282dc 52};
ca1d1d23 53
487282dc
KH
54/* The instances of that struct. */
55struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 56
487282dc
KH
57/* The head of the linked list; points to the most recently used buffer. */
58struct regexp_cache *searchbuf_head;
ca1d1d23 59
ca1d1d23 60
4746118a
JB
61/* Every call to re_match, etc., must pass &search_regs as the regs
62 argument unless you can show it is unnecessary (i.e., if re_match
63 is certainly going to be called again before region-around-match
64 can be called).
65
66 Since the registers are now dynamically allocated, we need to make
67 sure not to refer to the Nth register before checking that it has
1113d9db
JB
68 been allocated by checking search_regs.num_regs.
69
70 The regex code keeps track of whether it has allocated the search
487282dc
KH
71 buffer using bits in the re_pattern_buffer. This means that whenever
72 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
73 allocated any registers, and will allocate new registers the next
74 time you call a searching or matching function. Therefore, we need
75 to call re_set_registers after compiling a new pattern or after
76 setting the match registers, so that the regex functions will be
77 able to free or re-allocate it properly. */
ca1d1d23
JB
78static struct re_registers search_regs;
79
daa37602
JB
80/* The buffer in which the last search was performed, or
81 Qt if the last search was done in a string;
82 Qnil if no searching has been done yet. */
83static Lisp_Object last_thing_searched;
ca1d1d23 84
8e6208c5 85/* error condition signaled when regexp compile_pattern fails */
ca1d1d23
JB
86
87Lisp_Object Qinvalid_regexp;
88
06f77a8a
KS
89/* Error condition used for failing searches */
90Lisp_Object Qsearch_failed;
91
41a33295 92Lisp_Object Vsearch_spaces_regexp;
f31a9a68 93
ca325161 94static void set_search_regs ();
044f81f1 95static void save_search_regs ();
facdc750
RS
96static int simple_search ();
97static int boyer_moore ();
b819a390 98static int search_buffer ();
01d09305 99static void matcher_overflow () NO_RETURN;
b819a390 100
ca1d1d23
JB
101static void
102matcher_overflow ()
103{
104 error ("Stack overflow in regexp matcher");
105}
106
b819a390
RS
107/* Compile a regexp and signal a Lisp error if anything goes wrong.
108 PATTERN is the pattern to compile.
109 CP is the place to put the result.
facdc750 110 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
111 REGP is the structure that says where to store the "register"
112 values that will result from matching this pattern.
113 If it is 0, we should compile the pattern not to record any
114 subexpression bounds.
115 POSIX is nonzero if we want full backtracking (POSIX style)
5679531d
KH
116 for this pattern. 0 means backtrack only enough to get a valid match.
117 MULTIBYTE is nonzero if we want to handle multibyte characters in
118 PATTERN. 0 means all multibyte characters are recognized just as
ecdb561e
RS
119 sequences of binary data.
120
41a33295 121 The behavior also depends on Vsearch_spaces_regexp. */
ca1d1d23 122
487282dc 123static void
5679531d 124compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
487282dc 125 struct regexp_cache *cp;
ca1d1d23 126 Lisp_Object pattern;
facdc750 127 Lisp_Object translate;
487282dc 128 struct re_registers *regp;
b819a390 129 int posix;
5679531d 130 int multibyte;
ca1d1d23 131{
7276d3d8 132 unsigned char *raw_pattern;
f8bd51c4 133 int raw_pattern_size;
d451e4db 134 char *val;
b819a390 135 reg_syntax_t old;
ca1d1d23 136
f8bd51c4
KH
137 /* MULTIBYTE says whether the text to be searched is multibyte.
138 We must convert PATTERN to match that, or we will not really
139 find things right. */
140
141 if (multibyte == STRING_MULTIBYTE (pattern))
142 {
d5db4077
KR
143 raw_pattern = (unsigned char *) SDATA (pattern);
144 raw_pattern_size = SBYTES (pattern);
f8bd51c4
KH
145 }
146 else if (multibyte)
147 {
d5db4077
KR
148 raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
149 SCHARS (pattern));
7276d3d8 150 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
151 copy_text (SDATA (pattern), raw_pattern,
152 SCHARS (pattern), 0, 1);
f8bd51c4
KH
153 }
154 else
155 {
156 /* Converting multibyte to single-byte.
157
158 ??? Perhaps this conversion should be done in a special way
159 by subtracting nonascii-insert-offset from each non-ASCII char,
160 so that only the multibyte chars which really correspond to
161 the chosen single-byte character set can possibly match. */
d5db4077 162 raw_pattern_size = SCHARS (pattern);
7276d3d8 163 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
164 copy_text (SDATA (pattern), raw_pattern,
165 SBYTES (pattern), 1, 0);
f8bd51c4
KH
166 }
167
487282dc 168 cp->regexp = Qnil;
59fab369 169 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
b819a390 170 cp->posix = posix;
5679531d 171 cp->buf.multibyte = multibyte;
41a33295 172 cp->whitespace_regexp = Vsearch_spaces_regexp;
b69e3c18 173 cp->syntax_table = current_buffer->syntax_table;
e7b2dd2e
RS
174 /* rms: I think BLOCK_INPUT is not needed here any more,
175 because regex.c defines malloc to call xmalloc.
176 Using BLOCK_INPUT here means the debugger won't run if an error occurs.
177 So let's turn it off. */
178 /* BLOCK_INPUT; */
fb4a568d 179 old = re_set_syntax (RE_SYNTAX_EMACS
b819a390 180 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
f31a9a68 181
41a33295
RS
182 re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
183 : SDATA (Vsearch_spaces_regexp));
f31a9a68 184
7276d3d8
RS
185 val = (char *) re_compile_pattern ((char *)raw_pattern,
186 raw_pattern_size, &cp->buf);
f31a9a68
RS
187
188 re_set_whitespace_regexp (NULL);
189
b819a390 190 re_set_syntax (old);
e7b2dd2e 191 /* UNBLOCK_INPUT; */
ca1d1d23 192 if (val)
06f77a8a 193 xsignal1 (Qinvalid_regexp, build_string (val));
1113d9db 194
487282dc 195 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
196}
197
6efc7887
RS
198/* Shrink each compiled regexp buffer in the cache
199 to the size actually used right now.
200 This is called from garbage collection. */
201
202void
203shrink_regexp_cache ()
204{
a968f437 205 struct regexp_cache *cp;
6efc7887
RS
206
207 for (cp = searchbuf_head; cp != 0; cp = cp->next)
208 {
209 cp->buf.allocated = cp->buf.used;
210 cp->buf.buffer
b23c0a83 211 = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
6efc7887
RS
212 }
213}
214
e5b94d44
CY
215/* Clear the regexp cache.
216 There is no danger of memory leak here because re_compile_pattern
217 automagically manages the memory in each re_pattern_buffer struct,
218 based on its `allocated' and `buffer' values. */
219void
220clear_regexp_cache ()
221{
222 int i;
223
224 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
225 searchbufs[i].regexp = Qnil;
226}
227
487282dc 228/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
229 the cache.
230 PATTERN is the pattern to compile.
facdc750 231 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
232 REGP is the structure that says where to store the "register"
233 values that will result from matching this pattern.
234 If it is 0, we should compile the pattern not to record any
235 subexpression bounds.
236 POSIX is nonzero if we want full backtracking (POSIX style)
237 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
238
239struct re_pattern_buffer *
0c8533c6 240compile_pattern (pattern, regp, translate, posix, multibyte)
487282dc
KH
241 Lisp_Object pattern;
242 struct re_registers *regp;
facdc750 243 Lisp_Object translate;
0c8533c6 244 int posix, multibyte;
487282dc
KH
245{
246 struct regexp_cache *cp, **cpp;
247
248 for (cpp = &searchbuf_head; ; cpp = &cp->next)
249 {
250 cp = *cpp;
f1b9c7c1
KR
251 /* Entries are initialized to nil, and may be set to nil by
252 compile_pattern_1 if the pattern isn't valid. Don't apply
49a5f770
KR
253 string accessors in those cases. However, compile_pattern_1
254 is only applied to the cache entry we pick here to reuse. So
255 nil should never appear before a non-nil entry. */
7c752c80 256 if (NILP (cp->regexp))
f1b9c7c1 257 goto compile_it;
d5db4077 258 if (SCHARS (cp->regexp) == SCHARS (pattern)
cf69b13e 259 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
1d288aef 260 && !NILP (Fstring_equal (cp->regexp, pattern))
59fab369 261 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
5679531d 262 && cp->posix == posix
ecdb561e 263 && cp->buf.multibyte == multibyte
b69e3c18
CY
264 /* TODO: Strictly speaking, we only need to match syntax
265 tables when a character class like [[:space:]] occurs in
266 the pattern. -- cyd*/
267 && EQ (cp->syntax_table, current_buffer->syntax_table)
41a33295 268 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp)))
487282dc
KH
269 break;
270
f1b9c7c1
KR
271 /* If we're at the end of the cache, compile into the nil cell
272 we found, or the last (least recently used) cell with a
273 string value. */
487282dc
KH
274 if (cp->next == 0)
275 {
f1b9c7c1 276 compile_it:
5679531d 277 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
487282dc
KH
278 break;
279 }
280 }
281
282 /* When we get here, cp (aka *cpp) contains the compiled pattern,
283 either because we found it in the cache or because we just compiled it.
284 Move it to the front of the queue to mark it as most recently used. */
285 *cpp = cp->next;
286 cp->next = searchbuf_head;
287 searchbuf_head = cp;
1113d9db 288
6639708c
RS
289 /* Advise the searching functions about the space we have allocated
290 for register data. */
291 if (regp)
292 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
293
487282dc 294 return &cp->buf;
ca1d1d23
JB
295}
296
ca1d1d23 297\f
b819a390
RS
298static Lisp_Object
299looking_at_1 (string, posix)
ca1d1d23 300 Lisp_Object string;
b819a390 301 int posix;
ca1d1d23
JB
302{
303 Lisp_Object val;
304 unsigned char *p1, *p2;
305 int s1, s2;
306 register int i;
487282dc 307 struct re_pattern_buffer *bufp;
ca1d1d23 308
7074fde6
FP
309 if (running_asynch_code)
310 save_search_regs ();
311
910c747a
RS
312 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
313 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
314 = current_buffer->case_eqv_table;
315
b7826503 316 CHECK_STRING (string);
487282dc
KH
317 bufp = compile_pattern (string, &search_regs,
318 (!NILP (current_buffer->case_fold_search)
0190922f 319 ? current_buffer->case_canon_table : Qnil),
0c8533c6
RS
320 posix,
321 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23
JB
322
323 immediate_quit = 1;
324 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
325
326 /* Get pointers and sizes of the two strings
327 that make up the visible portion of the buffer. */
328
329 p1 = BEGV_ADDR;
fa8ed3e0 330 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 331 p2 = GAP_END_ADDR;
fa8ed3e0 332 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
333 if (s1 < 0)
334 {
335 p2 = p1;
fa8ed3e0 336 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
337 s1 = 0;
338 }
339 if (s2 < 0)
340 {
fa8ed3e0 341 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
342 s2 = 0;
343 }
8bb43c28
RS
344
345 re_match_object = Qnil;
177c0ea7 346
487282dc 347 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
fa8ed3e0
RS
348 PT_BYTE - BEGV_BYTE, &search_regs,
349 ZV_BYTE - BEGV_BYTE);
de182d70 350 immediate_quit = 0;
177c0ea7 351
ca1d1d23
JB
352 if (i == -2)
353 matcher_overflow ();
354
355 val = (0 <= i ? Qt : Qnil);
fa8ed3e0
RS
356 if (i >= 0)
357 for (i = 0; i < search_regs.num_regs; i++)
358 if (search_regs.start[i] >= 0)
359 {
360 search_regs.start[i]
361 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
362 search_regs.end[i]
363 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
364 }
a3668d92 365 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
366 return val;
367}
368
b819a390 369DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
8c1a1077
PJ
370 doc: /* Return t if text after point matches regular expression REGEXP.
371This function modifies the match data that `match-beginning',
372`match-end' and `match-data' access; save and restore the match
373data if you want to preserve them. */)
374 (regexp)
94f94972 375 Lisp_Object regexp;
b819a390 376{
94f94972 377 return looking_at_1 (regexp, 0);
b819a390
RS
378}
379
380DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
8c1a1077
PJ
381 doc: /* Return t if text after point matches regular expression REGEXP.
382Find the longest match, in accord with Posix regular expression rules.
383This function modifies the match data that `match-beginning',
384`match-end' and `match-data' access; save and restore the match
385data if you want to preserve them. */)
386 (regexp)
94f94972 387 Lisp_Object regexp;
b819a390 388{
94f94972 389 return looking_at_1 (regexp, 1);
b819a390
RS
390}
391\f
392static Lisp_Object
393string_match_1 (regexp, string, start, posix)
ca1d1d23 394 Lisp_Object regexp, string, start;
b819a390 395 int posix;
ca1d1d23
JB
396{
397 int val;
487282dc 398 struct re_pattern_buffer *bufp;
0c8533c6
RS
399 int pos, pos_byte;
400 int i;
ca1d1d23 401
7074fde6
FP
402 if (running_asynch_code)
403 save_search_regs ();
404
b7826503
PJ
405 CHECK_STRING (regexp);
406 CHECK_STRING (string);
ca1d1d23
JB
407
408 if (NILP (start))
0c8533c6 409 pos = 0, pos_byte = 0;
ca1d1d23
JB
410 else
411 {
d5db4077 412 int len = SCHARS (string);
ca1d1d23 413
b7826503 414 CHECK_NUMBER (start);
0c8533c6
RS
415 pos = XINT (start);
416 if (pos < 0 && -pos <= len)
417 pos = len + pos;
418 else if (0 > pos || pos > len)
ca1d1d23 419 args_out_of_range (string, start);
0c8533c6 420 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
421 }
422
910c747a
RS
423 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
424 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
425 = current_buffer->case_eqv_table;
426
487282dc
KH
427 bufp = compile_pattern (regexp, &search_regs,
428 (!NILP (current_buffer->case_fold_search)
0190922f 429 ? current_buffer->case_canon_table : Qnil),
0c8533c6
RS
430 posix,
431 STRING_MULTIBYTE (string));
ca1d1d23 432 immediate_quit = 1;
8bb43c28 433 re_match_object = string;
177c0ea7 434
d5db4077
KR
435 val = re_search (bufp, (char *) SDATA (string),
436 SBYTES (string), pos_byte,
437 SBYTES (string) - pos_byte,
ca1d1d23
JB
438 &search_regs);
439 immediate_quit = 0;
daa37602 440 last_thing_searched = Qt;
ca1d1d23
JB
441 if (val == -2)
442 matcher_overflow ();
443 if (val < 0) return Qnil;
0c8533c6
RS
444
445 for (i = 0; i < search_regs.num_regs; i++)
446 if (search_regs.start[i] >= 0)
447 {
448 search_regs.start[i]
449 = string_byte_to_char (string, search_regs.start[i]);
450 search_regs.end[i]
451 = string_byte_to_char (string, search_regs.end[i]);
452 }
453
454 return make_number (string_byte_to_char (string, val));
ca1d1d23 455}
e59a8453 456
b819a390 457DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
8c1a1077 458 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
b85acc4b 459Matching ignores case if `case-fold-search' is non-nil.
8c1a1077
PJ
460If third arg START is non-nil, start search at that index in STRING.
461For index of first char beyond the match, do (match-end 0).
462`match-end' and `match-beginning' also give indices of substrings
2bd2f32d
RS
463matched by parenthesis constructs in the pattern.
464
465You can use the function `match-string' to extract the substrings
466matched by the parenthesis constructions in REGEXP. */)
8c1a1077 467 (regexp, string, start)
b819a390
RS
468 Lisp_Object regexp, string, start;
469{
470 return string_match_1 (regexp, string, start, 0);
471}
472
473DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
8c1a1077
PJ
474 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
475Find the longest match, in accord with Posix regular expression rules.
476Case is ignored if `case-fold-search' is non-nil in the current buffer.
477If third arg START is non-nil, start search at that index in STRING.
478For index of first char beyond the match, do (match-end 0).
479`match-end' and `match-beginning' also give indices of substrings
480matched by parenthesis constructs in the pattern. */)
481 (regexp, string, start)
b819a390
RS
482 Lisp_Object regexp, string, start;
483{
484 return string_match_1 (regexp, string, start, 1);
485}
486
e59a8453
RS
487/* Match REGEXP against STRING, searching all of STRING,
488 and return the index of the match, or negative on failure.
489 This does not clobber the match data. */
490
491int
492fast_string_match (regexp, string)
493 Lisp_Object regexp, string;
494{
495 int val;
487282dc 496 struct re_pattern_buffer *bufp;
e59a8453 497
facdc750
RS
498 bufp = compile_pattern (regexp, 0, Qnil,
499 0, STRING_MULTIBYTE (string));
e59a8453 500 immediate_quit = 1;
8bb43c28 501 re_match_object = string;
177c0ea7 502
d5db4077
KR
503 val = re_search (bufp, (char *) SDATA (string),
504 SBYTES (string), 0,
505 SBYTES (string), 0);
e59a8453
RS
506 immediate_quit = 0;
507 return val;
508}
5679531d
KH
509
510/* Match REGEXP against STRING, searching all of STRING ignoring case,
511 and return the index of the match, or negative on failure.
0c8533c6
RS
512 This does not clobber the match data.
513 We assume that STRING contains single-byte characters. */
5679531d
KH
514
515extern Lisp_Object Vascii_downcase_table;
516
517int
b4577c63 518fast_c_string_match_ignore_case (regexp, string)
5679531d 519 Lisp_Object regexp;
96b80561 520 const char *string;
5679531d
KH
521{
522 int val;
523 struct re_pattern_buffer *bufp;
524 int len = strlen (string);
525
0c8533c6 526 regexp = string_make_unibyte (regexp);
b4577c63 527 re_match_object = Qt;
5679531d 528 bufp = compile_pattern (regexp, 0,
0190922f 529 Vascii_canon_table, 0,
f8bd51c4 530 0);
5679531d
KH
531 immediate_quit = 1;
532 val = re_search (bufp, string, len, 0, len, 0);
533 immediate_quit = 0;
534 return val;
535}
be5f4dfb
KH
536
537/* Like fast_string_match but ignore case. */
538
539int
540fast_string_match_ignore_case (regexp, string)
541 Lisp_Object regexp, string;
542{
543 int val;
544 struct re_pattern_buffer *bufp;
545
0190922f 546 bufp = compile_pattern (regexp, 0, Vascii_canon_table,
be5f4dfb
KH
547 0, STRING_MULTIBYTE (string));
548 immediate_quit = 1;
549 re_match_object = string;
550
551 val = re_search (bufp, (char *) SDATA (string),
552 SBYTES (string), 0,
553 SBYTES (string), 0);
554 immediate_quit = 0;
555 return val;
556}
ca1d1d23 557\f
9169c321
JB
558/* The newline cache: remembering which sections of text have no newlines. */
559
560/* If the user has requested newline caching, make sure it's on.
561 Otherwise, make sure it's off.
562 This is our cheezy way of associating an action with the change of
563 state of a buffer-local variable. */
564static void
565newline_cache_on_off (buf)
566 struct buffer *buf;
567{
568 if (NILP (buf->cache_long_line_scans))
569 {
570 /* It should be off. */
571 if (buf->newline_cache)
572 {
573 free_region_cache (buf->newline_cache);
574 buf->newline_cache = 0;
575 }
576 }
577 else
578 {
579 /* It should be on. */
580 if (buf->newline_cache == 0)
581 buf->newline_cache = new_region_cache ();
582 }
583}
584
585\f
586/* Search for COUNT instances of the character TARGET between START and END.
587
588 If COUNT is positive, search forwards; END must be >= START.
589 If COUNT is negative, search backwards for the -COUNTth instance;
590 END must be <= START.
591 If COUNT is zero, do anything you please; run rogue, for all I care.
592
593 If END is zero, use BEGV or ZV instead, as appropriate for the
594 direction indicated by COUNT.
ffd56f97
JB
595
596 If we find COUNT instances, set *SHORTAGE to zero, and return the
a9f2a45f 597 position past the COUNTth match. Note that for reverse motion
5bfe95c9 598 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 599
9169c321
JB
600 If we don't find COUNT instances before reaching END, set *SHORTAGE
601 to the number of TARGETs left unfound, and return END.
ffd56f97 602
087a5f81
RS
603 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
604 except when inside redisplay. */
605
dfcf069d 606int
9169c321
JB
607scan_buffer (target, start, end, count, shortage, allow_quit)
608 register int target;
609 int start, end;
610 int count;
611 int *shortage;
087a5f81 612 int allow_quit;
ca1d1d23 613{
9169c321 614 struct region_cache *newline_cache;
177c0ea7 615 int direction;
ffd56f97 616
9169c321
JB
617 if (count > 0)
618 {
619 direction = 1;
620 if (! end) end = ZV;
621 }
622 else
623 {
624 direction = -1;
625 if (! end) end = BEGV;
626 }
ffd56f97 627
9169c321
JB
628 newline_cache_on_off (current_buffer);
629 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
630
631 if (shortage != 0)
632 *shortage = 0;
633
087a5f81 634 immediate_quit = allow_quit;
ca1d1d23 635
ffd56f97 636 if (count > 0)
9169c321 637 while (start != end)
ca1d1d23 638 {
9169c321
JB
639 /* Our innermost scanning loop is very simple; it doesn't know
640 about gaps, buffer ends, or the newline cache. ceiling is
641 the position of the last character before the next such
642 obstacle --- the last character the dumb search loop should
643 examine. */
fa8ed3e0
RS
644 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
645 int start_byte = CHAR_TO_BYTE (start);
67ce527d 646 int tem;
9169c321
JB
647
648 /* If we're looking for a newline, consult the newline cache
649 to see where we can avoid some scanning. */
650 if (target == '\n' && newline_cache)
651 {
652 int next_change;
653 immediate_quit = 0;
654 while (region_cache_forward
fa8ed3e0
RS
655 (current_buffer, newline_cache, start_byte, &next_change))
656 start_byte = next_change;
cbe0db0d 657 immediate_quit = allow_quit;
9169c321 658
fa8ed3e0
RS
659 /* START should never be after END. */
660 if (start_byte > ceiling_byte)
661 start_byte = ceiling_byte;
9169c321
JB
662
663 /* Now the text after start is an unknown region, and
664 next_change is the position of the next known region. */
fa8ed3e0 665 ceiling_byte = min (next_change - 1, ceiling_byte);
9169c321
JB
666 }
667
668 /* The dumb loop can only scan text stored in contiguous
669 bytes. BUFFER_CEILING_OF returns the last character
670 position that is contiguous, so the ceiling is the
671 position after that. */
67ce527d
KH
672 tem = BUFFER_CEILING_OF (start_byte);
673 ceiling_byte = min (tem, ceiling_byte);
9169c321
JB
674
675 {
177c0ea7 676 /* The termination address of the dumb loop. */
fa8ed3e0
RS
677 register unsigned char *ceiling_addr
678 = BYTE_POS_ADDR (ceiling_byte) + 1;
679 register unsigned char *cursor
680 = BYTE_POS_ADDR (start_byte);
9169c321
JB
681 unsigned char *base = cursor;
682
683 while (cursor < ceiling_addr)
684 {
685 unsigned char *scan_start = cursor;
686
687 /* The dumb loop. */
688 while (*cursor != target && ++cursor < ceiling_addr)
689 ;
690
691 /* If we're looking for newlines, cache the fact that
692 the region from start to cursor is free of them. */
693 if (target == '\n' && newline_cache)
694 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
695 start_byte + scan_start - base,
696 start_byte + cursor - base);
9169c321
JB
697
698 /* Did we find the target character? */
699 if (cursor < ceiling_addr)
700 {
701 if (--count == 0)
702 {
703 immediate_quit = 0;
fa8ed3e0 704 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
9169c321
JB
705 }
706 cursor++;
707 }
708 }
709
fa8ed3e0 710 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321 711 }
ca1d1d23
JB
712 }
713 else
9169c321
JB
714 while (start > end)
715 {
716 /* The last character to check before the next obstacle. */
fa8ed3e0
RS
717 int ceiling_byte = CHAR_TO_BYTE (end);
718 int start_byte = CHAR_TO_BYTE (start);
67ce527d 719 int tem;
9169c321
JB
720
721 /* Consult the newline cache, if appropriate. */
722 if (target == '\n' && newline_cache)
723 {
724 int next_change;
725 immediate_quit = 0;
726 while (region_cache_backward
fa8ed3e0
RS
727 (current_buffer, newline_cache, start_byte, &next_change))
728 start_byte = next_change;
cbe0db0d 729 immediate_quit = allow_quit;
9169c321
JB
730
731 /* Start should never be at or before end. */
fa8ed3e0
RS
732 if (start_byte <= ceiling_byte)
733 start_byte = ceiling_byte + 1;
9169c321
JB
734
735 /* Now the text before start is an unknown region, and
736 next_change is the position of the next known region. */
fa8ed3e0 737 ceiling_byte = max (next_change, ceiling_byte);
9169c321
JB
738 }
739
740 /* Stop scanning before the gap. */
67ce527d
KH
741 tem = BUFFER_FLOOR_OF (start_byte - 1);
742 ceiling_byte = max (tem, ceiling_byte);
9169c321
JB
743
744 {
745 /* The termination address of the dumb loop. */
fa8ed3e0
RS
746 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
747 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
748 unsigned char *base = cursor;
749
750 while (cursor >= ceiling_addr)
751 {
752 unsigned char *scan_start = cursor;
753
754 while (*cursor != target && --cursor >= ceiling_addr)
755 ;
756
757 /* If we're looking for newlines, cache the fact that
758 the region from after the cursor to start is free of them. */
759 if (target == '\n' && newline_cache)
760 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
761 start_byte + cursor - base,
762 start_byte + scan_start - base);
9169c321
JB
763
764 /* Did we find the target character? */
765 if (cursor >= ceiling_addr)
766 {
767 if (++count >= 0)
768 {
769 immediate_quit = 0;
fa8ed3e0 770 return BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
771 }
772 cursor--;
773 }
774 }
775
fa8ed3e0 776 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
777 }
778 }
779
ca1d1d23
JB
780 immediate_quit = 0;
781 if (shortage != 0)
ffd56f97 782 *shortage = count * direction;
9169c321 783 return start;
ca1d1d23 784}
fa8ed3e0
RS
785\f
786/* Search for COUNT instances of a line boundary, which means either a
787 newline or (if selective display enabled) a carriage return.
788 Start at START. If COUNT is negative, search backwards.
789
790 We report the resulting position by calling TEMP_SET_PT_BOTH.
791
792 If we find COUNT instances. we position after (always after,
793 even if scanning backwards) the COUNTth match, and return 0.
794
795 If we don't find COUNT instances before reaching the end of the
796 buffer (or the beginning, if scanning backwards), we return
797 the number of line boundaries left unfound, and position at
798 the limit we bumped up against.
799
800 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
d5d57b92 801 except in special cases. */
ca1d1d23 802
63fa018d 803int
fa8ed3e0
RS
804scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
805 int start, start_byte;
806 int limit, limit_byte;
807 register int count;
808 int allow_quit;
63fa018d 809{
fa8ed3e0
RS
810 int direction = ((count > 0) ? 1 : -1);
811
812 register unsigned char *cursor;
813 unsigned char *base;
814
815 register int ceiling;
816 register unsigned char *ceiling_addr;
817
d5d57b92
RS
818 int old_immediate_quit = immediate_quit;
819
fa8ed3e0
RS
820 /* The code that follows is like scan_buffer
821 but checks for either newline or carriage return. */
822
d5d57b92
RS
823 if (allow_quit)
824 immediate_quit++;
fa8ed3e0
RS
825
826 start_byte = CHAR_TO_BYTE (start);
827
828 if (count > 0)
829 {
830 while (start_byte < limit_byte)
831 {
832 ceiling = BUFFER_CEILING_OF (start_byte);
833 ceiling = min (limit_byte - 1, ceiling);
834 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
835 base = (cursor = BYTE_POS_ADDR (start_byte));
836 while (1)
837 {
838 while (*cursor != '\n' && ++cursor != ceiling_addr)
839 ;
840
841 if (cursor != ceiling_addr)
842 {
843 if (--count == 0)
844 {
d5d57b92 845 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
846 start_byte = start_byte + cursor - base + 1;
847 start = BYTE_TO_CHAR (start_byte);
848 TEMP_SET_PT_BOTH (start, start_byte);
849 return 0;
850 }
851 else
852 if (++cursor == ceiling_addr)
853 break;
854 }
855 else
856 break;
857 }
858 start_byte += cursor - base;
859 }
860 }
861 else
862 {
fa8ed3e0
RS
863 while (start_byte > limit_byte)
864 {
865 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
866 ceiling = max (limit_byte, ceiling);
867 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
868 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
869 while (1)
870 {
871 while (--cursor != ceiling_addr && *cursor != '\n')
872 ;
873
874 if (cursor != ceiling_addr)
875 {
876 if (++count == 0)
877 {
d5d57b92 878 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
879 /* Return the position AFTER the match we found. */
880 start_byte = start_byte + cursor - base + 1;
881 start = BYTE_TO_CHAR (start_byte);
882 TEMP_SET_PT_BOTH (start, start_byte);
883 return 0;
884 }
885 }
886 else
887 break;
888 }
889 /* Here we add 1 to compensate for the last decrement
890 of CURSOR, which took it past the valid range. */
891 start_byte += cursor - base + 1;
892 }
893 }
894
895 TEMP_SET_PT_BOTH (limit, limit_byte);
d5d57b92 896 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
897
898 return count * direction;
63fa018d
RS
899}
900
ca1d1d23 901int
fa8ed3e0 902find_next_newline_no_quit (from, cnt)
ca1d1d23
JB
903 register int from, cnt;
904{
fa8ed3e0 905 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
9169c321
JB
906}
907
9169c321
JB
908/* Like find_next_newline, but returns position before the newline,
909 not after, and only search up to TO. This isn't just
910 find_next_newline (...)-1, because you might hit TO. */
fa8ed3e0 911
9169c321
JB
912int
913find_before_next_newline (from, to, cnt)
cbe0db0d 914 int from, to, cnt;
9169c321
JB
915{
916 int shortage;
917 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
918
919 if (shortage == 0)
920 pos--;
177c0ea7 921
9169c321 922 return pos;
ca1d1d23
JB
923}
924\f
ca1d1d23
JB
925/* Subroutines of Lisp buffer search functions. */
926
927static Lisp_Object
b819a390 928search_command (string, bound, noerror, count, direction, RE, posix)
ca1d1d23
JB
929 Lisp_Object string, bound, noerror, count;
930 int direction;
931 int RE;
b819a390 932 int posix;
ca1d1d23
JB
933{
934 register int np;
9f43ad85 935 int lim, lim_byte;
ca1d1d23
JB
936 int n = direction;
937
938 if (!NILP (count))
939 {
b7826503 940 CHECK_NUMBER (count);
ca1d1d23
JB
941 n *= XINT (count);
942 }
943
b7826503 944 CHECK_STRING (string);
ca1d1d23 945 if (NILP (bound))
9f43ad85
RS
946 {
947 if (n > 0)
948 lim = ZV, lim_byte = ZV_BYTE;
949 else
950 lim = BEGV, lim_byte = BEGV_BYTE;
951 }
ca1d1d23
JB
952 else
953 {
b7826503 954 CHECK_NUMBER_COERCE_MARKER (bound);
ca1d1d23 955 lim = XINT (bound);
6ec8bbd2 956 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
957 error ("Invalid search bound (wrong side of point)");
958 if (lim > ZV)
9f43ad85 959 lim = ZV, lim_byte = ZV_BYTE;
588d2fd5 960 else if (lim < BEGV)
9f43ad85 961 lim = BEGV, lim_byte = BEGV_BYTE;
588d2fd5
KH
962 else
963 lim_byte = CHAR_TO_BYTE (lim);
ca1d1d23
JB
964 }
965
910c747a
RS
966 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
967 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
968 = current_buffer->case_eqv_table;
969
9f43ad85 970 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
ca1d1d23 971 (!NILP (current_buffer->case_fold_search)
facdc750 972 ? current_buffer->case_canon_table
3135e9fd 973 : Qnil),
ca1d1d23 974 (!NILP (current_buffer->case_fold_search)
facdc750 975 ? current_buffer->case_eqv_table
3135e9fd 976 : Qnil),
b819a390 977 posix);
ca1d1d23
JB
978 if (np <= 0)
979 {
980 if (NILP (noerror))
06f77a8a
KS
981 xsignal1 (Qsearch_failed, string);
982
ca1d1d23
JB
983 if (!EQ (noerror, Qt))
984 {
985 if (lim < BEGV || lim > ZV)
986 abort ();
9f43ad85 987 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
988 return Qnil;
989#if 0 /* This would be clean, but maybe programs depend on
990 a value of nil here. */
481399bf 991 np = lim;
a5f217b8 992#endif
ca1d1d23 993 }
481399bf
RS
994 else
995 return Qnil;
ca1d1d23
JB
996 }
997
998 if (np < BEGV || np > ZV)
999 abort ();
1000
1001 SET_PT (np);
1002
1003 return make_number (np);
1004}
1005\f
fa8ed3e0
RS
1006/* Return 1 if REGEXP it matches just one constant string. */
1007
b6d6a51c
KH
1008static int
1009trivial_regexp_p (regexp)
1010 Lisp_Object regexp;
1011{
d5db4077
KR
1012 int len = SBYTES (regexp);
1013 unsigned char *s = SDATA (regexp);
b6d6a51c
KH
1014 while (--len >= 0)
1015 {
1016 switch (*s++)
1017 {
1018 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1019 return 0;
1020 case '\\':
1021 if (--len < 0)
1022 return 0;
1023 switch (*s++)
1024 {
1025 case '|': case '(': case ')': case '`': case '\'': case 'b':
1026 case 'B': case '<': case '>': case 'w': case 'W': case 's':
29f89fe7 1027 case 'S': case '=': case '{': case '}': case '_':
5679531d 1028 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 1029 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
1030 case '6': case '7': case '8': case '9':
1031 return 0;
1032 }
1033 }
1034 }
1035 return 1;
1036}
1037
ca325161 1038/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 1039 starting at position POS and stopping at position LIM,
b819a390 1040 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
1041 a regular expression if RE is true.
1042
1043 If N is positive, searching is forward and LIM must be greater than POS.
1044 If N is negative, searching is backward and LIM must be less than POS.
1045
facdc750 1046 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 1047 or else the position at the beginning of the Nth occurrence
b819a390
RS
1048 (if searching backward) or the end (if searching forward).
1049
1050 POSIX is nonzero if we want full backtracking (POSIX style)
1051 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 1052
aff2ce94
RS
1053#define TRANSLATE(out, trt, d) \
1054do \
1055 { \
1056 if (! NILP (trt)) \
1057 { \
1058 Lisp_Object temp; \
1059 temp = Faref (trt, make_number (d)); \
1060 if (INTEGERP (temp)) \
1061 out = XINT (temp); \
1062 else \
1063 out = d; \
1064 } \
1065 else \
1066 out = d; \
1067 } \
1068while (0)
facdc750 1069
b819a390 1070static int
9f43ad85
RS
1071search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1072 RE, trt, inverse_trt, posix)
ca1d1d23
JB
1073 Lisp_Object string;
1074 int pos;
9f43ad85 1075 int pos_byte;
ca1d1d23 1076 int lim;
9f43ad85 1077 int lim_byte;
ca1d1d23
JB
1078 int n;
1079 int RE;
facdc750
RS
1080 Lisp_Object trt;
1081 Lisp_Object inverse_trt;
b819a390 1082 int posix;
ca1d1d23 1083{
d5db4077
KR
1084 int len = SCHARS (string);
1085 int len_byte = SBYTES (string);
facdc750 1086 register int i;
ca1d1d23 1087
7074fde6
FP
1088 if (running_asynch_code)
1089 save_search_regs ();
1090
a7e4cdde 1091 /* Searching 0 times means don't move. */
ca1d1d23 1092 /* Null string is found at starting position. */
a7e4cdde 1093 if (len == 0 || n == 0)
ca325161 1094 {
0353b28f 1095 set_search_regs (pos_byte, 0);
ca325161
RS
1096 return pos;
1097 }
3f57a499 1098
41a33295 1099 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
ca1d1d23 1100 {
facdc750
RS
1101 unsigned char *p1, *p2;
1102 int s1, s2;
487282dc
KH
1103 struct re_pattern_buffer *bufp;
1104
0c8533c6
RS
1105 bufp = compile_pattern (string, &search_regs, trt, posix,
1106 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23 1107
ca1d1d23
JB
1108 immediate_quit = 1; /* Quit immediately if user types ^G,
1109 because letting this function finish
1110 can take too long. */
1111 QUIT; /* Do a pending quit right away,
1112 to avoid paradoxical behavior */
1113 /* Get pointers and sizes of the two strings
1114 that make up the visible portion of the buffer. */
1115
1116 p1 = BEGV_ADDR;
fa8ed3e0 1117 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1118 p2 = GAP_END_ADDR;
fa8ed3e0 1119 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1120 if (s1 < 0)
1121 {
1122 p2 = p1;
fa8ed3e0 1123 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1124 s1 = 0;
1125 }
1126 if (s2 < 0)
1127 {
fa8ed3e0 1128 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1129 s2 = 0;
1130 }
8bb43c28 1131 re_match_object = Qnil;
177c0ea7 1132
ca1d1d23
JB
1133 while (n < 0)
1134 {
42db823b 1135 int val;
487282dc 1136 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1137 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1138 &search_regs,
42db823b 1139 /* Don't allow match past current point */
4996330b 1140 pos_byte - BEGV_BYTE);
ca1d1d23 1141 if (val == -2)
b6d6a51c
KH
1142 {
1143 matcher_overflow ();
1144 }
ca1d1d23
JB
1145 if (val >= 0)
1146 {
26aff150 1147 pos_byte = search_regs.start[0] + BEGV_BYTE;
4746118a 1148 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1149 if (search_regs.start[i] >= 0)
1150 {
fa8ed3e0
RS
1151 search_regs.start[i]
1152 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1153 search_regs.end[i]
1154 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1155 }
a3668d92 1156 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1157 /* Set pos to the new position. */
1158 pos = search_regs.start[0];
1159 }
1160 else
1161 {
1162 immediate_quit = 0;
1163 return (n);
1164 }
1165 n++;
1166 }
1167 while (n > 0)
1168 {
42db823b 1169 int val;
487282dc 1170 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1171 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1172 &search_regs,
1173 lim_byte - BEGV_BYTE);
ca1d1d23 1174 if (val == -2)
b6d6a51c
KH
1175 {
1176 matcher_overflow ();
1177 }
ca1d1d23
JB
1178 if (val >= 0)
1179 {
26aff150 1180 pos_byte = search_regs.end[0] + BEGV_BYTE;
4746118a 1181 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1182 if (search_regs.start[i] >= 0)
1183 {
fa8ed3e0
RS
1184 search_regs.start[i]
1185 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1186 search_regs.end[i]
1187 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1188 }
a3668d92 1189 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1190 pos = search_regs.end[0];
1191 }
1192 else
1193 {
1194 immediate_quit = 0;
1195 return (0 - n);
1196 }
1197 n--;
1198 }
1199 immediate_quit = 0;
1200 return (pos);
1201 }
1202 else /* non-RE case */
1203 {
facdc750
RS
1204 unsigned char *raw_pattern, *pat;
1205 int raw_pattern_size;
1206 int raw_pattern_size_byte;
1207 unsigned char *patbuf;
1208 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
a967ed62 1209 unsigned char *base_pat;
fed91c38
KH
1210 /* Set to positive if we find a non-ASCII char that need
1211 translation. Otherwise set to zero later. */
1212 int charset_base = -1;
040272ce 1213 int boyer_moore_ok = 1;
facdc750
RS
1214
1215 /* MULTIBYTE says whether the text to be searched is multibyte.
1216 We must convert PATTERN to match that, or we will not really
1217 find things right. */
1218
1219 if (multibyte == STRING_MULTIBYTE (string))
1220 {
d5db4077
KR
1221 raw_pattern = (unsigned char *) SDATA (string);
1222 raw_pattern_size = SCHARS (string);
1223 raw_pattern_size_byte = SBYTES (string);
facdc750
RS
1224 }
1225 else if (multibyte)
1226 {
d5db4077 1227 raw_pattern_size = SCHARS (string);
facdc750 1228 raw_pattern_size_byte
d5db4077 1229 = count_size_as_multibyte (SDATA (string),
facdc750 1230 raw_pattern_size);
7276d3d8 1231 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
d5db4077
KR
1232 copy_text (SDATA (string), raw_pattern,
1233 SCHARS (string), 0, 1);
facdc750
RS
1234 }
1235 else
1236 {
1237 /* Converting multibyte to single-byte.
1238
1239 ??? Perhaps this conversion should be done in a special way
1240 by subtracting nonascii-insert-offset from each non-ASCII char,
1241 so that only the multibyte chars which really correspond to
1242 the chosen single-byte character set can possibly match. */
d5db4077
KR
1243 raw_pattern_size = SCHARS (string);
1244 raw_pattern_size_byte = SCHARS (string);
7276d3d8 1245 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
1246 copy_text (SDATA (string), raw_pattern,
1247 SBYTES (string), 1, 0);
facdc750
RS
1248 }
1249
1250 /* Copy and optionally translate the pattern. */
1251 len = raw_pattern_size;
1252 len_byte = raw_pattern_size_byte;
1253 patbuf = (unsigned char *) alloca (len_byte);
1254 pat = patbuf;
1255 base_pat = raw_pattern;
1256 if (multibyte)
1257 {
0190922f
KH
1258 /* Fill patbuf by translated characters in STRING while
1259 checking if we can use boyer-moore search. If TRT is
1260 non-nil, we can use boyer-moore search only if TRT can be
1261 represented by the byte array of 256 elements. For that,
1262 all non-ASCII case-equivalents of all case-senstive
1263 characters in STRING must belong to the same charset and
1264 row. */
1265
facdc750
RS
1266 while (--len >= 0)
1267 {
0190922f 1268 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
aff2ce94 1269 int c, translated, inverse;
facdc750
RS
1270 int in_charlen, charlen;
1271
1272 /* If we got here and the RE flag is set, it's because we're
1273 dealing with a regexp known to be trivial, so the backslash
1274 just quotes the next character. */
1275 if (RE && *base_pat == '\\')
1276 {
1277 len--;
0190922f 1278 raw_pattern_size--;
facdc750
RS
1279 len_byte--;
1280 base_pat++;
1281 }
1282
1283 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
040272ce 1284
0190922f 1285 if (NILP (trt))
facdc750 1286 {
0190922f
KH
1287 str = base_pat;
1288 charlen = in_charlen;
facdc750 1289 }
0190922f 1290 else
facdc750 1291 {
0190922f
KH
1292 /* Translate the character. */
1293 TRANSLATE (translated, trt, c);
1294 charlen = CHAR_STRING (translated, str_base);
1295 str = str_base;
1296
1297 /* Check if C has any other case-equivalents. */
1298 TRANSLATE (inverse, inverse_trt, c);
1299 /* If so, check if we can use boyer-moore. */
1300 if (c != inverse && boyer_moore_ok)
1301 {
1302 /* Check if all equivalents belong to the same
1303 charset & row. Note that the check of C
1304 itself is done by the last iteration. Note
1305 also that we don't have to check ASCII
1306 characters because boyer-moore search can
1307 always handle their translation. */
1308 while (1)
1309 {
fed91c38 1310 if (ASCII_BYTE_P (inverse))
0190922f 1311 {
fed91c38 1312 if (charset_base > 0)
0190922f
KH
1313 {
1314 boyer_moore_ok = 0;
1315 break;
1316 }
fed91c38
KH
1317 charset_base = 0;
1318 }
1319 else if (SINGLE_BYTE_CHAR_P (inverse))
1320 {
1321 /* Boyer-moore search can't handle a
1322 translation of an eight-bit
1323 character. */
1324 boyer_moore_ok = 0;
1325 break;
1326 }
1327 else if (charset_base < 0)
1328 charset_base = inverse & ~CHAR_FIELD3_MASK;
1329 else if ((inverse & ~CHAR_FIELD3_MASK)
1330 != charset_base)
1331 {
1332 boyer_moore_ok = 0;
1333 break;
0190922f
KH
1334 }
1335 if (c == inverse)
1336 break;
1337 TRANSLATE (inverse, inverse_trt, inverse);
1338 }
1339 }
aff2ce94 1340 }
fed91c38
KH
1341 if (charset_base < 0)
1342 charset_base = 0;
facdc750
RS
1343
1344 /* Store this character into the translated pattern. */
1345 bcopy (str, pat, charlen);
1346 pat += charlen;
1347 base_pat += in_charlen;
1348 len_byte -= in_charlen;
1349 }
1350 }
1351 else
1352 {
040272ce
KH
1353 /* Unibyte buffer. */
1354 charset_base = 0;
facdc750
RS
1355 while (--len >= 0)
1356 {
040272ce 1357 int c, translated;
facdc750
RS
1358
1359 /* If we got here and the RE flag is set, it's because we're
1360 dealing with a regexp known to be trivial, so the backslash
1361 just quotes the next character. */
1362 if (RE && *base_pat == '\\')
1363 {
1364 len--;
0190922f 1365 raw_pattern_size--;
facdc750
RS
1366 base_pat++;
1367 }
1368 c = *base_pat++;
aff2ce94 1369 TRANSLATE (translated, trt, c);
facdc750
RS
1370 *pat++ = translated;
1371 }
1372 }
1373
1374 len_byte = pat - patbuf;
1375 len = raw_pattern_size;
1376 pat = base_pat = patbuf;
1377
040272ce 1378 if (boyer_moore_ok)
facdc750 1379 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
aff2ce94
RS
1380 pos, pos_byte, lim, lim_byte,
1381 charset_base);
facdc750
RS
1382 else
1383 return simple_search (n, pat, len, len_byte, trt,
1384 pos, pos_byte, lim, lim_byte);
1385 }
1386}
1387\f
1388/* Do a simple string search N times for the string PAT,
1389 whose length is LEN/LEN_BYTE,
1390 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1391 TRT is the translation table.
f8bd51c4 1392
facdc750
RS
1393 Return the character position where the match is found.
1394 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1395
facdc750
RS
1396 This kind of search works regardless of what is in PAT and
1397 regardless of what is in TRT. It is used in cases where
1398 boyer_moore cannot work. */
1399
1400static int
1401simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1402 int n;
1403 unsigned char *pat;
1404 int len, len_byte;
1405 Lisp_Object trt;
1406 int pos, pos_byte;
1407 int lim, lim_byte;
1408{
1409 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
ab228c24 1410 int forward = n > 0;
facdc750
RS
1411
1412 if (lim > pos && multibyte)
1413 while (n > 0)
1414 {
1415 while (1)
f8bd51c4 1416 {
facdc750
RS
1417 /* Try matching at position POS. */
1418 int this_pos = pos;
1419 int this_pos_byte = pos_byte;
1420 int this_len = len;
1421 int this_len_byte = len_byte;
1422 unsigned char *p = pat;
1423 if (pos + len > lim)
1424 goto stop;
1425
1426 while (this_len > 0)
1427 {
1428 int charlen, buf_charlen;
ab228c24 1429 int pat_ch, buf_ch;
facdc750 1430
ab228c24 1431 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1432 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1433 ZV_BYTE - this_pos_byte,
1434 buf_charlen);
aff2ce94 1435 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1436
1437 if (buf_ch != pat_ch)
1438 break;
ab228c24
RS
1439
1440 this_len_byte -= charlen;
1441 this_len--;
1442 p += charlen;
1443
1444 this_pos_byte += buf_charlen;
1445 this_pos++;
facdc750
RS
1446 }
1447
1448 if (this_len == 0)
1449 {
1450 pos += len;
1451 pos_byte += len_byte;
1452 break;
1453 }
1454
1455 INC_BOTH (pos, pos_byte);
f8bd51c4 1456 }
facdc750
RS
1457
1458 n--;
1459 }
1460 else if (lim > pos)
1461 while (n > 0)
1462 {
1463 while (1)
f8bd51c4 1464 {
facdc750
RS
1465 /* Try matching at position POS. */
1466 int this_pos = pos;
1467 int this_len = len;
1468 unsigned char *p = pat;
1469
1470 if (pos + len > lim)
1471 goto stop;
1472
1473 while (this_len > 0)
1474 {
1475 int pat_ch = *p++;
1476 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1477 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1478
1479 if (buf_ch != pat_ch)
1480 break;
ab228c24
RS
1481
1482 this_len--;
1483 this_pos++;
facdc750
RS
1484 }
1485
1486 if (this_len == 0)
1487 {
1488 pos += len;
1489 break;
1490 }
1491
1492 pos++;
f8bd51c4 1493 }
facdc750
RS
1494
1495 n--;
1496 }
1497 /* Backwards search. */
1498 else if (lim < pos && multibyte)
1499 while (n < 0)
1500 {
1501 while (1)
f8bd51c4 1502 {
facdc750
RS
1503 /* Try matching at position POS. */
1504 int this_pos = pos - len;
1505 int this_pos_byte = pos_byte - len_byte;
1506 int this_len = len;
1507 int this_len_byte = len_byte;
1508 unsigned char *p = pat;
1509
1510 if (pos - len < lim)
1511 goto stop;
1512
1513 while (this_len > 0)
1514 {
1515 int charlen, buf_charlen;
ab228c24 1516 int pat_ch, buf_ch;
facdc750 1517
ab228c24 1518 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1519 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1520 ZV_BYTE - this_pos_byte,
1521 buf_charlen);
aff2ce94 1522 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1523
1524 if (buf_ch != pat_ch)
1525 break;
ab228c24
RS
1526
1527 this_len_byte -= charlen;
1528 this_len--;
1529 p += charlen;
1530 this_pos_byte += buf_charlen;
1531 this_pos++;
facdc750
RS
1532 }
1533
1534 if (this_len == 0)
1535 {
1536 pos -= len;
1537 pos_byte -= len_byte;
1538 break;
1539 }
1540
1541 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1542 }
1543
facdc750
RS
1544 n++;
1545 }
1546 else if (lim < pos)
1547 while (n < 0)
1548 {
1549 while (1)
b6d6a51c 1550 {
facdc750
RS
1551 /* Try matching at position POS. */
1552 int this_pos = pos - len;
1553 int this_len = len;
1554 unsigned char *p = pat;
1555
1556 if (pos - len < lim)
1557 goto stop;
1558
1559 while (this_len > 0)
1560 {
1561 int pat_ch = *p++;
1562 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1563 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1564
1565 if (buf_ch != pat_ch)
1566 break;
ab228c24
RS
1567 this_len--;
1568 this_pos++;
facdc750
RS
1569 }
1570
1571 if (this_len == 0)
b6d6a51c 1572 {
facdc750
RS
1573 pos -= len;
1574 break;
b6d6a51c 1575 }
facdc750
RS
1576
1577 pos--;
b6d6a51c 1578 }
facdc750
RS
1579
1580 n++;
b6d6a51c 1581 }
facdc750
RS
1582
1583 stop:
1584 if (n == 0)
aff2ce94 1585 {
ab228c24
RS
1586 if (forward)
1587 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1588 else
1589 set_search_regs (multibyte ? pos_byte : pos, len_byte);
aff2ce94
RS
1590
1591 return pos;
1592 }
facdc750
RS
1593 else if (n > 0)
1594 return -n;
1595 else
1596 return n;
1597}
1598\f
0190922f 1599/* Do Boyer-Moore search N times for the string BASE_PAT,
facdc750
RS
1600 whose length is LEN/LEN_BYTE,
1601 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1602 DIRECTION says which direction we search in.
1603 TRT and INVERSE_TRT are translation tables.
0190922f 1604 Characters in PAT are already translated by TRT.
facdc750 1605
0190922f
KH
1606 This kind of search works if all the characters in BASE_PAT that
1607 have nontrivial translation are the same aside from the last byte.
1608 This makes it possible to translate just the last byte of a
1609 character, and do so after just a simple test of the context.
1610 CHARSET_BASE is nonzero iff there is such a non-ASCII character.
facdc750
RS
1611
1612 If that criterion is not satisfied, do not call this function. */
1613
1614static int
1615boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
aff2ce94 1616 pos, pos_byte, lim, lim_byte, charset_base)
facdc750
RS
1617 int n;
1618 unsigned char *base_pat;
1619 int len, len_byte;
1620 Lisp_Object trt;
1621 Lisp_Object inverse_trt;
1622 int pos, pos_byte;
1623 int lim, lim_byte;
aff2ce94 1624 int charset_base;
facdc750
RS
1625{
1626 int direction = ((n > 0) ? 1 : -1);
1627 register int dirlen;
a968f437 1628 int infinity, limit, stride_for_teases = 0;
facdc750
RS
1629 register int *BM_tab;
1630 int *BM_tab_base;
177c0ea7 1631 register unsigned char *cursor, *p_limit;
facdc750 1632 register int i, j;
cb6792d2 1633 unsigned char *pat, *pat_end;
facdc750
RS
1634 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1635
1636 unsigned char simple_translate[0400];
0190922f
KH
1637 /* These are set to the preceding bytes of a byte to be translated
1638 if charset_base is nonzero. As the maximum byte length of a
1639 multibyte character is 4, we have to check at most three previous
1640 bytes. */
1641 int translate_prev_byte1 = 0;
1642 int translate_prev_byte2 = 0;
1643 int translate_prev_byte3 = 0;
facdc750
RS
1644
1645#ifdef C_ALLOCA
1646 int BM_tab_space[0400];
1647 BM_tab = &BM_tab_space[0];
1648#else
1649 BM_tab = (int *) alloca (0400 * sizeof (int));
1650#endif
1651 /* The general approach is that we are going to maintain that we know */
1652 /* the first (closest to the present position, in whatever direction */
1653 /* we're searching) character that could possibly be the last */
1654 /* (furthest from present position) character of a valid match. We */
1655 /* advance the state of our knowledge by looking at that character */
1656 /* and seeing whether it indeed matches the last character of the */
1657 /* pattern. If it does, we take a closer look. If it does not, we */
1658 /* move our pointer (to putative last characters) as far as is */
1659 /* logically possible. This amount of movement, which I call a */
1660 /* stride, will be the length of the pattern if the actual character */
1661 /* appears nowhere in the pattern, otherwise it will be the distance */
1662 /* from the last occurrence of that character to the end of the */
1663 /* pattern. */
1664 /* As a coding trick, an enormous stride is coded into the table for */
1665 /* characters that match the last character. This allows use of only */
1666 /* a single test, a test for having gone past the end of the */
1667 /* permissible match region, to test for both possible matches (when */
1668 /* the stride goes past the end immediately) and failure to */
177c0ea7 1669 /* match (where you get nudged past the end one stride at a time). */
facdc750
RS
1670
1671 /* Here we make a "mickey mouse" BM table. The stride of the search */
1672 /* is determined only by the last character of the putative match. */
1673 /* If that character does not match, we will stride the proper */
1674 /* distance to propose a match that superimposes it on the last */
1675 /* instance of a character that matches it (per trt), or misses */
177c0ea7 1676 /* it entirely if there is none. */
facdc750
RS
1677
1678 dirlen = len_byte * direction;
1679 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
cb6792d2
RS
1680
1681 /* Record position after the end of the pattern. */
1682 pat_end = base_pat + len_byte;
1683 /* BASE_PAT points to a character that we start scanning from.
1684 It is the first character in a forward search,
1685 the last character in a backward search. */
facdc750 1686 if (direction < 0)
cb6792d2
RS
1687 base_pat = pat_end - 1;
1688
facdc750
RS
1689 BM_tab_base = BM_tab;
1690 BM_tab += 0400;
1691 j = dirlen; /* to get it in a register */
1692 /* A character that does not appear in the pattern induces a */
1693 /* stride equal to the pattern length. */
1694 while (BM_tab_base != BM_tab)
1695 {
1696 *--BM_tab = j;
1697 *--BM_tab = j;
1698 *--BM_tab = j;
1699 *--BM_tab = j;
1700 }
1701
1702 /* We use this for translation, instead of TRT itself.
1703 We fill this in to handle the characters that actually
1704 occur in the pattern. Others don't matter anyway! */
1705 bzero (simple_translate, sizeof simple_translate);
1706 for (i = 0; i < 0400; i++)
1707 simple_translate[i] = i;
1708
0190922f
KH
1709 if (charset_base)
1710 {
1711 /* Setup translate_prev_byte1/2/3 from CHARSET_BASE. Only a
1712 byte following them are the target of translation. */
1713 int sample_char = charset_base | 0x20;
1714 unsigned char str[MAX_MULTIBYTE_LENGTH];
1715 int len = CHAR_STRING (sample_char, str);
1716
1717 translate_prev_byte1 = str[len - 2];
1718 if (len > 2)
1719 {
1720 translate_prev_byte2 = str[len - 3];
1721 if (len > 3)
1722 translate_prev_byte3 = str[len - 4];
1723 }
1724 }
1725
facdc750
RS
1726 i = 0;
1727 while (i != infinity)
1728 {
cb6792d2 1729 unsigned char *ptr = base_pat + i;
facdc750
RS
1730 i += direction;
1731 if (i == dirlen)
1732 i = infinity;
1733 if (! NILP (trt))
ca1d1d23 1734 {
620a5178
KH
1735 /* If the byte currently looking at is the last of a
1736 character to check case-equivalents, set CH to that
1737 character. An ASCII character and a non-ASCII character
1738 matching with CHARSET_BASE are to be checked. */
0190922f
KH
1739 int ch = -1;
1740
1741 if (ASCII_BYTE_P (*ptr) || ! multibyte)
1742 ch = *ptr;
620a5178 1743 else if (charset_base
86dc6ccb 1744 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
ca1d1d23 1745 {
620a5178
KH
1746 unsigned char *charstart = ptr - 1;
1747
1748 while (! (CHAR_HEAD_P (*charstart)))
1749 charstart--;
1750 ch = STRING_CHAR (charstart, ptr - charstart + 1);
0190922f
KH
1751 if (charset_base != (ch & ~CHAR_FIELD3_MASK))
1752 ch = -1;
ca1d1d23 1753 }
facdc750 1754
6786a107 1755 if (ch >= 0400)
620a5178
KH
1756 j = ((unsigned char) ch) | 0200;
1757 else
1758 j = *ptr;
1759
facdc750
RS
1760 if (i == infinity)
1761 stride_for_teases = BM_tab[j];
ab228c24 1762
facdc750
RS
1763 BM_tab[j] = dirlen - i;
1764 /* A translation table is accompanied by its inverse -- see */
177c0ea7 1765 /* comment following downcase_table for details */
0190922f 1766 if (ch >= 0)
ab228c24
RS
1767 {
1768 int starting_ch = ch;
620a5178 1769 int starting_j = j;
0190922f 1770
ab228c24
RS
1771 while (1)
1772 {
1773 TRANSLATE (ch, inverse_trt, ch);
6786a107 1774 if (ch >= 0400)
ab228c24
RS
1775 j = ((unsigned char) ch) | 0200;
1776 else
1777 j = (unsigned char) ch;
1778
1779 /* For all the characters that map into CH,
1780 set up simple_translate to map the last byte
1781 into STARTING_J. */
1782 simple_translate[j] = starting_j;
1783 if (ch == starting_ch)
1784 break;
1785 BM_tab[j] = dirlen - i;
1786 }
1787 }
facdc750
RS
1788 }
1789 else
1790 {
1791 j = *ptr;
1792
1793 if (i == infinity)
1794 stride_for_teases = BM_tab[j];
1795 BM_tab[j] = dirlen - i;
ca1d1d23 1796 }
facdc750
RS
1797 /* stride_for_teases tells how much to stride if we get a */
1798 /* match on the far character but are subsequently */
1799 /* disappointed, by recording what the stride would have been */
1800 /* for that character if the last character had been */
1801 /* different. */
1802 }
1803 infinity = dirlen - infinity;
1804 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1805 /* loop invariant - POS_BYTE points at where last char (first
1806 char if reverse) of pattern would align in a possible match. */
1807 while (n != 0)
1808 {
1809 int tail_end;
1810 unsigned char *tail_end_ptr;
1811
1812 /* It's been reported that some (broken) compiler thinks that
1813 Boolean expressions in an arithmetic context are unsigned.
1814 Using an explicit ?1:0 prevents this. */
1815 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1816 < 0)
1817 return (n * (0 - direction));
1818 /* First we do the part we can by pointers (maybe nothing) */
1819 QUIT;
1820 pat = base_pat;
1821 limit = pos_byte - dirlen + direction;
67ce527d
KH
1822 if (direction > 0)
1823 {
1824 limit = BUFFER_CEILING_OF (limit);
1825 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1826 can take on without hitting edge of buffer or the gap. */
1827 limit = min (limit, pos_byte + 20000);
1828 limit = min (limit, lim_byte - 1);
1829 }
1830 else
1831 {
1832 limit = BUFFER_FLOOR_OF (limit);
1833 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1834 can take on without hitting edge of buffer or the gap. */
1835 limit = max (limit, pos_byte - 20000);
1836 limit = max (limit, lim_byte);
1837 }
facdc750
RS
1838 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1839 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1840
1841 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1842 {
facdc750
RS
1843 unsigned char *p2;
1844
1845 p_limit = BYTE_POS_ADDR (limit);
1846 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1847 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1848 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1849 {
facdc750 1850 if (direction > 0) /* worth duplicating */
ca1d1d23 1851 {
facdc750
RS
1852 /* Use signed comparison if appropriate
1853 to make cursor+infinity sure to be > p_limit.
1854 Assuming that the buffer lies in a range of addresses
1855 that are all "positive" (as ints) or all "negative",
1856 either kind of comparison will work as long
1857 as we don't step by infinity. So pick the kind
1858 that works when we do step by infinity. */
1859 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1860 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1861 cursor += BM_tab[*cursor];
ca1d1d23 1862 else
facdc750
RS
1863 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1864 cursor += BM_tab[*cursor];
1865 }
1866 else
1867 {
1868 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1869 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1870 cursor += BM_tab[*cursor];
1871 else
1872 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1873 cursor += BM_tab[*cursor];
1874 }
ca1d1d23 1875/* If you are here, cursor is beyond the end of the searched region. */
facdc750
RS
1876/* This can happen if you match on the far character of the pattern, */
1877/* because the "stride" of that character is infinity, a number able */
1878/* to throw you well beyond the end of the search. It can also */
1879/* happen if you fail to match within the permitted region and would */
1880/* otherwise try a character beyond that region */
1881 if ((cursor - p_limit) * direction <= len_byte)
1882 break; /* a small overrun is genuine */
1883 cursor -= infinity; /* large overrun = hit */
1884 i = dirlen - direction;
1885 if (! NILP (trt))
1886 {
1887 while ((i -= direction) + direction != 0)
ca1d1d23 1888 {
facdc750
RS
1889 int ch;
1890 cursor -= direction;
1891 /* Translate only the last byte of a character. */
1892 if (! multibyte
1893 || ((cursor == tail_end_ptr
1894 || CHAR_HEAD_P (cursor[1]))
1895 && (CHAR_HEAD_P (cursor[0])
0190922f
KH
1896 /* Check if this is the last byte of
1897 a translable character. */
1898 || (translate_prev_byte1 == cursor[-1]
1899 && (CHAR_HEAD_P (translate_prev_byte1)
1900 || (translate_prev_byte2 == cursor[-2]
1901 && (CHAR_HEAD_P (translate_prev_byte2)
1902 || (translate_prev_byte3 == cursor[-3]))))))))
facdc750
RS
1903 ch = simple_translate[*cursor];
1904 else
1905 ch = *cursor;
1906 if (pat[i] != ch)
1907 break;
ca1d1d23 1908 }
facdc750
RS
1909 }
1910 else
1911 {
1912 while ((i -= direction) + direction != 0)
ca1d1d23 1913 {
facdc750
RS
1914 cursor -= direction;
1915 if (pat[i] != *cursor)
1916 break;
ca1d1d23 1917 }
facdc750
RS
1918 }
1919 cursor += dirlen - i - direction; /* fix cursor */
1920 if (i + direction == 0)
1921 {
1922 int position;
0c8533c6 1923
facdc750 1924 cursor -= direction;
1113d9db 1925
facdc750
RS
1926 position = pos_byte + cursor - p2 + ((direction > 0)
1927 ? 1 - len_byte : 0);
1928 set_search_regs (position, len_byte);
ca325161 1929
facdc750
RS
1930 if ((n -= direction) != 0)
1931 cursor += dirlen; /* to resume search */
ca1d1d23 1932 else
facdc750
RS
1933 return ((direction > 0)
1934 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1935 }
facdc750
RS
1936 else
1937 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 1938 }
facdc750
RS
1939 pos_byte += cursor - p2;
1940 }
1941 else
1942 /* Now we'll pick up a clump that has to be done the hard */
1943 /* way because it covers a discontinuity */
1944 {
1945 limit = ((direction > 0)
1946 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1947 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1948 limit = ((direction > 0)
1949 ? min (limit + len_byte, lim_byte - 1)
1950 : max (limit - len_byte, lim_byte));
1951 /* LIMIT is now the last value POS_BYTE can have
1952 and still be valid for a possible match. */
1953 while (1)
ca1d1d23 1954 {
facdc750
RS
1955 /* This loop can be coded for space rather than */
1956 /* speed because it will usually run only once. */
1957 /* (the reach is at most len + 21, and typically */
177c0ea7 1958 /* does not exceed len) */
facdc750
RS
1959 while ((limit - pos_byte) * direction >= 0)
1960 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1961 /* now run the same tests to distinguish going off the */
1962 /* end, a match or a phony match. */
1963 if ((pos_byte - limit) * direction <= len_byte)
1964 break; /* ran off the end */
1965 /* Found what might be a match.
1966 Set POS_BYTE back to last (first if reverse) pos. */
1967 pos_byte -= infinity;
1968 i = dirlen - direction;
1969 while ((i -= direction) + direction != 0)
ca1d1d23 1970 {
facdc750
RS
1971 int ch;
1972 unsigned char *ptr;
1973 pos_byte -= direction;
1974 ptr = BYTE_POS_ADDR (pos_byte);
1975 /* Translate only the last byte of a character. */
1976 if (! multibyte
1977 || ((ptr == tail_end_ptr
1978 || CHAR_HEAD_P (ptr[1]))
1979 && (CHAR_HEAD_P (ptr[0])
0190922f
KH
1980 /* Check if this is the last byte of a
1981 translable character. */
1982 || (translate_prev_byte1 == ptr[-1]
1983 && (CHAR_HEAD_P (translate_prev_byte1)
1984 || (translate_prev_byte2 == ptr[-2]
1985 && (CHAR_HEAD_P (translate_prev_byte2)
1986 || translate_prev_byte3 == ptr[-3])))))))
facdc750
RS
1987 ch = simple_translate[*ptr];
1988 else
1989 ch = *ptr;
1990 if (pat[i] != ch)
1991 break;
1992 }
1993 /* Above loop has moved POS_BYTE part or all the way
1994 back to the first pos (last pos if reverse).
1995 Set it once again at the last (first if reverse) char. */
1996 pos_byte += dirlen - i- direction;
1997 if (i + direction == 0)
1998 {
1999 int position;
2000 pos_byte -= direction;
1113d9db 2001
facdc750 2002 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
0c8533c6 2003
facdc750 2004 set_search_regs (position, len_byte);
ca325161 2005
facdc750
RS
2006 if ((n -= direction) != 0)
2007 pos_byte += dirlen; /* to resume search */
ca1d1d23 2008 else
facdc750
RS
2009 return ((direction > 0)
2010 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 2011 }
facdc750
RS
2012 else
2013 pos_byte += stride_for_teases;
2014 }
2015 }
2016 /* We have done one clump. Can we continue? */
2017 if ((lim_byte - pos_byte) * direction < 0)
2018 return ((0 - n) * direction);
ca1d1d23 2019 }
facdc750 2020 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 2021}
ca325161 2022
fa8ed3e0 2023/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
a7e4cdde
RS
2024 for the overall match just found in the current buffer.
2025 Also clear out the match data for registers 1 and up. */
ca325161
RS
2026
2027static void
fa8ed3e0
RS
2028set_search_regs (beg_byte, nbytes)
2029 int beg_byte, nbytes;
ca325161 2030{
a7e4cdde
RS
2031 int i;
2032
ca325161
RS
2033 /* Make sure we have registers in which to store
2034 the match position. */
2035 if (search_regs.num_regs == 0)
2036 {
2d4a771a
RS
2037 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2038 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 2039 search_regs.num_regs = 2;
ca325161
RS
2040 }
2041
a7e4cdde
RS
2042 /* Clear out the other registers. */
2043 for (i = 1; i < search_regs.num_regs; i++)
2044 {
2045 search_regs.start[i] = -1;
2046 search_regs.end[i] = -1;
2047 }
2048
fa8ed3e0
RS
2049 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2050 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 2051 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 2052}
ca1d1d23
JB
2053\f
2054/* Given a string of words separated by word delimiters,
2055 compute a regexp that matches those exact words
2056 separated by arbitrary punctuation. */
2057
2058static Lisp_Object
2059wordify (string)
2060 Lisp_Object string;
2061{
2062 register unsigned char *p, *o;
0c8533c6 2063 register int i, i_byte, len, punct_count = 0, word_count = 0;
ca1d1d23 2064 Lisp_Object val;
0c8533c6
RS
2065 int prev_c = 0;
2066 int adjust;
ca1d1d23 2067
b7826503 2068 CHECK_STRING (string);
d5db4077
KR
2069 p = SDATA (string);
2070 len = SCHARS (string);
ca1d1d23 2071
0c8533c6
RS
2072 for (i = 0, i_byte = 0; i < len; )
2073 {
2074 int c;
177c0ea7 2075
eb99a8dd 2076 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
0c8533c6
RS
2077
2078 if (SYNTAX (c) != Sword)
2079 {
2080 punct_count++;
2081 if (i > 0 && SYNTAX (prev_c) == Sword)
2082 word_count++;
2083 }
ca1d1d23 2084
0c8533c6
RS
2085 prev_c = c;
2086 }
2087
2088 if (SYNTAX (prev_c) == Sword)
2089 word_count++;
2090 if (!word_count)
b07b65aa 2091 return empty_string;
0c8533c6
RS
2092
2093 adjust = - punct_count + 5 * (word_count - 1) + 4;
8a2df937
RS
2094 if (STRING_MULTIBYTE (string))
2095 val = make_uninit_multibyte_string (len + adjust,
d5db4077 2096 SBYTES (string)
8a2df937
RS
2097 + adjust);
2098 else
2099 val = make_uninit_string (len + adjust);
ca1d1d23 2100
d5db4077 2101 o = SDATA (val);
ca1d1d23
JB
2102 *o++ = '\\';
2103 *o++ = 'b';
1e9582d4 2104 prev_c = 0;
ca1d1d23 2105
1e9582d4
RS
2106 for (i = 0, i_byte = 0; i < len; )
2107 {
2108 int c;
2109 int i_byte_orig = i_byte;
177c0ea7 2110
eb99a8dd 2111 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1e9582d4
RS
2112
2113 if (SYNTAX (c) == Sword)
2114 {
5d69fe10 2115 bcopy (SDATA (string) + i_byte_orig, o,
1e9582d4
RS
2116 i_byte - i_byte_orig);
2117 o += i_byte - i_byte_orig;
2118 }
2119 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2120 {
2121 *o++ = '\\';
2122 *o++ = 'W';
2123 *o++ = '\\';
2124 *o++ = 'W';
2125 *o++ = '*';
2126 }
2127
2128 prev_c = c;
2129 }
ca1d1d23
JB
2130
2131 *o++ = '\\';
2132 *o++ = 'b';
2133
2134 return val;
2135}
2136\f
2137DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
8c1a1077
PJ
2138 "MSearch backward: ",
2139 doc: /* Search backward from point for STRING.
2140Set point to the beginning of the occurrence found, and return point.
2141An optional second argument bounds the search; it is a buffer position.
2142The match found must not extend before that position.
2143Optional third argument, if t, means if fail just return nil (no error).
2144 If not nil and not t, position at limit of search and return nil.
2145Optional fourth argument is repeat count--search for successive occurrences.
2146
2147Search case-sensitivity is determined by the value of the variable
2148`case-fold-search', which see.
2149
2150See also the functions `match-beginning', `match-end' and `replace-match'. */)
2151 (string, bound, noerror, count)
ca1d1d23
JB
2152 Lisp_Object string, bound, noerror, count;
2153{
b819a390 2154 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
2155}
2156
6af43974 2157DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
8c1a1077
PJ
2158 doc: /* Search forward from point for STRING.
2159Set point to the end of the occurrence found, and return point.
2160An optional second argument bounds the search; it is a buffer position.
2161The match found must not extend after that position. nil is equivalent
2162 to (point-max).
2163Optional third argument, if t, means if fail just return nil (no error).
2164 If not nil and not t, move to limit of search and return nil.
2165Optional fourth argument is repeat count--search for successive occurrences.
2166
2167Search case-sensitivity is determined by the value of the variable
2168`case-fold-search', which see.
2169
2170See also the functions `match-beginning', `match-end' and `replace-match'. */)
2171 (string, bound, noerror, count)
ca1d1d23
JB
2172 Lisp_Object string, bound, noerror, count;
2173{
b819a390 2174 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
2175}
2176
2177DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
8c1a1077
PJ
2178 "sWord search backward: ",
2179 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2180Set point to the beginning of the occurrence found, and return point.
2181An optional second argument bounds the search; it is a buffer position.
2182The match found must not extend before that position.
2183Optional third argument, if t, means if fail just return nil (no error).
2184 If not nil and not t, move to limit of search and return nil.
2185Optional fourth argument is repeat count--search for successive occurrences. */)
2186 (string, bound, noerror, count)
ca1d1d23
JB
2187 Lisp_Object string, bound, noerror, count;
2188{
b819a390 2189 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2190}
2191
2192DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
8c1a1077
PJ
2193 "sWord search: ",
2194 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2195Set point to the end of the occurrence found, and return point.
2196An optional second argument bounds the search; it is a buffer position.
2197The match found must not extend after that position.
2198Optional third argument, if t, means if fail just return nil (no error).
2199 If not nil and not t, move to limit of search and return nil.
2200Optional fourth argument is repeat count--search for successive occurrences. */)
2201 (string, bound, noerror, count)
ca1d1d23
JB
2202 Lisp_Object string, bound, noerror, count;
2203{
b819a390 2204 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
2205}
2206
2207DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
8c1a1077
PJ
2208 "sRE search backward: ",
2209 doc: /* Search backward from point for match for regular expression REGEXP.
2210Set point to the beginning of the match, and return point.
2211The match found is the one starting last in the buffer
2212and yet ending before the origin of the search.
2213An optional second argument bounds the search; it is a buffer position.
2214The match found must start at or after that position.
2215Optional third argument, if t, means if fail just return nil (no error).
2216 If not nil and not t, move to limit of search and return nil.
2217Optional fourth argument is repeat count--search for successive occurrences.
2218See also the functions `match-beginning', `match-end', `match-string',
2219and `replace-match'. */)
2220 (regexp, bound, noerror, count)
19c0a730 2221 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2222{
b819a390 2223 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2224}
2225
2226DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
8c1a1077
PJ
2227 "sRE search: ",
2228 doc: /* Search forward from point for regular expression REGEXP.
2229Set point to the end of the occurrence found, and return point.
2230An optional second argument bounds the search; it is a buffer position.
2231The match found must not extend after that position.
2232Optional third argument, if t, means if fail just return nil (no error).
2233 If not nil and not t, move to limit of search and return nil.
2234Optional fourth argument is repeat count--search for successive occurrences.
2235See also the functions `match-beginning', `match-end', `match-string',
2236and `replace-match'. */)
2237 (regexp, bound, noerror, count)
19c0a730 2238 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2239{
b819a390
RS
2240 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2241}
2242
2243DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
8c1a1077
PJ
2244 "sPosix search backward: ",
2245 doc: /* Search backward from point for match for regular expression REGEXP.
2246Find the longest match in accord with Posix regular expression rules.
2247Set point to the beginning of the match, and return point.
2248The match found is the one starting last in the buffer
2249and yet ending before the origin of the search.
2250An optional second argument bounds the search; it is a buffer position.
2251The match found must start at or after that position.
2252Optional third argument, if t, means if fail just return nil (no error).
2253 If not nil and not t, move to limit of search and return nil.
2254Optional fourth argument is repeat count--search for successive occurrences.
2255See also the functions `match-beginning', `match-end', `match-string',
2256and `replace-match'. */)
2257 (regexp, bound, noerror, count)
b819a390
RS
2258 Lisp_Object regexp, bound, noerror, count;
2259{
2260 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2261}
2262
2263DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
8c1a1077
PJ
2264 "sPosix search: ",
2265 doc: /* Search forward from point for regular expression REGEXP.
2266Find the longest match in accord with Posix regular expression rules.
2267Set point to the end of the occurrence found, and return point.
2268An optional second argument bounds the search; it is a buffer position.
2269The match found must not extend after that position.
2270Optional third argument, if t, means if fail just return nil (no error).
2271 If not nil and not t, move to limit of search and return nil.
2272Optional fourth argument is repeat count--search for successive occurrences.
2273See also the functions `match-beginning', `match-end', `match-string',
2274and `replace-match'. */)
2275 (regexp, bound, noerror, count)
b819a390
RS
2276 Lisp_Object regexp, bound, noerror, count;
2277{
2278 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2279}
2280\f
d7a5ad5f 2281DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
8c1a1077 2282 doc: /* Replace text matched by last search with NEWTEXT.
4dd0c271
RS
2283Leave point at the end of the replacement text.
2284
8c1a1077
PJ
2285If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2286Otherwise maybe capitalize the whole text, or maybe just word initials,
2287based on the replaced text.
2288If the replaced text has only capital letters
2289and has at least one multiletter word, convert NEWTEXT to all caps.
4dd0c271
RS
2290Otherwise if all words are capitalized in the replaced text,
2291capitalize each word in NEWTEXT.
8c1a1077
PJ
2292
2293If third arg LITERAL is non-nil, insert NEWTEXT literally.
2294Otherwise treat `\\' as special:
2295 `\\&' in NEWTEXT means substitute original matched text.
2296 `\\N' means substitute what matched the Nth `\\(...\\)'.
2297 If Nth parens didn't match, substitute nothing.
2298 `\\\\' means insert one `\\'.
4dd0c271
RS
2299Case conversion does not apply to these substitutions.
2300
8c1a1077 2301FIXEDCASE and LITERAL are optional arguments.
8c1a1077
PJ
2302
2303The optional fourth argument STRING can be a string to modify.
2304This is meaningful when the previous match was done against STRING,
2305using `string-match'. When used this way, `replace-match'
2306creates and returns a new string made by copying STRING and replacing
2307the part of STRING that was matched.
2308
2309The optional fifth argument SUBEXP specifies a subexpression;
2310it says to replace just that subexpression with NEWTEXT,
2311rather than replacing the entire matched text.
2312This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2313`\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2314NEWTEXT in place of subexp N.
2315This is useful only after a regular expression search or match,
2316since only regular expressions have distinguished subexpressions. */)
2317 (newtext, fixedcase, literal, string, subexp)
d7a5ad5f 2318 Lisp_Object newtext, fixedcase, literal, string, subexp;
ca1d1d23
JB
2319{
2320 enum { nochange, all_caps, cap_initial } case_action;
ac3b28b1 2321 register int pos, pos_byte;
ca1d1d23 2322 int some_multiletter_word;
97832bd0 2323 int some_lowercase;
73dc8771 2324 int some_uppercase;
208767c3 2325 int some_nonuppercase_initial;
ca1d1d23 2326 register int c, prevc;
d7a5ad5f 2327 int sub;
3e18eecf 2328 int opoint, newpoint;
ca1d1d23 2329
b7826503 2330 CHECK_STRING (newtext);
ca1d1d23 2331
080c45fd 2332 if (! NILP (string))
b7826503 2333 CHECK_STRING (string);
080c45fd 2334
ca1d1d23
JB
2335 case_action = nochange; /* We tried an initialization */
2336 /* but some C compilers blew it */
4746118a
JB
2337
2338 if (search_regs.num_regs <= 0)
d72cdbfc 2339 error ("`replace-match' called before any match found");
4746118a 2340
d7a5ad5f
RS
2341 if (NILP (subexp))
2342 sub = 0;
2343 else
2344 {
b7826503 2345 CHECK_NUMBER (subexp);
d7a5ad5f
RS
2346 sub = XINT (subexp);
2347 if (sub < 0 || sub >= search_regs.num_regs)
2348 args_out_of_range (subexp, make_number (search_regs.num_regs));
2349 }
2350
080c45fd
RS
2351 if (NILP (string))
2352 {
d7a5ad5f
RS
2353 if (search_regs.start[sub] < BEGV
2354 || search_regs.start[sub] > search_regs.end[sub]
2355 || search_regs.end[sub] > ZV)
2356 args_out_of_range (make_number (search_regs.start[sub]),
2357 make_number (search_regs.end[sub]));
080c45fd
RS
2358 }
2359 else
2360 {
d7a5ad5f
RS
2361 if (search_regs.start[sub] < 0
2362 || search_regs.start[sub] > search_regs.end[sub]
d5db4077 2363 || search_regs.end[sub] > SCHARS (string))
d7a5ad5f
RS
2364 args_out_of_range (make_number (search_regs.start[sub]),
2365 make_number (search_regs.end[sub]));
080c45fd 2366 }
ca1d1d23
JB
2367
2368 if (NILP (fixedcase))
2369 {
2370 /* Decide how to casify by examining the matched text. */
ac3b28b1 2371 int last;
ca1d1d23 2372
ac3b28b1
KH
2373 pos = search_regs.start[sub];
2374 last = search_regs.end[sub];
fa8ed3e0
RS
2375
2376 if (NILP (string))
ac3b28b1 2377 pos_byte = CHAR_TO_BYTE (pos);
fa8ed3e0 2378 else
ac3b28b1 2379 pos_byte = string_char_to_byte (string, pos);
fa8ed3e0 2380
ca1d1d23
JB
2381 prevc = '\n';
2382 case_action = all_caps;
2383
2384 /* some_multiletter_word is set nonzero if any original word
2385 is more than one letter long. */
2386 some_multiletter_word = 0;
97832bd0 2387 some_lowercase = 0;
208767c3 2388 some_nonuppercase_initial = 0;
73dc8771 2389 some_uppercase = 0;
ca1d1d23 2390
ac3b28b1 2391 while (pos < last)
ca1d1d23 2392 {
080c45fd 2393 if (NILP (string))
ac3b28b1
KH
2394 {
2395 c = FETCH_CHAR (pos_byte);
2396 INC_BOTH (pos, pos_byte);
2397 }
080c45fd 2398 else
ac3b28b1 2399 FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
080c45fd 2400
ca1d1d23
JB
2401 if (LOWERCASEP (c))
2402 {
2403 /* Cannot be all caps if any original char is lower case */
2404
97832bd0 2405 some_lowercase = 1;
ca1d1d23 2406 if (SYNTAX (prevc) != Sword)
208767c3 2407 some_nonuppercase_initial = 1;
ca1d1d23
JB
2408 else
2409 some_multiletter_word = 1;
2410 }
d16c2b66 2411 else if (UPPERCASEP (c))
ca1d1d23 2412 {
73dc8771 2413 some_uppercase = 1;
97832bd0 2414 if (SYNTAX (prevc) != Sword)
c4d460ce 2415 ;
97832bd0 2416 else
ca1d1d23
JB
2417 some_multiletter_word = 1;
2418 }
208767c3
RS
2419 else
2420 {
2421 /* If the initial is a caseless word constituent,
2422 treat that like a lowercase initial. */
2423 if (SYNTAX (prevc) != Sword)
2424 some_nonuppercase_initial = 1;
2425 }
ca1d1d23
JB
2426
2427 prevc = c;
2428 }
2429
97832bd0
RS
2430 /* Convert to all caps if the old text is all caps
2431 and has at least one multiletter word. */
2432 if (! some_lowercase && some_multiletter_word)
2433 case_action = all_caps;
c4d460ce 2434 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2435 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2436 case_action = cap_initial;
208767c3 2437 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2438 /* Should x -> yz, operating on X, give Yz or YZ?
2439 We'll assume the latter. */
2440 case_action = all_caps;
97832bd0
RS
2441 else
2442 case_action = nochange;
ca1d1d23
JB
2443 }
2444
080c45fd
RS
2445 /* Do replacement in a string. */
2446 if (!NILP (string))
2447 {
2448 Lisp_Object before, after;
2449
2450 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2451 make_number (search_regs.start[sub]));
2452 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2453
636a5e28
RS
2454 /* Substitute parts of the match into NEWTEXT
2455 if desired. */
080c45fd
RS
2456 if (NILP (literal))
2457 {
d131e79c
RS
2458 int lastpos = 0;
2459 int lastpos_byte = 0;
080c45fd
RS
2460 /* We build up the substituted string in ACCUM. */
2461 Lisp_Object accum;
2462 Lisp_Object middle;
d5db4077 2463 int length = SBYTES (newtext);
080c45fd
RS
2464
2465 accum = Qnil;
2466
ac3b28b1 2467 for (pos_byte = 0, pos = 0; pos_byte < length;)
080c45fd
RS
2468 {
2469 int substart = -1;
6bbd7a29 2470 int subend = 0;
1e79ec24 2471 int delbackslash = 0;
080c45fd 2472
0c8533c6
RS
2473 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2474
080c45fd
RS
2475 if (c == '\\')
2476 {
0c8533c6 2477 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
177c0ea7 2478
080c45fd
RS
2479 if (c == '&')
2480 {
d7a5ad5f
RS
2481 substart = search_regs.start[sub];
2482 subend = search_regs.end[sub];
080c45fd 2483 }
5fbbc83b 2484 else if (c >= '1' && c <= '9')
080c45fd 2485 {
5fbbc83b
RS
2486 if (search_regs.start[c - '0'] >= 0
2487 && c <= search_regs.num_regs + '0')
080c45fd
RS
2488 {
2489 substart = search_regs.start[c - '0'];
2490 subend = search_regs.end[c - '0'];
2491 }
5fbbc83b
RS
2492 else
2493 {
2494 /* If that subexp did not match,
2495 replace \\N with nothing. */
2496 substart = 0;
2497 subend = 0;
2498 }
080c45fd 2499 }
1e79ec24
KH
2500 else if (c == '\\')
2501 delbackslash = 1;
636a5e28
RS
2502 else
2503 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2504 }
2505 if (substart >= 0)
2506 {
d131e79c
RS
2507 if (pos - 2 != lastpos)
2508 middle = substring_both (newtext, lastpos,
2509 lastpos_byte,
2510 pos - 2, pos_byte - 2);
080c45fd
RS
2511 else
2512 middle = Qnil;
2513 accum = concat3 (accum, middle,
0c8533c6
RS
2514 Fsubstring (string,
2515 make_number (substart),
080c45fd
RS
2516 make_number (subend)));
2517 lastpos = pos;
0c8533c6 2518 lastpos_byte = pos_byte;
080c45fd 2519 }
1e79ec24
KH
2520 else if (delbackslash)
2521 {
d131e79c
RS
2522 middle = substring_both (newtext, lastpos,
2523 lastpos_byte,
2524 pos - 1, pos_byte - 1);
0c8533c6 2525
1e79ec24
KH
2526 accum = concat2 (accum, middle);
2527 lastpos = pos;
0c8533c6 2528 lastpos_byte = pos_byte;
1e79ec24 2529 }
080c45fd
RS
2530 }
2531
d131e79c
RS
2532 if (pos != lastpos)
2533 middle = substring_both (newtext, lastpos,
2534 lastpos_byte,
0c8533c6 2535 pos, pos_byte);
080c45fd
RS
2536 else
2537 middle = Qnil;
2538
2539 newtext = concat2 (accum, middle);
2540 }
2541
636a5e28 2542 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2543 if (case_action == all_caps)
2544 newtext = Fupcase (newtext);
2545 else if (case_action == cap_initial)
2b2eead9 2546 newtext = Fupcase_initials (newtext);
080c45fd
RS
2547
2548 return concat3 (before, newtext, after);
2549 }
2550
09c4719e 2551 /* Record point, then move (quietly) to the start of the match. */
9160906f 2552 if (PT >= search_regs.end[sub])
b0eba991 2553 opoint = PT - ZV;
9160906f
RS
2554 else if (PT > search_regs.start[sub])
2555 opoint = search_regs.end[sub] - ZV;
b0eba991
RS
2556 else
2557 opoint = PT;
2558
886ed6ec
RS
2559 /* If we want non-literal replacement,
2560 perform substitution on the replacement string. */
2561 if (NILP (literal))
ca1d1d23 2562 {
d5db4077 2563 int length = SBYTES (newtext);
68e69fbd
RS
2564 unsigned char *substed;
2565 int substed_alloc_size, substed_len;
3bc25e52
KH
2566 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2567 int str_multibyte = STRING_MULTIBYTE (newtext);
2568 Lisp_Object rev_tbl;
886ed6ec 2569 int really_changed = 0;
3bc25e52
KH
2570
2571 rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2572 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2573 make_number (0))
2574 : Qnil);
ac3b28b1 2575
68e69fbd
RS
2576 substed_alloc_size = length * 2 + 100;
2577 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2578 substed_len = 0;
2579
3bc25e52
KH
2580 /* Go thru NEWTEXT, producing the actual text to insert in
2581 SUBSTED while adjusting multibyteness to that of the current
2582 buffer. */
ca1d1d23 2583
ac3b28b1 2584 for (pos_byte = 0, pos = 0; pos_byte < length;)
ca1d1d23 2585 {
68e69fbd 2586 unsigned char str[MAX_MULTIBYTE_LENGTH];
f8ce8a0d
GM
2587 unsigned char *add_stuff = NULL;
2588 int add_len = 0;
68e69fbd 2589 int idx = -1;
9a76659d 2590
3bc25e52
KH
2591 if (str_multibyte)
2592 {
eb99a8dd 2593 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
3bc25e52
KH
2594 if (!buf_multibyte)
2595 c = multibyte_char_to_unibyte (c, rev_tbl);
2596 }
2597 else
2598 {
2599 /* Note that we don't have to increment POS. */
5d69fe10 2600 c = SREF (newtext, pos_byte++);
3bc25e52
KH
2601 if (buf_multibyte)
2602 c = unibyte_char_to_multibyte (c);
2603 }
ac3b28b1 2604
68e69fbd
RS
2605 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2606 or set IDX to a match index, which means put that part
2607 of the buffer text into SUBSTED. */
2608
ca1d1d23
JB
2609 if (c == '\\')
2610 {
886ed6ec
RS
2611 really_changed = 1;
2612
3bc25e52
KH
2613 if (str_multibyte)
2614 {
eb99a8dd
KH
2615 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2616 pos, pos_byte);
3bc25e52
KH
2617 if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2618 c = multibyte_char_to_unibyte (c, rev_tbl);
2619 }
2620 else
2621 {
d5db4077 2622 c = SREF (newtext, pos_byte++);
3bc25e52
KH
2623 if (buf_multibyte)
2624 c = unibyte_char_to_multibyte (c);
2625 }
2626
ca1d1d23 2627 if (c == '&')
68e69fbd 2628 idx = sub;
78445046 2629 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
2630 {
2631 if (search_regs.start[c - '0'] >= 1)
68e69fbd 2632 idx = c - '0';
ca1d1d23 2633 }
636a5e28 2634 else if (c == '\\')
68e69fbd 2635 add_len = 1, add_stuff = "\\";
636a5e28 2636 else
3bc25e52
KH
2637 {
2638 xfree (substed);
2639 error ("Invalid use of `\\' in replacement text");
2640 }
ca1d1d23
JB
2641 }
2642 else
68e69fbd
RS
2643 {
2644 add_len = CHAR_STRING (c, str);
2645 add_stuff = str;
2646 }
2647
2648 /* If we want to copy part of a previous match,
2649 set up ADD_STUFF and ADD_LEN to point to it. */
2650 if (idx >= 0)
2651 {
2652 int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2653 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2654 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2655 move_gap (search_regs.start[idx]);
2656 add_stuff = BYTE_POS_ADDR (begbyte);
2657 }
2658
2659 /* Now the stuff we want to add to SUBSTED
2660 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2661
2662 /* Make sure SUBSTED is big enough. */
2663 if (substed_len + add_len >= substed_alloc_size)
2664 {
2665 substed_alloc_size = substed_len + add_len + 500;
2666 substed = (unsigned char *) xrealloc (substed,
2667 substed_alloc_size + 1);
2668 }
2669
2670 /* Now add to the end of SUBSTED. */
f8ce8a0d
GM
2671 if (add_stuff)
2672 {
2673 bcopy (add_stuff, substed + substed_len, add_len);
2674 substed_len += add_len;
2675 }
ca1d1d23 2676 }
68e69fbd 2677
886ed6ec 2678 if (really_changed)
80460525
KH
2679 {
2680 if (buf_multibyte)
2681 {
2682 int nchars = multibyte_chars_in_text (substed, substed_len);
68e69fbd 2683
80460525
KH
2684 newtext = make_multibyte_string (substed, nchars, substed_len);
2685 }
2686 else
2687 newtext = make_unibyte_string (substed, substed_len);
2688 }
68e69fbd 2689 xfree (substed);
ca1d1d23
JB
2690 }
2691
886ed6ec
RS
2692 /* Replace the old text with the new in the cleanest possible way. */
2693 replace_range (search_regs.start[sub], search_regs.end[sub],
2694 newtext, 1, 0, 1);
d5db4077 2695 newpoint = search_regs.start[sub] + SCHARS (newtext);
ca1d1d23
JB
2696
2697 if (case_action == all_caps)
886ed6ec
RS
2698 Fupcase_region (make_number (search_regs.start[sub]),
2699 make_number (newpoint));
ca1d1d23 2700 else if (case_action == cap_initial)
886ed6ec
RS
2701 Fupcase_initials_region (make_number (search_regs.start[sub]),
2702 make_number (newpoint));
3e18eecf 2703
98e942e0
RS
2704 /* Adjust search data for this change. */
2705 {
5b88a2c5 2706 int oldend = search_regs.end[sub];
41c01205 2707 int oldstart = search_regs.start[sub];
98e942e0
RS
2708 int change = newpoint - search_regs.end[sub];
2709 int i;
2710
2711 for (i = 0; i < search_regs.num_regs; i++)
2712 {
41c01205 2713 if (search_regs.start[i] >= oldend)
98e942e0 2714 search_regs.start[i] += change;
41c01205
DK
2715 else if (search_regs.start[i] > oldstart)
2716 search_regs.start[i] = oldstart;
2717 if (search_regs.end[i] >= oldend)
98e942e0 2718 search_regs.end[i] += change;
41c01205
DK
2719 else if (search_regs.end[i] > oldstart)
2720 search_regs.end[i] = oldstart;
98e942e0
RS
2721 }
2722 }
2723
b0eba991 2724 /* Put point back where it was in the text. */
8d808a65 2725 if (opoint <= 0)
fa8ed3e0 2726 TEMP_SET_PT (opoint + ZV);
b0eba991 2727 else
fa8ed3e0 2728 TEMP_SET_PT (opoint);
b0eba991
RS
2729
2730 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2731 move_if_not_intangible (newpoint);
177c0ea7 2732
ca1d1d23
JB
2733 return Qnil;
2734}
2735\f
2736static Lisp_Object
2737match_limit (num, beginningp)
2738 Lisp_Object num;
2739 int beginningp;
2740{
2741 register int n;
2742
b7826503 2743 CHECK_NUMBER (num);
ca1d1d23 2744 n = XINT (num);
f90a5bf5 2745 if (n < 0)
bd2cbd56 2746 args_out_of_range (num, make_number (0));
f90a5bf5
RS
2747 if (search_regs.num_regs <= 0)
2748 error ("No match data, because no search succeeded");
9b9ceb61 2749 if (n >= search_regs.num_regs
4746118a 2750 || search_regs.start[n] < 0)
ca1d1d23
JB
2751 return Qnil;
2752 return (make_number ((beginningp) ? search_regs.start[n]
2753 : search_regs.end[n]));
2754}
2755
2756DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
8c1a1077
PJ
2757 doc: /* Return position of start of text matched by last search.
2758SUBEXP, a number, specifies which parenthesized expression in the last
2759 regexp.
2760Value is nil if SUBEXPth pair didn't match, or there were less than
2761 SUBEXP pairs.
2762Zero means the entire text matched by the whole regexp or whole string. */)
2763 (subexp)
5806161b 2764 Lisp_Object subexp;
ca1d1d23 2765{
5806161b 2766 return match_limit (subexp, 1);
ca1d1d23
JB
2767}
2768
2769DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
8c1a1077
PJ
2770 doc: /* Return position of end of text matched by last search.
2771SUBEXP, a number, specifies which parenthesized expression in the last
2772 regexp.
2773Value is nil if SUBEXPth pair didn't match, or there were less than
2774 SUBEXP pairs.
2775Zero means the entire text matched by the whole regexp or whole string. */)
2776 (subexp)
5806161b 2777 Lisp_Object subexp;
ca1d1d23 2778{
5806161b 2779 return match_limit (subexp, 0);
177c0ea7 2780}
ca1d1d23 2781
abd0071c 2782DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
8c1a1077
PJ
2783 doc: /* Return a list containing all info on what the last search matched.
2784Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2785All the elements are markers or nil (nil if the Nth pair didn't match)
2786if the last match was on a buffer; integers or nil if a string was matched.
2787Use `store-match-data' to reinstate the data in this list.
2788
41c01205
DK
2789If INTEGERS (the optional first argument) is non-nil, always use
2790integers \(rather than markers) to represent buffer positions. In
2791this case, and if the last match was in a buffer, the buffer will get
2792stored as one additional element at the end of the list.
2793
abd0071c
KS
2794If REUSE is a list, reuse it as part of the value. If REUSE is long
2795enough to hold all the values, and if INTEGERS is non-nil, no consing
2796is done.
2797
2798If optional third arg RESEAT is non-nil, any previous markers on the
2799REUSE list will be modified to point to nowhere.
2800
140a6b7e 2801Return value is undefined if the last search failed. */)
abd0071c
KS
2802 (integers, reuse, reseat)
2803 Lisp_Object integers, reuse, reseat;
ca1d1d23 2804{
56256c2a 2805 Lisp_Object tail, prev;
4746118a 2806 Lisp_Object *data;
ca1d1d23
JB
2807 int i, len;
2808
abd0071c
KS
2809 if (!NILP (reseat))
2810 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2811 if (MARKERP (XCAR (tail)))
2812 {
51f10faa 2813 unchain_marker (XMARKER (XCAR (tail)));
abd0071c
KS
2814 XSETCAR (tail, Qnil);
2815 }
2816
daa37602 2817 if (NILP (last_thing_searched))
c36bcf1b 2818 return Qnil;
daa37602 2819
6bbd7a29
GM
2820 prev = Qnil;
2821
41c01205 2822 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
4746118a
JB
2823 * sizeof (Lisp_Object));
2824
41c01205 2825 len = 0;
4746118a 2826 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2827 {
2828 int start = search_regs.start[i];
2829 if (start >= 0)
2830 {
56256c2a
RS
2831 if (EQ (last_thing_searched, Qt)
2832 || ! NILP (integers))
ca1d1d23 2833 {
c235cce7
KH
2834 XSETFASTINT (data[2 * i], start);
2835 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2836 }
0ed62dc7 2837 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2838 {
2839 data[2 * i] = Fmake_marker ();
daa37602
JB
2840 Fset_marker (data[2 * i],
2841 make_number (start),
2842 last_thing_searched);
ca1d1d23
JB
2843 data[2 * i + 1] = Fmake_marker ();
2844 Fset_marker (data[2 * i + 1],
177c0ea7 2845 make_number (search_regs.end[i]),
daa37602 2846 last_thing_searched);
ca1d1d23 2847 }
daa37602
JB
2848 else
2849 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2850 abort ();
2851
abd0071c 2852 len = 2 * i + 2;
ca1d1d23
JB
2853 }
2854 else
abd0071c 2855 data[2 * i] = data[2 * i + 1] = Qnil;
ca1d1d23 2856 }
56256c2a 2857
bd2cbd56 2858 if (BUFFERP (last_thing_searched) && !NILP (integers))
41c01205 2859 {
bd2cbd56 2860 data[len] = last_thing_searched;
41c01205
DK
2861 len++;
2862 }
2863
56256c2a
RS
2864 /* If REUSE is not usable, cons up the values and return them. */
2865 if (! CONSP (reuse))
41c01205 2866 return Flist (len, data);
56256c2a
RS
2867
2868 /* If REUSE is a list, store as many value elements as will fit
2869 into the elements of REUSE. */
2870 for (i = 0, tail = reuse; CONSP (tail);
c1d497be 2871 i++, tail = XCDR (tail))
56256c2a 2872 {
41c01205 2873 if (i < len)
f3fbd155 2874 XSETCAR (tail, data[i]);
56256c2a 2875 else
f3fbd155 2876 XSETCAR (tail, Qnil);
56256c2a
RS
2877 prev = tail;
2878 }
2879
2880 /* If we couldn't fit all value elements into REUSE,
2881 cons up the rest of them and add them to the end of REUSE. */
41c01205
DK
2882 if (i < len)
2883 XSETCDR (prev, Flist (len - i, data + i));
56256c2a
RS
2884
2885 return reuse;
ca1d1d23
JB
2886}
2887
51f10faa
KS
2888/* Internal usage only:
2889 If RESEAT is `evaporate', put the markers back on the free list
2890 immediately. No other references to the markers must exist in this case,
2891 so it is used only internally on the unwind stack and save-match-data from
2892 Lisp. */
ca1d1d23 2893
abd0071c 2894DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
8c1a1077 2895 doc: /* Set internal data on last search match from elements of LIST.
abd0071c
KS
2896LIST should have been created by calling `match-data' previously.
2897
51f10faa 2898If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
abd0071c
KS
2899 (list, reseat)
2900 register Lisp_Object list, reseat;
ca1d1d23
JB
2901{
2902 register int i;
2903 register Lisp_Object marker;
2904
7074fde6
FP
2905 if (running_asynch_code)
2906 save_search_regs ();
2907
29100cea 2908 CHECK_LIST (list);
ca1d1d23 2909
41c01205
DK
2910 /* Unless we find a marker with a buffer or an explicit buffer
2911 in LIST, assume that this match data came from a string. */
daa37602
JB
2912 last_thing_searched = Qt;
2913
4746118a
JB
2914 /* Allocate registers if they don't already exist. */
2915 {
d084e942 2916 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
2917
2918 if (length > search_regs.num_regs)
2919 {
1113d9db
JB
2920 if (search_regs.num_regs == 0)
2921 {
2922 search_regs.start
2923 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2924 search_regs.end
2925 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2926 }
4746118a 2927 else
1113d9db
JB
2928 {
2929 search_regs.start
2930 = (regoff_t *) xrealloc (search_regs.start,
2931 length * sizeof (regoff_t));
2932 search_regs.end
2933 = (regoff_t *) xrealloc (search_regs.end,
2934 length * sizeof (regoff_t));
2935 }
4746118a 2936
e62371e9
KH
2937 for (i = search_regs.num_regs; i < length; i++)
2938 search_regs.start[i] = -1;
2939
487282dc 2940 search_regs.num_regs = length;
4746118a 2941 }
ca1d1d23 2942
abd0071c 2943 for (i = 0; CONSP (list); i++)
41c01205 2944 {
abd0071c 2945 marker = XCAR (list);
bd2cbd56 2946 if (BUFFERP (marker))
c3762cbd 2947 {
bd2cbd56 2948 last_thing_searched = marker;
c3762cbd
DK
2949 break;
2950 }
2951 if (i >= length)
2952 break;
41c01205
DK
2953 if (NILP (marker))
2954 {
2955 search_regs.start[i] = -1;
abd0071c 2956 list = XCDR (list);
41c01205
DK
2957 }
2958 else
2959 {
2960 int from;
abd0071c 2961 Lisp_Object m;
e2811828 2962
abd0071c 2963 m = marker;
41c01205
DK
2964 if (MARKERP (marker))
2965 {
2966 if (XMARKER (marker)->buffer == 0)
2967 XSETFASTINT (marker, 0);
2968 else
2969 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2970 }
e2811828 2971
41c01205
DK
2972 CHECK_NUMBER_COERCE_MARKER (marker);
2973 from = XINT (marker);
e2811828 2974
abd0071c
KS
2975 if (!NILP (reseat) && MARKERP (m))
2976 {
2977 if (EQ (reseat, Qevaporate))
2978 free_marker (m);
2979 else
2980 unchain_marker (XMARKER (m));
9ad54a7e 2981 XSETCAR (list, Qnil);
abd0071c
KS
2982 }
2983
2984 if ((list = XCDR (list), !CONSP (list)))
2985 break;
2986
2987 m = marker = XCAR (list);
2988
41c01205
DK
2989 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2990 XSETFASTINT (marker, 0);
e2811828 2991
41c01205
DK
2992 CHECK_NUMBER_COERCE_MARKER (marker);
2993 search_regs.start[i] = from;
2994 search_regs.end[i] = XINT (marker);
abd0071c
KS
2995
2996 if (!NILP (reseat) && MARKERP (m))
2997 {
2998 if (EQ (reseat, Qevaporate))
2999 free_marker (m);
3000 else
3001 unchain_marker (XMARKER (m));
9ad54a7e 3002 XSETCAR (list, Qnil);
abd0071c 3003 }
41c01205 3004 }
abd0071c 3005 list = XCDR (list);
41c01205 3006 }
ca1d1d23 3007
41c01205
DK
3008 for (; i < search_regs.num_regs; i++)
3009 search_regs.start[i] = -1;
3010 }
ca1d1d23 3011
177c0ea7 3012 return Qnil;
ca1d1d23
JB
3013}
3014
7074fde6
FP
3015/* If non-zero the match data have been saved in saved_search_regs
3016 during the execution of a sentinel or filter. */
75ebf74b 3017static int search_regs_saved;
7074fde6 3018static struct re_registers saved_search_regs;
41c01205 3019static Lisp_Object saved_last_thing_searched;
7074fde6
FP
3020
3021/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3022 if asynchronous code (filter or sentinel) is running. */
3023static void
3024save_search_regs ()
3025{
3026 if (!search_regs_saved)
3027 {
3028 saved_search_regs.num_regs = search_regs.num_regs;
3029 saved_search_regs.start = search_regs.start;
3030 saved_search_regs.end = search_regs.end;
41c01205
DK
3031 saved_last_thing_searched = last_thing_searched;
3032 last_thing_searched = Qnil;
7074fde6 3033 search_regs.num_regs = 0;
2d4a771a
RS
3034 search_regs.start = 0;
3035 search_regs.end = 0;
7074fde6
FP
3036
3037 search_regs_saved = 1;
3038 }
3039}
3040
3041/* Called upon exit from filters and sentinels. */
3042void
abd0071c 3043restore_search_regs ()
7074fde6
FP
3044{
3045 if (search_regs_saved)
3046 {
3047 if (search_regs.num_regs > 0)
3048 {
3049 xfree (search_regs.start);
3050 xfree (search_regs.end);
3051 }
3052 search_regs.num_regs = saved_search_regs.num_regs;
3053 search_regs.start = saved_search_regs.start;
3054 search_regs.end = saved_search_regs.end;
41c01205
DK
3055 last_thing_searched = saved_last_thing_searched;
3056 saved_last_thing_searched = Qnil;
7074fde6
FP
3057 search_regs_saved = 0;
3058 }
3059}
3060
abd0071c
KS
3061static Lisp_Object
3062unwind_set_match_data (list)
3063 Lisp_Object list;
3064{
51f10faa 3065 /* It is safe to free (evaporate) the markers immediately. */
abd0071c
KS
3066 return Fset_match_data (list, Qevaporate);
3067}
3068
3069/* Called to unwind protect the match data. */
3070void
3071record_unwind_save_match_data ()
3072{
3073 record_unwind_protect (unwind_set_match_data,
3074 Fmatch_data (Qnil, Qnil, Qnil));
3075}
3076
ca1d1d23
JB
3077/* Quote a string to inactivate reg-expr chars */
3078
3079DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
8c1a1077
PJ
3080 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
3081 (string)
5806161b 3082 Lisp_Object string;
ca1d1d23
JB
3083{
3084 register unsigned char *in, *out, *end;
3085 register unsigned char *temp;
0c8533c6 3086 int backslashes_added = 0;
ca1d1d23 3087
b7826503 3088 CHECK_STRING (string);
ca1d1d23 3089
d5db4077 3090 temp = (unsigned char *) alloca (SBYTES (string) * 2);
ca1d1d23
JB
3091
3092 /* Now copy the data into the new string, inserting escapes. */
3093
d5db4077
KR
3094 in = SDATA (string);
3095 end = in + SBYTES (string);
177c0ea7 3096 out = temp;
ca1d1d23
JB
3097
3098 for (; in != end; in++)
3099 {
66bc6082 3100 if (*in == '['
ca1d1d23
JB
3101 || *in == '*' || *in == '.' || *in == '\\'
3102 || *in == '?' || *in == '+'
3103 || *in == '^' || *in == '$')
0c8533c6 3104 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
3105 *out++ = *in;
3106 }
3107
3f8100f1 3108 return make_specified_string (temp,
d5db4077 3109 SCHARS (string) + backslashes_added,
3f8100f1
RS
3110 out - temp,
3111 STRING_MULTIBYTE (string));
ca1d1d23 3112}
177c0ea7 3113\f
dfcf069d 3114void
ca1d1d23
JB
3115syms_of_search ()
3116{
3117 register int i;
3118
487282dc
KH
3119 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3120 {
3121 searchbufs[i].buf.allocated = 100;
b23c0a83 3122 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
487282dc
KH
3123 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3124 searchbufs[i].regexp = Qnil;
ecdb561e 3125 searchbufs[i].whitespace_regexp = Qnil;
b69e3c18 3126 searchbufs[i].syntax_table = Qnil;
487282dc 3127 staticpro (&searchbufs[i].regexp);
aa77b5ce 3128 staticpro (&searchbufs[i].whitespace_regexp);
b69e3c18 3129 staticpro (&searchbufs[i].syntax_table);
487282dc
KH
3130 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3131 }
3132 searchbuf_head = &searchbufs[0];
ca1d1d23
JB
3133
3134 Qsearch_failed = intern ("search-failed");
3135 staticpro (&Qsearch_failed);
3136 Qinvalid_regexp = intern ("invalid-regexp");
3137 staticpro (&Qinvalid_regexp);
3138
3139 Fput (Qsearch_failed, Qerror_conditions,
3140 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3141 Fput (Qsearch_failed, Qerror_message,
3142 build_string ("Search failed"));
3143
3144 Fput (Qinvalid_regexp, Qerror_conditions,
3145 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3146 Fput (Qinvalid_regexp, Qerror_message,
3147 build_string ("Invalid regexp"));
3148
daa37602
JB
3149 last_thing_searched = Qnil;
3150 staticpro (&last_thing_searched);
3151
0f6af254
DK
3152 saved_last_thing_searched = Qnil;
3153 staticpro (&saved_last_thing_searched);
3154
41a33295 3155 DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
e2811828 3156 doc: /* Regexp to substitute for bunches of spaces in regexp search.
f31a9a68
RS
3157Some commands use this for user-specified regexps.
3158Spaces that occur inside character classes or repetition operators
3159or other such regexp constructs are not replaced with this.
3160A value of nil (which is the normal value) means treat spaces literally. */);
41a33295 3161 Vsearch_spaces_regexp = Qnil;
f31a9a68 3162
ca1d1d23 3163 defsubr (&Slooking_at);
b819a390
RS
3164 defsubr (&Sposix_looking_at);
3165 defsubr (&Sstring_match);
3166 defsubr (&Sposix_string_match);
ca1d1d23
JB
3167 defsubr (&Ssearch_forward);
3168 defsubr (&Ssearch_backward);
3169 defsubr (&Sword_search_forward);
3170 defsubr (&Sword_search_backward);
3171 defsubr (&Sre_search_forward);
3172 defsubr (&Sre_search_backward);
b819a390
RS
3173 defsubr (&Sposix_search_forward);
3174 defsubr (&Sposix_search_backward);
ca1d1d23
JB
3175 defsubr (&Sreplace_match);
3176 defsubr (&Smatch_beginning);
3177 defsubr (&Smatch_end);
3178 defsubr (&Smatch_data);
3f1c005b 3179 defsubr (&Sset_match_data);
ca1d1d23
JB
3180 defsubr (&Sregexp_quote);
3181}
ab5796a9
MB
3182
3183/* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3184 (do not change this comment) */