Fixed "Field handling in C-n."
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
0b5538bd 2 Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2002, 2003,
aaef169d 3 2004, 2005, 2006 Free Software Foundation, Inc.
ca1d1d23
JB
4
5This file is part of GNU Emacs.
6
7GNU Emacs is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
7c938215 9the Free Software Foundation; either version 2, or (at your option)
ca1d1d23
JB
10any later version.
11
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU Emacs; see the file COPYING. If not, write to
4fc5845f
LK
19the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20Boston, MA 02110-1301, USA. */
ca1d1d23
JB
21
22
18160b98 23#include <config.h>
ca1d1d23
JB
24#include "lisp.h"
25#include "syntax.h"
5679531d 26#include "category.h"
ca1d1d23 27#include "buffer.h"
5679531d 28#include "charset.h"
9169c321 29#include "region-cache.h"
ca1d1d23 30#include "commands.h"
9ac0d9e0 31#include "blockinput.h"
bf1760bb 32#include "intervals.h"
4746118a 33
ca1d1d23
JB
34#include <sys/types.h>
35#include "regex.h"
36
1d288aef 37#define REGEXP_CACHE_SIZE 20
ca1d1d23 38
487282dc
KH
39/* If the regexp is non-nil, then the buffer contains the compiled form
40 of that regexp, suitable for searching. */
1d288aef
RS
41struct regexp_cache
42{
487282dc 43 struct regexp_cache *next;
ecdb561e 44 Lisp_Object regexp, whitespace_regexp;
487282dc
KH
45 struct re_pattern_buffer buf;
46 char fastmap[0400];
b819a390
RS
47 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
48 char posix;
487282dc 49};
ca1d1d23 50
487282dc
KH
51/* The instances of that struct. */
52struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 53
487282dc
KH
54/* The head of the linked list; points to the most recently used buffer. */
55struct regexp_cache *searchbuf_head;
ca1d1d23 56
ca1d1d23 57
4746118a
JB
58/* Every call to re_match, etc., must pass &search_regs as the regs
59 argument unless you can show it is unnecessary (i.e., if re_match
60 is certainly going to be called again before region-around-match
61 can be called).
62
63 Since the registers are now dynamically allocated, we need to make
64 sure not to refer to the Nth register before checking that it has
1113d9db
JB
65 been allocated by checking search_regs.num_regs.
66
67 The regex code keeps track of whether it has allocated the search
487282dc
KH
68 buffer using bits in the re_pattern_buffer. This means that whenever
69 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
70 allocated any registers, and will allocate new registers the next
71 time you call a searching or matching function. Therefore, we need
72 to call re_set_registers after compiling a new pattern or after
73 setting the match registers, so that the regex functions will be
74 able to free or re-allocate it properly. */
ca1d1d23
JB
75static struct re_registers search_regs;
76
daa37602
JB
77/* The buffer in which the last search was performed, or
78 Qt if the last search was done in a string;
79 Qnil if no searching has been done yet. */
80static Lisp_Object last_thing_searched;
ca1d1d23 81
8e6208c5 82/* error condition signaled when regexp compile_pattern fails */
ca1d1d23
JB
83
84Lisp_Object Qinvalid_regexp;
85
06f77a8a
KS
86/* Error condition used for failing searches */
87Lisp_Object Qsearch_failed;
88
41a33295 89Lisp_Object Vsearch_spaces_regexp;
f31a9a68 90
ca325161 91static void set_search_regs ();
044f81f1 92static void save_search_regs ();
facdc750
RS
93static int simple_search ();
94static int boyer_moore ();
b819a390 95static int search_buffer ();
01d09305 96static void matcher_overflow () NO_RETURN;
b819a390 97
ca1d1d23
JB
98static void
99matcher_overflow ()
100{
101 error ("Stack overflow in regexp matcher");
102}
103
b819a390
RS
104/* Compile a regexp and signal a Lisp error if anything goes wrong.
105 PATTERN is the pattern to compile.
106 CP is the place to put the result.
facdc750 107 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
108 REGP is the structure that says where to store the "register"
109 values that will result from matching this pattern.
110 If it is 0, we should compile the pattern not to record any
111 subexpression bounds.
112 POSIX is nonzero if we want full backtracking (POSIX style)
5679531d
KH
113 for this pattern. 0 means backtrack only enough to get a valid match.
114 MULTIBYTE is nonzero if we want to handle multibyte characters in
115 PATTERN. 0 means all multibyte characters are recognized just as
ecdb561e
RS
116 sequences of binary data.
117
41a33295 118 The behavior also depends on Vsearch_spaces_regexp. */
ca1d1d23 119
487282dc 120static void
5679531d 121compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
487282dc 122 struct regexp_cache *cp;
ca1d1d23 123 Lisp_Object pattern;
facdc750 124 Lisp_Object translate;
487282dc 125 struct re_registers *regp;
b819a390 126 int posix;
5679531d 127 int multibyte;
ca1d1d23 128{
7276d3d8 129 unsigned char *raw_pattern;
f8bd51c4 130 int raw_pattern_size;
d451e4db 131 char *val;
b819a390 132 reg_syntax_t old;
ca1d1d23 133
f8bd51c4
KH
134 /* MULTIBYTE says whether the text to be searched is multibyte.
135 We must convert PATTERN to match that, or we will not really
136 find things right. */
137
138 if (multibyte == STRING_MULTIBYTE (pattern))
139 {
d5db4077
KR
140 raw_pattern = (unsigned char *) SDATA (pattern);
141 raw_pattern_size = SBYTES (pattern);
f8bd51c4
KH
142 }
143 else if (multibyte)
144 {
d5db4077
KR
145 raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
146 SCHARS (pattern));
7276d3d8 147 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
148 copy_text (SDATA (pattern), raw_pattern,
149 SCHARS (pattern), 0, 1);
f8bd51c4
KH
150 }
151 else
152 {
153 /* Converting multibyte to single-byte.
154
155 ??? Perhaps this conversion should be done in a special way
156 by subtracting nonascii-insert-offset from each non-ASCII char,
157 so that only the multibyte chars which really correspond to
158 the chosen single-byte character set can possibly match. */
d5db4077 159 raw_pattern_size = SCHARS (pattern);
7276d3d8 160 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
161 copy_text (SDATA (pattern), raw_pattern,
162 SBYTES (pattern), 1, 0);
f8bd51c4
KH
163 }
164
487282dc 165 cp->regexp = Qnil;
59fab369 166 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
b819a390 167 cp->posix = posix;
5679531d 168 cp->buf.multibyte = multibyte;
41a33295 169 cp->whitespace_regexp = Vsearch_spaces_regexp;
910c747a
RS
170 /* Doing BLOCK_INPUT here has the effect that
171 the debugger won't run if an error occurs.
172 Why is BLOCK_INPUT needed here? */
9ac0d9e0 173 BLOCK_INPUT;
fb4a568d 174 old = re_set_syntax (RE_SYNTAX_EMACS
b819a390 175 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
f31a9a68 176
41a33295
RS
177 re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
178 : SDATA (Vsearch_spaces_regexp));
f31a9a68 179
7276d3d8
RS
180 val = (char *) re_compile_pattern ((char *)raw_pattern,
181 raw_pattern_size, &cp->buf);
f31a9a68
RS
182
183 re_set_whitespace_regexp (NULL);
184
b819a390 185 re_set_syntax (old);
9ac0d9e0 186 UNBLOCK_INPUT;
ca1d1d23 187 if (val)
06f77a8a 188 xsignal1 (Qinvalid_regexp, build_string (val));
1113d9db 189
487282dc 190 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
191}
192
6efc7887
RS
193/* Shrink each compiled regexp buffer in the cache
194 to the size actually used right now.
195 This is called from garbage collection. */
196
197void
198shrink_regexp_cache ()
199{
a968f437 200 struct regexp_cache *cp;
6efc7887
RS
201
202 for (cp = searchbuf_head; cp != 0; cp = cp->next)
203 {
204 cp->buf.allocated = cp->buf.used;
205 cp->buf.buffer
b23c0a83 206 = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
6efc7887
RS
207 }
208}
209
e5b94d44
CY
210/* Clear the regexp cache.
211 There is no danger of memory leak here because re_compile_pattern
212 automagically manages the memory in each re_pattern_buffer struct,
213 based on its `allocated' and `buffer' values. */
214void
215clear_regexp_cache ()
216{
217 int i;
218
219 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
220 searchbufs[i].regexp = Qnil;
221}
222
487282dc 223/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
224 the cache.
225 PATTERN is the pattern to compile.
facdc750 226 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
227 REGP is the structure that says where to store the "register"
228 values that will result from matching this pattern.
229 If it is 0, we should compile the pattern not to record any
230 subexpression bounds.
231 POSIX is nonzero if we want full backtracking (POSIX style)
232 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
233
234struct re_pattern_buffer *
0c8533c6 235compile_pattern (pattern, regp, translate, posix, multibyte)
487282dc
KH
236 Lisp_Object pattern;
237 struct re_registers *regp;
facdc750 238 Lisp_Object translate;
0c8533c6 239 int posix, multibyte;
487282dc
KH
240{
241 struct regexp_cache *cp, **cpp;
242
243 for (cpp = &searchbuf_head; ; cpp = &cp->next)
244 {
245 cp = *cpp;
f1b9c7c1
KR
246 /* Entries are initialized to nil, and may be set to nil by
247 compile_pattern_1 if the pattern isn't valid. Don't apply
49a5f770
KR
248 string accessors in those cases. However, compile_pattern_1
249 is only applied to the cache entry we pick here to reuse. So
250 nil should never appear before a non-nil entry. */
7c752c80 251 if (NILP (cp->regexp))
f1b9c7c1 252 goto compile_it;
d5db4077 253 if (SCHARS (cp->regexp) == SCHARS (pattern)
cf69b13e 254 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
1d288aef 255 && !NILP (Fstring_equal (cp->regexp, pattern))
59fab369 256 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
5679531d 257 && cp->posix == posix
ecdb561e 258 && cp->buf.multibyte == multibyte
41a33295 259 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp)))
487282dc
KH
260 break;
261
f1b9c7c1
KR
262 /* If we're at the end of the cache, compile into the nil cell
263 we found, or the last (least recently used) cell with a
264 string value. */
487282dc
KH
265 if (cp->next == 0)
266 {
f1b9c7c1 267 compile_it:
5679531d 268 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
487282dc
KH
269 break;
270 }
271 }
272
273 /* When we get here, cp (aka *cpp) contains the compiled pattern,
274 either because we found it in the cache or because we just compiled it.
275 Move it to the front of the queue to mark it as most recently used. */
276 *cpp = cp->next;
277 cp->next = searchbuf_head;
278 searchbuf_head = cp;
1113d9db 279
6639708c
RS
280 /* Advise the searching functions about the space we have allocated
281 for register data. */
282 if (regp)
283 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
284
487282dc 285 return &cp->buf;
ca1d1d23
JB
286}
287
ca1d1d23 288\f
b819a390
RS
289static Lisp_Object
290looking_at_1 (string, posix)
ca1d1d23 291 Lisp_Object string;
b819a390 292 int posix;
ca1d1d23
JB
293{
294 Lisp_Object val;
295 unsigned char *p1, *p2;
296 int s1, s2;
297 register int i;
487282dc 298 struct re_pattern_buffer *bufp;
ca1d1d23 299
7074fde6
FP
300 if (running_asynch_code)
301 save_search_regs ();
302
910c747a
RS
303 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
304 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
305 = current_buffer->case_eqv_table;
306
b7826503 307 CHECK_STRING (string);
487282dc
KH
308 bufp = compile_pattern (string, &search_regs,
309 (!NILP (current_buffer->case_fold_search)
0190922f 310 ? current_buffer->case_canon_table : Qnil),
0c8533c6
RS
311 posix,
312 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23
JB
313
314 immediate_quit = 1;
315 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
316
317 /* Get pointers and sizes of the two strings
318 that make up the visible portion of the buffer. */
319
320 p1 = BEGV_ADDR;
fa8ed3e0 321 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 322 p2 = GAP_END_ADDR;
fa8ed3e0 323 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
324 if (s1 < 0)
325 {
326 p2 = p1;
fa8ed3e0 327 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
328 s1 = 0;
329 }
330 if (s2 < 0)
331 {
fa8ed3e0 332 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
333 s2 = 0;
334 }
8bb43c28
RS
335
336 re_match_object = Qnil;
177c0ea7 337
487282dc 338 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
fa8ed3e0
RS
339 PT_BYTE - BEGV_BYTE, &search_regs,
340 ZV_BYTE - BEGV_BYTE);
de182d70 341 immediate_quit = 0;
177c0ea7 342
ca1d1d23
JB
343 if (i == -2)
344 matcher_overflow ();
345
346 val = (0 <= i ? Qt : Qnil);
fa8ed3e0
RS
347 if (i >= 0)
348 for (i = 0; i < search_regs.num_regs; i++)
349 if (search_regs.start[i] >= 0)
350 {
351 search_regs.start[i]
352 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
353 search_regs.end[i]
354 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
355 }
a3668d92 356 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
357 return val;
358}
359
b819a390 360DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
8c1a1077
PJ
361 doc: /* Return t if text after point matches regular expression REGEXP.
362This function modifies the match data that `match-beginning',
363`match-end' and `match-data' access; save and restore the match
364data if you want to preserve them. */)
365 (regexp)
94f94972 366 Lisp_Object regexp;
b819a390 367{
94f94972 368 return looking_at_1 (regexp, 0);
b819a390
RS
369}
370
371DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
8c1a1077
PJ
372 doc: /* Return t if text after point matches regular expression REGEXP.
373Find the longest match, in accord with Posix regular expression rules.
374This function modifies the match data that `match-beginning',
375`match-end' and `match-data' access; save and restore the match
376data if you want to preserve them. */)
377 (regexp)
94f94972 378 Lisp_Object regexp;
b819a390 379{
94f94972 380 return looking_at_1 (regexp, 1);
b819a390
RS
381}
382\f
383static Lisp_Object
384string_match_1 (regexp, string, start, posix)
ca1d1d23 385 Lisp_Object regexp, string, start;
b819a390 386 int posix;
ca1d1d23
JB
387{
388 int val;
487282dc 389 struct re_pattern_buffer *bufp;
0c8533c6
RS
390 int pos, pos_byte;
391 int i;
ca1d1d23 392
7074fde6
FP
393 if (running_asynch_code)
394 save_search_regs ();
395
b7826503
PJ
396 CHECK_STRING (regexp);
397 CHECK_STRING (string);
ca1d1d23
JB
398
399 if (NILP (start))
0c8533c6 400 pos = 0, pos_byte = 0;
ca1d1d23
JB
401 else
402 {
d5db4077 403 int len = SCHARS (string);
ca1d1d23 404
b7826503 405 CHECK_NUMBER (start);
0c8533c6
RS
406 pos = XINT (start);
407 if (pos < 0 && -pos <= len)
408 pos = len + pos;
409 else if (0 > pos || pos > len)
ca1d1d23 410 args_out_of_range (string, start);
0c8533c6 411 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
412 }
413
910c747a
RS
414 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
415 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
416 = current_buffer->case_eqv_table;
417
487282dc
KH
418 bufp = compile_pattern (regexp, &search_regs,
419 (!NILP (current_buffer->case_fold_search)
0190922f 420 ? current_buffer->case_canon_table : Qnil),
0c8533c6
RS
421 posix,
422 STRING_MULTIBYTE (string));
ca1d1d23 423 immediate_quit = 1;
8bb43c28 424 re_match_object = string;
177c0ea7 425
d5db4077
KR
426 val = re_search (bufp, (char *) SDATA (string),
427 SBYTES (string), pos_byte,
428 SBYTES (string) - pos_byte,
ca1d1d23
JB
429 &search_regs);
430 immediate_quit = 0;
daa37602 431 last_thing_searched = Qt;
ca1d1d23
JB
432 if (val == -2)
433 matcher_overflow ();
434 if (val < 0) return Qnil;
0c8533c6
RS
435
436 for (i = 0; i < search_regs.num_regs; i++)
437 if (search_regs.start[i] >= 0)
438 {
439 search_regs.start[i]
440 = string_byte_to_char (string, search_regs.start[i]);
441 search_regs.end[i]
442 = string_byte_to_char (string, search_regs.end[i]);
443 }
444
445 return make_number (string_byte_to_char (string, val));
ca1d1d23 446}
e59a8453 447
b819a390 448DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
8c1a1077 449 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
b85acc4b 450Matching ignores case if `case-fold-search' is non-nil.
8c1a1077
PJ
451If third arg START is non-nil, start search at that index in STRING.
452For index of first char beyond the match, do (match-end 0).
453`match-end' and `match-beginning' also give indices of substrings
2bd2f32d
RS
454matched by parenthesis constructs in the pattern.
455
456You can use the function `match-string' to extract the substrings
457matched by the parenthesis constructions in REGEXP. */)
8c1a1077 458 (regexp, string, start)
b819a390
RS
459 Lisp_Object regexp, string, start;
460{
461 return string_match_1 (regexp, string, start, 0);
462}
463
464DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
8c1a1077
PJ
465 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
466Find the longest match, in accord with Posix regular expression rules.
467Case is ignored if `case-fold-search' is non-nil in the current buffer.
468If third arg START is non-nil, start search at that index in STRING.
469For index of first char beyond the match, do (match-end 0).
470`match-end' and `match-beginning' also give indices of substrings
471matched by parenthesis constructs in the pattern. */)
472 (regexp, string, start)
b819a390
RS
473 Lisp_Object regexp, string, start;
474{
475 return string_match_1 (regexp, string, start, 1);
476}
477
e59a8453
RS
478/* Match REGEXP against STRING, searching all of STRING,
479 and return the index of the match, or negative on failure.
480 This does not clobber the match data. */
481
482int
483fast_string_match (regexp, string)
484 Lisp_Object regexp, string;
485{
486 int val;
487282dc 487 struct re_pattern_buffer *bufp;
e59a8453 488
facdc750
RS
489 bufp = compile_pattern (regexp, 0, Qnil,
490 0, STRING_MULTIBYTE (string));
e59a8453 491 immediate_quit = 1;
8bb43c28 492 re_match_object = string;
177c0ea7 493
d5db4077
KR
494 val = re_search (bufp, (char *) SDATA (string),
495 SBYTES (string), 0,
496 SBYTES (string), 0);
e59a8453
RS
497 immediate_quit = 0;
498 return val;
499}
5679531d
KH
500
501/* Match REGEXP against STRING, searching all of STRING ignoring case,
502 and return the index of the match, or negative on failure.
0c8533c6
RS
503 This does not clobber the match data.
504 We assume that STRING contains single-byte characters. */
5679531d
KH
505
506extern Lisp_Object Vascii_downcase_table;
507
508int
b4577c63 509fast_c_string_match_ignore_case (regexp, string)
5679531d 510 Lisp_Object regexp;
96b80561 511 const char *string;
5679531d
KH
512{
513 int val;
514 struct re_pattern_buffer *bufp;
515 int len = strlen (string);
516
0c8533c6 517 regexp = string_make_unibyte (regexp);
b4577c63 518 re_match_object = Qt;
5679531d 519 bufp = compile_pattern (regexp, 0,
0190922f 520 Vascii_canon_table, 0,
f8bd51c4 521 0);
5679531d
KH
522 immediate_quit = 1;
523 val = re_search (bufp, string, len, 0, len, 0);
524 immediate_quit = 0;
525 return val;
526}
be5f4dfb
KH
527
528/* Like fast_string_match but ignore case. */
529
530int
531fast_string_match_ignore_case (regexp, string)
532 Lisp_Object regexp, string;
533{
534 int val;
535 struct re_pattern_buffer *bufp;
536
0190922f 537 bufp = compile_pattern (regexp, 0, Vascii_canon_table,
be5f4dfb
KH
538 0, STRING_MULTIBYTE (string));
539 immediate_quit = 1;
540 re_match_object = string;
541
542 val = re_search (bufp, (char *) SDATA (string),
543 SBYTES (string), 0,
544 SBYTES (string), 0);
545 immediate_quit = 0;
546 return val;
547}
ca1d1d23 548\f
9169c321
JB
549/* The newline cache: remembering which sections of text have no newlines. */
550
551/* If the user has requested newline caching, make sure it's on.
552 Otherwise, make sure it's off.
553 This is our cheezy way of associating an action with the change of
554 state of a buffer-local variable. */
555static void
556newline_cache_on_off (buf)
557 struct buffer *buf;
558{
559 if (NILP (buf->cache_long_line_scans))
560 {
561 /* It should be off. */
562 if (buf->newline_cache)
563 {
564 free_region_cache (buf->newline_cache);
565 buf->newline_cache = 0;
566 }
567 }
568 else
569 {
570 /* It should be on. */
571 if (buf->newline_cache == 0)
572 buf->newline_cache = new_region_cache ();
573 }
574}
575
576\f
577/* Search for COUNT instances of the character TARGET between START and END.
578
579 If COUNT is positive, search forwards; END must be >= START.
580 If COUNT is negative, search backwards for the -COUNTth instance;
581 END must be <= START.
582 If COUNT is zero, do anything you please; run rogue, for all I care.
583
584 If END is zero, use BEGV or ZV instead, as appropriate for the
585 direction indicated by COUNT.
ffd56f97
JB
586
587 If we find COUNT instances, set *SHORTAGE to zero, and return the
a9f2a45f 588 position past the COUNTth match. Note that for reverse motion
5bfe95c9 589 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 590
9169c321
JB
591 If we don't find COUNT instances before reaching END, set *SHORTAGE
592 to the number of TARGETs left unfound, and return END.
ffd56f97 593
087a5f81
RS
594 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
595 except when inside redisplay. */
596
dfcf069d 597int
9169c321
JB
598scan_buffer (target, start, end, count, shortage, allow_quit)
599 register int target;
600 int start, end;
601 int count;
602 int *shortage;
087a5f81 603 int allow_quit;
ca1d1d23 604{
9169c321 605 struct region_cache *newline_cache;
177c0ea7 606 int direction;
ffd56f97 607
9169c321
JB
608 if (count > 0)
609 {
610 direction = 1;
611 if (! end) end = ZV;
612 }
613 else
614 {
615 direction = -1;
616 if (! end) end = BEGV;
617 }
ffd56f97 618
9169c321
JB
619 newline_cache_on_off (current_buffer);
620 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
621
622 if (shortage != 0)
623 *shortage = 0;
624
087a5f81 625 immediate_quit = allow_quit;
ca1d1d23 626
ffd56f97 627 if (count > 0)
9169c321 628 while (start != end)
ca1d1d23 629 {
9169c321
JB
630 /* Our innermost scanning loop is very simple; it doesn't know
631 about gaps, buffer ends, or the newline cache. ceiling is
632 the position of the last character before the next such
633 obstacle --- the last character the dumb search loop should
634 examine. */
fa8ed3e0
RS
635 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
636 int start_byte = CHAR_TO_BYTE (start);
67ce527d 637 int tem;
9169c321
JB
638
639 /* If we're looking for a newline, consult the newline cache
640 to see where we can avoid some scanning. */
641 if (target == '\n' && newline_cache)
642 {
643 int next_change;
644 immediate_quit = 0;
645 while (region_cache_forward
fa8ed3e0
RS
646 (current_buffer, newline_cache, start_byte, &next_change))
647 start_byte = next_change;
cbe0db0d 648 immediate_quit = allow_quit;
9169c321 649
fa8ed3e0
RS
650 /* START should never be after END. */
651 if (start_byte > ceiling_byte)
652 start_byte = ceiling_byte;
9169c321
JB
653
654 /* Now the text after start is an unknown region, and
655 next_change is the position of the next known region. */
fa8ed3e0 656 ceiling_byte = min (next_change - 1, ceiling_byte);
9169c321
JB
657 }
658
659 /* The dumb loop can only scan text stored in contiguous
660 bytes. BUFFER_CEILING_OF returns the last character
661 position that is contiguous, so the ceiling is the
662 position after that. */
67ce527d
KH
663 tem = BUFFER_CEILING_OF (start_byte);
664 ceiling_byte = min (tem, ceiling_byte);
9169c321
JB
665
666 {
177c0ea7 667 /* The termination address of the dumb loop. */
fa8ed3e0
RS
668 register unsigned char *ceiling_addr
669 = BYTE_POS_ADDR (ceiling_byte) + 1;
670 register unsigned char *cursor
671 = BYTE_POS_ADDR (start_byte);
9169c321
JB
672 unsigned char *base = cursor;
673
674 while (cursor < ceiling_addr)
675 {
676 unsigned char *scan_start = cursor;
677
678 /* The dumb loop. */
679 while (*cursor != target && ++cursor < ceiling_addr)
680 ;
681
682 /* If we're looking for newlines, cache the fact that
683 the region from start to cursor is free of them. */
684 if (target == '\n' && newline_cache)
685 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
686 start_byte + scan_start - base,
687 start_byte + cursor - base);
9169c321
JB
688
689 /* Did we find the target character? */
690 if (cursor < ceiling_addr)
691 {
692 if (--count == 0)
693 {
694 immediate_quit = 0;
fa8ed3e0 695 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
9169c321
JB
696 }
697 cursor++;
698 }
699 }
700
fa8ed3e0 701 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321 702 }
ca1d1d23
JB
703 }
704 else
9169c321
JB
705 while (start > end)
706 {
707 /* The last character to check before the next obstacle. */
fa8ed3e0
RS
708 int ceiling_byte = CHAR_TO_BYTE (end);
709 int start_byte = CHAR_TO_BYTE (start);
67ce527d 710 int tem;
9169c321
JB
711
712 /* Consult the newline cache, if appropriate. */
713 if (target == '\n' && newline_cache)
714 {
715 int next_change;
716 immediate_quit = 0;
717 while (region_cache_backward
fa8ed3e0
RS
718 (current_buffer, newline_cache, start_byte, &next_change))
719 start_byte = next_change;
cbe0db0d 720 immediate_quit = allow_quit;
9169c321
JB
721
722 /* Start should never be at or before end. */
fa8ed3e0
RS
723 if (start_byte <= ceiling_byte)
724 start_byte = ceiling_byte + 1;
9169c321
JB
725
726 /* Now the text before start is an unknown region, and
727 next_change is the position of the next known region. */
fa8ed3e0 728 ceiling_byte = max (next_change, ceiling_byte);
9169c321
JB
729 }
730
731 /* Stop scanning before the gap. */
67ce527d
KH
732 tem = BUFFER_FLOOR_OF (start_byte - 1);
733 ceiling_byte = max (tem, ceiling_byte);
9169c321
JB
734
735 {
736 /* The termination address of the dumb loop. */
fa8ed3e0
RS
737 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
738 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
739 unsigned char *base = cursor;
740
741 while (cursor >= ceiling_addr)
742 {
743 unsigned char *scan_start = cursor;
744
745 while (*cursor != target && --cursor >= ceiling_addr)
746 ;
747
748 /* If we're looking for newlines, cache the fact that
749 the region from after the cursor to start is free of them. */
750 if (target == '\n' && newline_cache)
751 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
752 start_byte + cursor - base,
753 start_byte + scan_start - base);
9169c321
JB
754
755 /* Did we find the target character? */
756 if (cursor >= ceiling_addr)
757 {
758 if (++count >= 0)
759 {
760 immediate_quit = 0;
fa8ed3e0 761 return BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
762 }
763 cursor--;
764 }
765 }
766
fa8ed3e0 767 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
768 }
769 }
770
ca1d1d23
JB
771 immediate_quit = 0;
772 if (shortage != 0)
ffd56f97 773 *shortage = count * direction;
9169c321 774 return start;
ca1d1d23 775}
fa8ed3e0
RS
776\f
777/* Search for COUNT instances of a line boundary, which means either a
778 newline or (if selective display enabled) a carriage return.
779 Start at START. If COUNT is negative, search backwards.
780
781 We report the resulting position by calling TEMP_SET_PT_BOTH.
782
783 If we find COUNT instances. we position after (always after,
784 even if scanning backwards) the COUNTth match, and return 0.
785
786 If we don't find COUNT instances before reaching the end of the
787 buffer (or the beginning, if scanning backwards), we return
788 the number of line boundaries left unfound, and position at
789 the limit we bumped up against.
790
791 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
d5d57b92 792 except in special cases. */
ca1d1d23 793
63fa018d 794int
fa8ed3e0
RS
795scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
796 int start, start_byte;
797 int limit, limit_byte;
798 register int count;
799 int allow_quit;
63fa018d 800{
fa8ed3e0
RS
801 int direction = ((count > 0) ? 1 : -1);
802
803 register unsigned char *cursor;
804 unsigned char *base;
805
806 register int ceiling;
807 register unsigned char *ceiling_addr;
808
d5d57b92
RS
809 int old_immediate_quit = immediate_quit;
810
fa8ed3e0
RS
811 /* The code that follows is like scan_buffer
812 but checks for either newline or carriage return. */
813
d5d57b92
RS
814 if (allow_quit)
815 immediate_quit++;
fa8ed3e0
RS
816
817 start_byte = CHAR_TO_BYTE (start);
818
819 if (count > 0)
820 {
821 while (start_byte < limit_byte)
822 {
823 ceiling = BUFFER_CEILING_OF (start_byte);
824 ceiling = min (limit_byte - 1, ceiling);
825 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
826 base = (cursor = BYTE_POS_ADDR (start_byte));
827 while (1)
828 {
829 while (*cursor != '\n' && ++cursor != ceiling_addr)
830 ;
831
832 if (cursor != ceiling_addr)
833 {
834 if (--count == 0)
835 {
d5d57b92 836 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
837 start_byte = start_byte + cursor - base + 1;
838 start = BYTE_TO_CHAR (start_byte);
839 TEMP_SET_PT_BOTH (start, start_byte);
840 return 0;
841 }
842 else
843 if (++cursor == ceiling_addr)
844 break;
845 }
846 else
847 break;
848 }
849 start_byte += cursor - base;
850 }
851 }
852 else
853 {
fa8ed3e0
RS
854 while (start_byte > limit_byte)
855 {
856 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
857 ceiling = max (limit_byte, ceiling);
858 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
859 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
860 while (1)
861 {
862 while (--cursor != ceiling_addr && *cursor != '\n')
863 ;
864
865 if (cursor != ceiling_addr)
866 {
867 if (++count == 0)
868 {
d5d57b92 869 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
870 /* Return the position AFTER the match we found. */
871 start_byte = start_byte + cursor - base + 1;
872 start = BYTE_TO_CHAR (start_byte);
873 TEMP_SET_PT_BOTH (start, start_byte);
874 return 0;
875 }
876 }
877 else
878 break;
879 }
880 /* Here we add 1 to compensate for the last decrement
881 of CURSOR, which took it past the valid range. */
882 start_byte += cursor - base + 1;
883 }
884 }
885
886 TEMP_SET_PT_BOTH (limit, limit_byte);
d5d57b92 887 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
888
889 return count * direction;
63fa018d
RS
890}
891
ca1d1d23 892int
fa8ed3e0 893find_next_newline_no_quit (from, cnt)
ca1d1d23
JB
894 register int from, cnt;
895{
fa8ed3e0 896 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
9169c321
JB
897}
898
9169c321
JB
899/* Like find_next_newline, but returns position before the newline,
900 not after, and only search up to TO. This isn't just
901 find_next_newline (...)-1, because you might hit TO. */
fa8ed3e0 902
9169c321
JB
903int
904find_before_next_newline (from, to, cnt)
cbe0db0d 905 int from, to, cnt;
9169c321
JB
906{
907 int shortage;
908 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
909
910 if (shortage == 0)
911 pos--;
177c0ea7 912
9169c321 913 return pos;
ca1d1d23
JB
914}
915\f
ca1d1d23
JB
916/* Subroutines of Lisp buffer search functions. */
917
918static Lisp_Object
b819a390 919search_command (string, bound, noerror, count, direction, RE, posix)
ca1d1d23
JB
920 Lisp_Object string, bound, noerror, count;
921 int direction;
922 int RE;
b819a390 923 int posix;
ca1d1d23
JB
924{
925 register int np;
9f43ad85 926 int lim, lim_byte;
ca1d1d23
JB
927 int n = direction;
928
929 if (!NILP (count))
930 {
b7826503 931 CHECK_NUMBER (count);
ca1d1d23
JB
932 n *= XINT (count);
933 }
934
b7826503 935 CHECK_STRING (string);
ca1d1d23 936 if (NILP (bound))
9f43ad85
RS
937 {
938 if (n > 0)
939 lim = ZV, lim_byte = ZV_BYTE;
940 else
941 lim = BEGV, lim_byte = BEGV_BYTE;
942 }
ca1d1d23
JB
943 else
944 {
b7826503 945 CHECK_NUMBER_COERCE_MARKER (bound);
ca1d1d23 946 lim = XINT (bound);
6ec8bbd2 947 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
948 error ("Invalid search bound (wrong side of point)");
949 if (lim > ZV)
9f43ad85 950 lim = ZV, lim_byte = ZV_BYTE;
588d2fd5 951 else if (lim < BEGV)
9f43ad85 952 lim = BEGV, lim_byte = BEGV_BYTE;
588d2fd5
KH
953 else
954 lim_byte = CHAR_TO_BYTE (lim);
ca1d1d23
JB
955 }
956
910c747a
RS
957 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
958 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
959 = current_buffer->case_eqv_table;
960
9f43ad85 961 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
ca1d1d23 962 (!NILP (current_buffer->case_fold_search)
facdc750 963 ? current_buffer->case_canon_table
3135e9fd 964 : Qnil),
ca1d1d23 965 (!NILP (current_buffer->case_fold_search)
facdc750 966 ? current_buffer->case_eqv_table
3135e9fd 967 : Qnil),
b819a390 968 posix);
ca1d1d23
JB
969 if (np <= 0)
970 {
971 if (NILP (noerror))
06f77a8a
KS
972 xsignal1 (Qsearch_failed, string);
973
ca1d1d23
JB
974 if (!EQ (noerror, Qt))
975 {
976 if (lim < BEGV || lim > ZV)
977 abort ();
9f43ad85 978 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
979 return Qnil;
980#if 0 /* This would be clean, but maybe programs depend on
981 a value of nil here. */
481399bf 982 np = lim;
a5f217b8 983#endif
ca1d1d23 984 }
481399bf
RS
985 else
986 return Qnil;
ca1d1d23
JB
987 }
988
989 if (np < BEGV || np > ZV)
990 abort ();
991
992 SET_PT (np);
993
994 return make_number (np);
995}
996\f
fa8ed3e0
RS
997/* Return 1 if REGEXP it matches just one constant string. */
998
b6d6a51c
KH
999static int
1000trivial_regexp_p (regexp)
1001 Lisp_Object regexp;
1002{
d5db4077
KR
1003 int len = SBYTES (regexp);
1004 unsigned char *s = SDATA (regexp);
b6d6a51c
KH
1005 while (--len >= 0)
1006 {
1007 switch (*s++)
1008 {
1009 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1010 return 0;
1011 case '\\':
1012 if (--len < 0)
1013 return 0;
1014 switch (*s++)
1015 {
1016 case '|': case '(': case ')': case '`': case '\'': case 'b':
1017 case 'B': case '<': case '>': case 'w': case 'W': case 's':
29f89fe7 1018 case 'S': case '=': case '{': case '}': case '_':
5679531d 1019 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 1020 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
1021 case '6': case '7': case '8': case '9':
1022 return 0;
1023 }
1024 }
1025 }
1026 return 1;
1027}
1028
ca325161 1029/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 1030 starting at position POS and stopping at position LIM,
b819a390 1031 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
1032 a regular expression if RE is true.
1033
1034 If N is positive, searching is forward and LIM must be greater than POS.
1035 If N is negative, searching is backward and LIM must be less than POS.
1036
facdc750 1037 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 1038 or else the position at the beginning of the Nth occurrence
b819a390
RS
1039 (if searching backward) or the end (if searching forward).
1040
1041 POSIX is nonzero if we want full backtracking (POSIX style)
1042 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 1043
aff2ce94
RS
1044#define TRANSLATE(out, trt, d) \
1045do \
1046 { \
1047 if (! NILP (trt)) \
1048 { \
1049 Lisp_Object temp; \
1050 temp = Faref (trt, make_number (d)); \
1051 if (INTEGERP (temp)) \
1052 out = XINT (temp); \
1053 else \
1054 out = d; \
1055 } \
1056 else \
1057 out = d; \
1058 } \
1059while (0)
facdc750 1060
b819a390 1061static int
9f43ad85
RS
1062search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1063 RE, trt, inverse_trt, posix)
ca1d1d23
JB
1064 Lisp_Object string;
1065 int pos;
9f43ad85 1066 int pos_byte;
ca1d1d23 1067 int lim;
9f43ad85 1068 int lim_byte;
ca1d1d23
JB
1069 int n;
1070 int RE;
facdc750
RS
1071 Lisp_Object trt;
1072 Lisp_Object inverse_trt;
b819a390 1073 int posix;
ca1d1d23 1074{
d5db4077
KR
1075 int len = SCHARS (string);
1076 int len_byte = SBYTES (string);
facdc750 1077 register int i;
ca1d1d23 1078
7074fde6
FP
1079 if (running_asynch_code)
1080 save_search_regs ();
1081
a7e4cdde 1082 /* Searching 0 times means don't move. */
ca1d1d23 1083 /* Null string is found at starting position. */
a7e4cdde 1084 if (len == 0 || n == 0)
ca325161 1085 {
0353b28f 1086 set_search_regs (pos_byte, 0);
ca325161
RS
1087 return pos;
1088 }
3f57a499 1089
41a33295 1090 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
ca1d1d23 1091 {
facdc750
RS
1092 unsigned char *p1, *p2;
1093 int s1, s2;
487282dc
KH
1094 struct re_pattern_buffer *bufp;
1095
0c8533c6
RS
1096 bufp = compile_pattern (string, &search_regs, trt, posix,
1097 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23 1098
ca1d1d23
JB
1099 immediate_quit = 1; /* Quit immediately if user types ^G,
1100 because letting this function finish
1101 can take too long. */
1102 QUIT; /* Do a pending quit right away,
1103 to avoid paradoxical behavior */
1104 /* Get pointers and sizes of the two strings
1105 that make up the visible portion of the buffer. */
1106
1107 p1 = BEGV_ADDR;
fa8ed3e0 1108 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1109 p2 = GAP_END_ADDR;
fa8ed3e0 1110 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1111 if (s1 < 0)
1112 {
1113 p2 = p1;
fa8ed3e0 1114 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1115 s1 = 0;
1116 }
1117 if (s2 < 0)
1118 {
fa8ed3e0 1119 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1120 s2 = 0;
1121 }
8bb43c28 1122 re_match_object = Qnil;
177c0ea7 1123
ca1d1d23
JB
1124 while (n < 0)
1125 {
42db823b 1126 int val;
487282dc 1127 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1128 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1129 &search_regs,
42db823b 1130 /* Don't allow match past current point */
4996330b 1131 pos_byte - BEGV_BYTE);
ca1d1d23 1132 if (val == -2)
b6d6a51c
KH
1133 {
1134 matcher_overflow ();
1135 }
ca1d1d23
JB
1136 if (val >= 0)
1137 {
26aff150 1138 pos_byte = search_regs.start[0] + BEGV_BYTE;
4746118a 1139 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1140 if (search_regs.start[i] >= 0)
1141 {
fa8ed3e0
RS
1142 search_regs.start[i]
1143 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1144 search_regs.end[i]
1145 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1146 }
a3668d92 1147 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1148 /* Set pos to the new position. */
1149 pos = search_regs.start[0];
1150 }
1151 else
1152 {
1153 immediate_quit = 0;
1154 return (n);
1155 }
1156 n++;
1157 }
1158 while (n > 0)
1159 {
42db823b 1160 int val;
487282dc 1161 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1162 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1163 &search_regs,
1164 lim_byte - BEGV_BYTE);
ca1d1d23 1165 if (val == -2)
b6d6a51c
KH
1166 {
1167 matcher_overflow ();
1168 }
ca1d1d23
JB
1169 if (val >= 0)
1170 {
26aff150 1171 pos_byte = search_regs.end[0] + BEGV_BYTE;
4746118a 1172 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1173 if (search_regs.start[i] >= 0)
1174 {
fa8ed3e0
RS
1175 search_regs.start[i]
1176 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1177 search_regs.end[i]
1178 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1179 }
a3668d92 1180 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1181 pos = search_regs.end[0];
1182 }
1183 else
1184 {
1185 immediate_quit = 0;
1186 return (0 - n);
1187 }
1188 n--;
1189 }
1190 immediate_quit = 0;
1191 return (pos);
1192 }
1193 else /* non-RE case */
1194 {
facdc750
RS
1195 unsigned char *raw_pattern, *pat;
1196 int raw_pattern_size;
1197 int raw_pattern_size_byte;
1198 unsigned char *patbuf;
1199 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
a967ed62 1200 unsigned char *base_pat;
fed91c38
KH
1201 /* Set to positive if we find a non-ASCII char that need
1202 translation. Otherwise set to zero later. */
1203 int charset_base = -1;
040272ce 1204 int boyer_moore_ok = 1;
facdc750
RS
1205
1206 /* MULTIBYTE says whether the text to be searched is multibyte.
1207 We must convert PATTERN to match that, or we will not really
1208 find things right. */
1209
1210 if (multibyte == STRING_MULTIBYTE (string))
1211 {
d5db4077
KR
1212 raw_pattern = (unsigned char *) SDATA (string);
1213 raw_pattern_size = SCHARS (string);
1214 raw_pattern_size_byte = SBYTES (string);
facdc750
RS
1215 }
1216 else if (multibyte)
1217 {
d5db4077 1218 raw_pattern_size = SCHARS (string);
facdc750 1219 raw_pattern_size_byte
d5db4077 1220 = count_size_as_multibyte (SDATA (string),
facdc750 1221 raw_pattern_size);
7276d3d8 1222 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
d5db4077
KR
1223 copy_text (SDATA (string), raw_pattern,
1224 SCHARS (string), 0, 1);
facdc750
RS
1225 }
1226 else
1227 {
1228 /* Converting multibyte to single-byte.
1229
1230 ??? Perhaps this conversion should be done in a special way
1231 by subtracting nonascii-insert-offset from each non-ASCII char,
1232 so that only the multibyte chars which really correspond to
1233 the chosen single-byte character set can possibly match. */
d5db4077
KR
1234 raw_pattern_size = SCHARS (string);
1235 raw_pattern_size_byte = SCHARS (string);
7276d3d8 1236 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
1237 copy_text (SDATA (string), raw_pattern,
1238 SBYTES (string), 1, 0);
facdc750
RS
1239 }
1240
1241 /* Copy and optionally translate the pattern. */
1242 len = raw_pattern_size;
1243 len_byte = raw_pattern_size_byte;
1244 patbuf = (unsigned char *) alloca (len_byte);
1245 pat = patbuf;
1246 base_pat = raw_pattern;
1247 if (multibyte)
1248 {
0190922f
KH
1249 /* Fill patbuf by translated characters in STRING while
1250 checking if we can use boyer-moore search. If TRT is
1251 non-nil, we can use boyer-moore search only if TRT can be
1252 represented by the byte array of 256 elements. For that,
1253 all non-ASCII case-equivalents of all case-senstive
1254 characters in STRING must belong to the same charset and
1255 row. */
1256
facdc750
RS
1257 while (--len >= 0)
1258 {
0190922f 1259 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
aff2ce94 1260 int c, translated, inverse;
facdc750
RS
1261 int in_charlen, charlen;
1262
1263 /* If we got here and the RE flag is set, it's because we're
1264 dealing with a regexp known to be trivial, so the backslash
1265 just quotes the next character. */
1266 if (RE && *base_pat == '\\')
1267 {
1268 len--;
0190922f 1269 raw_pattern_size--;
facdc750
RS
1270 len_byte--;
1271 base_pat++;
1272 }
1273
1274 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
040272ce 1275
0190922f 1276 if (NILP (trt))
facdc750 1277 {
0190922f
KH
1278 str = base_pat;
1279 charlen = in_charlen;
facdc750 1280 }
0190922f 1281 else
facdc750 1282 {
0190922f
KH
1283 /* Translate the character. */
1284 TRANSLATE (translated, trt, c);
1285 charlen = CHAR_STRING (translated, str_base);
1286 str = str_base;
1287
1288 /* Check if C has any other case-equivalents. */
1289 TRANSLATE (inverse, inverse_trt, c);
1290 /* If so, check if we can use boyer-moore. */
1291 if (c != inverse && boyer_moore_ok)
1292 {
1293 /* Check if all equivalents belong to the same
1294 charset & row. Note that the check of C
1295 itself is done by the last iteration. Note
1296 also that we don't have to check ASCII
1297 characters because boyer-moore search can
1298 always handle their translation. */
1299 while (1)
1300 {
fed91c38 1301 if (ASCII_BYTE_P (inverse))
0190922f 1302 {
fed91c38 1303 if (charset_base > 0)
0190922f
KH
1304 {
1305 boyer_moore_ok = 0;
1306 break;
1307 }
fed91c38
KH
1308 charset_base = 0;
1309 }
1310 else if (SINGLE_BYTE_CHAR_P (inverse))
1311 {
1312 /* Boyer-moore search can't handle a
1313 translation of an eight-bit
1314 character. */
1315 boyer_moore_ok = 0;
1316 break;
1317 }
1318 else if (charset_base < 0)
1319 charset_base = inverse & ~CHAR_FIELD3_MASK;
1320 else if ((inverse & ~CHAR_FIELD3_MASK)
1321 != charset_base)
1322 {
1323 boyer_moore_ok = 0;
1324 break;
0190922f
KH
1325 }
1326 if (c == inverse)
1327 break;
1328 TRANSLATE (inverse, inverse_trt, inverse);
1329 }
1330 }
aff2ce94 1331 }
fed91c38
KH
1332 if (charset_base < 0)
1333 charset_base = 0;
facdc750
RS
1334
1335 /* Store this character into the translated pattern. */
1336 bcopy (str, pat, charlen);
1337 pat += charlen;
1338 base_pat += in_charlen;
1339 len_byte -= in_charlen;
1340 }
1341 }
1342 else
1343 {
040272ce
KH
1344 /* Unibyte buffer. */
1345 charset_base = 0;
facdc750
RS
1346 while (--len >= 0)
1347 {
040272ce 1348 int c, translated;
facdc750
RS
1349
1350 /* If we got here and the RE flag is set, it's because we're
1351 dealing with a regexp known to be trivial, so the backslash
1352 just quotes the next character. */
1353 if (RE && *base_pat == '\\')
1354 {
1355 len--;
0190922f 1356 raw_pattern_size--;
facdc750
RS
1357 base_pat++;
1358 }
1359 c = *base_pat++;
aff2ce94 1360 TRANSLATE (translated, trt, c);
facdc750
RS
1361 *pat++ = translated;
1362 }
1363 }
1364
1365 len_byte = pat - patbuf;
1366 len = raw_pattern_size;
1367 pat = base_pat = patbuf;
1368
040272ce 1369 if (boyer_moore_ok)
facdc750 1370 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
aff2ce94
RS
1371 pos, pos_byte, lim, lim_byte,
1372 charset_base);
facdc750
RS
1373 else
1374 return simple_search (n, pat, len, len_byte, trt,
1375 pos, pos_byte, lim, lim_byte);
1376 }
1377}
1378\f
1379/* Do a simple string search N times for the string PAT,
1380 whose length is LEN/LEN_BYTE,
1381 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1382 TRT is the translation table.
f8bd51c4 1383
facdc750
RS
1384 Return the character position where the match is found.
1385 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1386
facdc750
RS
1387 This kind of search works regardless of what is in PAT and
1388 regardless of what is in TRT. It is used in cases where
1389 boyer_moore cannot work. */
1390
1391static int
1392simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1393 int n;
1394 unsigned char *pat;
1395 int len, len_byte;
1396 Lisp_Object trt;
1397 int pos, pos_byte;
1398 int lim, lim_byte;
1399{
1400 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
ab228c24 1401 int forward = n > 0;
facdc750
RS
1402
1403 if (lim > pos && multibyte)
1404 while (n > 0)
1405 {
1406 while (1)
f8bd51c4 1407 {
facdc750
RS
1408 /* Try matching at position POS. */
1409 int this_pos = pos;
1410 int this_pos_byte = pos_byte;
1411 int this_len = len;
1412 int this_len_byte = len_byte;
1413 unsigned char *p = pat;
1414 if (pos + len > lim)
1415 goto stop;
1416
1417 while (this_len > 0)
1418 {
1419 int charlen, buf_charlen;
ab228c24 1420 int pat_ch, buf_ch;
facdc750 1421
ab228c24 1422 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1423 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1424 ZV_BYTE - this_pos_byte,
1425 buf_charlen);
aff2ce94 1426 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1427
1428 if (buf_ch != pat_ch)
1429 break;
ab228c24
RS
1430
1431 this_len_byte -= charlen;
1432 this_len--;
1433 p += charlen;
1434
1435 this_pos_byte += buf_charlen;
1436 this_pos++;
facdc750
RS
1437 }
1438
1439 if (this_len == 0)
1440 {
1441 pos += len;
1442 pos_byte += len_byte;
1443 break;
1444 }
1445
1446 INC_BOTH (pos, pos_byte);
f8bd51c4 1447 }
facdc750
RS
1448
1449 n--;
1450 }
1451 else if (lim > pos)
1452 while (n > 0)
1453 {
1454 while (1)
f8bd51c4 1455 {
facdc750
RS
1456 /* Try matching at position POS. */
1457 int this_pos = pos;
1458 int this_len = len;
1459 unsigned char *p = pat;
1460
1461 if (pos + len > lim)
1462 goto stop;
1463
1464 while (this_len > 0)
1465 {
1466 int pat_ch = *p++;
1467 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1468 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1469
1470 if (buf_ch != pat_ch)
1471 break;
ab228c24
RS
1472
1473 this_len--;
1474 this_pos++;
facdc750
RS
1475 }
1476
1477 if (this_len == 0)
1478 {
1479 pos += len;
1480 break;
1481 }
1482
1483 pos++;
f8bd51c4 1484 }
facdc750
RS
1485
1486 n--;
1487 }
1488 /* Backwards search. */
1489 else if (lim < pos && multibyte)
1490 while (n < 0)
1491 {
1492 while (1)
f8bd51c4 1493 {
facdc750
RS
1494 /* Try matching at position POS. */
1495 int this_pos = pos - len;
1496 int this_pos_byte = pos_byte - len_byte;
1497 int this_len = len;
1498 int this_len_byte = len_byte;
1499 unsigned char *p = pat;
1500
1501 if (pos - len < lim)
1502 goto stop;
1503
1504 while (this_len > 0)
1505 {
1506 int charlen, buf_charlen;
ab228c24 1507 int pat_ch, buf_ch;
facdc750 1508
ab228c24 1509 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1510 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1511 ZV_BYTE - this_pos_byte,
1512 buf_charlen);
aff2ce94 1513 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1514
1515 if (buf_ch != pat_ch)
1516 break;
ab228c24
RS
1517
1518 this_len_byte -= charlen;
1519 this_len--;
1520 p += charlen;
1521 this_pos_byte += buf_charlen;
1522 this_pos++;
facdc750
RS
1523 }
1524
1525 if (this_len == 0)
1526 {
1527 pos -= len;
1528 pos_byte -= len_byte;
1529 break;
1530 }
1531
1532 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1533 }
1534
facdc750
RS
1535 n++;
1536 }
1537 else if (lim < pos)
1538 while (n < 0)
1539 {
1540 while (1)
b6d6a51c 1541 {
facdc750
RS
1542 /* Try matching at position POS. */
1543 int this_pos = pos - len;
1544 int this_len = len;
1545 unsigned char *p = pat;
1546
1547 if (pos - len < lim)
1548 goto stop;
1549
1550 while (this_len > 0)
1551 {
1552 int pat_ch = *p++;
1553 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1554 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1555
1556 if (buf_ch != pat_ch)
1557 break;
ab228c24
RS
1558 this_len--;
1559 this_pos++;
facdc750
RS
1560 }
1561
1562 if (this_len == 0)
b6d6a51c 1563 {
facdc750
RS
1564 pos -= len;
1565 break;
b6d6a51c 1566 }
facdc750
RS
1567
1568 pos--;
b6d6a51c 1569 }
facdc750
RS
1570
1571 n++;
b6d6a51c 1572 }
facdc750
RS
1573
1574 stop:
1575 if (n == 0)
aff2ce94 1576 {
ab228c24
RS
1577 if (forward)
1578 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1579 else
1580 set_search_regs (multibyte ? pos_byte : pos, len_byte);
aff2ce94
RS
1581
1582 return pos;
1583 }
facdc750
RS
1584 else if (n > 0)
1585 return -n;
1586 else
1587 return n;
1588}
1589\f
0190922f 1590/* Do Boyer-Moore search N times for the string BASE_PAT,
facdc750
RS
1591 whose length is LEN/LEN_BYTE,
1592 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1593 DIRECTION says which direction we search in.
1594 TRT and INVERSE_TRT are translation tables.
0190922f 1595 Characters in PAT are already translated by TRT.
facdc750 1596
0190922f
KH
1597 This kind of search works if all the characters in BASE_PAT that
1598 have nontrivial translation are the same aside from the last byte.
1599 This makes it possible to translate just the last byte of a
1600 character, and do so after just a simple test of the context.
1601 CHARSET_BASE is nonzero iff there is such a non-ASCII character.
facdc750
RS
1602
1603 If that criterion is not satisfied, do not call this function. */
1604
1605static int
1606boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
aff2ce94 1607 pos, pos_byte, lim, lim_byte, charset_base)
facdc750
RS
1608 int n;
1609 unsigned char *base_pat;
1610 int len, len_byte;
1611 Lisp_Object trt;
1612 Lisp_Object inverse_trt;
1613 int pos, pos_byte;
1614 int lim, lim_byte;
aff2ce94 1615 int charset_base;
facdc750
RS
1616{
1617 int direction = ((n > 0) ? 1 : -1);
1618 register int dirlen;
a968f437 1619 int infinity, limit, stride_for_teases = 0;
facdc750
RS
1620 register int *BM_tab;
1621 int *BM_tab_base;
177c0ea7 1622 register unsigned char *cursor, *p_limit;
facdc750 1623 register int i, j;
cb6792d2 1624 unsigned char *pat, *pat_end;
facdc750
RS
1625 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1626
1627 unsigned char simple_translate[0400];
0190922f
KH
1628 /* These are set to the preceding bytes of a byte to be translated
1629 if charset_base is nonzero. As the maximum byte length of a
1630 multibyte character is 4, we have to check at most three previous
1631 bytes. */
1632 int translate_prev_byte1 = 0;
1633 int translate_prev_byte2 = 0;
1634 int translate_prev_byte3 = 0;
facdc750
RS
1635
1636#ifdef C_ALLOCA
1637 int BM_tab_space[0400];
1638 BM_tab = &BM_tab_space[0];
1639#else
1640 BM_tab = (int *) alloca (0400 * sizeof (int));
1641#endif
1642 /* The general approach is that we are going to maintain that we know */
1643 /* the first (closest to the present position, in whatever direction */
1644 /* we're searching) character that could possibly be the last */
1645 /* (furthest from present position) character of a valid match. We */
1646 /* advance the state of our knowledge by looking at that character */
1647 /* and seeing whether it indeed matches the last character of the */
1648 /* pattern. If it does, we take a closer look. If it does not, we */
1649 /* move our pointer (to putative last characters) as far as is */
1650 /* logically possible. This amount of movement, which I call a */
1651 /* stride, will be the length of the pattern if the actual character */
1652 /* appears nowhere in the pattern, otherwise it will be the distance */
1653 /* from the last occurrence of that character to the end of the */
1654 /* pattern. */
1655 /* As a coding trick, an enormous stride is coded into the table for */
1656 /* characters that match the last character. This allows use of only */
1657 /* a single test, a test for having gone past the end of the */
1658 /* permissible match region, to test for both possible matches (when */
1659 /* the stride goes past the end immediately) and failure to */
177c0ea7 1660 /* match (where you get nudged past the end one stride at a time). */
facdc750
RS
1661
1662 /* Here we make a "mickey mouse" BM table. The stride of the search */
1663 /* is determined only by the last character of the putative match. */
1664 /* If that character does not match, we will stride the proper */
1665 /* distance to propose a match that superimposes it on the last */
1666 /* instance of a character that matches it (per trt), or misses */
177c0ea7 1667 /* it entirely if there is none. */
facdc750
RS
1668
1669 dirlen = len_byte * direction;
1670 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
cb6792d2
RS
1671
1672 /* Record position after the end of the pattern. */
1673 pat_end = base_pat + len_byte;
1674 /* BASE_PAT points to a character that we start scanning from.
1675 It is the first character in a forward search,
1676 the last character in a backward search. */
facdc750 1677 if (direction < 0)
cb6792d2
RS
1678 base_pat = pat_end - 1;
1679
facdc750
RS
1680 BM_tab_base = BM_tab;
1681 BM_tab += 0400;
1682 j = dirlen; /* to get it in a register */
1683 /* A character that does not appear in the pattern induces a */
1684 /* stride equal to the pattern length. */
1685 while (BM_tab_base != BM_tab)
1686 {
1687 *--BM_tab = j;
1688 *--BM_tab = j;
1689 *--BM_tab = j;
1690 *--BM_tab = j;
1691 }
1692
1693 /* We use this for translation, instead of TRT itself.
1694 We fill this in to handle the characters that actually
1695 occur in the pattern. Others don't matter anyway! */
1696 bzero (simple_translate, sizeof simple_translate);
1697 for (i = 0; i < 0400; i++)
1698 simple_translate[i] = i;
1699
0190922f
KH
1700 if (charset_base)
1701 {
1702 /* Setup translate_prev_byte1/2/3 from CHARSET_BASE. Only a
1703 byte following them are the target of translation. */
1704 int sample_char = charset_base | 0x20;
1705 unsigned char str[MAX_MULTIBYTE_LENGTH];
1706 int len = CHAR_STRING (sample_char, str);
1707
1708 translate_prev_byte1 = str[len - 2];
1709 if (len > 2)
1710 {
1711 translate_prev_byte2 = str[len - 3];
1712 if (len > 3)
1713 translate_prev_byte3 = str[len - 4];
1714 }
1715 }
1716
facdc750
RS
1717 i = 0;
1718 while (i != infinity)
1719 {
cb6792d2 1720 unsigned char *ptr = base_pat + i;
facdc750
RS
1721 i += direction;
1722 if (i == dirlen)
1723 i = infinity;
1724 if (! NILP (trt))
ca1d1d23 1725 {
620a5178
KH
1726 /* If the byte currently looking at is the last of a
1727 character to check case-equivalents, set CH to that
1728 character. An ASCII character and a non-ASCII character
1729 matching with CHARSET_BASE are to be checked. */
0190922f
KH
1730 int ch = -1;
1731
1732 if (ASCII_BYTE_P (*ptr) || ! multibyte)
1733 ch = *ptr;
620a5178 1734 else if (charset_base
86dc6ccb 1735 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
ca1d1d23 1736 {
620a5178
KH
1737 unsigned char *charstart = ptr - 1;
1738
1739 while (! (CHAR_HEAD_P (*charstart)))
1740 charstart--;
1741 ch = STRING_CHAR (charstart, ptr - charstart + 1);
0190922f
KH
1742 if (charset_base != (ch & ~CHAR_FIELD3_MASK))
1743 ch = -1;
ca1d1d23 1744 }
facdc750 1745
6786a107 1746 if (ch >= 0400)
620a5178
KH
1747 j = ((unsigned char) ch) | 0200;
1748 else
1749 j = *ptr;
1750
facdc750
RS
1751 if (i == infinity)
1752 stride_for_teases = BM_tab[j];
ab228c24 1753
facdc750
RS
1754 BM_tab[j] = dirlen - i;
1755 /* A translation table is accompanied by its inverse -- see */
177c0ea7 1756 /* comment following downcase_table for details */
0190922f 1757 if (ch >= 0)
ab228c24
RS
1758 {
1759 int starting_ch = ch;
620a5178 1760 int starting_j = j;
0190922f 1761
ab228c24
RS
1762 while (1)
1763 {
1764 TRANSLATE (ch, inverse_trt, ch);
6786a107 1765 if (ch >= 0400)
ab228c24
RS
1766 j = ((unsigned char) ch) | 0200;
1767 else
1768 j = (unsigned char) ch;
1769
1770 /* For all the characters that map into CH,
1771 set up simple_translate to map the last byte
1772 into STARTING_J. */
1773 simple_translate[j] = starting_j;
1774 if (ch == starting_ch)
1775 break;
1776 BM_tab[j] = dirlen - i;
1777 }
1778 }
facdc750
RS
1779 }
1780 else
1781 {
1782 j = *ptr;
1783
1784 if (i == infinity)
1785 stride_for_teases = BM_tab[j];
1786 BM_tab[j] = dirlen - i;
ca1d1d23 1787 }
facdc750
RS
1788 /* stride_for_teases tells how much to stride if we get a */
1789 /* match on the far character but are subsequently */
1790 /* disappointed, by recording what the stride would have been */
1791 /* for that character if the last character had been */
1792 /* different. */
1793 }
1794 infinity = dirlen - infinity;
1795 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1796 /* loop invariant - POS_BYTE points at where last char (first
1797 char if reverse) of pattern would align in a possible match. */
1798 while (n != 0)
1799 {
1800 int tail_end;
1801 unsigned char *tail_end_ptr;
1802
1803 /* It's been reported that some (broken) compiler thinks that
1804 Boolean expressions in an arithmetic context are unsigned.
1805 Using an explicit ?1:0 prevents this. */
1806 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1807 < 0)
1808 return (n * (0 - direction));
1809 /* First we do the part we can by pointers (maybe nothing) */
1810 QUIT;
1811 pat = base_pat;
1812 limit = pos_byte - dirlen + direction;
67ce527d
KH
1813 if (direction > 0)
1814 {
1815 limit = BUFFER_CEILING_OF (limit);
1816 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1817 can take on without hitting edge of buffer or the gap. */
1818 limit = min (limit, pos_byte + 20000);
1819 limit = min (limit, lim_byte - 1);
1820 }
1821 else
1822 {
1823 limit = BUFFER_FLOOR_OF (limit);
1824 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1825 can take on without hitting edge of buffer or the gap. */
1826 limit = max (limit, pos_byte - 20000);
1827 limit = max (limit, lim_byte);
1828 }
facdc750
RS
1829 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1830 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1831
1832 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1833 {
facdc750
RS
1834 unsigned char *p2;
1835
1836 p_limit = BYTE_POS_ADDR (limit);
1837 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1838 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1839 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1840 {
facdc750 1841 if (direction > 0) /* worth duplicating */
ca1d1d23 1842 {
facdc750
RS
1843 /* Use signed comparison if appropriate
1844 to make cursor+infinity sure to be > p_limit.
1845 Assuming that the buffer lies in a range of addresses
1846 that are all "positive" (as ints) or all "negative",
1847 either kind of comparison will work as long
1848 as we don't step by infinity. So pick the kind
1849 that works when we do step by infinity. */
1850 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1851 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1852 cursor += BM_tab[*cursor];
ca1d1d23 1853 else
facdc750
RS
1854 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1855 cursor += BM_tab[*cursor];
1856 }
1857 else
1858 {
1859 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1860 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1861 cursor += BM_tab[*cursor];
1862 else
1863 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1864 cursor += BM_tab[*cursor];
1865 }
ca1d1d23 1866/* If you are here, cursor is beyond the end of the searched region. */
facdc750
RS
1867/* This can happen if you match on the far character of the pattern, */
1868/* because the "stride" of that character is infinity, a number able */
1869/* to throw you well beyond the end of the search. It can also */
1870/* happen if you fail to match within the permitted region and would */
1871/* otherwise try a character beyond that region */
1872 if ((cursor - p_limit) * direction <= len_byte)
1873 break; /* a small overrun is genuine */
1874 cursor -= infinity; /* large overrun = hit */
1875 i = dirlen - direction;
1876 if (! NILP (trt))
1877 {
1878 while ((i -= direction) + direction != 0)
ca1d1d23 1879 {
facdc750
RS
1880 int ch;
1881 cursor -= direction;
1882 /* Translate only the last byte of a character. */
1883 if (! multibyte
1884 || ((cursor == tail_end_ptr
1885 || CHAR_HEAD_P (cursor[1]))
1886 && (CHAR_HEAD_P (cursor[0])
0190922f
KH
1887 /* Check if this is the last byte of
1888 a translable character. */
1889 || (translate_prev_byte1 == cursor[-1]
1890 && (CHAR_HEAD_P (translate_prev_byte1)
1891 || (translate_prev_byte2 == cursor[-2]
1892 && (CHAR_HEAD_P (translate_prev_byte2)
1893 || (translate_prev_byte3 == cursor[-3]))))))))
facdc750
RS
1894 ch = simple_translate[*cursor];
1895 else
1896 ch = *cursor;
1897 if (pat[i] != ch)
1898 break;
ca1d1d23 1899 }
facdc750
RS
1900 }
1901 else
1902 {
1903 while ((i -= direction) + direction != 0)
ca1d1d23 1904 {
facdc750
RS
1905 cursor -= direction;
1906 if (pat[i] != *cursor)
1907 break;
ca1d1d23 1908 }
facdc750
RS
1909 }
1910 cursor += dirlen - i - direction; /* fix cursor */
1911 if (i + direction == 0)
1912 {
1913 int position;
0c8533c6 1914
facdc750 1915 cursor -= direction;
1113d9db 1916
facdc750
RS
1917 position = pos_byte + cursor - p2 + ((direction > 0)
1918 ? 1 - len_byte : 0);
1919 set_search_regs (position, len_byte);
ca325161 1920
facdc750
RS
1921 if ((n -= direction) != 0)
1922 cursor += dirlen; /* to resume search */
ca1d1d23 1923 else
facdc750
RS
1924 return ((direction > 0)
1925 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1926 }
facdc750
RS
1927 else
1928 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 1929 }
facdc750
RS
1930 pos_byte += cursor - p2;
1931 }
1932 else
1933 /* Now we'll pick up a clump that has to be done the hard */
1934 /* way because it covers a discontinuity */
1935 {
1936 limit = ((direction > 0)
1937 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1938 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1939 limit = ((direction > 0)
1940 ? min (limit + len_byte, lim_byte - 1)
1941 : max (limit - len_byte, lim_byte));
1942 /* LIMIT is now the last value POS_BYTE can have
1943 and still be valid for a possible match. */
1944 while (1)
ca1d1d23 1945 {
facdc750
RS
1946 /* This loop can be coded for space rather than */
1947 /* speed because it will usually run only once. */
1948 /* (the reach is at most len + 21, and typically */
177c0ea7 1949 /* does not exceed len) */
facdc750
RS
1950 while ((limit - pos_byte) * direction >= 0)
1951 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1952 /* now run the same tests to distinguish going off the */
1953 /* end, a match or a phony match. */
1954 if ((pos_byte - limit) * direction <= len_byte)
1955 break; /* ran off the end */
1956 /* Found what might be a match.
1957 Set POS_BYTE back to last (first if reverse) pos. */
1958 pos_byte -= infinity;
1959 i = dirlen - direction;
1960 while ((i -= direction) + direction != 0)
ca1d1d23 1961 {
facdc750
RS
1962 int ch;
1963 unsigned char *ptr;
1964 pos_byte -= direction;
1965 ptr = BYTE_POS_ADDR (pos_byte);
1966 /* Translate only the last byte of a character. */
1967 if (! multibyte
1968 || ((ptr == tail_end_ptr
1969 || CHAR_HEAD_P (ptr[1]))
1970 && (CHAR_HEAD_P (ptr[0])
0190922f
KH
1971 /* Check if this is the last byte of a
1972 translable character. */
1973 || (translate_prev_byte1 == ptr[-1]
1974 && (CHAR_HEAD_P (translate_prev_byte1)
1975 || (translate_prev_byte2 == ptr[-2]
1976 && (CHAR_HEAD_P (translate_prev_byte2)
1977 || translate_prev_byte3 == ptr[-3])))))))
facdc750
RS
1978 ch = simple_translate[*ptr];
1979 else
1980 ch = *ptr;
1981 if (pat[i] != ch)
1982 break;
1983 }
1984 /* Above loop has moved POS_BYTE part or all the way
1985 back to the first pos (last pos if reverse).
1986 Set it once again at the last (first if reverse) char. */
1987 pos_byte += dirlen - i- direction;
1988 if (i + direction == 0)
1989 {
1990 int position;
1991 pos_byte -= direction;
1113d9db 1992
facdc750 1993 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
0c8533c6 1994
facdc750 1995 set_search_regs (position, len_byte);
ca325161 1996
facdc750
RS
1997 if ((n -= direction) != 0)
1998 pos_byte += dirlen; /* to resume search */
ca1d1d23 1999 else
facdc750
RS
2000 return ((direction > 0)
2001 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 2002 }
facdc750
RS
2003 else
2004 pos_byte += stride_for_teases;
2005 }
2006 }
2007 /* We have done one clump. Can we continue? */
2008 if ((lim_byte - pos_byte) * direction < 0)
2009 return ((0 - n) * direction);
ca1d1d23 2010 }
facdc750 2011 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 2012}
ca325161 2013
fa8ed3e0 2014/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
a7e4cdde
RS
2015 for the overall match just found in the current buffer.
2016 Also clear out the match data for registers 1 and up. */
ca325161
RS
2017
2018static void
fa8ed3e0
RS
2019set_search_regs (beg_byte, nbytes)
2020 int beg_byte, nbytes;
ca325161 2021{
a7e4cdde
RS
2022 int i;
2023
ca325161
RS
2024 /* Make sure we have registers in which to store
2025 the match position. */
2026 if (search_regs.num_regs == 0)
2027 {
2d4a771a
RS
2028 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2029 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 2030 search_regs.num_regs = 2;
ca325161
RS
2031 }
2032
a7e4cdde
RS
2033 /* Clear out the other registers. */
2034 for (i = 1; i < search_regs.num_regs; i++)
2035 {
2036 search_regs.start[i] = -1;
2037 search_regs.end[i] = -1;
2038 }
2039
fa8ed3e0
RS
2040 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2041 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 2042 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 2043}
ca1d1d23
JB
2044\f
2045/* Given a string of words separated by word delimiters,
2046 compute a regexp that matches those exact words
2047 separated by arbitrary punctuation. */
2048
2049static Lisp_Object
2050wordify (string)
2051 Lisp_Object string;
2052{
2053 register unsigned char *p, *o;
0c8533c6 2054 register int i, i_byte, len, punct_count = 0, word_count = 0;
ca1d1d23 2055 Lisp_Object val;
0c8533c6
RS
2056 int prev_c = 0;
2057 int adjust;
ca1d1d23 2058
b7826503 2059 CHECK_STRING (string);
d5db4077
KR
2060 p = SDATA (string);
2061 len = SCHARS (string);
ca1d1d23 2062
0c8533c6
RS
2063 for (i = 0, i_byte = 0; i < len; )
2064 {
2065 int c;
177c0ea7 2066
eb99a8dd 2067 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
0c8533c6
RS
2068
2069 if (SYNTAX (c) != Sword)
2070 {
2071 punct_count++;
2072 if (i > 0 && SYNTAX (prev_c) == Sword)
2073 word_count++;
2074 }
ca1d1d23 2075
0c8533c6
RS
2076 prev_c = c;
2077 }
2078
2079 if (SYNTAX (prev_c) == Sword)
2080 word_count++;
2081 if (!word_count)
b07b65aa 2082 return empty_string;
0c8533c6
RS
2083
2084 adjust = - punct_count + 5 * (word_count - 1) + 4;
8a2df937
RS
2085 if (STRING_MULTIBYTE (string))
2086 val = make_uninit_multibyte_string (len + adjust,
d5db4077 2087 SBYTES (string)
8a2df937
RS
2088 + adjust);
2089 else
2090 val = make_uninit_string (len + adjust);
ca1d1d23 2091
d5db4077 2092 o = SDATA (val);
ca1d1d23
JB
2093 *o++ = '\\';
2094 *o++ = 'b';
1e9582d4 2095 prev_c = 0;
ca1d1d23 2096
1e9582d4
RS
2097 for (i = 0, i_byte = 0; i < len; )
2098 {
2099 int c;
2100 int i_byte_orig = i_byte;
177c0ea7 2101
eb99a8dd 2102 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1e9582d4
RS
2103
2104 if (SYNTAX (c) == Sword)
2105 {
5d69fe10 2106 bcopy (SDATA (string) + i_byte_orig, o,
1e9582d4
RS
2107 i_byte - i_byte_orig);
2108 o += i_byte - i_byte_orig;
2109 }
2110 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2111 {
2112 *o++ = '\\';
2113 *o++ = 'W';
2114 *o++ = '\\';
2115 *o++ = 'W';
2116 *o++ = '*';
2117 }
2118
2119 prev_c = c;
2120 }
ca1d1d23
JB
2121
2122 *o++ = '\\';
2123 *o++ = 'b';
2124
2125 return val;
2126}
2127\f
2128DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
8c1a1077
PJ
2129 "MSearch backward: ",
2130 doc: /* Search backward from point for STRING.
2131Set point to the beginning of the occurrence found, and return point.
2132An optional second argument bounds the search; it is a buffer position.
2133The match found must not extend before that position.
2134Optional third argument, if t, means if fail just return nil (no error).
2135 If not nil and not t, position at limit of search and return nil.
2136Optional fourth argument is repeat count--search for successive occurrences.
2137
2138Search case-sensitivity is determined by the value of the variable
2139`case-fold-search', which see.
2140
2141See also the functions `match-beginning', `match-end' and `replace-match'. */)
2142 (string, bound, noerror, count)
ca1d1d23
JB
2143 Lisp_Object string, bound, noerror, count;
2144{
b819a390 2145 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
2146}
2147
6af43974 2148DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
8c1a1077
PJ
2149 doc: /* Search forward from point for STRING.
2150Set point to the end of the occurrence found, and return point.
2151An optional second argument bounds the search; it is a buffer position.
2152The match found must not extend after that position. nil is equivalent
2153 to (point-max).
2154Optional third argument, if t, means if fail just return nil (no error).
2155 If not nil and not t, move to limit of search and return nil.
2156Optional fourth argument is repeat count--search for successive occurrences.
2157
2158Search case-sensitivity is determined by the value of the variable
2159`case-fold-search', which see.
2160
2161See also the functions `match-beginning', `match-end' and `replace-match'. */)
2162 (string, bound, noerror, count)
ca1d1d23
JB
2163 Lisp_Object string, bound, noerror, count;
2164{
b819a390 2165 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
2166}
2167
2168DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
8c1a1077
PJ
2169 "sWord search backward: ",
2170 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2171Set point to the beginning of the occurrence found, and return point.
2172An optional second argument bounds the search; it is a buffer position.
2173The match found must not extend before that position.
2174Optional third argument, if t, means if fail just return nil (no error).
2175 If not nil and not t, move to limit of search and return nil.
2176Optional fourth argument is repeat count--search for successive occurrences. */)
2177 (string, bound, noerror, count)
ca1d1d23
JB
2178 Lisp_Object string, bound, noerror, count;
2179{
b819a390 2180 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2181}
2182
2183DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
8c1a1077
PJ
2184 "sWord search: ",
2185 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2186Set point to the end of the occurrence found, and return point.
2187An optional second argument bounds the search; it is a buffer position.
2188The match found must not extend after that position.
2189Optional third argument, if t, means if fail just return nil (no error).
2190 If not nil and not t, move to limit of search and return nil.
2191Optional fourth argument is repeat count--search for successive occurrences. */)
2192 (string, bound, noerror, count)
ca1d1d23
JB
2193 Lisp_Object string, bound, noerror, count;
2194{
b819a390 2195 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
2196}
2197
2198DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
8c1a1077
PJ
2199 "sRE search backward: ",
2200 doc: /* Search backward from point for match for regular expression REGEXP.
2201Set point to the beginning of the match, and return point.
2202The match found is the one starting last in the buffer
2203and yet ending before the origin of the search.
2204An optional second argument bounds the search; it is a buffer position.
2205The match found must start at or after that position.
2206Optional third argument, if t, means if fail just return nil (no error).
2207 If not nil and not t, move to limit of search and return nil.
2208Optional fourth argument is repeat count--search for successive occurrences.
2209See also the functions `match-beginning', `match-end', `match-string',
2210and `replace-match'. */)
2211 (regexp, bound, noerror, count)
19c0a730 2212 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2213{
b819a390 2214 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2215}
2216
2217DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
8c1a1077
PJ
2218 "sRE search: ",
2219 doc: /* Search forward from point for regular expression REGEXP.
2220Set point to the end of the occurrence found, and return point.
2221An optional second argument bounds the search; it is a buffer position.
2222The match found must not extend after that position.
2223Optional third argument, if t, means if fail just return nil (no error).
2224 If not nil and not t, move to limit of search and return nil.
2225Optional fourth argument is repeat count--search for successive occurrences.
2226See also the functions `match-beginning', `match-end', `match-string',
2227and `replace-match'. */)
2228 (regexp, bound, noerror, count)
19c0a730 2229 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2230{
b819a390
RS
2231 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2232}
2233
2234DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
8c1a1077
PJ
2235 "sPosix search backward: ",
2236 doc: /* Search backward from point for match for regular expression REGEXP.
2237Find the longest match in accord with Posix regular expression rules.
2238Set point to the beginning of the match, and return point.
2239The match found is the one starting last in the buffer
2240and yet ending before the origin of the search.
2241An optional second argument bounds the search; it is a buffer position.
2242The match found must start at or after that position.
2243Optional third argument, if t, means if fail just return nil (no error).
2244 If not nil and not t, move to limit of search and return nil.
2245Optional fourth argument is repeat count--search for successive occurrences.
2246See also the functions `match-beginning', `match-end', `match-string',
2247and `replace-match'. */)
2248 (regexp, bound, noerror, count)
b819a390
RS
2249 Lisp_Object regexp, bound, noerror, count;
2250{
2251 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2252}
2253
2254DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
8c1a1077
PJ
2255 "sPosix search: ",
2256 doc: /* Search forward from point for regular expression REGEXP.
2257Find the longest match in accord with Posix regular expression rules.
2258Set point to the end of the occurrence found, and return point.
2259An optional second argument bounds the search; it is a buffer position.
2260The match found must not extend after that position.
2261Optional third argument, if t, means if fail just return nil (no error).
2262 If not nil and not t, move to limit of search and return nil.
2263Optional fourth argument is repeat count--search for successive occurrences.
2264See also the functions `match-beginning', `match-end', `match-string',
2265and `replace-match'. */)
2266 (regexp, bound, noerror, count)
b819a390
RS
2267 Lisp_Object regexp, bound, noerror, count;
2268{
2269 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2270}
2271\f
d7a5ad5f 2272DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
8c1a1077 2273 doc: /* Replace text matched by last search with NEWTEXT.
4dd0c271
RS
2274Leave point at the end of the replacement text.
2275
8c1a1077
PJ
2276If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2277Otherwise maybe capitalize the whole text, or maybe just word initials,
2278based on the replaced text.
2279If the replaced text has only capital letters
2280and has at least one multiletter word, convert NEWTEXT to all caps.
4dd0c271
RS
2281Otherwise if all words are capitalized in the replaced text,
2282capitalize each word in NEWTEXT.
8c1a1077
PJ
2283
2284If third arg LITERAL is non-nil, insert NEWTEXT literally.
2285Otherwise treat `\\' as special:
2286 `\\&' in NEWTEXT means substitute original matched text.
2287 `\\N' means substitute what matched the Nth `\\(...\\)'.
2288 If Nth parens didn't match, substitute nothing.
2289 `\\\\' means insert one `\\'.
4dd0c271
RS
2290Case conversion does not apply to these substitutions.
2291
8c1a1077 2292FIXEDCASE and LITERAL are optional arguments.
8c1a1077
PJ
2293
2294The optional fourth argument STRING can be a string to modify.
2295This is meaningful when the previous match was done against STRING,
2296using `string-match'. When used this way, `replace-match'
2297creates and returns a new string made by copying STRING and replacing
2298the part of STRING that was matched.
2299
2300The optional fifth argument SUBEXP specifies a subexpression;
2301it says to replace just that subexpression with NEWTEXT,
2302rather than replacing the entire matched text.
2303This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2304`\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2305NEWTEXT in place of subexp N.
2306This is useful only after a regular expression search or match,
2307since only regular expressions have distinguished subexpressions. */)
2308 (newtext, fixedcase, literal, string, subexp)
d7a5ad5f 2309 Lisp_Object newtext, fixedcase, literal, string, subexp;
ca1d1d23
JB
2310{
2311 enum { nochange, all_caps, cap_initial } case_action;
ac3b28b1 2312 register int pos, pos_byte;
ca1d1d23 2313 int some_multiletter_word;
97832bd0 2314 int some_lowercase;
73dc8771 2315 int some_uppercase;
208767c3 2316 int some_nonuppercase_initial;
ca1d1d23 2317 register int c, prevc;
d7a5ad5f 2318 int sub;
3e18eecf 2319 int opoint, newpoint;
ca1d1d23 2320
b7826503 2321 CHECK_STRING (newtext);
ca1d1d23 2322
080c45fd 2323 if (! NILP (string))
b7826503 2324 CHECK_STRING (string);
080c45fd 2325
ca1d1d23
JB
2326 case_action = nochange; /* We tried an initialization */
2327 /* but some C compilers blew it */
4746118a
JB
2328
2329 if (search_regs.num_regs <= 0)
d72cdbfc 2330 error ("`replace-match' called before any match found");
4746118a 2331
d7a5ad5f
RS
2332 if (NILP (subexp))
2333 sub = 0;
2334 else
2335 {
b7826503 2336 CHECK_NUMBER (subexp);
d7a5ad5f
RS
2337 sub = XINT (subexp);
2338 if (sub < 0 || sub >= search_regs.num_regs)
2339 args_out_of_range (subexp, make_number (search_regs.num_regs));
2340 }
2341
080c45fd
RS
2342 if (NILP (string))
2343 {
d7a5ad5f
RS
2344 if (search_regs.start[sub] < BEGV
2345 || search_regs.start[sub] > search_regs.end[sub]
2346 || search_regs.end[sub] > ZV)
2347 args_out_of_range (make_number (search_regs.start[sub]),
2348 make_number (search_regs.end[sub]));
080c45fd
RS
2349 }
2350 else
2351 {
d7a5ad5f
RS
2352 if (search_regs.start[sub] < 0
2353 || search_regs.start[sub] > search_regs.end[sub]
d5db4077 2354 || search_regs.end[sub] > SCHARS (string))
d7a5ad5f
RS
2355 args_out_of_range (make_number (search_regs.start[sub]),
2356 make_number (search_regs.end[sub]));
080c45fd 2357 }
ca1d1d23
JB
2358
2359 if (NILP (fixedcase))
2360 {
2361 /* Decide how to casify by examining the matched text. */
ac3b28b1 2362 int last;
ca1d1d23 2363
ac3b28b1
KH
2364 pos = search_regs.start[sub];
2365 last = search_regs.end[sub];
fa8ed3e0
RS
2366
2367 if (NILP (string))
ac3b28b1 2368 pos_byte = CHAR_TO_BYTE (pos);
fa8ed3e0 2369 else
ac3b28b1 2370 pos_byte = string_char_to_byte (string, pos);
fa8ed3e0 2371
ca1d1d23
JB
2372 prevc = '\n';
2373 case_action = all_caps;
2374
2375 /* some_multiletter_word is set nonzero if any original word
2376 is more than one letter long. */
2377 some_multiletter_word = 0;
97832bd0 2378 some_lowercase = 0;
208767c3 2379 some_nonuppercase_initial = 0;
73dc8771 2380 some_uppercase = 0;
ca1d1d23 2381
ac3b28b1 2382 while (pos < last)
ca1d1d23 2383 {
080c45fd 2384 if (NILP (string))
ac3b28b1
KH
2385 {
2386 c = FETCH_CHAR (pos_byte);
2387 INC_BOTH (pos, pos_byte);
2388 }
080c45fd 2389 else
ac3b28b1 2390 FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
080c45fd 2391
ca1d1d23
JB
2392 if (LOWERCASEP (c))
2393 {
2394 /* Cannot be all caps if any original char is lower case */
2395
97832bd0 2396 some_lowercase = 1;
ca1d1d23 2397 if (SYNTAX (prevc) != Sword)
208767c3 2398 some_nonuppercase_initial = 1;
ca1d1d23
JB
2399 else
2400 some_multiletter_word = 1;
2401 }
d16c2b66 2402 else if (UPPERCASEP (c))
ca1d1d23 2403 {
73dc8771 2404 some_uppercase = 1;
97832bd0 2405 if (SYNTAX (prevc) != Sword)
c4d460ce 2406 ;
97832bd0 2407 else
ca1d1d23
JB
2408 some_multiletter_word = 1;
2409 }
208767c3
RS
2410 else
2411 {
2412 /* If the initial is a caseless word constituent,
2413 treat that like a lowercase initial. */
2414 if (SYNTAX (prevc) != Sword)
2415 some_nonuppercase_initial = 1;
2416 }
ca1d1d23
JB
2417
2418 prevc = c;
2419 }
2420
97832bd0
RS
2421 /* Convert to all caps if the old text is all caps
2422 and has at least one multiletter word. */
2423 if (! some_lowercase && some_multiletter_word)
2424 case_action = all_caps;
c4d460ce 2425 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2426 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2427 case_action = cap_initial;
208767c3 2428 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2429 /* Should x -> yz, operating on X, give Yz or YZ?
2430 We'll assume the latter. */
2431 case_action = all_caps;
97832bd0
RS
2432 else
2433 case_action = nochange;
ca1d1d23
JB
2434 }
2435
080c45fd
RS
2436 /* Do replacement in a string. */
2437 if (!NILP (string))
2438 {
2439 Lisp_Object before, after;
2440
2441 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2442 make_number (search_regs.start[sub]));
2443 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2444
636a5e28
RS
2445 /* Substitute parts of the match into NEWTEXT
2446 if desired. */
080c45fd
RS
2447 if (NILP (literal))
2448 {
d131e79c
RS
2449 int lastpos = 0;
2450 int lastpos_byte = 0;
080c45fd
RS
2451 /* We build up the substituted string in ACCUM. */
2452 Lisp_Object accum;
2453 Lisp_Object middle;
d5db4077 2454 int length = SBYTES (newtext);
080c45fd
RS
2455
2456 accum = Qnil;
2457
ac3b28b1 2458 for (pos_byte = 0, pos = 0; pos_byte < length;)
080c45fd
RS
2459 {
2460 int substart = -1;
6bbd7a29 2461 int subend = 0;
1e79ec24 2462 int delbackslash = 0;
080c45fd 2463
0c8533c6
RS
2464 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2465
080c45fd
RS
2466 if (c == '\\')
2467 {
0c8533c6 2468 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
177c0ea7 2469
080c45fd
RS
2470 if (c == '&')
2471 {
d7a5ad5f
RS
2472 substart = search_regs.start[sub];
2473 subend = search_regs.end[sub];
080c45fd 2474 }
5fbbc83b 2475 else if (c >= '1' && c <= '9')
080c45fd 2476 {
5fbbc83b
RS
2477 if (search_regs.start[c - '0'] >= 0
2478 && c <= search_regs.num_regs + '0')
080c45fd
RS
2479 {
2480 substart = search_regs.start[c - '0'];
2481 subend = search_regs.end[c - '0'];
2482 }
5fbbc83b
RS
2483 else
2484 {
2485 /* If that subexp did not match,
2486 replace \\N with nothing. */
2487 substart = 0;
2488 subend = 0;
2489 }
080c45fd 2490 }
1e79ec24
KH
2491 else if (c == '\\')
2492 delbackslash = 1;
636a5e28
RS
2493 else
2494 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2495 }
2496 if (substart >= 0)
2497 {
d131e79c
RS
2498 if (pos - 2 != lastpos)
2499 middle = substring_both (newtext, lastpos,
2500 lastpos_byte,
2501 pos - 2, pos_byte - 2);
080c45fd
RS
2502 else
2503 middle = Qnil;
2504 accum = concat3 (accum, middle,
0c8533c6
RS
2505 Fsubstring (string,
2506 make_number (substart),
080c45fd
RS
2507 make_number (subend)));
2508 lastpos = pos;
0c8533c6 2509 lastpos_byte = pos_byte;
080c45fd 2510 }
1e79ec24
KH
2511 else if (delbackslash)
2512 {
d131e79c
RS
2513 middle = substring_both (newtext, lastpos,
2514 lastpos_byte,
2515 pos - 1, pos_byte - 1);
0c8533c6 2516
1e79ec24
KH
2517 accum = concat2 (accum, middle);
2518 lastpos = pos;
0c8533c6 2519 lastpos_byte = pos_byte;
1e79ec24 2520 }
080c45fd
RS
2521 }
2522
d131e79c
RS
2523 if (pos != lastpos)
2524 middle = substring_both (newtext, lastpos,
2525 lastpos_byte,
0c8533c6 2526 pos, pos_byte);
080c45fd
RS
2527 else
2528 middle = Qnil;
2529
2530 newtext = concat2 (accum, middle);
2531 }
2532
636a5e28 2533 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2534 if (case_action == all_caps)
2535 newtext = Fupcase (newtext);
2536 else if (case_action == cap_initial)
2b2eead9 2537 newtext = Fupcase_initials (newtext);
080c45fd
RS
2538
2539 return concat3 (before, newtext, after);
2540 }
2541
09c4719e 2542 /* Record point, then move (quietly) to the start of the match. */
9160906f 2543 if (PT >= search_regs.end[sub])
b0eba991 2544 opoint = PT - ZV;
9160906f
RS
2545 else if (PT > search_regs.start[sub])
2546 opoint = search_regs.end[sub] - ZV;
b0eba991
RS
2547 else
2548 opoint = PT;
2549
886ed6ec
RS
2550 /* If we want non-literal replacement,
2551 perform substitution on the replacement string. */
2552 if (NILP (literal))
ca1d1d23 2553 {
d5db4077 2554 int length = SBYTES (newtext);
68e69fbd
RS
2555 unsigned char *substed;
2556 int substed_alloc_size, substed_len;
3bc25e52
KH
2557 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2558 int str_multibyte = STRING_MULTIBYTE (newtext);
2559 Lisp_Object rev_tbl;
886ed6ec 2560 int really_changed = 0;
3bc25e52
KH
2561
2562 rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2563 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2564 make_number (0))
2565 : Qnil);
ac3b28b1 2566
68e69fbd
RS
2567 substed_alloc_size = length * 2 + 100;
2568 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2569 substed_len = 0;
2570
3bc25e52
KH
2571 /* Go thru NEWTEXT, producing the actual text to insert in
2572 SUBSTED while adjusting multibyteness to that of the current
2573 buffer. */
ca1d1d23 2574
ac3b28b1 2575 for (pos_byte = 0, pos = 0; pos_byte < length;)
ca1d1d23 2576 {
68e69fbd 2577 unsigned char str[MAX_MULTIBYTE_LENGTH];
f8ce8a0d
GM
2578 unsigned char *add_stuff = NULL;
2579 int add_len = 0;
68e69fbd 2580 int idx = -1;
9a76659d 2581
3bc25e52
KH
2582 if (str_multibyte)
2583 {
eb99a8dd 2584 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
3bc25e52
KH
2585 if (!buf_multibyte)
2586 c = multibyte_char_to_unibyte (c, rev_tbl);
2587 }
2588 else
2589 {
2590 /* Note that we don't have to increment POS. */
5d69fe10 2591 c = SREF (newtext, pos_byte++);
3bc25e52
KH
2592 if (buf_multibyte)
2593 c = unibyte_char_to_multibyte (c);
2594 }
ac3b28b1 2595
68e69fbd
RS
2596 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2597 or set IDX to a match index, which means put that part
2598 of the buffer text into SUBSTED. */
2599
ca1d1d23
JB
2600 if (c == '\\')
2601 {
886ed6ec
RS
2602 really_changed = 1;
2603
3bc25e52
KH
2604 if (str_multibyte)
2605 {
eb99a8dd
KH
2606 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2607 pos, pos_byte);
3bc25e52
KH
2608 if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2609 c = multibyte_char_to_unibyte (c, rev_tbl);
2610 }
2611 else
2612 {
d5db4077 2613 c = SREF (newtext, pos_byte++);
3bc25e52
KH
2614 if (buf_multibyte)
2615 c = unibyte_char_to_multibyte (c);
2616 }
2617
ca1d1d23 2618 if (c == '&')
68e69fbd 2619 idx = sub;
78445046 2620 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
2621 {
2622 if (search_regs.start[c - '0'] >= 1)
68e69fbd 2623 idx = c - '0';
ca1d1d23 2624 }
636a5e28 2625 else if (c == '\\')
68e69fbd 2626 add_len = 1, add_stuff = "\\";
636a5e28 2627 else
3bc25e52
KH
2628 {
2629 xfree (substed);
2630 error ("Invalid use of `\\' in replacement text");
2631 }
ca1d1d23
JB
2632 }
2633 else
68e69fbd
RS
2634 {
2635 add_len = CHAR_STRING (c, str);
2636 add_stuff = str;
2637 }
2638
2639 /* If we want to copy part of a previous match,
2640 set up ADD_STUFF and ADD_LEN to point to it. */
2641 if (idx >= 0)
2642 {
2643 int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2644 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2645 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2646 move_gap (search_regs.start[idx]);
2647 add_stuff = BYTE_POS_ADDR (begbyte);
2648 }
2649
2650 /* Now the stuff we want to add to SUBSTED
2651 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2652
2653 /* Make sure SUBSTED is big enough. */
2654 if (substed_len + add_len >= substed_alloc_size)
2655 {
2656 substed_alloc_size = substed_len + add_len + 500;
2657 substed = (unsigned char *) xrealloc (substed,
2658 substed_alloc_size + 1);
2659 }
2660
2661 /* Now add to the end of SUBSTED. */
f8ce8a0d
GM
2662 if (add_stuff)
2663 {
2664 bcopy (add_stuff, substed + substed_len, add_len);
2665 substed_len += add_len;
2666 }
ca1d1d23 2667 }
68e69fbd 2668
886ed6ec 2669 if (really_changed)
80460525
KH
2670 {
2671 if (buf_multibyte)
2672 {
2673 int nchars = multibyte_chars_in_text (substed, substed_len);
68e69fbd 2674
80460525
KH
2675 newtext = make_multibyte_string (substed, nchars, substed_len);
2676 }
2677 else
2678 newtext = make_unibyte_string (substed, substed_len);
2679 }
68e69fbd 2680 xfree (substed);
ca1d1d23
JB
2681 }
2682
886ed6ec
RS
2683 /* Replace the old text with the new in the cleanest possible way. */
2684 replace_range (search_regs.start[sub], search_regs.end[sub],
2685 newtext, 1, 0, 1);
d5db4077 2686 newpoint = search_regs.start[sub] + SCHARS (newtext);
ca1d1d23
JB
2687
2688 if (case_action == all_caps)
886ed6ec
RS
2689 Fupcase_region (make_number (search_regs.start[sub]),
2690 make_number (newpoint));
ca1d1d23 2691 else if (case_action == cap_initial)
886ed6ec
RS
2692 Fupcase_initials_region (make_number (search_regs.start[sub]),
2693 make_number (newpoint));
3e18eecf 2694
98e942e0
RS
2695 /* Adjust search data for this change. */
2696 {
5b88a2c5 2697 int oldend = search_regs.end[sub];
41c01205 2698 int oldstart = search_regs.start[sub];
98e942e0
RS
2699 int change = newpoint - search_regs.end[sub];
2700 int i;
2701
2702 for (i = 0; i < search_regs.num_regs; i++)
2703 {
41c01205 2704 if (search_regs.start[i] >= oldend)
98e942e0 2705 search_regs.start[i] += change;
41c01205
DK
2706 else if (search_regs.start[i] > oldstart)
2707 search_regs.start[i] = oldstart;
2708 if (search_regs.end[i] >= oldend)
98e942e0 2709 search_regs.end[i] += change;
41c01205
DK
2710 else if (search_regs.end[i] > oldstart)
2711 search_regs.end[i] = oldstart;
98e942e0
RS
2712 }
2713 }
2714
b0eba991 2715 /* Put point back where it was in the text. */
8d808a65 2716 if (opoint <= 0)
fa8ed3e0 2717 TEMP_SET_PT (opoint + ZV);
b0eba991 2718 else
fa8ed3e0 2719 TEMP_SET_PT (opoint);
b0eba991
RS
2720
2721 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2722 move_if_not_intangible (newpoint);
177c0ea7 2723
ca1d1d23
JB
2724 return Qnil;
2725}
2726\f
2727static Lisp_Object
2728match_limit (num, beginningp)
2729 Lisp_Object num;
2730 int beginningp;
2731{
2732 register int n;
2733
b7826503 2734 CHECK_NUMBER (num);
ca1d1d23 2735 n = XINT (num);
f90a5bf5 2736 if (n < 0)
bd2cbd56 2737 args_out_of_range (num, make_number (0));
f90a5bf5
RS
2738 if (search_regs.num_regs <= 0)
2739 error ("No match data, because no search succeeded");
9b9ceb61 2740 if (n >= search_regs.num_regs
4746118a 2741 || search_regs.start[n] < 0)
ca1d1d23
JB
2742 return Qnil;
2743 return (make_number ((beginningp) ? search_regs.start[n]
2744 : search_regs.end[n]));
2745}
2746
2747DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
8c1a1077
PJ
2748 doc: /* Return position of start of text matched by last search.
2749SUBEXP, a number, specifies which parenthesized expression in the last
2750 regexp.
2751Value is nil if SUBEXPth pair didn't match, or there were less than
2752 SUBEXP pairs.
2753Zero means the entire text matched by the whole regexp or whole string. */)
2754 (subexp)
5806161b 2755 Lisp_Object subexp;
ca1d1d23 2756{
5806161b 2757 return match_limit (subexp, 1);
ca1d1d23
JB
2758}
2759
2760DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
8c1a1077
PJ
2761 doc: /* Return position of end of text matched by last search.
2762SUBEXP, a number, specifies which parenthesized expression in the last
2763 regexp.
2764Value is nil if SUBEXPth pair didn't match, or there were less than
2765 SUBEXP pairs.
2766Zero means the entire text matched by the whole regexp or whole string. */)
2767 (subexp)
5806161b 2768 Lisp_Object subexp;
ca1d1d23 2769{
5806161b 2770 return match_limit (subexp, 0);
177c0ea7 2771}
ca1d1d23 2772
abd0071c 2773DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
8c1a1077
PJ
2774 doc: /* Return a list containing all info on what the last search matched.
2775Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2776All the elements are markers or nil (nil if the Nth pair didn't match)
2777if the last match was on a buffer; integers or nil if a string was matched.
2778Use `store-match-data' to reinstate the data in this list.
2779
41c01205
DK
2780If INTEGERS (the optional first argument) is non-nil, always use
2781integers \(rather than markers) to represent buffer positions. In
2782this case, and if the last match was in a buffer, the buffer will get
2783stored as one additional element at the end of the list.
2784
abd0071c
KS
2785If REUSE is a list, reuse it as part of the value. If REUSE is long
2786enough to hold all the values, and if INTEGERS is non-nil, no consing
2787is done.
2788
2789If optional third arg RESEAT is non-nil, any previous markers on the
2790REUSE list will be modified to point to nowhere.
2791
140a6b7e 2792Return value is undefined if the last search failed. */)
abd0071c
KS
2793 (integers, reuse, reseat)
2794 Lisp_Object integers, reuse, reseat;
ca1d1d23 2795{
56256c2a 2796 Lisp_Object tail, prev;
4746118a 2797 Lisp_Object *data;
ca1d1d23
JB
2798 int i, len;
2799
abd0071c
KS
2800 if (!NILP (reseat))
2801 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2802 if (MARKERP (XCAR (tail)))
2803 {
51f10faa 2804 unchain_marker (XMARKER (XCAR (tail)));
abd0071c
KS
2805 XSETCAR (tail, Qnil);
2806 }
2807
daa37602 2808 if (NILP (last_thing_searched))
c36bcf1b 2809 return Qnil;
daa37602 2810
6bbd7a29
GM
2811 prev = Qnil;
2812
41c01205 2813 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
4746118a
JB
2814 * sizeof (Lisp_Object));
2815
41c01205 2816 len = 0;
4746118a 2817 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2818 {
2819 int start = search_regs.start[i];
2820 if (start >= 0)
2821 {
56256c2a
RS
2822 if (EQ (last_thing_searched, Qt)
2823 || ! NILP (integers))
ca1d1d23 2824 {
c235cce7
KH
2825 XSETFASTINT (data[2 * i], start);
2826 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2827 }
0ed62dc7 2828 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2829 {
2830 data[2 * i] = Fmake_marker ();
daa37602
JB
2831 Fset_marker (data[2 * i],
2832 make_number (start),
2833 last_thing_searched);
ca1d1d23
JB
2834 data[2 * i + 1] = Fmake_marker ();
2835 Fset_marker (data[2 * i + 1],
177c0ea7 2836 make_number (search_regs.end[i]),
daa37602 2837 last_thing_searched);
ca1d1d23 2838 }
daa37602
JB
2839 else
2840 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2841 abort ();
2842
abd0071c 2843 len = 2 * i + 2;
ca1d1d23
JB
2844 }
2845 else
abd0071c 2846 data[2 * i] = data[2 * i + 1] = Qnil;
ca1d1d23 2847 }
56256c2a 2848
bd2cbd56 2849 if (BUFFERP (last_thing_searched) && !NILP (integers))
41c01205 2850 {
bd2cbd56 2851 data[len] = last_thing_searched;
41c01205
DK
2852 len++;
2853 }
2854
56256c2a
RS
2855 /* If REUSE is not usable, cons up the values and return them. */
2856 if (! CONSP (reuse))
41c01205 2857 return Flist (len, data);
56256c2a
RS
2858
2859 /* If REUSE is a list, store as many value elements as will fit
2860 into the elements of REUSE. */
2861 for (i = 0, tail = reuse; CONSP (tail);
c1d497be 2862 i++, tail = XCDR (tail))
56256c2a 2863 {
41c01205 2864 if (i < len)
f3fbd155 2865 XSETCAR (tail, data[i]);
56256c2a 2866 else
f3fbd155 2867 XSETCAR (tail, Qnil);
56256c2a
RS
2868 prev = tail;
2869 }
2870
2871 /* If we couldn't fit all value elements into REUSE,
2872 cons up the rest of them and add them to the end of REUSE. */
41c01205
DK
2873 if (i < len)
2874 XSETCDR (prev, Flist (len - i, data + i));
56256c2a
RS
2875
2876 return reuse;
ca1d1d23
JB
2877}
2878
51f10faa
KS
2879/* Internal usage only:
2880 If RESEAT is `evaporate', put the markers back on the free list
2881 immediately. No other references to the markers must exist in this case,
2882 so it is used only internally on the unwind stack and save-match-data from
2883 Lisp. */
ca1d1d23 2884
abd0071c 2885DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
8c1a1077 2886 doc: /* Set internal data on last search match from elements of LIST.
abd0071c
KS
2887LIST should have been created by calling `match-data' previously.
2888
51f10faa 2889If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
abd0071c
KS
2890 (list, reseat)
2891 register Lisp_Object list, reseat;
ca1d1d23
JB
2892{
2893 register int i;
2894 register Lisp_Object marker;
2895
7074fde6
FP
2896 if (running_asynch_code)
2897 save_search_regs ();
2898
29100cea 2899 CHECK_LIST (list);
ca1d1d23 2900
41c01205
DK
2901 /* Unless we find a marker with a buffer or an explicit buffer
2902 in LIST, assume that this match data came from a string. */
daa37602
JB
2903 last_thing_searched = Qt;
2904
4746118a
JB
2905 /* Allocate registers if they don't already exist. */
2906 {
d084e942 2907 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
2908
2909 if (length > search_regs.num_regs)
2910 {
1113d9db
JB
2911 if (search_regs.num_regs == 0)
2912 {
2913 search_regs.start
2914 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2915 search_regs.end
2916 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2917 }
4746118a 2918 else
1113d9db
JB
2919 {
2920 search_regs.start
2921 = (regoff_t *) xrealloc (search_regs.start,
2922 length * sizeof (regoff_t));
2923 search_regs.end
2924 = (regoff_t *) xrealloc (search_regs.end,
2925 length * sizeof (regoff_t));
2926 }
4746118a 2927
e62371e9
KH
2928 for (i = search_regs.num_regs; i < length; i++)
2929 search_regs.start[i] = -1;
2930
487282dc 2931 search_regs.num_regs = length;
4746118a 2932 }
ca1d1d23 2933
abd0071c 2934 for (i = 0; CONSP (list); i++)
41c01205 2935 {
abd0071c 2936 marker = XCAR (list);
bd2cbd56 2937 if (BUFFERP (marker))
c3762cbd 2938 {
bd2cbd56 2939 last_thing_searched = marker;
c3762cbd
DK
2940 break;
2941 }
2942 if (i >= length)
2943 break;
41c01205
DK
2944 if (NILP (marker))
2945 {
2946 search_regs.start[i] = -1;
abd0071c 2947 list = XCDR (list);
41c01205
DK
2948 }
2949 else
2950 {
2951 int from;
abd0071c 2952 Lisp_Object m;
e2811828 2953
abd0071c 2954 m = marker;
41c01205
DK
2955 if (MARKERP (marker))
2956 {
2957 if (XMARKER (marker)->buffer == 0)
2958 XSETFASTINT (marker, 0);
2959 else
2960 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2961 }
e2811828 2962
41c01205
DK
2963 CHECK_NUMBER_COERCE_MARKER (marker);
2964 from = XINT (marker);
e2811828 2965
abd0071c
KS
2966 if (!NILP (reseat) && MARKERP (m))
2967 {
2968 if (EQ (reseat, Qevaporate))
2969 free_marker (m);
2970 else
2971 unchain_marker (XMARKER (m));
9ad54a7e 2972 XSETCAR (list, Qnil);
abd0071c
KS
2973 }
2974
2975 if ((list = XCDR (list), !CONSP (list)))
2976 break;
2977
2978 m = marker = XCAR (list);
2979
41c01205
DK
2980 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2981 XSETFASTINT (marker, 0);
e2811828 2982
41c01205
DK
2983 CHECK_NUMBER_COERCE_MARKER (marker);
2984 search_regs.start[i] = from;
2985 search_regs.end[i] = XINT (marker);
abd0071c
KS
2986
2987 if (!NILP (reseat) && MARKERP (m))
2988 {
2989 if (EQ (reseat, Qevaporate))
2990 free_marker (m);
2991 else
2992 unchain_marker (XMARKER (m));
9ad54a7e 2993 XSETCAR (list, Qnil);
abd0071c 2994 }
41c01205 2995 }
abd0071c 2996 list = XCDR (list);
41c01205 2997 }
ca1d1d23 2998
41c01205
DK
2999 for (; i < search_regs.num_regs; i++)
3000 search_regs.start[i] = -1;
3001 }
ca1d1d23 3002
177c0ea7 3003 return Qnil;
ca1d1d23
JB
3004}
3005
7074fde6
FP
3006/* If non-zero the match data have been saved in saved_search_regs
3007 during the execution of a sentinel or filter. */
75ebf74b 3008static int search_regs_saved;
7074fde6 3009static struct re_registers saved_search_regs;
41c01205 3010static Lisp_Object saved_last_thing_searched;
7074fde6
FP
3011
3012/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3013 if asynchronous code (filter or sentinel) is running. */
3014static void
3015save_search_regs ()
3016{
3017 if (!search_regs_saved)
3018 {
3019 saved_search_regs.num_regs = search_regs.num_regs;
3020 saved_search_regs.start = search_regs.start;
3021 saved_search_regs.end = search_regs.end;
41c01205
DK
3022 saved_last_thing_searched = last_thing_searched;
3023 last_thing_searched = Qnil;
7074fde6 3024 search_regs.num_regs = 0;
2d4a771a
RS
3025 search_regs.start = 0;
3026 search_regs.end = 0;
7074fde6
FP
3027
3028 search_regs_saved = 1;
3029 }
3030}
3031
3032/* Called upon exit from filters and sentinels. */
3033void
abd0071c 3034restore_search_regs ()
7074fde6
FP
3035{
3036 if (search_regs_saved)
3037 {
3038 if (search_regs.num_regs > 0)
3039 {
3040 xfree (search_regs.start);
3041 xfree (search_regs.end);
3042 }
3043 search_regs.num_regs = saved_search_regs.num_regs;
3044 search_regs.start = saved_search_regs.start;
3045 search_regs.end = saved_search_regs.end;
41c01205
DK
3046 last_thing_searched = saved_last_thing_searched;
3047 saved_last_thing_searched = Qnil;
7074fde6
FP
3048 search_regs_saved = 0;
3049 }
3050}
3051
abd0071c
KS
3052static Lisp_Object
3053unwind_set_match_data (list)
3054 Lisp_Object list;
3055{
51f10faa 3056 /* It is safe to free (evaporate) the markers immediately. */
abd0071c
KS
3057 return Fset_match_data (list, Qevaporate);
3058}
3059
3060/* Called to unwind protect the match data. */
3061void
3062record_unwind_save_match_data ()
3063{
3064 record_unwind_protect (unwind_set_match_data,
3065 Fmatch_data (Qnil, Qnil, Qnil));
3066}
3067
ca1d1d23
JB
3068/* Quote a string to inactivate reg-expr chars */
3069
3070DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
8c1a1077
PJ
3071 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
3072 (string)
5806161b 3073 Lisp_Object string;
ca1d1d23
JB
3074{
3075 register unsigned char *in, *out, *end;
3076 register unsigned char *temp;
0c8533c6 3077 int backslashes_added = 0;
ca1d1d23 3078
b7826503 3079 CHECK_STRING (string);
ca1d1d23 3080
d5db4077 3081 temp = (unsigned char *) alloca (SBYTES (string) * 2);
ca1d1d23
JB
3082
3083 /* Now copy the data into the new string, inserting escapes. */
3084
d5db4077
KR
3085 in = SDATA (string);
3086 end = in + SBYTES (string);
177c0ea7 3087 out = temp;
ca1d1d23
JB
3088
3089 for (; in != end; in++)
3090 {
66bc6082 3091 if (*in == '['
ca1d1d23
JB
3092 || *in == '*' || *in == '.' || *in == '\\'
3093 || *in == '?' || *in == '+'
3094 || *in == '^' || *in == '$')
0c8533c6 3095 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
3096 *out++ = *in;
3097 }
3098
3f8100f1 3099 return make_specified_string (temp,
d5db4077 3100 SCHARS (string) + backslashes_added,
3f8100f1
RS
3101 out - temp,
3102 STRING_MULTIBYTE (string));
ca1d1d23 3103}
177c0ea7 3104\f
dfcf069d 3105void
ca1d1d23
JB
3106syms_of_search ()
3107{
3108 register int i;
3109
487282dc
KH
3110 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3111 {
3112 searchbufs[i].buf.allocated = 100;
b23c0a83 3113 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
487282dc
KH
3114 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3115 searchbufs[i].regexp = Qnil;
ecdb561e 3116 searchbufs[i].whitespace_regexp = Qnil;
487282dc 3117 staticpro (&searchbufs[i].regexp);
aa77b5ce 3118 staticpro (&searchbufs[i].whitespace_regexp);
487282dc
KH
3119 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3120 }
3121 searchbuf_head = &searchbufs[0];
ca1d1d23
JB
3122
3123 Qsearch_failed = intern ("search-failed");
3124 staticpro (&Qsearch_failed);
3125 Qinvalid_regexp = intern ("invalid-regexp");
3126 staticpro (&Qinvalid_regexp);
3127
3128 Fput (Qsearch_failed, Qerror_conditions,
3129 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3130 Fput (Qsearch_failed, Qerror_message,
3131 build_string ("Search failed"));
3132
3133 Fput (Qinvalid_regexp, Qerror_conditions,
3134 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3135 Fput (Qinvalid_regexp, Qerror_message,
3136 build_string ("Invalid regexp"));
3137
daa37602
JB
3138 last_thing_searched = Qnil;
3139 staticpro (&last_thing_searched);
3140
0f6af254
DK
3141 saved_last_thing_searched = Qnil;
3142 staticpro (&saved_last_thing_searched);
3143
41a33295 3144 DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
e2811828 3145 doc: /* Regexp to substitute for bunches of spaces in regexp search.
f31a9a68
RS
3146Some commands use this for user-specified regexps.
3147Spaces that occur inside character classes or repetition operators
3148or other such regexp constructs are not replaced with this.
3149A value of nil (which is the normal value) means treat spaces literally. */);
41a33295 3150 Vsearch_spaces_regexp = Qnil;
f31a9a68 3151
ca1d1d23 3152 defsubr (&Slooking_at);
b819a390
RS
3153 defsubr (&Sposix_looking_at);
3154 defsubr (&Sstring_match);
3155 defsubr (&Sposix_string_match);
ca1d1d23
JB
3156 defsubr (&Ssearch_forward);
3157 defsubr (&Ssearch_backward);
3158 defsubr (&Sword_search_forward);
3159 defsubr (&Sword_search_backward);
3160 defsubr (&Sre_search_forward);
3161 defsubr (&Sre_search_backward);
b819a390
RS
3162 defsubr (&Sposix_search_forward);
3163 defsubr (&Sposix_search_backward);
ca1d1d23
JB
3164 defsubr (&Sreplace_match);
3165 defsubr (&Smatch_beginning);
3166 defsubr (&Smatch_end);
3167 defsubr (&Smatch_data);
3f1c005b 3168 defsubr (&Sset_match_data);
ca1d1d23
JB
3169 defsubr (&Sregexp_quote);
3170}
ab5796a9
MB
3171
3172/* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3173 (do not change this comment) */