Update FSF's address.
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
bd2cbd56
SM
2 Copyright (C) 1985, 86,87,93,94,97,98, 1999, 2004
3 Free Software Foundation, Inc.
ca1d1d23
JB
4
5This file is part of GNU Emacs.
6
7GNU Emacs is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
7c938215 9the Free Software Foundation; either version 2, or (at your option)
ca1d1d23
JB
10any later version.
11
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU Emacs; see the file COPYING. If not, write to
4fc5845f
LK
19the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20Boston, MA 02110-1301, USA. */
ca1d1d23
JB
21
22
18160b98 23#include <config.h>
ca1d1d23
JB
24#include "lisp.h"
25#include "syntax.h"
5679531d 26#include "category.h"
ca1d1d23 27#include "buffer.h"
5679531d 28#include "charset.h"
9169c321 29#include "region-cache.h"
ca1d1d23 30#include "commands.h"
9ac0d9e0 31#include "blockinput.h"
bf1760bb 32#include "intervals.h"
4746118a 33
ca1d1d23
JB
34#include <sys/types.h>
35#include "regex.h"
36
1d288aef 37#define REGEXP_CACHE_SIZE 20
ca1d1d23 38
487282dc
KH
39/* If the regexp is non-nil, then the buffer contains the compiled form
40 of that regexp, suitable for searching. */
1d288aef
RS
41struct regexp_cache
42{
487282dc 43 struct regexp_cache *next;
ecdb561e 44 Lisp_Object regexp, whitespace_regexp;
487282dc
KH
45 struct re_pattern_buffer buf;
46 char fastmap[0400];
b819a390
RS
47 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
48 char posix;
487282dc 49};
ca1d1d23 50
487282dc
KH
51/* The instances of that struct. */
52struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 53
487282dc
KH
54/* The head of the linked list; points to the most recently used buffer. */
55struct regexp_cache *searchbuf_head;
ca1d1d23 56
ca1d1d23 57
4746118a
JB
58/* Every call to re_match, etc., must pass &search_regs as the regs
59 argument unless you can show it is unnecessary (i.e., if re_match
60 is certainly going to be called again before region-around-match
61 can be called).
62
63 Since the registers are now dynamically allocated, we need to make
64 sure not to refer to the Nth register before checking that it has
1113d9db
JB
65 been allocated by checking search_regs.num_regs.
66
67 The regex code keeps track of whether it has allocated the search
487282dc
KH
68 buffer using bits in the re_pattern_buffer. This means that whenever
69 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
70 allocated any registers, and will allocate new registers the next
71 time you call a searching or matching function. Therefore, we need
72 to call re_set_registers after compiling a new pattern or after
73 setting the match registers, so that the regex functions will be
74 able to free or re-allocate it properly. */
ca1d1d23
JB
75static struct re_registers search_regs;
76
daa37602
JB
77/* The buffer in which the last search was performed, or
78 Qt if the last search was done in a string;
79 Qnil if no searching has been done yet. */
80static Lisp_Object last_thing_searched;
ca1d1d23 81
8e6208c5 82/* error condition signaled when regexp compile_pattern fails */
ca1d1d23
JB
83
84Lisp_Object Qinvalid_regexp;
85
41a33295 86Lisp_Object Vsearch_spaces_regexp;
f31a9a68 87
ca325161 88static void set_search_regs ();
044f81f1 89static void save_search_regs ();
facdc750
RS
90static int simple_search ();
91static int boyer_moore ();
b819a390
RS
92static int search_buffer ();
93
ca1d1d23
JB
94static void
95matcher_overflow ()
96{
97 error ("Stack overflow in regexp matcher");
98}
99
b819a390
RS
100/* Compile a regexp and signal a Lisp error if anything goes wrong.
101 PATTERN is the pattern to compile.
102 CP is the place to put the result.
facdc750 103 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
104 REGP is the structure that says where to store the "register"
105 values that will result from matching this pattern.
106 If it is 0, we should compile the pattern not to record any
107 subexpression bounds.
108 POSIX is nonzero if we want full backtracking (POSIX style)
5679531d
KH
109 for this pattern. 0 means backtrack only enough to get a valid match.
110 MULTIBYTE is nonzero if we want to handle multibyte characters in
111 PATTERN. 0 means all multibyte characters are recognized just as
ecdb561e
RS
112 sequences of binary data.
113
41a33295 114 The behavior also depends on Vsearch_spaces_regexp. */
ca1d1d23 115
487282dc 116static void
5679531d 117compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
487282dc 118 struct regexp_cache *cp;
ca1d1d23 119 Lisp_Object pattern;
facdc750 120 Lisp_Object translate;
487282dc 121 struct re_registers *regp;
b819a390 122 int posix;
5679531d 123 int multibyte;
ca1d1d23 124{
7276d3d8 125 unsigned char *raw_pattern;
f8bd51c4 126 int raw_pattern_size;
d451e4db 127 char *val;
b819a390 128 reg_syntax_t old;
ca1d1d23 129
f8bd51c4
KH
130 /* MULTIBYTE says whether the text to be searched is multibyte.
131 We must convert PATTERN to match that, or we will not really
132 find things right. */
133
134 if (multibyte == STRING_MULTIBYTE (pattern))
135 {
d5db4077
KR
136 raw_pattern = (unsigned char *) SDATA (pattern);
137 raw_pattern_size = SBYTES (pattern);
f8bd51c4
KH
138 }
139 else if (multibyte)
140 {
d5db4077
KR
141 raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
142 SCHARS (pattern));
7276d3d8 143 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
144 copy_text (SDATA (pattern), raw_pattern,
145 SCHARS (pattern), 0, 1);
f8bd51c4
KH
146 }
147 else
148 {
149 /* Converting multibyte to single-byte.
150
151 ??? Perhaps this conversion should be done in a special way
152 by subtracting nonascii-insert-offset from each non-ASCII char,
153 so that only the multibyte chars which really correspond to
154 the chosen single-byte character set can possibly match. */
d5db4077 155 raw_pattern_size = SCHARS (pattern);
7276d3d8 156 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
157 copy_text (SDATA (pattern), raw_pattern,
158 SBYTES (pattern), 1, 0);
f8bd51c4
KH
159 }
160
487282dc 161 cp->regexp = Qnil;
59fab369 162 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
b819a390 163 cp->posix = posix;
5679531d 164 cp->buf.multibyte = multibyte;
41a33295 165 cp->whitespace_regexp = Vsearch_spaces_regexp;
9ac0d9e0 166 BLOCK_INPUT;
fb4a568d 167 old = re_set_syntax (RE_SYNTAX_EMACS
b819a390 168 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
f31a9a68 169
41a33295
RS
170 re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
171 : SDATA (Vsearch_spaces_regexp));
f31a9a68 172
7276d3d8
RS
173 val = (char *) re_compile_pattern ((char *)raw_pattern,
174 raw_pattern_size, &cp->buf);
f31a9a68
RS
175
176 re_set_whitespace_regexp (NULL);
177
b819a390 178 re_set_syntax (old);
9ac0d9e0 179 UNBLOCK_INPUT;
ca1d1d23 180 if (val)
487282dc 181 Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
1113d9db 182
487282dc 183 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
184}
185
6efc7887
RS
186/* Shrink each compiled regexp buffer in the cache
187 to the size actually used right now.
188 This is called from garbage collection. */
189
190void
191shrink_regexp_cache ()
192{
a968f437 193 struct regexp_cache *cp;
6efc7887
RS
194
195 for (cp = searchbuf_head; cp != 0; cp = cp->next)
196 {
197 cp->buf.allocated = cp->buf.used;
198 cp->buf.buffer
b23c0a83 199 = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
6efc7887
RS
200 }
201}
202
487282dc 203/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
204 the cache.
205 PATTERN is the pattern to compile.
facdc750 206 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
207 REGP is the structure that says where to store the "register"
208 values that will result from matching this pattern.
209 If it is 0, we should compile the pattern not to record any
210 subexpression bounds.
211 POSIX is nonzero if we want full backtracking (POSIX style)
212 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
213
214struct re_pattern_buffer *
0c8533c6 215compile_pattern (pattern, regp, translate, posix, multibyte)
487282dc
KH
216 Lisp_Object pattern;
217 struct re_registers *regp;
facdc750 218 Lisp_Object translate;
0c8533c6 219 int posix, multibyte;
487282dc
KH
220{
221 struct regexp_cache *cp, **cpp;
222
223 for (cpp = &searchbuf_head; ; cpp = &cp->next)
224 {
225 cp = *cpp;
f1b9c7c1
KR
226 /* Entries are initialized to nil, and may be set to nil by
227 compile_pattern_1 if the pattern isn't valid. Don't apply
49a5f770
KR
228 string accessors in those cases. However, compile_pattern_1
229 is only applied to the cache entry we pick here to reuse. So
230 nil should never appear before a non-nil entry. */
7c752c80 231 if (NILP (cp->regexp))
f1b9c7c1 232 goto compile_it;
d5db4077 233 if (SCHARS (cp->regexp) == SCHARS (pattern)
cf69b13e 234 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
1d288aef 235 && !NILP (Fstring_equal (cp->regexp, pattern))
59fab369 236 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
5679531d 237 && cp->posix == posix
ecdb561e 238 && cp->buf.multibyte == multibyte
41a33295 239 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp)))
487282dc
KH
240 break;
241
f1b9c7c1
KR
242 /* If we're at the end of the cache, compile into the nil cell
243 we found, or the last (least recently used) cell with a
244 string value. */
487282dc
KH
245 if (cp->next == 0)
246 {
f1b9c7c1 247 compile_it:
5679531d 248 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
487282dc
KH
249 break;
250 }
251 }
252
253 /* When we get here, cp (aka *cpp) contains the compiled pattern,
254 either because we found it in the cache or because we just compiled it.
255 Move it to the front of the queue to mark it as most recently used. */
256 *cpp = cp->next;
257 cp->next = searchbuf_head;
258 searchbuf_head = cp;
1113d9db 259
6639708c
RS
260 /* Advise the searching functions about the space we have allocated
261 for register data. */
262 if (regp)
263 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
264
487282dc 265 return &cp->buf;
ca1d1d23
JB
266}
267
268/* Error condition used for failing searches */
269Lisp_Object Qsearch_failed;
270
271Lisp_Object
272signal_failure (arg)
273 Lisp_Object arg;
274{
275 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
276 return Qnil;
277}
278\f
b819a390
RS
279static Lisp_Object
280looking_at_1 (string, posix)
ca1d1d23 281 Lisp_Object string;
b819a390 282 int posix;
ca1d1d23
JB
283{
284 Lisp_Object val;
285 unsigned char *p1, *p2;
286 int s1, s2;
287 register int i;
487282dc 288 struct re_pattern_buffer *bufp;
ca1d1d23 289
7074fde6
FP
290 if (running_asynch_code)
291 save_search_regs ();
292
b7826503 293 CHECK_STRING (string);
487282dc
KH
294 bufp = compile_pattern (string, &search_regs,
295 (!NILP (current_buffer->case_fold_search)
0190922f 296 ? current_buffer->case_canon_table : Qnil),
0c8533c6
RS
297 posix,
298 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23
JB
299
300 immediate_quit = 1;
301 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
302
303 /* Get pointers and sizes of the two strings
304 that make up the visible portion of the buffer. */
305
306 p1 = BEGV_ADDR;
fa8ed3e0 307 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 308 p2 = GAP_END_ADDR;
fa8ed3e0 309 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
310 if (s1 < 0)
311 {
312 p2 = p1;
fa8ed3e0 313 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
314 s1 = 0;
315 }
316 if (s2 < 0)
317 {
fa8ed3e0 318 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
319 s2 = 0;
320 }
8bb43c28
RS
321
322 re_match_object = Qnil;
177c0ea7 323
487282dc 324 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
fa8ed3e0
RS
325 PT_BYTE - BEGV_BYTE, &search_regs,
326 ZV_BYTE - BEGV_BYTE);
de182d70 327 immediate_quit = 0;
177c0ea7 328
ca1d1d23
JB
329 if (i == -2)
330 matcher_overflow ();
331
332 val = (0 <= i ? Qt : Qnil);
fa8ed3e0
RS
333 if (i >= 0)
334 for (i = 0; i < search_regs.num_regs; i++)
335 if (search_regs.start[i] >= 0)
336 {
337 search_regs.start[i]
338 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
339 search_regs.end[i]
340 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
341 }
a3668d92 342 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
343 return val;
344}
345
b819a390 346DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
8c1a1077
PJ
347 doc: /* Return t if text after point matches regular expression REGEXP.
348This function modifies the match data that `match-beginning',
349`match-end' and `match-data' access; save and restore the match
350data if you want to preserve them. */)
351 (regexp)
94f94972 352 Lisp_Object regexp;
b819a390 353{
94f94972 354 return looking_at_1 (regexp, 0);
b819a390
RS
355}
356
357DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
8c1a1077
PJ
358 doc: /* Return t if text after point matches regular expression REGEXP.
359Find the longest match, in accord with Posix regular expression rules.
360This function modifies the match data that `match-beginning',
361`match-end' and `match-data' access; save and restore the match
362data if you want to preserve them. */)
363 (regexp)
94f94972 364 Lisp_Object regexp;
b819a390 365{
94f94972 366 return looking_at_1 (regexp, 1);
b819a390
RS
367}
368\f
369static Lisp_Object
370string_match_1 (regexp, string, start, posix)
ca1d1d23 371 Lisp_Object regexp, string, start;
b819a390 372 int posix;
ca1d1d23
JB
373{
374 int val;
487282dc 375 struct re_pattern_buffer *bufp;
0c8533c6
RS
376 int pos, pos_byte;
377 int i;
ca1d1d23 378
7074fde6
FP
379 if (running_asynch_code)
380 save_search_regs ();
381
b7826503
PJ
382 CHECK_STRING (regexp);
383 CHECK_STRING (string);
ca1d1d23
JB
384
385 if (NILP (start))
0c8533c6 386 pos = 0, pos_byte = 0;
ca1d1d23
JB
387 else
388 {
d5db4077 389 int len = SCHARS (string);
ca1d1d23 390
b7826503 391 CHECK_NUMBER (start);
0c8533c6
RS
392 pos = XINT (start);
393 if (pos < 0 && -pos <= len)
394 pos = len + pos;
395 else if (0 > pos || pos > len)
ca1d1d23 396 args_out_of_range (string, start);
0c8533c6 397 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
398 }
399
487282dc
KH
400 bufp = compile_pattern (regexp, &search_regs,
401 (!NILP (current_buffer->case_fold_search)
0190922f 402 ? current_buffer->case_canon_table : Qnil),
0c8533c6
RS
403 posix,
404 STRING_MULTIBYTE (string));
ca1d1d23 405 immediate_quit = 1;
8bb43c28 406 re_match_object = string;
177c0ea7 407
d5db4077
KR
408 val = re_search (bufp, (char *) SDATA (string),
409 SBYTES (string), pos_byte,
410 SBYTES (string) - pos_byte,
ca1d1d23
JB
411 &search_regs);
412 immediate_quit = 0;
daa37602 413 last_thing_searched = Qt;
ca1d1d23
JB
414 if (val == -2)
415 matcher_overflow ();
416 if (val < 0) return Qnil;
0c8533c6
RS
417
418 for (i = 0; i < search_regs.num_regs; i++)
419 if (search_regs.start[i] >= 0)
420 {
421 search_regs.start[i]
422 = string_byte_to_char (string, search_regs.start[i]);
423 search_regs.end[i]
424 = string_byte_to_char (string, search_regs.end[i]);
425 }
426
427 return make_number (string_byte_to_char (string, val));
ca1d1d23 428}
e59a8453 429
b819a390 430DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
8c1a1077
PJ
431 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
432Case is ignored if `case-fold-search' is non-nil in the current buffer.
433If third arg START is non-nil, start search at that index in STRING.
434For index of first char beyond the match, do (match-end 0).
435`match-end' and `match-beginning' also give indices of substrings
2bd2f32d
RS
436matched by parenthesis constructs in the pattern.
437
438You can use the function `match-string' to extract the substrings
439matched by the parenthesis constructions in REGEXP. */)
8c1a1077 440 (regexp, string, start)
b819a390
RS
441 Lisp_Object regexp, string, start;
442{
443 return string_match_1 (regexp, string, start, 0);
444}
445
446DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
8c1a1077
PJ
447 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
448Find the longest match, in accord with Posix regular expression rules.
449Case is ignored if `case-fold-search' is non-nil in the current buffer.
450If third arg START is non-nil, start search at that index in STRING.
451For index of first char beyond the match, do (match-end 0).
452`match-end' and `match-beginning' also give indices of substrings
453matched by parenthesis constructs in the pattern. */)
454 (regexp, string, start)
b819a390
RS
455 Lisp_Object regexp, string, start;
456{
457 return string_match_1 (regexp, string, start, 1);
458}
459
e59a8453
RS
460/* Match REGEXP against STRING, searching all of STRING,
461 and return the index of the match, or negative on failure.
462 This does not clobber the match data. */
463
464int
465fast_string_match (regexp, string)
466 Lisp_Object regexp, string;
467{
468 int val;
487282dc 469 struct re_pattern_buffer *bufp;
e59a8453 470
facdc750
RS
471 bufp = compile_pattern (regexp, 0, Qnil,
472 0, STRING_MULTIBYTE (string));
e59a8453 473 immediate_quit = 1;
8bb43c28 474 re_match_object = string;
177c0ea7 475
d5db4077
KR
476 val = re_search (bufp, (char *) SDATA (string),
477 SBYTES (string), 0,
478 SBYTES (string), 0);
e59a8453
RS
479 immediate_quit = 0;
480 return val;
481}
5679531d
KH
482
483/* Match REGEXP against STRING, searching all of STRING ignoring case,
484 and return the index of the match, or negative on failure.
0c8533c6
RS
485 This does not clobber the match data.
486 We assume that STRING contains single-byte characters. */
5679531d
KH
487
488extern Lisp_Object Vascii_downcase_table;
489
490int
b4577c63 491fast_c_string_match_ignore_case (regexp, string)
5679531d 492 Lisp_Object regexp;
96b80561 493 const char *string;
5679531d
KH
494{
495 int val;
496 struct re_pattern_buffer *bufp;
497 int len = strlen (string);
498
0c8533c6 499 regexp = string_make_unibyte (regexp);
b4577c63 500 re_match_object = Qt;
5679531d 501 bufp = compile_pattern (regexp, 0,
0190922f 502 Vascii_canon_table, 0,
f8bd51c4 503 0);
5679531d
KH
504 immediate_quit = 1;
505 val = re_search (bufp, string, len, 0, len, 0);
506 immediate_quit = 0;
507 return val;
508}
be5f4dfb
KH
509
510/* Like fast_string_match but ignore case. */
511
512int
513fast_string_match_ignore_case (regexp, string)
514 Lisp_Object regexp, string;
515{
516 int val;
517 struct re_pattern_buffer *bufp;
518
0190922f 519 bufp = compile_pattern (regexp, 0, Vascii_canon_table,
be5f4dfb
KH
520 0, STRING_MULTIBYTE (string));
521 immediate_quit = 1;
522 re_match_object = string;
523
524 val = re_search (bufp, (char *) SDATA (string),
525 SBYTES (string), 0,
526 SBYTES (string), 0);
527 immediate_quit = 0;
528 return val;
529}
ca1d1d23 530\f
9169c321
JB
531/* The newline cache: remembering which sections of text have no newlines. */
532
533/* If the user has requested newline caching, make sure it's on.
534 Otherwise, make sure it's off.
535 This is our cheezy way of associating an action with the change of
536 state of a buffer-local variable. */
537static void
538newline_cache_on_off (buf)
539 struct buffer *buf;
540{
541 if (NILP (buf->cache_long_line_scans))
542 {
543 /* It should be off. */
544 if (buf->newline_cache)
545 {
546 free_region_cache (buf->newline_cache);
547 buf->newline_cache = 0;
548 }
549 }
550 else
551 {
552 /* It should be on. */
553 if (buf->newline_cache == 0)
554 buf->newline_cache = new_region_cache ();
555 }
556}
557
558\f
559/* Search for COUNT instances of the character TARGET between START and END.
560
561 If COUNT is positive, search forwards; END must be >= START.
562 If COUNT is negative, search backwards for the -COUNTth instance;
563 END must be <= START.
564 If COUNT is zero, do anything you please; run rogue, for all I care.
565
566 If END is zero, use BEGV or ZV instead, as appropriate for the
567 direction indicated by COUNT.
ffd56f97
JB
568
569 If we find COUNT instances, set *SHORTAGE to zero, and return the
a9f2a45f 570 position past the COUNTth match. Note that for reverse motion
5bfe95c9 571 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 572
9169c321
JB
573 If we don't find COUNT instances before reaching END, set *SHORTAGE
574 to the number of TARGETs left unfound, and return END.
ffd56f97 575
087a5f81
RS
576 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
577 except when inside redisplay. */
578
dfcf069d 579int
9169c321
JB
580scan_buffer (target, start, end, count, shortage, allow_quit)
581 register int target;
582 int start, end;
583 int count;
584 int *shortage;
087a5f81 585 int allow_quit;
ca1d1d23 586{
9169c321 587 struct region_cache *newline_cache;
177c0ea7 588 int direction;
ffd56f97 589
9169c321
JB
590 if (count > 0)
591 {
592 direction = 1;
593 if (! end) end = ZV;
594 }
595 else
596 {
597 direction = -1;
598 if (! end) end = BEGV;
599 }
ffd56f97 600
9169c321
JB
601 newline_cache_on_off (current_buffer);
602 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
603
604 if (shortage != 0)
605 *shortage = 0;
606
087a5f81 607 immediate_quit = allow_quit;
ca1d1d23 608
ffd56f97 609 if (count > 0)
9169c321 610 while (start != end)
ca1d1d23 611 {
9169c321
JB
612 /* Our innermost scanning loop is very simple; it doesn't know
613 about gaps, buffer ends, or the newline cache. ceiling is
614 the position of the last character before the next such
615 obstacle --- the last character the dumb search loop should
616 examine. */
fa8ed3e0
RS
617 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
618 int start_byte = CHAR_TO_BYTE (start);
67ce527d 619 int tem;
9169c321
JB
620
621 /* If we're looking for a newline, consult the newline cache
622 to see where we can avoid some scanning. */
623 if (target == '\n' && newline_cache)
624 {
625 int next_change;
626 immediate_quit = 0;
627 while (region_cache_forward
fa8ed3e0
RS
628 (current_buffer, newline_cache, start_byte, &next_change))
629 start_byte = next_change;
cbe0db0d 630 immediate_quit = allow_quit;
9169c321 631
fa8ed3e0
RS
632 /* START should never be after END. */
633 if (start_byte > ceiling_byte)
634 start_byte = ceiling_byte;
9169c321
JB
635
636 /* Now the text after start is an unknown region, and
637 next_change is the position of the next known region. */
fa8ed3e0 638 ceiling_byte = min (next_change - 1, ceiling_byte);
9169c321
JB
639 }
640
641 /* The dumb loop can only scan text stored in contiguous
642 bytes. BUFFER_CEILING_OF returns the last character
643 position that is contiguous, so the ceiling is the
644 position after that. */
67ce527d
KH
645 tem = BUFFER_CEILING_OF (start_byte);
646 ceiling_byte = min (tem, ceiling_byte);
9169c321
JB
647
648 {
177c0ea7 649 /* The termination address of the dumb loop. */
fa8ed3e0
RS
650 register unsigned char *ceiling_addr
651 = BYTE_POS_ADDR (ceiling_byte) + 1;
652 register unsigned char *cursor
653 = BYTE_POS_ADDR (start_byte);
9169c321
JB
654 unsigned char *base = cursor;
655
656 while (cursor < ceiling_addr)
657 {
658 unsigned char *scan_start = cursor;
659
660 /* The dumb loop. */
661 while (*cursor != target && ++cursor < ceiling_addr)
662 ;
663
664 /* If we're looking for newlines, cache the fact that
665 the region from start to cursor is free of them. */
666 if (target == '\n' && newline_cache)
667 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
668 start_byte + scan_start - base,
669 start_byte + cursor - base);
9169c321
JB
670
671 /* Did we find the target character? */
672 if (cursor < ceiling_addr)
673 {
674 if (--count == 0)
675 {
676 immediate_quit = 0;
fa8ed3e0 677 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
9169c321
JB
678 }
679 cursor++;
680 }
681 }
682
fa8ed3e0 683 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321 684 }
ca1d1d23
JB
685 }
686 else
9169c321
JB
687 while (start > end)
688 {
689 /* The last character to check before the next obstacle. */
fa8ed3e0
RS
690 int ceiling_byte = CHAR_TO_BYTE (end);
691 int start_byte = CHAR_TO_BYTE (start);
67ce527d 692 int tem;
9169c321
JB
693
694 /* Consult the newline cache, if appropriate. */
695 if (target == '\n' && newline_cache)
696 {
697 int next_change;
698 immediate_quit = 0;
699 while (region_cache_backward
fa8ed3e0
RS
700 (current_buffer, newline_cache, start_byte, &next_change))
701 start_byte = next_change;
cbe0db0d 702 immediate_quit = allow_quit;
9169c321
JB
703
704 /* Start should never be at or before end. */
fa8ed3e0
RS
705 if (start_byte <= ceiling_byte)
706 start_byte = ceiling_byte + 1;
9169c321
JB
707
708 /* Now the text before start is an unknown region, and
709 next_change is the position of the next known region. */
fa8ed3e0 710 ceiling_byte = max (next_change, ceiling_byte);
9169c321
JB
711 }
712
713 /* Stop scanning before the gap. */
67ce527d
KH
714 tem = BUFFER_FLOOR_OF (start_byte - 1);
715 ceiling_byte = max (tem, ceiling_byte);
9169c321
JB
716
717 {
718 /* The termination address of the dumb loop. */
fa8ed3e0
RS
719 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
720 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
721 unsigned char *base = cursor;
722
723 while (cursor >= ceiling_addr)
724 {
725 unsigned char *scan_start = cursor;
726
727 while (*cursor != target && --cursor >= ceiling_addr)
728 ;
729
730 /* If we're looking for newlines, cache the fact that
731 the region from after the cursor to start is free of them. */
732 if (target == '\n' && newline_cache)
733 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
734 start_byte + cursor - base,
735 start_byte + scan_start - base);
9169c321
JB
736
737 /* Did we find the target character? */
738 if (cursor >= ceiling_addr)
739 {
740 if (++count >= 0)
741 {
742 immediate_quit = 0;
fa8ed3e0 743 return BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
744 }
745 cursor--;
746 }
747 }
748
fa8ed3e0 749 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
750 }
751 }
752
ca1d1d23
JB
753 immediate_quit = 0;
754 if (shortage != 0)
ffd56f97 755 *shortage = count * direction;
9169c321 756 return start;
ca1d1d23 757}
fa8ed3e0
RS
758\f
759/* Search for COUNT instances of a line boundary, which means either a
760 newline or (if selective display enabled) a carriage return.
761 Start at START. If COUNT is negative, search backwards.
762
763 We report the resulting position by calling TEMP_SET_PT_BOTH.
764
765 If we find COUNT instances. we position after (always after,
766 even if scanning backwards) the COUNTth match, and return 0.
767
768 If we don't find COUNT instances before reaching the end of the
769 buffer (or the beginning, if scanning backwards), we return
770 the number of line boundaries left unfound, and position at
771 the limit we bumped up against.
772
773 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
d5d57b92 774 except in special cases. */
ca1d1d23 775
63fa018d 776int
fa8ed3e0
RS
777scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
778 int start, start_byte;
779 int limit, limit_byte;
780 register int count;
781 int allow_quit;
63fa018d 782{
fa8ed3e0
RS
783 int direction = ((count > 0) ? 1 : -1);
784
785 register unsigned char *cursor;
786 unsigned char *base;
787
788 register int ceiling;
789 register unsigned char *ceiling_addr;
790
d5d57b92
RS
791 int old_immediate_quit = immediate_quit;
792
fa8ed3e0
RS
793 /* The code that follows is like scan_buffer
794 but checks for either newline or carriage return. */
795
d5d57b92
RS
796 if (allow_quit)
797 immediate_quit++;
fa8ed3e0
RS
798
799 start_byte = CHAR_TO_BYTE (start);
800
801 if (count > 0)
802 {
803 while (start_byte < limit_byte)
804 {
805 ceiling = BUFFER_CEILING_OF (start_byte);
806 ceiling = min (limit_byte - 1, ceiling);
807 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
808 base = (cursor = BYTE_POS_ADDR (start_byte));
809 while (1)
810 {
811 while (*cursor != '\n' && ++cursor != ceiling_addr)
812 ;
813
814 if (cursor != ceiling_addr)
815 {
816 if (--count == 0)
817 {
d5d57b92 818 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
819 start_byte = start_byte + cursor - base + 1;
820 start = BYTE_TO_CHAR (start_byte);
821 TEMP_SET_PT_BOTH (start, start_byte);
822 return 0;
823 }
824 else
825 if (++cursor == ceiling_addr)
826 break;
827 }
828 else
829 break;
830 }
831 start_byte += cursor - base;
832 }
833 }
834 else
835 {
fa8ed3e0
RS
836 while (start_byte > limit_byte)
837 {
838 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
839 ceiling = max (limit_byte, ceiling);
840 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
841 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
842 while (1)
843 {
844 while (--cursor != ceiling_addr && *cursor != '\n')
845 ;
846
847 if (cursor != ceiling_addr)
848 {
849 if (++count == 0)
850 {
d5d57b92 851 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
852 /* Return the position AFTER the match we found. */
853 start_byte = start_byte + cursor - base + 1;
854 start = BYTE_TO_CHAR (start_byte);
855 TEMP_SET_PT_BOTH (start, start_byte);
856 return 0;
857 }
858 }
859 else
860 break;
861 }
862 /* Here we add 1 to compensate for the last decrement
863 of CURSOR, which took it past the valid range. */
864 start_byte += cursor - base + 1;
865 }
866 }
867
868 TEMP_SET_PT_BOTH (limit, limit_byte);
d5d57b92 869 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
870
871 return count * direction;
63fa018d
RS
872}
873
ca1d1d23 874int
fa8ed3e0 875find_next_newline_no_quit (from, cnt)
ca1d1d23
JB
876 register int from, cnt;
877{
fa8ed3e0 878 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
9169c321
JB
879}
880
9169c321
JB
881/* Like find_next_newline, but returns position before the newline,
882 not after, and only search up to TO. This isn't just
883 find_next_newline (...)-1, because you might hit TO. */
fa8ed3e0 884
9169c321
JB
885int
886find_before_next_newline (from, to, cnt)
cbe0db0d 887 int from, to, cnt;
9169c321
JB
888{
889 int shortage;
890 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
891
892 if (shortage == 0)
893 pos--;
177c0ea7 894
9169c321 895 return pos;
ca1d1d23
JB
896}
897\f
ca1d1d23
JB
898/* Subroutines of Lisp buffer search functions. */
899
900static Lisp_Object
b819a390 901search_command (string, bound, noerror, count, direction, RE, posix)
ca1d1d23
JB
902 Lisp_Object string, bound, noerror, count;
903 int direction;
904 int RE;
b819a390 905 int posix;
ca1d1d23
JB
906{
907 register int np;
9f43ad85 908 int lim, lim_byte;
ca1d1d23
JB
909 int n = direction;
910
911 if (!NILP (count))
912 {
b7826503 913 CHECK_NUMBER (count);
ca1d1d23
JB
914 n *= XINT (count);
915 }
916
b7826503 917 CHECK_STRING (string);
ca1d1d23 918 if (NILP (bound))
9f43ad85
RS
919 {
920 if (n > 0)
921 lim = ZV, lim_byte = ZV_BYTE;
922 else
923 lim = BEGV, lim_byte = BEGV_BYTE;
924 }
ca1d1d23
JB
925 else
926 {
b7826503 927 CHECK_NUMBER_COERCE_MARKER (bound);
ca1d1d23 928 lim = XINT (bound);
6ec8bbd2 929 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
930 error ("Invalid search bound (wrong side of point)");
931 if (lim > ZV)
9f43ad85 932 lim = ZV, lim_byte = ZV_BYTE;
588d2fd5 933 else if (lim < BEGV)
9f43ad85 934 lim = BEGV, lim_byte = BEGV_BYTE;
588d2fd5
KH
935 else
936 lim_byte = CHAR_TO_BYTE (lim);
ca1d1d23
JB
937 }
938
9f43ad85 939 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
ca1d1d23 940 (!NILP (current_buffer->case_fold_search)
facdc750 941 ? current_buffer->case_canon_table
3135e9fd 942 : Qnil),
ca1d1d23 943 (!NILP (current_buffer->case_fold_search)
facdc750 944 ? current_buffer->case_eqv_table
3135e9fd 945 : Qnil),
b819a390 946 posix);
ca1d1d23
JB
947 if (np <= 0)
948 {
949 if (NILP (noerror))
950 return signal_failure (string);
951 if (!EQ (noerror, Qt))
952 {
953 if (lim < BEGV || lim > ZV)
954 abort ();
9f43ad85 955 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
956 return Qnil;
957#if 0 /* This would be clean, but maybe programs depend on
958 a value of nil here. */
481399bf 959 np = lim;
a5f217b8 960#endif
ca1d1d23 961 }
481399bf
RS
962 else
963 return Qnil;
ca1d1d23
JB
964 }
965
966 if (np < BEGV || np > ZV)
967 abort ();
968
969 SET_PT (np);
970
971 return make_number (np);
972}
973\f
fa8ed3e0
RS
974/* Return 1 if REGEXP it matches just one constant string. */
975
b6d6a51c
KH
976static int
977trivial_regexp_p (regexp)
978 Lisp_Object regexp;
979{
d5db4077
KR
980 int len = SBYTES (regexp);
981 unsigned char *s = SDATA (regexp);
b6d6a51c
KH
982 while (--len >= 0)
983 {
984 switch (*s++)
985 {
986 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
987 return 0;
988 case '\\':
989 if (--len < 0)
990 return 0;
991 switch (*s++)
992 {
993 case '|': case '(': case ')': case '`': case '\'': case 'b':
994 case 'B': case '<': case '>': case 'w': case 'W': case 's':
29f89fe7 995 case 'S': case '=': case '{': case '}': case '_':
5679531d 996 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 997 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
998 case '6': case '7': case '8': case '9':
999 return 0;
1000 }
1001 }
1002 }
1003 return 1;
1004}
1005
ca325161 1006/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 1007 starting at position POS and stopping at position LIM,
b819a390 1008 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
1009 a regular expression if RE is true.
1010
1011 If N is positive, searching is forward and LIM must be greater than POS.
1012 If N is negative, searching is backward and LIM must be less than POS.
1013
facdc750 1014 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 1015 or else the position at the beginning of the Nth occurrence
b819a390
RS
1016 (if searching backward) or the end (if searching forward).
1017
1018 POSIX is nonzero if we want full backtracking (POSIX style)
1019 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 1020
aff2ce94
RS
1021#define TRANSLATE(out, trt, d) \
1022do \
1023 { \
1024 if (! NILP (trt)) \
1025 { \
1026 Lisp_Object temp; \
1027 temp = Faref (trt, make_number (d)); \
1028 if (INTEGERP (temp)) \
1029 out = XINT (temp); \
1030 else \
1031 out = d; \
1032 } \
1033 else \
1034 out = d; \
1035 } \
1036while (0)
facdc750 1037
b819a390 1038static int
9f43ad85
RS
1039search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1040 RE, trt, inverse_trt, posix)
ca1d1d23
JB
1041 Lisp_Object string;
1042 int pos;
9f43ad85 1043 int pos_byte;
ca1d1d23 1044 int lim;
9f43ad85 1045 int lim_byte;
ca1d1d23
JB
1046 int n;
1047 int RE;
facdc750
RS
1048 Lisp_Object trt;
1049 Lisp_Object inverse_trt;
b819a390 1050 int posix;
ca1d1d23 1051{
d5db4077
KR
1052 int len = SCHARS (string);
1053 int len_byte = SBYTES (string);
facdc750 1054 register int i;
ca1d1d23 1055
7074fde6
FP
1056 if (running_asynch_code)
1057 save_search_regs ();
1058
a7e4cdde 1059 /* Searching 0 times means don't move. */
ca1d1d23 1060 /* Null string is found at starting position. */
a7e4cdde 1061 if (len == 0 || n == 0)
ca325161 1062 {
0353b28f 1063 set_search_regs (pos_byte, 0);
ca325161
RS
1064 return pos;
1065 }
3f57a499 1066
41a33295 1067 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
ca1d1d23 1068 {
facdc750
RS
1069 unsigned char *p1, *p2;
1070 int s1, s2;
487282dc
KH
1071 struct re_pattern_buffer *bufp;
1072
0c8533c6
RS
1073 bufp = compile_pattern (string, &search_regs, trt, posix,
1074 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23 1075
ca1d1d23
JB
1076 immediate_quit = 1; /* Quit immediately if user types ^G,
1077 because letting this function finish
1078 can take too long. */
1079 QUIT; /* Do a pending quit right away,
1080 to avoid paradoxical behavior */
1081 /* Get pointers and sizes of the two strings
1082 that make up the visible portion of the buffer. */
1083
1084 p1 = BEGV_ADDR;
fa8ed3e0 1085 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1086 p2 = GAP_END_ADDR;
fa8ed3e0 1087 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1088 if (s1 < 0)
1089 {
1090 p2 = p1;
fa8ed3e0 1091 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1092 s1 = 0;
1093 }
1094 if (s2 < 0)
1095 {
fa8ed3e0 1096 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1097 s2 = 0;
1098 }
8bb43c28 1099 re_match_object = Qnil;
177c0ea7 1100
ca1d1d23
JB
1101 while (n < 0)
1102 {
42db823b 1103 int val;
487282dc 1104 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1105 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1106 &search_regs,
42db823b 1107 /* Don't allow match past current point */
4996330b 1108 pos_byte - BEGV_BYTE);
ca1d1d23 1109 if (val == -2)
b6d6a51c
KH
1110 {
1111 matcher_overflow ();
1112 }
ca1d1d23
JB
1113 if (val >= 0)
1114 {
26aff150 1115 pos_byte = search_regs.start[0] + BEGV_BYTE;
4746118a 1116 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1117 if (search_regs.start[i] >= 0)
1118 {
fa8ed3e0
RS
1119 search_regs.start[i]
1120 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1121 search_regs.end[i]
1122 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1123 }
a3668d92 1124 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1125 /* Set pos to the new position. */
1126 pos = search_regs.start[0];
1127 }
1128 else
1129 {
1130 immediate_quit = 0;
1131 return (n);
1132 }
1133 n++;
1134 }
1135 while (n > 0)
1136 {
42db823b 1137 int val;
487282dc 1138 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1139 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1140 &search_regs,
1141 lim_byte - BEGV_BYTE);
ca1d1d23 1142 if (val == -2)
b6d6a51c
KH
1143 {
1144 matcher_overflow ();
1145 }
ca1d1d23
JB
1146 if (val >= 0)
1147 {
26aff150 1148 pos_byte = search_regs.end[0] + BEGV_BYTE;
4746118a 1149 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1150 if (search_regs.start[i] >= 0)
1151 {
fa8ed3e0
RS
1152 search_regs.start[i]
1153 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1154 search_regs.end[i]
1155 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1156 }
a3668d92 1157 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1158 pos = search_regs.end[0];
1159 }
1160 else
1161 {
1162 immediate_quit = 0;
1163 return (0 - n);
1164 }
1165 n--;
1166 }
1167 immediate_quit = 0;
1168 return (pos);
1169 }
1170 else /* non-RE case */
1171 {
facdc750
RS
1172 unsigned char *raw_pattern, *pat;
1173 int raw_pattern_size;
1174 int raw_pattern_size_byte;
1175 unsigned char *patbuf;
1176 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
d5db4077 1177 unsigned char *base_pat = SDATA (string);
0190922f
KH
1178 /* Set to nozero if we find a non-ASCII char that need
1179 translation. */
1180 int charset_base = 0;
040272ce 1181 int boyer_moore_ok = 1;
facdc750
RS
1182
1183 /* MULTIBYTE says whether the text to be searched is multibyte.
1184 We must convert PATTERN to match that, or we will not really
1185 find things right. */
1186
1187 if (multibyte == STRING_MULTIBYTE (string))
1188 {
d5db4077
KR
1189 raw_pattern = (unsigned char *) SDATA (string);
1190 raw_pattern_size = SCHARS (string);
1191 raw_pattern_size_byte = SBYTES (string);
facdc750
RS
1192 }
1193 else if (multibyte)
1194 {
d5db4077 1195 raw_pattern_size = SCHARS (string);
facdc750 1196 raw_pattern_size_byte
d5db4077 1197 = count_size_as_multibyte (SDATA (string),
facdc750 1198 raw_pattern_size);
7276d3d8 1199 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
d5db4077
KR
1200 copy_text (SDATA (string), raw_pattern,
1201 SCHARS (string), 0, 1);
facdc750
RS
1202 }
1203 else
1204 {
1205 /* Converting multibyte to single-byte.
1206
1207 ??? Perhaps this conversion should be done in a special way
1208 by subtracting nonascii-insert-offset from each non-ASCII char,
1209 so that only the multibyte chars which really correspond to
1210 the chosen single-byte character set can possibly match. */
d5db4077
KR
1211 raw_pattern_size = SCHARS (string);
1212 raw_pattern_size_byte = SCHARS (string);
7276d3d8 1213 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
1214 copy_text (SDATA (string), raw_pattern,
1215 SBYTES (string), 1, 0);
facdc750
RS
1216 }
1217
1218 /* Copy and optionally translate the pattern. */
1219 len = raw_pattern_size;
1220 len_byte = raw_pattern_size_byte;
1221 patbuf = (unsigned char *) alloca (len_byte);
1222 pat = patbuf;
1223 base_pat = raw_pattern;
1224 if (multibyte)
1225 {
0190922f
KH
1226 /* Fill patbuf by translated characters in STRING while
1227 checking if we can use boyer-moore search. If TRT is
1228 non-nil, we can use boyer-moore search only if TRT can be
1229 represented by the byte array of 256 elements. For that,
1230 all non-ASCII case-equivalents of all case-senstive
1231 characters in STRING must belong to the same charset and
1232 row. */
1233
facdc750
RS
1234 while (--len >= 0)
1235 {
0190922f 1236 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
aff2ce94 1237 int c, translated, inverse;
facdc750
RS
1238 int in_charlen, charlen;
1239
1240 /* If we got here and the RE flag is set, it's because we're
1241 dealing with a regexp known to be trivial, so the backslash
1242 just quotes the next character. */
1243 if (RE && *base_pat == '\\')
1244 {
1245 len--;
0190922f 1246 raw_pattern_size--;
facdc750
RS
1247 len_byte--;
1248 base_pat++;
1249 }
1250
1251 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
040272ce 1252
0190922f 1253 if (NILP (trt))
facdc750 1254 {
0190922f
KH
1255 str = base_pat;
1256 charlen = in_charlen;
facdc750 1257 }
0190922f 1258 else
facdc750 1259 {
0190922f
KH
1260 /* Translate the character. */
1261 TRANSLATE (translated, trt, c);
1262 charlen = CHAR_STRING (translated, str_base);
1263 str = str_base;
1264
1265 /* Check if C has any other case-equivalents. */
1266 TRANSLATE (inverse, inverse_trt, c);
1267 /* If so, check if we can use boyer-moore. */
1268 if (c != inverse && boyer_moore_ok)
1269 {
1270 /* Check if all equivalents belong to the same
1271 charset & row. Note that the check of C
1272 itself is done by the last iteration. Note
1273 also that we don't have to check ASCII
1274 characters because boyer-moore search can
1275 always handle their translation. */
1276 while (1)
1277 {
1278 if (! ASCII_BYTE_P (inverse))
1279 {
1280 if (SINGLE_BYTE_CHAR_P (inverse))
1281 {
1282 /* Boyer-moore search can't handle a
1283 translation of an eight-bit
1284 character. */
1285 boyer_moore_ok = 0;
1286 break;
1287 }
1288 else if (charset_base == 0)
1289 charset_base = inverse & ~CHAR_FIELD3_MASK;
1290 else if ((inverse & ~CHAR_FIELD3_MASK)
1291 != charset_base)
1292 {
1293 boyer_moore_ok = 0;
1294 break;
1295 }
1296 }
1297 if (c == inverse)
1298 break;
1299 TRANSLATE (inverse, inverse_trt, inverse);
1300 }
1301 }
aff2ce94 1302 }
facdc750
RS
1303
1304 /* Store this character into the translated pattern. */
1305 bcopy (str, pat, charlen);
1306 pat += charlen;
1307 base_pat += in_charlen;
1308 len_byte -= in_charlen;
1309 }
1310 }
1311 else
1312 {
040272ce
KH
1313 /* Unibyte buffer. */
1314 charset_base = 0;
facdc750
RS
1315 while (--len >= 0)
1316 {
040272ce 1317 int c, translated;
facdc750
RS
1318
1319 /* If we got here and the RE flag is set, it's because we're
1320 dealing with a regexp known to be trivial, so the backslash
1321 just quotes the next character. */
1322 if (RE && *base_pat == '\\')
1323 {
1324 len--;
0190922f 1325 raw_pattern_size--;
facdc750
RS
1326 base_pat++;
1327 }
1328 c = *base_pat++;
aff2ce94 1329 TRANSLATE (translated, trt, c);
facdc750
RS
1330 *pat++ = translated;
1331 }
1332 }
1333
1334 len_byte = pat - patbuf;
1335 len = raw_pattern_size;
1336 pat = base_pat = patbuf;
1337
040272ce 1338 if (boyer_moore_ok)
facdc750 1339 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
aff2ce94
RS
1340 pos, pos_byte, lim, lim_byte,
1341 charset_base);
facdc750
RS
1342 else
1343 return simple_search (n, pat, len, len_byte, trt,
1344 pos, pos_byte, lim, lim_byte);
1345 }
1346}
1347\f
1348/* Do a simple string search N times for the string PAT,
1349 whose length is LEN/LEN_BYTE,
1350 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1351 TRT is the translation table.
f8bd51c4 1352
facdc750
RS
1353 Return the character position where the match is found.
1354 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1355
facdc750
RS
1356 This kind of search works regardless of what is in PAT and
1357 regardless of what is in TRT. It is used in cases where
1358 boyer_moore cannot work. */
1359
1360static int
1361simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1362 int n;
1363 unsigned char *pat;
1364 int len, len_byte;
1365 Lisp_Object trt;
1366 int pos, pos_byte;
1367 int lim, lim_byte;
1368{
1369 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
ab228c24 1370 int forward = n > 0;
facdc750
RS
1371
1372 if (lim > pos && multibyte)
1373 while (n > 0)
1374 {
1375 while (1)
f8bd51c4 1376 {
facdc750
RS
1377 /* Try matching at position POS. */
1378 int this_pos = pos;
1379 int this_pos_byte = pos_byte;
1380 int this_len = len;
1381 int this_len_byte = len_byte;
1382 unsigned char *p = pat;
1383 if (pos + len > lim)
1384 goto stop;
1385
1386 while (this_len > 0)
1387 {
1388 int charlen, buf_charlen;
ab228c24 1389 int pat_ch, buf_ch;
facdc750 1390
ab228c24 1391 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1392 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1393 ZV_BYTE - this_pos_byte,
1394 buf_charlen);
aff2ce94 1395 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1396
1397 if (buf_ch != pat_ch)
1398 break;
ab228c24
RS
1399
1400 this_len_byte -= charlen;
1401 this_len--;
1402 p += charlen;
1403
1404 this_pos_byte += buf_charlen;
1405 this_pos++;
facdc750
RS
1406 }
1407
1408 if (this_len == 0)
1409 {
1410 pos += len;
1411 pos_byte += len_byte;
1412 break;
1413 }
1414
1415 INC_BOTH (pos, pos_byte);
f8bd51c4 1416 }
facdc750
RS
1417
1418 n--;
1419 }
1420 else if (lim > pos)
1421 while (n > 0)
1422 {
1423 while (1)
f8bd51c4 1424 {
facdc750
RS
1425 /* Try matching at position POS. */
1426 int this_pos = pos;
1427 int this_len = len;
1428 unsigned char *p = pat;
1429
1430 if (pos + len > lim)
1431 goto stop;
1432
1433 while (this_len > 0)
1434 {
1435 int pat_ch = *p++;
1436 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1437 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1438
1439 if (buf_ch != pat_ch)
1440 break;
ab228c24
RS
1441
1442 this_len--;
1443 this_pos++;
facdc750
RS
1444 }
1445
1446 if (this_len == 0)
1447 {
1448 pos += len;
1449 break;
1450 }
1451
1452 pos++;
f8bd51c4 1453 }
facdc750
RS
1454
1455 n--;
1456 }
1457 /* Backwards search. */
1458 else if (lim < pos && multibyte)
1459 while (n < 0)
1460 {
1461 while (1)
f8bd51c4 1462 {
facdc750
RS
1463 /* Try matching at position POS. */
1464 int this_pos = pos - len;
1465 int this_pos_byte = pos_byte - len_byte;
1466 int this_len = len;
1467 int this_len_byte = len_byte;
1468 unsigned char *p = pat;
1469
1470 if (pos - len < lim)
1471 goto stop;
1472
1473 while (this_len > 0)
1474 {
1475 int charlen, buf_charlen;
ab228c24 1476 int pat_ch, buf_ch;
facdc750 1477
ab228c24 1478 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1479 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1480 ZV_BYTE - this_pos_byte,
1481 buf_charlen);
aff2ce94 1482 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1483
1484 if (buf_ch != pat_ch)
1485 break;
ab228c24
RS
1486
1487 this_len_byte -= charlen;
1488 this_len--;
1489 p += charlen;
1490 this_pos_byte += buf_charlen;
1491 this_pos++;
facdc750
RS
1492 }
1493
1494 if (this_len == 0)
1495 {
1496 pos -= len;
1497 pos_byte -= len_byte;
1498 break;
1499 }
1500
1501 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1502 }
1503
facdc750
RS
1504 n++;
1505 }
1506 else if (lim < pos)
1507 while (n < 0)
1508 {
1509 while (1)
b6d6a51c 1510 {
facdc750
RS
1511 /* Try matching at position POS. */
1512 int this_pos = pos - len;
1513 int this_len = len;
1514 unsigned char *p = pat;
1515
1516 if (pos - len < lim)
1517 goto stop;
1518
1519 while (this_len > 0)
1520 {
1521 int pat_ch = *p++;
1522 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1523 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1524
1525 if (buf_ch != pat_ch)
1526 break;
ab228c24
RS
1527 this_len--;
1528 this_pos++;
facdc750
RS
1529 }
1530
1531 if (this_len == 0)
b6d6a51c 1532 {
facdc750
RS
1533 pos -= len;
1534 break;
b6d6a51c 1535 }
facdc750
RS
1536
1537 pos--;
b6d6a51c 1538 }
facdc750
RS
1539
1540 n++;
b6d6a51c 1541 }
facdc750
RS
1542
1543 stop:
1544 if (n == 0)
aff2ce94 1545 {
ab228c24
RS
1546 if (forward)
1547 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1548 else
1549 set_search_regs (multibyte ? pos_byte : pos, len_byte);
aff2ce94
RS
1550
1551 return pos;
1552 }
facdc750
RS
1553 else if (n > 0)
1554 return -n;
1555 else
1556 return n;
1557}
1558\f
0190922f 1559/* Do Boyer-Moore search N times for the string BASE_PAT,
facdc750
RS
1560 whose length is LEN/LEN_BYTE,
1561 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1562 DIRECTION says which direction we search in.
1563 TRT and INVERSE_TRT are translation tables.
0190922f 1564 Characters in PAT are already translated by TRT.
facdc750 1565
0190922f
KH
1566 This kind of search works if all the characters in BASE_PAT that
1567 have nontrivial translation are the same aside from the last byte.
1568 This makes it possible to translate just the last byte of a
1569 character, and do so after just a simple test of the context.
1570 CHARSET_BASE is nonzero iff there is such a non-ASCII character.
facdc750
RS
1571
1572 If that criterion is not satisfied, do not call this function. */
1573
1574static int
1575boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
aff2ce94 1576 pos, pos_byte, lim, lim_byte, charset_base)
facdc750
RS
1577 int n;
1578 unsigned char *base_pat;
1579 int len, len_byte;
1580 Lisp_Object trt;
1581 Lisp_Object inverse_trt;
1582 int pos, pos_byte;
1583 int lim, lim_byte;
aff2ce94 1584 int charset_base;
facdc750
RS
1585{
1586 int direction = ((n > 0) ? 1 : -1);
1587 register int dirlen;
a968f437 1588 int infinity, limit, stride_for_teases = 0;
facdc750
RS
1589 register int *BM_tab;
1590 int *BM_tab_base;
177c0ea7 1591 register unsigned char *cursor, *p_limit;
facdc750 1592 register int i, j;
cb6792d2 1593 unsigned char *pat, *pat_end;
facdc750
RS
1594 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1595
1596 unsigned char simple_translate[0400];
0190922f
KH
1597 /* These are set to the preceding bytes of a byte to be translated
1598 if charset_base is nonzero. As the maximum byte length of a
1599 multibyte character is 4, we have to check at most three previous
1600 bytes. */
1601 int translate_prev_byte1 = 0;
1602 int translate_prev_byte2 = 0;
1603 int translate_prev_byte3 = 0;
facdc750
RS
1604
1605#ifdef C_ALLOCA
1606 int BM_tab_space[0400];
1607 BM_tab = &BM_tab_space[0];
1608#else
1609 BM_tab = (int *) alloca (0400 * sizeof (int));
1610#endif
1611 /* The general approach is that we are going to maintain that we know */
1612 /* the first (closest to the present position, in whatever direction */
1613 /* we're searching) character that could possibly be the last */
1614 /* (furthest from present position) character of a valid match. We */
1615 /* advance the state of our knowledge by looking at that character */
1616 /* and seeing whether it indeed matches the last character of the */
1617 /* pattern. If it does, we take a closer look. If it does not, we */
1618 /* move our pointer (to putative last characters) as far as is */
1619 /* logically possible. This amount of movement, which I call a */
1620 /* stride, will be the length of the pattern if the actual character */
1621 /* appears nowhere in the pattern, otherwise it will be the distance */
1622 /* from the last occurrence of that character to the end of the */
1623 /* pattern. */
1624 /* As a coding trick, an enormous stride is coded into the table for */
1625 /* characters that match the last character. This allows use of only */
1626 /* a single test, a test for having gone past the end of the */
1627 /* permissible match region, to test for both possible matches (when */
1628 /* the stride goes past the end immediately) and failure to */
177c0ea7 1629 /* match (where you get nudged past the end one stride at a time). */
facdc750
RS
1630
1631 /* Here we make a "mickey mouse" BM table. The stride of the search */
1632 /* is determined only by the last character of the putative match. */
1633 /* If that character does not match, we will stride the proper */
1634 /* distance to propose a match that superimposes it on the last */
1635 /* instance of a character that matches it (per trt), or misses */
177c0ea7 1636 /* it entirely if there is none. */
facdc750
RS
1637
1638 dirlen = len_byte * direction;
1639 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
cb6792d2
RS
1640
1641 /* Record position after the end of the pattern. */
1642 pat_end = base_pat + len_byte;
1643 /* BASE_PAT points to a character that we start scanning from.
1644 It is the first character in a forward search,
1645 the last character in a backward search. */
facdc750 1646 if (direction < 0)
cb6792d2
RS
1647 base_pat = pat_end - 1;
1648
facdc750
RS
1649 BM_tab_base = BM_tab;
1650 BM_tab += 0400;
1651 j = dirlen; /* to get it in a register */
1652 /* A character that does not appear in the pattern induces a */
1653 /* stride equal to the pattern length. */
1654 while (BM_tab_base != BM_tab)
1655 {
1656 *--BM_tab = j;
1657 *--BM_tab = j;
1658 *--BM_tab = j;
1659 *--BM_tab = j;
1660 }
1661
1662 /* We use this for translation, instead of TRT itself.
1663 We fill this in to handle the characters that actually
1664 occur in the pattern. Others don't matter anyway! */
1665 bzero (simple_translate, sizeof simple_translate);
1666 for (i = 0; i < 0400; i++)
1667 simple_translate[i] = i;
1668
0190922f
KH
1669 if (charset_base)
1670 {
1671 /* Setup translate_prev_byte1/2/3 from CHARSET_BASE. Only a
1672 byte following them are the target of translation. */
1673 int sample_char = charset_base | 0x20;
1674 unsigned char str[MAX_MULTIBYTE_LENGTH];
1675 int len = CHAR_STRING (sample_char, str);
1676
1677 translate_prev_byte1 = str[len - 2];
1678 if (len > 2)
1679 {
1680 translate_prev_byte2 = str[len - 3];
1681 if (len > 3)
1682 translate_prev_byte3 = str[len - 4];
1683 }
1684 }
1685
facdc750
RS
1686 i = 0;
1687 while (i != infinity)
1688 {
cb6792d2 1689 unsigned char *ptr = base_pat + i;
facdc750
RS
1690 i += direction;
1691 if (i == dirlen)
1692 i = infinity;
1693 if (! NILP (trt))
ca1d1d23 1694 {
620a5178
KH
1695 /* If the byte currently looking at is the last of a
1696 character to check case-equivalents, set CH to that
1697 character. An ASCII character and a non-ASCII character
1698 matching with CHARSET_BASE are to be checked. */
0190922f
KH
1699 int ch = -1;
1700
1701 if (ASCII_BYTE_P (*ptr) || ! multibyte)
1702 ch = *ptr;
620a5178
KH
1703 else if (charset_base
1704 && (pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1]))
ca1d1d23 1705 {
620a5178
KH
1706 unsigned char *charstart = ptr - 1;
1707
1708 while (! (CHAR_HEAD_P (*charstart)))
1709 charstart--;
1710 ch = STRING_CHAR (charstart, ptr - charstart + 1);
0190922f
KH
1711 if (charset_base != (ch & ~CHAR_FIELD3_MASK))
1712 ch = -1;
ca1d1d23 1713 }
facdc750 1714
620a5178
KH
1715 if (ch > 0400)
1716 j = ((unsigned char) ch) | 0200;
1717 else
1718 j = *ptr;
1719
facdc750
RS
1720 if (i == infinity)
1721 stride_for_teases = BM_tab[j];
ab228c24 1722
facdc750
RS
1723 BM_tab[j] = dirlen - i;
1724 /* A translation table is accompanied by its inverse -- see */
177c0ea7 1725 /* comment following downcase_table for details */
0190922f 1726 if (ch >= 0)
ab228c24
RS
1727 {
1728 int starting_ch = ch;
620a5178 1729 int starting_j = j;
0190922f 1730
ab228c24
RS
1731 while (1)
1732 {
1733 TRANSLATE (ch, inverse_trt, ch);
1734 if (ch > 0400)
1735 j = ((unsigned char) ch) | 0200;
1736 else
1737 j = (unsigned char) ch;
1738
1739 /* For all the characters that map into CH,
1740 set up simple_translate to map the last byte
1741 into STARTING_J. */
1742 simple_translate[j] = starting_j;
1743 if (ch == starting_ch)
1744 break;
1745 BM_tab[j] = dirlen - i;
1746 }
1747 }
facdc750
RS
1748 }
1749 else
1750 {
1751 j = *ptr;
1752
1753 if (i == infinity)
1754 stride_for_teases = BM_tab[j];
1755 BM_tab[j] = dirlen - i;
ca1d1d23 1756 }
facdc750
RS
1757 /* stride_for_teases tells how much to stride if we get a */
1758 /* match on the far character but are subsequently */
1759 /* disappointed, by recording what the stride would have been */
1760 /* for that character if the last character had been */
1761 /* different. */
1762 }
1763 infinity = dirlen - infinity;
1764 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1765 /* loop invariant - POS_BYTE points at where last char (first
1766 char if reverse) of pattern would align in a possible match. */
1767 while (n != 0)
1768 {
1769 int tail_end;
1770 unsigned char *tail_end_ptr;
1771
1772 /* It's been reported that some (broken) compiler thinks that
1773 Boolean expressions in an arithmetic context are unsigned.
1774 Using an explicit ?1:0 prevents this. */
1775 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1776 < 0)
1777 return (n * (0 - direction));
1778 /* First we do the part we can by pointers (maybe nothing) */
1779 QUIT;
1780 pat = base_pat;
1781 limit = pos_byte - dirlen + direction;
67ce527d
KH
1782 if (direction > 0)
1783 {
1784 limit = BUFFER_CEILING_OF (limit);
1785 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1786 can take on without hitting edge of buffer or the gap. */
1787 limit = min (limit, pos_byte + 20000);
1788 limit = min (limit, lim_byte - 1);
1789 }
1790 else
1791 {
1792 limit = BUFFER_FLOOR_OF (limit);
1793 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1794 can take on without hitting edge of buffer or the gap. */
1795 limit = max (limit, pos_byte - 20000);
1796 limit = max (limit, lim_byte);
1797 }
facdc750
RS
1798 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1799 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1800
1801 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1802 {
facdc750
RS
1803 unsigned char *p2;
1804
1805 p_limit = BYTE_POS_ADDR (limit);
1806 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1807 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1808 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1809 {
facdc750 1810 if (direction > 0) /* worth duplicating */
ca1d1d23 1811 {
facdc750
RS
1812 /* Use signed comparison if appropriate
1813 to make cursor+infinity sure to be > p_limit.
1814 Assuming that the buffer lies in a range of addresses
1815 that are all "positive" (as ints) or all "negative",
1816 either kind of comparison will work as long
1817 as we don't step by infinity. So pick the kind
1818 that works when we do step by infinity. */
1819 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1820 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1821 cursor += BM_tab[*cursor];
ca1d1d23 1822 else
facdc750
RS
1823 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1824 cursor += BM_tab[*cursor];
1825 }
1826 else
1827 {
1828 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1829 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1830 cursor += BM_tab[*cursor];
1831 else
1832 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1833 cursor += BM_tab[*cursor];
1834 }
ca1d1d23 1835/* If you are here, cursor is beyond the end of the searched region. */
facdc750
RS
1836/* This can happen if you match on the far character of the pattern, */
1837/* because the "stride" of that character is infinity, a number able */
1838/* to throw you well beyond the end of the search. It can also */
1839/* happen if you fail to match within the permitted region and would */
1840/* otherwise try a character beyond that region */
1841 if ((cursor - p_limit) * direction <= len_byte)
1842 break; /* a small overrun is genuine */
1843 cursor -= infinity; /* large overrun = hit */
1844 i = dirlen - direction;
1845 if (! NILP (trt))
1846 {
1847 while ((i -= direction) + direction != 0)
ca1d1d23 1848 {
facdc750
RS
1849 int ch;
1850 cursor -= direction;
1851 /* Translate only the last byte of a character. */
1852 if (! multibyte
1853 || ((cursor == tail_end_ptr
1854 || CHAR_HEAD_P (cursor[1]))
1855 && (CHAR_HEAD_P (cursor[0])
0190922f
KH
1856 /* Check if this is the last byte of
1857 a translable character. */
1858 || (translate_prev_byte1 == cursor[-1]
1859 && (CHAR_HEAD_P (translate_prev_byte1)
1860 || (translate_prev_byte2 == cursor[-2]
1861 && (CHAR_HEAD_P (translate_prev_byte2)
1862 || (translate_prev_byte3 == cursor[-3]))))))))
facdc750
RS
1863 ch = simple_translate[*cursor];
1864 else
1865 ch = *cursor;
1866 if (pat[i] != ch)
1867 break;
ca1d1d23 1868 }
facdc750
RS
1869 }
1870 else
1871 {
1872 while ((i -= direction) + direction != 0)
ca1d1d23 1873 {
facdc750
RS
1874 cursor -= direction;
1875 if (pat[i] != *cursor)
1876 break;
ca1d1d23 1877 }
facdc750
RS
1878 }
1879 cursor += dirlen - i - direction; /* fix cursor */
1880 if (i + direction == 0)
1881 {
1882 int position;
0c8533c6 1883
facdc750 1884 cursor -= direction;
1113d9db 1885
facdc750
RS
1886 position = pos_byte + cursor - p2 + ((direction > 0)
1887 ? 1 - len_byte : 0);
1888 set_search_regs (position, len_byte);
ca325161 1889
facdc750
RS
1890 if ((n -= direction) != 0)
1891 cursor += dirlen; /* to resume search */
ca1d1d23 1892 else
facdc750
RS
1893 return ((direction > 0)
1894 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1895 }
facdc750
RS
1896 else
1897 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 1898 }
facdc750
RS
1899 pos_byte += cursor - p2;
1900 }
1901 else
1902 /* Now we'll pick up a clump that has to be done the hard */
1903 /* way because it covers a discontinuity */
1904 {
1905 limit = ((direction > 0)
1906 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1907 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1908 limit = ((direction > 0)
1909 ? min (limit + len_byte, lim_byte - 1)
1910 : max (limit - len_byte, lim_byte));
1911 /* LIMIT is now the last value POS_BYTE can have
1912 and still be valid for a possible match. */
1913 while (1)
ca1d1d23 1914 {
facdc750
RS
1915 /* This loop can be coded for space rather than */
1916 /* speed because it will usually run only once. */
1917 /* (the reach is at most len + 21, and typically */
177c0ea7 1918 /* does not exceed len) */
facdc750
RS
1919 while ((limit - pos_byte) * direction >= 0)
1920 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1921 /* now run the same tests to distinguish going off the */
1922 /* end, a match or a phony match. */
1923 if ((pos_byte - limit) * direction <= len_byte)
1924 break; /* ran off the end */
1925 /* Found what might be a match.
1926 Set POS_BYTE back to last (first if reverse) pos. */
1927 pos_byte -= infinity;
1928 i = dirlen - direction;
1929 while ((i -= direction) + direction != 0)
ca1d1d23 1930 {
facdc750
RS
1931 int ch;
1932 unsigned char *ptr;
1933 pos_byte -= direction;
1934 ptr = BYTE_POS_ADDR (pos_byte);
1935 /* Translate only the last byte of a character. */
1936 if (! multibyte
1937 || ((ptr == tail_end_ptr
1938 || CHAR_HEAD_P (ptr[1]))
1939 && (CHAR_HEAD_P (ptr[0])
0190922f
KH
1940 /* Check if this is the last byte of a
1941 translable character. */
1942 || (translate_prev_byte1 == ptr[-1]
1943 && (CHAR_HEAD_P (translate_prev_byte1)
1944 || (translate_prev_byte2 == ptr[-2]
1945 && (CHAR_HEAD_P (translate_prev_byte2)
1946 || translate_prev_byte3 == ptr[-3])))))))
facdc750
RS
1947 ch = simple_translate[*ptr];
1948 else
1949 ch = *ptr;
1950 if (pat[i] != ch)
1951 break;
1952 }
1953 /* Above loop has moved POS_BYTE part or all the way
1954 back to the first pos (last pos if reverse).
1955 Set it once again at the last (first if reverse) char. */
1956 pos_byte += dirlen - i- direction;
1957 if (i + direction == 0)
1958 {
1959 int position;
1960 pos_byte -= direction;
1113d9db 1961
facdc750 1962 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
0c8533c6 1963
facdc750 1964 set_search_regs (position, len_byte);
ca325161 1965
facdc750
RS
1966 if ((n -= direction) != 0)
1967 pos_byte += dirlen; /* to resume search */
ca1d1d23 1968 else
facdc750
RS
1969 return ((direction > 0)
1970 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1971 }
facdc750
RS
1972 else
1973 pos_byte += stride_for_teases;
1974 }
1975 }
1976 /* We have done one clump. Can we continue? */
1977 if ((lim_byte - pos_byte) * direction < 0)
1978 return ((0 - n) * direction);
ca1d1d23 1979 }
facdc750 1980 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 1981}
ca325161 1982
fa8ed3e0 1983/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
a7e4cdde
RS
1984 for the overall match just found in the current buffer.
1985 Also clear out the match data for registers 1 and up. */
ca325161
RS
1986
1987static void
fa8ed3e0
RS
1988set_search_regs (beg_byte, nbytes)
1989 int beg_byte, nbytes;
ca325161 1990{
a7e4cdde
RS
1991 int i;
1992
ca325161
RS
1993 /* Make sure we have registers in which to store
1994 the match position. */
1995 if (search_regs.num_regs == 0)
1996 {
2d4a771a
RS
1997 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1998 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 1999 search_regs.num_regs = 2;
ca325161
RS
2000 }
2001
a7e4cdde
RS
2002 /* Clear out the other registers. */
2003 for (i = 1; i < search_regs.num_regs; i++)
2004 {
2005 search_regs.start[i] = -1;
2006 search_regs.end[i] = -1;
2007 }
2008
fa8ed3e0
RS
2009 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2010 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 2011 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 2012}
ca1d1d23
JB
2013\f
2014/* Given a string of words separated by word delimiters,
2015 compute a regexp that matches those exact words
2016 separated by arbitrary punctuation. */
2017
2018static Lisp_Object
2019wordify (string)
2020 Lisp_Object string;
2021{
2022 register unsigned char *p, *o;
0c8533c6 2023 register int i, i_byte, len, punct_count = 0, word_count = 0;
ca1d1d23 2024 Lisp_Object val;
0c8533c6
RS
2025 int prev_c = 0;
2026 int adjust;
ca1d1d23 2027
b7826503 2028 CHECK_STRING (string);
d5db4077
KR
2029 p = SDATA (string);
2030 len = SCHARS (string);
ca1d1d23 2031
0c8533c6
RS
2032 for (i = 0, i_byte = 0; i < len; )
2033 {
2034 int c;
177c0ea7 2035
eb99a8dd 2036 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
0c8533c6
RS
2037
2038 if (SYNTAX (c) != Sword)
2039 {
2040 punct_count++;
2041 if (i > 0 && SYNTAX (prev_c) == Sword)
2042 word_count++;
2043 }
ca1d1d23 2044
0c8533c6
RS
2045 prev_c = c;
2046 }
2047
2048 if (SYNTAX (prev_c) == Sword)
2049 word_count++;
2050 if (!word_count)
b07b65aa 2051 return empty_string;
0c8533c6
RS
2052
2053 adjust = - punct_count + 5 * (word_count - 1) + 4;
8a2df937
RS
2054 if (STRING_MULTIBYTE (string))
2055 val = make_uninit_multibyte_string (len + adjust,
d5db4077 2056 SBYTES (string)
8a2df937
RS
2057 + adjust);
2058 else
2059 val = make_uninit_string (len + adjust);
ca1d1d23 2060
d5db4077 2061 o = SDATA (val);
ca1d1d23
JB
2062 *o++ = '\\';
2063 *o++ = 'b';
1e9582d4 2064 prev_c = 0;
ca1d1d23 2065
1e9582d4
RS
2066 for (i = 0, i_byte = 0; i < len; )
2067 {
2068 int c;
2069 int i_byte_orig = i_byte;
177c0ea7 2070
eb99a8dd 2071 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1e9582d4
RS
2072
2073 if (SYNTAX (c) == Sword)
2074 {
5d69fe10 2075 bcopy (SDATA (string) + i_byte_orig, o,
1e9582d4
RS
2076 i_byte - i_byte_orig);
2077 o += i_byte - i_byte_orig;
2078 }
2079 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2080 {
2081 *o++ = '\\';
2082 *o++ = 'W';
2083 *o++ = '\\';
2084 *o++ = 'W';
2085 *o++ = '*';
2086 }
2087
2088 prev_c = c;
2089 }
ca1d1d23
JB
2090
2091 *o++ = '\\';
2092 *o++ = 'b';
2093
2094 return val;
2095}
2096\f
2097DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
8c1a1077
PJ
2098 "MSearch backward: ",
2099 doc: /* Search backward from point for STRING.
2100Set point to the beginning of the occurrence found, and return point.
2101An optional second argument bounds the search; it is a buffer position.
2102The match found must not extend before that position.
2103Optional third argument, if t, means if fail just return nil (no error).
2104 If not nil and not t, position at limit of search and return nil.
2105Optional fourth argument is repeat count--search for successive occurrences.
2106
2107Search case-sensitivity is determined by the value of the variable
2108`case-fold-search', which see.
2109
2110See also the functions `match-beginning', `match-end' and `replace-match'. */)
2111 (string, bound, noerror, count)
ca1d1d23
JB
2112 Lisp_Object string, bound, noerror, count;
2113{
b819a390 2114 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
2115}
2116
6af43974 2117DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
8c1a1077
PJ
2118 doc: /* Search forward from point for STRING.
2119Set point to the end of the occurrence found, and return point.
2120An optional second argument bounds the search; it is a buffer position.
2121The match found must not extend after that position. nil is equivalent
2122 to (point-max).
2123Optional third argument, if t, means if fail just return nil (no error).
2124 If not nil and not t, move to limit of search and return nil.
2125Optional fourth argument is repeat count--search for successive occurrences.
2126
2127Search case-sensitivity is determined by the value of the variable
2128`case-fold-search', which see.
2129
2130See also the functions `match-beginning', `match-end' and `replace-match'. */)
2131 (string, bound, noerror, count)
ca1d1d23
JB
2132 Lisp_Object string, bound, noerror, count;
2133{
b819a390 2134 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
2135}
2136
2137DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
8c1a1077
PJ
2138 "sWord search backward: ",
2139 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2140Set point to the beginning of the occurrence found, and return point.
2141An optional second argument bounds the search; it is a buffer position.
2142The match found must not extend before that position.
2143Optional third argument, if t, means if fail just return nil (no error).
2144 If not nil and not t, move to limit of search and return nil.
2145Optional fourth argument is repeat count--search for successive occurrences. */)
2146 (string, bound, noerror, count)
ca1d1d23
JB
2147 Lisp_Object string, bound, noerror, count;
2148{
b819a390 2149 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2150}
2151
2152DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
8c1a1077
PJ
2153 "sWord search: ",
2154 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2155Set point to the end of the occurrence found, and return point.
2156An optional second argument bounds the search; it is a buffer position.
2157The match found must not extend after that position.
2158Optional third argument, if t, means if fail just return nil (no error).
2159 If not nil and not t, move to limit of search and return nil.
2160Optional fourth argument is repeat count--search for successive occurrences. */)
2161 (string, bound, noerror, count)
ca1d1d23
JB
2162 Lisp_Object string, bound, noerror, count;
2163{
b819a390 2164 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
2165}
2166
2167DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
8c1a1077
PJ
2168 "sRE search backward: ",
2169 doc: /* Search backward from point for match for regular expression REGEXP.
2170Set point to the beginning of the match, and return point.
2171The match found is the one starting last in the buffer
2172and yet ending before the origin of the search.
2173An optional second argument bounds the search; it is a buffer position.
2174The match found must start at or after that position.
2175Optional third argument, if t, means if fail just return nil (no error).
2176 If not nil and not t, move to limit of search and return nil.
2177Optional fourth argument is repeat count--search for successive occurrences.
2178See also the functions `match-beginning', `match-end', `match-string',
2179and `replace-match'. */)
2180 (regexp, bound, noerror, count)
19c0a730 2181 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2182{
b819a390 2183 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2184}
2185
2186DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
8c1a1077
PJ
2187 "sRE search: ",
2188 doc: /* Search forward from point for regular expression REGEXP.
2189Set point to the end of the occurrence found, and return point.
2190An optional second argument bounds the search; it is a buffer position.
2191The match found must not extend after that position.
2192Optional third argument, if t, means if fail just return nil (no error).
2193 If not nil and not t, move to limit of search and return nil.
2194Optional fourth argument is repeat count--search for successive occurrences.
2195See also the functions `match-beginning', `match-end', `match-string',
2196and `replace-match'. */)
2197 (regexp, bound, noerror, count)
19c0a730 2198 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2199{
b819a390
RS
2200 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2201}
2202
2203DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
8c1a1077
PJ
2204 "sPosix search backward: ",
2205 doc: /* Search backward from point for match for regular expression REGEXP.
2206Find the longest match in accord with Posix regular expression rules.
2207Set point to the beginning of the match, and return point.
2208The match found is the one starting last in the buffer
2209and yet ending before the origin of the search.
2210An optional second argument bounds the search; it is a buffer position.
2211The match found must start at or after that position.
2212Optional third argument, if t, means if fail just return nil (no error).
2213 If not nil and not t, move to limit of search and return nil.
2214Optional fourth argument is repeat count--search for successive occurrences.
2215See also the functions `match-beginning', `match-end', `match-string',
2216and `replace-match'. */)
2217 (regexp, bound, noerror, count)
b819a390
RS
2218 Lisp_Object regexp, bound, noerror, count;
2219{
2220 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2221}
2222
2223DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
8c1a1077
PJ
2224 "sPosix search: ",
2225 doc: /* Search forward from point for regular expression REGEXP.
2226Find the longest match in accord with Posix regular expression rules.
2227Set point to the end of the occurrence found, and return point.
2228An optional second argument bounds the search; it is a buffer position.
2229The match found must not extend after that position.
2230Optional third argument, if t, means if fail just return nil (no error).
2231 If not nil and not t, move to limit of search and return nil.
2232Optional fourth argument is repeat count--search for successive occurrences.
2233See also the functions `match-beginning', `match-end', `match-string',
2234and `replace-match'. */)
2235 (regexp, bound, noerror, count)
b819a390
RS
2236 Lisp_Object regexp, bound, noerror, count;
2237{
2238 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2239}
2240\f
d7a5ad5f 2241DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
8c1a1077 2242 doc: /* Replace text matched by last search with NEWTEXT.
4dd0c271
RS
2243Leave point at the end of the replacement text.
2244
8c1a1077
PJ
2245If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2246Otherwise maybe capitalize the whole text, or maybe just word initials,
2247based on the replaced text.
2248If the replaced text has only capital letters
2249and has at least one multiletter word, convert NEWTEXT to all caps.
4dd0c271
RS
2250Otherwise if all words are capitalized in the replaced text,
2251capitalize each word in NEWTEXT.
8c1a1077
PJ
2252
2253If third arg LITERAL is non-nil, insert NEWTEXT literally.
2254Otherwise treat `\\' as special:
2255 `\\&' in NEWTEXT means substitute original matched text.
2256 `\\N' means substitute what matched the Nth `\\(...\\)'.
2257 If Nth parens didn't match, substitute nothing.
2258 `\\\\' means insert one `\\'.
4dd0c271
RS
2259Case conversion does not apply to these substitutions.
2260
8c1a1077 2261FIXEDCASE and LITERAL are optional arguments.
8c1a1077
PJ
2262
2263The optional fourth argument STRING can be a string to modify.
2264This is meaningful when the previous match was done against STRING,
2265using `string-match'. When used this way, `replace-match'
2266creates and returns a new string made by copying STRING and replacing
2267the part of STRING that was matched.
2268
2269The optional fifth argument SUBEXP specifies a subexpression;
2270it says to replace just that subexpression with NEWTEXT,
2271rather than replacing the entire matched text.
2272This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2273`\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2274NEWTEXT in place of subexp N.
2275This is useful only after a regular expression search or match,
2276since only regular expressions have distinguished subexpressions. */)
2277 (newtext, fixedcase, literal, string, subexp)
d7a5ad5f 2278 Lisp_Object newtext, fixedcase, literal, string, subexp;
ca1d1d23
JB
2279{
2280 enum { nochange, all_caps, cap_initial } case_action;
ac3b28b1 2281 register int pos, pos_byte;
ca1d1d23 2282 int some_multiletter_word;
97832bd0 2283 int some_lowercase;
73dc8771 2284 int some_uppercase;
208767c3 2285 int some_nonuppercase_initial;
ca1d1d23 2286 register int c, prevc;
d7a5ad5f 2287 int sub;
3e18eecf 2288 int opoint, newpoint;
ca1d1d23 2289
b7826503 2290 CHECK_STRING (newtext);
ca1d1d23 2291
080c45fd 2292 if (! NILP (string))
b7826503 2293 CHECK_STRING (string);
080c45fd 2294
ca1d1d23
JB
2295 case_action = nochange; /* We tried an initialization */
2296 /* but some C compilers blew it */
4746118a
JB
2297
2298 if (search_regs.num_regs <= 0)
d72cdbfc 2299 error ("`replace-match' called before any match found");
4746118a 2300
d7a5ad5f
RS
2301 if (NILP (subexp))
2302 sub = 0;
2303 else
2304 {
b7826503 2305 CHECK_NUMBER (subexp);
d7a5ad5f
RS
2306 sub = XINT (subexp);
2307 if (sub < 0 || sub >= search_regs.num_regs)
2308 args_out_of_range (subexp, make_number (search_regs.num_regs));
2309 }
2310
080c45fd
RS
2311 if (NILP (string))
2312 {
d7a5ad5f
RS
2313 if (search_regs.start[sub] < BEGV
2314 || search_regs.start[sub] > search_regs.end[sub]
2315 || search_regs.end[sub] > ZV)
2316 args_out_of_range (make_number (search_regs.start[sub]),
2317 make_number (search_regs.end[sub]));
080c45fd
RS
2318 }
2319 else
2320 {
d7a5ad5f
RS
2321 if (search_regs.start[sub] < 0
2322 || search_regs.start[sub] > search_regs.end[sub]
d5db4077 2323 || search_regs.end[sub] > SCHARS (string))
d7a5ad5f
RS
2324 args_out_of_range (make_number (search_regs.start[sub]),
2325 make_number (search_regs.end[sub]));
080c45fd 2326 }
ca1d1d23
JB
2327
2328 if (NILP (fixedcase))
2329 {
2330 /* Decide how to casify by examining the matched text. */
ac3b28b1 2331 int last;
ca1d1d23 2332
ac3b28b1
KH
2333 pos = search_regs.start[sub];
2334 last = search_regs.end[sub];
fa8ed3e0
RS
2335
2336 if (NILP (string))
ac3b28b1 2337 pos_byte = CHAR_TO_BYTE (pos);
fa8ed3e0 2338 else
ac3b28b1 2339 pos_byte = string_char_to_byte (string, pos);
fa8ed3e0 2340
ca1d1d23
JB
2341 prevc = '\n';
2342 case_action = all_caps;
2343
2344 /* some_multiletter_word is set nonzero if any original word
2345 is more than one letter long. */
2346 some_multiletter_word = 0;
97832bd0 2347 some_lowercase = 0;
208767c3 2348 some_nonuppercase_initial = 0;
73dc8771 2349 some_uppercase = 0;
ca1d1d23 2350
ac3b28b1 2351 while (pos < last)
ca1d1d23 2352 {
080c45fd 2353 if (NILP (string))
ac3b28b1
KH
2354 {
2355 c = FETCH_CHAR (pos_byte);
2356 INC_BOTH (pos, pos_byte);
2357 }
080c45fd 2358 else
ac3b28b1 2359 FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
080c45fd 2360
ca1d1d23
JB
2361 if (LOWERCASEP (c))
2362 {
2363 /* Cannot be all caps if any original char is lower case */
2364
97832bd0 2365 some_lowercase = 1;
ca1d1d23 2366 if (SYNTAX (prevc) != Sword)
208767c3 2367 some_nonuppercase_initial = 1;
ca1d1d23
JB
2368 else
2369 some_multiletter_word = 1;
2370 }
2371 else if (!NOCASEP (c))
2372 {
73dc8771 2373 some_uppercase = 1;
97832bd0 2374 if (SYNTAX (prevc) != Sword)
c4d460ce 2375 ;
97832bd0 2376 else
ca1d1d23
JB
2377 some_multiletter_word = 1;
2378 }
208767c3
RS
2379 else
2380 {
2381 /* If the initial is a caseless word constituent,
2382 treat that like a lowercase initial. */
2383 if (SYNTAX (prevc) != Sword)
2384 some_nonuppercase_initial = 1;
2385 }
ca1d1d23
JB
2386
2387 prevc = c;
2388 }
2389
97832bd0
RS
2390 /* Convert to all caps if the old text is all caps
2391 and has at least one multiletter word. */
2392 if (! some_lowercase && some_multiletter_word)
2393 case_action = all_caps;
c4d460ce 2394 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2395 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2396 case_action = cap_initial;
208767c3 2397 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2398 /* Should x -> yz, operating on X, give Yz or YZ?
2399 We'll assume the latter. */
2400 case_action = all_caps;
97832bd0
RS
2401 else
2402 case_action = nochange;
ca1d1d23
JB
2403 }
2404
080c45fd
RS
2405 /* Do replacement in a string. */
2406 if (!NILP (string))
2407 {
2408 Lisp_Object before, after;
2409
2410 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2411 make_number (search_regs.start[sub]));
2412 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2413
636a5e28
RS
2414 /* Substitute parts of the match into NEWTEXT
2415 if desired. */
080c45fd
RS
2416 if (NILP (literal))
2417 {
d131e79c
RS
2418 int lastpos = 0;
2419 int lastpos_byte = 0;
080c45fd
RS
2420 /* We build up the substituted string in ACCUM. */
2421 Lisp_Object accum;
2422 Lisp_Object middle;
d5db4077 2423 int length = SBYTES (newtext);
080c45fd
RS
2424
2425 accum = Qnil;
2426
ac3b28b1 2427 for (pos_byte = 0, pos = 0; pos_byte < length;)
080c45fd
RS
2428 {
2429 int substart = -1;
6bbd7a29 2430 int subend = 0;
1e79ec24 2431 int delbackslash = 0;
080c45fd 2432
0c8533c6
RS
2433 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2434
080c45fd
RS
2435 if (c == '\\')
2436 {
0c8533c6 2437 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
177c0ea7 2438
080c45fd
RS
2439 if (c == '&')
2440 {
d7a5ad5f
RS
2441 substart = search_regs.start[sub];
2442 subend = search_regs.end[sub];
080c45fd 2443 }
5fbbc83b 2444 else if (c >= '1' && c <= '9')
080c45fd 2445 {
5fbbc83b
RS
2446 if (search_regs.start[c - '0'] >= 0
2447 && c <= search_regs.num_regs + '0')
080c45fd
RS
2448 {
2449 substart = search_regs.start[c - '0'];
2450 subend = search_regs.end[c - '0'];
2451 }
5fbbc83b
RS
2452 else
2453 {
2454 /* If that subexp did not match,
2455 replace \\N with nothing. */
2456 substart = 0;
2457 subend = 0;
2458 }
080c45fd 2459 }
1e79ec24
KH
2460 else if (c == '\\')
2461 delbackslash = 1;
636a5e28
RS
2462 else
2463 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2464 }
2465 if (substart >= 0)
2466 {
d131e79c
RS
2467 if (pos - 2 != lastpos)
2468 middle = substring_both (newtext, lastpos,
2469 lastpos_byte,
2470 pos - 2, pos_byte - 2);
080c45fd
RS
2471 else
2472 middle = Qnil;
2473 accum = concat3 (accum, middle,
0c8533c6
RS
2474 Fsubstring (string,
2475 make_number (substart),
080c45fd
RS
2476 make_number (subend)));
2477 lastpos = pos;
0c8533c6 2478 lastpos_byte = pos_byte;
080c45fd 2479 }
1e79ec24
KH
2480 else if (delbackslash)
2481 {
d131e79c
RS
2482 middle = substring_both (newtext, lastpos,
2483 lastpos_byte,
2484 pos - 1, pos_byte - 1);
0c8533c6 2485
1e79ec24
KH
2486 accum = concat2 (accum, middle);
2487 lastpos = pos;
0c8533c6 2488 lastpos_byte = pos_byte;
1e79ec24 2489 }
080c45fd
RS
2490 }
2491
d131e79c
RS
2492 if (pos != lastpos)
2493 middle = substring_both (newtext, lastpos,
2494 lastpos_byte,
0c8533c6 2495 pos, pos_byte);
080c45fd
RS
2496 else
2497 middle = Qnil;
2498
2499 newtext = concat2 (accum, middle);
2500 }
2501
636a5e28 2502 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2503 if (case_action == all_caps)
2504 newtext = Fupcase (newtext);
2505 else if (case_action == cap_initial)
2b2eead9 2506 newtext = Fupcase_initials (newtext);
080c45fd
RS
2507
2508 return concat3 (before, newtext, after);
2509 }
2510
09c4719e 2511 /* Record point, then move (quietly) to the start of the match. */
9160906f 2512 if (PT >= search_regs.end[sub])
b0eba991 2513 opoint = PT - ZV;
9160906f
RS
2514 else if (PT > search_regs.start[sub])
2515 opoint = search_regs.end[sub] - ZV;
b0eba991
RS
2516 else
2517 opoint = PT;
2518
886ed6ec
RS
2519 /* If we want non-literal replacement,
2520 perform substitution on the replacement string. */
2521 if (NILP (literal))
ca1d1d23 2522 {
d5db4077 2523 int length = SBYTES (newtext);
68e69fbd
RS
2524 unsigned char *substed;
2525 int substed_alloc_size, substed_len;
3bc25e52
KH
2526 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2527 int str_multibyte = STRING_MULTIBYTE (newtext);
2528 Lisp_Object rev_tbl;
886ed6ec 2529 int really_changed = 0;
3bc25e52
KH
2530
2531 rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2532 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2533 make_number (0))
2534 : Qnil);
ac3b28b1 2535
68e69fbd
RS
2536 substed_alloc_size = length * 2 + 100;
2537 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2538 substed_len = 0;
2539
3bc25e52
KH
2540 /* Go thru NEWTEXT, producing the actual text to insert in
2541 SUBSTED while adjusting multibyteness to that of the current
2542 buffer. */
ca1d1d23 2543
ac3b28b1 2544 for (pos_byte = 0, pos = 0; pos_byte < length;)
ca1d1d23 2545 {
68e69fbd 2546 unsigned char str[MAX_MULTIBYTE_LENGTH];
f8ce8a0d
GM
2547 unsigned char *add_stuff = NULL;
2548 int add_len = 0;
68e69fbd 2549 int idx = -1;
9a76659d 2550
3bc25e52
KH
2551 if (str_multibyte)
2552 {
eb99a8dd 2553 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
3bc25e52
KH
2554 if (!buf_multibyte)
2555 c = multibyte_char_to_unibyte (c, rev_tbl);
2556 }
2557 else
2558 {
2559 /* Note that we don't have to increment POS. */
5d69fe10 2560 c = SREF (newtext, pos_byte++);
3bc25e52
KH
2561 if (buf_multibyte)
2562 c = unibyte_char_to_multibyte (c);
2563 }
ac3b28b1 2564
68e69fbd
RS
2565 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2566 or set IDX to a match index, which means put that part
2567 of the buffer text into SUBSTED. */
2568
ca1d1d23
JB
2569 if (c == '\\')
2570 {
886ed6ec
RS
2571 really_changed = 1;
2572
3bc25e52
KH
2573 if (str_multibyte)
2574 {
eb99a8dd
KH
2575 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2576 pos, pos_byte);
3bc25e52
KH
2577 if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2578 c = multibyte_char_to_unibyte (c, rev_tbl);
2579 }
2580 else
2581 {
d5db4077 2582 c = SREF (newtext, pos_byte++);
3bc25e52
KH
2583 if (buf_multibyte)
2584 c = unibyte_char_to_multibyte (c);
2585 }
2586
ca1d1d23 2587 if (c == '&')
68e69fbd 2588 idx = sub;
78445046 2589 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
2590 {
2591 if (search_regs.start[c - '0'] >= 1)
68e69fbd 2592 idx = c - '0';
ca1d1d23 2593 }
636a5e28 2594 else if (c == '\\')
68e69fbd 2595 add_len = 1, add_stuff = "\\";
636a5e28 2596 else
3bc25e52
KH
2597 {
2598 xfree (substed);
2599 error ("Invalid use of `\\' in replacement text");
2600 }
ca1d1d23
JB
2601 }
2602 else
68e69fbd
RS
2603 {
2604 add_len = CHAR_STRING (c, str);
2605 add_stuff = str;
2606 }
2607
2608 /* If we want to copy part of a previous match,
2609 set up ADD_STUFF and ADD_LEN to point to it. */
2610 if (idx >= 0)
2611 {
2612 int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2613 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2614 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2615 move_gap (search_regs.start[idx]);
2616 add_stuff = BYTE_POS_ADDR (begbyte);
2617 }
2618
2619 /* Now the stuff we want to add to SUBSTED
2620 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2621
2622 /* Make sure SUBSTED is big enough. */
2623 if (substed_len + add_len >= substed_alloc_size)
2624 {
2625 substed_alloc_size = substed_len + add_len + 500;
2626 substed = (unsigned char *) xrealloc (substed,
2627 substed_alloc_size + 1);
2628 }
2629
2630 /* Now add to the end of SUBSTED. */
f8ce8a0d
GM
2631 if (add_stuff)
2632 {
2633 bcopy (add_stuff, substed + substed_len, add_len);
2634 substed_len += add_len;
2635 }
ca1d1d23 2636 }
68e69fbd 2637
886ed6ec 2638 if (really_changed)
80460525
KH
2639 {
2640 if (buf_multibyte)
2641 {
2642 int nchars = multibyte_chars_in_text (substed, substed_len);
68e69fbd 2643
80460525
KH
2644 newtext = make_multibyte_string (substed, nchars, substed_len);
2645 }
2646 else
2647 newtext = make_unibyte_string (substed, substed_len);
2648 }
68e69fbd 2649 xfree (substed);
ca1d1d23
JB
2650 }
2651
886ed6ec
RS
2652 /* Replace the old text with the new in the cleanest possible way. */
2653 replace_range (search_regs.start[sub], search_regs.end[sub],
2654 newtext, 1, 0, 1);
d5db4077 2655 newpoint = search_regs.start[sub] + SCHARS (newtext);
ca1d1d23
JB
2656
2657 if (case_action == all_caps)
886ed6ec
RS
2658 Fupcase_region (make_number (search_regs.start[sub]),
2659 make_number (newpoint));
ca1d1d23 2660 else if (case_action == cap_initial)
886ed6ec
RS
2661 Fupcase_initials_region (make_number (search_regs.start[sub]),
2662 make_number (newpoint));
3e18eecf 2663
98e942e0
RS
2664 /* Adjust search data for this change. */
2665 {
5b88a2c5 2666 int oldend = search_regs.end[sub];
41c01205 2667 int oldstart = search_regs.start[sub];
98e942e0
RS
2668 int change = newpoint - search_regs.end[sub];
2669 int i;
2670
2671 for (i = 0; i < search_regs.num_regs; i++)
2672 {
41c01205 2673 if (search_regs.start[i] >= oldend)
98e942e0 2674 search_regs.start[i] += change;
41c01205
DK
2675 else if (search_regs.start[i] > oldstart)
2676 search_regs.start[i] = oldstart;
2677 if (search_regs.end[i] >= oldend)
98e942e0 2678 search_regs.end[i] += change;
41c01205
DK
2679 else if (search_regs.end[i] > oldstart)
2680 search_regs.end[i] = oldstart;
98e942e0
RS
2681 }
2682 }
2683
b0eba991 2684 /* Put point back where it was in the text. */
8d808a65 2685 if (opoint <= 0)
fa8ed3e0 2686 TEMP_SET_PT (opoint + ZV);
b0eba991 2687 else
fa8ed3e0 2688 TEMP_SET_PT (opoint);
b0eba991
RS
2689
2690 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2691 move_if_not_intangible (newpoint);
177c0ea7 2692
ca1d1d23
JB
2693 return Qnil;
2694}
2695\f
2696static Lisp_Object
2697match_limit (num, beginningp)
2698 Lisp_Object num;
2699 int beginningp;
2700{
2701 register int n;
2702
b7826503 2703 CHECK_NUMBER (num);
ca1d1d23 2704 n = XINT (num);
f90a5bf5 2705 if (n < 0)
bd2cbd56 2706 args_out_of_range (num, make_number (0));
f90a5bf5
RS
2707 if (search_regs.num_regs <= 0)
2708 error ("No match data, because no search succeeded");
9b9ceb61 2709 if (n >= search_regs.num_regs
4746118a 2710 || search_regs.start[n] < 0)
ca1d1d23
JB
2711 return Qnil;
2712 return (make_number ((beginningp) ? search_regs.start[n]
2713 : search_regs.end[n]));
2714}
2715
2716DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
8c1a1077
PJ
2717 doc: /* Return position of start of text matched by last search.
2718SUBEXP, a number, specifies which parenthesized expression in the last
2719 regexp.
2720Value is nil if SUBEXPth pair didn't match, or there were less than
2721 SUBEXP pairs.
2722Zero means the entire text matched by the whole regexp or whole string. */)
2723 (subexp)
5806161b 2724 Lisp_Object subexp;
ca1d1d23 2725{
5806161b 2726 return match_limit (subexp, 1);
ca1d1d23
JB
2727}
2728
2729DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
8c1a1077
PJ
2730 doc: /* Return position of end of text matched by last search.
2731SUBEXP, a number, specifies which parenthesized expression in the last
2732 regexp.
2733Value is nil if SUBEXPth pair didn't match, or there were less than
2734 SUBEXP pairs.
2735Zero means the entire text matched by the whole regexp or whole string. */)
2736 (subexp)
5806161b 2737 Lisp_Object subexp;
ca1d1d23 2738{
5806161b 2739 return match_limit (subexp, 0);
177c0ea7 2740}
ca1d1d23 2741
abd0071c 2742DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
8c1a1077
PJ
2743 doc: /* Return a list containing all info on what the last search matched.
2744Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2745All the elements are markers or nil (nil if the Nth pair didn't match)
2746if the last match was on a buffer; integers or nil if a string was matched.
2747Use `store-match-data' to reinstate the data in this list.
2748
41c01205
DK
2749If INTEGERS (the optional first argument) is non-nil, always use
2750integers \(rather than markers) to represent buffer positions. In
2751this case, and if the last match was in a buffer, the buffer will get
2752stored as one additional element at the end of the list.
2753
abd0071c
KS
2754If REUSE is a list, reuse it as part of the value. If REUSE is long
2755enough to hold all the values, and if INTEGERS is non-nil, no consing
2756is done.
2757
2758If optional third arg RESEAT is non-nil, any previous markers on the
2759REUSE list will be modified to point to nowhere.
2760
140a6b7e 2761Return value is undefined if the last search failed. */)
abd0071c
KS
2762 (integers, reuse, reseat)
2763 Lisp_Object integers, reuse, reseat;
ca1d1d23 2764{
56256c2a 2765 Lisp_Object tail, prev;
4746118a 2766 Lisp_Object *data;
ca1d1d23
JB
2767 int i, len;
2768
abd0071c
KS
2769 if (!NILP (reseat))
2770 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2771 if (MARKERP (XCAR (tail)))
2772 {
51f10faa 2773 unchain_marker (XMARKER (XCAR (tail)));
abd0071c
KS
2774 XSETCAR (tail, Qnil);
2775 }
2776
daa37602 2777 if (NILP (last_thing_searched))
c36bcf1b 2778 return Qnil;
daa37602 2779
6bbd7a29
GM
2780 prev = Qnil;
2781
41c01205 2782 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
4746118a
JB
2783 * sizeof (Lisp_Object));
2784
41c01205 2785 len = 0;
4746118a 2786 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2787 {
2788 int start = search_regs.start[i];
2789 if (start >= 0)
2790 {
56256c2a
RS
2791 if (EQ (last_thing_searched, Qt)
2792 || ! NILP (integers))
ca1d1d23 2793 {
c235cce7
KH
2794 XSETFASTINT (data[2 * i], start);
2795 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2796 }
0ed62dc7 2797 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2798 {
2799 data[2 * i] = Fmake_marker ();
daa37602
JB
2800 Fset_marker (data[2 * i],
2801 make_number (start),
2802 last_thing_searched);
ca1d1d23
JB
2803 data[2 * i + 1] = Fmake_marker ();
2804 Fset_marker (data[2 * i + 1],
177c0ea7 2805 make_number (search_regs.end[i]),
daa37602 2806 last_thing_searched);
ca1d1d23 2807 }
daa37602
JB
2808 else
2809 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2810 abort ();
2811
abd0071c 2812 len = 2 * i + 2;
ca1d1d23
JB
2813 }
2814 else
abd0071c 2815 data[2 * i] = data[2 * i + 1] = Qnil;
ca1d1d23 2816 }
56256c2a 2817
bd2cbd56 2818 if (BUFFERP (last_thing_searched) && !NILP (integers))
41c01205 2819 {
bd2cbd56 2820 data[len] = last_thing_searched;
41c01205
DK
2821 len++;
2822 }
2823
56256c2a
RS
2824 /* If REUSE is not usable, cons up the values and return them. */
2825 if (! CONSP (reuse))
41c01205 2826 return Flist (len, data);
56256c2a
RS
2827
2828 /* If REUSE is a list, store as many value elements as will fit
2829 into the elements of REUSE. */
2830 for (i = 0, tail = reuse; CONSP (tail);
c1d497be 2831 i++, tail = XCDR (tail))
56256c2a 2832 {
41c01205 2833 if (i < len)
f3fbd155 2834 XSETCAR (tail, data[i]);
56256c2a 2835 else
f3fbd155 2836 XSETCAR (tail, Qnil);
56256c2a
RS
2837 prev = tail;
2838 }
2839
2840 /* If we couldn't fit all value elements into REUSE,
2841 cons up the rest of them and add them to the end of REUSE. */
41c01205
DK
2842 if (i < len)
2843 XSETCDR (prev, Flist (len - i, data + i));
56256c2a
RS
2844
2845 return reuse;
ca1d1d23
JB
2846}
2847
51f10faa
KS
2848/* Internal usage only:
2849 If RESEAT is `evaporate', put the markers back on the free list
2850 immediately. No other references to the markers must exist in this case,
2851 so it is used only internally on the unwind stack and save-match-data from
2852 Lisp. */
ca1d1d23 2853
abd0071c 2854DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
8c1a1077 2855 doc: /* Set internal data on last search match from elements of LIST.
abd0071c
KS
2856LIST should have been created by calling `match-data' previously.
2857
51f10faa 2858If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
abd0071c
KS
2859 (list, reseat)
2860 register Lisp_Object list, reseat;
ca1d1d23
JB
2861{
2862 register int i;
2863 register Lisp_Object marker;
2864
7074fde6
FP
2865 if (running_asynch_code)
2866 save_search_regs ();
2867
ca1d1d23 2868 if (!CONSP (list) && !NILP (list))
b37902c8 2869 list = wrong_type_argument (Qconsp, list);
ca1d1d23 2870
41c01205
DK
2871 /* Unless we find a marker with a buffer or an explicit buffer
2872 in LIST, assume that this match data came from a string. */
daa37602
JB
2873 last_thing_searched = Qt;
2874
4746118a
JB
2875 /* Allocate registers if they don't already exist. */
2876 {
d084e942 2877 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
2878
2879 if (length > search_regs.num_regs)
2880 {
1113d9db
JB
2881 if (search_regs.num_regs == 0)
2882 {
2883 search_regs.start
2884 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2885 search_regs.end
2886 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2887 }
4746118a 2888 else
1113d9db
JB
2889 {
2890 search_regs.start
2891 = (regoff_t *) xrealloc (search_regs.start,
2892 length * sizeof (regoff_t));
2893 search_regs.end
2894 = (regoff_t *) xrealloc (search_regs.end,
2895 length * sizeof (regoff_t));
2896 }
4746118a 2897
e62371e9
KH
2898 for (i = search_regs.num_regs; i < length; i++)
2899 search_regs.start[i] = -1;
2900
487282dc 2901 search_regs.num_regs = length;
4746118a 2902 }
ca1d1d23 2903
abd0071c 2904 for (i = 0; CONSP (list); i++)
41c01205 2905 {
abd0071c 2906 marker = XCAR (list);
bd2cbd56 2907 if (BUFFERP (marker))
c3762cbd 2908 {
bd2cbd56 2909 last_thing_searched = marker;
c3762cbd
DK
2910 break;
2911 }
2912 if (i >= length)
2913 break;
41c01205
DK
2914 if (NILP (marker))
2915 {
2916 search_regs.start[i] = -1;
abd0071c 2917 list = XCDR (list);
41c01205
DK
2918 }
2919 else
2920 {
2921 int from;
abd0071c 2922 Lisp_Object m;
e2811828 2923
abd0071c 2924 m = marker;
41c01205
DK
2925 if (MARKERP (marker))
2926 {
2927 if (XMARKER (marker)->buffer == 0)
2928 XSETFASTINT (marker, 0);
2929 else
2930 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2931 }
e2811828 2932
41c01205
DK
2933 CHECK_NUMBER_COERCE_MARKER (marker);
2934 from = XINT (marker);
e2811828 2935
abd0071c
KS
2936 if (!NILP (reseat) && MARKERP (m))
2937 {
2938 if (EQ (reseat, Qevaporate))
2939 free_marker (m);
2940 else
2941 unchain_marker (XMARKER (m));
9ad54a7e 2942 XSETCAR (list, Qnil);
abd0071c
KS
2943 }
2944
2945 if ((list = XCDR (list), !CONSP (list)))
2946 break;
2947
2948 m = marker = XCAR (list);
2949
41c01205
DK
2950 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2951 XSETFASTINT (marker, 0);
e2811828 2952
41c01205
DK
2953 CHECK_NUMBER_COERCE_MARKER (marker);
2954 search_regs.start[i] = from;
2955 search_regs.end[i] = XINT (marker);
abd0071c
KS
2956
2957 if (!NILP (reseat) && MARKERP (m))
2958 {
2959 if (EQ (reseat, Qevaporate))
2960 free_marker (m);
2961 else
2962 unchain_marker (XMARKER (m));
9ad54a7e 2963 XSETCAR (list, Qnil);
abd0071c 2964 }
41c01205 2965 }
abd0071c 2966 list = XCDR (list);
41c01205 2967 }
ca1d1d23 2968
41c01205
DK
2969 for (; i < search_regs.num_regs; i++)
2970 search_regs.start[i] = -1;
2971 }
ca1d1d23 2972
177c0ea7 2973 return Qnil;
ca1d1d23
JB
2974}
2975
7074fde6
FP
2976/* If non-zero the match data have been saved in saved_search_regs
2977 during the execution of a sentinel or filter. */
75ebf74b 2978static int search_regs_saved;
7074fde6 2979static struct re_registers saved_search_regs;
41c01205 2980static Lisp_Object saved_last_thing_searched;
7074fde6
FP
2981
2982/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2983 if asynchronous code (filter or sentinel) is running. */
2984static void
2985save_search_regs ()
2986{
2987 if (!search_regs_saved)
2988 {
2989 saved_search_regs.num_regs = search_regs.num_regs;
2990 saved_search_regs.start = search_regs.start;
2991 saved_search_regs.end = search_regs.end;
41c01205
DK
2992 saved_last_thing_searched = last_thing_searched;
2993 last_thing_searched = Qnil;
7074fde6 2994 search_regs.num_regs = 0;
2d4a771a
RS
2995 search_regs.start = 0;
2996 search_regs.end = 0;
7074fde6
FP
2997
2998 search_regs_saved = 1;
2999 }
3000}
3001
3002/* Called upon exit from filters and sentinels. */
3003void
abd0071c 3004restore_search_regs ()
7074fde6
FP
3005{
3006 if (search_regs_saved)
3007 {
3008 if (search_regs.num_regs > 0)
3009 {
3010 xfree (search_regs.start);
3011 xfree (search_regs.end);
3012 }
3013 search_regs.num_regs = saved_search_regs.num_regs;
3014 search_regs.start = saved_search_regs.start;
3015 search_regs.end = saved_search_regs.end;
41c01205
DK
3016 last_thing_searched = saved_last_thing_searched;
3017 saved_last_thing_searched = Qnil;
7074fde6
FP
3018 search_regs_saved = 0;
3019 }
3020}
3021
abd0071c
KS
3022static Lisp_Object
3023unwind_set_match_data (list)
3024 Lisp_Object list;
3025{
51f10faa 3026 /* It is safe to free (evaporate) the markers immediately. */
abd0071c
KS
3027 return Fset_match_data (list, Qevaporate);
3028}
3029
3030/* Called to unwind protect the match data. */
3031void
3032record_unwind_save_match_data ()
3033{
3034 record_unwind_protect (unwind_set_match_data,
3035 Fmatch_data (Qnil, Qnil, Qnil));
3036}
3037
ca1d1d23
JB
3038/* Quote a string to inactivate reg-expr chars */
3039
3040DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
8c1a1077
PJ
3041 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
3042 (string)
5806161b 3043 Lisp_Object string;
ca1d1d23
JB
3044{
3045 register unsigned char *in, *out, *end;
3046 register unsigned char *temp;
0c8533c6 3047 int backslashes_added = 0;
ca1d1d23 3048
b7826503 3049 CHECK_STRING (string);
ca1d1d23 3050
d5db4077 3051 temp = (unsigned char *) alloca (SBYTES (string) * 2);
ca1d1d23
JB
3052
3053 /* Now copy the data into the new string, inserting escapes. */
3054
d5db4077
KR
3055 in = SDATA (string);
3056 end = in + SBYTES (string);
177c0ea7 3057 out = temp;
ca1d1d23
JB
3058
3059 for (; in != end; in++)
3060 {
3061 if (*in == '[' || *in == ']'
3062 || *in == '*' || *in == '.' || *in == '\\'
3063 || *in == '?' || *in == '+'
3064 || *in == '^' || *in == '$')
0c8533c6 3065 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
3066 *out++ = *in;
3067 }
3068
3f8100f1 3069 return make_specified_string (temp,
d5db4077 3070 SCHARS (string) + backslashes_added,
3f8100f1
RS
3071 out - temp,
3072 STRING_MULTIBYTE (string));
ca1d1d23 3073}
177c0ea7 3074\f
dfcf069d 3075void
ca1d1d23
JB
3076syms_of_search ()
3077{
3078 register int i;
3079
487282dc
KH
3080 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3081 {
3082 searchbufs[i].buf.allocated = 100;
b23c0a83 3083 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
487282dc
KH
3084 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3085 searchbufs[i].regexp = Qnil;
ecdb561e 3086 searchbufs[i].whitespace_regexp = Qnil;
487282dc
KH
3087 staticpro (&searchbufs[i].regexp);
3088 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3089 }
3090 searchbuf_head = &searchbufs[0];
ca1d1d23
JB
3091
3092 Qsearch_failed = intern ("search-failed");
3093 staticpro (&Qsearch_failed);
3094 Qinvalid_regexp = intern ("invalid-regexp");
3095 staticpro (&Qinvalid_regexp);
3096
3097 Fput (Qsearch_failed, Qerror_conditions,
3098 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3099 Fput (Qsearch_failed, Qerror_message,
3100 build_string ("Search failed"));
3101
3102 Fput (Qinvalid_regexp, Qerror_conditions,
3103 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3104 Fput (Qinvalid_regexp, Qerror_message,
3105 build_string ("Invalid regexp"));
3106
daa37602
JB
3107 last_thing_searched = Qnil;
3108 staticpro (&last_thing_searched);
3109
0f6af254
DK
3110 saved_last_thing_searched = Qnil;
3111 staticpro (&saved_last_thing_searched);
3112
41a33295 3113 DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
e2811828 3114 doc: /* Regexp to substitute for bunches of spaces in regexp search.
f31a9a68
RS
3115Some commands use this for user-specified regexps.
3116Spaces that occur inside character classes or repetition operators
3117or other such regexp constructs are not replaced with this.
3118A value of nil (which is the normal value) means treat spaces literally. */);
41a33295 3119 Vsearch_spaces_regexp = Qnil;
f31a9a68 3120
ca1d1d23 3121 defsubr (&Slooking_at);
b819a390
RS
3122 defsubr (&Sposix_looking_at);
3123 defsubr (&Sstring_match);
3124 defsubr (&Sposix_string_match);
ca1d1d23
JB
3125 defsubr (&Ssearch_forward);
3126 defsubr (&Ssearch_backward);
3127 defsubr (&Sword_search_forward);
3128 defsubr (&Sword_search_backward);
3129 defsubr (&Sre_search_forward);
3130 defsubr (&Sre_search_backward);
b819a390
RS
3131 defsubr (&Sposix_search_forward);
3132 defsubr (&Sposix_search_backward);
ca1d1d23
JB
3133 defsubr (&Sreplace_match);
3134 defsubr (&Smatch_beginning);
3135 defsubr (&Smatch_end);
3136 defsubr (&Smatch_data);
3f1c005b 3137 defsubr (&Sset_match_data);
ca1d1d23
JB
3138 defsubr (&Sregexp_quote);
3139}
ab5796a9
MB
3140
3141/* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3142 (do not change this comment) */