*** empty log message ***
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
0b5538bd 2 Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2002, 2003,
aaef169d 3 2004, 2005, 2006 Free Software Foundation, Inc.
ca1d1d23
JB
4
5This file is part of GNU Emacs.
6
7GNU Emacs is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
7c938215 9the Free Software Foundation; either version 2, or (at your option)
ca1d1d23
JB
10any later version.
11
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU Emacs; see the file COPYING. If not, write to
4fc5845f
LK
19the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20Boston, MA 02110-1301, USA. */
ca1d1d23
JB
21
22
18160b98 23#include <config.h>
ca1d1d23
JB
24#include "lisp.h"
25#include "syntax.h"
5679531d 26#include "category.h"
ca1d1d23 27#include "buffer.h"
5679531d 28#include "charset.h"
9169c321 29#include "region-cache.h"
ca1d1d23 30#include "commands.h"
9ac0d9e0 31#include "blockinput.h"
bf1760bb 32#include "intervals.h"
4746118a 33
ca1d1d23
JB
34#include <sys/types.h>
35#include "regex.h"
36
1d288aef 37#define REGEXP_CACHE_SIZE 20
ca1d1d23 38
487282dc
KH
39/* If the regexp is non-nil, then the buffer contains the compiled form
40 of that regexp, suitable for searching. */
1d288aef
RS
41struct regexp_cache
42{
487282dc 43 struct regexp_cache *next;
ecdb561e 44 Lisp_Object regexp, whitespace_regexp;
487282dc
KH
45 struct re_pattern_buffer buf;
46 char fastmap[0400];
b819a390
RS
47 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
48 char posix;
487282dc 49};
ca1d1d23 50
487282dc
KH
51/* The instances of that struct. */
52struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 53
487282dc
KH
54/* The head of the linked list; points to the most recently used buffer. */
55struct regexp_cache *searchbuf_head;
ca1d1d23 56
ca1d1d23 57
4746118a
JB
58/* Every call to re_match, etc., must pass &search_regs as the regs
59 argument unless you can show it is unnecessary (i.e., if re_match
60 is certainly going to be called again before region-around-match
61 can be called).
62
63 Since the registers are now dynamically allocated, we need to make
64 sure not to refer to the Nth register before checking that it has
1113d9db
JB
65 been allocated by checking search_regs.num_regs.
66
67 The regex code keeps track of whether it has allocated the search
487282dc
KH
68 buffer using bits in the re_pattern_buffer. This means that whenever
69 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
70 allocated any registers, and will allocate new registers the next
71 time you call a searching or matching function. Therefore, we need
72 to call re_set_registers after compiling a new pattern or after
73 setting the match registers, so that the regex functions will be
74 able to free or re-allocate it properly. */
ca1d1d23
JB
75static struct re_registers search_regs;
76
daa37602
JB
77/* The buffer in which the last search was performed, or
78 Qt if the last search was done in a string;
79 Qnil if no searching has been done yet. */
80static Lisp_Object last_thing_searched;
ca1d1d23 81
8e6208c5 82/* error condition signaled when regexp compile_pattern fails */
ca1d1d23
JB
83
84Lisp_Object Qinvalid_regexp;
85
41a33295 86Lisp_Object Vsearch_spaces_regexp;
f31a9a68 87
ca325161 88static void set_search_regs ();
044f81f1 89static void save_search_regs ();
facdc750
RS
90static int simple_search ();
91static int boyer_moore ();
b819a390
RS
92static int search_buffer ();
93
ca1d1d23
JB
94static void
95matcher_overflow ()
96{
97 error ("Stack overflow in regexp matcher");
98}
99
b819a390
RS
100/* Compile a regexp and signal a Lisp error if anything goes wrong.
101 PATTERN is the pattern to compile.
102 CP is the place to put the result.
facdc750 103 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
104 REGP is the structure that says where to store the "register"
105 values that will result from matching this pattern.
106 If it is 0, we should compile the pattern not to record any
107 subexpression bounds.
108 POSIX is nonzero if we want full backtracking (POSIX style)
5679531d
KH
109 for this pattern. 0 means backtrack only enough to get a valid match.
110 MULTIBYTE is nonzero if we want to handle multibyte characters in
111 PATTERN. 0 means all multibyte characters are recognized just as
ecdb561e
RS
112 sequences of binary data.
113
41a33295 114 The behavior also depends on Vsearch_spaces_regexp. */
ca1d1d23 115
487282dc 116static void
5679531d 117compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
487282dc 118 struct regexp_cache *cp;
ca1d1d23 119 Lisp_Object pattern;
facdc750 120 Lisp_Object translate;
487282dc 121 struct re_registers *regp;
b819a390 122 int posix;
5679531d 123 int multibyte;
ca1d1d23 124{
7276d3d8 125 unsigned char *raw_pattern;
f8bd51c4 126 int raw_pattern_size;
d451e4db 127 char *val;
b819a390 128 reg_syntax_t old;
ca1d1d23 129
f8bd51c4
KH
130 /* MULTIBYTE says whether the text to be searched is multibyte.
131 We must convert PATTERN to match that, or we will not really
132 find things right. */
133
134 if (multibyte == STRING_MULTIBYTE (pattern))
135 {
d5db4077
KR
136 raw_pattern = (unsigned char *) SDATA (pattern);
137 raw_pattern_size = SBYTES (pattern);
f8bd51c4
KH
138 }
139 else if (multibyte)
140 {
d5db4077
KR
141 raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
142 SCHARS (pattern));
7276d3d8 143 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
144 copy_text (SDATA (pattern), raw_pattern,
145 SCHARS (pattern), 0, 1);
f8bd51c4
KH
146 }
147 else
148 {
149 /* Converting multibyte to single-byte.
150
151 ??? Perhaps this conversion should be done in a special way
152 by subtracting nonascii-insert-offset from each non-ASCII char,
153 so that only the multibyte chars which really correspond to
154 the chosen single-byte character set can possibly match. */
d5db4077 155 raw_pattern_size = SCHARS (pattern);
7276d3d8 156 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
157 copy_text (SDATA (pattern), raw_pattern,
158 SBYTES (pattern), 1, 0);
f8bd51c4
KH
159 }
160
487282dc 161 cp->regexp = Qnil;
59fab369 162 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
b819a390 163 cp->posix = posix;
5679531d 164 cp->buf.multibyte = multibyte;
41a33295 165 cp->whitespace_regexp = Vsearch_spaces_regexp;
9ac0d9e0 166 BLOCK_INPUT;
fb4a568d 167 old = re_set_syntax (RE_SYNTAX_EMACS
b819a390 168 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
f31a9a68 169
41a33295
RS
170 re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
171 : SDATA (Vsearch_spaces_regexp));
f31a9a68 172
7276d3d8
RS
173 val = (char *) re_compile_pattern ((char *)raw_pattern,
174 raw_pattern_size, &cp->buf);
f31a9a68
RS
175
176 re_set_whitespace_regexp (NULL);
177
b819a390 178 re_set_syntax (old);
9ac0d9e0 179 UNBLOCK_INPUT;
ca1d1d23 180 if (val)
487282dc 181 Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
1113d9db 182
487282dc 183 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
184}
185
6efc7887
RS
186/* Shrink each compiled regexp buffer in the cache
187 to the size actually used right now.
188 This is called from garbage collection. */
189
190void
191shrink_regexp_cache ()
192{
a968f437 193 struct regexp_cache *cp;
6efc7887
RS
194
195 for (cp = searchbuf_head; cp != 0; cp = cp->next)
196 {
197 cp->buf.allocated = cp->buf.used;
198 cp->buf.buffer
b23c0a83 199 = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
6efc7887
RS
200 }
201}
202
487282dc 203/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
204 the cache.
205 PATTERN is the pattern to compile.
facdc750 206 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
207 REGP is the structure that says where to store the "register"
208 values that will result from matching this pattern.
209 If it is 0, we should compile the pattern not to record any
210 subexpression bounds.
211 POSIX is nonzero if we want full backtracking (POSIX style)
212 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
213
214struct re_pattern_buffer *
0c8533c6 215compile_pattern (pattern, regp, translate, posix, multibyte)
487282dc
KH
216 Lisp_Object pattern;
217 struct re_registers *regp;
facdc750 218 Lisp_Object translate;
0c8533c6 219 int posix, multibyte;
487282dc
KH
220{
221 struct regexp_cache *cp, **cpp;
222
223 for (cpp = &searchbuf_head; ; cpp = &cp->next)
224 {
225 cp = *cpp;
f1b9c7c1
KR
226 /* Entries are initialized to nil, and may be set to nil by
227 compile_pattern_1 if the pattern isn't valid. Don't apply
49a5f770
KR
228 string accessors in those cases. However, compile_pattern_1
229 is only applied to the cache entry we pick here to reuse. So
230 nil should never appear before a non-nil entry. */
7c752c80 231 if (NILP (cp->regexp))
f1b9c7c1 232 goto compile_it;
d5db4077 233 if (SCHARS (cp->regexp) == SCHARS (pattern)
cf69b13e 234 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
1d288aef 235 && !NILP (Fstring_equal (cp->regexp, pattern))
59fab369 236 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
5679531d 237 && cp->posix == posix
ecdb561e 238 && cp->buf.multibyte == multibyte
41a33295 239 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp)))
487282dc
KH
240 break;
241
f1b9c7c1
KR
242 /* If we're at the end of the cache, compile into the nil cell
243 we found, or the last (least recently used) cell with a
244 string value. */
487282dc
KH
245 if (cp->next == 0)
246 {
f1b9c7c1 247 compile_it:
5679531d 248 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
487282dc
KH
249 break;
250 }
251 }
252
253 /* When we get here, cp (aka *cpp) contains the compiled pattern,
254 either because we found it in the cache or because we just compiled it.
255 Move it to the front of the queue to mark it as most recently used. */
256 *cpp = cp->next;
257 cp->next = searchbuf_head;
258 searchbuf_head = cp;
1113d9db 259
6639708c
RS
260 /* Advise the searching functions about the space we have allocated
261 for register data. */
262 if (regp)
263 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
264
487282dc 265 return &cp->buf;
ca1d1d23
JB
266}
267
268/* Error condition used for failing searches */
269Lisp_Object Qsearch_failed;
270
271Lisp_Object
272signal_failure (arg)
273 Lisp_Object arg;
274{
275 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
276 return Qnil;
277}
278\f
b819a390
RS
279static Lisp_Object
280looking_at_1 (string, posix)
ca1d1d23 281 Lisp_Object string;
b819a390 282 int posix;
ca1d1d23
JB
283{
284 Lisp_Object val;
285 unsigned char *p1, *p2;
286 int s1, s2;
287 register int i;
487282dc 288 struct re_pattern_buffer *bufp;
ca1d1d23 289
7074fde6
FP
290 if (running_asynch_code)
291 save_search_regs ();
292
b7826503 293 CHECK_STRING (string);
487282dc
KH
294 bufp = compile_pattern (string, &search_regs,
295 (!NILP (current_buffer->case_fold_search)
0190922f 296 ? current_buffer->case_canon_table : Qnil),
0c8533c6
RS
297 posix,
298 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23
JB
299
300 immediate_quit = 1;
301 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
302
303 /* Get pointers and sizes of the two strings
304 that make up the visible portion of the buffer. */
305
306 p1 = BEGV_ADDR;
fa8ed3e0 307 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 308 p2 = GAP_END_ADDR;
fa8ed3e0 309 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
310 if (s1 < 0)
311 {
312 p2 = p1;
fa8ed3e0 313 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
314 s1 = 0;
315 }
316 if (s2 < 0)
317 {
fa8ed3e0 318 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
319 s2 = 0;
320 }
8bb43c28
RS
321
322 re_match_object = Qnil;
177c0ea7 323
487282dc 324 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
fa8ed3e0
RS
325 PT_BYTE - BEGV_BYTE, &search_regs,
326 ZV_BYTE - BEGV_BYTE);
de182d70 327 immediate_quit = 0;
177c0ea7 328
ca1d1d23
JB
329 if (i == -2)
330 matcher_overflow ();
331
332 val = (0 <= i ? Qt : Qnil);
fa8ed3e0
RS
333 if (i >= 0)
334 for (i = 0; i < search_regs.num_regs; i++)
335 if (search_regs.start[i] >= 0)
336 {
337 search_regs.start[i]
338 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
339 search_regs.end[i]
340 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
341 }
a3668d92 342 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
343 return val;
344}
345
b819a390 346DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
8c1a1077
PJ
347 doc: /* Return t if text after point matches regular expression REGEXP.
348This function modifies the match data that `match-beginning',
349`match-end' and `match-data' access; save and restore the match
350data if you want to preserve them. */)
351 (regexp)
94f94972 352 Lisp_Object regexp;
b819a390 353{
94f94972 354 return looking_at_1 (regexp, 0);
b819a390
RS
355}
356
357DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
8c1a1077
PJ
358 doc: /* Return t if text after point matches regular expression REGEXP.
359Find the longest match, in accord with Posix regular expression rules.
360This function modifies the match data that `match-beginning',
361`match-end' and `match-data' access; save and restore the match
362data if you want to preserve them. */)
363 (regexp)
94f94972 364 Lisp_Object regexp;
b819a390 365{
94f94972 366 return looking_at_1 (regexp, 1);
b819a390
RS
367}
368\f
369static Lisp_Object
370string_match_1 (regexp, string, start, posix)
ca1d1d23 371 Lisp_Object regexp, string, start;
b819a390 372 int posix;
ca1d1d23
JB
373{
374 int val;
487282dc 375 struct re_pattern_buffer *bufp;
0c8533c6
RS
376 int pos, pos_byte;
377 int i;
ca1d1d23 378
7074fde6
FP
379 if (running_asynch_code)
380 save_search_regs ();
381
b7826503
PJ
382 CHECK_STRING (regexp);
383 CHECK_STRING (string);
ca1d1d23
JB
384
385 if (NILP (start))
0c8533c6 386 pos = 0, pos_byte = 0;
ca1d1d23
JB
387 else
388 {
d5db4077 389 int len = SCHARS (string);
ca1d1d23 390
b7826503 391 CHECK_NUMBER (start);
0c8533c6
RS
392 pos = XINT (start);
393 if (pos < 0 && -pos <= len)
394 pos = len + pos;
395 else if (0 > pos || pos > len)
ca1d1d23 396 args_out_of_range (string, start);
0c8533c6 397 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
398 }
399
487282dc
KH
400 bufp = compile_pattern (regexp, &search_regs,
401 (!NILP (current_buffer->case_fold_search)
0190922f 402 ? current_buffer->case_canon_table : Qnil),
0c8533c6
RS
403 posix,
404 STRING_MULTIBYTE (string));
ca1d1d23 405 immediate_quit = 1;
8bb43c28 406 re_match_object = string;
177c0ea7 407
d5db4077
KR
408 val = re_search (bufp, (char *) SDATA (string),
409 SBYTES (string), pos_byte,
410 SBYTES (string) - pos_byte,
ca1d1d23
JB
411 &search_regs);
412 immediate_quit = 0;
daa37602 413 last_thing_searched = Qt;
ca1d1d23
JB
414 if (val == -2)
415 matcher_overflow ();
416 if (val < 0) return Qnil;
0c8533c6
RS
417
418 for (i = 0; i < search_regs.num_regs; i++)
419 if (search_regs.start[i] >= 0)
420 {
421 search_regs.start[i]
422 = string_byte_to_char (string, search_regs.start[i]);
423 search_regs.end[i]
424 = string_byte_to_char (string, search_regs.end[i]);
425 }
426
427 return make_number (string_byte_to_char (string, val));
ca1d1d23 428}
e59a8453 429
b819a390 430DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
8c1a1077 431 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
b85acc4b 432Matching ignores case if `case-fold-search' is non-nil.
8c1a1077
PJ
433If third arg START is non-nil, start search at that index in STRING.
434For index of first char beyond the match, do (match-end 0).
435`match-end' and `match-beginning' also give indices of substrings
2bd2f32d
RS
436matched by parenthesis constructs in the pattern.
437
438You can use the function `match-string' to extract the substrings
439matched by the parenthesis constructions in REGEXP. */)
8c1a1077 440 (regexp, string, start)
b819a390
RS
441 Lisp_Object regexp, string, start;
442{
443 return string_match_1 (regexp, string, start, 0);
444}
445
446DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
8c1a1077
PJ
447 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
448Find the longest match, in accord with Posix regular expression rules.
449Case is ignored if `case-fold-search' is non-nil in the current buffer.
450If third arg START is non-nil, start search at that index in STRING.
451For index of first char beyond the match, do (match-end 0).
452`match-end' and `match-beginning' also give indices of substrings
453matched by parenthesis constructs in the pattern. */)
454 (regexp, string, start)
b819a390
RS
455 Lisp_Object regexp, string, start;
456{
457 return string_match_1 (regexp, string, start, 1);
458}
459
e59a8453
RS
460/* Match REGEXP against STRING, searching all of STRING,
461 and return the index of the match, or negative on failure.
462 This does not clobber the match data. */
463
464int
465fast_string_match (regexp, string)
466 Lisp_Object regexp, string;
467{
468 int val;
487282dc 469 struct re_pattern_buffer *bufp;
e59a8453 470
facdc750
RS
471 bufp = compile_pattern (regexp, 0, Qnil,
472 0, STRING_MULTIBYTE (string));
e59a8453 473 immediate_quit = 1;
8bb43c28 474 re_match_object = string;
177c0ea7 475
d5db4077
KR
476 val = re_search (bufp, (char *) SDATA (string),
477 SBYTES (string), 0,
478 SBYTES (string), 0);
e59a8453
RS
479 immediate_quit = 0;
480 return val;
481}
5679531d
KH
482
483/* Match REGEXP against STRING, searching all of STRING ignoring case,
484 and return the index of the match, or negative on failure.
0c8533c6
RS
485 This does not clobber the match data.
486 We assume that STRING contains single-byte characters. */
5679531d
KH
487
488extern Lisp_Object Vascii_downcase_table;
489
490int
b4577c63 491fast_c_string_match_ignore_case (regexp, string)
5679531d 492 Lisp_Object regexp;
96b80561 493 const char *string;
5679531d
KH
494{
495 int val;
496 struct re_pattern_buffer *bufp;
497 int len = strlen (string);
498
0c8533c6 499 regexp = string_make_unibyte (regexp);
b4577c63 500 re_match_object = Qt;
5679531d 501 bufp = compile_pattern (regexp, 0,
0190922f 502 Vascii_canon_table, 0,
f8bd51c4 503 0);
5679531d
KH
504 immediate_quit = 1;
505 val = re_search (bufp, string, len, 0, len, 0);
506 immediate_quit = 0;
507 return val;
508}
be5f4dfb
KH
509
510/* Like fast_string_match but ignore case. */
511
512int
513fast_string_match_ignore_case (regexp, string)
514 Lisp_Object regexp, string;
515{
516 int val;
517 struct re_pattern_buffer *bufp;
518
0190922f 519 bufp = compile_pattern (regexp, 0, Vascii_canon_table,
be5f4dfb
KH
520 0, STRING_MULTIBYTE (string));
521 immediate_quit = 1;
522 re_match_object = string;
523
524 val = re_search (bufp, (char *) SDATA (string),
525 SBYTES (string), 0,
526 SBYTES (string), 0);
527 immediate_quit = 0;
528 return val;
529}
ca1d1d23 530\f
9169c321
JB
531/* The newline cache: remembering which sections of text have no newlines. */
532
533/* If the user has requested newline caching, make sure it's on.
534 Otherwise, make sure it's off.
535 This is our cheezy way of associating an action with the change of
536 state of a buffer-local variable. */
537static void
538newline_cache_on_off (buf)
539 struct buffer *buf;
540{
541 if (NILP (buf->cache_long_line_scans))
542 {
543 /* It should be off. */
544 if (buf->newline_cache)
545 {
546 free_region_cache (buf->newline_cache);
547 buf->newline_cache = 0;
548 }
549 }
550 else
551 {
552 /* It should be on. */
553 if (buf->newline_cache == 0)
554 buf->newline_cache = new_region_cache ();
555 }
556}
557
558\f
559/* Search for COUNT instances of the character TARGET between START and END.
560
561 If COUNT is positive, search forwards; END must be >= START.
562 If COUNT is negative, search backwards for the -COUNTth instance;
563 END must be <= START.
564 If COUNT is zero, do anything you please; run rogue, for all I care.
565
566 If END is zero, use BEGV or ZV instead, as appropriate for the
567 direction indicated by COUNT.
ffd56f97
JB
568
569 If we find COUNT instances, set *SHORTAGE to zero, and return the
a9f2a45f 570 position past the COUNTth match. Note that for reverse motion
5bfe95c9 571 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 572
9169c321
JB
573 If we don't find COUNT instances before reaching END, set *SHORTAGE
574 to the number of TARGETs left unfound, and return END.
ffd56f97 575
087a5f81
RS
576 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
577 except when inside redisplay. */
578
dfcf069d 579int
9169c321
JB
580scan_buffer (target, start, end, count, shortage, allow_quit)
581 register int target;
582 int start, end;
583 int count;
584 int *shortage;
087a5f81 585 int allow_quit;
ca1d1d23 586{
9169c321 587 struct region_cache *newline_cache;
177c0ea7 588 int direction;
ffd56f97 589
9169c321
JB
590 if (count > 0)
591 {
592 direction = 1;
593 if (! end) end = ZV;
594 }
595 else
596 {
597 direction = -1;
598 if (! end) end = BEGV;
599 }
ffd56f97 600
9169c321
JB
601 newline_cache_on_off (current_buffer);
602 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
603
604 if (shortage != 0)
605 *shortage = 0;
606
087a5f81 607 immediate_quit = allow_quit;
ca1d1d23 608
ffd56f97 609 if (count > 0)
9169c321 610 while (start != end)
ca1d1d23 611 {
9169c321
JB
612 /* Our innermost scanning loop is very simple; it doesn't know
613 about gaps, buffer ends, or the newline cache. ceiling is
614 the position of the last character before the next such
615 obstacle --- the last character the dumb search loop should
616 examine. */
fa8ed3e0
RS
617 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
618 int start_byte = CHAR_TO_BYTE (start);
67ce527d 619 int tem;
9169c321
JB
620
621 /* If we're looking for a newline, consult the newline cache
622 to see where we can avoid some scanning. */
623 if (target == '\n' && newline_cache)
624 {
625 int next_change;
626 immediate_quit = 0;
627 while (region_cache_forward
fa8ed3e0
RS
628 (current_buffer, newline_cache, start_byte, &next_change))
629 start_byte = next_change;
cbe0db0d 630 immediate_quit = allow_quit;
9169c321 631
fa8ed3e0
RS
632 /* START should never be after END. */
633 if (start_byte > ceiling_byte)
634 start_byte = ceiling_byte;
9169c321
JB
635
636 /* Now the text after start is an unknown region, and
637 next_change is the position of the next known region. */
fa8ed3e0 638 ceiling_byte = min (next_change - 1, ceiling_byte);
9169c321
JB
639 }
640
641 /* The dumb loop can only scan text stored in contiguous
642 bytes. BUFFER_CEILING_OF returns the last character
643 position that is contiguous, so the ceiling is the
644 position after that. */
67ce527d
KH
645 tem = BUFFER_CEILING_OF (start_byte);
646 ceiling_byte = min (tem, ceiling_byte);
9169c321
JB
647
648 {
177c0ea7 649 /* The termination address of the dumb loop. */
fa8ed3e0
RS
650 register unsigned char *ceiling_addr
651 = BYTE_POS_ADDR (ceiling_byte) + 1;
652 register unsigned char *cursor
653 = BYTE_POS_ADDR (start_byte);
9169c321
JB
654 unsigned char *base = cursor;
655
656 while (cursor < ceiling_addr)
657 {
658 unsigned char *scan_start = cursor;
659
660 /* The dumb loop. */
661 while (*cursor != target && ++cursor < ceiling_addr)
662 ;
663
664 /* If we're looking for newlines, cache the fact that
665 the region from start to cursor is free of them. */
666 if (target == '\n' && newline_cache)
667 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
668 start_byte + scan_start - base,
669 start_byte + cursor - base);
9169c321
JB
670
671 /* Did we find the target character? */
672 if (cursor < ceiling_addr)
673 {
674 if (--count == 0)
675 {
676 immediate_quit = 0;
fa8ed3e0 677 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
9169c321
JB
678 }
679 cursor++;
680 }
681 }
682
fa8ed3e0 683 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321 684 }
ca1d1d23
JB
685 }
686 else
9169c321
JB
687 while (start > end)
688 {
689 /* The last character to check before the next obstacle. */
fa8ed3e0
RS
690 int ceiling_byte = CHAR_TO_BYTE (end);
691 int start_byte = CHAR_TO_BYTE (start);
67ce527d 692 int tem;
9169c321
JB
693
694 /* Consult the newline cache, if appropriate. */
695 if (target == '\n' && newline_cache)
696 {
697 int next_change;
698 immediate_quit = 0;
699 while (region_cache_backward
fa8ed3e0
RS
700 (current_buffer, newline_cache, start_byte, &next_change))
701 start_byte = next_change;
cbe0db0d 702 immediate_quit = allow_quit;
9169c321
JB
703
704 /* Start should never be at or before end. */
fa8ed3e0
RS
705 if (start_byte <= ceiling_byte)
706 start_byte = ceiling_byte + 1;
9169c321
JB
707
708 /* Now the text before start is an unknown region, and
709 next_change is the position of the next known region. */
fa8ed3e0 710 ceiling_byte = max (next_change, ceiling_byte);
9169c321
JB
711 }
712
713 /* Stop scanning before the gap. */
67ce527d
KH
714 tem = BUFFER_FLOOR_OF (start_byte - 1);
715 ceiling_byte = max (tem, ceiling_byte);
9169c321
JB
716
717 {
718 /* The termination address of the dumb loop. */
fa8ed3e0
RS
719 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
720 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
721 unsigned char *base = cursor;
722
723 while (cursor >= ceiling_addr)
724 {
725 unsigned char *scan_start = cursor;
726
727 while (*cursor != target && --cursor >= ceiling_addr)
728 ;
729
730 /* If we're looking for newlines, cache the fact that
731 the region from after the cursor to start is free of them. */
732 if (target == '\n' && newline_cache)
733 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
734 start_byte + cursor - base,
735 start_byte + scan_start - base);
9169c321
JB
736
737 /* Did we find the target character? */
738 if (cursor >= ceiling_addr)
739 {
740 if (++count >= 0)
741 {
742 immediate_quit = 0;
fa8ed3e0 743 return BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
744 }
745 cursor--;
746 }
747 }
748
fa8ed3e0 749 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
750 }
751 }
752
ca1d1d23
JB
753 immediate_quit = 0;
754 if (shortage != 0)
ffd56f97 755 *shortage = count * direction;
9169c321 756 return start;
ca1d1d23 757}
fa8ed3e0
RS
758\f
759/* Search for COUNT instances of a line boundary, which means either a
760 newline or (if selective display enabled) a carriage return.
761 Start at START. If COUNT is negative, search backwards.
762
763 We report the resulting position by calling TEMP_SET_PT_BOTH.
764
765 If we find COUNT instances. we position after (always after,
766 even if scanning backwards) the COUNTth match, and return 0.
767
768 If we don't find COUNT instances before reaching the end of the
769 buffer (or the beginning, if scanning backwards), we return
770 the number of line boundaries left unfound, and position at
771 the limit we bumped up against.
772
773 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
d5d57b92 774 except in special cases. */
ca1d1d23 775
63fa018d 776int
fa8ed3e0
RS
777scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
778 int start, start_byte;
779 int limit, limit_byte;
780 register int count;
781 int allow_quit;
63fa018d 782{
fa8ed3e0
RS
783 int direction = ((count > 0) ? 1 : -1);
784
785 register unsigned char *cursor;
786 unsigned char *base;
787
788 register int ceiling;
789 register unsigned char *ceiling_addr;
790
d5d57b92
RS
791 int old_immediate_quit = immediate_quit;
792
fa8ed3e0
RS
793 /* The code that follows is like scan_buffer
794 but checks for either newline or carriage return. */
795
d5d57b92
RS
796 if (allow_quit)
797 immediate_quit++;
fa8ed3e0
RS
798
799 start_byte = CHAR_TO_BYTE (start);
800
801 if (count > 0)
802 {
803 while (start_byte < limit_byte)
804 {
805 ceiling = BUFFER_CEILING_OF (start_byte);
806 ceiling = min (limit_byte - 1, ceiling);
807 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
808 base = (cursor = BYTE_POS_ADDR (start_byte));
809 while (1)
810 {
811 while (*cursor != '\n' && ++cursor != ceiling_addr)
812 ;
813
814 if (cursor != ceiling_addr)
815 {
816 if (--count == 0)
817 {
d5d57b92 818 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
819 start_byte = start_byte + cursor - base + 1;
820 start = BYTE_TO_CHAR (start_byte);
821 TEMP_SET_PT_BOTH (start, start_byte);
822 return 0;
823 }
824 else
825 if (++cursor == ceiling_addr)
826 break;
827 }
828 else
829 break;
830 }
831 start_byte += cursor - base;
832 }
833 }
834 else
835 {
fa8ed3e0
RS
836 while (start_byte > limit_byte)
837 {
838 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
839 ceiling = max (limit_byte, ceiling);
840 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
841 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
842 while (1)
843 {
844 while (--cursor != ceiling_addr && *cursor != '\n')
845 ;
846
847 if (cursor != ceiling_addr)
848 {
849 if (++count == 0)
850 {
d5d57b92 851 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
852 /* Return the position AFTER the match we found. */
853 start_byte = start_byte + cursor - base + 1;
854 start = BYTE_TO_CHAR (start_byte);
855 TEMP_SET_PT_BOTH (start, start_byte);
856 return 0;
857 }
858 }
859 else
860 break;
861 }
862 /* Here we add 1 to compensate for the last decrement
863 of CURSOR, which took it past the valid range. */
864 start_byte += cursor - base + 1;
865 }
866 }
867
868 TEMP_SET_PT_BOTH (limit, limit_byte);
d5d57b92 869 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
870
871 return count * direction;
63fa018d
RS
872}
873
ca1d1d23 874int
fa8ed3e0 875find_next_newline_no_quit (from, cnt)
ca1d1d23
JB
876 register int from, cnt;
877{
fa8ed3e0 878 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
9169c321
JB
879}
880
9169c321
JB
881/* Like find_next_newline, but returns position before the newline,
882 not after, and only search up to TO. This isn't just
883 find_next_newline (...)-1, because you might hit TO. */
fa8ed3e0 884
9169c321
JB
885int
886find_before_next_newline (from, to, cnt)
cbe0db0d 887 int from, to, cnt;
9169c321
JB
888{
889 int shortage;
890 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
891
892 if (shortage == 0)
893 pos--;
177c0ea7 894
9169c321 895 return pos;
ca1d1d23
JB
896}
897\f
ca1d1d23
JB
898/* Subroutines of Lisp buffer search functions. */
899
900static Lisp_Object
b819a390 901search_command (string, bound, noerror, count, direction, RE, posix)
ca1d1d23
JB
902 Lisp_Object string, bound, noerror, count;
903 int direction;
904 int RE;
b819a390 905 int posix;
ca1d1d23
JB
906{
907 register int np;
9f43ad85 908 int lim, lim_byte;
ca1d1d23
JB
909 int n = direction;
910
911 if (!NILP (count))
912 {
b7826503 913 CHECK_NUMBER (count);
ca1d1d23
JB
914 n *= XINT (count);
915 }
916
b7826503 917 CHECK_STRING (string);
ca1d1d23 918 if (NILP (bound))
9f43ad85
RS
919 {
920 if (n > 0)
921 lim = ZV, lim_byte = ZV_BYTE;
922 else
923 lim = BEGV, lim_byte = BEGV_BYTE;
924 }
ca1d1d23
JB
925 else
926 {
b7826503 927 CHECK_NUMBER_COERCE_MARKER (bound);
ca1d1d23 928 lim = XINT (bound);
6ec8bbd2 929 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
930 error ("Invalid search bound (wrong side of point)");
931 if (lim > ZV)
9f43ad85 932 lim = ZV, lim_byte = ZV_BYTE;
588d2fd5 933 else if (lim < BEGV)
9f43ad85 934 lim = BEGV, lim_byte = BEGV_BYTE;
588d2fd5
KH
935 else
936 lim_byte = CHAR_TO_BYTE (lim);
ca1d1d23
JB
937 }
938
9f43ad85 939 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
ca1d1d23 940 (!NILP (current_buffer->case_fold_search)
facdc750 941 ? current_buffer->case_canon_table
3135e9fd 942 : Qnil),
ca1d1d23 943 (!NILP (current_buffer->case_fold_search)
facdc750 944 ? current_buffer->case_eqv_table
3135e9fd 945 : Qnil),
b819a390 946 posix);
ca1d1d23
JB
947 if (np <= 0)
948 {
949 if (NILP (noerror))
950 return signal_failure (string);
951 if (!EQ (noerror, Qt))
952 {
953 if (lim < BEGV || lim > ZV)
954 abort ();
9f43ad85 955 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
956 return Qnil;
957#if 0 /* This would be clean, but maybe programs depend on
958 a value of nil here. */
481399bf 959 np = lim;
a5f217b8 960#endif
ca1d1d23 961 }
481399bf
RS
962 else
963 return Qnil;
ca1d1d23
JB
964 }
965
966 if (np < BEGV || np > ZV)
967 abort ();
968
969 SET_PT (np);
970
971 return make_number (np);
972}
973\f
fa8ed3e0
RS
974/* Return 1 if REGEXP it matches just one constant string. */
975
b6d6a51c
KH
976static int
977trivial_regexp_p (regexp)
978 Lisp_Object regexp;
979{
d5db4077
KR
980 int len = SBYTES (regexp);
981 unsigned char *s = SDATA (regexp);
b6d6a51c
KH
982 while (--len >= 0)
983 {
984 switch (*s++)
985 {
986 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
987 return 0;
988 case '\\':
989 if (--len < 0)
990 return 0;
991 switch (*s++)
992 {
993 case '|': case '(': case ')': case '`': case '\'': case 'b':
994 case 'B': case '<': case '>': case 'w': case 'W': case 's':
29f89fe7 995 case 'S': case '=': case '{': case '}': case '_':
5679531d 996 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 997 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
998 case '6': case '7': case '8': case '9':
999 return 0;
1000 }
1001 }
1002 }
1003 return 1;
1004}
1005
ca325161 1006/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 1007 starting at position POS and stopping at position LIM,
b819a390 1008 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
1009 a regular expression if RE is true.
1010
1011 If N is positive, searching is forward and LIM must be greater than POS.
1012 If N is negative, searching is backward and LIM must be less than POS.
1013
facdc750 1014 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 1015 or else the position at the beginning of the Nth occurrence
b819a390
RS
1016 (if searching backward) or the end (if searching forward).
1017
1018 POSIX is nonzero if we want full backtracking (POSIX style)
1019 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 1020
aff2ce94
RS
1021#define TRANSLATE(out, trt, d) \
1022do \
1023 { \
1024 if (! NILP (trt)) \
1025 { \
1026 Lisp_Object temp; \
1027 temp = Faref (trt, make_number (d)); \
1028 if (INTEGERP (temp)) \
1029 out = XINT (temp); \
1030 else \
1031 out = d; \
1032 } \
1033 else \
1034 out = d; \
1035 } \
1036while (0)
facdc750 1037
b819a390 1038static int
9f43ad85
RS
1039search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1040 RE, trt, inverse_trt, posix)
ca1d1d23
JB
1041 Lisp_Object string;
1042 int pos;
9f43ad85 1043 int pos_byte;
ca1d1d23 1044 int lim;
9f43ad85 1045 int lim_byte;
ca1d1d23
JB
1046 int n;
1047 int RE;
facdc750
RS
1048 Lisp_Object trt;
1049 Lisp_Object inverse_trt;
b819a390 1050 int posix;
ca1d1d23 1051{
d5db4077
KR
1052 int len = SCHARS (string);
1053 int len_byte = SBYTES (string);
facdc750 1054 register int i;
ca1d1d23 1055
7074fde6
FP
1056 if (running_asynch_code)
1057 save_search_regs ();
1058
a7e4cdde 1059 /* Searching 0 times means don't move. */
ca1d1d23 1060 /* Null string is found at starting position. */
a7e4cdde 1061 if (len == 0 || n == 0)
ca325161 1062 {
0353b28f 1063 set_search_regs (pos_byte, 0);
ca325161
RS
1064 return pos;
1065 }
3f57a499 1066
41a33295 1067 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
ca1d1d23 1068 {
facdc750
RS
1069 unsigned char *p1, *p2;
1070 int s1, s2;
487282dc
KH
1071 struct re_pattern_buffer *bufp;
1072
0c8533c6
RS
1073 bufp = compile_pattern (string, &search_regs, trt, posix,
1074 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23 1075
ca1d1d23
JB
1076 immediate_quit = 1; /* Quit immediately if user types ^G,
1077 because letting this function finish
1078 can take too long. */
1079 QUIT; /* Do a pending quit right away,
1080 to avoid paradoxical behavior */
1081 /* Get pointers and sizes of the two strings
1082 that make up the visible portion of the buffer. */
1083
1084 p1 = BEGV_ADDR;
fa8ed3e0 1085 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1086 p2 = GAP_END_ADDR;
fa8ed3e0 1087 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1088 if (s1 < 0)
1089 {
1090 p2 = p1;
fa8ed3e0 1091 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1092 s1 = 0;
1093 }
1094 if (s2 < 0)
1095 {
fa8ed3e0 1096 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1097 s2 = 0;
1098 }
8bb43c28 1099 re_match_object = Qnil;
177c0ea7 1100
ca1d1d23
JB
1101 while (n < 0)
1102 {
42db823b 1103 int val;
487282dc 1104 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1105 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1106 &search_regs,
42db823b 1107 /* Don't allow match past current point */
4996330b 1108 pos_byte - BEGV_BYTE);
ca1d1d23 1109 if (val == -2)
b6d6a51c
KH
1110 {
1111 matcher_overflow ();
1112 }
ca1d1d23
JB
1113 if (val >= 0)
1114 {
26aff150 1115 pos_byte = search_regs.start[0] + BEGV_BYTE;
4746118a 1116 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1117 if (search_regs.start[i] >= 0)
1118 {
fa8ed3e0
RS
1119 search_regs.start[i]
1120 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1121 search_regs.end[i]
1122 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1123 }
a3668d92 1124 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1125 /* Set pos to the new position. */
1126 pos = search_regs.start[0];
1127 }
1128 else
1129 {
1130 immediate_quit = 0;
1131 return (n);
1132 }
1133 n++;
1134 }
1135 while (n > 0)
1136 {
42db823b 1137 int val;
487282dc 1138 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1139 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1140 &search_regs,
1141 lim_byte - BEGV_BYTE);
ca1d1d23 1142 if (val == -2)
b6d6a51c
KH
1143 {
1144 matcher_overflow ();
1145 }
ca1d1d23
JB
1146 if (val >= 0)
1147 {
26aff150 1148 pos_byte = search_regs.end[0] + BEGV_BYTE;
4746118a 1149 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1150 if (search_regs.start[i] >= 0)
1151 {
fa8ed3e0
RS
1152 search_regs.start[i]
1153 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1154 search_regs.end[i]
1155 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1156 }
a3668d92 1157 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1158 pos = search_regs.end[0];
1159 }
1160 else
1161 {
1162 immediate_quit = 0;
1163 return (0 - n);
1164 }
1165 n--;
1166 }
1167 immediate_quit = 0;
1168 return (pos);
1169 }
1170 else /* non-RE case */
1171 {
facdc750
RS
1172 unsigned char *raw_pattern, *pat;
1173 int raw_pattern_size;
1174 int raw_pattern_size_byte;
1175 unsigned char *patbuf;
1176 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
a967ed62 1177 unsigned char *base_pat;
fed91c38
KH
1178 /* Set to positive if we find a non-ASCII char that need
1179 translation. Otherwise set to zero later. */
1180 int charset_base = -1;
040272ce 1181 int boyer_moore_ok = 1;
facdc750
RS
1182
1183 /* MULTIBYTE says whether the text to be searched is multibyte.
1184 We must convert PATTERN to match that, or we will not really
1185 find things right. */
1186
1187 if (multibyte == STRING_MULTIBYTE (string))
1188 {
d5db4077
KR
1189 raw_pattern = (unsigned char *) SDATA (string);
1190 raw_pattern_size = SCHARS (string);
1191 raw_pattern_size_byte = SBYTES (string);
facdc750
RS
1192 }
1193 else if (multibyte)
1194 {
d5db4077 1195 raw_pattern_size = SCHARS (string);
facdc750 1196 raw_pattern_size_byte
d5db4077 1197 = count_size_as_multibyte (SDATA (string),
facdc750 1198 raw_pattern_size);
7276d3d8 1199 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
d5db4077
KR
1200 copy_text (SDATA (string), raw_pattern,
1201 SCHARS (string), 0, 1);
facdc750
RS
1202 }
1203 else
1204 {
1205 /* Converting multibyte to single-byte.
1206
1207 ??? Perhaps this conversion should be done in a special way
1208 by subtracting nonascii-insert-offset from each non-ASCII char,
1209 so that only the multibyte chars which really correspond to
1210 the chosen single-byte character set can possibly match. */
d5db4077
KR
1211 raw_pattern_size = SCHARS (string);
1212 raw_pattern_size_byte = SCHARS (string);
7276d3d8 1213 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
1214 copy_text (SDATA (string), raw_pattern,
1215 SBYTES (string), 1, 0);
facdc750
RS
1216 }
1217
1218 /* Copy and optionally translate the pattern. */
1219 len = raw_pattern_size;
1220 len_byte = raw_pattern_size_byte;
1221 patbuf = (unsigned char *) alloca (len_byte);
1222 pat = patbuf;
1223 base_pat = raw_pattern;
1224 if (multibyte)
1225 {
0190922f
KH
1226 /* Fill patbuf by translated characters in STRING while
1227 checking if we can use boyer-moore search. If TRT is
1228 non-nil, we can use boyer-moore search only if TRT can be
1229 represented by the byte array of 256 elements. For that,
1230 all non-ASCII case-equivalents of all case-senstive
1231 characters in STRING must belong to the same charset and
1232 row. */
1233
facdc750
RS
1234 while (--len >= 0)
1235 {
0190922f 1236 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
aff2ce94 1237 int c, translated, inverse;
facdc750
RS
1238 int in_charlen, charlen;
1239
1240 /* If we got here and the RE flag is set, it's because we're
1241 dealing with a regexp known to be trivial, so the backslash
1242 just quotes the next character. */
1243 if (RE && *base_pat == '\\')
1244 {
1245 len--;
0190922f 1246 raw_pattern_size--;
facdc750
RS
1247 len_byte--;
1248 base_pat++;
1249 }
1250
1251 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
040272ce 1252
0190922f 1253 if (NILP (trt))
facdc750 1254 {
0190922f
KH
1255 str = base_pat;
1256 charlen = in_charlen;
facdc750 1257 }
0190922f 1258 else
facdc750 1259 {
0190922f
KH
1260 /* Translate the character. */
1261 TRANSLATE (translated, trt, c);
1262 charlen = CHAR_STRING (translated, str_base);
1263 str = str_base;
1264
1265 /* Check if C has any other case-equivalents. */
1266 TRANSLATE (inverse, inverse_trt, c);
1267 /* If so, check if we can use boyer-moore. */
1268 if (c != inverse && boyer_moore_ok)
1269 {
1270 /* Check if all equivalents belong to the same
1271 charset & row. Note that the check of C
1272 itself is done by the last iteration. Note
1273 also that we don't have to check ASCII
1274 characters because boyer-moore search can
1275 always handle their translation. */
1276 while (1)
1277 {
fed91c38 1278 if (ASCII_BYTE_P (inverse))
0190922f 1279 {
fed91c38 1280 if (charset_base > 0)
0190922f
KH
1281 {
1282 boyer_moore_ok = 0;
1283 break;
1284 }
fed91c38
KH
1285 charset_base = 0;
1286 }
1287 else if (SINGLE_BYTE_CHAR_P (inverse))
1288 {
1289 /* Boyer-moore search can't handle a
1290 translation of an eight-bit
1291 character. */
1292 boyer_moore_ok = 0;
1293 break;
1294 }
1295 else if (charset_base < 0)
1296 charset_base = inverse & ~CHAR_FIELD3_MASK;
1297 else if ((inverse & ~CHAR_FIELD3_MASK)
1298 != charset_base)
1299 {
1300 boyer_moore_ok = 0;
1301 break;
0190922f
KH
1302 }
1303 if (c == inverse)
1304 break;
1305 TRANSLATE (inverse, inverse_trt, inverse);
1306 }
1307 }
aff2ce94 1308 }
fed91c38
KH
1309 if (charset_base < 0)
1310 charset_base = 0;
facdc750
RS
1311
1312 /* Store this character into the translated pattern. */
1313 bcopy (str, pat, charlen);
1314 pat += charlen;
1315 base_pat += in_charlen;
1316 len_byte -= in_charlen;
1317 }
1318 }
1319 else
1320 {
040272ce
KH
1321 /* Unibyte buffer. */
1322 charset_base = 0;
facdc750
RS
1323 while (--len >= 0)
1324 {
040272ce 1325 int c, translated;
facdc750
RS
1326
1327 /* If we got here and the RE flag is set, it's because we're
1328 dealing with a regexp known to be trivial, so the backslash
1329 just quotes the next character. */
1330 if (RE && *base_pat == '\\')
1331 {
1332 len--;
0190922f 1333 raw_pattern_size--;
facdc750
RS
1334 base_pat++;
1335 }
1336 c = *base_pat++;
aff2ce94 1337 TRANSLATE (translated, trt, c);
facdc750
RS
1338 *pat++ = translated;
1339 }
1340 }
1341
1342 len_byte = pat - patbuf;
1343 len = raw_pattern_size;
1344 pat = base_pat = patbuf;
1345
040272ce 1346 if (boyer_moore_ok)
facdc750 1347 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
aff2ce94
RS
1348 pos, pos_byte, lim, lim_byte,
1349 charset_base);
facdc750
RS
1350 else
1351 return simple_search (n, pat, len, len_byte, trt,
1352 pos, pos_byte, lim, lim_byte);
1353 }
1354}
1355\f
1356/* Do a simple string search N times for the string PAT,
1357 whose length is LEN/LEN_BYTE,
1358 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1359 TRT is the translation table.
f8bd51c4 1360
facdc750
RS
1361 Return the character position where the match is found.
1362 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1363
facdc750
RS
1364 This kind of search works regardless of what is in PAT and
1365 regardless of what is in TRT. It is used in cases where
1366 boyer_moore cannot work. */
1367
1368static int
1369simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1370 int n;
1371 unsigned char *pat;
1372 int len, len_byte;
1373 Lisp_Object trt;
1374 int pos, pos_byte;
1375 int lim, lim_byte;
1376{
1377 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
ab228c24 1378 int forward = n > 0;
facdc750
RS
1379
1380 if (lim > pos && multibyte)
1381 while (n > 0)
1382 {
1383 while (1)
f8bd51c4 1384 {
facdc750
RS
1385 /* Try matching at position POS. */
1386 int this_pos = pos;
1387 int this_pos_byte = pos_byte;
1388 int this_len = len;
1389 int this_len_byte = len_byte;
1390 unsigned char *p = pat;
1391 if (pos + len > lim)
1392 goto stop;
1393
1394 while (this_len > 0)
1395 {
1396 int charlen, buf_charlen;
ab228c24 1397 int pat_ch, buf_ch;
facdc750 1398
ab228c24 1399 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1400 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1401 ZV_BYTE - this_pos_byte,
1402 buf_charlen);
aff2ce94 1403 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1404
1405 if (buf_ch != pat_ch)
1406 break;
ab228c24
RS
1407
1408 this_len_byte -= charlen;
1409 this_len--;
1410 p += charlen;
1411
1412 this_pos_byte += buf_charlen;
1413 this_pos++;
facdc750
RS
1414 }
1415
1416 if (this_len == 0)
1417 {
1418 pos += len;
1419 pos_byte += len_byte;
1420 break;
1421 }
1422
1423 INC_BOTH (pos, pos_byte);
f8bd51c4 1424 }
facdc750
RS
1425
1426 n--;
1427 }
1428 else if (lim > pos)
1429 while (n > 0)
1430 {
1431 while (1)
f8bd51c4 1432 {
facdc750
RS
1433 /* Try matching at position POS. */
1434 int this_pos = pos;
1435 int this_len = len;
1436 unsigned char *p = pat;
1437
1438 if (pos + len > lim)
1439 goto stop;
1440
1441 while (this_len > 0)
1442 {
1443 int pat_ch = *p++;
1444 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1445 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1446
1447 if (buf_ch != pat_ch)
1448 break;
ab228c24
RS
1449
1450 this_len--;
1451 this_pos++;
facdc750
RS
1452 }
1453
1454 if (this_len == 0)
1455 {
1456 pos += len;
1457 break;
1458 }
1459
1460 pos++;
f8bd51c4 1461 }
facdc750
RS
1462
1463 n--;
1464 }
1465 /* Backwards search. */
1466 else if (lim < pos && multibyte)
1467 while (n < 0)
1468 {
1469 while (1)
f8bd51c4 1470 {
facdc750
RS
1471 /* Try matching at position POS. */
1472 int this_pos = pos - len;
1473 int this_pos_byte = pos_byte - len_byte;
1474 int this_len = len;
1475 int this_len_byte = len_byte;
1476 unsigned char *p = pat;
1477
1478 if (pos - len < lim)
1479 goto stop;
1480
1481 while (this_len > 0)
1482 {
1483 int charlen, buf_charlen;
ab228c24 1484 int pat_ch, buf_ch;
facdc750 1485
ab228c24 1486 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1487 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1488 ZV_BYTE - this_pos_byte,
1489 buf_charlen);
aff2ce94 1490 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1491
1492 if (buf_ch != pat_ch)
1493 break;
ab228c24
RS
1494
1495 this_len_byte -= charlen;
1496 this_len--;
1497 p += charlen;
1498 this_pos_byte += buf_charlen;
1499 this_pos++;
facdc750
RS
1500 }
1501
1502 if (this_len == 0)
1503 {
1504 pos -= len;
1505 pos_byte -= len_byte;
1506 break;
1507 }
1508
1509 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1510 }
1511
facdc750
RS
1512 n++;
1513 }
1514 else if (lim < pos)
1515 while (n < 0)
1516 {
1517 while (1)
b6d6a51c 1518 {
facdc750
RS
1519 /* Try matching at position POS. */
1520 int this_pos = pos - len;
1521 int this_len = len;
1522 unsigned char *p = pat;
1523
1524 if (pos - len < lim)
1525 goto stop;
1526
1527 while (this_len > 0)
1528 {
1529 int pat_ch = *p++;
1530 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1531 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1532
1533 if (buf_ch != pat_ch)
1534 break;
ab228c24
RS
1535 this_len--;
1536 this_pos++;
facdc750
RS
1537 }
1538
1539 if (this_len == 0)
b6d6a51c 1540 {
facdc750
RS
1541 pos -= len;
1542 break;
b6d6a51c 1543 }
facdc750
RS
1544
1545 pos--;
b6d6a51c 1546 }
facdc750
RS
1547
1548 n++;
b6d6a51c 1549 }
facdc750
RS
1550
1551 stop:
1552 if (n == 0)
aff2ce94 1553 {
ab228c24
RS
1554 if (forward)
1555 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1556 else
1557 set_search_regs (multibyte ? pos_byte : pos, len_byte);
aff2ce94
RS
1558
1559 return pos;
1560 }
facdc750
RS
1561 else if (n > 0)
1562 return -n;
1563 else
1564 return n;
1565}
1566\f
0190922f 1567/* Do Boyer-Moore search N times for the string BASE_PAT,
facdc750
RS
1568 whose length is LEN/LEN_BYTE,
1569 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1570 DIRECTION says which direction we search in.
1571 TRT and INVERSE_TRT are translation tables.
0190922f 1572 Characters in PAT are already translated by TRT.
facdc750 1573
0190922f
KH
1574 This kind of search works if all the characters in BASE_PAT that
1575 have nontrivial translation are the same aside from the last byte.
1576 This makes it possible to translate just the last byte of a
1577 character, and do so after just a simple test of the context.
1578 CHARSET_BASE is nonzero iff there is such a non-ASCII character.
facdc750
RS
1579
1580 If that criterion is not satisfied, do not call this function. */
1581
1582static int
1583boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
aff2ce94 1584 pos, pos_byte, lim, lim_byte, charset_base)
facdc750
RS
1585 int n;
1586 unsigned char *base_pat;
1587 int len, len_byte;
1588 Lisp_Object trt;
1589 Lisp_Object inverse_trt;
1590 int pos, pos_byte;
1591 int lim, lim_byte;
aff2ce94 1592 int charset_base;
facdc750
RS
1593{
1594 int direction = ((n > 0) ? 1 : -1);
1595 register int dirlen;
a968f437 1596 int infinity, limit, stride_for_teases = 0;
facdc750
RS
1597 register int *BM_tab;
1598 int *BM_tab_base;
177c0ea7 1599 register unsigned char *cursor, *p_limit;
facdc750 1600 register int i, j;
cb6792d2 1601 unsigned char *pat, *pat_end;
facdc750
RS
1602 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1603
1604 unsigned char simple_translate[0400];
0190922f
KH
1605 /* These are set to the preceding bytes of a byte to be translated
1606 if charset_base is nonzero. As the maximum byte length of a
1607 multibyte character is 4, we have to check at most three previous
1608 bytes. */
1609 int translate_prev_byte1 = 0;
1610 int translate_prev_byte2 = 0;
1611 int translate_prev_byte3 = 0;
facdc750
RS
1612
1613#ifdef C_ALLOCA
1614 int BM_tab_space[0400];
1615 BM_tab = &BM_tab_space[0];
1616#else
1617 BM_tab = (int *) alloca (0400 * sizeof (int));
1618#endif
1619 /* The general approach is that we are going to maintain that we know */
1620 /* the first (closest to the present position, in whatever direction */
1621 /* we're searching) character that could possibly be the last */
1622 /* (furthest from present position) character of a valid match. We */
1623 /* advance the state of our knowledge by looking at that character */
1624 /* and seeing whether it indeed matches the last character of the */
1625 /* pattern. If it does, we take a closer look. If it does not, we */
1626 /* move our pointer (to putative last characters) as far as is */
1627 /* logically possible. This amount of movement, which I call a */
1628 /* stride, will be the length of the pattern if the actual character */
1629 /* appears nowhere in the pattern, otherwise it will be the distance */
1630 /* from the last occurrence of that character to the end of the */
1631 /* pattern. */
1632 /* As a coding trick, an enormous stride is coded into the table for */
1633 /* characters that match the last character. This allows use of only */
1634 /* a single test, a test for having gone past the end of the */
1635 /* permissible match region, to test for both possible matches (when */
1636 /* the stride goes past the end immediately) and failure to */
177c0ea7 1637 /* match (where you get nudged past the end one stride at a time). */
facdc750
RS
1638
1639 /* Here we make a "mickey mouse" BM table. The stride of the search */
1640 /* is determined only by the last character of the putative match. */
1641 /* If that character does not match, we will stride the proper */
1642 /* distance to propose a match that superimposes it on the last */
1643 /* instance of a character that matches it (per trt), or misses */
177c0ea7 1644 /* it entirely if there is none. */
facdc750
RS
1645
1646 dirlen = len_byte * direction;
1647 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
cb6792d2
RS
1648
1649 /* Record position after the end of the pattern. */
1650 pat_end = base_pat + len_byte;
1651 /* BASE_PAT points to a character that we start scanning from.
1652 It is the first character in a forward search,
1653 the last character in a backward search. */
facdc750 1654 if (direction < 0)
cb6792d2
RS
1655 base_pat = pat_end - 1;
1656
facdc750
RS
1657 BM_tab_base = BM_tab;
1658 BM_tab += 0400;
1659 j = dirlen; /* to get it in a register */
1660 /* A character that does not appear in the pattern induces a */
1661 /* stride equal to the pattern length. */
1662 while (BM_tab_base != BM_tab)
1663 {
1664 *--BM_tab = j;
1665 *--BM_tab = j;
1666 *--BM_tab = j;
1667 *--BM_tab = j;
1668 }
1669
1670 /* We use this for translation, instead of TRT itself.
1671 We fill this in to handle the characters that actually
1672 occur in the pattern. Others don't matter anyway! */
1673 bzero (simple_translate, sizeof simple_translate);
1674 for (i = 0; i < 0400; i++)
1675 simple_translate[i] = i;
1676
0190922f
KH
1677 if (charset_base)
1678 {
1679 /* Setup translate_prev_byte1/2/3 from CHARSET_BASE. Only a
1680 byte following them are the target of translation. */
1681 int sample_char = charset_base | 0x20;
1682 unsigned char str[MAX_MULTIBYTE_LENGTH];
1683 int len = CHAR_STRING (sample_char, str);
1684
1685 translate_prev_byte1 = str[len - 2];
1686 if (len > 2)
1687 {
1688 translate_prev_byte2 = str[len - 3];
1689 if (len > 3)
1690 translate_prev_byte3 = str[len - 4];
1691 }
1692 }
1693
facdc750
RS
1694 i = 0;
1695 while (i != infinity)
1696 {
cb6792d2 1697 unsigned char *ptr = base_pat + i;
facdc750
RS
1698 i += direction;
1699 if (i == dirlen)
1700 i = infinity;
1701 if (! NILP (trt))
ca1d1d23 1702 {
620a5178
KH
1703 /* If the byte currently looking at is the last of a
1704 character to check case-equivalents, set CH to that
1705 character. An ASCII character and a non-ASCII character
1706 matching with CHARSET_BASE are to be checked. */
0190922f
KH
1707 int ch = -1;
1708
1709 if (ASCII_BYTE_P (*ptr) || ! multibyte)
1710 ch = *ptr;
620a5178 1711 else if (charset_base
86dc6ccb 1712 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
ca1d1d23 1713 {
620a5178
KH
1714 unsigned char *charstart = ptr - 1;
1715
1716 while (! (CHAR_HEAD_P (*charstart)))
1717 charstart--;
1718 ch = STRING_CHAR (charstart, ptr - charstart + 1);
0190922f
KH
1719 if (charset_base != (ch & ~CHAR_FIELD3_MASK))
1720 ch = -1;
ca1d1d23 1721 }
facdc750 1722
620a5178
KH
1723 if (ch > 0400)
1724 j = ((unsigned char) ch) | 0200;
1725 else
1726 j = *ptr;
1727
facdc750
RS
1728 if (i == infinity)
1729 stride_for_teases = BM_tab[j];
ab228c24 1730
facdc750
RS
1731 BM_tab[j] = dirlen - i;
1732 /* A translation table is accompanied by its inverse -- see */
177c0ea7 1733 /* comment following downcase_table for details */
0190922f 1734 if (ch >= 0)
ab228c24
RS
1735 {
1736 int starting_ch = ch;
620a5178 1737 int starting_j = j;
0190922f 1738
ab228c24
RS
1739 while (1)
1740 {
1741 TRANSLATE (ch, inverse_trt, ch);
1742 if (ch > 0400)
1743 j = ((unsigned char) ch) | 0200;
1744 else
1745 j = (unsigned char) ch;
1746
1747 /* For all the characters that map into CH,
1748 set up simple_translate to map the last byte
1749 into STARTING_J. */
1750 simple_translate[j] = starting_j;
1751 if (ch == starting_ch)
1752 break;
1753 BM_tab[j] = dirlen - i;
1754 }
1755 }
facdc750
RS
1756 }
1757 else
1758 {
1759 j = *ptr;
1760
1761 if (i == infinity)
1762 stride_for_teases = BM_tab[j];
1763 BM_tab[j] = dirlen - i;
ca1d1d23 1764 }
facdc750
RS
1765 /* stride_for_teases tells how much to stride if we get a */
1766 /* match on the far character but are subsequently */
1767 /* disappointed, by recording what the stride would have been */
1768 /* for that character if the last character had been */
1769 /* different. */
1770 }
1771 infinity = dirlen - infinity;
1772 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1773 /* loop invariant - POS_BYTE points at where last char (first
1774 char if reverse) of pattern would align in a possible match. */
1775 while (n != 0)
1776 {
1777 int tail_end;
1778 unsigned char *tail_end_ptr;
1779
1780 /* It's been reported that some (broken) compiler thinks that
1781 Boolean expressions in an arithmetic context are unsigned.
1782 Using an explicit ?1:0 prevents this. */
1783 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1784 < 0)
1785 return (n * (0 - direction));
1786 /* First we do the part we can by pointers (maybe nothing) */
1787 QUIT;
1788 pat = base_pat;
1789 limit = pos_byte - dirlen + direction;
67ce527d
KH
1790 if (direction > 0)
1791 {
1792 limit = BUFFER_CEILING_OF (limit);
1793 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1794 can take on without hitting edge of buffer or the gap. */
1795 limit = min (limit, pos_byte + 20000);
1796 limit = min (limit, lim_byte - 1);
1797 }
1798 else
1799 {
1800 limit = BUFFER_FLOOR_OF (limit);
1801 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1802 can take on without hitting edge of buffer or the gap. */
1803 limit = max (limit, pos_byte - 20000);
1804 limit = max (limit, lim_byte);
1805 }
facdc750
RS
1806 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1807 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1808
1809 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1810 {
facdc750
RS
1811 unsigned char *p2;
1812
1813 p_limit = BYTE_POS_ADDR (limit);
1814 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1815 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1816 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1817 {
facdc750 1818 if (direction > 0) /* worth duplicating */
ca1d1d23 1819 {
facdc750
RS
1820 /* Use signed comparison if appropriate
1821 to make cursor+infinity sure to be > p_limit.
1822 Assuming that the buffer lies in a range of addresses
1823 that are all "positive" (as ints) or all "negative",
1824 either kind of comparison will work as long
1825 as we don't step by infinity. So pick the kind
1826 that works when we do step by infinity. */
1827 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1828 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1829 cursor += BM_tab[*cursor];
ca1d1d23 1830 else
facdc750
RS
1831 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1832 cursor += BM_tab[*cursor];
1833 }
1834 else
1835 {
1836 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1837 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1838 cursor += BM_tab[*cursor];
1839 else
1840 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1841 cursor += BM_tab[*cursor];
1842 }
ca1d1d23 1843/* If you are here, cursor is beyond the end of the searched region. */
facdc750
RS
1844/* This can happen if you match on the far character of the pattern, */
1845/* because the "stride" of that character is infinity, a number able */
1846/* to throw you well beyond the end of the search. It can also */
1847/* happen if you fail to match within the permitted region and would */
1848/* otherwise try a character beyond that region */
1849 if ((cursor - p_limit) * direction <= len_byte)
1850 break; /* a small overrun is genuine */
1851 cursor -= infinity; /* large overrun = hit */
1852 i = dirlen - direction;
1853 if (! NILP (trt))
1854 {
1855 while ((i -= direction) + direction != 0)
ca1d1d23 1856 {
facdc750
RS
1857 int ch;
1858 cursor -= direction;
1859 /* Translate only the last byte of a character. */
1860 if (! multibyte
1861 || ((cursor == tail_end_ptr
1862 || CHAR_HEAD_P (cursor[1]))
1863 && (CHAR_HEAD_P (cursor[0])
0190922f
KH
1864 /* Check if this is the last byte of
1865 a translable character. */
1866 || (translate_prev_byte1 == cursor[-1]
1867 && (CHAR_HEAD_P (translate_prev_byte1)
1868 || (translate_prev_byte2 == cursor[-2]
1869 && (CHAR_HEAD_P (translate_prev_byte2)
1870 || (translate_prev_byte3 == cursor[-3]))))))))
facdc750
RS
1871 ch = simple_translate[*cursor];
1872 else
1873 ch = *cursor;
1874 if (pat[i] != ch)
1875 break;
ca1d1d23 1876 }
facdc750
RS
1877 }
1878 else
1879 {
1880 while ((i -= direction) + direction != 0)
ca1d1d23 1881 {
facdc750
RS
1882 cursor -= direction;
1883 if (pat[i] != *cursor)
1884 break;
ca1d1d23 1885 }
facdc750
RS
1886 }
1887 cursor += dirlen - i - direction; /* fix cursor */
1888 if (i + direction == 0)
1889 {
1890 int position;
0c8533c6 1891
facdc750 1892 cursor -= direction;
1113d9db 1893
facdc750
RS
1894 position = pos_byte + cursor - p2 + ((direction > 0)
1895 ? 1 - len_byte : 0);
1896 set_search_regs (position, len_byte);
ca325161 1897
facdc750
RS
1898 if ((n -= direction) != 0)
1899 cursor += dirlen; /* to resume search */
ca1d1d23 1900 else
facdc750
RS
1901 return ((direction > 0)
1902 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1903 }
facdc750
RS
1904 else
1905 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 1906 }
facdc750
RS
1907 pos_byte += cursor - p2;
1908 }
1909 else
1910 /* Now we'll pick up a clump that has to be done the hard */
1911 /* way because it covers a discontinuity */
1912 {
1913 limit = ((direction > 0)
1914 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1915 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1916 limit = ((direction > 0)
1917 ? min (limit + len_byte, lim_byte - 1)
1918 : max (limit - len_byte, lim_byte));
1919 /* LIMIT is now the last value POS_BYTE can have
1920 and still be valid for a possible match. */
1921 while (1)
ca1d1d23 1922 {
facdc750
RS
1923 /* This loop can be coded for space rather than */
1924 /* speed because it will usually run only once. */
1925 /* (the reach is at most len + 21, and typically */
177c0ea7 1926 /* does not exceed len) */
facdc750
RS
1927 while ((limit - pos_byte) * direction >= 0)
1928 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1929 /* now run the same tests to distinguish going off the */
1930 /* end, a match or a phony match. */
1931 if ((pos_byte - limit) * direction <= len_byte)
1932 break; /* ran off the end */
1933 /* Found what might be a match.
1934 Set POS_BYTE back to last (first if reverse) pos. */
1935 pos_byte -= infinity;
1936 i = dirlen - direction;
1937 while ((i -= direction) + direction != 0)
ca1d1d23 1938 {
facdc750
RS
1939 int ch;
1940 unsigned char *ptr;
1941 pos_byte -= direction;
1942 ptr = BYTE_POS_ADDR (pos_byte);
1943 /* Translate only the last byte of a character. */
1944 if (! multibyte
1945 || ((ptr == tail_end_ptr
1946 || CHAR_HEAD_P (ptr[1]))
1947 && (CHAR_HEAD_P (ptr[0])
0190922f
KH
1948 /* Check if this is the last byte of a
1949 translable character. */
1950 || (translate_prev_byte1 == ptr[-1]
1951 && (CHAR_HEAD_P (translate_prev_byte1)
1952 || (translate_prev_byte2 == ptr[-2]
1953 && (CHAR_HEAD_P (translate_prev_byte2)
1954 || translate_prev_byte3 == ptr[-3])))))))
facdc750
RS
1955 ch = simple_translate[*ptr];
1956 else
1957 ch = *ptr;
1958 if (pat[i] != ch)
1959 break;
1960 }
1961 /* Above loop has moved POS_BYTE part or all the way
1962 back to the first pos (last pos if reverse).
1963 Set it once again at the last (first if reverse) char. */
1964 pos_byte += dirlen - i- direction;
1965 if (i + direction == 0)
1966 {
1967 int position;
1968 pos_byte -= direction;
1113d9db 1969
facdc750 1970 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
0c8533c6 1971
facdc750 1972 set_search_regs (position, len_byte);
ca325161 1973
facdc750
RS
1974 if ((n -= direction) != 0)
1975 pos_byte += dirlen; /* to resume search */
ca1d1d23 1976 else
facdc750
RS
1977 return ((direction > 0)
1978 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1979 }
facdc750
RS
1980 else
1981 pos_byte += stride_for_teases;
1982 }
1983 }
1984 /* We have done one clump. Can we continue? */
1985 if ((lim_byte - pos_byte) * direction < 0)
1986 return ((0 - n) * direction);
ca1d1d23 1987 }
facdc750 1988 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 1989}
ca325161 1990
fa8ed3e0 1991/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
a7e4cdde
RS
1992 for the overall match just found in the current buffer.
1993 Also clear out the match data for registers 1 and up. */
ca325161
RS
1994
1995static void
fa8ed3e0
RS
1996set_search_regs (beg_byte, nbytes)
1997 int beg_byte, nbytes;
ca325161 1998{
a7e4cdde
RS
1999 int i;
2000
ca325161
RS
2001 /* Make sure we have registers in which to store
2002 the match position. */
2003 if (search_regs.num_regs == 0)
2004 {
2d4a771a
RS
2005 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2006 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 2007 search_regs.num_regs = 2;
ca325161
RS
2008 }
2009
a7e4cdde
RS
2010 /* Clear out the other registers. */
2011 for (i = 1; i < search_regs.num_regs; i++)
2012 {
2013 search_regs.start[i] = -1;
2014 search_regs.end[i] = -1;
2015 }
2016
fa8ed3e0
RS
2017 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2018 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 2019 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 2020}
ca1d1d23
JB
2021\f
2022/* Given a string of words separated by word delimiters,
2023 compute a regexp that matches those exact words
2024 separated by arbitrary punctuation. */
2025
2026static Lisp_Object
2027wordify (string)
2028 Lisp_Object string;
2029{
2030 register unsigned char *p, *o;
0c8533c6 2031 register int i, i_byte, len, punct_count = 0, word_count = 0;
ca1d1d23 2032 Lisp_Object val;
0c8533c6
RS
2033 int prev_c = 0;
2034 int adjust;
ca1d1d23 2035
b7826503 2036 CHECK_STRING (string);
d5db4077
KR
2037 p = SDATA (string);
2038 len = SCHARS (string);
ca1d1d23 2039
0c8533c6
RS
2040 for (i = 0, i_byte = 0; i < len; )
2041 {
2042 int c;
177c0ea7 2043
eb99a8dd 2044 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
0c8533c6
RS
2045
2046 if (SYNTAX (c) != Sword)
2047 {
2048 punct_count++;
2049 if (i > 0 && SYNTAX (prev_c) == Sword)
2050 word_count++;
2051 }
ca1d1d23 2052
0c8533c6
RS
2053 prev_c = c;
2054 }
2055
2056 if (SYNTAX (prev_c) == Sword)
2057 word_count++;
2058 if (!word_count)
b07b65aa 2059 return empty_string;
0c8533c6
RS
2060
2061 adjust = - punct_count + 5 * (word_count - 1) + 4;
8a2df937
RS
2062 if (STRING_MULTIBYTE (string))
2063 val = make_uninit_multibyte_string (len + adjust,
d5db4077 2064 SBYTES (string)
8a2df937
RS
2065 + adjust);
2066 else
2067 val = make_uninit_string (len + adjust);
ca1d1d23 2068
d5db4077 2069 o = SDATA (val);
ca1d1d23
JB
2070 *o++ = '\\';
2071 *o++ = 'b';
1e9582d4 2072 prev_c = 0;
ca1d1d23 2073
1e9582d4
RS
2074 for (i = 0, i_byte = 0; i < len; )
2075 {
2076 int c;
2077 int i_byte_orig = i_byte;
177c0ea7 2078
eb99a8dd 2079 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1e9582d4
RS
2080
2081 if (SYNTAX (c) == Sword)
2082 {
5d69fe10 2083 bcopy (SDATA (string) + i_byte_orig, o,
1e9582d4
RS
2084 i_byte - i_byte_orig);
2085 o += i_byte - i_byte_orig;
2086 }
2087 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2088 {
2089 *o++ = '\\';
2090 *o++ = 'W';
2091 *o++ = '\\';
2092 *o++ = 'W';
2093 *o++ = '*';
2094 }
2095
2096 prev_c = c;
2097 }
ca1d1d23
JB
2098
2099 *o++ = '\\';
2100 *o++ = 'b';
2101
2102 return val;
2103}
2104\f
2105DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
8c1a1077
PJ
2106 "MSearch backward: ",
2107 doc: /* Search backward from point for STRING.
2108Set point to the beginning of the occurrence found, and return point.
2109An optional second argument bounds the search; it is a buffer position.
2110The match found must not extend before that position.
2111Optional third argument, if t, means if fail just return nil (no error).
2112 If not nil and not t, position at limit of search and return nil.
2113Optional fourth argument is repeat count--search for successive occurrences.
2114
2115Search case-sensitivity is determined by the value of the variable
2116`case-fold-search', which see.
2117
2118See also the functions `match-beginning', `match-end' and `replace-match'. */)
2119 (string, bound, noerror, count)
ca1d1d23
JB
2120 Lisp_Object string, bound, noerror, count;
2121{
b819a390 2122 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
2123}
2124
6af43974 2125DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
8c1a1077
PJ
2126 doc: /* Search forward from point for STRING.
2127Set point to the end of the occurrence found, and return point.
2128An optional second argument bounds the search; it is a buffer position.
2129The match found must not extend after that position. nil is equivalent
2130 to (point-max).
2131Optional third argument, if t, means if fail just return nil (no error).
2132 If not nil and not t, move to limit of search and return nil.
2133Optional fourth argument is repeat count--search for successive occurrences.
2134
2135Search case-sensitivity is determined by the value of the variable
2136`case-fold-search', which see.
2137
2138See also the functions `match-beginning', `match-end' and `replace-match'. */)
2139 (string, bound, noerror, count)
ca1d1d23
JB
2140 Lisp_Object string, bound, noerror, count;
2141{
b819a390 2142 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
2143}
2144
2145DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
8c1a1077
PJ
2146 "sWord search backward: ",
2147 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2148Set point to the beginning of the occurrence found, and return point.
2149An optional second argument bounds the search; it is a buffer position.
2150The match found must not extend before that position.
2151Optional third argument, if t, means if fail just return nil (no error).
2152 If not nil and not t, move to limit of search and return nil.
2153Optional fourth argument is repeat count--search for successive occurrences. */)
2154 (string, bound, noerror, count)
ca1d1d23
JB
2155 Lisp_Object string, bound, noerror, count;
2156{
b819a390 2157 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2158}
2159
2160DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
8c1a1077
PJ
2161 "sWord search: ",
2162 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2163Set point to the end of the occurrence found, and return point.
2164An optional second argument bounds the search; it is a buffer position.
2165The match found must not extend after that position.
2166Optional third argument, if t, means if fail just return nil (no error).
2167 If not nil and not t, move to limit of search and return nil.
2168Optional fourth argument is repeat count--search for successive occurrences. */)
2169 (string, bound, noerror, count)
ca1d1d23
JB
2170 Lisp_Object string, bound, noerror, count;
2171{
b819a390 2172 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
2173}
2174
2175DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
8c1a1077
PJ
2176 "sRE search backward: ",
2177 doc: /* Search backward from point for match for regular expression REGEXP.
2178Set point to the beginning of the match, and return point.
2179The match found is the one starting last in the buffer
2180and yet ending before the origin of the search.
2181An optional second argument bounds the search; it is a buffer position.
2182The match found must start at or after that position.
2183Optional third argument, if t, means if fail just return nil (no error).
2184 If not nil and not t, move to limit of search and return nil.
2185Optional fourth argument is repeat count--search for successive occurrences.
2186See also the functions `match-beginning', `match-end', `match-string',
2187and `replace-match'. */)
2188 (regexp, bound, noerror, count)
19c0a730 2189 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2190{
b819a390 2191 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2192}
2193
2194DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
8c1a1077
PJ
2195 "sRE search: ",
2196 doc: /* Search forward from point for regular expression REGEXP.
2197Set point to the end of the occurrence found, and return point.
2198An optional second argument bounds the search; it is a buffer position.
2199The match found must not extend after that position.
2200Optional third argument, if t, means if fail just return nil (no error).
2201 If not nil and not t, move to limit of search and return nil.
2202Optional fourth argument is repeat count--search for successive occurrences.
2203See also the functions `match-beginning', `match-end', `match-string',
2204and `replace-match'. */)
2205 (regexp, bound, noerror, count)
19c0a730 2206 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2207{
b819a390
RS
2208 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2209}
2210
2211DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
8c1a1077
PJ
2212 "sPosix search backward: ",
2213 doc: /* Search backward from point for match for regular expression REGEXP.
2214Find the longest match in accord with Posix regular expression rules.
2215Set point to the beginning of the match, and return point.
2216The match found is the one starting last in the buffer
2217and yet ending before the origin of the search.
2218An optional second argument bounds the search; it is a buffer position.
2219The match found must start at or after that position.
2220Optional third argument, if t, means if fail just return nil (no error).
2221 If not nil and not t, move to limit of search and return nil.
2222Optional fourth argument is repeat count--search for successive occurrences.
2223See also the functions `match-beginning', `match-end', `match-string',
2224and `replace-match'. */)
2225 (regexp, bound, noerror, count)
b819a390
RS
2226 Lisp_Object regexp, bound, noerror, count;
2227{
2228 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2229}
2230
2231DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
8c1a1077
PJ
2232 "sPosix search: ",
2233 doc: /* Search forward from point for regular expression REGEXP.
2234Find the longest match in accord with Posix regular expression rules.
2235Set point to the end of the occurrence found, and return point.
2236An optional second argument bounds the search; it is a buffer position.
2237The match found must not extend after that position.
2238Optional third argument, if t, means if fail just return nil (no error).
2239 If not nil and not t, move to limit of search and return nil.
2240Optional fourth argument is repeat count--search for successive occurrences.
2241See also the functions `match-beginning', `match-end', `match-string',
2242and `replace-match'. */)
2243 (regexp, bound, noerror, count)
b819a390
RS
2244 Lisp_Object regexp, bound, noerror, count;
2245{
2246 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2247}
2248\f
d7a5ad5f 2249DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
8c1a1077 2250 doc: /* Replace text matched by last search with NEWTEXT.
4dd0c271
RS
2251Leave point at the end of the replacement text.
2252
8c1a1077
PJ
2253If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2254Otherwise maybe capitalize the whole text, or maybe just word initials,
2255based on the replaced text.
2256If the replaced text has only capital letters
2257and has at least one multiletter word, convert NEWTEXT to all caps.
4dd0c271
RS
2258Otherwise if all words are capitalized in the replaced text,
2259capitalize each word in NEWTEXT.
8c1a1077
PJ
2260
2261If third arg LITERAL is non-nil, insert NEWTEXT literally.
2262Otherwise treat `\\' as special:
2263 `\\&' in NEWTEXT means substitute original matched text.
2264 `\\N' means substitute what matched the Nth `\\(...\\)'.
2265 If Nth parens didn't match, substitute nothing.
2266 `\\\\' means insert one `\\'.
4dd0c271
RS
2267Case conversion does not apply to these substitutions.
2268
8c1a1077 2269FIXEDCASE and LITERAL are optional arguments.
8c1a1077
PJ
2270
2271The optional fourth argument STRING can be a string to modify.
2272This is meaningful when the previous match was done against STRING,
2273using `string-match'. When used this way, `replace-match'
2274creates and returns a new string made by copying STRING and replacing
2275the part of STRING that was matched.
2276
2277The optional fifth argument SUBEXP specifies a subexpression;
2278it says to replace just that subexpression with NEWTEXT,
2279rather than replacing the entire matched text.
2280This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2281`\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2282NEWTEXT in place of subexp N.
2283This is useful only after a regular expression search or match,
2284since only regular expressions have distinguished subexpressions. */)
2285 (newtext, fixedcase, literal, string, subexp)
d7a5ad5f 2286 Lisp_Object newtext, fixedcase, literal, string, subexp;
ca1d1d23
JB
2287{
2288 enum { nochange, all_caps, cap_initial } case_action;
ac3b28b1 2289 register int pos, pos_byte;
ca1d1d23 2290 int some_multiletter_word;
97832bd0 2291 int some_lowercase;
73dc8771 2292 int some_uppercase;
208767c3 2293 int some_nonuppercase_initial;
ca1d1d23 2294 register int c, prevc;
d7a5ad5f 2295 int sub;
3e18eecf 2296 int opoint, newpoint;
ca1d1d23 2297
b7826503 2298 CHECK_STRING (newtext);
ca1d1d23 2299
080c45fd 2300 if (! NILP (string))
b7826503 2301 CHECK_STRING (string);
080c45fd 2302
ca1d1d23
JB
2303 case_action = nochange; /* We tried an initialization */
2304 /* but some C compilers blew it */
4746118a
JB
2305
2306 if (search_regs.num_regs <= 0)
d72cdbfc 2307 error ("`replace-match' called before any match found");
4746118a 2308
d7a5ad5f
RS
2309 if (NILP (subexp))
2310 sub = 0;
2311 else
2312 {
b7826503 2313 CHECK_NUMBER (subexp);
d7a5ad5f
RS
2314 sub = XINT (subexp);
2315 if (sub < 0 || sub >= search_regs.num_regs)
2316 args_out_of_range (subexp, make_number (search_regs.num_regs));
2317 }
2318
080c45fd
RS
2319 if (NILP (string))
2320 {
d7a5ad5f
RS
2321 if (search_regs.start[sub] < BEGV
2322 || search_regs.start[sub] > search_regs.end[sub]
2323 || search_regs.end[sub] > ZV)
2324 args_out_of_range (make_number (search_regs.start[sub]),
2325 make_number (search_regs.end[sub]));
080c45fd
RS
2326 }
2327 else
2328 {
d7a5ad5f
RS
2329 if (search_regs.start[sub] < 0
2330 || search_regs.start[sub] > search_regs.end[sub]
d5db4077 2331 || search_regs.end[sub] > SCHARS (string))
d7a5ad5f
RS
2332 args_out_of_range (make_number (search_regs.start[sub]),
2333 make_number (search_regs.end[sub]));
080c45fd 2334 }
ca1d1d23
JB
2335
2336 if (NILP (fixedcase))
2337 {
2338 /* Decide how to casify by examining the matched text. */
ac3b28b1 2339 int last;
ca1d1d23 2340
ac3b28b1
KH
2341 pos = search_regs.start[sub];
2342 last = search_regs.end[sub];
fa8ed3e0
RS
2343
2344 if (NILP (string))
ac3b28b1 2345 pos_byte = CHAR_TO_BYTE (pos);
fa8ed3e0 2346 else
ac3b28b1 2347 pos_byte = string_char_to_byte (string, pos);
fa8ed3e0 2348
ca1d1d23
JB
2349 prevc = '\n';
2350 case_action = all_caps;
2351
2352 /* some_multiletter_word is set nonzero if any original word
2353 is more than one letter long. */
2354 some_multiletter_word = 0;
97832bd0 2355 some_lowercase = 0;
208767c3 2356 some_nonuppercase_initial = 0;
73dc8771 2357 some_uppercase = 0;
ca1d1d23 2358
ac3b28b1 2359 while (pos < last)
ca1d1d23 2360 {
080c45fd 2361 if (NILP (string))
ac3b28b1
KH
2362 {
2363 c = FETCH_CHAR (pos_byte);
2364 INC_BOTH (pos, pos_byte);
2365 }
080c45fd 2366 else
ac3b28b1 2367 FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
080c45fd 2368
ca1d1d23
JB
2369 if (LOWERCASEP (c))
2370 {
2371 /* Cannot be all caps if any original char is lower case */
2372
97832bd0 2373 some_lowercase = 1;
ca1d1d23 2374 if (SYNTAX (prevc) != Sword)
208767c3 2375 some_nonuppercase_initial = 1;
ca1d1d23
JB
2376 else
2377 some_multiletter_word = 1;
2378 }
d16c2b66 2379 else if (UPPERCASEP (c))
ca1d1d23 2380 {
73dc8771 2381 some_uppercase = 1;
97832bd0 2382 if (SYNTAX (prevc) != Sword)
c4d460ce 2383 ;
97832bd0 2384 else
ca1d1d23
JB
2385 some_multiletter_word = 1;
2386 }
208767c3
RS
2387 else
2388 {
2389 /* If the initial is a caseless word constituent,
2390 treat that like a lowercase initial. */
2391 if (SYNTAX (prevc) != Sword)
2392 some_nonuppercase_initial = 1;
2393 }
ca1d1d23
JB
2394
2395 prevc = c;
2396 }
2397
97832bd0
RS
2398 /* Convert to all caps if the old text is all caps
2399 and has at least one multiletter word. */
2400 if (! some_lowercase && some_multiletter_word)
2401 case_action = all_caps;
c4d460ce 2402 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2403 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2404 case_action = cap_initial;
208767c3 2405 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2406 /* Should x -> yz, operating on X, give Yz or YZ?
2407 We'll assume the latter. */
2408 case_action = all_caps;
97832bd0
RS
2409 else
2410 case_action = nochange;
ca1d1d23
JB
2411 }
2412
080c45fd
RS
2413 /* Do replacement in a string. */
2414 if (!NILP (string))
2415 {
2416 Lisp_Object before, after;
2417
2418 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2419 make_number (search_regs.start[sub]));
2420 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2421
636a5e28
RS
2422 /* Substitute parts of the match into NEWTEXT
2423 if desired. */
080c45fd
RS
2424 if (NILP (literal))
2425 {
d131e79c
RS
2426 int lastpos = 0;
2427 int lastpos_byte = 0;
080c45fd
RS
2428 /* We build up the substituted string in ACCUM. */
2429 Lisp_Object accum;
2430 Lisp_Object middle;
d5db4077 2431 int length = SBYTES (newtext);
080c45fd
RS
2432
2433 accum = Qnil;
2434
ac3b28b1 2435 for (pos_byte = 0, pos = 0; pos_byte < length;)
080c45fd
RS
2436 {
2437 int substart = -1;
6bbd7a29 2438 int subend = 0;
1e79ec24 2439 int delbackslash = 0;
080c45fd 2440
0c8533c6
RS
2441 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2442
080c45fd
RS
2443 if (c == '\\')
2444 {
0c8533c6 2445 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
177c0ea7 2446
080c45fd
RS
2447 if (c == '&')
2448 {
d7a5ad5f
RS
2449 substart = search_regs.start[sub];
2450 subend = search_regs.end[sub];
080c45fd 2451 }
5fbbc83b 2452 else if (c >= '1' && c <= '9')
080c45fd 2453 {
5fbbc83b
RS
2454 if (search_regs.start[c - '0'] >= 0
2455 && c <= search_regs.num_regs + '0')
080c45fd
RS
2456 {
2457 substart = search_regs.start[c - '0'];
2458 subend = search_regs.end[c - '0'];
2459 }
5fbbc83b
RS
2460 else
2461 {
2462 /* If that subexp did not match,
2463 replace \\N with nothing. */
2464 substart = 0;
2465 subend = 0;
2466 }
080c45fd 2467 }
1e79ec24
KH
2468 else if (c == '\\')
2469 delbackslash = 1;
636a5e28
RS
2470 else
2471 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2472 }
2473 if (substart >= 0)
2474 {
d131e79c
RS
2475 if (pos - 2 != lastpos)
2476 middle = substring_both (newtext, lastpos,
2477 lastpos_byte,
2478 pos - 2, pos_byte - 2);
080c45fd
RS
2479 else
2480 middle = Qnil;
2481 accum = concat3 (accum, middle,
0c8533c6
RS
2482 Fsubstring (string,
2483 make_number (substart),
080c45fd
RS
2484 make_number (subend)));
2485 lastpos = pos;
0c8533c6 2486 lastpos_byte = pos_byte;
080c45fd 2487 }
1e79ec24
KH
2488 else if (delbackslash)
2489 {
d131e79c
RS
2490 middle = substring_both (newtext, lastpos,
2491 lastpos_byte,
2492 pos - 1, pos_byte - 1);
0c8533c6 2493
1e79ec24
KH
2494 accum = concat2 (accum, middle);
2495 lastpos = pos;
0c8533c6 2496 lastpos_byte = pos_byte;
1e79ec24 2497 }
080c45fd
RS
2498 }
2499
d131e79c
RS
2500 if (pos != lastpos)
2501 middle = substring_both (newtext, lastpos,
2502 lastpos_byte,
0c8533c6 2503 pos, pos_byte);
080c45fd
RS
2504 else
2505 middle = Qnil;
2506
2507 newtext = concat2 (accum, middle);
2508 }
2509
636a5e28 2510 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2511 if (case_action == all_caps)
2512 newtext = Fupcase (newtext);
2513 else if (case_action == cap_initial)
2b2eead9 2514 newtext = Fupcase_initials (newtext);
080c45fd
RS
2515
2516 return concat3 (before, newtext, after);
2517 }
2518
09c4719e 2519 /* Record point, then move (quietly) to the start of the match. */
9160906f 2520 if (PT >= search_regs.end[sub])
b0eba991 2521 opoint = PT - ZV;
9160906f
RS
2522 else if (PT > search_regs.start[sub])
2523 opoint = search_regs.end[sub] - ZV;
b0eba991
RS
2524 else
2525 opoint = PT;
2526
886ed6ec
RS
2527 /* If we want non-literal replacement,
2528 perform substitution on the replacement string. */
2529 if (NILP (literal))
ca1d1d23 2530 {
d5db4077 2531 int length = SBYTES (newtext);
68e69fbd
RS
2532 unsigned char *substed;
2533 int substed_alloc_size, substed_len;
3bc25e52
KH
2534 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2535 int str_multibyte = STRING_MULTIBYTE (newtext);
2536 Lisp_Object rev_tbl;
886ed6ec 2537 int really_changed = 0;
3bc25e52
KH
2538
2539 rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2540 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2541 make_number (0))
2542 : Qnil);
ac3b28b1 2543
68e69fbd
RS
2544 substed_alloc_size = length * 2 + 100;
2545 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2546 substed_len = 0;
2547
3bc25e52
KH
2548 /* Go thru NEWTEXT, producing the actual text to insert in
2549 SUBSTED while adjusting multibyteness to that of the current
2550 buffer. */
ca1d1d23 2551
ac3b28b1 2552 for (pos_byte = 0, pos = 0; pos_byte < length;)
ca1d1d23 2553 {
68e69fbd 2554 unsigned char str[MAX_MULTIBYTE_LENGTH];
f8ce8a0d
GM
2555 unsigned char *add_stuff = NULL;
2556 int add_len = 0;
68e69fbd 2557 int idx = -1;
9a76659d 2558
3bc25e52
KH
2559 if (str_multibyte)
2560 {
eb99a8dd 2561 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
3bc25e52
KH
2562 if (!buf_multibyte)
2563 c = multibyte_char_to_unibyte (c, rev_tbl);
2564 }
2565 else
2566 {
2567 /* Note that we don't have to increment POS. */
5d69fe10 2568 c = SREF (newtext, pos_byte++);
3bc25e52
KH
2569 if (buf_multibyte)
2570 c = unibyte_char_to_multibyte (c);
2571 }
ac3b28b1 2572
68e69fbd
RS
2573 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2574 or set IDX to a match index, which means put that part
2575 of the buffer text into SUBSTED. */
2576
ca1d1d23
JB
2577 if (c == '\\')
2578 {
886ed6ec
RS
2579 really_changed = 1;
2580
3bc25e52
KH
2581 if (str_multibyte)
2582 {
eb99a8dd
KH
2583 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2584 pos, pos_byte);
3bc25e52
KH
2585 if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2586 c = multibyte_char_to_unibyte (c, rev_tbl);
2587 }
2588 else
2589 {
d5db4077 2590 c = SREF (newtext, pos_byte++);
3bc25e52
KH
2591 if (buf_multibyte)
2592 c = unibyte_char_to_multibyte (c);
2593 }
2594
ca1d1d23 2595 if (c == '&')
68e69fbd 2596 idx = sub;
78445046 2597 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
2598 {
2599 if (search_regs.start[c - '0'] >= 1)
68e69fbd 2600 idx = c - '0';
ca1d1d23 2601 }
636a5e28 2602 else if (c == '\\')
68e69fbd 2603 add_len = 1, add_stuff = "\\";
636a5e28 2604 else
3bc25e52
KH
2605 {
2606 xfree (substed);
2607 error ("Invalid use of `\\' in replacement text");
2608 }
ca1d1d23
JB
2609 }
2610 else
68e69fbd
RS
2611 {
2612 add_len = CHAR_STRING (c, str);
2613 add_stuff = str;
2614 }
2615
2616 /* If we want to copy part of a previous match,
2617 set up ADD_STUFF and ADD_LEN to point to it. */
2618 if (idx >= 0)
2619 {
2620 int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2621 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2622 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2623 move_gap (search_regs.start[idx]);
2624 add_stuff = BYTE_POS_ADDR (begbyte);
2625 }
2626
2627 /* Now the stuff we want to add to SUBSTED
2628 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2629
2630 /* Make sure SUBSTED is big enough. */
2631 if (substed_len + add_len >= substed_alloc_size)
2632 {
2633 substed_alloc_size = substed_len + add_len + 500;
2634 substed = (unsigned char *) xrealloc (substed,
2635 substed_alloc_size + 1);
2636 }
2637
2638 /* Now add to the end of SUBSTED. */
f8ce8a0d
GM
2639 if (add_stuff)
2640 {
2641 bcopy (add_stuff, substed + substed_len, add_len);
2642 substed_len += add_len;
2643 }
ca1d1d23 2644 }
68e69fbd 2645
886ed6ec 2646 if (really_changed)
80460525
KH
2647 {
2648 if (buf_multibyte)
2649 {
2650 int nchars = multibyte_chars_in_text (substed, substed_len);
68e69fbd 2651
80460525
KH
2652 newtext = make_multibyte_string (substed, nchars, substed_len);
2653 }
2654 else
2655 newtext = make_unibyte_string (substed, substed_len);
2656 }
68e69fbd 2657 xfree (substed);
ca1d1d23
JB
2658 }
2659
886ed6ec
RS
2660 /* Replace the old text with the new in the cleanest possible way. */
2661 replace_range (search_regs.start[sub], search_regs.end[sub],
2662 newtext, 1, 0, 1);
d5db4077 2663 newpoint = search_regs.start[sub] + SCHARS (newtext);
ca1d1d23
JB
2664
2665 if (case_action == all_caps)
886ed6ec
RS
2666 Fupcase_region (make_number (search_regs.start[sub]),
2667 make_number (newpoint));
ca1d1d23 2668 else if (case_action == cap_initial)
886ed6ec
RS
2669 Fupcase_initials_region (make_number (search_regs.start[sub]),
2670 make_number (newpoint));
3e18eecf 2671
98e942e0
RS
2672 /* Adjust search data for this change. */
2673 {
5b88a2c5 2674 int oldend = search_regs.end[sub];
41c01205 2675 int oldstart = search_regs.start[sub];
98e942e0
RS
2676 int change = newpoint - search_regs.end[sub];
2677 int i;
2678
2679 for (i = 0; i < search_regs.num_regs; i++)
2680 {
41c01205 2681 if (search_regs.start[i] >= oldend)
98e942e0 2682 search_regs.start[i] += change;
41c01205
DK
2683 else if (search_regs.start[i] > oldstart)
2684 search_regs.start[i] = oldstart;
2685 if (search_regs.end[i] >= oldend)
98e942e0 2686 search_regs.end[i] += change;
41c01205
DK
2687 else if (search_regs.end[i] > oldstart)
2688 search_regs.end[i] = oldstart;
98e942e0
RS
2689 }
2690 }
2691
b0eba991 2692 /* Put point back where it was in the text. */
8d808a65 2693 if (opoint <= 0)
fa8ed3e0 2694 TEMP_SET_PT (opoint + ZV);
b0eba991 2695 else
fa8ed3e0 2696 TEMP_SET_PT (opoint);
b0eba991
RS
2697
2698 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2699 move_if_not_intangible (newpoint);
177c0ea7 2700
ca1d1d23
JB
2701 return Qnil;
2702}
2703\f
2704static Lisp_Object
2705match_limit (num, beginningp)
2706 Lisp_Object num;
2707 int beginningp;
2708{
2709 register int n;
2710
b7826503 2711 CHECK_NUMBER (num);
ca1d1d23 2712 n = XINT (num);
f90a5bf5 2713 if (n < 0)
bd2cbd56 2714 args_out_of_range (num, make_number (0));
f90a5bf5
RS
2715 if (search_regs.num_regs <= 0)
2716 error ("No match data, because no search succeeded");
9b9ceb61 2717 if (n >= search_regs.num_regs
4746118a 2718 || search_regs.start[n] < 0)
ca1d1d23
JB
2719 return Qnil;
2720 return (make_number ((beginningp) ? search_regs.start[n]
2721 : search_regs.end[n]));
2722}
2723
2724DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
8c1a1077
PJ
2725 doc: /* Return position of start of text matched by last search.
2726SUBEXP, a number, specifies which parenthesized expression in the last
2727 regexp.
2728Value is nil if SUBEXPth pair didn't match, or there were less than
2729 SUBEXP pairs.
2730Zero means the entire text matched by the whole regexp or whole string. */)
2731 (subexp)
5806161b 2732 Lisp_Object subexp;
ca1d1d23 2733{
5806161b 2734 return match_limit (subexp, 1);
ca1d1d23
JB
2735}
2736
2737DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
8c1a1077
PJ
2738 doc: /* Return position of end of text matched by last search.
2739SUBEXP, a number, specifies which parenthesized expression in the last
2740 regexp.
2741Value is nil if SUBEXPth pair didn't match, or there were less than
2742 SUBEXP pairs.
2743Zero means the entire text matched by the whole regexp or whole string. */)
2744 (subexp)
5806161b 2745 Lisp_Object subexp;
ca1d1d23 2746{
5806161b 2747 return match_limit (subexp, 0);
177c0ea7 2748}
ca1d1d23 2749
abd0071c 2750DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
8c1a1077
PJ
2751 doc: /* Return a list containing all info on what the last search matched.
2752Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2753All the elements are markers or nil (nil if the Nth pair didn't match)
2754if the last match was on a buffer; integers or nil if a string was matched.
2755Use `store-match-data' to reinstate the data in this list.
2756
41c01205
DK
2757If INTEGERS (the optional first argument) is non-nil, always use
2758integers \(rather than markers) to represent buffer positions. In
2759this case, and if the last match was in a buffer, the buffer will get
2760stored as one additional element at the end of the list.
2761
abd0071c
KS
2762If REUSE is a list, reuse it as part of the value. If REUSE is long
2763enough to hold all the values, and if INTEGERS is non-nil, no consing
2764is done.
2765
2766If optional third arg RESEAT is non-nil, any previous markers on the
2767REUSE list will be modified to point to nowhere.
2768
140a6b7e 2769Return value is undefined if the last search failed. */)
abd0071c
KS
2770 (integers, reuse, reseat)
2771 Lisp_Object integers, reuse, reseat;
ca1d1d23 2772{
56256c2a 2773 Lisp_Object tail, prev;
4746118a 2774 Lisp_Object *data;
ca1d1d23
JB
2775 int i, len;
2776
abd0071c
KS
2777 if (!NILP (reseat))
2778 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2779 if (MARKERP (XCAR (tail)))
2780 {
51f10faa 2781 unchain_marker (XMARKER (XCAR (tail)));
abd0071c
KS
2782 XSETCAR (tail, Qnil);
2783 }
2784
daa37602 2785 if (NILP (last_thing_searched))
c36bcf1b 2786 return Qnil;
daa37602 2787
6bbd7a29
GM
2788 prev = Qnil;
2789
41c01205 2790 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
4746118a
JB
2791 * sizeof (Lisp_Object));
2792
41c01205 2793 len = 0;
4746118a 2794 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2795 {
2796 int start = search_regs.start[i];
2797 if (start >= 0)
2798 {
56256c2a
RS
2799 if (EQ (last_thing_searched, Qt)
2800 || ! NILP (integers))
ca1d1d23 2801 {
c235cce7
KH
2802 XSETFASTINT (data[2 * i], start);
2803 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2804 }
0ed62dc7 2805 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2806 {
2807 data[2 * i] = Fmake_marker ();
daa37602
JB
2808 Fset_marker (data[2 * i],
2809 make_number (start),
2810 last_thing_searched);
ca1d1d23
JB
2811 data[2 * i + 1] = Fmake_marker ();
2812 Fset_marker (data[2 * i + 1],
177c0ea7 2813 make_number (search_regs.end[i]),
daa37602 2814 last_thing_searched);
ca1d1d23 2815 }
daa37602
JB
2816 else
2817 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2818 abort ();
2819
abd0071c 2820 len = 2 * i + 2;
ca1d1d23
JB
2821 }
2822 else
abd0071c 2823 data[2 * i] = data[2 * i + 1] = Qnil;
ca1d1d23 2824 }
56256c2a 2825
bd2cbd56 2826 if (BUFFERP (last_thing_searched) && !NILP (integers))
41c01205 2827 {
bd2cbd56 2828 data[len] = last_thing_searched;
41c01205
DK
2829 len++;
2830 }
2831
56256c2a
RS
2832 /* If REUSE is not usable, cons up the values and return them. */
2833 if (! CONSP (reuse))
41c01205 2834 return Flist (len, data);
56256c2a
RS
2835
2836 /* If REUSE is a list, store as many value elements as will fit
2837 into the elements of REUSE. */
2838 for (i = 0, tail = reuse; CONSP (tail);
c1d497be 2839 i++, tail = XCDR (tail))
56256c2a 2840 {
41c01205 2841 if (i < len)
f3fbd155 2842 XSETCAR (tail, data[i]);
56256c2a 2843 else
f3fbd155 2844 XSETCAR (tail, Qnil);
56256c2a
RS
2845 prev = tail;
2846 }
2847
2848 /* If we couldn't fit all value elements into REUSE,
2849 cons up the rest of them and add them to the end of REUSE. */
41c01205
DK
2850 if (i < len)
2851 XSETCDR (prev, Flist (len - i, data + i));
56256c2a
RS
2852
2853 return reuse;
ca1d1d23
JB
2854}
2855
51f10faa
KS
2856/* Internal usage only:
2857 If RESEAT is `evaporate', put the markers back on the free list
2858 immediately. No other references to the markers must exist in this case,
2859 so it is used only internally on the unwind stack and save-match-data from
2860 Lisp. */
ca1d1d23 2861
abd0071c 2862DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
8c1a1077 2863 doc: /* Set internal data on last search match from elements of LIST.
abd0071c
KS
2864LIST should have been created by calling `match-data' previously.
2865
51f10faa 2866If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
abd0071c
KS
2867 (list, reseat)
2868 register Lisp_Object list, reseat;
ca1d1d23
JB
2869{
2870 register int i;
2871 register Lisp_Object marker;
2872
7074fde6
FP
2873 if (running_asynch_code)
2874 save_search_regs ();
2875
ca1d1d23 2876 if (!CONSP (list) && !NILP (list))
b37902c8 2877 list = wrong_type_argument (Qconsp, list);
ca1d1d23 2878
41c01205
DK
2879 /* Unless we find a marker with a buffer or an explicit buffer
2880 in LIST, assume that this match data came from a string. */
daa37602
JB
2881 last_thing_searched = Qt;
2882
4746118a
JB
2883 /* Allocate registers if they don't already exist. */
2884 {
d084e942 2885 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
2886
2887 if (length > search_regs.num_regs)
2888 {
1113d9db
JB
2889 if (search_regs.num_regs == 0)
2890 {
2891 search_regs.start
2892 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2893 search_regs.end
2894 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2895 }
4746118a 2896 else
1113d9db
JB
2897 {
2898 search_regs.start
2899 = (regoff_t *) xrealloc (search_regs.start,
2900 length * sizeof (regoff_t));
2901 search_regs.end
2902 = (regoff_t *) xrealloc (search_regs.end,
2903 length * sizeof (regoff_t));
2904 }
4746118a 2905
e62371e9
KH
2906 for (i = search_regs.num_regs; i < length; i++)
2907 search_regs.start[i] = -1;
2908
487282dc 2909 search_regs.num_regs = length;
4746118a 2910 }
ca1d1d23 2911
abd0071c 2912 for (i = 0; CONSP (list); i++)
41c01205 2913 {
abd0071c 2914 marker = XCAR (list);
bd2cbd56 2915 if (BUFFERP (marker))
c3762cbd 2916 {
bd2cbd56 2917 last_thing_searched = marker;
c3762cbd
DK
2918 break;
2919 }
2920 if (i >= length)
2921 break;
41c01205
DK
2922 if (NILP (marker))
2923 {
2924 search_regs.start[i] = -1;
abd0071c 2925 list = XCDR (list);
41c01205
DK
2926 }
2927 else
2928 {
2929 int from;
abd0071c 2930 Lisp_Object m;
e2811828 2931
abd0071c 2932 m = marker;
41c01205
DK
2933 if (MARKERP (marker))
2934 {
2935 if (XMARKER (marker)->buffer == 0)
2936 XSETFASTINT (marker, 0);
2937 else
2938 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2939 }
e2811828 2940
41c01205
DK
2941 CHECK_NUMBER_COERCE_MARKER (marker);
2942 from = XINT (marker);
e2811828 2943
abd0071c
KS
2944 if (!NILP (reseat) && MARKERP (m))
2945 {
2946 if (EQ (reseat, Qevaporate))
2947 free_marker (m);
2948 else
2949 unchain_marker (XMARKER (m));
9ad54a7e 2950 XSETCAR (list, Qnil);
abd0071c
KS
2951 }
2952
2953 if ((list = XCDR (list), !CONSP (list)))
2954 break;
2955
2956 m = marker = XCAR (list);
2957
41c01205
DK
2958 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2959 XSETFASTINT (marker, 0);
e2811828 2960
41c01205
DK
2961 CHECK_NUMBER_COERCE_MARKER (marker);
2962 search_regs.start[i] = from;
2963 search_regs.end[i] = XINT (marker);
abd0071c
KS
2964
2965 if (!NILP (reseat) && MARKERP (m))
2966 {
2967 if (EQ (reseat, Qevaporate))
2968 free_marker (m);
2969 else
2970 unchain_marker (XMARKER (m));
9ad54a7e 2971 XSETCAR (list, Qnil);
abd0071c 2972 }
41c01205 2973 }
abd0071c 2974 list = XCDR (list);
41c01205 2975 }
ca1d1d23 2976
41c01205
DK
2977 for (; i < search_regs.num_regs; i++)
2978 search_regs.start[i] = -1;
2979 }
ca1d1d23 2980
177c0ea7 2981 return Qnil;
ca1d1d23
JB
2982}
2983
7074fde6
FP
2984/* If non-zero the match data have been saved in saved_search_regs
2985 during the execution of a sentinel or filter. */
75ebf74b 2986static int search_regs_saved;
7074fde6 2987static struct re_registers saved_search_regs;
41c01205 2988static Lisp_Object saved_last_thing_searched;
7074fde6
FP
2989
2990/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2991 if asynchronous code (filter or sentinel) is running. */
2992static void
2993save_search_regs ()
2994{
2995 if (!search_regs_saved)
2996 {
2997 saved_search_regs.num_regs = search_regs.num_regs;
2998 saved_search_regs.start = search_regs.start;
2999 saved_search_regs.end = search_regs.end;
41c01205
DK
3000 saved_last_thing_searched = last_thing_searched;
3001 last_thing_searched = Qnil;
7074fde6 3002 search_regs.num_regs = 0;
2d4a771a
RS
3003 search_regs.start = 0;
3004 search_regs.end = 0;
7074fde6
FP
3005
3006 search_regs_saved = 1;
3007 }
3008}
3009
3010/* Called upon exit from filters and sentinels. */
3011void
abd0071c 3012restore_search_regs ()
7074fde6
FP
3013{
3014 if (search_regs_saved)
3015 {
3016 if (search_regs.num_regs > 0)
3017 {
3018 xfree (search_regs.start);
3019 xfree (search_regs.end);
3020 }
3021 search_regs.num_regs = saved_search_regs.num_regs;
3022 search_regs.start = saved_search_regs.start;
3023 search_regs.end = saved_search_regs.end;
41c01205
DK
3024 last_thing_searched = saved_last_thing_searched;
3025 saved_last_thing_searched = Qnil;
7074fde6
FP
3026 search_regs_saved = 0;
3027 }
3028}
3029
abd0071c
KS
3030static Lisp_Object
3031unwind_set_match_data (list)
3032 Lisp_Object list;
3033{
51f10faa 3034 /* It is safe to free (evaporate) the markers immediately. */
abd0071c
KS
3035 return Fset_match_data (list, Qevaporate);
3036}
3037
3038/* Called to unwind protect the match data. */
3039void
3040record_unwind_save_match_data ()
3041{
3042 record_unwind_protect (unwind_set_match_data,
3043 Fmatch_data (Qnil, Qnil, Qnil));
3044}
3045
ca1d1d23
JB
3046/* Quote a string to inactivate reg-expr chars */
3047
3048DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
8c1a1077
PJ
3049 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
3050 (string)
5806161b 3051 Lisp_Object string;
ca1d1d23
JB
3052{
3053 register unsigned char *in, *out, *end;
3054 register unsigned char *temp;
0c8533c6 3055 int backslashes_added = 0;
ca1d1d23 3056
b7826503 3057 CHECK_STRING (string);
ca1d1d23 3058
d5db4077 3059 temp = (unsigned char *) alloca (SBYTES (string) * 2);
ca1d1d23
JB
3060
3061 /* Now copy the data into the new string, inserting escapes. */
3062
d5db4077
KR
3063 in = SDATA (string);
3064 end = in + SBYTES (string);
177c0ea7 3065 out = temp;
ca1d1d23
JB
3066
3067 for (; in != end; in++)
3068 {
3069 if (*in == '[' || *in == ']'
3070 || *in == '*' || *in == '.' || *in == '\\'
3071 || *in == '?' || *in == '+'
3072 || *in == '^' || *in == '$')
0c8533c6 3073 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
3074 *out++ = *in;
3075 }
3076
3f8100f1 3077 return make_specified_string (temp,
d5db4077 3078 SCHARS (string) + backslashes_added,
3f8100f1
RS
3079 out - temp,
3080 STRING_MULTIBYTE (string));
ca1d1d23 3081}
177c0ea7 3082\f
dfcf069d 3083void
ca1d1d23
JB
3084syms_of_search ()
3085{
3086 register int i;
3087
487282dc
KH
3088 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3089 {
3090 searchbufs[i].buf.allocated = 100;
b23c0a83 3091 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
487282dc
KH
3092 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3093 searchbufs[i].regexp = Qnil;
ecdb561e 3094 searchbufs[i].whitespace_regexp = Qnil;
487282dc 3095 staticpro (&searchbufs[i].regexp);
aa77b5ce 3096 staticpro (&searchbufs[i].whitespace_regexp);
487282dc
KH
3097 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3098 }
3099 searchbuf_head = &searchbufs[0];
ca1d1d23
JB
3100
3101 Qsearch_failed = intern ("search-failed");
3102 staticpro (&Qsearch_failed);
3103 Qinvalid_regexp = intern ("invalid-regexp");
3104 staticpro (&Qinvalid_regexp);
3105
3106 Fput (Qsearch_failed, Qerror_conditions,
3107 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3108 Fput (Qsearch_failed, Qerror_message,
3109 build_string ("Search failed"));
3110
3111 Fput (Qinvalid_regexp, Qerror_conditions,
3112 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3113 Fput (Qinvalid_regexp, Qerror_message,
3114 build_string ("Invalid regexp"));
3115
daa37602
JB
3116 last_thing_searched = Qnil;
3117 staticpro (&last_thing_searched);
3118
0f6af254
DK
3119 saved_last_thing_searched = Qnil;
3120 staticpro (&saved_last_thing_searched);
3121
41a33295 3122 DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
e2811828 3123 doc: /* Regexp to substitute for bunches of spaces in regexp search.
f31a9a68
RS
3124Some commands use this for user-specified regexps.
3125Spaces that occur inside character classes or repetition operators
3126or other such regexp constructs are not replaced with this.
3127A value of nil (which is the normal value) means treat spaces literally. */);
41a33295 3128 Vsearch_spaces_regexp = Qnil;
f31a9a68 3129
ca1d1d23 3130 defsubr (&Slooking_at);
b819a390
RS
3131 defsubr (&Sposix_looking_at);
3132 defsubr (&Sstring_match);
3133 defsubr (&Sposix_string_match);
ca1d1d23
JB
3134 defsubr (&Ssearch_forward);
3135 defsubr (&Ssearch_backward);
3136 defsubr (&Sword_search_forward);
3137 defsubr (&Sword_search_backward);
3138 defsubr (&Sre_search_forward);
3139 defsubr (&Sre_search_backward);
b819a390
RS
3140 defsubr (&Sposix_search_forward);
3141 defsubr (&Sposix_search_backward);
ca1d1d23
JB
3142 defsubr (&Sreplace_match);
3143 defsubr (&Smatch_beginning);
3144 defsubr (&Smatch_end);
3145 defsubr (&Smatch_data);
3f1c005b 3146 defsubr (&Sset_match_data);
ca1d1d23
JB
3147 defsubr (&Sregexp_quote);
3148}
ab5796a9
MB
3149
3150/* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3151 (do not change this comment) */