*** empty log message ***
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
0b5538bd 2 Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2002, 2003,
aaef169d 3 2004, 2005, 2006 Free Software Foundation, Inc.
ca1d1d23
JB
4
5This file is part of GNU Emacs.
6
7GNU Emacs is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
7c938215 9the Free Software Foundation; either version 2, or (at your option)
ca1d1d23
JB
10any later version.
11
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU Emacs; see the file COPYING. If not, write to
4fc5845f
LK
19the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20Boston, MA 02110-1301, USA. */
ca1d1d23
JB
21
22
18160b98 23#include <config.h>
ca1d1d23
JB
24#include "lisp.h"
25#include "syntax.h"
5679531d 26#include "category.h"
ca1d1d23 27#include "buffer.h"
5679531d 28#include "charset.h"
9169c321 29#include "region-cache.h"
ca1d1d23 30#include "commands.h"
9ac0d9e0 31#include "blockinput.h"
bf1760bb 32#include "intervals.h"
4746118a 33
ca1d1d23
JB
34#include <sys/types.h>
35#include "regex.h"
36
1d288aef 37#define REGEXP_CACHE_SIZE 20
ca1d1d23 38
487282dc
KH
39/* If the regexp is non-nil, then the buffer contains the compiled form
40 of that regexp, suitable for searching. */
1d288aef
RS
41struct regexp_cache
42{
487282dc 43 struct regexp_cache *next;
ecdb561e 44 Lisp_Object regexp, whitespace_regexp;
487282dc
KH
45 struct re_pattern_buffer buf;
46 char fastmap[0400];
b819a390
RS
47 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
48 char posix;
487282dc 49};
ca1d1d23 50
487282dc
KH
51/* The instances of that struct. */
52struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 53
487282dc
KH
54/* The head of the linked list; points to the most recently used buffer. */
55struct regexp_cache *searchbuf_head;
ca1d1d23 56
ca1d1d23 57
4746118a
JB
58/* Every call to re_match, etc., must pass &search_regs as the regs
59 argument unless you can show it is unnecessary (i.e., if re_match
60 is certainly going to be called again before region-around-match
61 can be called).
62
63 Since the registers are now dynamically allocated, we need to make
64 sure not to refer to the Nth register before checking that it has
1113d9db
JB
65 been allocated by checking search_regs.num_regs.
66
67 The regex code keeps track of whether it has allocated the search
487282dc
KH
68 buffer using bits in the re_pattern_buffer. This means that whenever
69 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
70 allocated any registers, and will allocate new registers the next
71 time you call a searching or matching function. Therefore, we need
72 to call re_set_registers after compiling a new pattern or after
73 setting the match registers, so that the regex functions will be
74 able to free or re-allocate it properly. */
ca1d1d23
JB
75static struct re_registers search_regs;
76
daa37602
JB
77/* The buffer in which the last search was performed, or
78 Qt if the last search was done in a string;
79 Qnil if no searching has been done yet. */
80static Lisp_Object last_thing_searched;
ca1d1d23 81
8e6208c5 82/* error condition signaled when regexp compile_pattern fails */
ca1d1d23
JB
83
84Lisp_Object Qinvalid_regexp;
85
06f77a8a
KS
86/* Error condition used for failing searches */
87Lisp_Object Qsearch_failed;
88
41a33295 89Lisp_Object Vsearch_spaces_regexp;
f31a9a68 90
ca325161 91static void set_search_regs ();
044f81f1 92static void save_search_regs ();
facdc750
RS
93static int simple_search ();
94static int boyer_moore ();
b819a390 95static int search_buffer ();
01d09305 96static void matcher_overflow () NO_RETURN;
b819a390 97
ca1d1d23
JB
98static void
99matcher_overflow ()
100{
101 error ("Stack overflow in regexp matcher");
102}
103
b819a390
RS
104/* Compile a regexp and signal a Lisp error if anything goes wrong.
105 PATTERN is the pattern to compile.
106 CP is the place to put the result.
facdc750 107 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
108 REGP is the structure that says where to store the "register"
109 values that will result from matching this pattern.
110 If it is 0, we should compile the pattern not to record any
111 subexpression bounds.
112 POSIX is nonzero if we want full backtracking (POSIX style)
5679531d
KH
113 for this pattern. 0 means backtrack only enough to get a valid match.
114 MULTIBYTE is nonzero if we want to handle multibyte characters in
115 PATTERN. 0 means all multibyte characters are recognized just as
ecdb561e
RS
116 sequences of binary data.
117
41a33295 118 The behavior also depends on Vsearch_spaces_regexp. */
ca1d1d23 119
487282dc 120static void
5679531d 121compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
487282dc 122 struct regexp_cache *cp;
ca1d1d23 123 Lisp_Object pattern;
facdc750 124 Lisp_Object translate;
487282dc 125 struct re_registers *regp;
b819a390 126 int posix;
5679531d 127 int multibyte;
ca1d1d23 128{
7276d3d8 129 unsigned char *raw_pattern;
f8bd51c4 130 int raw_pattern_size;
d451e4db 131 char *val;
b819a390 132 reg_syntax_t old;
ca1d1d23 133
f8bd51c4
KH
134 /* MULTIBYTE says whether the text to be searched is multibyte.
135 We must convert PATTERN to match that, or we will not really
136 find things right. */
137
138 if (multibyte == STRING_MULTIBYTE (pattern))
139 {
d5db4077
KR
140 raw_pattern = (unsigned char *) SDATA (pattern);
141 raw_pattern_size = SBYTES (pattern);
f8bd51c4
KH
142 }
143 else if (multibyte)
144 {
d5db4077
KR
145 raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
146 SCHARS (pattern));
7276d3d8 147 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
148 copy_text (SDATA (pattern), raw_pattern,
149 SCHARS (pattern), 0, 1);
f8bd51c4
KH
150 }
151 else
152 {
153 /* Converting multibyte to single-byte.
154
155 ??? Perhaps this conversion should be done in a special way
156 by subtracting nonascii-insert-offset from each non-ASCII char,
157 so that only the multibyte chars which really correspond to
158 the chosen single-byte character set can possibly match. */
d5db4077 159 raw_pattern_size = SCHARS (pattern);
7276d3d8 160 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
161 copy_text (SDATA (pattern), raw_pattern,
162 SBYTES (pattern), 1, 0);
f8bd51c4
KH
163 }
164
487282dc 165 cp->regexp = Qnil;
59fab369 166 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
b819a390 167 cp->posix = posix;
5679531d 168 cp->buf.multibyte = multibyte;
41a33295 169 cp->whitespace_regexp = Vsearch_spaces_regexp;
910c747a
RS
170 /* Doing BLOCK_INPUT here has the effect that
171 the debugger won't run if an error occurs.
172 Why is BLOCK_INPUT needed here? */
9ac0d9e0 173 BLOCK_INPUT;
fb4a568d 174 old = re_set_syntax (RE_SYNTAX_EMACS
b819a390 175 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
f31a9a68 176
41a33295
RS
177 re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
178 : SDATA (Vsearch_spaces_regexp));
f31a9a68 179
7276d3d8
RS
180 val = (char *) re_compile_pattern ((char *)raw_pattern,
181 raw_pattern_size, &cp->buf);
f31a9a68
RS
182
183 re_set_whitespace_regexp (NULL);
184
b819a390 185 re_set_syntax (old);
9ac0d9e0 186 UNBLOCK_INPUT;
ca1d1d23 187 if (val)
06f77a8a 188 xsignal1 (Qinvalid_regexp, build_string (val));
1113d9db 189
487282dc 190 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
191}
192
6efc7887
RS
193/* Shrink each compiled regexp buffer in the cache
194 to the size actually used right now.
195 This is called from garbage collection. */
196
197void
198shrink_regexp_cache ()
199{
a968f437 200 struct regexp_cache *cp;
6efc7887
RS
201
202 for (cp = searchbuf_head; cp != 0; cp = cp->next)
203 {
204 cp->buf.allocated = cp->buf.used;
205 cp->buf.buffer
b23c0a83 206 = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
6efc7887
RS
207 }
208}
209
487282dc 210/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
211 the cache.
212 PATTERN is the pattern to compile.
facdc750 213 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
214 REGP is the structure that says where to store the "register"
215 values that will result from matching this pattern.
216 If it is 0, we should compile the pattern not to record any
217 subexpression bounds.
218 POSIX is nonzero if we want full backtracking (POSIX style)
219 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
220
221struct re_pattern_buffer *
0c8533c6 222compile_pattern (pattern, regp, translate, posix, multibyte)
487282dc
KH
223 Lisp_Object pattern;
224 struct re_registers *regp;
facdc750 225 Lisp_Object translate;
0c8533c6 226 int posix, multibyte;
487282dc
KH
227{
228 struct regexp_cache *cp, **cpp;
229
230 for (cpp = &searchbuf_head; ; cpp = &cp->next)
231 {
232 cp = *cpp;
f1b9c7c1
KR
233 /* Entries are initialized to nil, and may be set to nil by
234 compile_pattern_1 if the pattern isn't valid. Don't apply
49a5f770
KR
235 string accessors in those cases. However, compile_pattern_1
236 is only applied to the cache entry we pick here to reuse. So
237 nil should never appear before a non-nil entry. */
7c752c80 238 if (NILP (cp->regexp))
f1b9c7c1 239 goto compile_it;
d5db4077 240 if (SCHARS (cp->regexp) == SCHARS (pattern)
cf69b13e 241 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
1d288aef 242 && !NILP (Fstring_equal (cp->regexp, pattern))
59fab369 243 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
5679531d 244 && cp->posix == posix
ecdb561e 245 && cp->buf.multibyte == multibyte
41a33295 246 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp)))
487282dc
KH
247 break;
248
f1b9c7c1
KR
249 /* If we're at the end of the cache, compile into the nil cell
250 we found, or the last (least recently used) cell with a
251 string value. */
487282dc
KH
252 if (cp->next == 0)
253 {
f1b9c7c1 254 compile_it:
5679531d 255 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
487282dc
KH
256 break;
257 }
258 }
259
260 /* When we get here, cp (aka *cpp) contains the compiled pattern,
261 either because we found it in the cache or because we just compiled it.
262 Move it to the front of the queue to mark it as most recently used. */
263 *cpp = cp->next;
264 cp->next = searchbuf_head;
265 searchbuf_head = cp;
1113d9db 266
6639708c
RS
267 /* Advise the searching functions about the space we have allocated
268 for register data. */
269 if (regp)
270 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
271
487282dc 272 return &cp->buf;
ca1d1d23
JB
273}
274
ca1d1d23 275\f
b819a390
RS
276static Lisp_Object
277looking_at_1 (string, posix)
ca1d1d23 278 Lisp_Object string;
b819a390 279 int posix;
ca1d1d23
JB
280{
281 Lisp_Object val;
282 unsigned char *p1, *p2;
283 int s1, s2;
284 register int i;
487282dc 285 struct re_pattern_buffer *bufp;
ca1d1d23 286
7074fde6
FP
287 if (running_asynch_code)
288 save_search_regs ();
289
910c747a
RS
290 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
291 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
292 = current_buffer->case_eqv_table;
293
b7826503 294 CHECK_STRING (string);
487282dc
KH
295 bufp = compile_pattern (string, &search_regs,
296 (!NILP (current_buffer->case_fold_search)
0190922f 297 ? current_buffer->case_canon_table : Qnil),
0c8533c6
RS
298 posix,
299 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23
JB
300
301 immediate_quit = 1;
302 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
303
304 /* Get pointers and sizes of the two strings
305 that make up the visible portion of the buffer. */
306
307 p1 = BEGV_ADDR;
fa8ed3e0 308 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 309 p2 = GAP_END_ADDR;
fa8ed3e0 310 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
311 if (s1 < 0)
312 {
313 p2 = p1;
fa8ed3e0 314 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
315 s1 = 0;
316 }
317 if (s2 < 0)
318 {
fa8ed3e0 319 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
320 s2 = 0;
321 }
8bb43c28
RS
322
323 re_match_object = Qnil;
177c0ea7 324
487282dc 325 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
fa8ed3e0
RS
326 PT_BYTE - BEGV_BYTE, &search_regs,
327 ZV_BYTE - BEGV_BYTE);
de182d70 328 immediate_quit = 0;
177c0ea7 329
ca1d1d23
JB
330 if (i == -2)
331 matcher_overflow ();
332
333 val = (0 <= i ? Qt : Qnil);
fa8ed3e0
RS
334 if (i >= 0)
335 for (i = 0; i < search_regs.num_regs; i++)
336 if (search_regs.start[i] >= 0)
337 {
338 search_regs.start[i]
339 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
340 search_regs.end[i]
341 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
342 }
a3668d92 343 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
344 return val;
345}
346
b819a390 347DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
8c1a1077
PJ
348 doc: /* Return t if text after point matches regular expression REGEXP.
349This function modifies the match data that `match-beginning',
350`match-end' and `match-data' access; save and restore the match
351data if you want to preserve them. */)
352 (regexp)
94f94972 353 Lisp_Object regexp;
b819a390 354{
94f94972 355 return looking_at_1 (regexp, 0);
b819a390
RS
356}
357
358DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
8c1a1077
PJ
359 doc: /* Return t if text after point matches regular expression REGEXP.
360Find the longest match, in accord with Posix regular expression rules.
361This function modifies the match data that `match-beginning',
362`match-end' and `match-data' access; save and restore the match
363data if you want to preserve them. */)
364 (regexp)
94f94972 365 Lisp_Object regexp;
b819a390 366{
94f94972 367 return looking_at_1 (regexp, 1);
b819a390
RS
368}
369\f
370static Lisp_Object
371string_match_1 (regexp, string, start, posix)
ca1d1d23 372 Lisp_Object regexp, string, start;
b819a390 373 int posix;
ca1d1d23
JB
374{
375 int val;
487282dc 376 struct re_pattern_buffer *bufp;
0c8533c6
RS
377 int pos, pos_byte;
378 int i;
ca1d1d23 379
7074fde6
FP
380 if (running_asynch_code)
381 save_search_regs ();
382
b7826503
PJ
383 CHECK_STRING (regexp);
384 CHECK_STRING (string);
ca1d1d23
JB
385
386 if (NILP (start))
0c8533c6 387 pos = 0, pos_byte = 0;
ca1d1d23
JB
388 else
389 {
d5db4077 390 int len = SCHARS (string);
ca1d1d23 391
b7826503 392 CHECK_NUMBER (start);
0c8533c6
RS
393 pos = XINT (start);
394 if (pos < 0 && -pos <= len)
395 pos = len + pos;
396 else if (0 > pos || pos > len)
ca1d1d23 397 args_out_of_range (string, start);
0c8533c6 398 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
399 }
400
910c747a
RS
401 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
402 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
403 = current_buffer->case_eqv_table;
404
487282dc
KH
405 bufp = compile_pattern (regexp, &search_regs,
406 (!NILP (current_buffer->case_fold_search)
0190922f 407 ? current_buffer->case_canon_table : Qnil),
0c8533c6
RS
408 posix,
409 STRING_MULTIBYTE (string));
ca1d1d23 410 immediate_quit = 1;
8bb43c28 411 re_match_object = string;
177c0ea7 412
d5db4077
KR
413 val = re_search (bufp, (char *) SDATA (string),
414 SBYTES (string), pos_byte,
415 SBYTES (string) - pos_byte,
ca1d1d23
JB
416 &search_regs);
417 immediate_quit = 0;
daa37602 418 last_thing_searched = Qt;
ca1d1d23
JB
419 if (val == -2)
420 matcher_overflow ();
421 if (val < 0) return Qnil;
0c8533c6
RS
422
423 for (i = 0; i < search_regs.num_regs; i++)
424 if (search_regs.start[i] >= 0)
425 {
426 search_regs.start[i]
427 = string_byte_to_char (string, search_regs.start[i]);
428 search_regs.end[i]
429 = string_byte_to_char (string, search_regs.end[i]);
430 }
431
432 return make_number (string_byte_to_char (string, val));
ca1d1d23 433}
e59a8453 434
b819a390 435DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
8c1a1077 436 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
b85acc4b 437Matching ignores case if `case-fold-search' is non-nil.
8c1a1077
PJ
438If third arg START is non-nil, start search at that index in STRING.
439For index of first char beyond the match, do (match-end 0).
440`match-end' and `match-beginning' also give indices of substrings
2bd2f32d
RS
441matched by parenthesis constructs in the pattern.
442
443You can use the function `match-string' to extract the substrings
444matched by the parenthesis constructions in REGEXP. */)
8c1a1077 445 (regexp, string, start)
b819a390
RS
446 Lisp_Object regexp, string, start;
447{
448 return string_match_1 (regexp, string, start, 0);
449}
450
451DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
8c1a1077
PJ
452 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
453Find the longest match, in accord with Posix regular expression rules.
454Case is ignored if `case-fold-search' is non-nil in the current buffer.
455If third arg START is non-nil, start search at that index in STRING.
456For index of first char beyond the match, do (match-end 0).
457`match-end' and `match-beginning' also give indices of substrings
458matched by parenthesis constructs in the pattern. */)
459 (regexp, string, start)
b819a390
RS
460 Lisp_Object regexp, string, start;
461{
462 return string_match_1 (regexp, string, start, 1);
463}
464
e59a8453
RS
465/* Match REGEXP against STRING, searching all of STRING,
466 and return the index of the match, or negative on failure.
467 This does not clobber the match data. */
468
469int
470fast_string_match (regexp, string)
471 Lisp_Object regexp, string;
472{
473 int val;
487282dc 474 struct re_pattern_buffer *bufp;
e59a8453 475
facdc750
RS
476 bufp = compile_pattern (regexp, 0, Qnil,
477 0, STRING_MULTIBYTE (string));
e59a8453 478 immediate_quit = 1;
8bb43c28 479 re_match_object = string;
177c0ea7 480
d5db4077
KR
481 val = re_search (bufp, (char *) SDATA (string),
482 SBYTES (string), 0,
483 SBYTES (string), 0);
e59a8453
RS
484 immediate_quit = 0;
485 return val;
486}
5679531d
KH
487
488/* Match REGEXP against STRING, searching all of STRING ignoring case,
489 and return the index of the match, or negative on failure.
0c8533c6
RS
490 This does not clobber the match data.
491 We assume that STRING contains single-byte characters. */
5679531d
KH
492
493extern Lisp_Object Vascii_downcase_table;
494
495int
b4577c63 496fast_c_string_match_ignore_case (regexp, string)
5679531d 497 Lisp_Object regexp;
96b80561 498 const char *string;
5679531d
KH
499{
500 int val;
501 struct re_pattern_buffer *bufp;
502 int len = strlen (string);
503
0c8533c6 504 regexp = string_make_unibyte (regexp);
b4577c63 505 re_match_object = Qt;
5679531d 506 bufp = compile_pattern (regexp, 0,
0190922f 507 Vascii_canon_table, 0,
f8bd51c4 508 0);
5679531d
KH
509 immediate_quit = 1;
510 val = re_search (bufp, string, len, 0, len, 0);
511 immediate_quit = 0;
512 return val;
513}
be5f4dfb
KH
514
515/* Like fast_string_match but ignore case. */
516
517int
518fast_string_match_ignore_case (regexp, string)
519 Lisp_Object regexp, string;
520{
521 int val;
522 struct re_pattern_buffer *bufp;
523
0190922f 524 bufp = compile_pattern (regexp, 0, Vascii_canon_table,
be5f4dfb
KH
525 0, STRING_MULTIBYTE (string));
526 immediate_quit = 1;
527 re_match_object = string;
528
529 val = re_search (bufp, (char *) SDATA (string),
530 SBYTES (string), 0,
531 SBYTES (string), 0);
532 immediate_quit = 0;
533 return val;
534}
ca1d1d23 535\f
9169c321
JB
536/* The newline cache: remembering which sections of text have no newlines. */
537
538/* If the user has requested newline caching, make sure it's on.
539 Otherwise, make sure it's off.
540 This is our cheezy way of associating an action with the change of
541 state of a buffer-local variable. */
542static void
543newline_cache_on_off (buf)
544 struct buffer *buf;
545{
546 if (NILP (buf->cache_long_line_scans))
547 {
548 /* It should be off. */
549 if (buf->newline_cache)
550 {
551 free_region_cache (buf->newline_cache);
552 buf->newline_cache = 0;
553 }
554 }
555 else
556 {
557 /* It should be on. */
558 if (buf->newline_cache == 0)
559 buf->newline_cache = new_region_cache ();
560 }
561}
562
563\f
564/* Search for COUNT instances of the character TARGET between START and END.
565
566 If COUNT is positive, search forwards; END must be >= START.
567 If COUNT is negative, search backwards for the -COUNTth instance;
568 END must be <= START.
569 If COUNT is zero, do anything you please; run rogue, for all I care.
570
571 If END is zero, use BEGV or ZV instead, as appropriate for the
572 direction indicated by COUNT.
ffd56f97
JB
573
574 If we find COUNT instances, set *SHORTAGE to zero, and return the
a9f2a45f 575 position past the COUNTth match. Note that for reverse motion
5bfe95c9 576 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 577
9169c321
JB
578 If we don't find COUNT instances before reaching END, set *SHORTAGE
579 to the number of TARGETs left unfound, and return END.
ffd56f97 580
087a5f81
RS
581 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
582 except when inside redisplay. */
583
dfcf069d 584int
9169c321
JB
585scan_buffer (target, start, end, count, shortage, allow_quit)
586 register int target;
587 int start, end;
588 int count;
589 int *shortage;
087a5f81 590 int allow_quit;
ca1d1d23 591{
9169c321 592 struct region_cache *newline_cache;
177c0ea7 593 int direction;
ffd56f97 594
9169c321
JB
595 if (count > 0)
596 {
597 direction = 1;
598 if (! end) end = ZV;
599 }
600 else
601 {
602 direction = -1;
603 if (! end) end = BEGV;
604 }
ffd56f97 605
9169c321
JB
606 newline_cache_on_off (current_buffer);
607 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
608
609 if (shortage != 0)
610 *shortage = 0;
611
087a5f81 612 immediate_quit = allow_quit;
ca1d1d23 613
ffd56f97 614 if (count > 0)
9169c321 615 while (start != end)
ca1d1d23 616 {
9169c321
JB
617 /* Our innermost scanning loop is very simple; it doesn't know
618 about gaps, buffer ends, or the newline cache. ceiling is
619 the position of the last character before the next such
620 obstacle --- the last character the dumb search loop should
621 examine. */
fa8ed3e0
RS
622 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
623 int start_byte = CHAR_TO_BYTE (start);
67ce527d 624 int tem;
9169c321
JB
625
626 /* If we're looking for a newline, consult the newline cache
627 to see where we can avoid some scanning. */
628 if (target == '\n' && newline_cache)
629 {
630 int next_change;
631 immediate_quit = 0;
632 while (region_cache_forward
fa8ed3e0
RS
633 (current_buffer, newline_cache, start_byte, &next_change))
634 start_byte = next_change;
cbe0db0d 635 immediate_quit = allow_quit;
9169c321 636
fa8ed3e0
RS
637 /* START should never be after END. */
638 if (start_byte > ceiling_byte)
639 start_byte = ceiling_byte;
9169c321
JB
640
641 /* Now the text after start is an unknown region, and
642 next_change is the position of the next known region. */
fa8ed3e0 643 ceiling_byte = min (next_change - 1, ceiling_byte);
9169c321
JB
644 }
645
646 /* The dumb loop can only scan text stored in contiguous
647 bytes. BUFFER_CEILING_OF returns the last character
648 position that is contiguous, so the ceiling is the
649 position after that. */
67ce527d
KH
650 tem = BUFFER_CEILING_OF (start_byte);
651 ceiling_byte = min (tem, ceiling_byte);
9169c321
JB
652
653 {
177c0ea7 654 /* The termination address of the dumb loop. */
fa8ed3e0
RS
655 register unsigned char *ceiling_addr
656 = BYTE_POS_ADDR (ceiling_byte) + 1;
657 register unsigned char *cursor
658 = BYTE_POS_ADDR (start_byte);
9169c321
JB
659 unsigned char *base = cursor;
660
661 while (cursor < ceiling_addr)
662 {
663 unsigned char *scan_start = cursor;
664
665 /* The dumb loop. */
666 while (*cursor != target && ++cursor < ceiling_addr)
667 ;
668
669 /* If we're looking for newlines, cache the fact that
670 the region from start to cursor is free of them. */
671 if (target == '\n' && newline_cache)
672 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
673 start_byte + scan_start - base,
674 start_byte + cursor - base);
9169c321
JB
675
676 /* Did we find the target character? */
677 if (cursor < ceiling_addr)
678 {
679 if (--count == 0)
680 {
681 immediate_quit = 0;
fa8ed3e0 682 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
9169c321
JB
683 }
684 cursor++;
685 }
686 }
687
fa8ed3e0 688 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321 689 }
ca1d1d23
JB
690 }
691 else
9169c321
JB
692 while (start > end)
693 {
694 /* The last character to check before the next obstacle. */
fa8ed3e0
RS
695 int ceiling_byte = CHAR_TO_BYTE (end);
696 int start_byte = CHAR_TO_BYTE (start);
67ce527d 697 int tem;
9169c321
JB
698
699 /* Consult the newline cache, if appropriate. */
700 if (target == '\n' && newline_cache)
701 {
702 int next_change;
703 immediate_quit = 0;
704 while (region_cache_backward
fa8ed3e0
RS
705 (current_buffer, newline_cache, start_byte, &next_change))
706 start_byte = next_change;
cbe0db0d 707 immediate_quit = allow_quit;
9169c321
JB
708
709 /* Start should never be at or before end. */
fa8ed3e0
RS
710 if (start_byte <= ceiling_byte)
711 start_byte = ceiling_byte + 1;
9169c321
JB
712
713 /* Now the text before start is an unknown region, and
714 next_change is the position of the next known region. */
fa8ed3e0 715 ceiling_byte = max (next_change, ceiling_byte);
9169c321
JB
716 }
717
718 /* Stop scanning before the gap. */
67ce527d
KH
719 tem = BUFFER_FLOOR_OF (start_byte - 1);
720 ceiling_byte = max (tem, ceiling_byte);
9169c321
JB
721
722 {
723 /* The termination address of the dumb loop. */
fa8ed3e0
RS
724 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
725 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
726 unsigned char *base = cursor;
727
728 while (cursor >= ceiling_addr)
729 {
730 unsigned char *scan_start = cursor;
731
732 while (*cursor != target && --cursor >= ceiling_addr)
733 ;
734
735 /* If we're looking for newlines, cache the fact that
736 the region from after the cursor to start is free of them. */
737 if (target == '\n' && newline_cache)
738 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
739 start_byte + cursor - base,
740 start_byte + scan_start - base);
9169c321
JB
741
742 /* Did we find the target character? */
743 if (cursor >= ceiling_addr)
744 {
745 if (++count >= 0)
746 {
747 immediate_quit = 0;
fa8ed3e0 748 return BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
749 }
750 cursor--;
751 }
752 }
753
fa8ed3e0 754 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
755 }
756 }
757
ca1d1d23
JB
758 immediate_quit = 0;
759 if (shortage != 0)
ffd56f97 760 *shortage = count * direction;
9169c321 761 return start;
ca1d1d23 762}
fa8ed3e0
RS
763\f
764/* Search for COUNT instances of a line boundary, which means either a
765 newline or (if selective display enabled) a carriage return.
766 Start at START. If COUNT is negative, search backwards.
767
768 We report the resulting position by calling TEMP_SET_PT_BOTH.
769
770 If we find COUNT instances. we position after (always after,
771 even if scanning backwards) the COUNTth match, and return 0.
772
773 If we don't find COUNT instances before reaching the end of the
774 buffer (or the beginning, if scanning backwards), we return
775 the number of line boundaries left unfound, and position at
776 the limit we bumped up against.
777
778 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
d5d57b92 779 except in special cases. */
ca1d1d23 780
63fa018d 781int
fa8ed3e0
RS
782scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
783 int start, start_byte;
784 int limit, limit_byte;
785 register int count;
786 int allow_quit;
63fa018d 787{
fa8ed3e0
RS
788 int direction = ((count > 0) ? 1 : -1);
789
790 register unsigned char *cursor;
791 unsigned char *base;
792
793 register int ceiling;
794 register unsigned char *ceiling_addr;
795
d5d57b92
RS
796 int old_immediate_quit = immediate_quit;
797
fa8ed3e0
RS
798 /* The code that follows is like scan_buffer
799 but checks for either newline or carriage return. */
800
d5d57b92
RS
801 if (allow_quit)
802 immediate_quit++;
fa8ed3e0
RS
803
804 start_byte = CHAR_TO_BYTE (start);
805
806 if (count > 0)
807 {
808 while (start_byte < limit_byte)
809 {
810 ceiling = BUFFER_CEILING_OF (start_byte);
811 ceiling = min (limit_byte - 1, ceiling);
812 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
813 base = (cursor = BYTE_POS_ADDR (start_byte));
814 while (1)
815 {
816 while (*cursor != '\n' && ++cursor != ceiling_addr)
817 ;
818
819 if (cursor != ceiling_addr)
820 {
821 if (--count == 0)
822 {
d5d57b92 823 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
824 start_byte = start_byte + cursor - base + 1;
825 start = BYTE_TO_CHAR (start_byte);
826 TEMP_SET_PT_BOTH (start, start_byte);
827 return 0;
828 }
829 else
830 if (++cursor == ceiling_addr)
831 break;
832 }
833 else
834 break;
835 }
836 start_byte += cursor - base;
837 }
838 }
839 else
840 {
fa8ed3e0
RS
841 while (start_byte > limit_byte)
842 {
843 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
844 ceiling = max (limit_byte, ceiling);
845 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
846 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
847 while (1)
848 {
849 while (--cursor != ceiling_addr && *cursor != '\n')
850 ;
851
852 if (cursor != ceiling_addr)
853 {
854 if (++count == 0)
855 {
d5d57b92 856 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
857 /* Return the position AFTER the match we found. */
858 start_byte = start_byte + cursor - base + 1;
859 start = BYTE_TO_CHAR (start_byte);
860 TEMP_SET_PT_BOTH (start, start_byte);
861 return 0;
862 }
863 }
864 else
865 break;
866 }
867 /* Here we add 1 to compensate for the last decrement
868 of CURSOR, which took it past the valid range. */
869 start_byte += cursor - base + 1;
870 }
871 }
872
873 TEMP_SET_PT_BOTH (limit, limit_byte);
d5d57b92 874 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
875
876 return count * direction;
63fa018d
RS
877}
878
ca1d1d23 879int
fa8ed3e0 880find_next_newline_no_quit (from, cnt)
ca1d1d23
JB
881 register int from, cnt;
882{
fa8ed3e0 883 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
9169c321
JB
884}
885
9169c321
JB
886/* Like find_next_newline, but returns position before the newline,
887 not after, and only search up to TO. This isn't just
888 find_next_newline (...)-1, because you might hit TO. */
fa8ed3e0 889
9169c321
JB
890int
891find_before_next_newline (from, to, cnt)
cbe0db0d 892 int from, to, cnt;
9169c321
JB
893{
894 int shortage;
895 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
896
897 if (shortage == 0)
898 pos--;
177c0ea7 899
9169c321 900 return pos;
ca1d1d23
JB
901}
902\f
ca1d1d23
JB
903/* Subroutines of Lisp buffer search functions. */
904
905static Lisp_Object
b819a390 906search_command (string, bound, noerror, count, direction, RE, posix)
ca1d1d23
JB
907 Lisp_Object string, bound, noerror, count;
908 int direction;
909 int RE;
b819a390 910 int posix;
ca1d1d23
JB
911{
912 register int np;
9f43ad85 913 int lim, lim_byte;
ca1d1d23
JB
914 int n = direction;
915
916 if (!NILP (count))
917 {
b7826503 918 CHECK_NUMBER (count);
ca1d1d23
JB
919 n *= XINT (count);
920 }
921
b7826503 922 CHECK_STRING (string);
ca1d1d23 923 if (NILP (bound))
9f43ad85
RS
924 {
925 if (n > 0)
926 lim = ZV, lim_byte = ZV_BYTE;
927 else
928 lim = BEGV, lim_byte = BEGV_BYTE;
929 }
ca1d1d23
JB
930 else
931 {
b7826503 932 CHECK_NUMBER_COERCE_MARKER (bound);
ca1d1d23 933 lim = XINT (bound);
6ec8bbd2 934 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
935 error ("Invalid search bound (wrong side of point)");
936 if (lim > ZV)
9f43ad85 937 lim = ZV, lim_byte = ZV_BYTE;
588d2fd5 938 else if (lim < BEGV)
9f43ad85 939 lim = BEGV, lim_byte = BEGV_BYTE;
588d2fd5
KH
940 else
941 lim_byte = CHAR_TO_BYTE (lim);
ca1d1d23
JB
942 }
943
910c747a
RS
944 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
945 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
946 = current_buffer->case_eqv_table;
947
9f43ad85 948 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
ca1d1d23 949 (!NILP (current_buffer->case_fold_search)
facdc750 950 ? current_buffer->case_canon_table
3135e9fd 951 : Qnil),
ca1d1d23 952 (!NILP (current_buffer->case_fold_search)
facdc750 953 ? current_buffer->case_eqv_table
3135e9fd 954 : Qnil),
b819a390 955 posix);
ca1d1d23
JB
956 if (np <= 0)
957 {
958 if (NILP (noerror))
06f77a8a
KS
959 xsignal1 (Qsearch_failed, string);
960
ca1d1d23
JB
961 if (!EQ (noerror, Qt))
962 {
963 if (lim < BEGV || lim > ZV)
964 abort ();
9f43ad85 965 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
966 return Qnil;
967#if 0 /* This would be clean, but maybe programs depend on
968 a value of nil here. */
481399bf 969 np = lim;
a5f217b8 970#endif
ca1d1d23 971 }
481399bf
RS
972 else
973 return Qnil;
ca1d1d23
JB
974 }
975
976 if (np < BEGV || np > ZV)
977 abort ();
978
979 SET_PT (np);
980
981 return make_number (np);
982}
983\f
fa8ed3e0
RS
984/* Return 1 if REGEXP it matches just one constant string. */
985
b6d6a51c
KH
986static int
987trivial_regexp_p (regexp)
988 Lisp_Object regexp;
989{
d5db4077
KR
990 int len = SBYTES (regexp);
991 unsigned char *s = SDATA (regexp);
b6d6a51c
KH
992 while (--len >= 0)
993 {
994 switch (*s++)
995 {
996 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
997 return 0;
998 case '\\':
999 if (--len < 0)
1000 return 0;
1001 switch (*s++)
1002 {
1003 case '|': case '(': case ')': case '`': case '\'': case 'b':
1004 case 'B': case '<': case '>': case 'w': case 'W': case 's':
29f89fe7 1005 case 'S': case '=': case '{': case '}': case '_':
5679531d 1006 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 1007 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
1008 case '6': case '7': case '8': case '9':
1009 return 0;
1010 }
1011 }
1012 }
1013 return 1;
1014}
1015
ca325161 1016/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 1017 starting at position POS and stopping at position LIM,
b819a390 1018 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
1019 a regular expression if RE is true.
1020
1021 If N is positive, searching is forward and LIM must be greater than POS.
1022 If N is negative, searching is backward and LIM must be less than POS.
1023
facdc750 1024 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 1025 or else the position at the beginning of the Nth occurrence
b819a390
RS
1026 (if searching backward) or the end (if searching forward).
1027
1028 POSIX is nonzero if we want full backtracking (POSIX style)
1029 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 1030
aff2ce94
RS
1031#define TRANSLATE(out, trt, d) \
1032do \
1033 { \
1034 if (! NILP (trt)) \
1035 { \
1036 Lisp_Object temp; \
1037 temp = Faref (trt, make_number (d)); \
1038 if (INTEGERP (temp)) \
1039 out = XINT (temp); \
1040 else \
1041 out = d; \
1042 } \
1043 else \
1044 out = d; \
1045 } \
1046while (0)
facdc750 1047
b819a390 1048static int
9f43ad85
RS
1049search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1050 RE, trt, inverse_trt, posix)
ca1d1d23
JB
1051 Lisp_Object string;
1052 int pos;
9f43ad85 1053 int pos_byte;
ca1d1d23 1054 int lim;
9f43ad85 1055 int lim_byte;
ca1d1d23
JB
1056 int n;
1057 int RE;
facdc750
RS
1058 Lisp_Object trt;
1059 Lisp_Object inverse_trt;
b819a390 1060 int posix;
ca1d1d23 1061{
d5db4077
KR
1062 int len = SCHARS (string);
1063 int len_byte = SBYTES (string);
facdc750 1064 register int i;
ca1d1d23 1065
7074fde6
FP
1066 if (running_asynch_code)
1067 save_search_regs ();
1068
a7e4cdde 1069 /* Searching 0 times means don't move. */
ca1d1d23 1070 /* Null string is found at starting position. */
a7e4cdde 1071 if (len == 0 || n == 0)
ca325161 1072 {
0353b28f 1073 set_search_regs (pos_byte, 0);
ca325161
RS
1074 return pos;
1075 }
3f57a499 1076
41a33295 1077 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
ca1d1d23 1078 {
facdc750
RS
1079 unsigned char *p1, *p2;
1080 int s1, s2;
487282dc
KH
1081 struct re_pattern_buffer *bufp;
1082
0c8533c6
RS
1083 bufp = compile_pattern (string, &search_regs, trt, posix,
1084 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23 1085
ca1d1d23
JB
1086 immediate_quit = 1; /* Quit immediately if user types ^G,
1087 because letting this function finish
1088 can take too long. */
1089 QUIT; /* Do a pending quit right away,
1090 to avoid paradoxical behavior */
1091 /* Get pointers and sizes of the two strings
1092 that make up the visible portion of the buffer. */
1093
1094 p1 = BEGV_ADDR;
fa8ed3e0 1095 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1096 p2 = GAP_END_ADDR;
fa8ed3e0 1097 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1098 if (s1 < 0)
1099 {
1100 p2 = p1;
fa8ed3e0 1101 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1102 s1 = 0;
1103 }
1104 if (s2 < 0)
1105 {
fa8ed3e0 1106 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1107 s2 = 0;
1108 }
8bb43c28 1109 re_match_object = Qnil;
177c0ea7 1110
ca1d1d23
JB
1111 while (n < 0)
1112 {
42db823b 1113 int val;
487282dc 1114 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1115 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1116 &search_regs,
42db823b 1117 /* Don't allow match past current point */
4996330b 1118 pos_byte - BEGV_BYTE);
ca1d1d23 1119 if (val == -2)
b6d6a51c
KH
1120 {
1121 matcher_overflow ();
1122 }
ca1d1d23
JB
1123 if (val >= 0)
1124 {
26aff150 1125 pos_byte = search_regs.start[0] + BEGV_BYTE;
4746118a 1126 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1127 if (search_regs.start[i] >= 0)
1128 {
fa8ed3e0
RS
1129 search_regs.start[i]
1130 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1131 search_regs.end[i]
1132 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1133 }
a3668d92 1134 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1135 /* Set pos to the new position. */
1136 pos = search_regs.start[0];
1137 }
1138 else
1139 {
1140 immediate_quit = 0;
1141 return (n);
1142 }
1143 n++;
1144 }
1145 while (n > 0)
1146 {
42db823b 1147 int val;
487282dc 1148 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1149 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1150 &search_regs,
1151 lim_byte - BEGV_BYTE);
ca1d1d23 1152 if (val == -2)
b6d6a51c
KH
1153 {
1154 matcher_overflow ();
1155 }
ca1d1d23
JB
1156 if (val >= 0)
1157 {
26aff150 1158 pos_byte = search_regs.end[0] + BEGV_BYTE;
4746118a 1159 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1160 if (search_regs.start[i] >= 0)
1161 {
fa8ed3e0
RS
1162 search_regs.start[i]
1163 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1164 search_regs.end[i]
1165 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1166 }
a3668d92 1167 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1168 pos = search_regs.end[0];
1169 }
1170 else
1171 {
1172 immediate_quit = 0;
1173 return (0 - n);
1174 }
1175 n--;
1176 }
1177 immediate_quit = 0;
1178 return (pos);
1179 }
1180 else /* non-RE case */
1181 {
facdc750
RS
1182 unsigned char *raw_pattern, *pat;
1183 int raw_pattern_size;
1184 int raw_pattern_size_byte;
1185 unsigned char *patbuf;
1186 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
a967ed62 1187 unsigned char *base_pat;
fed91c38
KH
1188 /* Set to positive if we find a non-ASCII char that need
1189 translation. Otherwise set to zero later. */
1190 int charset_base = -1;
040272ce 1191 int boyer_moore_ok = 1;
facdc750
RS
1192
1193 /* MULTIBYTE says whether the text to be searched is multibyte.
1194 We must convert PATTERN to match that, or we will not really
1195 find things right. */
1196
1197 if (multibyte == STRING_MULTIBYTE (string))
1198 {
d5db4077
KR
1199 raw_pattern = (unsigned char *) SDATA (string);
1200 raw_pattern_size = SCHARS (string);
1201 raw_pattern_size_byte = SBYTES (string);
facdc750
RS
1202 }
1203 else if (multibyte)
1204 {
d5db4077 1205 raw_pattern_size = SCHARS (string);
facdc750 1206 raw_pattern_size_byte
d5db4077 1207 = count_size_as_multibyte (SDATA (string),
facdc750 1208 raw_pattern_size);
7276d3d8 1209 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
d5db4077
KR
1210 copy_text (SDATA (string), raw_pattern,
1211 SCHARS (string), 0, 1);
facdc750
RS
1212 }
1213 else
1214 {
1215 /* Converting multibyte to single-byte.
1216
1217 ??? Perhaps this conversion should be done in a special way
1218 by subtracting nonascii-insert-offset from each non-ASCII char,
1219 so that only the multibyte chars which really correspond to
1220 the chosen single-byte character set can possibly match. */
d5db4077
KR
1221 raw_pattern_size = SCHARS (string);
1222 raw_pattern_size_byte = SCHARS (string);
7276d3d8 1223 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
1224 copy_text (SDATA (string), raw_pattern,
1225 SBYTES (string), 1, 0);
facdc750
RS
1226 }
1227
1228 /* Copy and optionally translate the pattern. */
1229 len = raw_pattern_size;
1230 len_byte = raw_pattern_size_byte;
1231 patbuf = (unsigned char *) alloca (len_byte);
1232 pat = patbuf;
1233 base_pat = raw_pattern;
1234 if (multibyte)
1235 {
0190922f
KH
1236 /* Fill patbuf by translated characters in STRING while
1237 checking if we can use boyer-moore search. If TRT is
1238 non-nil, we can use boyer-moore search only if TRT can be
1239 represented by the byte array of 256 elements. For that,
1240 all non-ASCII case-equivalents of all case-senstive
1241 characters in STRING must belong to the same charset and
1242 row. */
1243
facdc750
RS
1244 while (--len >= 0)
1245 {
0190922f 1246 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
aff2ce94 1247 int c, translated, inverse;
facdc750
RS
1248 int in_charlen, charlen;
1249
1250 /* If we got here and the RE flag is set, it's because we're
1251 dealing with a regexp known to be trivial, so the backslash
1252 just quotes the next character. */
1253 if (RE && *base_pat == '\\')
1254 {
1255 len--;
0190922f 1256 raw_pattern_size--;
facdc750
RS
1257 len_byte--;
1258 base_pat++;
1259 }
1260
1261 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
040272ce 1262
0190922f 1263 if (NILP (trt))
facdc750 1264 {
0190922f
KH
1265 str = base_pat;
1266 charlen = in_charlen;
facdc750 1267 }
0190922f 1268 else
facdc750 1269 {
0190922f
KH
1270 /* Translate the character. */
1271 TRANSLATE (translated, trt, c);
1272 charlen = CHAR_STRING (translated, str_base);
1273 str = str_base;
1274
1275 /* Check if C has any other case-equivalents. */
1276 TRANSLATE (inverse, inverse_trt, c);
1277 /* If so, check if we can use boyer-moore. */
1278 if (c != inverse && boyer_moore_ok)
1279 {
1280 /* Check if all equivalents belong to the same
1281 charset & row. Note that the check of C
1282 itself is done by the last iteration. Note
1283 also that we don't have to check ASCII
1284 characters because boyer-moore search can
1285 always handle their translation. */
1286 while (1)
1287 {
fed91c38 1288 if (ASCII_BYTE_P (inverse))
0190922f 1289 {
fed91c38 1290 if (charset_base > 0)
0190922f
KH
1291 {
1292 boyer_moore_ok = 0;
1293 break;
1294 }
fed91c38
KH
1295 charset_base = 0;
1296 }
1297 else if (SINGLE_BYTE_CHAR_P (inverse))
1298 {
1299 /* Boyer-moore search can't handle a
1300 translation of an eight-bit
1301 character. */
1302 boyer_moore_ok = 0;
1303 break;
1304 }
1305 else if (charset_base < 0)
1306 charset_base = inverse & ~CHAR_FIELD3_MASK;
1307 else if ((inverse & ~CHAR_FIELD3_MASK)
1308 != charset_base)
1309 {
1310 boyer_moore_ok = 0;
1311 break;
0190922f
KH
1312 }
1313 if (c == inverse)
1314 break;
1315 TRANSLATE (inverse, inverse_trt, inverse);
1316 }
1317 }
aff2ce94 1318 }
fed91c38
KH
1319 if (charset_base < 0)
1320 charset_base = 0;
facdc750
RS
1321
1322 /* Store this character into the translated pattern. */
1323 bcopy (str, pat, charlen);
1324 pat += charlen;
1325 base_pat += in_charlen;
1326 len_byte -= in_charlen;
1327 }
1328 }
1329 else
1330 {
040272ce
KH
1331 /* Unibyte buffer. */
1332 charset_base = 0;
facdc750
RS
1333 while (--len >= 0)
1334 {
040272ce 1335 int c, translated;
facdc750
RS
1336
1337 /* If we got here and the RE flag is set, it's because we're
1338 dealing with a regexp known to be trivial, so the backslash
1339 just quotes the next character. */
1340 if (RE && *base_pat == '\\')
1341 {
1342 len--;
0190922f 1343 raw_pattern_size--;
facdc750
RS
1344 base_pat++;
1345 }
1346 c = *base_pat++;
aff2ce94 1347 TRANSLATE (translated, trt, c);
facdc750
RS
1348 *pat++ = translated;
1349 }
1350 }
1351
1352 len_byte = pat - patbuf;
1353 len = raw_pattern_size;
1354 pat = base_pat = patbuf;
1355
040272ce 1356 if (boyer_moore_ok)
facdc750 1357 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
aff2ce94
RS
1358 pos, pos_byte, lim, lim_byte,
1359 charset_base);
facdc750
RS
1360 else
1361 return simple_search (n, pat, len, len_byte, trt,
1362 pos, pos_byte, lim, lim_byte);
1363 }
1364}
1365\f
1366/* Do a simple string search N times for the string PAT,
1367 whose length is LEN/LEN_BYTE,
1368 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1369 TRT is the translation table.
f8bd51c4 1370
facdc750
RS
1371 Return the character position where the match is found.
1372 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1373
facdc750
RS
1374 This kind of search works regardless of what is in PAT and
1375 regardless of what is in TRT. It is used in cases where
1376 boyer_moore cannot work. */
1377
1378static int
1379simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1380 int n;
1381 unsigned char *pat;
1382 int len, len_byte;
1383 Lisp_Object trt;
1384 int pos, pos_byte;
1385 int lim, lim_byte;
1386{
1387 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
ab228c24 1388 int forward = n > 0;
facdc750
RS
1389
1390 if (lim > pos && multibyte)
1391 while (n > 0)
1392 {
1393 while (1)
f8bd51c4 1394 {
facdc750
RS
1395 /* Try matching at position POS. */
1396 int this_pos = pos;
1397 int this_pos_byte = pos_byte;
1398 int this_len = len;
1399 int this_len_byte = len_byte;
1400 unsigned char *p = pat;
1401 if (pos + len > lim)
1402 goto stop;
1403
1404 while (this_len > 0)
1405 {
1406 int charlen, buf_charlen;
ab228c24 1407 int pat_ch, buf_ch;
facdc750 1408
ab228c24 1409 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1410 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1411 ZV_BYTE - this_pos_byte,
1412 buf_charlen);
aff2ce94 1413 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1414
1415 if (buf_ch != pat_ch)
1416 break;
ab228c24
RS
1417
1418 this_len_byte -= charlen;
1419 this_len--;
1420 p += charlen;
1421
1422 this_pos_byte += buf_charlen;
1423 this_pos++;
facdc750
RS
1424 }
1425
1426 if (this_len == 0)
1427 {
1428 pos += len;
1429 pos_byte += len_byte;
1430 break;
1431 }
1432
1433 INC_BOTH (pos, pos_byte);
f8bd51c4 1434 }
facdc750
RS
1435
1436 n--;
1437 }
1438 else if (lim > pos)
1439 while (n > 0)
1440 {
1441 while (1)
f8bd51c4 1442 {
facdc750
RS
1443 /* Try matching at position POS. */
1444 int this_pos = pos;
1445 int this_len = len;
1446 unsigned char *p = pat;
1447
1448 if (pos + len > lim)
1449 goto stop;
1450
1451 while (this_len > 0)
1452 {
1453 int pat_ch = *p++;
1454 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1455 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1456
1457 if (buf_ch != pat_ch)
1458 break;
ab228c24
RS
1459
1460 this_len--;
1461 this_pos++;
facdc750
RS
1462 }
1463
1464 if (this_len == 0)
1465 {
1466 pos += len;
1467 break;
1468 }
1469
1470 pos++;
f8bd51c4 1471 }
facdc750
RS
1472
1473 n--;
1474 }
1475 /* Backwards search. */
1476 else if (lim < pos && multibyte)
1477 while (n < 0)
1478 {
1479 while (1)
f8bd51c4 1480 {
facdc750
RS
1481 /* Try matching at position POS. */
1482 int this_pos = pos - len;
1483 int this_pos_byte = pos_byte - len_byte;
1484 int this_len = len;
1485 int this_len_byte = len_byte;
1486 unsigned char *p = pat;
1487
1488 if (pos - len < lim)
1489 goto stop;
1490
1491 while (this_len > 0)
1492 {
1493 int charlen, buf_charlen;
ab228c24 1494 int pat_ch, buf_ch;
facdc750 1495
ab228c24 1496 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1497 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1498 ZV_BYTE - this_pos_byte,
1499 buf_charlen);
aff2ce94 1500 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1501
1502 if (buf_ch != pat_ch)
1503 break;
ab228c24
RS
1504
1505 this_len_byte -= charlen;
1506 this_len--;
1507 p += charlen;
1508 this_pos_byte += buf_charlen;
1509 this_pos++;
facdc750
RS
1510 }
1511
1512 if (this_len == 0)
1513 {
1514 pos -= len;
1515 pos_byte -= len_byte;
1516 break;
1517 }
1518
1519 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1520 }
1521
facdc750
RS
1522 n++;
1523 }
1524 else if (lim < pos)
1525 while (n < 0)
1526 {
1527 while (1)
b6d6a51c 1528 {
facdc750
RS
1529 /* Try matching at position POS. */
1530 int this_pos = pos - len;
1531 int this_len = len;
1532 unsigned char *p = pat;
1533
1534 if (pos - len < lim)
1535 goto stop;
1536
1537 while (this_len > 0)
1538 {
1539 int pat_ch = *p++;
1540 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1541 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1542
1543 if (buf_ch != pat_ch)
1544 break;
ab228c24
RS
1545 this_len--;
1546 this_pos++;
facdc750
RS
1547 }
1548
1549 if (this_len == 0)
b6d6a51c 1550 {
facdc750
RS
1551 pos -= len;
1552 break;
b6d6a51c 1553 }
facdc750
RS
1554
1555 pos--;
b6d6a51c 1556 }
facdc750
RS
1557
1558 n++;
b6d6a51c 1559 }
facdc750
RS
1560
1561 stop:
1562 if (n == 0)
aff2ce94 1563 {
ab228c24
RS
1564 if (forward)
1565 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1566 else
1567 set_search_regs (multibyte ? pos_byte : pos, len_byte);
aff2ce94
RS
1568
1569 return pos;
1570 }
facdc750
RS
1571 else if (n > 0)
1572 return -n;
1573 else
1574 return n;
1575}
1576\f
0190922f 1577/* Do Boyer-Moore search N times for the string BASE_PAT,
facdc750
RS
1578 whose length is LEN/LEN_BYTE,
1579 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1580 DIRECTION says which direction we search in.
1581 TRT and INVERSE_TRT are translation tables.
0190922f 1582 Characters in PAT are already translated by TRT.
facdc750 1583
0190922f
KH
1584 This kind of search works if all the characters in BASE_PAT that
1585 have nontrivial translation are the same aside from the last byte.
1586 This makes it possible to translate just the last byte of a
1587 character, and do so after just a simple test of the context.
1588 CHARSET_BASE is nonzero iff there is such a non-ASCII character.
facdc750
RS
1589
1590 If that criterion is not satisfied, do not call this function. */
1591
1592static int
1593boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
aff2ce94 1594 pos, pos_byte, lim, lim_byte, charset_base)
facdc750
RS
1595 int n;
1596 unsigned char *base_pat;
1597 int len, len_byte;
1598 Lisp_Object trt;
1599 Lisp_Object inverse_trt;
1600 int pos, pos_byte;
1601 int lim, lim_byte;
aff2ce94 1602 int charset_base;
facdc750
RS
1603{
1604 int direction = ((n > 0) ? 1 : -1);
1605 register int dirlen;
a968f437 1606 int infinity, limit, stride_for_teases = 0;
facdc750
RS
1607 register int *BM_tab;
1608 int *BM_tab_base;
177c0ea7 1609 register unsigned char *cursor, *p_limit;
facdc750 1610 register int i, j;
cb6792d2 1611 unsigned char *pat, *pat_end;
facdc750
RS
1612 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1613
1614 unsigned char simple_translate[0400];
0190922f
KH
1615 /* These are set to the preceding bytes of a byte to be translated
1616 if charset_base is nonzero. As the maximum byte length of a
1617 multibyte character is 4, we have to check at most three previous
1618 bytes. */
1619 int translate_prev_byte1 = 0;
1620 int translate_prev_byte2 = 0;
1621 int translate_prev_byte3 = 0;
facdc750
RS
1622
1623#ifdef C_ALLOCA
1624 int BM_tab_space[0400];
1625 BM_tab = &BM_tab_space[0];
1626#else
1627 BM_tab = (int *) alloca (0400 * sizeof (int));
1628#endif
1629 /* The general approach is that we are going to maintain that we know */
1630 /* the first (closest to the present position, in whatever direction */
1631 /* we're searching) character that could possibly be the last */
1632 /* (furthest from present position) character of a valid match. We */
1633 /* advance the state of our knowledge by looking at that character */
1634 /* and seeing whether it indeed matches the last character of the */
1635 /* pattern. If it does, we take a closer look. If it does not, we */
1636 /* move our pointer (to putative last characters) as far as is */
1637 /* logically possible. This amount of movement, which I call a */
1638 /* stride, will be the length of the pattern if the actual character */
1639 /* appears nowhere in the pattern, otherwise it will be the distance */
1640 /* from the last occurrence of that character to the end of the */
1641 /* pattern. */
1642 /* As a coding trick, an enormous stride is coded into the table for */
1643 /* characters that match the last character. This allows use of only */
1644 /* a single test, a test for having gone past the end of the */
1645 /* permissible match region, to test for both possible matches (when */
1646 /* the stride goes past the end immediately) and failure to */
177c0ea7 1647 /* match (where you get nudged past the end one stride at a time). */
facdc750
RS
1648
1649 /* Here we make a "mickey mouse" BM table. The stride of the search */
1650 /* is determined only by the last character of the putative match. */
1651 /* If that character does not match, we will stride the proper */
1652 /* distance to propose a match that superimposes it on the last */
1653 /* instance of a character that matches it (per trt), or misses */
177c0ea7 1654 /* it entirely if there is none. */
facdc750
RS
1655
1656 dirlen = len_byte * direction;
1657 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
cb6792d2
RS
1658
1659 /* Record position after the end of the pattern. */
1660 pat_end = base_pat + len_byte;
1661 /* BASE_PAT points to a character that we start scanning from.
1662 It is the first character in a forward search,
1663 the last character in a backward search. */
facdc750 1664 if (direction < 0)
cb6792d2
RS
1665 base_pat = pat_end - 1;
1666
facdc750
RS
1667 BM_tab_base = BM_tab;
1668 BM_tab += 0400;
1669 j = dirlen; /* to get it in a register */
1670 /* A character that does not appear in the pattern induces a */
1671 /* stride equal to the pattern length. */
1672 while (BM_tab_base != BM_tab)
1673 {
1674 *--BM_tab = j;
1675 *--BM_tab = j;
1676 *--BM_tab = j;
1677 *--BM_tab = j;
1678 }
1679
1680 /* We use this for translation, instead of TRT itself.
1681 We fill this in to handle the characters that actually
1682 occur in the pattern. Others don't matter anyway! */
1683 bzero (simple_translate, sizeof simple_translate);
1684 for (i = 0; i < 0400; i++)
1685 simple_translate[i] = i;
1686
0190922f
KH
1687 if (charset_base)
1688 {
1689 /* Setup translate_prev_byte1/2/3 from CHARSET_BASE. Only a
1690 byte following them are the target of translation. */
1691 int sample_char = charset_base | 0x20;
1692 unsigned char str[MAX_MULTIBYTE_LENGTH];
1693 int len = CHAR_STRING (sample_char, str);
1694
1695 translate_prev_byte1 = str[len - 2];
1696 if (len > 2)
1697 {
1698 translate_prev_byte2 = str[len - 3];
1699 if (len > 3)
1700 translate_prev_byte3 = str[len - 4];
1701 }
1702 }
1703
facdc750
RS
1704 i = 0;
1705 while (i != infinity)
1706 {
cb6792d2 1707 unsigned char *ptr = base_pat + i;
facdc750
RS
1708 i += direction;
1709 if (i == dirlen)
1710 i = infinity;
1711 if (! NILP (trt))
ca1d1d23 1712 {
620a5178
KH
1713 /* If the byte currently looking at is the last of a
1714 character to check case-equivalents, set CH to that
1715 character. An ASCII character and a non-ASCII character
1716 matching with CHARSET_BASE are to be checked. */
0190922f
KH
1717 int ch = -1;
1718
1719 if (ASCII_BYTE_P (*ptr) || ! multibyte)
1720 ch = *ptr;
620a5178 1721 else if (charset_base
86dc6ccb 1722 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
ca1d1d23 1723 {
620a5178
KH
1724 unsigned char *charstart = ptr - 1;
1725
1726 while (! (CHAR_HEAD_P (*charstart)))
1727 charstart--;
1728 ch = STRING_CHAR (charstart, ptr - charstart + 1);
0190922f
KH
1729 if (charset_base != (ch & ~CHAR_FIELD3_MASK))
1730 ch = -1;
ca1d1d23 1731 }
facdc750 1732
6786a107 1733 if (ch >= 0400)
620a5178
KH
1734 j = ((unsigned char) ch) | 0200;
1735 else
1736 j = *ptr;
1737
facdc750
RS
1738 if (i == infinity)
1739 stride_for_teases = BM_tab[j];
ab228c24 1740
facdc750
RS
1741 BM_tab[j] = dirlen - i;
1742 /* A translation table is accompanied by its inverse -- see */
177c0ea7 1743 /* comment following downcase_table for details */
0190922f 1744 if (ch >= 0)
ab228c24
RS
1745 {
1746 int starting_ch = ch;
620a5178 1747 int starting_j = j;
0190922f 1748
ab228c24
RS
1749 while (1)
1750 {
1751 TRANSLATE (ch, inverse_trt, ch);
6786a107 1752 if (ch >= 0400)
ab228c24
RS
1753 j = ((unsigned char) ch) | 0200;
1754 else
1755 j = (unsigned char) ch;
1756
1757 /* For all the characters that map into CH,
1758 set up simple_translate to map the last byte
1759 into STARTING_J. */
1760 simple_translate[j] = starting_j;
1761 if (ch == starting_ch)
1762 break;
1763 BM_tab[j] = dirlen - i;
1764 }
1765 }
facdc750
RS
1766 }
1767 else
1768 {
1769 j = *ptr;
1770
1771 if (i == infinity)
1772 stride_for_teases = BM_tab[j];
1773 BM_tab[j] = dirlen - i;
ca1d1d23 1774 }
facdc750
RS
1775 /* stride_for_teases tells how much to stride if we get a */
1776 /* match on the far character but are subsequently */
1777 /* disappointed, by recording what the stride would have been */
1778 /* for that character if the last character had been */
1779 /* different. */
1780 }
1781 infinity = dirlen - infinity;
1782 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1783 /* loop invariant - POS_BYTE points at where last char (first
1784 char if reverse) of pattern would align in a possible match. */
1785 while (n != 0)
1786 {
1787 int tail_end;
1788 unsigned char *tail_end_ptr;
1789
1790 /* It's been reported that some (broken) compiler thinks that
1791 Boolean expressions in an arithmetic context are unsigned.
1792 Using an explicit ?1:0 prevents this. */
1793 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1794 < 0)
1795 return (n * (0 - direction));
1796 /* First we do the part we can by pointers (maybe nothing) */
1797 QUIT;
1798 pat = base_pat;
1799 limit = pos_byte - dirlen + direction;
67ce527d
KH
1800 if (direction > 0)
1801 {
1802 limit = BUFFER_CEILING_OF (limit);
1803 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1804 can take on without hitting edge of buffer or the gap. */
1805 limit = min (limit, pos_byte + 20000);
1806 limit = min (limit, lim_byte - 1);
1807 }
1808 else
1809 {
1810 limit = BUFFER_FLOOR_OF (limit);
1811 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1812 can take on without hitting edge of buffer or the gap. */
1813 limit = max (limit, pos_byte - 20000);
1814 limit = max (limit, lim_byte);
1815 }
facdc750
RS
1816 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1817 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1818
1819 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1820 {
facdc750
RS
1821 unsigned char *p2;
1822
1823 p_limit = BYTE_POS_ADDR (limit);
1824 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1825 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1826 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1827 {
facdc750 1828 if (direction > 0) /* worth duplicating */
ca1d1d23 1829 {
facdc750
RS
1830 /* Use signed comparison if appropriate
1831 to make cursor+infinity sure to be > p_limit.
1832 Assuming that the buffer lies in a range of addresses
1833 that are all "positive" (as ints) or all "negative",
1834 either kind of comparison will work as long
1835 as we don't step by infinity. So pick the kind
1836 that works when we do step by infinity. */
1837 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1838 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1839 cursor += BM_tab[*cursor];
ca1d1d23 1840 else
facdc750
RS
1841 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1842 cursor += BM_tab[*cursor];
1843 }
1844 else
1845 {
1846 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1847 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1848 cursor += BM_tab[*cursor];
1849 else
1850 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1851 cursor += BM_tab[*cursor];
1852 }
ca1d1d23 1853/* If you are here, cursor is beyond the end of the searched region. */
facdc750
RS
1854/* This can happen if you match on the far character of the pattern, */
1855/* because the "stride" of that character is infinity, a number able */
1856/* to throw you well beyond the end of the search. It can also */
1857/* happen if you fail to match within the permitted region and would */
1858/* otherwise try a character beyond that region */
1859 if ((cursor - p_limit) * direction <= len_byte)
1860 break; /* a small overrun is genuine */
1861 cursor -= infinity; /* large overrun = hit */
1862 i = dirlen - direction;
1863 if (! NILP (trt))
1864 {
1865 while ((i -= direction) + direction != 0)
ca1d1d23 1866 {
facdc750
RS
1867 int ch;
1868 cursor -= direction;
1869 /* Translate only the last byte of a character. */
1870 if (! multibyte
1871 || ((cursor == tail_end_ptr
1872 || CHAR_HEAD_P (cursor[1]))
1873 && (CHAR_HEAD_P (cursor[0])
0190922f
KH
1874 /* Check if this is the last byte of
1875 a translable character. */
1876 || (translate_prev_byte1 == cursor[-1]
1877 && (CHAR_HEAD_P (translate_prev_byte1)
1878 || (translate_prev_byte2 == cursor[-2]
1879 && (CHAR_HEAD_P (translate_prev_byte2)
1880 || (translate_prev_byte3 == cursor[-3]))))))))
facdc750
RS
1881 ch = simple_translate[*cursor];
1882 else
1883 ch = *cursor;
1884 if (pat[i] != ch)
1885 break;
ca1d1d23 1886 }
facdc750
RS
1887 }
1888 else
1889 {
1890 while ((i -= direction) + direction != 0)
ca1d1d23 1891 {
facdc750
RS
1892 cursor -= direction;
1893 if (pat[i] != *cursor)
1894 break;
ca1d1d23 1895 }
facdc750
RS
1896 }
1897 cursor += dirlen - i - direction; /* fix cursor */
1898 if (i + direction == 0)
1899 {
1900 int position;
0c8533c6 1901
facdc750 1902 cursor -= direction;
1113d9db 1903
facdc750
RS
1904 position = pos_byte + cursor - p2 + ((direction > 0)
1905 ? 1 - len_byte : 0);
1906 set_search_regs (position, len_byte);
ca325161 1907
facdc750
RS
1908 if ((n -= direction) != 0)
1909 cursor += dirlen; /* to resume search */
ca1d1d23 1910 else
facdc750
RS
1911 return ((direction > 0)
1912 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1913 }
facdc750
RS
1914 else
1915 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 1916 }
facdc750
RS
1917 pos_byte += cursor - p2;
1918 }
1919 else
1920 /* Now we'll pick up a clump that has to be done the hard */
1921 /* way because it covers a discontinuity */
1922 {
1923 limit = ((direction > 0)
1924 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1925 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1926 limit = ((direction > 0)
1927 ? min (limit + len_byte, lim_byte - 1)
1928 : max (limit - len_byte, lim_byte));
1929 /* LIMIT is now the last value POS_BYTE can have
1930 and still be valid for a possible match. */
1931 while (1)
ca1d1d23 1932 {
facdc750
RS
1933 /* This loop can be coded for space rather than */
1934 /* speed because it will usually run only once. */
1935 /* (the reach is at most len + 21, and typically */
177c0ea7 1936 /* does not exceed len) */
facdc750
RS
1937 while ((limit - pos_byte) * direction >= 0)
1938 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1939 /* now run the same tests to distinguish going off the */
1940 /* end, a match or a phony match. */
1941 if ((pos_byte - limit) * direction <= len_byte)
1942 break; /* ran off the end */
1943 /* Found what might be a match.
1944 Set POS_BYTE back to last (first if reverse) pos. */
1945 pos_byte -= infinity;
1946 i = dirlen - direction;
1947 while ((i -= direction) + direction != 0)
ca1d1d23 1948 {
facdc750
RS
1949 int ch;
1950 unsigned char *ptr;
1951 pos_byte -= direction;
1952 ptr = BYTE_POS_ADDR (pos_byte);
1953 /* Translate only the last byte of a character. */
1954 if (! multibyte
1955 || ((ptr == tail_end_ptr
1956 || CHAR_HEAD_P (ptr[1]))
1957 && (CHAR_HEAD_P (ptr[0])
0190922f
KH
1958 /* Check if this is the last byte of a
1959 translable character. */
1960 || (translate_prev_byte1 == ptr[-1]
1961 && (CHAR_HEAD_P (translate_prev_byte1)
1962 || (translate_prev_byte2 == ptr[-2]
1963 && (CHAR_HEAD_P (translate_prev_byte2)
1964 || translate_prev_byte3 == ptr[-3])))))))
facdc750
RS
1965 ch = simple_translate[*ptr];
1966 else
1967 ch = *ptr;
1968 if (pat[i] != ch)
1969 break;
1970 }
1971 /* Above loop has moved POS_BYTE part or all the way
1972 back to the first pos (last pos if reverse).
1973 Set it once again at the last (first if reverse) char. */
1974 pos_byte += dirlen - i- direction;
1975 if (i + direction == 0)
1976 {
1977 int position;
1978 pos_byte -= direction;
1113d9db 1979
facdc750 1980 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
0c8533c6 1981
facdc750 1982 set_search_regs (position, len_byte);
ca325161 1983
facdc750
RS
1984 if ((n -= direction) != 0)
1985 pos_byte += dirlen; /* to resume search */
ca1d1d23 1986 else
facdc750
RS
1987 return ((direction > 0)
1988 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1989 }
facdc750
RS
1990 else
1991 pos_byte += stride_for_teases;
1992 }
1993 }
1994 /* We have done one clump. Can we continue? */
1995 if ((lim_byte - pos_byte) * direction < 0)
1996 return ((0 - n) * direction);
ca1d1d23 1997 }
facdc750 1998 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 1999}
ca325161 2000
fa8ed3e0 2001/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
a7e4cdde
RS
2002 for the overall match just found in the current buffer.
2003 Also clear out the match data for registers 1 and up. */
ca325161
RS
2004
2005static void
fa8ed3e0
RS
2006set_search_regs (beg_byte, nbytes)
2007 int beg_byte, nbytes;
ca325161 2008{
a7e4cdde
RS
2009 int i;
2010
ca325161
RS
2011 /* Make sure we have registers in which to store
2012 the match position. */
2013 if (search_regs.num_regs == 0)
2014 {
2d4a771a
RS
2015 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2016 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 2017 search_regs.num_regs = 2;
ca325161
RS
2018 }
2019
a7e4cdde
RS
2020 /* Clear out the other registers. */
2021 for (i = 1; i < search_regs.num_regs; i++)
2022 {
2023 search_regs.start[i] = -1;
2024 search_regs.end[i] = -1;
2025 }
2026
fa8ed3e0
RS
2027 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2028 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 2029 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 2030}
ca1d1d23
JB
2031\f
2032/* Given a string of words separated by word delimiters,
2033 compute a regexp that matches those exact words
2034 separated by arbitrary punctuation. */
2035
2036static Lisp_Object
2037wordify (string)
2038 Lisp_Object string;
2039{
2040 register unsigned char *p, *o;
0c8533c6 2041 register int i, i_byte, len, punct_count = 0, word_count = 0;
ca1d1d23 2042 Lisp_Object val;
0c8533c6
RS
2043 int prev_c = 0;
2044 int adjust;
ca1d1d23 2045
b7826503 2046 CHECK_STRING (string);
d5db4077
KR
2047 p = SDATA (string);
2048 len = SCHARS (string);
ca1d1d23 2049
0c8533c6
RS
2050 for (i = 0, i_byte = 0; i < len; )
2051 {
2052 int c;
177c0ea7 2053
eb99a8dd 2054 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
0c8533c6
RS
2055
2056 if (SYNTAX (c) != Sword)
2057 {
2058 punct_count++;
2059 if (i > 0 && SYNTAX (prev_c) == Sword)
2060 word_count++;
2061 }
ca1d1d23 2062
0c8533c6
RS
2063 prev_c = c;
2064 }
2065
2066 if (SYNTAX (prev_c) == Sword)
2067 word_count++;
2068 if (!word_count)
b07b65aa 2069 return empty_string;
0c8533c6
RS
2070
2071 adjust = - punct_count + 5 * (word_count - 1) + 4;
8a2df937
RS
2072 if (STRING_MULTIBYTE (string))
2073 val = make_uninit_multibyte_string (len + adjust,
d5db4077 2074 SBYTES (string)
8a2df937
RS
2075 + adjust);
2076 else
2077 val = make_uninit_string (len + adjust);
ca1d1d23 2078
d5db4077 2079 o = SDATA (val);
ca1d1d23
JB
2080 *o++ = '\\';
2081 *o++ = 'b';
1e9582d4 2082 prev_c = 0;
ca1d1d23 2083
1e9582d4
RS
2084 for (i = 0, i_byte = 0; i < len; )
2085 {
2086 int c;
2087 int i_byte_orig = i_byte;
177c0ea7 2088
eb99a8dd 2089 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1e9582d4
RS
2090
2091 if (SYNTAX (c) == Sword)
2092 {
5d69fe10 2093 bcopy (SDATA (string) + i_byte_orig, o,
1e9582d4
RS
2094 i_byte - i_byte_orig);
2095 o += i_byte - i_byte_orig;
2096 }
2097 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2098 {
2099 *o++ = '\\';
2100 *o++ = 'W';
2101 *o++ = '\\';
2102 *o++ = 'W';
2103 *o++ = '*';
2104 }
2105
2106 prev_c = c;
2107 }
ca1d1d23
JB
2108
2109 *o++ = '\\';
2110 *o++ = 'b';
2111
2112 return val;
2113}
2114\f
2115DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
8c1a1077
PJ
2116 "MSearch backward: ",
2117 doc: /* Search backward from point for STRING.
2118Set point to the beginning of the occurrence found, and return point.
2119An optional second argument bounds the search; it is a buffer position.
2120The match found must not extend before that position.
2121Optional third argument, if t, means if fail just return nil (no error).
2122 If not nil and not t, position at limit of search and return nil.
2123Optional fourth argument is repeat count--search for successive occurrences.
2124
2125Search case-sensitivity is determined by the value of the variable
2126`case-fold-search', which see.
2127
2128See also the functions `match-beginning', `match-end' and `replace-match'. */)
2129 (string, bound, noerror, count)
ca1d1d23
JB
2130 Lisp_Object string, bound, noerror, count;
2131{
b819a390 2132 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
2133}
2134
6af43974 2135DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
8c1a1077
PJ
2136 doc: /* Search forward from point for STRING.
2137Set point to the end of the occurrence found, and return point.
2138An optional second argument bounds the search; it is a buffer position.
2139The match found must not extend after that position. nil is equivalent
2140 to (point-max).
2141Optional third argument, if t, means if fail just return nil (no error).
2142 If not nil and not t, move to limit of search and return nil.
2143Optional fourth argument is repeat count--search for successive occurrences.
2144
2145Search case-sensitivity is determined by the value of the variable
2146`case-fold-search', which see.
2147
2148See also the functions `match-beginning', `match-end' and `replace-match'. */)
2149 (string, bound, noerror, count)
ca1d1d23
JB
2150 Lisp_Object string, bound, noerror, count;
2151{
b819a390 2152 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
2153}
2154
2155DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
8c1a1077
PJ
2156 "sWord search backward: ",
2157 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2158Set point to the beginning of the occurrence found, and return point.
2159An optional second argument bounds the search; it is a buffer position.
2160The match found must not extend before that position.
2161Optional third argument, if t, means if fail just return nil (no error).
2162 If not nil and not t, move to limit of search and return nil.
2163Optional fourth argument is repeat count--search for successive occurrences. */)
2164 (string, bound, noerror, count)
ca1d1d23
JB
2165 Lisp_Object string, bound, noerror, count;
2166{
b819a390 2167 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2168}
2169
2170DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
8c1a1077
PJ
2171 "sWord search: ",
2172 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2173Set point to the end of the occurrence found, and return point.
2174An optional second argument bounds the search; it is a buffer position.
2175The match found must not extend after that position.
2176Optional third argument, if t, means if fail just return nil (no error).
2177 If not nil and not t, move to limit of search and return nil.
2178Optional fourth argument is repeat count--search for successive occurrences. */)
2179 (string, bound, noerror, count)
ca1d1d23
JB
2180 Lisp_Object string, bound, noerror, count;
2181{
b819a390 2182 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
2183}
2184
2185DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
8c1a1077
PJ
2186 "sRE search backward: ",
2187 doc: /* Search backward from point for match for regular expression REGEXP.
2188Set point to the beginning of the match, and return point.
2189The match found is the one starting last in the buffer
2190and yet ending before the origin of the search.
2191An optional second argument bounds the search; it is a buffer position.
2192The match found must start at or after that position.
2193Optional third argument, if t, means if fail just return nil (no error).
2194 If not nil and not t, move to limit of search and return nil.
2195Optional fourth argument is repeat count--search for successive occurrences.
2196See also the functions `match-beginning', `match-end', `match-string',
2197and `replace-match'. */)
2198 (regexp, bound, noerror, count)
19c0a730 2199 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2200{
b819a390 2201 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2202}
2203
2204DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
8c1a1077
PJ
2205 "sRE search: ",
2206 doc: /* Search forward from point for regular expression REGEXP.
2207Set point to the end of the occurrence found, and return point.
2208An optional second argument bounds the search; it is a buffer position.
2209The match found must not extend after that position.
2210Optional third argument, if t, means if fail just return nil (no error).
2211 If not nil and not t, move to limit of search and return nil.
2212Optional fourth argument is repeat count--search for successive occurrences.
2213See also the functions `match-beginning', `match-end', `match-string',
2214and `replace-match'. */)
2215 (regexp, bound, noerror, count)
19c0a730 2216 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2217{
b819a390
RS
2218 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2219}
2220
2221DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
8c1a1077
PJ
2222 "sPosix search backward: ",
2223 doc: /* Search backward from point for match for regular expression REGEXP.
2224Find the longest match in accord with Posix regular expression rules.
2225Set point to the beginning of the match, and return point.
2226The match found is the one starting last in the buffer
2227and yet ending before the origin of the search.
2228An optional second argument bounds the search; it is a buffer position.
2229The match found must start at or after that position.
2230Optional third argument, if t, means if fail just return nil (no error).
2231 If not nil and not t, move to limit of search and return nil.
2232Optional fourth argument is repeat count--search for successive occurrences.
2233See also the functions `match-beginning', `match-end', `match-string',
2234and `replace-match'. */)
2235 (regexp, bound, noerror, count)
b819a390
RS
2236 Lisp_Object regexp, bound, noerror, count;
2237{
2238 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2239}
2240
2241DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
8c1a1077
PJ
2242 "sPosix search: ",
2243 doc: /* Search forward from point for regular expression REGEXP.
2244Find the longest match in accord with Posix regular expression rules.
2245Set point to the end of the occurrence found, and return point.
2246An optional second argument bounds the search; it is a buffer position.
2247The match found must not extend after that position.
2248Optional third argument, if t, means if fail just return nil (no error).
2249 If not nil and not t, move to limit of search and return nil.
2250Optional fourth argument is repeat count--search for successive occurrences.
2251See also the functions `match-beginning', `match-end', `match-string',
2252and `replace-match'. */)
2253 (regexp, bound, noerror, count)
b819a390
RS
2254 Lisp_Object regexp, bound, noerror, count;
2255{
2256 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2257}
2258\f
d7a5ad5f 2259DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
8c1a1077 2260 doc: /* Replace text matched by last search with NEWTEXT.
4dd0c271
RS
2261Leave point at the end of the replacement text.
2262
8c1a1077
PJ
2263If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2264Otherwise maybe capitalize the whole text, or maybe just word initials,
2265based on the replaced text.
2266If the replaced text has only capital letters
2267and has at least one multiletter word, convert NEWTEXT to all caps.
4dd0c271
RS
2268Otherwise if all words are capitalized in the replaced text,
2269capitalize each word in NEWTEXT.
8c1a1077
PJ
2270
2271If third arg LITERAL is non-nil, insert NEWTEXT literally.
2272Otherwise treat `\\' as special:
2273 `\\&' in NEWTEXT means substitute original matched text.
2274 `\\N' means substitute what matched the Nth `\\(...\\)'.
2275 If Nth parens didn't match, substitute nothing.
2276 `\\\\' means insert one `\\'.
4dd0c271
RS
2277Case conversion does not apply to these substitutions.
2278
8c1a1077 2279FIXEDCASE and LITERAL are optional arguments.
8c1a1077
PJ
2280
2281The optional fourth argument STRING can be a string to modify.
2282This is meaningful when the previous match was done against STRING,
2283using `string-match'. When used this way, `replace-match'
2284creates and returns a new string made by copying STRING and replacing
2285the part of STRING that was matched.
2286
2287The optional fifth argument SUBEXP specifies a subexpression;
2288it says to replace just that subexpression with NEWTEXT,
2289rather than replacing the entire matched text.
2290This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2291`\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2292NEWTEXT in place of subexp N.
2293This is useful only after a regular expression search or match,
2294since only regular expressions have distinguished subexpressions. */)
2295 (newtext, fixedcase, literal, string, subexp)
d7a5ad5f 2296 Lisp_Object newtext, fixedcase, literal, string, subexp;
ca1d1d23
JB
2297{
2298 enum { nochange, all_caps, cap_initial } case_action;
ac3b28b1 2299 register int pos, pos_byte;
ca1d1d23 2300 int some_multiletter_word;
97832bd0 2301 int some_lowercase;
73dc8771 2302 int some_uppercase;
208767c3 2303 int some_nonuppercase_initial;
ca1d1d23 2304 register int c, prevc;
d7a5ad5f 2305 int sub;
3e18eecf 2306 int opoint, newpoint;
ca1d1d23 2307
b7826503 2308 CHECK_STRING (newtext);
ca1d1d23 2309
080c45fd 2310 if (! NILP (string))
b7826503 2311 CHECK_STRING (string);
080c45fd 2312
ca1d1d23
JB
2313 case_action = nochange; /* We tried an initialization */
2314 /* but some C compilers blew it */
4746118a
JB
2315
2316 if (search_regs.num_regs <= 0)
d72cdbfc 2317 error ("`replace-match' called before any match found");
4746118a 2318
d7a5ad5f
RS
2319 if (NILP (subexp))
2320 sub = 0;
2321 else
2322 {
b7826503 2323 CHECK_NUMBER (subexp);
d7a5ad5f
RS
2324 sub = XINT (subexp);
2325 if (sub < 0 || sub >= search_regs.num_regs)
2326 args_out_of_range (subexp, make_number (search_regs.num_regs));
2327 }
2328
080c45fd
RS
2329 if (NILP (string))
2330 {
d7a5ad5f
RS
2331 if (search_regs.start[sub] < BEGV
2332 || search_regs.start[sub] > search_regs.end[sub]
2333 || search_regs.end[sub] > ZV)
2334 args_out_of_range (make_number (search_regs.start[sub]),
2335 make_number (search_regs.end[sub]));
080c45fd
RS
2336 }
2337 else
2338 {
d7a5ad5f
RS
2339 if (search_regs.start[sub] < 0
2340 || search_regs.start[sub] > search_regs.end[sub]
d5db4077 2341 || search_regs.end[sub] > SCHARS (string))
d7a5ad5f
RS
2342 args_out_of_range (make_number (search_regs.start[sub]),
2343 make_number (search_regs.end[sub]));
080c45fd 2344 }
ca1d1d23
JB
2345
2346 if (NILP (fixedcase))
2347 {
2348 /* Decide how to casify by examining the matched text. */
ac3b28b1 2349 int last;
ca1d1d23 2350
ac3b28b1
KH
2351 pos = search_regs.start[sub];
2352 last = search_regs.end[sub];
fa8ed3e0
RS
2353
2354 if (NILP (string))
ac3b28b1 2355 pos_byte = CHAR_TO_BYTE (pos);
fa8ed3e0 2356 else
ac3b28b1 2357 pos_byte = string_char_to_byte (string, pos);
fa8ed3e0 2358
ca1d1d23
JB
2359 prevc = '\n';
2360 case_action = all_caps;
2361
2362 /* some_multiletter_word is set nonzero if any original word
2363 is more than one letter long. */
2364 some_multiletter_word = 0;
97832bd0 2365 some_lowercase = 0;
208767c3 2366 some_nonuppercase_initial = 0;
73dc8771 2367 some_uppercase = 0;
ca1d1d23 2368
ac3b28b1 2369 while (pos < last)
ca1d1d23 2370 {
080c45fd 2371 if (NILP (string))
ac3b28b1
KH
2372 {
2373 c = FETCH_CHAR (pos_byte);
2374 INC_BOTH (pos, pos_byte);
2375 }
080c45fd 2376 else
ac3b28b1 2377 FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
080c45fd 2378
ca1d1d23
JB
2379 if (LOWERCASEP (c))
2380 {
2381 /* Cannot be all caps if any original char is lower case */
2382
97832bd0 2383 some_lowercase = 1;
ca1d1d23 2384 if (SYNTAX (prevc) != Sword)
208767c3 2385 some_nonuppercase_initial = 1;
ca1d1d23
JB
2386 else
2387 some_multiletter_word = 1;
2388 }
d16c2b66 2389 else if (UPPERCASEP (c))
ca1d1d23 2390 {
73dc8771 2391 some_uppercase = 1;
97832bd0 2392 if (SYNTAX (prevc) != Sword)
c4d460ce 2393 ;
97832bd0 2394 else
ca1d1d23
JB
2395 some_multiletter_word = 1;
2396 }
208767c3
RS
2397 else
2398 {
2399 /* If the initial is a caseless word constituent,
2400 treat that like a lowercase initial. */
2401 if (SYNTAX (prevc) != Sword)
2402 some_nonuppercase_initial = 1;
2403 }
ca1d1d23
JB
2404
2405 prevc = c;
2406 }
2407
97832bd0
RS
2408 /* Convert to all caps if the old text is all caps
2409 and has at least one multiletter word. */
2410 if (! some_lowercase && some_multiletter_word)
2411 case_action = all_caps;
c4d460ce 2412 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2413 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2414 case_action = cap_initial;
208767c3 2415 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2416 /* Should x -> yz, operating on X, give Yz or YZ?
2417 We'll assume the latter. */
2418 case_action = all_caps;
97832bd0
RS
2419 else
2420 case_action = nochange;
ca1d1d23
JB
2421 }
2422
080c45fd
RS
2423 /* Do replacement in a string. */
2424 if (!NILP (string))
2425 {
2426 Lisp_Object before, after;
2427
2428 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2429 make_number (search_regs.start[sub]));
2430 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2431
636a5e28
RS
2432 /* Substitute parts of the match into NEWTEXT
2433 if desired. */
080c45fd
RS
2434 if (NILP (literal))
2435 {
d131e79c
RS
2436 int lastpos = 0;
2437 int lastpos_byte = 0;
080c45fd
RS
2438 /* We build up the substituted string in ACCUM. */
2439 Lisp_Object accum;
2440 Lisp_Object middle;
d5db4077 2441 int length = SBYTES (newtext);
080c45fd
RS
2442
2443 accum = Qnil;
2444
ac3b28b1 2445 for (pos_byte = 0, pos = 0; pos_byte < length;)
080c45fd
RS
2446 {
2447 int substart = -1;
6bbd7a29 2448 int subend = 0;
1e79ec24 2449 int delbackslash = 0;
080c45fd 2450
0c8533c6
RS
2451 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2452
080c45fd
RS
2453 if (c == '\\')
2454 {
0c8533c6 2455 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
177c0ea7 2456
080c45fd
RS
2457 if (c == '&')
2458 {
d7a5ad5f
RS
2459 substart = search_regs.start[sub];
2460 subend = search_regs.end[sub];
080c45fd 2461 }
5fbbc83b 2462 else if (c >= '1' && c <= '9')
080c45fd 2463 {
5fbbc83b
RS
2464 if (search_regs.start[c - '0'] >= 0
2465 && c <= search_regs.num_regs + '0')
080c45fd
RS
2466 {
2467 substart = search_regs.start[c - '0'];
2468 subend = search_regs.end[c - '0'];
2469 }
5fbbc83b
RS
2470 else
2471 {
2472 /* If that subexp did not match,
2473 replace \\N with nothing. */
2474 substart = 0;
2475 subend = 0;
2476 }
080c45fd 2477 }
1e79ec24
KH
2478 else if (c == '\\')
2479 delbackslash = 1;
636a5e28
RS
2480 else
2481 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2482 }
2483 if (substart >= 0)
2484 {
d131e79c
RS
2485 if (pos - 2 != lastpos)
2486 middle = substring_both (newtext, lastpos,
2487 lastpos_byte,
2488 pos - 2, pos_byte - 2);
080c45fd
RS
2489 else
2490 middle = Qnil;
2491 accum = concat3 (accum, middle,
0c8533c6
RS
2492 Fsubstring (string,
2493 make_number (substart),
080c45fd
RS
2494 make_number (subend)));
2495 lastpos = pos;
0c8533c6 2496 lastpos_byte = pos_byte;
080c45fd 2497 }
1e79ec24
KH
2498 else if (delbackslash)
2499 {
d131e79c
RS
2500 middle = substring_both (newtext, lastpos,
2501 lastpos_byte,
2502 pos - 1, pos_byte - 1);
0c8533c6 2503
1e79ec24
KH
2504 accum = concat2 (accum, middle);
2505 lastpos = pos;
0c8533c6 2506 lastpos_byte = pos_byte;
1e79ec24 2507 }
080c45fd
RS
2508 }
2509
d131e79c
RS
2510 if (pos != lastpos)
2511 middle = substring_both (newtext, lastpos,
2512 lastpos_byte,
0c8533c6 2513 pos, pos_byte);
080c45fd
RS
2514 else
2515 middle = Qnil;
2516
2517 newtext = concat2 (accum, middle);
2518 }
2519
636a5e28 2520 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2521 if (case_action == all_caps)
2522 newtext = Fupcase (newtext);
2523 else if (case_action == cap_initial)
2b2eead9 2524 newtext = Fupcase_initials (newtext);
080c45fd
RS
2525
2526 return concat3 (before, newtext, after);
2527 }
2528
09c4719e 2529 /* Record point, then move (quietly) to the start of the match. */
9160906f 2530 if (PT >= search_regs.end[sub])
b0eba991 2531 opoint = PT - ZV;
9160906f
RS
2532 else if (PT > search_regs.start[sub])
2533 opoint = search_regs.end[sub] - ZV;
b0eba991
RS
2534 else
2535 opoint = PT;
2536
886ed6ec
RS
2537 /* If we want non-literal replacement,
2538 perform substitution on the replacement string. */
2539 if (NILP (literal))
ca1d1d23 2540 {
d5db4077 2541 int length = SBYTES (newtext);
68e69fbd
RS
2542 unsigned char *substed;
2543 int substed_alloc_size, substed_len;
3bc25e52
KH
2544 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2545 int str_multibyte = STRING_MULTIBYTE (newtext);
2546 Lisp_Object rev_tbl;
886ed6ec 2547 int really_changed = 0;
3bc25e52
KH
2548
2549 rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2550 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2551 make_number (0))
2552 : Qnil);
ac3b28b1 2553
68e69fbd
RS
2554 substed_alloc_size = length * 2 + 100;
2555 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2556 substed_len = 0;
2557
3bc25e52
KH
2558 /* Go thru NEWTEXT, producing the actual text to insert in
2559 SUBSTED while adjusting multibyteness to that of the current
2560 buffer. */
ca1d1d23 2561
ac3b28b1 2562 for (pos_byte = 0, pos = 0; pos_byte < length;)
ca1d1d23 2563 {
68e69fbd 2564 unsigned char str[MAX_MULTIBYTE_LENGTH];
f8ce8a0d
GM
2565 unsigned char *add_stuff = NULL;
2566 int add_len = 0;
68e69fbd 2567 int idx = -1;
9a76659d 2568
3bc25e52
KH
2569 if (str_multibyte)
2570 {
eb99a8dd 2571 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
3bc25e52
KH
2572 if (!buf_multibyte)
2573 c = multibyte_char_to_unibyte (c, rev_tbl);
2574 }
2575 else
2576 {
2577 /* Note that we don't have to increment POS. */
5d69fe10 2578 c = SREF (newtext, pos_byte++);
3bc25e52
KH
2579 if (buf_multibyte)
2580 c = unibyte_char_to_multibyte (c);
2581 }
ac3b28b1 2582
68e69fbd
RS
2583 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2584 or set IDX to a match index, which means put that part
2585 of the buffer text into SUBSTED. */
2586
ca1d1d23
JB
2587 if (c == '\\')
2588 {
886ed6ec
RS
2589 really_changed = 1;
2590
3bc25e52
KH
2591 if (str_multibyte)
2592 {
eb99a8dd
KH
2593 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2594 pos, pos_byte);
3bc25e52
KH
2595 if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2596 c = multibyte_char_to_unibyte (c, rev_tbl);
2597 }
2598 else
2599 {
d5db4077 2600 c = SREF (newtext, pos_byte++);
3bc25e52
KH
2601 if (buf_multibyte)
2602 c = unibyte_char_to_multibyte (c);
2603 }
2604
ca1d1d23 2605 if (c == '&')
68e69fbd 2606 idx = sub;
78445046 2607 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
2608 {
2609 if (search_regs.start[c - '0'] >= 1)
68e69fbd 2610 idx = c - '0';
ca1d1d23 2611 }
636a5e28 2612 else if (c == '\\')
68e69fbd 2613 add_len = 1, add_stuff = "\\";
636a5e28 2614 else
3bc25e52
KH
2615 {
2616 xfree (substed);
2617 error ("Invalid use of `\\' in replacement text");
2618 }
ca1d1d23
JB
2619 }
2620 else
68e69fbd
RS
2621 {
2622 add_len = CHAR_STRING (c, str);
2623 add_stuff = str;
2624 }
2625
2626 /* If we want to copy part of a previous match,
2627 set up ADD_STUFF and ADD_LEN to point to it. */
2628 if (idx >= 0)
2629 {
2630 int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2631 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2632 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2633 move_gap (search_regs.start[idx]);
2634 add_stuff = BYTE_POS_ADDR (begbyte);
2635 }
2636
2637 /* Now the stuff we want to add to SUBSTED
2638 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2639
2640 /* Make sure SUBSTED is big enough. */
2641 if (substed_len + add_len >= substed_alloc_size)
2642 {
2643 substed_alloc_size = substed_len + add_len + 500;
2644 substed = (unsigned char *) xrealloc (substed,
2645 substed_alloc_size + 1);
2646 }
2647
2648 /* Now add to the end of SUBSTED. */
f8ce8a0d
GM
2649 if (add_stuff)
2650 {
2651 bcopy (add_stuff, substed + substed_len, add_len);
2652 substed_len += add_len;
2653 }
ca1d1d23 2654 }
68e69fbd 2655
886ed6ec 2656 if (really_changed)
80460525
KH
2657 {
2658 if (buf_multibyte)
2659 {
2660 int nchars = multibyte_chars_in_text (substed, substed_len);
68e69fbd 2661
80460525
KH
2662 newtext = make_multibyte_string (substed, nchars, substed_len);
2663 }
2664 else
2665 newtext = make_unibyte_string (substed, substed_len);
2666 }
68e69fbd 2667 xfree (substed);
ca1d1d23
JB
2668 }
2669
886ed6ec
RS
2670 /* Replace the old text with the new in the cleanest possible way. */
2671 replace_range (search_regs.start[sub], search_regs.end[sub],
2672 newtext, 1, 0, 1);
d5db4077 2673 newpoint = search_regs.start[sub] + SCHARS (newtext);
ca1d1d23
JB
2674
2675 if (case_action == all_caps)
886ed6ec
RS
2676 Fupcase_region (make_number (search_regs.start[sub]),
2677 make_number (newpoint));
ca1d1d23 2678 else if (case_action == cap_initial)
886ed6ec
RS
2679 Fupcase_initials_region (make_number (search_regs.start[sub]),
2680 make_number (newpoint));
3e18eecf 2681
98e942e0
RS
2682 /* Adjust search data for this change. */
2683 {
5b88a2c5 2684 int oldend = search_regs.end[sub];
41c01205 2685 int oldstart = search_regs.start[sub];
98e942e0
RS
2686 int change = newpoint - search_regs.end[sub];
2687 int i;
2688
2689 for (i = 0; i < search_regs.num_regs; i++)
2690 {
41c01205 2691 if (search_regs.start[i] >= oldend)
98e942e0 2692 search_regs.start[i] += change;
41c01205
DK
2693 else if (search_regs.start[i] > oldstart)
2694 search_regs.start[i] = oldstart;
2695 if (search_regs.end[i] >= oldend)
98e942e0 2696 search_regs.end[i] += change;
41c01205
DK
2697 else if (search_regs.end[i] > oldstart)
2698 search_regs.end[i] = oldstart;
98e942e0
RS
2699 }
2700 }
2701
b0eba991 2702 /* Put point back where it was in the text. */
8d808a65 2703 if (opoint <= 0)
fa8ed3e0 2704 TEMP_SET_PT (opoint + ZV);
b0eba991 2705 else
fa8ed3e0 2706 TEMP_SET_PT (opoint);
b0eba991
RS
2707
2708 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2709 move_if_not_intangible (newpoint);
177c0ea7 2710
ca1d1d23
JB
2711 return Qnil;
2712}
2713\f
2714static Lisp_Object
2715match_limit (num, beginningp)
2716 Lisp_Object num;
2717 int beginningp;
2718{
2719 register int n;
2720
b7826503 2721 CHECK_NUMBER (num);
ca1d1d23 2722 n = XINT (num);
f90a5bf5 2723 if (n < 0)
bd2cbd56 2724 args_out_of_range (num, make_number (0));
f90a5bf5
RS
2725 if (search_regs.num_regs <= 0)
2726 error ("No match data, because no search succeeded");
9b9ceb61 2727 if (n >= search_regs.num_regs
4746118a 2728 || search_regs.start[n] < 0)
ca1d1d23
JB
2729 return Qnil;
2730 return (make_number ((beginningp) ? search_regs.start[n]
2731 : search_regs.end[n]));
2732}
2733
2734DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
8c1a1077
PJ
2735 doc: /* Return position of start of text matched by last search.
2736SUBEXP, a number, specifies which parenthesized expression in the last
2737 regexp.
2738Value is nil if SUBEXPth pair didn't match, or there were less than
2739 SUBEXP pairs.
2740Zero means the entire text matched by the whole regexp or whole string. */)
2741 (subexp)
5806161b 2742 Lisp_Object subexp;
ca1d1d23 2743{
5806161b 2744 return match_limit (subexp, 1);
ca1d1d23
JB
2745}
2746
2747DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
8c1a1077
PJ
2748 doc: /* Return position of end of text matched by last search.
2749SUBEXP, a number, specifies which parenthesized expression in the last
2750 regexp.
2751Value is nil if SUBEXPth pair didn't match, or there were less than
2752 SUBEXP pairs.
2753Zero means the entire text matched by the whole regexp or whole string. */)
2754 (subexp)
5806161b 2755 Lisp_Object subexp;
ca1d1d23 2756{
5806161b 2757 return match_limit (subexp, 0);
177c0ea7 2758}
ca1d1d23 2759
abd0071c 2760DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
8c1a1077
PJ
2761 doc: /* Return a list containing all info on what the last search matched.
2762Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2763All the elements are markers or nil (nil if the Nth pair didn't match)
2764if the last match was on a buffer; integers or nil if a string was matched.
2765Use `store-match-data' to reinstate the data in this list.
2766
41c01205
DK
2767If INTEGERS (the optional first argument) is non-nil, always use
2768integers \(rather than markers) to represent buffer positions. In
2769this case, and if the last match was in a buffer, the buffer will get
2770stored as one additional element at the end of the list.
2771
abd0071c
KS
2772If REUSE is a list, reuse it as part of the value. If REUSE is long
2773enough to hold all the values, and if INTEGERS is non-nil, no consing
2774is done.
2775
2776If optional third arg RESEAT is non-nil, any previous markers on the
2777REUSE list will be modified to point to nowhere.
2778
140a6b7e 2779Return value is undefined if the last search failed. */)
abd0071c
KS
2780 (integers, reuse, reseat)
2781 Lisp_Object integers, reuse, reseat;
ca1d1d23 2782{
56256c2a 2783 Lisp_Object tail, prev;
4746118a 2784 Lisp_Object *data;
ca1d1d23
JB
2785 int i, len;
2786
abd0071c
KS
2787 if (!NILP (reseat))
2788 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2789 if (MARKERP (XCAR (tail)))
2790 {
51f10faa 2791 unchain_marker (XMARKER (XCAR (tail)));
abd0071c
KS
2792 XSETCAR (tail, Qnil);
2793 }
2794
daa37602 2795 if (NILP (last_thing_searched))
c36bcf1b 2796 return Qnil;
daa37602 2797
6bbd7a29
GM
2798 prev = Qnil;
2799
41c01205 2800 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
4746118a
JB
2801 * sizeof (Lisp_Object));
2802
41c01205 2803 len = 0;
4746118a 2804 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2805 {
2806 int start = search_regs.start[i];
2807 if (start >= 0)
2808 {
56256c2a
RS
2809 if (EQ (last_thing_searched, Qt)
2810 || ! NILP (integers))
ca1d1d23 2811 {
c235cce7
KH
2812 XSETFASTINT (data[2 * i], start);
2813 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2814 }
0ed62dc7 2815 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2816 {
2817 data[2 * i] = Fmake_marker ();
daa37602
JB
2818 Fset_marker (data[2 * i],
2819 make_number (start),
2820 last_thing_searched);
ca1d1d23
JB
2821 data[2 * i + 1] = Fmake_marker ();
2822 Fset_marker (data[2 * i + 1],
177c0ea7 2823 make_number (search_regs.end[i]),
daa37602 2824 last_thing_searched);
ca1d1d23 2825 }
daa37602
JB
2826 else
2827 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2828 abort ();
2829
abd0071c 2830 len = 2 * i + 2;
ca1d1d23
JB
2831 }
2832 else
abd0071c 2833 data[2 * i] = data[2 * i + 1] = Qnil;
ca1d1d23 2834 }
56256c2a 2835
bd2cbd56 2836 if (BUFFERP (last_thing_searched) && !NILP (integers))
41c01205 2837 {
bd2cbd56 2838 data[len] = last_thing_searched;
41c01205
DK
2839 len++;
2840 }
2841
56256c2a
RS
2842 /* If REUSE is not usable, cons up the values and return them. */
2843 if (! CONSP (reuse))
41c01205 2844 return Flist (len, data);
56256c2a
RS
2845
2846 /* If REUSE is a list, store as many value elements as will fit
2847 into the elements of REUSE. */
2848 for (i = 0, tail = reuse; CONSP (tail);
c1d497be 2849 i++, tail = XCDR (tail))
56256c2a 2850 {
41c01205 2851 if (i < len)
f3fbd155 2852 XSETCAR (tail, data[i]);
56256c2a 2853 else
f3fbd155 2854 XSETCAR (tail, Qnil);
56256c2a
RS
2855 prev = tail;
2856 }
2857
2858 /* If we couldn't fit all value elements into REUSE,
2859 cons up the rest of them and add them to the end of REUSE. */
41c01205
DK
2860 if (i < len)
2861 XSETCDR (prev, Flist (len - i, data + i));
56256c2a
RS
2862
2863 return reuse;
ca1d1d23
JB
2864}
2865
51f10faa
KS
2866/* Internal usage only:
2867 If RESEAT is `evaporate', put the markers back on the free list
2868 immediately. No other references to the markers must exist in this case,
2869 so it is used only internally on the unwind stack and save-match-data from
2870 Lisp. */
ca1d1d23 2871
abd0071c 2872DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
8c1a1077 2873 doc: /* Set internal data on last search match from elements of LIST.
abd0071c
KS
2874LIST should have been created by calling `match-data' previously.
2875
51f10faa 2876If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
abd0071c
KS
2877 (list, reseat)
2878 register Lisp_Object list, reseat;
ca1d1d23
JB
2879{
2880 register int i;
2881 register Lisp_Object marker;
2882
7074fde6
FP
2883 if (running_asynch_code)
2884 save_search_regs ();
2885
29100cea 2886 CHECK_LIST (list);
ca1d1d23 2887
41c01205
DK
2888 /* Unless we find a marker with a buffer or an explicit buffer
2889 in LIST, assume that this match data came from a string. */
daa37602
JB
2890 last_thing_searched = Qt;
2891
4746118a
JB
2892 /* Allocate registers if they don't already exist. */
2893 {
d084e942 2894 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
2895
2896 if (length > search_regs.num_regs)
2897 {
1113d9db
JB
2898 if (search_regs.num_regs == 0)
2899 {
2900 search_regs.start
2901 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2902 search_regs.end
2903 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2904 }
4746118a 2905 else
1113d9db
JB
2906 {
2907 search_regs.start
2908 = (regoff_t *) xrealloc (search_regs.start,
2909 length * sizeof (regoff_t));
2910 search_regs.end
2911 = (regoff_t *) xrealloc (search_regs.end,
2912 length * sizeof (regoff_t));
2913 }
4746118a 2914
e62371e9
KH
2915 for (i = search_regs.num_regs; i < length; i++)
2916 search_regs.start[i] = -1;
2917
487282dc 2918 search_regs.num_regs = length;
4746118a 2919 }
ca1d1d23 2920
abd0071c 2921 for (i = 0; CONSP (list); i++)
41c01205 2922 {
abd0071c 2923 marker = XCAR (list);
bd2cbd56 2924 if (BUFFERP (marker))
c3762cbd 2925 {
bd2cbd56 2926 last_thing_searched = marker;
c3762cbd
DK
2927 break;
2928 }
2929 if (i >= length)
2930 break;
41c01205
DK
2931 if (NILP (marker))
2932 {
2933 search_regs.start[i] = -1;
abd0071c 2934 list = XCDR (list);
41c01205
DK
2935 }
2936 else
2937 {
2938 int from;
abd0071c 2939 Lisp_Object m;
e2811828 2940
abd0071c 2941 m = marker;
41c01205
DK
2942 if (MARKERP (marker))
2943 {
2944 if (XMARKER (marker)->buffer == 0)
2945 XSETFASTINT (marker, 0);
2946 else
2947 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2948 }
e2811828 2949
41c01205
DK
2950 CHECK_NUMBER_COERCE_MARKER (marker);
2951 from = XINT (marker);
e2811828 2952
abd0071c
KS
2953 if (!NILP (reseat) && MARKERP (m))
2954 {
2955 if (EQ (reseat, Qevaporate))
2956 free_marker (m);
2957 else
2958 unchain_marker (XMARKER (m));
9ad54a7e 2959 XSETCAR (list, Qnil);
abd0071c
KS
2960 }
2961
2962 if ((list = XCDR (list), !CONSP (list)))
2963 break;
2964
2965 m = marker = XCAR (list);
2966
41c01205
DK
2967 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2968 XSETFASTINT (marker, 0);
e2811828 2969
41c01205
DK
2970 CHECK_NUMBER_COERCE_MARKER (marker);
2971 search_regs.start[i] = from;
2972 search_regs.end[i] = XINT (marker);
abd0071c
KS
2973
2974 if (!NILP (reseat) && MARKERP (m))
2975 {
2976 if (EQ (reseat, Qevaporate))
2977 free_marker (m);
2978 else
2979 unchain_marker (XMARKER (m));
9ad54a7e 2980 XSETCAR (list, Qnil);
abd0071c 2981 }
41c01205 2982 }
abd0071c 2983 list = XCDR (list);
41c01205 2984 }
ca1d1d23 2985
41c01205
DK
2986 for (; i < search_regs.num_regs; i++)
2987 search_regs.start[i] = -1;
2988 }
ca1d1d23 2989
177c0ea7 2990 return Qnil;
ca1d1d23
JB
2991}
2992
7074fde6
FP
2993/* If non-zero the match data have been saved in saved_search_regs
2994 during the execution of a sentinel or filter. */
75ebf74b 2995static int search_regs_saved;
7074fde6 2996static struct re_registers saved_search_regs;
41c01205 2997static Lisp_Object saved_last_thing_searched;
7074fde6
FP
2998
2999/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3000 if asynchronous code (filter or sentinel) is running. */
3001static void
3002save_search_regs ()
3003{
3004 if (!search_regs_saved)
3005 {
3006 saved_search_regs.num_regs = search_regs.num_regs;
3007 saved_search_regs.start = search_regs.start;
3008 saved_search_regs.end = search_regs.end;
41c01205
DK
3009 saved_last_thing_searched = last_thing_searched;
3010 last_thing_searched = Qnil;
7074fde6 3011 search_regs.num_regs = 0;
2d4a771a
RS
3012 search_regs.start = 0;
3013 search_regs.end = 0;
7074fde6
FP
3014
3015 search_regs_saved = 1;
3016 }
3017}
3018
3019/* Called upon exit from filters and sentinels. */
3020void
abd0071c 3021restore_search_regs ()
7074fde6
FP
3022{
3023 if (search_regs_saved)
3024 {
3025 if (search_regs.num_regs > 0)
3026 {
3027 xfree (search_regs.start);
3028 xfree (search_regs.end);
3029 }
3030 search_regs.num_regs = saved_search_regs.num_regs;
3031 search_regs.start = saved_search_regs.start;
3032 search_regs.end = saved_search_regs.end;
41c01205
DK
3033 last_thing_searched = saved_last_thing_searched;
3034 saved_last_thing_searched = Qnil;
7074fde6
FP
3035 search_regs_saved = 0;
3036 }
3037}
3038
abd0071c
KS
3039static Lisp_Object
3040unwind_set_match_data (list)
3041 Lisp_Object list;
3042{
51f10faa 3043 /* It is safe to free (evaporate) the markers immediately. */
abd0071c
KS
3044 return Fset_match_data (list, Qevaporate);
3045}
3046
3047/* Called to unwind protect the match data. */
3048void
3049record_unwind_save_match_data ()
3050{
3051 record_unwind_protect (unwind_set_match_data,
3052 Fmatch_data (Qnil, Qnil, Qnil));
3053}
3054
ca1d1d23
JB
3055/* Quote a string to inactivate reg-expr chars */
3056
3057DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
8c1a1077
PJ
3058 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
3059 (string)
5806161b 3060 Lisp_Object string;
ca1d1d23
JB
3061{
3062 register unsigned char *in, *out, *end;
3063 register unsigned char *temp;
0c8533c6 3064 int backslashes_added = 0;
ca1d1d23 3065
b7826503 3066 CHECK_STRING (string);
ca1d1d23 3067
d5db4077 3068 temp = (unsigned char *) alloca (SBYTES (string) * 2);
ca1d1d23
JB
3069
3070 /* Now copy the data into the new string, inserting escapes. */
3071
d5db4077
KR
3072 in = SDATA (string);
3073 end = in + SBYTES (string);
177c0ea7 3074 out = temp;
ca1d1d23
JB
3075
3076 for (; in != end; in++)
3077 {
66bc6082 3078 if (*in == '['
ca1d1d23
JB
3079 || *in == '*' || *in == '.' || *in == '\\'
3080 || *in == '?' || *in == '+'
3081 || *in == '^' || *in == '$')
0c8533c6 3082 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
3083 *out++ = *in;
3084 }
3085
3f8100f1 3086 return make_specified_string (temp,
d5db4077 3087 SCHARS (string) + backslashes_added,
3f8100f1
RS
3088 out - temp,
3089 STRING_MULTIBYTE (string));
ca1d1d23 3090}
177c0ea7 3091\f
dfcf069d 3092void
ca1d1d23
JB
3093syms_of_search ()
3094{
3095 register int i;
3096
487282dc
KH
3097 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3098 {
3099 searchbufs[i].buf.allocated = 100;
b23c0a83 3100 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
487282dc
KH
3101 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3102 searchbufs[i].regexp = Qnil;
ecdb561e 3103 searchbufs[i].whitespace_regexp = Qnil;
487282dc 3104 staticpro (&searchbufs[i].regexp);
aa77b5ce 3105 staticpro (&searchbufs[i].whitespace_regexp);
487282dc
KH
3106 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3107 }
3108 searchbuf_head = &searchbufs[0];
ca1d1d23
JB
3109
3110 Qsearch_failed = intern ("search-failed");
3111 staticpro (&Qsearch_failed);
3112 Qinvalid_regexp = intern ("invalid-regexp");
3113 staticpro (&Qinvalid_regexp);
3114
3115 Fput (Qsearch_failed, Qerror_conditions,
3116 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3117 Fput (Qsearch_failed, Qerror_message,
3118 build_string ("Search failed"));
3119
3120 Fput (Qinvalid_regexp, Qerror_conditions,
3121 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3122 Fput (Qinvalid_regexp, Qerror_message,
3123 build_string ("Invalid regexp"));
3124
daa37602
JB
3125 last_thing_searched = Qnil;
3126 staticpro (&last_thing_searched);
3127
0f6af254
DK
3128 saved_last_thing_searched = Qnil;
3129 staticpro (&saved_last_thing_searched);
3130
41a33295 3131 DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
e2811828 3132 doc: /* Regexp to substitute for bunches of spaces in regexp search.
f31a9a68
RS
3133Some commands use this for user-specified regexps.
3134Spaces that occur inside character classes or repetition operators
3135or other such regexp constructs are not replaced with this.
3136A value of nil (which is the normal value) means treat spaces literally. */);
41a33295 3137 Vsearch_spaces_regexp = Qnil;
f31a9a68 3138
ca1d1d23 3139 defsubr (&Slooking_at);
b819a390
RS
3140 defsubr (&Sposix_looking_at);
3141 defsubr (&Sstring_match);
3142 defsubr (&Sposix_string_match);
ca1d1d23
JB
3143 defsubr (&Ssearch_forward);
3144 defsubr (&Ssearch_backward);
3145 defsubr (&Sword_search_forward);
3146 defsubr (&Sword_search_backward);
3147 defsubr (&Sre_search_forward);
3148 defsubr (&Sre_search_backward);
b819a390
RS
3149 defsubr (&Sposix_search_forward);
3150 defsubr (&Sposix_search_backward);
ca1d1d23
JB
3151 defsubr (&Sreplace_match);
3152 defsubr (&Smatch_beginning);
3153 defsubr (&Smatch_end);
3154 defsubr (&Smatch_data);
3f1c005b 3155 defsubr (&Sset_match_data);
ca1d1d23
JB
3156 defsubr (&Sregexp_quote);
3157}
ab5796a9
MB
3158
3159/* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3160 (do not change this comment) */