Merge from emacs--rel--22
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
429ab54e
GM
2 Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
ca1d1d23
JB
4
5This file is part of GNU Emacs.
6
7GNU Emacs is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
684d6f5b 9the Free Software Foundation; either version 3, or (at your option)
ca1d1d23
JB
10any later version.
11
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU Emacs; see the file COPYING. If not, write to
4fc5845f
LK
19the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20Boston, MA 02110-1301, USA. */
ca1d1d23
JB
21
22
18160b98 23#include <config.h>
ca1d1d23
JB
24#include "lisp.h"
25#include "syntax.h"
5679531d 26#include "category.h"
ca1d1d23 27#include "buffer.h"
5679531d 28#include "charset.h"
9169c321 29#include "region-cache.h"
ca1d1d23 30#include "commands.h"
9ac0d9e0 31#include "blockinput.h"
bf1760bb 32#include "intervals.h"
4746118a 33
ca1d1d23
JB
34#include <sys/types.h>
35#include "regex.h"
36
1d288aef 37#define REGEXP_CACHE_SIZE 20
ca1d1d23 38
487282dc
KH
39/* If the regexp is non-nil, then the buffer contains the compiled form
40 of that regexp, suitable for searching. */
1d288aef
RS
41struct regexp_cache
42{
487282dc 43 struct regexp_cache *next;
ecdb561e 44 Lisp_Object regexp, whitespace_regexp;
b69e3c18 45 /* Syntax table for which the regexp applies. We need this because
58e95211
SM
46 of character classes. If this is t, then the compiled pattern is valid
47 for any syntax-table. */
b69e3c18 48 Lisp_Object syntax_table;
487282dc
KH
49 struct re_pattern_buffer buf;
50 char fastmap[0400];
b819a390
RS
51 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
52 char posix;
487282dc 53};
ca1d1d23 54
487282dc
KH
55/* The instances of that struct. */
56struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 57
487282dc
KH
58/* The head of the linked list; points to the most recently used buffer. */
59struct regexp_cache *searchbuf_head;
ca1d1d23 60
ca1d1d23 61
4746118a
JB
62/* Every call to re_match, etc., must pass &search_regs as the regs
63 argument unless you can show it is unnecessary (i.e., if re_match
64 is certainly going to be called again before region-around-match
65 can be called).
66
67 Since the registers are now dynamically allocated, we need to make
68 sure not to refer to the Nth register before checking that it has
1113d9db
JB
69 been allocated by checking search_regs.num_regs.
70
71 The regex code keeps track of whether it has allocated the search
487282dc
KH
72 buffer using bits in the re_pattern_buffer. This means that whenever
73 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
74 allocated any registers, and will allocate new registers the next
75 time you call a searching or matching function. Therefore, we need
76 to call re_set_registers after compiling a new pattern or after
77 setting the match registers, so that the regex functions will be
78 able to free or re-allocate it properly. */
ca1d1d23
JB
79static struct re_registers search_regs;
80
daa37602
JB
81/* The buffer in which the last search was performed, or
82 Qt if the last search was done in a string;
83 Qnil if no searching has been done yet. */
84static Lisp_Object last_thing_searched;
ca1d1d23 85
8e6208c5 86/* error condition signaled when regexp compile_pattern fails */
ca1d1d23
JB
87
88Lisp_Object Qinvalid_regexp;
89
06f77a8a
KS
90/* Error condition used for failing searches */
91Lisp_Object Qsearch_failed;
92
41a33295 93Lisp_Object Vsearch_spaces_regexp;
f31a9a68 94
ef887810
RS
95/* If non-nil, the match data will not be changed during call to
96 searching or matching functions. This variable is for internal use
97 only. */
98Lisp_Object Vinhibit_changing_match_data;
99
ca325161 100static void set_search_regs ();
044f81f1 101static void save_search_regs ();
facdc750
RS
102static int simple_search ();
103static int boyer_moore ();
b819a390 104static int search_buffer ();
01d09305 105static void matcher_overflow () NO_RETURN;
b819a390 106
ca1d1d23
JB
107static void
108matcher_overflow ()
109{
110 error ("Stack overflow in regexp matcher");
111}
112
b819a390
RS
113/* Compile a regexp and signal a Lisp error if anything goes wrong.
114 PATTERN is the pattern to compile.
115 CP is the place to put the result.
facdc750 116 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
117 REGP is the structure that says where to store the "register"
118 values that will result from matching this pattern.
119 If it is 0, we should compile the pattern not to record any
120 subexpression bounds.
121 POSIX is nonzero if we want full backtracking (POSIX style)
5679531d
KH
122 for this pattern. 0 means backtrack only enough to get a valid match.
123 MULTIBYTE is nonzero if we want to handle multibyte characters in
124 PATTERN. 0 means all multibyte characters are recognized just as
ecdb561e
RS
125 sequences of binary data.
126
41a33295 127 The behavior also depends on Vsearch_spaces_regexp. */
ca1d1d23 128
487282dc 129static void
5679531d 130compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
487282dc 131 struct regexp_cache *cp;
ca1d1d23 132 Lisp_Object pattern;
facdc750 133 Lisp_Object translate;
487282dc 134 struct re_registers *regp;
b819a390 135 int posix;
5679531d 136 int multibyte;
ca1d1d23 137{
7276d3d8 138 unsigned char *raw_pattern;
f8bd51c4 139 int raw_pattern_size;
d451e4db 140 char *val;
b819a390 141 reg_syntax_t old;
ca1d1d23 142
f8bd51c4
KH
143 /* MULTIBYTE says whether the text to be searched is multibyte.
144 We must convert PATTERN to match that, or we will not really
145 find things right. */
146
147 if (multibyte == STRING_MULTIBYTE (pattern))
148 {
d5db4077
KR
149 raw_pattern = (unsigned char *) SDATA (pattern);
150 raw_pattern_size = SBYTES (pattern);
f8bd51c4
KH
151 }
152 else if (multibyte)
153 {
d5db4077
KR
154 raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
155 SCHARS (pattern));
7276d3d8 156 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
157 copy_text (SDATA (pattern), raw_pattern,
158 SCHARS (pattern), 0, 1);
f8bd51c4
KH
159 }
160 else
161 {
162 /* Converting multibyte to single-byte.
163
164 ??? Perhaps this conversion should be done in a special way
165 by subtracting nonascii-insert-offset from each non-ASCII char,
166 so that only the multibyte chars which really correspond to
167 the chosen single-byte character set can possibly match. */
d5db4077 168 raw_pattern_size = SCHARS (pattern);
7276d3d8 169 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
170 copy_text (SDATA (pattern), raw_pattern,
171 SBYTES (pattern), 1, 0);
f8bd51c4
KH
172 }
173
487282dc 174 cp->regexp = Qnil;
59fab369 175 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
b819a390 176 cp->posix = posix;
5679531d 177 cp->buf.multibyte = multibyte;
41a33295 178 cp->whitespace_regexp = Vsearch_spaces_regexp;
e7b2dd2e
RS
179 /* rms: I think BLOCK_INPUT is not needed here any more,
180 because regex.c defines malloc to call xmalloc.
181 Using BLOCK_INPUT here means the debugger won't run if an error occurs.
182 So let's turn it off. */
183 /* BLOCK_INPUT; */
fb4a568d 184 old = re_set_syntax (RE_SYNTAX_EMACS
b819a390 185 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
f31a9a68 186
41a33295
RS
187 re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
188 : SDATA (Vsearch_spaces_regexp));
f31a9a68 189
7276d3d8
RS
190 val = (char *) re_compile_pattern ((char *)raw_pattern,
191 raw_pattern_size, &cp->buf);
f31a9a68 192
58e95211
SM
193 /* If the compiled pattern hard codes some of the contents of the
194 syntax-table, it can only be reused with *this* syntax table. */
195 cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
196
f31a9a68
RS
197 re_set_whitespace_regexp (NULL);
198
b819a390 199 re_set_syntax (old);
e7b2dd2e 200 /* UNBLOCK_INPUT; */
ca1d1d23 201 if (val)
06f77a8a 202 xsignal1 (Qinvalid_regexp, build_string (val));
1113d9db 203
487282dc 204 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
205}
206
6efc7887
RS
207/* Shrink each compiled regexp buffer in the cache
208 to the size actually used right now.
209 This is called from garbage collection. */
210
211void
212shrink_regexp_cache ()
213{
a968f437 214 struct regexp_cache *cp;
6efc7887
RS
215
216 for (cp = searchbuf_head; cp != 0; cp = cp->next)
217 {
218 cp->buf.allocated = cp->buf.used;
219 cp->buf.buffer
b23c0a83 220 = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
6efc7887
RS
221 }
222}
223
54dd3310
SM
224/* Clear the regexp cache w.r.t. a particular syntax table,
225 because it was changed.
e5b94d44
CY
226 There is no danger of memory leak here because re_compile_pattern
227 automagically manages the memory in each re_pattern_buffer struct,
228 based on its `allocated' and `buffer' values. */
229void
230clear_regexp_cache ()
231{
232 int i;
233
234 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
54dd3310
SM
235 /* It's tempting to compare with the syntax-table we've actually changd,
236 but it's not sufficient because char-table inheritance mewans that
237 modifying one syntax-table can change others at the same time. */
238 if (!EQ (searchbufs[i].syntax_table, Qt))
239 searchbufs[i].regexp = Qnil;
e5b94d44
CY
240}
241
487282dc 242/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
243 the cache.
244 PATTERN is the pattern to compile.
facdc750 245 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
246 REGP is the structure that says where to store the "register"
247 values that will result from matching this pattern.
248 If it is 0, we should compile the pattern not to record any
249 subexpression bounds.
250 POSIX is nonzero if we want full backtracking (POSIX style)
251 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
252
253struct re_pattern_buffer *
0c8533c6 254compile_pattern (pattern, regp, translate, posix, multibyte)
487282dc
KH
255 Lisp_Object pattern;
256 struct re_registers *regp;
facdc750 257 Lisp_Object translate;
0c8533c6 258 int posix, multibyte;
487282dc
KH
259{
260 struct regexp_cache *cp, **cpp;
261
262 for (cpp = &searchbuf_head; ; cpp = &cp->next)
263 {
264 cp = *cpp;
f1b9c7c1
KR
265 /* Entries are initialized to nil, and may be set to nil by
266 compile_pattern_1 if the pattern isn't valid. Don't apply
49a5f770
KR
267 string accessors in those cases. However, compile_pattern_1
268 is only applied to the cache entry we pick here to reuse. So
269 nil should never appear before a non-nil entry. */
7c752c80 270 if (NILP (cp->regexp))
f1b9c7c1 271 goto compile_it;
d5db4077 272 if (SCHARS (cp->regexp) == SCHARS (pattern)
cf69b13e 273 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
1d288aef 274 && !NILP (Fstring_equal (cp->regexp, pattern))
59fab369 275 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
5679531d 276 && cp->posix == posix
ecdb561e 277 && cp->buf.multibyte == multibyte
58e95211
SM
278 && (EQ (cp->syntax_table, Qt)
279 || EQ (cp->syntax_table, current_buffer->syntax_table))
41a33295 280 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp)))
487282dc
KH
281 break;
282
f1b9c7c1
KR
283 /* If we're at the end of the cache, compile into the nil cell
284 we found, or the last (least recently used) cell with a
285 string value. */
487282dc
KH
286 if (cp->next == 0)
287 {
f1b9c7c1 288 compile_it:
5679531d 289 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
487282dc
KH
290 break;
291 }
292 }
293
294 /* When we get here, cp (aka *cpp) contains the compiled pattern,
295 either because we found it in the cache or because we just compiled it.
296 Move it to the front of the queue to mark it as most recently used. */
297 *cpp = cp->next;
298 cp->next = searchbuf_head;
299 searchbuf_head = cp;
1113d9db 300
6639708c
RS
301 /* Advise the searching functions about the space we have allocated
302 for register data. */
303 if (regp)
304 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
305
487282dc 306 return &cp->buf;
ca1d1d23
JB
307}
308
ca1d1d23 309\f
b819a390
RS
310static Lisp_Object
311looking_at_1 (string, posix)
ca1d1d23 312 Lisp_Object string;
b819a390 313 int posix;
ca1d1d23
JB
314{
315 Lisp_Object val;
316 unsigned char *p1, *p2;
317 int s1, s2;
318 register int i;
487282dc 319 struct re_pattern_buffer *bufp;
ca1d1d23 320
7074fde6
FP
321 if (running_asynch_code)
322 save_search_regs ();
323
910c747a
RS
324 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
325 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
326 = current_buffer->case_eqv_table;
327
b7826503 328 CHECK_STRING (string);
ef887810
RS
329 bufp = compile_pattern (string,
330 (NILP (Vinhibit_changing_match_data)
331 ? &search_regs : NULL),
487282dc 332 (!NILP (current_buffer->case_fold_search)
0190922f 333 ? current_buffer->case_canon_table : Qnil),
0c8533c6
RS
334 posix,
335 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23
JB
336
337 immediate_quit = 1;
338 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
339
340 /* Get pointers and sizes of the two strings
341 that make up the visible portion of the buffer. */
342
343 p1 = BEGV_ADDR;
fa8ed3e0 344 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 345 p2 = GAP_END_ADDR;
fa8ed3e0 346 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
347 if (s1 < 0)
348 {
349 p2 = p1;
fa8ed3e0 350 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
351 s1 = 0;
352 }
353 if (s2 < 0)
354 {
fa8ed3e0 355 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
356 s2 = 0;
357 }
8bb43c28
RS
358
359 re_match_object = Qnil;
177c0ea7 360
487282dc 361 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
ef887810
RS
362 PT_BYTE - BEGV_BYTE,
363 (NILP (Vinhibit_changing_match_data)
364 ? &search_regs : NULL),
fa8ed3e0 365 ZV_BYTE - BEGV_BYTE);
de182d70 366 immediate_quit = 0;
177c0ea7 367
ca1d1d23
JB
368 if (i == -2)
369 matcher_overflow ();
370
371 val = (0 <= i ? Qt : Qnil);
ef887810 372 if (NILP (Vinhibit_changing_match_data) && i >= 0)
fa8ed3e0
RS
373 for (i = 0; i < search_regs.num_regs; i++)
374 if (search_regs.start[i] >= 0)
375 {
376 search_regs.start[i]
377 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
378 search_regs.end[i]
379 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
380 }
ef887810
RS
381
382 /* Set last_thing_searched only when match data is changed. */
383 if (NILP (Vinhibit_changing_match_data))
384 XSETBUFFER (last_thing_searched, current_buffer);
385
ca1d1d23
JB
386 return val;
387}
388
b819a390 389DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
8c1a1077
PJ
390 doc: /* Return t if text after point matches regular expression REGEXP.
391This function modifies the match data that `match-beginning',
392`match-end' and `match-data' access; save and restore the match
393data if you want to preserve them. */)
394 (regexp)
94f94972 395 Lisp_Object regexp;
b819a390 396{
94f94972 397 return looking_at_1 (regexp, 0);
b819a390
RS
398}
399
400DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
8c1a1077
PJ
401 doc: /* Return t if text after point matches regular expression REGEXP.
402Find the longest match, in accord with Posix regular expression rules.
403This function modifies the match data that `match-beginning',
404`match-end' and `match-data' access; save and restore the match
405data if you want to preserve them. */)
406 (regexp)
94f94972 407 Lisp_Object regexp;
b819a390 408{
94f94972 409 return looking_at_1 (regexp, 1);
b819a390
RS
410}
411\f
412static Lisp_Object
413string_match_1 (regexp, string, start, posix)
ca1d1d23 414 Lisp_Object regexp, string, start;
b819a390 415 int posix;
ca1d1d23
JB
416{
417 int val;
487282dc 418 struct re_pattern_buffer *bufp;
0c8533c6
RS
419 int pos, pos_byte;
420 int i;
ca1d1d23 421
7074fde6
FP
422 if (running_asynch_code)
423 save_search_regs ();
424
b7826503
PJ
425 CHECK_STRING (regexp);
426 CHECK_STRING (string);
ca1d1d23
JB
427
428 if (NILP (start))
0c8533c6 429 pos = 0, pos_byte = 0;
ca1d1d23
JB
430 else
431 {
d5db4077 432 int len = SCHARS (string);
ca1d1d23 433
b7826503 434 CHECK_NUMBER (start);
0c8533c6
RS
435 pos = XINT (start);
436 if (pos < 0 && -pos <= len)
437 pos = len + pos;
438 else if (0 > pos || pos > len)
ca1d1d23 439 args_out_of_range (string, start);
0c8533c6 440 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
441 }
442
910c747a
RS
443 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
444 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
445 = current_buffer->case_eqv_table;
446
ef887810
RS
447 bufp = compile_pattern (regexp,
448 (NILP (Vinhibit_changing_match_data)
449 ? &search_regs : NULL),
487282dc 450 (!NILP (current_buffer->case_fold_search)
0190922f 451 ? current_buffer->case_canon_table : Qnil),
0c8533c6
RS
452 posix,
453 STRING_MULTIBYTE (string));
ca1d1d23 454 immediate_quit = 1;
8bb43c28 455 re_match_object = string;
177c0ea7 456
d5db4077
KR
457 val = re_search (bufp, (char *) SDATA (string),
458 SBYTES (string), pos_byte,
459 SBYTES (string) - pos_byte,
ef887810
RS
460 (NILP (Vinhibit_changing_match_data)
461 ? &search_regs : NULL));
ca1d1d23 462 immediate_quit = 0;
ef887810
RS
463
464 /* Set last_thing_searched only when match data is changed. */
465 if (NILP (Vinhibit_changing_match_data))
466 last_thing_searched = Qt;
467
ca1d1d23
JB
468 if (val == -2)
469 matcher_overflow ();
470 if (val < 0) return Qnil;
0c8533c6 471
ef887810
RS
472 if (NILP (Vinhibit_changing_match_data))
473 for (i = 0; i < search_regs.num_regs; i++)
474 if (search_regs.start[i] >= 0)
475 {
476 search_regs.start[i]
477 = string_byte_to_char (string, search_regs.start[i]);
478 search_regs.end[i]
479 = string_byte_to_char (string, search_regs.end[i]);
480 }
0c8533c6
RS
481
482 return make_number (string_byte_to_char (string, val));
ca1d1d23 483}
e59a8453 484
b819a390 485DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
8c1a1077 486 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
b85acc4b 487Matching ignores case if `case-fold-search' is non-nil.
8c1a1077
PJ
488If third arg START is non-nil, start search at that index in STRING.
489For index of first char beyond the match, do (match-end 0).
490`match-end' and `match-beginning' also give indices of substrings
2bd2f32d
RS
491matched by parenthesis constructs in the pattern.
492
493You can use the function `match-string' to extract the substrings
494matched by the parenthesis constructions in REGEXP. */)
8c1a1077 495 (regexp, string, start)
b819a390
RS
496 Lisp_Object regexp, string, start;
497{
498 return string_match_1 (regexp, string, start, 0);
499}
500
501DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
8c1a1077
PJ
502 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
503Find the longest match, in accord with Posix regular expression rules.
504Case is ignored if `case-fold-search' is non-nil in the current buffer.
505If third arg START is non-nil, start search at that index in STRING.
506For index of first char beyond the match, do (match-end 0).
507`match-end' and `match-beginning' also give indices of substrings
508matched by parenthesis constructs in the pattern. */)
509 (regexp, string, start)
b819a390
RS
510 Lisp_Object regexp, string, start;
511{
512 return string_match_1 (regexp, string, start, 1);
513}
514
e59a8453
RS
515/* Match REGEXP against STRING, searching all of STRING,
516 and return the index of the match, or negative on failure.
517 This does not clobber the match data. */
518
519int
520fast_string_match (regexp, string)
521 Lisp_Object regexp, string;
522{
523 int val;
487282dc 524 struct re_pattern_buffer *bufp;
e59a8453 525
facdc750
RS
526 bufp = compile_pattern (regexp, 0, Qnil,
527 0, STRING_MULTIBYTE (string));
e59a8453 528 immediate_quit = 1;
8bb43c28 529 re_match_object = string;
177c0ea7 530
d5db4077
KR
531 val = re_search (bufp, (char *) SDATA (string),
532 SBYTES (string), 0,
533 SBYTES (string), 0);
e59a8453
RS
534 immediate_quit = 0;
535 return val;
536}
5679531d
KH
537
538/* Match REGEXP against STRING, searching all of STRING ignoring case,
539 and return the index of the match, or negative on failure.
0c8533c6
RS
540 This does not clobber the match data.
541 We assume that STRING contains single-byte characters. */
5679531d
KH
542
543extern Lisp_Object Vascii_downcase_table;
544
545int
b4577c63 546fast_c_string_match_ignore_case (regexp, string)
5679531d 547 Lisp_Object regexp;
96b80561 548 const char *string;
5679531d
KH
549{
550 int val;
551 struct re_pattern_buffer *bufp;
552 int len = strlen (string);
553
0c8533c6 554 regexp = string_make_unibyte (regexp);
b4577c63 555 re_match_object = Qt;
5679531d 556 bufp = compile_pattern (regexp, 0,
0190922f 557 Vascii_canon_table, 0,
f8bd51c4 558 0);
5679531d
KH
559 immediate_quit = 1;
560 val = re_search (bufp, string, len, 0, len, 0);
561 immediate_quit = 0;
562 return val;
563}
be5f4dfb
KH
564
565/* Like fast_string_match but ignore case. */
566
567int
568fast_string_match_ignore_case (regexp, string)
569 Lisp_Object regexp, string;
570{
571 int val;
572 struct re_pattern_buffer *bufp;
573
0190922f 574 bufp = compile_pattern (regexp, 0, Vascii_canon_table,
be5f4dfb
KH
575 0, STRING_MULTIBYTE (string));
576 immediate_quit = 1;
577 re_match_object = string;
578
579 val = re_search (bufp, (char *) SDATA (string),
580 SBYTES (string), 0,
581 SBYTES (string), 0);
582 immediate_quit = 0;
583 return val;
584}
ca1d1d23 585\f
9169c321
JB
586/* The newline cache: remembering which sections of text have no newlines. */
587
588/* If the user has requested newline caching, make sure it's on.
589 Otherwise, make sure it's off.
590 This is our cheezy way of associating an action with the change of
591 state of a buffer-local variable. */
592static void
593newline_cache_on_off (buf)
594 struct buffer *buf;
595{
596 if (NILP (buf->cache_long_line_scans))
597 {
598 /* It should be off. */
599 if (buf->newline_cache)
600 {
601 free_region_cache (buf->newline_cache);
602 buf->newline_cache = 0;
603 }
604 }
605 else
606 {
607 /* It should be on. */
608 if (buf->newline_cache == 0)
609 buf->newline_cache = new_region_cache ();
610 }
611}
612
613\f
614/* Search for COUNT instances of the character TARGET between START and END.
615
616 If COUNT is positive, search forwards; END must be >= START.
617 If COUNT is negative, search backwards for the -COUNTth instance;
618 END must be <= START.
619 If COUNT is zero, do anything you please; run rogue, for all I care.
620
621 If END is zero, use BEGV or ZV instead, as appropriate for the
622 direction indicated by COUNT.
ffd56f97
JB
623
624 If we find COUNT instances, set *SHORTAGE to zero, and return the
a9f2a45f 625 position past the COUNTth match. Note that for reverse motion
5bfe95c9 626 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 627
9169c321
JB
628 If we don't find COUNT instances before reaching END, set *SHORTAGE
629 to the number of TARGETs left unfound, and return END.
ffd56f97 630
087a5f81
RS
631 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
632 except when inside redisplay. */
633
dfcf069d 634int
9169c321
JB
635scan_buffer (target, start, end, count, shortage, allow_quit)
636 register int target;
637 int start, end;
638 int count;
639 int *shortage;
087a5f81 640 int allow_quit;
ca1d1d23 641{
9169c321 642 struct region_cache *newline_cache;
177c0ea7 643 int direction;
ffd56f97 644
9169c321
JB
645 if (count > 0)
646 {
647 direction = 1;
648 if (! end) end = ZV;
649 }
650 else
651 {
652 direction = -1;
653 if (! end) end = BEGV;
654 }
ffd56f97 655
9169c321
JB
656 newline_cache_on_off (current_buffer);
657 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
658
659 if (shortage != 0)
660 *shortage = 0;
661
087a5f81 662 immediate_quit = allow_quit;
ca1d1d23 663
ffd56f97 664 if (count > 0)
9169c321 665 while (start != end)
ca1d1d23 666 {
9169c321
JB
667 /* Our innermost scanning loop is very simple; it doesn't know
668 about gaps, buffer ends, or the newline cache. ceiling is
669 the position of the last character before the next such
670 obstacle --- the last character the dumb search loop should
671 examine. */
fa8ed3e0
RS
672 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
673 int start_byte = CHAR_TO_BYTE (start);
67ce527d 674 int tem;
9169c321
JB
675
676 /* If we're looking for a newline, consult the newline cache
677 to see where we can avoid some scanning. */
678 if (target == '\n' && newline_cache)
679 {
680 int next_change;
681 immediate_quit = 0;
682 while (region_cache_forward
fa8ed3e0
RS
683 (current_buffer, newline_cache, start_byte, &next_change))
684 start_byte = next_change;
cbe0db0d 685 immediate_quit = allow_quit;
9169c321 686
fa8ed3e0
RS
687 /* START should never be after END. */
688 if (start_byte > ceiling_byte)
689 start_byte = ceiling_byte;
9169c321
JB
690
691 /* Now the text after start is an unknown region, and
692 next_change is the position of the next known region. */
fa8ed3e0 693 ceiling_byte = min (next_change - 1, ceiling_byte);
9169c321
JB
694 }
695
696 /* The dumb loop can only scan text stored in contiguous
697 bytes. BUFFER_CEILING_OF returns the last character
698 position that is contiguous, so the ceiling is the
699 position after that. */
67ce527d
KH
700 tem = BUFFER_CEILING_OF (start_byte);
701 ceiling_byte = min (tem, ceiling_byte);
9169c321
JB
702
703 {
177c0ea7 704 /* The termination address of the dumb loop. */
fa8ed3e0
RS
705 register unsigned char *ceiling_addr
706 = BYTE_POS_ADDR (ceiling_byte) + 1;
707 register unsigned char *cursor
708 = BYTE_POS_ADDR (start_byte);
9169c321
JB
709 unsigned char *base = cursor;
710
711 while (cursor < ceiling_addr)
712 {
713 unsigned char *scan_start = cursor;
714
715 /* The dumb loop. */
716 while (*cursor != target && ++cursor < ceiling_addr)
717 ;
718
719 /* If we're looking for newlines, cache the fact that
720 the region from start to cursor is free of them. */
721 if (target == '\n' && newline_cache)
722 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
723 start_byte + scan_start - base,
724 start_byte + cursor - base);
9169c321
JB
725
726 /* Did we find the target character? */
727 if (cursor < ceiling_addr)
728 {
729 if (--count == 0)
730 {
731 immediate_quit = 0;
fa8ed3e0 732 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
9169c321
JB
733 }
734 cursor++;
735 }
736 }
737
fa8ed3e0 738 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321 739 }
ca1d1d23
JB
740 }
741 else
9169c321
JB
742 while (start > end)
743 {
744 /* The last character to check before the next obstacle. */
fa8ed3e0
RS
745 int ceiling_byte = CHAR_TO_BYTE (end);
746 int start_byte = CHAR_TO_BYTE (start);
67ce527d 747 int tem;
9169c321
JB
748
749 /* Consult the newline cache, if appropriate. */
750 if (target == '\n' && newline_cache)
751 {
752 int next_change;
753 immediate_quit = 0;
754 while (region_cache_backward
fa8ed3e0
RS
755 (current_buffer, newline_cache, start_byte, &next_change))
756 start_byte = next_change;
cbe0db0d 757 immediate_quit = allow_quit;
9169c321
JB
758
759 /* Start should never be at or before end. */
fa8ed3e0
RS
760 if (start_byte <= ceiling_byte)
761 start_byte = ceiling_byte + 1;
9169c321
JB
762
763 /* Now the text before start is an unknown region, and
764 next_change is the position of the next known region. */
fa8ed3e0 765 ceiling_byte = max (next_change, ceiling_byte);
9169c321
JB
766 }
767
768 /* Stop scanning before the gap. */
67ce527d
KH
769 tem = BUFFER_FLOOR_OF (start_byte - 1);
770 ceiling_byte = max (tem, ceiling_byte);
9169c321
JB
771
772 {
773 /* The termination address of the dumb loop. */
fa8ed3e0
RS
774 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
775 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
776 unsigned char *base = cursor;
777
778 while (cursor >= ceiling_addr)
779 {
780 unsigned char *scan_start = cursor;
781
782 while (*cursor != target && --cursor >= ceiling_addr)
783 ;
784
785 /* If we're looking for newlines, cache the fact that
786 the region from after the cursor to start is free of them. */
787 if (target == '\n' && newline_cache)
788 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
789 start_byte + cursor - base,
790 start_byte + scan_start - base);
9169c321
JB
791
792 /* Did we find the target character? */
793 if (cursor >= ceiling_addr)
794 {
795 if (++count >= 0)
796 {
797 immediate_quit = 0;
fa8ed3e0 798 return BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
799 }
800 cursor--;
801 }
802 }
803
fa8ed3e0 804 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
805 }
806 }
807
ca1d1d23
JB
808 immediate_quit = 0;
809 if (shortage != 0)
ffd56f97 810 *shortage = count * direction;
9169c321 811 return start;
ca1d1d23 812}
fa8ed3e0
RS
813\f
814/* Search for COUNT instances of a line boundary, which means either a
815 newline or (if selective display enabled) a carriage return.
816 Start at START. If COUNT is negative, search backwards.
817
818 We report the resulting position by calling TEMP_SET_PT_BOTH.
819
820 If we find COUNT instances. we position after (always after,
821 even if scanning backwards) the COUNTth match, and return 0.
822
823 If we don't find COUNT instances before reaching the end of the
824 buffer (or the beginning, if scanning backwards), we return
825 the number of line boundaries left unfound, and position at
826 the limit we bumped up against.
827
828 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
d5d57b92 829 except in special cases. */
ca1d1d23 830
63fa018d 831int
fa8ed3e0
RS
832scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
833 int start, start_byte;
834 int limit, limit_byte;
835 register int count;
836 int allow_quit;
63fa018d 837{
fa8ed3e0
RS
838 int direction = ((count > 0) ? 1 : -1);
839
840 register unsigned char *cursor;
841 unsigned char *base;
842
843 register int ceiling;
844 register unsigned char *ceiling_addr;
845
d5d57b92
RS
846 int old_immediate_quit = immediate_quit;
847
fa8ed3e0
RS
848 /* The code that follows is like scan_buffer
849 but checks for either newline or carriage return. */
850
d5d57b92
RS
851 if (allow_quit)
852 immediate_quit++;
fa8ed3e0
RS
853
854 start_byte = CHAR_TO_BYTE (start);
855
856 if (count > 0)
857 {
858 while (start_byte < limit_byte)
859 {
860 ceiling = BUFFER_CEILING_OF (start_byte);
861 ceiling = min (limit_byte - 1, ceiling);
862 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
863 base = (cursor = BYTE_POS_ADDR (start_byte));
864 while (1)
865 {
866 while (*cursor != '\n' && ++cursor != ceiling_addr)
867 ;
868
869 if (cursor != ceiling_addr)
870 {
871 if (--count == 0)
872 {
d5d57b92 873 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
874 start_byte = start_byte + cursor - base + 1;
875 start = BYTE_TO_CHAR (start_byte);
876 TEMP_SET_PT_BOTH (start, start_byte);
877 return 0;
878 }
879 else
880 if (++cursor == ceiling_addr)
881 break;
882 }
883 else
884 break;
885 }
886 start_byte += cursor - base;
887 }
888 }
889 else
890 {
fa8ed3e0
RS
891 while (start_byte > limit_byte)
892 {
893 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
894 ceiling = max (limit_byte, ceiling);
895 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
896 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
897 while (1)
898 {
899 while (--cursor != ceiling_addr && *cursor != '\n')
900 ;
901
902 if (cursor != ceiling_addr)
903 {
904 if (++count == 0)
905 {
d5d57b92 906 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
907 /* Return the position AFTER the match we found. */
908 start_byte = start_byte + cursor - base + 1;
909 start = BYTE_TO_CHAR (start_byte);
910 TEMP_SET_PT_BOTH (start, start_byte);
911 return 0;
912 }
913 }
914 else
915 break;
916 }
917 /* Here we add 1 to compensate for the last decrement
918 of CURSOR, which took it past the valid range. */
919 start_byte += cursor - base + 1;
920 }
921 }
922
923 TEMP_SET_PT_BOTH (limit, limit_byte);
d5d57b92 924 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
925
926 return count * direction;
63fa018d
RS
927}
928
ca1d1d23 929int
fa8ed3e0 930find_next_newline_no_quit (from, cnt)
ca1d1d23
JB
931 register int from, cnt;
932{
fa8ed3e0 933 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
9169c321
JB
934}
935
9169c321
JB
936/* Like find_next_newline, but returns position before the newline,
937 not after, and only search up to TO. This isn't just
938 find_next_newline (...)-1, because you might hit TO. */
fa8ed3e0 939
9169c321
JB
940int
941find_before_next_newline (from, to, cnt)
cbe0db0d 942 int from, to, cnt;
9169c321
JB
943{
944 int shortage;
945 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
946
947 if (shortage == 0)
948 pos--;
177c0ea7 949
9169c321 950 return pos;
ca1d1d23
JB
951}
952\f
ca1d1d23
JB
953/* Subroutines of Lisp buffer search functions. */
954
955static Lisp_Object
b819a390 956search_command (string, bound, noerror, count, direction, RE, posix)
ca1d1d23
JB
957 Lisp_Object string, bound, noerror, count;
958 int direction;
959 int RE;
b819a390 960 int posix;
ca1d1d23
JB
961{
962 register int np;
9f43ad85 963 int lim, lim_byte;
ca1d1d23
JB
964 int n = direction;
965
966 if (!NILP (count))
967 {
b7826503 968 CHECK_NUMBER (count);
ca1d1d23
JB
969 n *= XINT (count);
970 }
971
b7826503 972 CHECK_STRING (string);
ca1d1d23 973 if (NILP (bound))
9f43ad85
RS
974 {
975 if (n > 0)
976 lim = ZV, lim_byte = ZV_BYTE;
977 else
978 lim = BEGV, lim_byte = BEGV_BYTE;
979 }
ca1d1d23
JB
980 else
981 {
b7826503 982 CHECK_NUMBER_COERCE_MARKER (bound);
ca1d1d23 983 lim = XINT (bound);
6ec8bbd2 984 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
985 error ("Invalid search bound (wrong side of point)");
986 if (lim > ZV)
9f43ad85 987 lim = ZV, lim_byte = ZV_BYTE;
588d2fd5 988 else if (lim < BEGV)
9f43ad85 989 lim = BEGV, lim_byte = BEGV_BYTE;
588d2fd5
KH
990 else
991 lim_byte = CHAR_TO_BYTE (lim);
ca1d1d23
JB
992 }
993
910c747a
RS
994 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
995 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
996 = current_buffer->case_eqv_table;
997
9f43ad85 998 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
ca1d1d23 999 (!NILP (current_buffer->case_fold_search)
facdc750 1000 ? current_buffer->case_canon_table
3135e9fd 1001 : Qnil),
ca1d1d23 1002 (!NILP (current_buffer->case_fold_search)
facdc750 1003 ? current_buffer->case_eqv_table
3135e9fd 1004 : Qnil),
b819a390 1005 posix);
ca1d1d23
JB
1006 if (np <= 0)
1007 {
1008 if (NILP (noerror))
06f77a8a
KS
1009 xsignal1 (Qsearch_failed, string);
1010
ca1d1d23
JB
1011 if (!EQ (noerror, Qt))
1012 {
1013 if (lim < BEGV || lim > ZV)
1014 abort ();
9f43ad85 1015 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
1016 return Qnil;
1017#if 0 /* This would be clean, but maybe programs depend on
1018 a value of nil here. */
481399bf 1019 np = lim;
a5f217b8 1020#endif
ca1d1d23 1021 }
481399bf
RS
1022 else
1023 return Qnil;
ca1d1d23
JB
1024 }
1025
1026 if (np < BEGV || np > ZV)
1027 abort ();
1028
1029 SET_PT (np);
1030
1031 return make_number (np);
1032}
1033\f
fa8ed3e0
RS
1034/* Return 1 if REGEXP it matches just one constant string. */
1035
b6d6a51c
KH
1036static int
1037trivial_regexp_p (regexp)
1038 Lisp_Object regexp;
1039{
d5db4077
KR
1040 int len = SBYTES (regexp);
1041 unsigned char *s = SDATA (regexp);
b6d6a51c
KH
1042 while (--len >= 0)
1043 {
1044 switch (*s++)
1045 {
1046 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1047 return 0;
1048 case '\\':
1049 if (--len < 0)
1050 return 0;
1051 switch (*s++)
1052 {
1053 case '|': case '(': case ')': case '`': case '\'': case 'b':
1054 case 'B': case '<': case '>': case 'w': case 'W': case 's':
29f89fe7 1055 case 'S': case '=': case '{': case '}': case '_':
5679531d 1056 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 1057 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
1058 case '6': case '7': case '8': case '9':
1059 return 0;
1060 }
1061 }
1062 }
1063 return 1;
1064}
1065
ca325161 1066/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 1067 starting at position POS and stopping at position LIM,
b819a390 1068 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
1069 a regular expression if RE is true.
1070
1071 If N is positive, searching is forward and LIM must be greater than POS.
1072 If N is negative, searching is backward and LIM must be less than POS.
1073
facdc750 1074 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 1075 or else the position at the beginning of the Nth occurrence
b819a390
RS
1076 (if searching backward) or the end (if searching forward).
1077
1078 POSIX is nonzero if we want full backtracking (POSIX style)
1079 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 1080
aff2ce94
RS
1081#define TRANSLATE(out, trt, d) \
1082do \
1083 { \
1084 if (! NILP (trt)) \
1085 { \
1086 Lisp_Object temp; \
1087 temp = Faref (trt, make_number (d)); \
1088 if (INTEGERP (temp)) \
1089 out = XINT (temp); \
1090 else \
1091 out = d; \
1092 } \
1093 else \
1094 out = d; \
1095 } \
1096while (0)
facdc750 1097
ef887810
RS
1098/* Only used in search_buffer, to record the end position of the match
1099 when searching regexps and SEARCH_REGS should not be changed
1100 (i.e. Vinhibit_changing_match_data is non-nil). */
1101static struct re_registers search_regs_1;
1102
b819a390 1103static int
9f43ad85
RS
1104search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1105 RE, trt, inverse_trt, posix)
ca1d1d23
JB
1106 Lisp_Object string;
1107 int pos;
9f43ad85 1108 int pos_byte;
ca1d1d23 1109 int lim;
9f43ad85 1110 int lim_byte;
ca1d1d23
JB
1111 int n;
1112 int RE;
facdc750
RS
1113 Lisp_Object trt;
1114 Lisp_Object inverse_trt;
b819a390 1115 int posix;
ca1d1d23 1116{
d5db4077
KR
1117 int len = SCHARS (string);
1118 int len_byte = SBYTES (string);
facdc750 1119 register int i;
ca1d1d23 1120
7074fde6
FP
1121 if (running_asynch_code)
1122 save_search_regs ();
1123
a7e4cdde 1124 /* Searching 0 times means don't move. */
ca1d1d23 1125 /* Null string is found at starting position. */
a7e4cdde 1126 if (len == 0 || n == 0)
ca325161 1127 {
0353b28f 1128 set_search_regs (pos_byte, 0);
ca325161
RS
1129 return pos;
1130 }
3f57a499 1131
41a33295 1132 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
ca1d1d23 1133 {
facdc750
RS
1134 unsigned char *p1, *p2;
1135 int s1, s2;
487282dc
KH
1136 struct re_pattern_buffer *bufp;
1137
ef887810
RS
1138 bufp = compile_pattern (string,
1139 (NILP (Vinhibit_changing_match_data)
1140 ? &search_regs : &search_regs_1),
1141 trt, posix,
0c8533c6 1142 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23 1143
ca1d1d23
JB
1144 immediate_quit = 1; /* Quit immediately if user types ^G,
1145 because letting this function finish
1146 can take too long. */
1147 QUIT; /* Do a pending quit right away,
1148 to avoid paradoxical behavior */
1149 /* Get pointers and sizes of the two strings
1150 that make up the visible portion of the buffer. */
1151
1152 p1 = BEGV_ADDR;
fa8ed3e0 1153 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1154 p2 = GAP_END_ADDR;
fa8ed3e0 1155 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1156 if (s1 < 0)
1157 {
1158 p2 = p1;
fa8ed3e0 1159 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1160 s1 = 0;
1161 }
1162 if (s2 < 0)
1163 {
fa8ed3e0 1164 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1165 s2 = 0;
1166 }
8bb43c28 1167 re_match_object = Qnil;
177c0ea7 1168
ca1d1d23
JB
1169 while (n < 0)
1170 {
42db823b 1171 int val;
487282dc 1172 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b 1173 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
ef887810
RS
1174 (NILP (Vinhibit_changing_match_data)
1175 ? &search_regs : &search_regs_1),
42db823b 1176 /* Don't allow match past current point */
4996330b 1177 pos_byte - BEGV_BYTE);
ca1d1d23 1178 if (val == -2)
b6d6a51c
KH
1179 {
1180 matcher_overflow ();
1181 }
ca1d1d23
JB
1182 if (val >= 0)
1183 {
ef887810
RS
1184 if (NILP (Vinhibit_changing_match_data))
1185 {
1186 pos_byte = search_regs.start[0] + BEGV_BYTE;
1187 for (i = 0; i < search_regs.num_regs; i++)
1188 if (search_regs.start[i] >= 0)
1189 {
1190 search_regs.start[i]
1191 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1192 search_regs.end[i]
1193 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1194 }
1195 XSETBUFFER (last_thing_searched, current_buffer);
1196 /* Set pos to the new position. */
1197 pos = search_regs.start[0];
1198 }
1199 else
1200 {
1201 pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1202 /* Set pos to the new position. */
1203 pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1204 }
ca1d1d23
JB
1205 }
1206 else
1207 {
1208 immediate_quit = 0;
1209 return (n);
1210 }
1211 n++;
1212 }
1213 while (n > 0)
1214 {
42db823b 1215 int val;
487282dc 1216 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b 1217 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
ef887810
RS
1218 (NILP (Vinhibit_changing_match_data)
1219 ? &search_regs : &search_regs_1),
4996330b 1220 lim_byte - BEGV_BYTE);
ca1d1d23 1221 if (val == -2)
b6d6a51c
KH
1222 {
1223 matcher_overflow ();
1224 }
ca1d1d23
JB
1225 if (val >= 0)
1226 {
ef887810
RS
1227 if (NILP (Vinhibit_changing_match_data))
1228 {
1229 pos_byte = search_regs.end[0] + BEGV_BYTE;
1230 for (i = 0; i < search_regs.num_regs; i++)
1231 if (search_regs.start[i] >= 0)
1232 {
1233 search_regs.start[i]
1234 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1235 search_regs.end[i]
1236 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1237 }
1238 XSETBUFFER (last_thing_searched, current_buffer);
1239 pos = search_regs.end[0];
1240 }
1241 else
1242 {
1243 pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1244 pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1245 }
ca1d1d23
JB
1246 }
1247 else
1248 {
1249 immediate_quit = 0;
1250 return (0 - n);
1251 }
1252 n--;
1253 }
1254 immediate_quit = 0;
1255 return (pos);
1256 }
1257 else /* non-RE case */
1258 {
facdc750
RS
1259 unsigned char *raw_pattern, *pat;
1260 int raw_pattern_size;
1261 int raw_pattern_size_byte;
1262 unsigned char *patbuf;
1263 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
a967ed62 1264 unsigned char *base_pat;
fed91c38
KH
1265 /* Set to positive if we find a non-ASCII char that need
1266 translation. Otherwise set to zero later. */
1267 int charset_base = -1;
040272ce 1268 int boyer_moore_ok = 1;
facdc750
RS
1269
1270 /* MULTIBYTE says whether the text to be searched is multibyte.
1271 We must convert PATTERN to match that, or we will not really
1272 find things right. */
1273
1274 if (multibyte == STRING_MULTIBYTE (string))
1275 {
d5db4077
KR
1276 raw_pattern = (unsigned char *) SDATA (string);
1277 raw_pattern_size = SCHARS (string);
1278 raw_pattern_size_byte = SBYTES (string);
facdc750
RS
1279 }
1280 else if (multibyte)
1281 {
d5db4077 1282 raw_pattern_size = SCHARS (string);
facdc750 1283 raw_pattern_size_byte
d5db4077 1284 = count_size_as_multibyte (SDATA (string),
facdc750 1285 raw_pattern_size);
7276d3d8 1286 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
d5db4077
KR
1287 copy_text (SDATA (string), raw_pattern,
1288 SCHARS (string), 0, 1);
facdc750
RS
1289 }
1290 else
1291 {
1292 /* Converting multibyte to single-byte.
1293
1294 ??? Perhaps this conversion should be done in a special way
1295 by subtracting nonascii-insert-offset from each non-ASCII char,
1296 so that only the multibyte chars which really correspond to
1297 the chosen single-byte character set can possibly match. */
d5db4077
KR
1298 raw_pattern_size = SCHARS (string);
1299 raw_pattern_size_byte = SCHARS (string);
7276d3d8 1300 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
1301 copy_text (SDATA (string), raw_pattern,
1302 SBYTES (string), 1, 0);
facdc750
RS
1303 }
1304
1305 /* Copy and optionally translate the pattern. */
1306 len = raw_pattern_size;
1307 len_byte = raw_pattern_size_byte;
1308 patbuf = (unsigned char *) alloca (len_byte);
1309 pat = patbuf;
1310 base_pat = raw_pattern;
1311 if (multibyte)
1312 {
0190922f
KH
1313 /* Fill patbuf by translated characters in STRING while
1314 checking if we can use boyer-moore search. If TRT is
1315 non-nil, we can use boyer-moore search only if TRT can be
1316 represented by the byte array of 256 elements. For that,
1317 all non-ASCII case-equivalents of all case-senstive
1318 characters in STRING must belong to the same charset and
1319 row. */
1320
facdc750
RS
1321 while (--len >= 0)
1322 {
0190922f 1323 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
aff2ce94 1324 int c, translated, inverse;
facdc750
RS
1325 int in_charlen, charlen;
1326
1327 /* If we got here and the RE flag is set, it's because we're
1328 dealing with a regexp known to be trivial, so the backslash
1329 just quotes the next character. */
1330 if (RE && *base_pat == '\\')
1331 {
1332 len--;
0190922f 1333 raw_pattern_size--;
facdc750
RS
1334 len_byte--;
1335 base_pat++;
1336 }
1337
1338 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
040272ce 1339
0190922f 1340 if (NILP (trt))
facdc750 1341 {
0190922f
KH
1342 str = base_pat;
1343 charlen = in_charlen;
facdc750 1344 }
0190922f 1345 else
facdc750 1346 {
0190922f
KH
1347 /* Translate the character. */
1348 TRANSLATE (translated, trt, c);
1349 charlen = CHAR_STRING (translated, str_base);
1350 str = str_base;
1351
1352 /* Check if C has any other case-equivalents. */
1353 TRANSLATE (inverse, inverse_trt, c);
1354 /* If so, check if we can use boyer-moore. */
1355 if (c != inverse && boyer_moore_ok)
1356 {
1357 /* Check if all equivalents belong to the same
1358 charset & row. Note that the check of C
1359 itself is done by the last iteration. Note
1360 also that we don't have to check ASCII
1361 characters because boyer-moore search can
1362 always handle their translation. */
1363 while (1)
1364 {
fed91c38 1365 if (ASCII_BYTE_P (inverse))
0190922f 1366 {
fed91c38 1367 if (charset_base > 0)
0190922f
KH
1368 {
1369 boyer_moore_ok = 0;
1370 break;
1371 }
fed91c38
KH
1372 charset_base = 0;
1373 }
1374 else if (SINGLE_BYTE_CHAR_P (inverse))
1375 {
1376 /* Boyer-moore search can't handle a
1377 translation of an eight-bit
1378 character. */
1379 boyer_moore_ok = 0;
1380 break;
1381 }
1382 else if (charset_base < 0)
1383 charset_base = inverse & ~CHAR_FIELD3_MASK;
1384 else if ((inverse & ~CHAR_FIELD3_MASK)
1385 != charset_base)
1386 {
1387 boyer_moore_ok = 0;
1388 break;
0190922f
KH
1389 }
1390 if (c == inverse)
1391 break;
1392 TRANSLATE (inverse, inverse_trt, inverse);
1393 }
1394 }
aff2ce94 1395 }
fed91c38
KH
1396 if (charset_base < 0)
1397 charset_base = 0;
facdc750
RS
1398
1399 /* Store this character into the translated pattern. */
1400 bcopy (str, pat, charlen);
1401 pat += charlen;
1402 base_pat += in_charlen;
1403 len_byte -= in_charlen;
1404 }
1405 }
1406 else
1407 {
040272ce
KH
1408 /* Unibyte buffer. */
1409 charset_base = 0;
facdc750
RS
1410 while (--len >= 0)
1411 {
040272ce 1412 int c, translated;
facdc750
RS
1413
1414 /* If we got here and the RE flag is set, it's because we're
1415 dealing with a regexp known to be trivial, so the backslash
1416 just quotes the next character. */
1417 if (RE && *base_pat == '\\')
1418 {
1419 len--;
0190922f 1420 raw_pattern_size--;
facdc750
RS
1421 base_pat++;
1422 }
1423 c = *base_pat++;
aff2ce94 1424 TRANSLATE (translated, trt, c);
facdc750
RS
1425 *pat++ = translated;
1426 }
1427 }
1428
1429 len_byte = pat - patbuf;
1430 len = raw_pattern_size;
1431 pat = base_pat = patbuf;
1432
040272ce 1433 if (boyer_moore_ok)
facdc750 1434 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
aff2ce94
RS
1435 pos, pos_byte, lim, lim_byte,
1436 charset_base);
facdc750
RS
1437 else
1438 return simple_search (n, pat, len, len_byte, trt,
1439 pos, pos_byte, lim, lim_byte);
1440 }
1441}
1442\f
1443/* Do a simple string search N times for the string PAT,
1444 whose length is LEN/LEN_BYTE,
1445 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1446 TRT is the translation table.
f8bd51c4 1447
facdc750
RS
1448 Return the character position where the match is found.
1449 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1450
facdc750
RS
1451 This kind of search works regardless of what is in PAT and
1452 regardless of what is in TRT. It is used in cases where
1453 boyer_moore cannot work. */
1454
1455static int
1456simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1457 int n;
1458 unsigned char *pat;
1459 int len, len_byte;
1460 Lisp_Object trt;
1461 int pos, pos_byte;
1462 int lim, lim_byte;
1463{
1464 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
ab228c24 1465 int forward = n > 0;
facdc750
RS
1466
1467 if (lim > pos && multibyte)
1468 while (n > 0)
1469 {
1470 while (1)
f8bd51c4 1471 {
facdc750
RS
1472 /* Try matching at position POS. */
1473 int this_pos = pos;
1474 int this_pos_byte = pos_byte;
1475 int this_len = len;
1476 int this_len_byte = len_byte;
1477 unsigned char *p = pat;
1478 if (pos + len > lim)
1479 goto stop;
1480
1481 while (this_len > 0)
1482 {
1483 int charlen, buf_charlen;
ab228c24 1484 int pat_ch, buf_ch;
facdc750 1485
ab228c24 1486 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1487 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1488 ZV_BYTE - this_pos_byte,
1489 buf_charlen);
aff2ce94 1490 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1491
1492 if (buf_ch != pat_ch)
1493 break;
ab228c24
RS
1494
1495 this_len_byte -= charlen;
1496 this_len--;
1497 p += charlen;
1498
1499 this_pos_byte += buf_charlen;
1500 this_pos++;
facdc750
RS
1501 }
1502
1503 if (this_len == 0)
1504 {
1505 pos += len;
1506 pos_byte += len_byte;
1507 break;
1508 }
1509
1510 INC_BOTH (pos, pos_byte);
f8bd51c4 1511 }
facdc750
RS
1512
1513 n--;
1514 }
1515 else if (lim > pos)
1516 while (n > 0)
1517 {
1518 while (1)
f8bd51c4 1519 {
facdc750
RS
1520 /* Try matching at position POS. */
1521 int this_pos = pos;
1522 int this_len = len;
1523 unsigned char *p = pat;
1524
1525 if (pos + len > lim)
1526 goto stop;
1527
1528 while (this_len > 0)
1529 {
1530 int pat_ch = *p++;
1531 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1532 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1533
1534 if (buf_ch != pat_ch)
1535 break;
ab228c24
RS
1536
1537 this_len--;
1538 this_pos++;
facdc750
RS
1539 }
1540
1541 if (this_len == 0)
1542 {
1543 pos += len;
1544 break;
1545 }
1546
1547 pos++;
f8bd51c4 1548 }
facdc750
RS
1549
1550 n--;
1551 }
1552 /* Backwards search. */
1553 else if (lim < pos && multibyte)
1554 while (n < 0)
1555 {
1556 while (1)
f8bd51c4 1557 {
facdc750
RS
1558 /* Try matching at position POS. */
1559 int this_pos = pos - len;
1560 int this_pos_byte = pos_byte - len_byte;
1561 int this_len = len;
1562 int this_len_byte = len_byte;
1563 unsigned char *p = pat;
1564
347d9bbe 1565 if (this_pos < lim || this_pos_byte < lim_byte)
facdc750
RS
1566 goto stop;
1567
1568 while (this_len > 0)
1569 {
1570 int charlen, buf_charlen;
ab228c24 1571 int pat_ch, buf_ch;
facdc750 1572
ab228c24 1573 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1574 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1575 ZV_BYTE - this_pos_byte,
1576 buf_charlen);
aff2ce94 1577 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1578
1579 if (buf_ch != pat_ch)
1580 break;
ab228c24
RS
1581
1582 this_len_byte -= charlen;
1583 this_len--;
1584 p += charlen;
1585 this_pos_byte += buf_charlen;
1586 this_pos++;
facdc750
RS
1587 }
1588
1589 if (this_len == 0)
1590 {
1591 pos -= len;
1592 pos_byte -= len_byte;
1593 break;
1594 }
1595
1596 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1597 }
1598
facdc750
RS
1599 n++;
1600 }
1601 else if (lim < pos)
1602 while (n < 0)
1603 {
1604 while (1)
b6d6a51c 1605 {
facdc750
RS
1606 /* Try matching at position POS. */
1607 int this_pos = pos - len;
1608 int this_len = len;
1609 unsigned char *p = pat;
1610
1611 if (pos - len < lim)
1612 goto stop;
1613
1614 while (this_len > 0)
1615 {
1616 int pat_ch = *p++;
1617 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1618 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1619
1620 if (buf_ch != pat_ch)
1621 break;
ab228c24
RS
1622 this_len--;
1623 this_pos++;
facdc750
RS
1624 }
1625
1626 if (this_len == 0)
b6d6a51c 1627 {
facdc750
RS
1628 pos -= len;
1629 break;
b6d6a51c 1630 }
facdc750
RS
1631
1632 pos--;
b6d6a51c 1633 }
facdc750
RS
1634
1635 n++;
b6d6a51c 1636 }
facdc750
RS
1637
1638 stop:
1639 if (n == 0)
aff2ce94 1640 {
ab228c24
RS
1641 if (forward)
1642 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1643 else
1644 set_search_regs (multibyte ? pos_byte : pos, len_byte);
aff2ce94
RS
1645
1646 return pos;
1647 }
facdc750
RS
1648 else if (n > 0)
1649 return -n;
1650 else
1651 return n;
1652}
1653\f
0190922f 1654/* Do Boyer-Moore search N times for the string BASE_PAT,
facdc750
RS
1655 whose length is LEN/LEN_BYTE,
1656 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1657 DIRECTION says which direction we search in.
1658 TRT and INVERSE_TRT are translation tables.
0190922f 1659 Characters in PAT are already translated by TRT.
facdc750 1660
0190922f
KH
1661 This kind of search works if all the characters in BASE_PAT that
1662 have nontrivial translation are the same aside from the last byte.
1663 This makes it possible to translate just the last byte of a
1664 character, and do so after just a simple test of the context.
e0f24100 1665 CHARSET_BASE is nonzero if there is such a non-ASCII character.
facdc750
RS
1666
1667 If that criterion is not satisfied, do not call this function. */
1668
1669static int
1670boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
aff2ce94 1671 pos, pos_byte, lim, lim_byte, charset_base)
facdc750
RS
1672 int n;
1673 unsigned char *base_pat;
1674 int len, len_byte;
1675 Lisp_Object trt;
1676 Lisp_Object inverse_trt;
1677 int pos, pos_byte;
1678 int lim, lim_byte;
aff2ce94 1679 int charset_base;
facdc750
RS
1680{
1681 int direction = ((n > 0) ? 1 : -1);
1682 register int dirlen;
a968f437 1683 int infinity, limit, stride_for_teases = 0;
facdc750
RS
1684 register int *BM_tab;
1685 int *BM_tab_base;
177c0ea7 1686 register unsigned char *cursor, *p_limit;
facdc750 1687 register int i, j;
cb6792d2 1688 unsigned char *pat, *pat_end;
facdc750
RS
1689 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1690
1691 unsigned char simple_translate[0400];
0190922f
KH
1692 /* These are set to the preceding bytes of a byte to be translated
1693 if charset_base is nonzero. As the maximum byte length of a
1694 multibyte character is 4, we have to check at most three previous
1695 bytes. */
1696 int translate_prev_byte1 = 0;
1697 int translate_prev_byte2 = 0;
1698 int translate_prev_byte3 = 0;
facdc750
RS
1699
1700#ifdef C_ALLOCA
1701 int BM_tab_space[0400];
1702 BM_tab = &BM_tab_space[0];
1703#else
1704 BM_tab = (int *) alloca (0400 * sizeof (int));
1705#endif
1706 /* The general approach is that we are going to maintain that we know */
1707 /* the first (closest to the present position, in whatever direction */
1708 /* we're searching) character that could possibly be the last */
1709 /* (furthest from present position) character of a valid match. We */
1710 /* advance the state of our knowledge by looking at that character */
1711 /* and seeing whether it indeed matches the last character of the */
1712 /* pattern. If it does, we take a closer look. If it does not, we */
1713 /* move our pointer (to putative last characters) as far as is */
1714 /* logically possible. This amount of movement, which I call a */
1715 /* stride, will be the length of the pattern if the actual character */
1716 /* appears nowhere in the pattern, otherwise it will be the distance */
1717 /* from the last occurrence of that character to the end of the */
1718 /* pattern. */
1719 /* As a coding trick, an enormous stride is coded into the table for */
1720 /* characters that match the last character. This allows use of only */
1721 /* a single test, a test for having gone past the end of the */
1722 /* permissible match region, to test for both possible matches (when */
1723 /* the stride goes past the end immediately) and failure to */
177c0ea7 1724 /* match (where you get nudged past the end one stride at a time). */
facdc750
RS
1725
1726 /* Here we make a "mickey mouse" BM table. The stride of the search */
1727 /* is determined only by the last character of the putative match. */
1728 /* If that character does not match, we will stride the proper */
1729 /* distance to propose a match that superimposes it on the last */
1730 /* instance of a character that matches it (per trt), or misses */
177c0ea7 1731 /* it entirely if there is none. */
facdc750
RS
1732
1733 dirlen = len_byte * direction;
1734 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
cb6792d2
RS
1735
1736 /* Record position after the end of the pattern. */
1737 pat_end = base_pat + len_byte;
1738 /* BASE_PAT points to a character that we start scanning from.
1739 It is the first character in a forward search,
1740 the last character in a backward search. */
facdc750 1741 if (direction < 0)
cb6792d2
RS
1742 base_pat = pat_end - 1;
1743
facdc750
RS
1744 BM_tab_base = BM_tab;
1745 BM_tab += 0400;
1746 j = dirlen; /* to get it in a register */
1747 /* A character that does not appear in the pattern induces a */
1748 /* stride equal to the pattern length. */
1749 while (BM_tab_base != BM_tab)
1750 {
1751 *--BM_tab = j;
1752 *--BM_tab = j;
1753 *--BM_tab = j;
1754 *--BM_tab = j;
1755 }
1756
1757 /* We use this for translation, instead of TRT itself.
1758 We fill this in to handle the characters that actually
1759 occur in the pattern. Others don't matter anyway! */
1760 bzero (simple_translate, sizeof simple_translate);
1761 for (i = 0; i < 0400; i++)
1762 simple_translate[i] = i;
1763
0190922f
KH
1764 if (charset_base)
1765 {
1766 /* Setup translate_prev_byte1/2/3 from CHARSET_BASE. Only a
1767 byte following them are the target of translation. */
1768 int sample_char = charset_base | 0x20;
1769 unsigned char str[MAX_MULTIBYTE_LENGTH];
1770 int len = CHAR_STRING (sample_char, str);
1771
1772 translate_prev_byte1 = str[len - 2];
1773 if (len > 2)
1774 {
1775 translate_prev_byte2 = str[len - 3];
1776 if (len > 3)
1777 translate_prev_byte3 = str[len - 4];
1778 }
1779 }
1780
facdc750
RS
1781 i = 0;
1782 while (i != infinity)
1783 {
cb6792d2 1784 unsigned char *ptr = base_pat + i;
facdc750
RS
1785 i += direction;
1786 if (i == dirlen)
1787 i = infinity;
1788 if (! NILP (trt))
ca1d1d23 1789 {
620a5178
KH
1790 /* If the byte currently looking at is the last of a
1791 character to check case-equivalents, set CH to that
1792 character. An ASCII character and a non-ASCII character
1793 matching with CHARSET_BASE are to be checked. */
0190922f
KH
1794 int ch = -1;
1795
1796 if (ASCII_BYTE_P (*ptr) || ! multibyte)
1797 ch = *ptr;
620a5178 1798 else if (charset_base
86dc6ccb 1799 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
ca1d1d23 1800 {
620a5178
KH
1801 unsigned char *charstart = ptr - 1;
1802
1803 while (! (CHAR_HEAD_P (*charstart)))
1804 charstart--;
1805 ch = STRING_CHAR (charstart, ptr - charstart + 1);
0190922f
KH
1806 if (charset_base != (ch & ~CHAR_FIELD3_MASK))
1807 ch = -1;
ca1d1d23 1808 }
facdc750 1809
6786a107 1810 if (ch >= 0400)
620a5178
KH
1811 j = ((unsigned char) ch) | 0200;
1812 else
1813 j = *ptr;
1814
facdc750
RS
1815 if (i == infinity)
1816 stride_for_teases = BM_tab[j];
ab228c24 1817
facdc750
RS
1818 BM_tab[j] = dirlen - i;
1819 /* A translation table is accompanied by its inverse -- see */
177c0ea7 1820 /* comment following downcase_table for details */
0190922f 1821 if (ch >= 0)
ab228c24
RS
1822 {
1823 int starting_ch = ch;
620a5178 1824 int starting_j = j;
0190922f 1825
ab228c24
RS
1826 while (1)
1827 {
1828 TRANSLATE (ch, inverse_trt, ch);
6786a107 1829 if (ch >= 0400)
ab228c24
RS
1830 j = ((unsigned char) ch) | 0200;
1831 else
1832 j = (unsigned char) ch;
1833
1834 /* For all the characters that map into CH,
1835 set up simple_translate to map the last byte
1836 into STARTING_J. */
1837 simple_translate[j] = starting_j;
1838 if (ch == starting_ch)
1839 break;
1840 BM_tab[j] = dirlen - i;
1841 }
1842 }
facdc750
RS
1843 }
1844 else
1845 {
1846 j = *ptr;
1847
1848 if (i == infinity)
1849 stride_for_teases = BM_tab[j];
1850 BM_tab[j] = dirlen - i;
ca1d1d23 1851 }
facdc750
RS
1852 /* stride_for_teases tells how much to stride if we get a */
1853 /* match on the far character but are subsequently */
1854 /* disappointed, by recording what the stride would have been */
1855 /* for that character if the last character had been */
1856 /* different. */
1857 }
1858 infinity = dirlen - infinity;
1859 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1860 /* loop invariant - POS_BYTE points at where last char (first
1861 char if reverse) of pattern would align in a possible match. */
1862 while (n != 0)
1863 {
1864 int tail_end;
1865 unsigned char *tail_end_ptr;
1866
1867 /* It's been reported that some (broken) compiler thinks that
1868 Boolean expressions in an arithmetic context are unsigned.
1869 Using an explicit ?1:0 prevents this. */
1870 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1871 < 0)
1872 return (n * (0 - direction));
1873 /* First we do the part we can by pointers (maybe nothing) */
1874 QUIT;
1875 pat = base_pat;
1876 limit = pos_byte - dirlen + direction;
67ce527d
KH
1877 if (direction > 0)
1878 {
1879 limit = BUFFER_CEILING_OF (limit);
1880 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1881 can take on without hitting edge of buffer or the gap. */
1882 limit = min (limit, pos_byte + 20000);
1883 limit = min (limit, lim_byte - 1);
1884 }
1885 else
1886 {
1887 limit = BUFFER_FLOOR_OF (limit);
1888 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1889 can take on without hitting edge of buffer or the gap. */
1890 limit = max (limit, pos_byte - 20000);
1891 limit = max (limit, lim_byte);
1892 }
facdc750
RS
1893 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1894 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1895
1896 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1897 {
facdc750
RS
1898 unsigned char *p2;
1899
1900 p_limit = BYTE_POS_ADDR (limit);
1901 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1902 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1903 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1904 {
facdc750 1905 if (direction > 0) /* worth duplicating */
ca1d1d23 1906 {
facdc750
RS
1907 /* Use signed comparison if appropriate
1908 to make cursor+infinity sure to be > p_limit.
1909 Assuming that the buffer lies in a range of addresses
1910 that are all "positive" (as ints) or all "negative",
1911 either kind of comparison will work as long
1912 as we don't step by infinity. So pick the kind
1913 that works when we do step by infinity. */
1914 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1915 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1916 cursor += BM_tab[*cursor];
ca1d1d23 1917 else
facdc750
RS
1918 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1919 cursor += BM_tab[*cursor];
1920 }
1921 else
1922 {
1923 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1924 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1925 cursor += BM_tab[*cursor];
1926 else
1927 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1928 cursor += BM_tab[*cursor];
1929 }
ca1d1d23 1930/* If you are here, cursor is beyond the end of the searched region. */
facdc750
RS
1931/* This can happen if you match on the far character of the pattern, */
1932/* because the "stride" of that character is infinity, a number able */
1933/* to throw you well beyond the end of the search. It can also */
1934/* happen if you fail to match within the permitted region and would */
1935/* otherwise try a character beyond that region */
1936 if ((cursor - p_limit) * direction <= len_byte)
1937 break; /* a small overrun is genuine */
1938 cursor -= infinity; /* large overrun = hit */
1939 i = dirlen - direction;
1940 if (! NILP (trt))
1941 {
1942 while ((i -= direction) + direction != 0)
ca1d1d23 1943 {
facdc750
RS
1944 int ch;
1945 cursor -= direction;
1946 /* Translate only the last byte of a character. */
1947 if (! multibyte
1948 || ((cursor == tail_end_ptr
1949 || CHAR_HEAD_P (cursor[1]))
1950 && (CHAR_HEAD_P (cursor[0])
0190922f
KH
1951 /* Check if this is the last byte of
1952 a translable character. */
1953 || (translate_prev_byte1 == cursor[-1]
1954 && (CHAR_HEAD_P (translate_prev_byte1)
1955 || (translate_prev_byte2 == cursor[-2]
1956 && (CHAR_HEAD_P (translate_prev_byte2)
1957 || (translate_prev_byte3 == cursor[-3]))))))))
facdc750
RS
1958 ch = simple_translate[*cursor];
1959 else
1960 ch = *cursor;
1961 if (pat[i] != ch)
1962 break;
ca1d1d23 1963 }
facdc750
RS
1964 }
1965 else
1966 {
1967 while ((i -= direction) + direction != 0)
ca1d1d23 1968 {
facdc750
RS
1969 cursor -= direction;
1970 if (pat[i] != *cursor)
1971 break;
ca1d1d23 1972 }
facdc750
RS
1973 }
1974 cursor += dirlen - i - direction; /* fix cursor */
1975 if (i + direction == 0)
1976 {
ef887810 1977 int position, start, end;
0c8533c6 1978
facdc750 1979 cursor -= direction;
1113d9db 1980
facdc750
RS
1981 position = pos_byte + cursor - p2 + ((direction > 0)
1982 ? 1 - len_byte : 0);
1983 set_search_regs (position, len_byte);
ca325161 1984
ef887810
RS
1985 if (NILP (Vinhibit_changing_match_data))
1986 {
1987 start = search_regs.start[0];
1988 end = search_regs.end[0];
1989 }
1990 else
1991 /* If Vinhibit_changing_match_data is non-nil,
1992 search_regs will not be changed. So let's
1993 compute start and end here. */
1994 {
1995 start = BYTE_TO_CHAR (position);
1996 end = BYTE_TO_CHAR (position + len_byte);
1997 }
1998
facdc750
RS
1999 if ((n -= direction) != 0)
2000 cursor += dirlen; /* to resume search */
ca1d1d23 2001 else
ef887810 2002 return direction > 0 ? end : start;
ca1d1d23 2003 }
facdc750
RS
2004 else
2005 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 2006 }
facdc750
RS
2007 pos_byte += cursor - p2;
2008 }
2009 else
2010 /* Now we'll pick up a clump that has to be done the hard */
2011 /* way because it covers a discontinuity */
2012 {
2013 limit = ((direction > 0)
2014 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
2015 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
2016 limit = ((direction > 0)
2017 ? min (limit + len_byte, lim_byte - 1)
2018 : max (limit - len_byte, lim_byte));
2019 /* LIMIT is now the last value POS_BYTE can have
2020 and still be valid for a possible match. */
2021 while (1)
ca1d1d23 2022 {
facdc750
RS
2023 /* This loop can be coded for space rather than */
2024 /* speed because it will usually run only once. */
2025 /* (the reach is at most len + 21, and typically */
177c0ea7 2026 /* does not exceed len) */
facdc750
RS
2027 while ((limit - pos_byte) * direction >= 0)
2028 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
2029 /* now run the same tests to distinguish going off the */
2030 /* end, a match or a phony match. */
2031 if ((pos_byte - limit) * direction <= len_byte)
2032 break; /* ran off the end */
2033 /* Found what might be a match.
2034 Set POS_BYTE back to last (first if reverse) pos. */
2035 pos_byte -= infinity;
2036 i = dirlen - direction;
2037 while ((i -= direction) + direction != 0)
ca1d1d23 2038 {
facdc750
RS
2039 int ch;
2040 unsigned char *ptr;
2041 pos_byte -= direction;
2042 ptr = BYTE_POS_ADDR (pos_byte);
2043 /* Translate only the last byte of a character. */
2044 if (! multibyte
2045 || ((ptr == tail_end_ptr
2046 || CHAR_HEAD_P (ptr[1]))
2047 && (CHAR_HEAD_P (ptr[0])
0190922f
KH
2048 /* Check if this is the last byte of a
2049 translable character. */
2050 || (translate_prev_byte1 == ptr[-1]
2051 && (CHAR_HEAD_P (translate_prev_byte1)
2052 || (translate_prev_byte2 == ptr[-2]
2053 && (CHAR_HEAD_P (translate_prev_byte2)
2054 || translate_prev_byte3 == ptr[-3])))))))
facdc750
RS
2055 ch = simple_translate[*ptr];
2056 else
2057 ch = *ptr;
2058 if (pat[i] != ch)
2059 break;
2060 }
2061 /* Above loop has moved POS_BYTE part or all the way
2062 back to the first pos (last pos if reverse).
2063 Set it once again at the last (first if reverse) char. */
2064 pos_byte += dirlen - i- direction;
2065 if (i + direction == 0)
2066 {
ef887810 2067 int position, start, end;
facdc750 2068 pos_byte -= direction;
1113d9db 2069
facdc750 2070 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
facdc750 2071 set_search_regs (position, len_byte);
ca325161 2072
ef887810
RS
2073 if (NILP (Vinhibit_changing_match_data))
2074 {
2075 start = search_regs.start[0];
2076 end = search_regs.end[0];
2077 }
2078 else
2079 /* If Vinhibit_changing_match_data is non-nil,
2080 search_regs will not be changed. So let's
2081 compute start and end here. */
2082 {
2083 start = BYTE_TO_CHAR (position);
2084 end = BYTE_TO_CHAR (position + len_byte);
2085 }
2086
facdc750
RS
2087 if ((n -= direction) != 0)
2088 pos_byte += dirlen; /* to resume search */
ca1d1d23 2089 else
ef887810 2090 return direction > 0 ? end : start;
ca1d1d23 2091 }
facdc750
RS
2092 else
2093 pos_byte += stride_for_teases;
2094 }
2095 }
2096 /* We have done one clump. Can we continue? */
2097 if ((lim_byte - pos_byte) * direction < 0)
2098 return ((0 - n) * direction);
ca1d1d23 2099 }
facdc750 2100 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 2101}
ca325161 2102
fa8ed3e0 2103/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
a7e4cdde
RS
2104 for the overall match just found in the current buffer.
2105 Also clear out the match data for registers 1 and up. */
ca325161
RS
2106
2107static void
fa8ed3e0
RS
2108set_search_regs (beg_byte, nbytes)
2109 int beg_byte, nbytes;
ca325161 2110{
a7e4cdde
RS
2111 int i;
2112
ef887810
RS
2113 if (!NILP (Vinhibit_changing_match_data))
2114 return;
2115
ca325161
RS
2116 /* Make sure we have registers in which to store
2117 the match position. */
2118 if (search_regs.num_regs == 0)
2119 {
2d4a771a
RS
2120 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2121 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 2122 search_regs.num_regs = 2;
ca325161
RS
2123 }
2124
a7e4cdde
RS
2125 /* Clear out the other registers. */
2126 for (i = 1; i < search_regs.num_regs; i++)
2127 {
2128 search_regs.start[i] = -1;
2129 search_regs.end[i] = -1;
2130 }
2131
fa8ed3e0
RS
2132 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2133 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 2134 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 2135}
ca1d1d23
JB
2136\f
2137/* Given a string of words separated by word delimiters,
2138 compute a regexp that matches those exact words
2139 separated by arbitrary punctuation. */
2140
2141static Lisp_Object
2142wordify (string)
2143 Lisp_Object string;
2144{
2145 register unsigned char *p, *o;
0c8533c6 2146 register int i, i_byte, len, punct_count = 0, word_count = 0;
ca1d1d23 2147 Lisp_Object val;
0c8533c6
RS
2148 int prev_c = 0;
2149 int adjust;
ca1d1d23 2150
b7826503 2151 CHECK_STRING (string);
d5db4077
KR
2152 p = SDATA (string);
2153 len = SCHARS (string);
ca1d1d23 2154
0c8533c6
RS
2155 for (i = 0, i_byte = 0; i < len; )
2156 {
2157 int c;
177c0ea7 2158
eb99a8dd 2159 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
0c8533c6
RS
2160
2161 if (SYNTAX (c) != Sword)
2162 {
2163 punct_count++;
2164 if (i > 0 && SYNTAX (prev_c) == Sword)
2165 word_count++;
2166 }
ca1d1d23 2167
0c8533c6
RS
2168 prev_c = c;
2169 }
2170
2171 if (SYNTAX (prev_c) == Sword)
2172 word_count++;
2173 if (!word_count)
c60416e0 2174 return empty_unibyte_string;
0c8533c6
RS
2175
2176 adjust = - punct_count + 5 * (word_count - 1) + 4;
8a2df937
RS
2177 if (STRING_MULTIBYTE (string))
2178 val = make_uninit_multibyte_string (len + adjust,
d5db4077 2179 SBYTES (string)
8a2df937
RS
2180 + adjust);
2181 else
2182 val = make_uninit_string (len + adjust);
ca1d1d23 2183
d5db4077 2184 o = SDATA (val);
ca1d1d23
JB
2185 *o++ = '\\';
2186 *o++ = 'b';
1e9582d4 2187 prev_c = 0;
ca1d1d23 2188
1e9582d4
RS
2189 for (i = 0, i_byte = 0; i < len; )
2190 {
2191 int c;
2192 int i_byte_orig = i_byte;
177c0ea7 2193
eb99a8dd 2194 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1e9582d4
RS
2195
2196 if (SYNTAX (c) == Sword)
2197 {
5d69fe10 2198 bcopy (SDATA (string) + i_byte_orig, o,
1e9582d4
RS
2199 i_byte - i_byte_orig);
2200 o += i_byte - i_byte_orig;
2201 }
2202 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2203 {
2204 *o++ = '\\';
2205 *o++ = 'W';
2206 *o++ = '\\';
2207 *o++ = 'W';
2208 *o++ = '*';
2209 }
2210
2211 prev_c = c;
2212 }
ca1d1d23
JB
2213
2214 *o++ = '\\';
2215 *o++ = 'b';
2216
2217 return val;
2218}
2219\f
2220DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
8c1a1077
PJ
2221 "MSearch backward: ",
2222 doc: /* Search backward from point for STRING.
2223Set point to the beginning of the occurrence found, and return point.
2224An optional second argument bounds the search; it is a buffer position.
2225The match found must not extend before that position.
2226Optional third argument, if t, means if fail just return nil (no error).
2227 If not nil and not t, position at limit of search and return nil.
2228Optional fourth argument is repeat count--search for successive occurrences.
2229
2230Search case-sensitivity is determined by the value of the variable
2231`case-fold-search', which see.
2232
2233See also the functions `match-beginning', `match-end' and `replace-match'. */)
2234 (string, bound, noerror, count)
ca1d1d23
JB
2235 Lisp_Object string, bound, noerror, count;
2236{
b819a390 2237 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
2238}
2239
6af43974 2240DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
8c1a1077
PJ
2241 doc: /* Search forward from point for STRING.
2242Set point to the end of the occurrence found, and return point.
2243An optional second argument bounds the search; it is a buffer position.
d6aacdcd 2244The match found must not extend after that position. A value of nil is
48740f01 2245 equivalent to (point-max).
8c1a1077
PJ
2246Optional third argument, if t, means if fail just return nil (no error).
2247 If not nil and not t, move to limit of search and return nil.
2248Optional fourth argument is repeat count--search for successive occurrences.
2249
2250Search case-sensitivity is determined by the value of the variable
2251`case-fold-search', which see.
2252
2253See also the functions `match-beginning', `match-end' and `replace-match'. */)
2254 (string, bound, noerror, count)
ca1d1d23
JB
2255 Lisp_Object string, bound, noerror, count;
2256{
b819a390 2257 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
2258}
2259
2260DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
8c1a1077
PJ
2261 "sWord search backward: ",
2262 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2263Set point to the beginning of the occurrence found, and return point.
2264An optional second argument bounds the search; it is a buffer position.
2265The match found must not extend before that position.
2266Optional third argument, if t, means if fail just return nil (no error).
2267 If not nil and not t, move to limit of search and return nil.
2268Optional fourth argument is repeat count--search for successive occurrences. */)
2269 (string, bound, noerror, count)
ca1d1d23
JB
2270 Lisp_Object string, bound, noerror, count;
2271{
b819a390 2272 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2273}
2274
2275DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
8c1a1077
PJ
2276 "sWord search: ",
2277 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2278Set point to the end of the occurrence found, and return point.
2279An optional second argument bounds the search; it is a buffer position.
2280The match found must not extend after that position.
2281Optional third argument, if t, means if fail just return nil (no error).
2282 If not nil and not t, move to limit of search and return nil.
2283Optional fourth argument is repeat count--search for successive occurrences. */)
2284 (string, bound, noerror, count)
ca1d1d23
JB
2285 Lisp_Object string, bound, noerror, count;
2286{
b819a390 2287 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
2288}
2289
2290DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
8c1a1077
PJ
2291 "sRE search backward: ",
2292 doc: /* Search backward from point for match for regular expression REGEXP.
2293Set point to the beginning of the match, and return point.
2294The match found is the one starting last in the buffer
2295and yet ending before the origin of the search.
2296An optional second argument bounds the search; it is a buffer position.
2297The match found must start at or after that position.
2298Optional third argument, if t, means if fail just return nil (no error).
2299 If not nil and not t, move to limit of search and return nil.
2300Optional fourth argument is repeat count--search for successive occurrences.
2301See also the functions `match-beginning', `match-end', `match-string',
2302and `replace-match'. */)
2303 (regexp, bound, noerror, count)
19c0a730 2304 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2305{
b819a390 2306 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2307}
2308
2309DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
8c1a1077
PJ
2310 "sRE search: ",
2311 doc: /* Search forward from point for regular expression REGEXP.
2312Set point to the end of the occurrence found, and return point.
2313An optional second argument bounds the search; it is a buffer position.
2314The match found must not extend after that position.
2315Optional third argument, if t, means if fail just return nil (no error).
2316 If not nil and not t, move to limit of search and return nil.
2317Optional fourth argument is repeat count--search for successive occurrences.
2318See also the functions `match-beginning', `match-end', `match-string',
2319and `replace-match'. */)
2320 (regexp, bound, noerror, count)
19c0a730 2321 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2322{
b819a390
RS
2323 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2324}
2325
2326DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
8c1a1077
PJ
2327 "sPosix search backward: ",
2328 doc: /* Search backward from point for match for regular expression REGEXP.
2329Find the longest match in accord with Posix regular expression rules.
2330Set point to the beginning of the match, and return point.
2331The match found is the one starting last in the buffer
2332and yet ending before the origin of the search.
2333An optional second argument bounds the search; it is a buffer position.
2334The match found must start at or after that position.
2335Optional third argument, if t, means if fail just return nil (no error).
2336 If not nil and not t, move to limit of search and return nil.
2337Optional fourth argument is repeat count--search for successive occurrences.
2338See also the functions `match-beginning', `match-end', `match-string',
2339and `replace-match'. */)
2340 (regexp, bound, noerror, count)
b819a390
RS
2341 Lisp_Object regexp, bound, noerror, count;
2342{
2343 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2344}
2345
2346DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
8c1a1077
PJ
2347 "sPosix search: ",
2348 doc: /* Search forward from point for regular expression REGEXP.
2349Find the longest match in accord with Posix regular expression rules.
2350Set point to the end of the occurrence found, and return point.
2351An optional second argument bounds the search; it is a buffer position.
2352The match found must not extend after that position.
2353Optional third argument, if t, means if fail just return nil (no error).
2354 If not nil and not t, move to limit of search and return nil.
2355Optional fourth argument is repeat count--search for successive occurrences.
2356See also the functions `match-beginning', `match-end', `match-string',
2357and `replace-match'. */)
2358 (regexp, bound, noerror, count)
b819a390
RS
2359 Lisp_Object regexp, bound, noerror, count;
2360{
2361 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2362}
2363\f
d7a5ad5f 2364DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
8c1a1077 2365 doc: /* Replace text matched by last search with NEWTEXT.
4dd0c271
RS
2366Leave point at the end of the replacement text.
2367
8c1a1077
PJ
2368If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2369Otherwise maybe capitalize the whole text, or maybe just word initials,
2370based on the replaced text.
2371If the replaced text has only capital letters
2372and has at least one multiletter word, convert NEWTEXT to all caps.
4dd0c271
RS
2373Otherwise if all words are capitalized in the replaced text,
2374capitalize each word in NEWTEXT.
8c1a1077
PJ
2375
2376If third arg LITERAL is non-nil, insert NEWTEXT literally.
2377Otherwise treat `\\' as special:
2378 `\\&' in NEWTEXT means substitute original matched text.
2379 `\\N' means substitute what matched the Nth `\\(...\\)'.
2380 If Nth parens didn't match, substitute nothing.
2381 `\\\\' means insert one `\\'.
4dd0c271
RS
2382Case conversion does not apply to these substitutions.
2383
8c1a1077 2384FIXEDCASE and LITERAL are optional arguments.
8c1a1077
PJ
2385
2386The optional fourth argument STRING can be a string to modify.
2387This is meaningful when the previous match was done against STRING,
2388using `string-match'. When used this way, `replace-match'
2389creates and returns a new string made by copying STRING and replacing
2390the part of STRING that was matched.
2391
2392The optional fifth argument SUBEXP specifies a subexpression;
2393it says to replace just that subexpression with NEWTEXT,
2394rather than replacing the entire matched text.
2395This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2396`\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2397NEWTEXT in place of subexp N.
2398This is useful only after a regular expression search or match,
2399since only regular expressions have distinguished subexpressions. */)
2400 (newtext, fixedcase, literal, string, subexp)
d7a5ad5f 2401 Lisp_Object newtext, fixedcase, literal, string, subexp;
ca1d1d23
JB
2402{
2403 enum { nochange, all_caps, cap_initial } case_action;
ac3b28b1 2404 register int pos, pos_byte;
ca1d1d23 2405 int some_multiletter_word;
97832bd0 2406 int some_lowercase;
73dc8771 2407 int some_uppercase;
208767c3 2408 int some_nonuppercase_initial;
ca1d1d23 2409 register int c, prevc;
d7a5ad5f 2410 int sub;
3e18eecf 2411 int opoint, newpoint;
ca1d1d23 2412
b7826503 2413 CHECK_STRING (newtext);
ca1d1d23 2414
080c45fd 2415 if (! NILP (string))
b7826503 2416 CHECK_STRING (string);
080c45fd 2417
ca1d1d23
JB
2418 case_action = nochange; /* We tried an initialization */
2419 /* but some C compilers blew it */
4746118a
JB
2420
2421 if (search_regs.num_regs <= 0)
d72cdbfc 2422 error ("`replace-match' called before any match found");
4746118a 2423
d7a5ad5f
RS
2424 if (NILP (subexp))
2425 sub = 0;
2426 else
2427 {
b7826503 2428 CHECK_NUMBER (subexp);
d7a5ad5f
RS
2429 sub = XINT (subexp);
2430 if (sub < 0 || sub >= search_regs.num_regs)
2431 args_out_of_range (subexp, make_number (search_regs.num_regs));
2432 }
2433
080c45fd
RS
2434 if (NILP (string))
2435 {
d7a5ad5f
RS
2436 if (search_regs.start[sub] < BEGV
2437 || search_regs.start[sub] > search_regs.end[sub]
2438 || search_regs.end[sub] > ZV)
2439 args_out_of_range (make_number (search_regs.start[sub]),
2440 make_number (search_regs.end[sub]));
080c45fd
RS
2441 }
2442 else
2443 {
d7a5ad5f
RS
2444 if (search_regs.start[sub] < 0
2445 || search_regs.start[sub] > search_regs.end[sub]
d5db4077 2446 || search_regs.end[sub] > SCHARS (string))
d7a5ad5f
RS
2447 args_out_of_range (make_number (search_regs.start[sub]),
2448 make_number (search_regs.end[sub]));
080c45fd 2449 }
ca1d1d23
JB
2450
2451 if (NILP (fixedcase))
2452 {
2453 /* Decide how to casify by examining the matched text. */
ac3b28b1 2454 int last;
ca1d1d23 2455
ac3b28b1
KH
2456 pos = search_regs.start[sub];
2457 last = search_regs.end[sub];
fa8ed3e0
RS
2458
2459 if (NILP (string))
ac3b28b1 2460 pos_byte = CHAR_TO_BYTE (pos);
fa8ed3e0 2461 else
ac3b28b1 2462 pos_byte = string_char_to_byte (string, pos);
fa8ed3e0 2463
ca1d1d23
JB
2464 prevc = '\n';
2465 case_action = all_caps;
2466
2467 /* some_multiletter_word is set nonzero if any original word
2468 is more than one letter long. */
2469 some_multiletter_word = 0;
97832bd0 2470 some_lowercase = 0;
208767c3 2471 some_nonuppercase_initial = 0;
73dc8771 2472 some_uppercase = 0;
ca1d1d23 2473
ac3b28b1 2474 while (pos < last)
ca1d1d23 2475 {
080c45fd 2476 if (NILP (string))
ac3b28b1
KH
2477 {
2478 c = FETCH_CHAR (pos_byte);
2479 INC_BOTH (pos, pos_byte);
2480 }
080c45fd 2481 else
ac3b28b1 2482 FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
080c45fd 2483
ca1d1d23
JB
2484 if (LOWERCASEP (c))
2485 {
2486 /* Cannot be all caps if any original char is lower case */
2487
97832bd0 2488 some_lowercase = 1;
ca1d1d23 2489 if (SYNTAX (prevc) != Sword)
208767c3 2490 some_nonuppercase_initial = 1;
ca1d1d23
JB
2491 else
2492 some_multiletter_word = 1;
2493 }
d16c2b66 2494 else if (UPPERCASEP (c))
ca1d1d23 2495 {
73dc8771 2496 some_uppercase = 1;
97832bd0 2497 if (SYNTAX (prevc) != Sword)
c4d460ce 2498 ;
97832bd0 2499 else
ca1d1d23
JB
2500 some_multiletter_word = 1;
2501 }
208767c3
RS
2502 else
2503 {
2504 /* If the initial is a caseless word constituent,
2505 treat that like a lowercase initial. */
2506 if (SYNTAX (prevc) != Sword)
2507 some_nonuppercase_initial = 1;
2508 }
ca1d1d23
JB
2509
2510 prevc = c;
2511 }
2512
97832bd0
RS
2513 /* Convert to all caps if the old text is all caps
2514 and has at least one multiletter word. */
2515 if (! some_lowercase && some_multiletter_word)
2516 case_action = all_caps;
c4d460ce 2517 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2518 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2519 case_action = cap_initial;
208767c3 2520 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2521 /* Should x -> yz, operating on X, give Yz or YZ?
2522 We'll assume the latter. */
2523 case_action = all_caps;
97832bd0
RS
2524 else
2525 case_action = nochange;
ca1d1d23
JB
2526 }
2527
080c45fd
RS
2528 /* Do replacement in a string. */
2529 if (!NILP (string))
2530 {
2531 Lisp_Object before, after;
2532
2533 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2534 make_number (search_regs.start[sub]));
2535 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2536
636a5e28
RS
2537 /* Substitute parts of the match into NEWTEXT
2538 if desired. */
080c45fd
RS
2539 if (NILP (literal))
2540 {
d131e79c
RS
2541 int lastpos = 0;
2542 int lastpos_byte = 0;
080c45fd
RS
2543 /* We build up the substituted string in ACCUM. */
2544 Lisp_Object accum;
2545 Lisp_Object middle;
d5db4077 2546 int length = SBYTES (newtext);
080c45fd
RS
2547
2548 accum = Qnil;
2549
ac3b28b1 2550 for (pos_byte = 0, pos = 0; pos_byte < length;)
080c45fd
RS
2551 {
2552 int substart = -1;
6bbd7a29 2553 int subend = 0;
1e79ec24 2554 int delbackslash = 0;
080c45fd 2555
0c8533c6
RS
2556 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2557
080c45fd
RS
2558 if (c == '\\')
2559 {
0c8533c6 2560 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
177c0ea7 2561
080c45fd
RS
2562 if (c == '&')
2563 {
d7a5ad5f
RS
2564 substart = search_regs.start[sub];
2565 subend = search_regs.end[sub];
080c45fd 2566 }
5fbbc83b 2567 else if (c >= '1' && c <= '9')
080c45fd 2568 {
5fbbc83b
RS
2569 if (search_regs.start[c - '0'] >= 0
2570 && c <= search_regs.num_regs + '0')
080c45fd
RS
2571 {
2572 substart = search_regs.start[c - '0'];
2573 subend = search_regs.end[c - '0'];
2574 }
5fbbc83b
RS
2575 else
2576 {
2577 /* If that subexp did not match,
2578 replace \\N with nothing. */
2579 substart = 0;
2580 subend = 0;
2581 }
080c45fd 2582 }
1e79ec24
KH
2583 else if (c == '\\')
2584 delbackslash = 1;
636a5e28
RS
2585 else
2586 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2587 }
2588 if (substart >= 0)
2589 {
d131e79c
RS
2590 if (pos - 2 != lastpos)
2591 middle = substring_both (newtext, lastpos,
2592 lastpos_byte,
2593 pos - 2, pos_byte - 2);
080c45fd
RS
2594 else
2595 middle = Qnil;
2596 accum = concat3 (accum, middle,
0c8533c6
RS
2597 Fsubstring (string,
2598 make_number (substart),
080c45fd
RS
2599 make_number (subend)));
2600 lastpos = pos;
0c8533c6 2601 lastpos_byte = pos_byte;
080c45fd 2602 }
1e79ec24
KH
2603 else if (delbackslash)
2604 {
d131e79c
RS
2605 middle = substring_both (newtext, lastpos,
2606 lastpos_byte,
2607 pos - 1, pos_byte - 1);
0c8533c6 2608
1e79ec24
KH
2609 accum = concat2 (accum, middle);
2610 lastpos = pos;
0c8533c6 2611 lastpos_byte = pos_byte;
1e79ec24 2612 }
080c45fd
RS
2613 }
2614
d131e79c
RS
2615 if (pos != lastpos)
2616 middle = substring_both (newtext, lastpos,
2617 lastpos_byte,
0c8533c6 2618 pos, pos_byte);
080c45fd
RS
2619 else
2620 middle = Qnil;
2621
2622 newtext = concat2 (accum, middle);
2623 }
2624
636a5e28 2625 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2626 if (case_action == all_caps)
2627 newtext = Fupcase (newtext);
2628 else if (case_action == cap_initial)
2b2eead9 2629 newtext = Fupcase_initials (newtext);
080c45fd
RS
2630
2631 return concat3 (before, newtext, after);
2632 }
2633
09c4719e 2634 /* Record point, then move (quietly) to the start of the match. */
9160906f 2635 if (PT >= search_regs.end[sub])
b0eba991 2636 opoint = PT - ZV;
9160906f
RS
2637 else if (PT > search_regs.start[sub])
2638 opoint = search_regs.end[sub] - ZV;
b0eba991
RS
2639 else
2640 opoint = PT;
2641
886ed6ec
RS
2642 /* If we want non-literal replacement,
2643 perform substitution on the replacement string. */
2644 if (NILP (literal))
ca1d1d23 2645 {
d5db4077 2646 int length = SBYTES (newtext);
68e69fbd
RS
2647 unsigned char *substed;
2648 int substed_alloc_size, substed_len;
3bc25e52
KH
2649 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2650 int str_multibyte = STRING_MULTIBYTE (newtext);
2651 Lisp_Object rev_tbl;
886ed6ec 2652 int really_changed = 0;
3bc25e52
KH
2653
2654 rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2655 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2656 make_number (0))
2657 : Qnil);
ac3b28b1 2658
68e69fbd
RS
2659 substed_alloc_size = length * 2 + 100;
2660 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2661 substed_len = 0;
2662
3bc25e52
KH
2663 /* Go thru NEWTEXT, producing the actual text to insert in
2664 SUBSTED while adjusting multibyteness to that of the current
2665 buffer. */
ca1d1d23 2666
ac3b28b1 2667 for (pos_byte = 0, pos = 0; pos_byte < length;)
ca1d1d23 2668 {
68e69fbd 2669 unsigned char str[MAX_MULTIBYTE_LENGTH];
f8ce8a0d
GM
2670 unsigned char *add_stuff = NULL;
2671 int add_len = 0;
68e69fbd 2672 int idx = -1;
9a76659d 2673
3bc25e52
KH
2674 if (str_multibyte)
2675 {
eb99a8dd 2676 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
3bc25e52
KH
2677 if (!buf_multibyte)
2678 c = multibyte_char_to_unibyte (c, rev_tbl);
2679 }
2680 else
2681 {
2682 /* Note that we don't have to increment POS. */
5d69fe10 2683 c = SREF (newtext, pos_byte++);
3bc25e52
KH
2684 if (buf_multibyte)
2685 c = unibyte_char_to_multibyte (c);
2686 }
ac3b28b1 2687
68e69fbd
RS
2688 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2689 or set IDX to a match index, which means put that part
2690 of the buffer text into SUBSTED. */
2691
ca1d1d23
JB
2692 if (c == '\\')
2693 {
886ed6ec
RS
2694 really_changed = 1;
2695
3bc25e52
KH
2696 if (str_multibyte)
2697 {
eb99a8dd
KH
2698 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2699 pos, pos_byte);
3bc25e52
KH
2700 if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2701 c = multibyte_char_to_unibyte (c, rev_tbl);
2702 }
2703 else
2704 {
d5db4077 2705 c = SREF (newtext, pos_byte++);
3bc25e52
KH
2706 if (buf_multibyte)
2707 c = unibyte_char_to_multibyte (c);
2708 }
2709
ca1d1d23 2710 if (c == '&')
68e69fbd 2711 idx = sub;
78445046 2712 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
2713 {
2714 if (search_regs.start[c - '0'] >= 1)
68e69fbd 2715 idx = c - '0';
ca1d1d23 2716 }
636a5e28 2717 else if (c == '\\')
68e69fbd 2718 add_len = 1, add_stuff = "\\";
636a5e28 2719 else
3bc25e52
KH
2720 {
2721 xfree (substed);
2722 error ("Invalid use of `\\' in replacement text");
2723 }
ca1d1d23
JB
2724 }
2725 else
68e69fbd
RS
2726 {
2727 add_len = CHAR_STRING (c, str);
2728 add_stuff = str;
2729 }
2730
2731 /* If we want to copy part of a previous match,
2732 set up ADD_STUFF and ADD_LEN to point to it. */
2733 if (idx >= 0)
2734 {
2735 int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2736 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2737 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2738 move_gap (search_regs.start[idx]);
2739 add_stuff = BYTE_POS_ADDR (begbyte);
2740 }
2741
2742 /* Now the stuff we want to add to SUBSTED
2743 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2744
2745 /* Make sure SUBSTED is big enough. */
2746 if (substed_len + add_len >= substed_alloc_size)
2747 {
2748 substed_alloc_size = substed_len + add_len + 500;
2749 substed = (unsigned char *) xrealloc (substed,
2750 substed_alloc_size + 1);
2751 }
2752
2753 /* Now add to the end of SUBSTED. */
f8ce8a0d
GM
2754 if (add_stuff)
2755 {
2756 bcopy (add_stuff, substed + substed_len, add_len);
2757 substed_len += add_len;
2758 }
ca1d1d23 2759 }
68e69fbd 2760
886ed6ec 2761 if (really_changed)
80460525
KH
2762 {
2763 if (buf_multibyte)
2764 {
2765 int nchars = multibyte_chars_in_text (substed, substed_len);
68e69fbd 2766
80460525
KH
2767 newtext = make_multibyte_string (substed, nchars, substed_len);
2768 }
2769 else
2770 newtext = make_unibyte_string (substed, substed_len);
2771 }
68e69fbd 2772 xfree (substed);
ca1d1d23
JB
2773 }
2774
886ed6ec
RS
2775 /* Replace the old text with the new in the cleanest possible way. */
2776 replace_range (search_regs.start[sub], search_regs.end[sub],
2777 newtext, 1, 0, 1);
d5db4077 2778 newpoint = search_regs.start[sub] + SCHARS (newtext);
ca1d1d23
JB
2779
2780 if (case_action == all_caps)
886ed6ec
RS
2781 Fupcase_region (make_number (search_regs.start[sub]),
2782 make_number (newpoint));
ca1d1d23 2783 else if (case_action == cap_initial)
886ed6ec
RS
2784 Fupcase_initials_region (make_number (search_regs.start[sub]),
2785 make_number (newpoint));
3e18eecf 2786
98e942e0
RS
2787 /* Adjust search data for this change. */
2788 {
5b88a2c5 2789 int oldend = search_regs.end[sub];
41c01205 2790 int oldstart = search_regs.start[sub];
98e942e0
RS
2791 int change = newpoint - search_regs.end[sub];
2792 int i;
2793
2794 for (i = 0; i < search_regs.num_regs; i++)
2795 {
41c01205 2796 if (search_regs.start[i] >= oldend)
98e942e0 2797 search_regs.start[i] += change;
41c01205
DK
2798 else if (search_regs.start[i] > oldstart)
2799 search_regs.start[i] = oldstart;
2800 if (search_regs.end[i] >= oldend)
98e942e0 2801 search_regs.end[i] += change;
41c01205
DK
2802 else if (search_regs.end[i] > oldstart)
2803 search_regs.end[i] = oldstart;
98e942e0
RS
2804 }
2805 }
2806
b0eba991 2807 /* Put point back where it was in the text. */
8d808a65 2808 if (opoint <= 0)
fa8ed3e0 2809 TEMP_SET_PT (opoint + ZV);
b0eba991 2810 else
fa8ed3e0 2811 TEMP_SET_PT (opoint);
b0eba991
RS
2812
2813 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2814 move_if_not_intangible (newpoint);
177c0ea7 2815
ca1d1d23
JB
2816 return Qnil;
2817}
2818\f
2819static Lisp_Object
2820match_limit (num, beginningp)
2821 Lisp_Object num;
2822 int beginningp;
2823{
2824 register int n;
2825
b7826503 2826 CHECK_NUMBER (num);
ca1d1d23 2827 n = XINT (num);
f90a5bf5 2828 if (n < 0)
bd2cbd56 2829 args_out_of_range (num, make_number (0));
f90a5bf5
RS
2830 if (search_regs.num_regs <= 0)
2831 error ("No match data, because no search succeeded");
9b9ceb61 2832 if (n >= search_regs.num_regs
4746118a 2833 || search_regs.start[n] < 0)
ca1d1d23
JB
2834 return Qnil;
2835 return (make_number ((beginningp) ? search_regs.start[n]
2836 : search_regs.end[n]));
2837}
2838
2839DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
8c1a1077
PJ
2840 doc: /* Return position of start of text matched by last search.
2841SUBEXP, a number, specifies which parenthesized expression in the last
2842 regexp.
2843Value is nil if SUBEXPth pair didn't match, or there were less than
2844 SUBEXP pairs.
2845Zero means the entire text matched by the whole regexp or whole string. */)
2846 (subexp)
5806161b 2847 Lisp_Object subexp;
ca1d1d23 2848{
5806161b 2849 return match_limit (subexp, 1);
ca1d1d23
JB
2850}
2851
2852DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
8c1a1077
PJ
2853 doc: /* Return position of end of text matched by last search.
2854SUBEXP, a number, specifies which parenthesized expression in the last
2855 regexp.
2856Value is nil if SUBEXPth pair didn't match, or there were less than
2857 SUBEXP pairs.
2858Zero means the entire text matched by the whole regexp or whole string. */)
2859 (subexp)
5806161b 2860 Lisp_Object subexp;
ca1d1d23 2861{
5806161b 2862 return match_limit (subexp, 0);
177c0ea7 2863}
ca1d1d23 2864
abd0071c 2865DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
8c1a1077
PJ
2866 doc: /* Return a list containing all info on what the last search matched.
2867Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2868All the elements are markers or nil (nil if the Nth pair didn't match)
2869if the last match was on a buffer; integers or nil if a string was matched.
2870Use `store-match-data' to reinstate the data in this list.
2871
41c01205
DK
2872If INTEGERS (the optional first argument) is non-nil, always use
2873integers \(rather than markers) to represent buffer positions. In
2874this case, and if the last match was in a buffer, the buffer will get
2875stored as one additional element at the end of the list.
2876
abd0071c
KS
2877If REUSE is a list, reuse it as part of the value. If REUSE is long
2878enough to hold all the values, and if INTEGERS is non-nil, no consing
2879is done.
2880
2881If optional third arg RESEAT is non-nil, any previous markers on the
2882REUSE list will be modified to point to nowhere.
2883
140a6b7e 2884Return value is undefined if the last search failed. */)
abd0071c
KS
2885 (integers, reuse, reseat)
2886 Lisp_Object integers, reuse, reseat;
ca1d1d23 2887{
56256c2a 2888 Lisp_Object tail, prev;
4746118a 2889 Lisp_Object *data;
ca1d1d23
JB
2890 int i, len;
2891
abd0071c
KS
2892 if (!NILP (reseat))
2893 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2894 if (MARKERP (XCAR (tail)))
2895 {
51f10faa 2896 unchain_marker (XMARKER (XCAR (tail)));
abd0071c
KS
2897 XSETCAR (tail, Qnil);
2898 }
2899
daa37602 2900 if (NILP (last_thing_searched))
c36bcf1b 2901 return Qnil;
daa37602 2902
6bbd7a29
GM
2903 prev = Qnil;
2904
41c01205 2905 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
4746118a
JB
2906 * sizeof (Lisp_Object));
2907
41c01205 2908 len = 0;
4746118a 2909 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2910 {
2911 int start = search_regs.start[i];
2912 if (start >= 0)
2913 {
56256c2a
RS
2914 if (EQ (last_thing_searched, Qt)
2915 || ! NILP (integers))
ca1d1d23 2916 {
c235cce7
KH
2917 XSETFASTINT (data[2 * i], start);
2918 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2919 }
0ed62dc7 2920 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2921 {
2922 data[2 * i] = Fmake_marker ();
daa37602
JB
2923 Fset_marker (data[2 * i],
2924 make_number (start),
2925 last_thing_searched);
ca1d1d23
JB
2926 data[2 * i + 1] = Fmake_marker ();
2927 Fset_marker (data[2 * i + 1],
177c0ea7 2928 make_number (search_regs.end[i]),
daa37602 2929 last_thing_searched);
ca1d1d23 2930 }
daa37602
JB
2931 else
2932 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2933 abort ();
2934
abd0071c 2935 len = 2 * i + 2;
ca1d1d23
JB
2936 }
2937 else
abd0071c 2938 data[2 * i] = data[2 * i + 1] = Qnil;
ca1d1d23 2939 }
56256c2a 2940
bd2cbd56 2941 if (BUFFERP (last_thing_searched) && !NILP (integers))
41c01205 2942 {
bd2cbd56 2943 data[len] = last_thing_searched;
41c01205
DK
2944 len++;
2945 }
2946
56256c2a
RS
2947 /* If REUSE is not usable, cons up the values and return them. */
2948 if (! CONSP (reuse))
41c01205 2949 return Flist (len, data);
56256c2a
RS
2950
2951 /* If REUSE is a list, store as many value elements as will fit
2952 into the elements of REUSE. */
2953 for (i = 0, tail = reuse; CONSP (tail);
c1d497be 2954 i++, tail = XCDR (tail))
56256c2a 2955 {
41c01205 2956 if (i < len)
f3fbd155 2957 XSETCAR (tail, data[i]);
56256c2a 2958 else
f3fbd155 2959 XSETCAR (tail, Qnil);
56256c2a
RS
2960 prev = tail;
2961 }
2962
2963 /* If we couldn't fit all value elements into REUSE,
2964 cons up the rest of them and add them to the end of REUSE. */
41c01205
DK
2965 if (i < len)
2966 XSETCDR (prev, Flist (len - i, data + i));
56256c2a
RS
2967
2968 return reuse;
ca1d1d23
JB
2969}
2970
51f10faa
KS
2971/* Internal usage only:
2972 If RESEAT is `evaporate', put the markers back on the free list
2973 immediately. No other references to the markers must exist in this case,
2974 so it is used only internally on the unwind stack and save-match-data from
2975 Lisp. */
ca1d1d23 2976
abd0071c 2977DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
8c1a1077 2978 doc: /* Set internal data on last search match from elements of LIST.
abd0071c
KS
2979LIST should have been created by calling `match-data' previously.
2980
51f10faa 2981If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
abd0071c
KS
2982 (list, reseat)
2983 register Lisp_Object list, reseat;
ca1d1d23
JB
2984{
2985 register int i;
2986 register Lisp_Object marker;
2987
7074fde6
FP
2988 if (running_asynch_code)
2989 save_search_regs ();
2990
29100cea 2991 CHECK_LIST (list);
ca1d1d23 2992
41c01205
DK
2993 /* Unless we find a marker with a buffer or an explicit buffer
2994 in LIST, assume that this match data came from a string. */
daa37602
JB
2995 last_thing_searched = Qt;
2996
4746118a
JB
2997 /* Allocate registers if they don't already exist. */
2998 {
d084e942 2999 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
3000
3001 if (length > search_regs.num_regs)
3002 {
1113d9db
JB
3003 if (search_regs.num_regs == 0)
3004 {
3005 search_regs.start
3006 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3007 search_regs.end
3008 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3009 }
4746118a 3010 else
1113d9db
JB
3011 {
3012 search_regs.start
3013 = (regoff_t *) xrealloc (search_regs.start,
3014 length * sizeof (regoff_t));
3015 search_regs.end
3016 = (regoff_t *) xrealloc (search_regs.end,
3017 length * sizeof (regoff_t));
3018 }
4746118a 3019
e62371e9
KH
3020 for (i = search_regs.num_regs; i < length; i++)
3021 search_regs.start[i] = -1;
3022
487282dc 3023 search_regs.num_regs = length;
4746118a 3024 }
ca1d1d23 3025
abd0071c 3026 for (i = 0; CONSP (list); i++)
41c01205 3027 {
abd0071c 3028 marker = XCAR (list);
bd2cbd56 3029 if (BUFFERP (marker))
c3762cbd 3030 {
bd2cbd56 3031 last_thing_searched = marker;
c3762cbd
DK
3032 break;
3033 }
3034 if (i >= length)
3035 break;
41c01205
DK
3036 if (NILP (marker))
3037 {
3038 search_regs.start[i] = -1;
abd0071c 3039 list = XCDR (list);
41c01205
DK
3040 }
3041 else
3042 {
3043 int from;
abd0071c 3044 Lisp_Object m;
e2811828 3045
abd0071c 3046 m = marker;
41c01205
DK
3047 if (MARKERP (marker))
3048 {
3049 if (XMARKER (marker)->buffer == 0)
3050 XSETFASTINT (marker, 0);
3051 else
3052 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3053 }
e2811828 3054
41c01205
DK
3055 CHECK_NUMBER_COERCE_MARKER (marker);
3056 from = XINT (marker);
e2811828 3057
abd0071c
KS
3058 if (!NILP (reseat) && MARKERP (m))
3059 {
3060 if (EQ (reseat, Qevaporate))
3061 free_marker (m);
3062 else
3063 unchain_marker (XMARKER (m));
9ad54a7e 3064 XSETCAR (list, Qnil);
abd0071c
KS
3065 }
3066
3067 if ((list = XCDR (list), !CONSP (list)))
3068 break;
3069
3070 m = marker = XCAR (list);
3071
41c01205
DK
3072 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3073 XSETFASTINT (marker, 0);
e2811828 3074
41c01205
DK
3075 CHECK_NUMBER_COERCE_MARKER (marker);
3076 search_regs.start[i] = from;
3077 search_regs.end[i] = XINT (marker);
abd0071c
KS
3078
3079 if (!NILP (reseat) && MARKERP (m))
3080 {
3081 if (EQ (reseat, Qevaporate))
3082 free_marker (m);
3083 else
3084 unchain_marker (XMARKER (m));
9ad54a7e 3085 XSETCAR (list, Qnil);
abd0071c 3086 }
41c01205 3087 }
abd0071c 3088 list = XCDR (list);
41c01205 3089 }
ca1d1d23 3090
41c01205
DK
3091 for (; i < search_regs.num_regs; i++)
3092 search_regs.start[i] = -1;
3093 }
ca1d1d23 3094
177c0ea7 3095 return Qnil;
ca1d1d23
JB
3096}
3097
7074fde6
FP
3098/* If non-zero the match data have been saved in saved_search_regs
3099 during the execution of a sentinel or filter. */
75ebf74b 3100static int search_regs_saved;
7074fde6 3101static struct re_registers saved_search_regs;
41c01205 3102static Lisp_Object saved_last_thing_searched;
7074fde6
FP
3103
3104/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3105 if asynchronous code (filter or sentinel) is running. */
3106static void
3107save_search_regs ()
3108{
3109 if (!search_regs_saved)
3110 {
3111 saved_search_regs.num_regs = search_regs.num_regs;
3112 saved_search_regs.start = search_regs.start;
3113 saved_search_regs.end = search_regs.end;
41c01205
DK
3114 saved_last_thing_searched = last_thing_searched;
3115 last_thing_searched = Qnil;
7074fde6 3116 search_regs.num_regs = 0;
2d4a771a
RS
3117 search_regs.start = 0;
3118 search_regs.end = 0;
7074fde6
FP
3119
3120 search_regs_saved = 1;
3121 }
3122}
3123
3124/* Called upon exit from filters and sentinels. */
3125void
abd0071c 3126restore_search_regs ()
7074fde6
FP
3127{
3128 if (search_regs_saved)
3129 {
3130 if (search_regs.num_regs > 0)
3131 {
3132 xfree (search_regs.start);
3133 xfree (search_regs.end);
3134 }
3135 search_regs.num_regs = saved_search_regs.num_regs;
3136 search_regs.start = saved_search_regs.start;
3137 search_regs.end = saved_search_regs.end;
41c01205
DK
3138 last_thing_searched = saved_last_thing_searched;
3139 saved_last_thing_searched = Qnil;
7074fde6
FP
3140 search_regs_saved = 0;
3141 }
3142}
3143
abd0071c
KS
3144static Lisp_Object
3145unwind_set_match_data (list)
3146 Lisp_Object list;
3147{
51f10faa 3148 /* It is safe to free (evaporate) the markers immediately. */
abd0071c
KS
3149 return Fset_match_data (list, Qevaporate);
3150}
3151
3152/* Called to unwind protect the match data. */
3153void
3154record_unwind_save_match_data ()
3155{
3156 record_unwind_protect (unwind_set_match_data,
3157 Fmatch_data (Qnil, Qnil, Qnil));
3158}
3159
ca1d1d23
JB
3160/* Quote a string to inactivate reg-expr chars */
3161
3162DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
8c1a1077
PJ
3163 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
3164 (string)
5806161b 3165 Lisp_Object string;
ca1d1d23
JB
3166{
3167 register unsigned char *in, *out, *end;
3168 register unsigned char *temp;
0c8533c6 3169 int backslashes_added = 0;
ca1d1d23 3170
b7826503 3171 CHECK_STRING (string);
ca1d1d23 3172
d5db4077 3173 temp = (unsigned char *) alloca (SBYTES (string) * 2);
ca1d1d23
JB
3174
3175 /* Now copy the data into the new string, inserting escapes. */
3176
d5db4077
KR
3177 in = SDATA (string);
3178 end = in + SBYTES (string);
177c0ea7 3179 out = temp;
ca1d1d23
JB
3180
3181 for (; in != end; in++)
3182 {
66bc6082 3183 if (*in == '['
ca1d1d23
JB
3184 || *in == '*' || *in == '.' || *in == '\\'
3185 || *in == '?' || *in == '+'
3186 || *in == '^' || *in == '$')
0c8533c6 3187 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
3188 *out++ = *in;
3189 }
3190
3f8100f1 3191 return make_specified_string (temp,
d5db4077 3192 SCHARS (string) + backslashes_added,
3f8100f1
RS
3193 out - temp,
3194 STRING_MULTIBYTE (string));
ca1d1d23 3195}
177c0ea7 3196\f
dfcf069d 3197void
ca1d1d23
JB
3198syms_of_search ()
3199{
3200 register int i;
3201
487282dc
KH
3202 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3203 {
3204 searchbufs[i].buf.allocated = 100;
b23c0a83 3205 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
487282dc
KH
3206 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3207 searchbufs[i].regexp = Qnil;
ecdb561e 3208 searchbufs[i].whitespace_regexp = Qnil;
b69e3c18 3209 searchbufs[i].syntax_table = Qnil;
487282dc 3210 staticpro (&searchbufs[i].regexp);
aa77b5ce 3211 staticpro (&searchbufs[i].whitespace_regexp);
b69e3c18 3212 staticpro (&searchbufs[i].syntax_table);
487282dc
KH
3213 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3214 }
3215 searchbuf_head = &searchbufs[0];
ca1d1d23
JB
3216
3217 Qsearch_failed = intern ("search-failed");
3218 staticpro (&Qsearch_failed);
3219 Qinvalid_regexp = intern ("invalid-regexp");
3220 staticpro (&Qinvalid_regexp);
3221
3222 Fput (Qsearch_failed, Qerror_conditions,
3223 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3224 Fput (Qsearch_failed, Qerror_message,
3225 build_string ("Search failed"));
3226
3227 Fput (Qinvalid_regexp, Qerror_conditions,
3228 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3229 Fput (Qinvalid_regexp, Qerror_message,
3230 build_string ("Invalid regexp"));
3231
daa37602
JB
3232 last_thing_searched = Qnil;
3233 staticpro (&last_thing_searched);
3234
0f6af254
DK
3235 saved_last_thing_searched = Qnil;
3236 staticpro (&saved_last_thing_searched);
3237
41a33295 3238 DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
e2811828 3239 doc: /* Regexp to substitute for bunches of spaces in regexp search.
f31a9a68
RS
3240Some commands use this for user-specified regexps.
3241Spaces that occur inside character classes or repetition operators
3242or other such regexp constructs are not replaced with this.
3243A value of nil (which is the normal value) means treat spaces literally. */);
41a33295 3244 Vsearch_spaces_regexp = Qnil;
f31a9a68 3245
ef887810
RS
3246 DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data,
3247 doc: /* Internal use only.
3248If non-nil, the match data will not be changed during call to searching or
3249matching functions, such as `looking-at', `string-match', `re-search-forward'
3250etc. */);
3251 Vinhibit_changing_match_data = Qnil;
3252
ca1d1d23 3253 defsubr (&Slooking_at);
b819a390
RS
3254 defsubr (&Sposix_looking_at);
3255 defsubr (&Sstring_match);
3256 defsubr (&Sposix_string_match);
ca1d1d23
JB
3257 defsubr (&Ssearch_forward);
3258 defsubr (&Ssearch_backward);
3259 defsubr (&Sword_search_forward);
3260 defsubr (&Sword_search_backward);
3261 defsubr (&Sre_search_forward);
3262 defsubr (&Sre_search_backward);
b819a390
RS
3263 defsubr (&Sposix_search_forward);
3264 defsubr (&Sposix_search_backward);
ca1d1d23
JB
3265 defsubr (&Sreplace_match);
3266 defsubr (&Smatch_beginning);
3267 defsubr (&Smatch_end);
3268 defsubr (&Smatch_data);
3f1c005b 3269 defsubr (&Sset_match_data);
ca1d1d23
JB
3270 defsubr (&Sregexp_quote);
3271}
ab5796a9
MB
3272
3273/* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3274 (do not change this comment) */