Silence some byte-compiler warnings in ERC
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
429ab54e
GM
2 Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
ca1d1d23
JB
4
5This file is part of GNU Emacs.
6
7GNU Emacs is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
684d6f5b 9the Free Software Foundation; either version 3, or (at your option)
ca1d1d23
JB
10any later version.
11
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU Emacs; see the file COPYING. If not, write to
4fc5845f
LK
19the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20Boston, MA 02110-1301, USA. */
ca1d1d23
JB
21
22
18160b98 23#include <config.h>
ca1d1d23
JB
24#include "lisp.h"
25#include "syntax.h"
5679531d 26#include "category.h"
ca1d1d23 27#include "buffer.h"
5679531d 28#include "charset.h"
9169c321 29#include "region-cache.h"
ca1d1d23 30#include "commands.h"
9ac0d9e0 31#include "blockinput.h"
bf1760bb 32#include "intervals.h"
4746118a 33
ca1d1d23
JB
34#include <sys/types.h>
35#include "regex.h"
36
1d288aef 37#define REGEXP_CACHE_SIZE 20
ca1d1d23 38
487282dc
KH
39/* If the regexp is non-nil, then the buffer contains the compiled form
40 of that regexp, suitable for searching. */
1d288aef
RS
41struct regexp_cache
42{
487282dc 43 struct regexp_cache *next;
ecdb561e 44 Lisp_Object regexp, whitespace_regexp;
b69e3c18 45 /* Syntax table for which the regexp applies. We need this because
58e95211
SM
46 of character classes. If this is t, then the compiled pattern is valid
47 for any syntax-table. */
b69e3c18 48 Lisp_Object syntax_table;
487282dc
KH
49 struct re_pattern_buffer buf;
50 char fastmap[0400];
b819a390
RS
51 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
52 char posix;
487282dc 53};
ca1d1d23 54
487282dc
KH
55/* The instances of that struct. */
56struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 57
487282dc
KH
58/* The head of the linked list; points to the most recently used buffer. */
59struct regexp_cache *searchbuf_head;
ca1d1d23 60
ca1d1d23 61
4746118a
JB
62/* Every call to re_match, etc., must pass &search_regs as the regs
63 argument unless you can show it is unnecessary (i.e., if re_match
64 is certainly going to be called again before region-around-match
65 can be called).
66
67 Since the registers are now dynamically allocated, we need to make
68 sure not to refer to the Nth register before checking that it has
1113d9db
JB
69 been allocated by checking search_regs.num_regs.
70
71 The regex code keeps track of whether it has allocated the search
487282dc
KH
72 buffer using bits in the re_pattern_buffer. This means that whenever
73 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
74 allocated any registers, and will allocate new registers the next
75 time you call a searching or matching function. Therefore, we need
76 to call re_set_registers after compiling a new pattern or after
77 setting the match registers, so that the regex functions will be
78 able to free or re-allocate it properly. */
ca1d1d23
JB
79static struct re_registers search_regs;
80
daa37602
JB
81/* The buffer in which the last search was performed, or
82 Qt if the last search was done in a string;
83 Qnil if no searching has been done yet. */
84static Lisp_Object last_thing_searched;
ca1d1d23 85
8e6208c5 86/* error condition signaled when regexp compile_pattern fails */
ca1d1d23
JB
87
88Lisp_Object Qinvalid_regexp;
89
06f77a8a
KS
90/* Error condition used for failing searches */
91Lisp_Object Qsearch_failed;
92
41a33295 93Lisp_Object Vsearch_spaces_regexp;
f31a9a68 94
ef887810
RS
95/* If non-nil, the match data will not be changed during call to
96 searching or matching functions. This variable is for internal use
97 only. */
98Lisp_Object Vinhibit_changing_match_data;
99
ca325161 100static void set_search_regs ();
044f81f1 101static void save_search_regs ();
facdc750
RS
102static int simple_search ();
103static int boyer_moore ();
b819a390 104static int search_buffer ();
01d09305 105static void matcher_overflow () NO_RETURN;
b819a390 106
ca1d1d23
JB
107static void
108matcher_overflow ()
109{
110 error ("Stack overflow in regexp matcher");
111}
112
b819a390
RS
113/* Compile a regexp and signal a Lisp error if anything goes wrong.
114 PATTERN is the pattern to compile.
115 CP is the place to put the result.
facdc750 116 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
117 REGP is the structure that says where to store the "register"
118 values that will result from matching this pattern.
119 If it is 0, we should compile the pattern not to record any
120 subexpression bounds.
121 POSIX is nonzero if we want full backtracking (POSIX style)
5679531d
KH
122 for this pattern. 0 means backtrack only enough to get a valid match.
123 MULTIBYTE is nonzero if we want to handle multibyte characters in
124 PATTERN. 0 means all multibyte characters are recognized just as
ecdb561e
RS
125 sequences of binary data.
126
41a33295 127 The behavior also depends on Vsearch_spaces_regexp. */
ca1d1d23 128
487282dc 129static void
5679531d 130compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
487282dc 131 struct regexp_cache *cp;
ca1d1d23 132 Lisp_Object pattern;
facdc750 133 Lisp_Object translate;
487282dc 134 struct re_registers *regp;
b819a390 135 int posix;
5679531d 136 int multibyte;
ca1d1d23 137{
7276d3d8 138 unsigned char *raw_pattern;
f8bd51c4 139 int raw_pattern_size;
d451e4db 140 char *val;
b819a390 141 reg_syntax_t old;
ca1d1d23 142
f8bd51c4
KH
143 /* MULTIBYTE says whether the text to be searched is multibyte.
144 We must convert PATTERN to match that, or we will not really
145 find things right. */
146
147 if (multibyte == STRING_MULTIBYTE (pattern))
148 {
d5db4077
KR
149 raw_pattern = (unsigned char *) SDATA (pattern);
150 raw_pattern_size = SBYTES (pattern);
f8bd51c4
KH
151 }
152 else if (multibyte)
153 {
d5db4077
KR
154 raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
155 SCHARS (pattern));
7276d3d8 156 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
157 copy_text (SDATA (pattern), raw_pattern,
158 SCHARS (pattern), 0, 1);
f8bd51c4
KH
159 }
160 else
161 {
162 /* Converting multibyte to single-byte.
163
164 ??? Perhaps this conversion should be done in a special way
165 by subtracting nonascii-insert-offset from each non-ASCII char,
166 so that only the multibyte chars which really correspond to
167 the chosen single-byte character set can possibly match. */
d5db4077 168 raw_pattern_size = SCHARS (pattern);
7276d3d8 169 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
170 copy_text (SDATA (pattern), raw_pattern,
171 SBYTES (pattern), 1, 0);
f8bd51c4
KH
172 }
173
487282dc 174 cp->regexp = Qnil;
59fab369 175 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
b819a390 176 cp->posix = posix;
5679531d 177 cp->buf.multibyte = multibyte;
41a33295 178 cp->whitespace_regexp = Vsearch_spaces_regexp;
e7b2dd2e
RS
179 /* rms: I think BLOCK_INPUT is not needed here any more,
180 because regex.c defines malloc to call xmalloc.
181 Using BLOCK_INPUT here means the debugger won't run if an error occurs.
182 So let's turn it off. */
183 /* BLOCK_INPUT; */
fb4a568d 184 old = re_set_syntax (RE_SYNTAX_EMACS
b819a390 185 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
f31a9a68 186
41a33295
RS
187 re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
188 : SDATA (Vsearch_spaces_regexp));
f31a9a68 189
7276d3d8
RS
190 val = (char *) re_compile_pattern ((char *)raw_pattern,
191 raw_pattern_size, &cp->buf);
f31a9a68 192
58e95211
SM
193 /* If the compiled pattern hard codes some of the contents of the
194 syntax-table, it can only be reused with *this* syntax table. */
195 cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
196
f31a9a68
RS
197 re_set_whitespace_regexp (NULL);
198
b819a390 199 re_set_syntax (old);
e7b2dd2e 200 /* UNBLOCK_INPUT; */
ca1d1d23 201 if (val)
06f77a8a 202 xsignal1 (Qinvalid_regexp, build_string (val));
1113d9db 203
487282dc 204 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
205}
206
6efc7887
RS
207/* Shrink each compiled regexp buffer in the cache
208 to the size actually used right now.
209 This is called from garbage collection. */
210
211void
212shrink_regexp_cache ()
213{
a968f437 214 struct regexp_cache *cp;
6efc7887
RS
215
216 for (cp = searchbuf_head; cp != 0; cp = cp->next)
217 {
218 cp->buf.allocated = cp->buf.used;
219 cp->buf.buffer
b23c0a83 220 = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
6efc7887
RS
221 }
222}
223
54dd3310
SM
224/* Clear the regexp cache w.r.t. a particular syntax table,
225 because it was changed.
e5b94d44
CY
226 There is no danger of memory leak here because re_compile_pattern
227 automagically manages the memory in each re_pattern_buffer struct,
228 based on its `allocated' and `buffer' values. */
229void
230clear_regexp_cache ()
231{
232 int i;
233
234 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
54dd3310
SM
235 /* It's tempting to compare with the syntax-table we've actually changd,
236 but it's not sufficient because char-table inheritance mewans that
237 modifying one syntax-table can change others at the same time. */
238 if (!EQ (searchbufs[i].syntax_table, Qt))
239 searchbufs[i].regexp = Qnil;
e5b94d44
CY
240}
241
487282dc 242/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
243 the cache.
244 PATTERN is the pattern to compile.
facdc750 245 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
246 REGP is the structure that says where to store the "register"
247 values that will result from matching this pattern.
248 If it is 0, we should compile the pattern not to record any
249 subexpression bounds.
250 POSIX is nonzero if we want full backtracking (POSIX style)
251 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
252
253struct re_pattern_buffer *
0c8533c6 254compile_pattern (pattern, regp, translate, posix, multibyte)
487282dc
KH
255 Lisp_Object pattern;
256 struct re_registers *regp;
facdc750 257 Lisp_Object translate;
0c8533c6 258 int posix, multibyte;
487282dc
KH
259{
260 struct regexp_cache *cp, **cpp;
261
262 for (cpp = &searchbuf_head; ; cpp = &cp->next)
263 {
264 cp = *cpp;
f1b9c7c1
KR
265 /* Entries are initialized to nil, and may be set to nil by
266 compile_pattern_1 if the pattern isn't valid. Don't apply
49a5f770
KR
267 string accessors in those cases. However, compile_pattern_1
268 is only applied to the cache entry we pick here to reuse. So
269 nil should never appear before a non-nil entry. */
7c752c80 270 if (NILP (cp->regexp))
f1b9c7c1 271 goto compile_it;
d5db4077 272 if (SCHARS (cp->regexp) == SCHARS (pattern)
cf69b13e 273 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
1d288aef 274 && !NILP (Fstring_equal (cp->regexp, pattern))
59fab369 275 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
5679531d 276 && cp->posix == posix
ecdb561e 277 && cp->buf.multibyte == multibyte
58e95211
SM
278 && (EQ (cp->syntax_table, Qt)
279 || EQ (cp->syntax_table, current_buffer->syntax_table))
41a33295 280 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp)))
487282dc
KH
281 break;
282
f1b9c7c1
KR
283 /* If we're at the end of the cache, compile into the nil cell
284 we found, or the last (least recently used) cell with a
285 string value. */
487282dc
KH
286 if (cp->next == 0)
287 {
f1b9c7c1 288 compile_it:
5679531d 289 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
487282dc
KH
290 break;
291 }
292 }
293
294 /* When we get here, cp (aka *cpp) contains the compiled pattern,
295 either because we found it in the cache or because we just compiled it.
296 Move it to the front of the queue to mark it as most recently used. */
297 *cpp = cp->next;
298 cp->next = searchbuf_head;
299 searchbuf_head = cp;
1113d9db 300
6639708c
RS
301 /* Advise the searching functions about the space we have allocated
302 for register data. */
303 if (regp)
304 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
305
487282dc 306 return &cp->buf;
ca1d1d23
JB
307}
308
ca1d1d23 309\f
b819a390
RS
310static Lisp_Object
311looking_at_1 (string, posix)
ca1d1d23 312 Lisp_Object string;
b819a390 313 int posix;
ca1d1d23
JB
314{
315 Lisp_Object val;
316 unsigned char *p1, *p2;
317 int s1, s2;
318 register int i;
487282dc 319 struct re_pattern_buffer *bufp;
ca1d1d23 320
7074fde6
FP
321 if (running_asynch_code)
322 save_search_regs ();
323
910c747a
RS
324 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
325 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
326 = current_buffer->case_eqv_table;
327
b7826503 328 CHECK_STRING (string);
ef887810
RS
329 bufp = compile_pattern (string,
330 (NILP (Vinhibit_changing_match_data)
331 ? &search_regs : NULL),
487282dc 332 (!NILP (current_buffer->case_fold_search)
0190922f 333 ? current_buffer->case_canon_table : Qnil),
0c8533c6
RS
334 posix,
335 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23
JB
336
337 immediate_quit = 1;
338 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
339
340 /* Get pointers and sizes of the two strings
341 that make up the visible portion of the buffer. */
342
343 p1 = BEGV_ADDR;
fa8ed3e0 344 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 345 p2 = GAP_END_ADDR;
fa8ed3e0 346 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
347 if (s1 < 0)
348 {
349 p2 = p1;
fa8ed3e0 350 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
351 s1 = 0;
352 }
353 if (s2 < 0)
354 {
fa8ed3e0 355 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
356 s2 = 0;
357 }
8bb43c28
RS
358
359 re_match_object = Qnil;
177c0ea7 360
487282dc 361 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
ef887810
RS
362 PT_BYTE - BEGV_BYTE,
363 (NILP (Vinhibit_changing_match_data)
364 ? &search_regs : NULL),
fa8ed3e0 365 ZV_BYTE - BEGV_BYTE);
de182d70 366 immediate_quit = 0;
177c0ea7 367
ca1d1d23
JB
368 if (i == -2)
369 matcher_overflow ();
370
371 val = (0 <= i ? Qt : Qnil);
ef887810 372 if (NILP (Vinhibit_changing_match_data) && i >= 0)
fa8ed3e0
RS
373 for (i = 0; i < search_regs.num_regs; i++)
374 if (search_regs.start[i] >= 0)
375 {
376 search_regs.start[i]
377 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
378 search_regs.end[i]
379 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
380 }
ef887810
RS
381
382 /* Set last_thing_searched only when match data is changed. */
383 if (NILP (Vinhibit_changing_match_data))
384 XSETBUFFER (last_thing_searched, current_buffer);
385
ca1d1d23
JB
386 return val;
387}
388
b819a390 389DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
8c1a1077
PJ
390 doc: /* Return t if text after point matches regular expression REGEXP.
391This function modifies the match data that `match-beginning',
392`match-end' and `match-data' access; save and restore the match
393data if you want to preserve them. */)
394 (regexp)
94f94972 395 Lisp_Object regexp;
b819a390 396{
94f94972 397 return looking_at_1 (regexp, 0);
b819a390
RS
398}
399
400DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
8c1a1077
PJ
401 doc: /* Return t if text after point matches regular expression REGEXP.
402Find the longest match, in accord with Posix regular expression rules.
403This function modifies the match data that `match-beginning',
404`match-end' and `match-data' access; save and restore the match
405data if you want to preserve them. */)
406 (regexp)
94f94972 407 Lisp_Object regexp;
b819a390 408{
94f94972 409 return looking_at_1 (regexp, 1);
b819a390
RS
410}
411\f
412static Lisp_Object
413string_match_1 (regexp, string, start, posix)
ca1d1d23 414 Lisp_Object regexp, string, start;
b819a390 415 int posix;
ca1d1d23
JB
416{
417 int val;
487282dc 418 struct re_pattern_buffer *bufp;
0c8533c6
RS
419 int pos, pos_byte;
420 int i;
ca1d1d23 421
7074fde6
FP
422 if (running_asynch_code)
423 save_search_regs ();
424
b7826503
PJ
425 CHECK_STRING (regexp);
426 CHECK_STRING (string);
ca1d1d23
JB
427
428 if (NILP (start))
0c8533c6 429 pos = 0, pos_byte = 0;
ca1d1d23
JB
430 else
431 {
d5db4077 432 int len = SCHARS (string);
ca1d1d23 433
b7826503 434 CHECK_NUMBER (start);
0c8533c6
RS
435 pos = XINT (start);
436 if (pos < 0 && -pos <= len)
437 pos = len + pos;
438 else if (0 > pos || pos > len)
ca1d1d23 439 args_out_of_range (string, start);
0c8533c6 440 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
441 }
442
910c747a
RS
443 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
444 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
445 = current_buffer->case_eqv_table;
446
ef887810
RS
447 bufp = compile_pattern (regexp,
448 (NILP (Vinhibit_changing_match_data)
449 ? &search_regs : NULL),
487282dc 450 (!NILP (current_buffer->case_fold_search)
0190922f 451 ? current_buffer->case_canon_table : Qnil),
0c8533c6
RS
452 posix,
453 STRING_MULTIBYTE (string));
ca1d1d23 454 immediate_quit = 1;
8bb43c28 455 re_match_object = string;
177c0ea7 456
d5db4077
KR
457 val = re_search (bufp, (char *) SDATA (string),
458 SBYTES (string), pos_byte,
459 SBYTES (string) - pos_byte,
ef887810
RS
460 (NILP (Vinhibit_changing_match_data)
461 ? &search_regs : NULL));
ca1d1d23 462 immediate_quit = 0;
ef887810
RS
463
464 /* Set last_thing_searched only when match data is changed. */
465 if (NILP (Vinhibit_changing_match_data))
466 last_thing_searched = Qt;
467
ca1d1d23
JB
468 if (val == -2)
469 matcher_overflow ();
470 if (val < 0) return Qnil;
0c8533c6 471
ef887810
RS
472 if (NILP (Vinhibit_changing_match_data))
473 for (i = 0; i < search_regs.num_regs; i++)
474 if (search_regs.start[i] >= 0)
475 {
476 search_regs.start[i]
477 = string_byte_to_char (string, search_regs.start[i]);
478 search_regs.end[i]
479 = string_byte_to_char (string, search_regs.end[i]);
480 }
0c8533c6
RS
481
482 return make_number (string_byte_to_char (string, val));
ca1d1d23 483}
e59a8453 484
b819a390 485DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
8c1a1077 486 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
b85acc4b 487Matching ignores case if `case-fold-search' is non-nil.
8c1a1077
PJ
488If third arg START is non-nil, start search at that index in STRING.
489For index of first char beyond the match, do (match-end 0).
490`match-end' and `match-beginning' also give indices of substrings
2bd2f32d
RS
491matched by parenthesis constructs in the pattern.
492
493You can use the function `match-string' to extract the substrings
494matched by the parenthesis constructions in REGEXP. */)
8c1a1077 495 (regexp, string, start)
b819a390
RS
496 Lisp_Object regexp, string, start;
497{
498 return string_match_1 (regexp, string, start, 0);
499}
500
501DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
8c1a1077
PJ
502 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
503Find the longest match, in accord with Posix regular expression rules.
504Case is ignored if `case-fold-search' is non-nil in the current buffer.
505If third arg START is non-nil, start search at that index in STRING.
506For index of first char beyond the match, do (match-end 0).
507`match-end' and `match-beginning' also give indices of substrings
508matched by parenthesis constructs in the pattern. */)
509 (regexp, string, start)
b819a390
RS
510 Lisp_Object regexp, string, start;
511{
512 return string_match_1 (regexp, string, start, 1);
513}
514
e59a8453
RS
515/* Match REGEXP against STRING, searching all of STRING,
516 and return the index of the match, or negative on failure.
517 This does not clobber the match data. */
518
519int
520fast_string_match (regexp, string)
521 Lisp_Object regexp, string;
522{
523 int val;
487282dc 524 struct re_pattern_buffer *bufp;
e59a8453 525
facdc750
RS
526 bufp = compile_pattern (regexp, 0, Qnil,
527 0, STRING_MULTIBYTE (string));
e59a8453 528 immediate_quit = 1;
8bb43c28 529 re_match_object = string;
177c0ea7 530
d5db4077
KR
531 val = re_search (bufp, (char *) SDATA (string),
532 SBYTES (string), 0,
533 SBYTES (string), 0);
e59a8453
RS
534 immediate_quit = 0;
535 return val;
536}
5679531d
KH
537
538/* Match REGEXP against STRING, searching all of STRING ignoring case,
539 and return the index of the match, or negative on failure.
0c8533c6
RS
540 This does not clobber the match data.
541 We assume that STRING contains single-byte characters. */
5679531d
KH
542
543extern Lisp_Object Vascii_downcase_table;
544
545int
b4577c63 546fast_c_string_match_ignore_case (regexp, string)
5679531d 547 Lisp_Object regexp;
96b80561 548 const char *string;
5679531d
KH
549{
550 int val;
551 struct re_pattern_buffer *bufp;
552 int len = strlen (string);
553
0c8533c6 554 regexp = string_make_unibyte (regexp);
b4577c63 555 re_match_object = Qt;
5679531d 556 bufp = compile_pattern (regexp, 0,
0190922f 557 Vascii_canon_table, 0,
f8bd51c4 558 0);
5679531d
KH
559 immediate_quit = 1;
560 val = re_search (bufp, string, len, 0, len, 0);
561 immediate_quit = 0;
562 return val;
563}
be5f4dfb
KH
564
565/* Like fast_string_match but ignore case. */
566
567int
568fast_string_match_ignore_case (regexp, string)
569 Lisp_Object regexp, string;
570{
571 int val;
572 struct re_pattern_buffer *bufp;
573
0190922f 574 bufp = compile_pattern (regexp, 0, Vascii_canon_table,
be5f4dfb
KH
575 0, STRING_MULTIBYTE (string));
576 immediate_quit = 1;
577 re_match_object = string;
578
579 val = re_search (bufp, (char *) SDATA (string),
580 SBYTES (string), 0,
581 SBYTES (string), 0);
582 immediate_quit = 0;
583 return val;
584}
ca1d1d23 585\f
9169c321
JB
586/* The newline cache: remembering which sections of text have no newlines. */
587
588/* If the user has requested newline caching, make sure it's on.
589 Otherwise, make sure it's off.
590 This is our cheezy way of associating an action with the change of
591 state of a buffer-local variable. */
592static void
593newline_cache_on_off (buf)
594 struct buffer *buf;
595{
596 if (NILP (buf->cache_long_line_scans))
597 {
598 /* It should be off. */
599 if (buf->newline_cache)
600 {
601 free_region_cache (buf->newline_cache);
602 buf->newline_cache = 0;
603 }
604 }
605 else
606 {
607 /* It should be on. */
608 if (buf->newline_cache == 0)
609 buf->newline_cache = new_region_cache ();
610 }
611}
612
613\f
614/* Search for COUNT instances of the character TARGET between START and END.
615
616 If COUNT is positive, search forwards; END must be >= START.
617 If COUNT is negative, search backwards for the -COUNTth instance;
618 END must be <= START.
619 If COUNT is zero, do anything you please; run rogue, for all I care.
620
621 If END is zero, use BEGV or ZV instead, as appropriate for the
622 direction indicated by COUNT.
ffd56f97
JB
623
624 If we find COUNT instances, set *SHORTAGE to zero, and return the
a9f2a45f 625 position past the COUNTth match. Note that for reverse motion
5bfe95c9 626 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 627
9169c321
JB
628 If we don't find COUNT instances before reaching END, set *SHORTAGE
629 to the number of TARGETs left unfound, and return END.
ffd56f97 630
087a5f81
RS
631 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
632 except when inside redisplay. */
633
dfcf069d 634int
9169c321
JB
635scan_buffer (target, start, end, count, shortage, allow_quit)
636 register int target;
637 int start, end;
638 int count;
639 int *shortage;
087a5f81 640 int allow_quit;
ca1d1d23 641{
9169c321 642 struct region_cache *newline_cache;
177c0ea7 643 int direction;
ffd56f97 644
9169c321
JB
645 if (count > 0)
646 {
647 direction = 1;
648 if (! end) end = ZV;
649 }
650 else
651 {
652 direction = -1;
653 if (! end) end = BEGV;
654 }
ffd56f97 655
9169c321
JB
656 newline_cache_on_off (current_buffer);
657 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
658
659 if (shortage != 0)
660 *shortage = 0;
661
087a5f81 662 immediate_quit = allow_quit;
ca1d1d23 663
ffd56f97 664 if (count > 0)
9169c321 665 while (start != end)
ca1d1d23 666 {
9169c321
JB
667 /* Our innermost scanning loop is very simple; it doesn't know
668 about gaps, buffer ends, or the newline cache. ceiling is
669 the position of the last character before the next such
670 obstacle --- the last character the dumb search loop should
671 examine. */
fa8ed3e0
RS
672 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
673 int start_byte = CHAR_TO_BYTE (start);
67ce527d 674 int tem;
9169c321
JB
675
676 /* If we're looking for a newline, consult the newline cache
677 to see where we can avoid some scanning. */
678 if (target == '\n' && newline_cache)
679 {
680 int next_change;
681 immediate_quit = 0;
682 while (region_cache_forward
fa8ed3e0
RS
683 (current_buffer, newline_cache, start_byte, &next_change))
684 start_byte = next_change;
cbe0db0d 685 immediate_quit = allow_quit;
9169c321 686
fa8ed3e0
RS
687 /* START should never be after END. */
688 if (start_byte > ceiling_byte)
689 start_byte = ceiling_byte;
9169c321
JB
690
691 /* Now the text after start is an unknown region, and
692 next_change is the position of the next known region. */
fa8ed3e0 693 ceiling_byte = min (next_change - 1, ceiling_byte);
9169c321
JB
694 }
695
696 /* The dumb loop can only scan text stored in contiguous
697 bytes. BUFFER_CEILING_OF returns the last character
698 position that is contiguous, so the ceiling is the
699 position after that. */
67ce527d
KH
700 tem = BUFFER_CEILING_OF (start_byte);
701 ceiling_byte = min (tem, ceiling_byte);
9169c321
JB
702
703 {
177c0ea7 704 /* The termination address of the dumb loop. */
fa8ed3e0
RS
705 register unsigned char *ceiling_addr
706 = BYTE_POS_ADDR (ceiling_byte) + 1;
707 register unsigned char *cursor
708 = BYTE_POS_ADDR (start_byte);
9169c321
JB
709 unsigned char *base = cursor;
710
711 while (cursor < ceiling_addr)
712 {
713 unsigned char *scan_start = cursor;
714
715 /* The dumb loop. */
716 while (*cursor != target && ++cursor < ceiling_addr)
717 ;
718
719 /* If we're looking for newlines, cache the fact that
720 the region from start to cursor is free of them. */
721 if (target == '\n' && newline_cache)
722 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
723 start_byte + scan_start - base,
724 start_byte + cursor - base);
9169c321
JB
725
726 /* Did we find the target character? */
727 if (cursor < ceiling_addr)
728 {
729 if (--count == 0)
730 {
731 immediate_quit = 0;
fa8ed3e0 732 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
9169c321
JB
733 }
734 cursor++;
735 }
736 }
737
fa8ed3e0 738 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321 739 }
ca1d1d23
JB
740 }
741 else
9169c321
JB
742 while (start > end)
743 {
744 /* The last character to check before the next obstacle. */
fa8ed3e0
RS
745 int ceiling_byte = CHAR_TO_BYTE (end);
746 int start_byte = CHAR_TO_BYTE (start);
67ce527d 747 int tem;
9169c321
JB
748
749 /* Consult the newline cache, if appropriate. */
750 if (target == '\n' && newline_cache)
751 {
752 int next_change;
753 immediate_quit = 0;
754 while (region_cache_backward
fa8ed3e0
RS
755 (current_buffer, newline_cache, start_byte, &next_change))
756 start_byte = next_change;
cbe0db0d 757 immediate_quit = allow_quit;
9169c321
JB
758
759 /* Start should never be at or before end. */
fa8ed3e0
RS
760 if (start_byte <= ceiling_byte)
761 start_byte = ceiling_byte + 1;
9169c321
JB
762
763 /* Now the text before start is an unknown region, and
764 next_change is the position of the next known region. */
fa8ed3e0 765 ceiling_byte = max (next_change, ceiling_byte);
9169c321
JB
766 }
767
768 /* Stop scanning before the gap. */
67ce527d
KH
769 tem = BUFFER_FLOOR_OF (start_byte - 1);
770 ceiling_byte = max (tem, ceiling_byte);
9169c321
JB
771
772 {
773 /* The termination address of the dumb loop. */
fa8ed3e0
RS
774 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
775 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
776 unsigned char *base = cursor;
777
778 while (cursor >= ceiling_addr)
779 {
780 unsigned char *scan_start = cursor;
781
782 while (*cursor != target && --cursor >= ceiling_addr)
783 ;
784
785 /* If we're looking for newlines, cache the fact that
786 the region from after the cursor to start is free of them. */
787 if (target == '\n' && newline_cache)
788 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
789 start_byte + cursor - base,
790 start_byte + scan_start - base);
9169c321
JB
791
792 /* Did we find the target character? */
793 if (cursor >= ceiling_addr)
794 {
795 if (++count >= 0)
796 {
797 immediate_quit = 0;
fa8ed3e0 798 return BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
799 }
800 cursor--;
801 }
802 }
803
fa8ed3e0 804 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
805 }
806 }
807
ca1d1d23
JB
808 immediate_quit = 0;
809 if (shortage != 0)
ffd56f97 810 *shortage = count * direction;
9169c321 811 return start;
ca1d1d23 812}
fa8ed3e0
RS
813\f
814/* Search for COUNT instances of a line boundary, which means either a
815 newline or (if selective display enabled) a carriage return.
816 Start at START. If COUNT is negative, search backwards.
817
818 We report the resulting position by calling TEMP_SET_PT_BOTH.
819
820 If we find COUNT instances. we position after (always after,
821 even if scanning backwards) the COUNTth match, and return 0.
822
823 If we don't find COUNT instances before reaching the end of the
824 buffer (or the beginning, if scanning backwards), we return
825 the number of line boundaries left unfound, and position at
826 the limit we bumped up against.
827
828 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
d5d57b92 829 except in special cases. */
ca1d1d23 830
63fa018d 831int
fa8ed3e0
RS
832scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
833 int start, start_byte;
834 int limit, limit_byte;
835 register int count;
836 int allow_quit;
63fa018d 837{
fa8ed3e0
RS
838 int direction = ((count > 0) ? 1 : -1);
839
840 register unsigned char *cursor;
841 unsigned char *base;
842
843 register int ceiling;
844 register unsigned char *ceiling_addr;
845
d5d57b92
RS
846 int old_immediate_quit = immediate_quit;
847
fa8ed3e0
RS
848 /* The code that follows is like scan_buffer
849 but checks for either newline or carriage return. */
850
d5d57b92
RS
851 if (allow_quit)
852 immediate_quit++;
fa8ed3e0
RS
853
854 start_byte = CHAR_TO_BYTE (start);
855
856 if (count > 0)
857 {
858 while (start_byte < limit_byte)
859 {
860 ceiling = BUFFER_CEILING_OF (start_byte);
861 ceiling = min (limit_byte - 1, ceiling);
862 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
863 base = (cursor = BYTE_POS_ADDR (start_byte));
864 while (1)
865 {
866 while (*cursor != '\n' && ++cursor != ceiling_addr)
867 ;
868
869 if (cursor != ceiling_addr)
870 {
871 if (--count == 0)
872 {
d5d57b92 873 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
874 start_byte = start_byte + cursor - base + 1;
875 start = BYTE_TO_CHAR (start_byte);
876 TEMP_SET_PT_BOTH (start, start_byte);
877 return 0;
878 }
879 else
880 if (++cursor == ceiling_addr)
881 break;
882 }
883 else
884 break;
885 }
886 start_byte += cursor - base;
887 }
888 }
889 else
890 {
fa8ed3e0
RS
891 while (start_byte > limit_byte)
892 {
893 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
894 ceiling = max (limit_byte, ceiling);
895 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
896 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
897 while (1)
898 {
899 while (--cursor != ceiling_addr && *cursor != '\n')
900 ;
901
902 if (cursor != ceiling_addr)
903 {
904 if (++count == 0)
905 {
d5d57b92 906 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
907 /* Return the position AFTER the match we found. */
908 start_byte = start_byte + cursor - base + 1;
909 start = BYTE_TO_CHAR (start_byte);
910 TEMP_SET_PT_BOTH (start, start_byte);
911 return 0;
912 }
913 }
914 else
915 break;
916 }
917 /* Here we add 1 to compensate for the last decrement
918 of CURSOR, which took it past the valid range. */
919 start_byte += cursor - base + 1;
920 }
921 }
922
923 TEMP_SET_PT_BOTH (limit, limit_byte);
d5d57b92 924 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
925
926 return count * direction;
63fa018d
RS
927}
928
ca1d1d23 929int
fa8ed3e0 930find_next_newline_no_quit (from, cnt)
ca1d1d23
JB
931 register int from, cnt;
932{
fa8ed3e0 933 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
9169c321
JB
934}
935
9169c321
JB
936/* Like find_next_newline, but returns position before the newline,
937 not after, and only search up to TO. This isn't just
938 find_next_newline (...)-1, because you might hit TO. */
fa8ed3e0 939
9169c321
JB
940int
941find_before_next_newline (from, to, cnt)
cbe0db0d 942 int from, to, cnt;
9169c321
JB
943{
944 int shortage;
945 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
946
947 if (shortage == 0)
948 pos--;
177c0ea7 949
9169c321 950 return pos;
ca1d1d23
JB
951}
952\f
ca1d1d23
JB
953/* Subroutines of Lisp buffer search functions. */
954
955static Lisp_Object
b819a390 956search_command (string, bound, noerror, count, direction, RE, posix)
ca1d1d23
JB
957 Lisp_Object string, bound, noerror, count;
958 int direction;
959 int RE;
b819a390 960 int posix;
ca1d1d23
JB
961{
962 register int np;
9f43ad85 963 int lim, lim_byte;
ca1d1d23
JB
964 int n = direction;
965
966 if (!NILP (count))
967 {
b7826503 968 CHECK_NUMBER (count);
ca1d1d23
JB
969 n *= XINT (count);
970 }
971
b7826503 972 CHECK_STRING (string);
ca1d1d23 973 if (NILP (bound))
9f43ad85
RS
974 {
975 if (n > 0)
976 lim = ZV, lim_byte = ZV_BYTE;
977 else
978 lim = BEGV, lim_byte = BEGV_BYTE;
979 }
ca1d1d23
JB
980 else
981 {
b7826503 982 CHECK_NUMBER_COERCE_MARKER (bound);
ca1d1d23 983 lim = XINT (bound);
6ec8bbd2 984 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
985 error ("Invalid search bound (wrong side of point)");
986 if (lim > ZV)
9f43ad85 987 lim = ZV, lim_byte = ZV_BYTE;
588d2fd5 988 else if (lim < BEGV)
9f43ad85 989 lim = BEGV, lim_byte = BEGV_BYTE;
588d2fd5
KH
990 else
991 lim_byte = CHAR_TO_BYTE (lim);
ca1d1d23
JB
992 }
993
910c747a
RS
994 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
995 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
996 = current_buffer->case_eqv_table;
997
9f43ad85 998 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
ca1d1d23 999 (!NILP (current_buffer->case_fold_search)
facdc750 1000 ? current_buffer->case_canon_table
3135e9fd 1001 : Qnil),
ca1d1d23 1002 (!NILP (current_buffer->case_fold_search)
facdc750 1003 ? current_buffer->case_eqv_table
3135e9fd 1004 : Qnil),
b819a390 1005 posix);
ca1d1d23
JB
1006 if (np <= 0)
1007 {
1008 if (NILP (noerror))
06f77a8a
KS
1009 xsignal1 (Qsearch_failed, string);
1010
ca1d1d23
JB
1011 if (!EQ (noerror, Qt))
1012 {
1013 if (lim < BEGV || lim > ZV)
1014 abort ();
9f43ad85 1015 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
1016 return Qnil;
1017#if 0 /* This would be clean, but maybe programs depend on
1018 a value of nil here. */
481399bf 1019 np = lim;
a5f217b8 1020#endif
ca1d1d23 1021 }
481399bf
RS
1022 else
1023 return Qnil;
ca1d1d23
JB
1024 }
1025
1026 if (np < BEGV || np > ZV)
1027 abort ();
1028
1029 SET_PT (np);
1030
1031 return make_number (np);
1032}
1033\f
fa8ed3e0
RS
1034/* Return 1 if REGEXP it matches just one constant string. */
1035
b6d6a51c
KH
1036static int
1037trivial_regexp_p (regexp)
1038 Lisp_Object regexp;
1039{
d5db4077
KR
1040 int len = SBYTES (regexp);
1041 unsigned char *s = SDATA (regexp);
b6d6a51c
KH
1042 while (--len >= 0)
1043 {
1044 switch (*s++)
1045 {
1046 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1047 return 0;
1048 case '\\':
1049 if (--len < 0)
1050 return 0;
1051 switch (*s++)
1052 {
1053 case '|': case '(': case ')': case '`': case '\'': case 'b':
1054 case 'B': case '<': case '>': case 'w': case 'W': case 's':
29f89fe7 1055 case 'S': case '=': case '{': case '}': case '_':
5679531d 1056 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 1057 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
1058 case '6': case '7': case '8': case '9':
1059 return 0;
1060 }
1061 }
1062 }
1063 return 1;
1064}
1065
ca325161 1066/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 1067 starting at position POS and stopping at position LIM,
b819a390 1068 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
1069 a regular expression if RE is true.
1070
1071 If N is positive, searching is forward and LIM must be greater than POS.
1072 If N is negative, searching is backward and LIM must be less than POS.
1073
facdc750 1074 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 1075 or else the position at the beginning of the Nth occurrence
b819a390
RS
1076 (if searching backward) or the end (if searching forward).
1077
1078 POSIX is nonzero if we want full backtracking (POSIX style)
1079 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 1080
aff2ce94
RS
1081#define TRANSLATE(out, trt, d) \
1082do \
1083 { \
1084 if (! NILP (trt)) \
1085 { \
1086 Lisp_Object temp; \
1087 temp = Faref (trt, make_number (d)); \
1088 if (INTEGERP (temp)) \
1089 out = XINT (temp); \
1090 else \
1091 out = d; \
1092 } \
1093 else \
1094 out = d; \
1095 } \
1096while (0)
facdc750 1097
ef887810
RS
1098/* Only used in search_buffer, to record the end position of the match
1099 when searching regexps and SEARCH_REGS should not be changed
1100 (i.e. Vinhibit_changing_match_data is non-nil). */
1101static struct re_registers search_regs_1;
1102
b819a390 1103static int
9f43ad85
RS
1104search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1105 RE, trt, inverse_trt, posix)
ca1d1d23
JB
1106 Lisp_Object string;
1107 int pos;
9f43ad85 1108 int pos_byte;
ca1d1d23 1109 int lim;
9f43ad85 1110 int lim_byte;
ca1d1d23
JB
1111 int n;
1112 int RE;
facdc750
RS
1113 Lisp_Object trt;
1114 Lisp_Object inverse_trt;
b819a390 1115 int posix;
ca1d1d23 1116{
d5db4077
KR
1117 int len = SCHARS (string);
1118 int len_byte = SBYTES (string);
facdc750 1119 register int i;
ca1d1d23 1120
7074fde6
FP
1121 if (running_asynch_code)
1122 save_search_regs ();
1123
a7e4cdde 1124 /* Searching 0 times means don't move. */
ca1d1d23 1125 /* Null string is found at starting position. */
a7e4cdde 1126 if (len == 0 || n == 0)
ca325161 1127 {
0353b28f 1128 set_search_regs (pos_byte, 0);
ca325161
RS
1129 return pos;
1130 }
3f57a499 1131
41a33295 1132 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
ca1d1d23 1133 {
facdc750
RS
1134 unsigned char *p1, *p2;
1135 int s1, s2;
487282dc
KH
1136 struct re_pattern_buffer *bufp;
1137
ef887810
RS
1138 bufp = compile_pattern (string,
1139 (NILP (Vinhibit_changing_match_data)
1140 ? &search_regs : &search_regs_1),
1141 trt, posix,
0c8533c6 1142 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23 1143
ca1d1d23
JB
1144 immediate_quit = 1; /* Quit immediately if user types ^G,
1145 because letting this function finish
1146 can take too long. */
1147 QUIT; /* Do a pending quit right away,
1148 to avoid paradoxical behavior */
1149 /* Get pointers and sizes of the two strings
1150 that make up the visible portion of the buffer. */
1151
1152 p1 = BEGV_ADDR;
fa8ed3e0 1153 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1154 p2 = GAP_END_ADDR;
fa8ed3e0 1155 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1156 if (s1 < 0)
1157 {
1158 p2 = p1;
fa8ed3e0 1159 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1160 s1 = 0;
1161 }
1162 if (s2 < 0)
1163 {
fa8ed3e0 1164 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1165 s2 = 0;
1166 }
8bb43c28 1167 re_match_object = Qnil;
177c0ea7 1168
ca1d1d23
JB
1169 while (n < 0)
1170 {
42db823b 1171 int val;
487282dc 1172 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b 1173 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
ef887810
RS
1174 (NILP (Vinhibit_changing_match_data)
1175 ? &search_regs : &search_regs_1),
42db823b 1176 /* Don't allow match past current point */
4996330b 1177 pos_byte - BEGV_BYTE);
ca1d1d23 1178 if (val == -2)
b6d6a51c
KH
1179 {
1180 matcher_overflow ();
1181 }
ca1d1d23
JB
1182 if (val >= 0)
1183 {
ef887810
RS
1184 if (NILP (Vinhibit_changing_match_data))
1185 {
1186 pos_byte = search_regs.start[0] + BEGV_BYTE;
1187 for (i = 0; i < search_regs.num_regs; i++)
1188 if (search_regs.start[i] >= 0)
1189 {
1190 search_regs.start[i]
1191 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1192 search_regs.end[i]
1193 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1194 }
1195 XSETBUFFER (last_thing_searched, current_buffer);
1196 /* Set pos to the new position. */
1197 pos = search_regs.start[0];
1198 }
1199 else
1200 {
1201 pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1202 /* Set pos to the new position. */
1203 pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1204 }
ca1d1d23
JB
1205 }
1206 else
1207 {
1208 immediate_quit = 0;
1209 return (n);
1210 }
1211 n++;
1212 }
1213 while (n > 0)
1214 {
42db823b 1215 int val;
487282dc 1216 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b 1217 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
ef887810
RS
1218 (NILP (Vinhibit_changing_match_data)
1219 ? &search_regs : &search_regs_1),
4996330b 1220 lim_byte - BEGV_BYTE);
ca1d1d23 1221 if (val == -2)
b6d6a51c
KH
1222 {
1223 matcher_overflow ();
1224 }
ca1d1d23
JB
1225 if (val >= 0)
1226 {
ef887810
RS
1227 if (NILP (Vinhibit_changing_match_data))
1228 {
1229 pos_byte = search_regs.end[0] + BEGV_BYTE;
1230 for (i = 0; i < search_regs.num_regs; i++)
1231 if (search_regs.start[i] >= 0)
1232 {
1233 search_regs.start[i]
1234 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1235 search_regs.end[i]
1236 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1237 }
1238 XSETBUFFER (last_thing_searched, current_buffer);
1239 pos = search_regs.end[0];
1240 }
1241 else
1242 {
1243 pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1244 pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1245 }
ca1d1d23
JB
1246 }
1247 else
1248 {
1249 immediate_quit = 0;
1250 return (0 - n);
1251 }
1252 n--;
1253 }
1254 immediate_quit = 0;
1255 return (pos);
1256 }
1257 else /* non-RE case */
1258 {
facdc750
RS
1259 unsigned char *raw_pattern, *pat;
1260 int raw_pattern_size;
1261 int raw_pattern_size_byte;
1262 unsigned char *patbuf;
1263 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
a967ed62 1264 unsigned char *base_pat;
fed91c38
KH
1265 /* Set to positive if we find a non-ASCII char that need
1266 translation. Otherwise set to zero later. */
1267 int charset_base = -1;
040272ce 1268 int boyer_moore_ok = 1;
facdc750
RS
1269
1270 /* MULTIBYTE says whether the text to be searched is multibyte.
1271 We must convert PATTERN to match that, or we will not really
1272 find things right. */
1273
1274 if (multibyte == STRING_MULTIBYTE (string))
1275 {
d5db4077
KR
1276 raw_pattern = (unsigned char *) SDATA (string);
1277 raw_pattern_size = SCHARS (string);
1278 raw_pattern_size_byte = SBYTES (string);
facdc750
RS
1279 }
1280 else if (multibyte)
1281 {
d5db4077 1282 raw_pattern_size = SCHARS (string);
facdc750 1283 raw_pattern_size_byte
d5db4077 1284 = count_size_as_multibyte (SDATA (string),
facdc750 1285 raw_pattern_size);
7276d3d8 1286 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
d5db4077
KR
1287 copy_text (SDATA (string), raw_pattern,
1288 SCHARS (string), 0, 1);
facdc750
RS
1289 }
1290 else
1291 {
1292 /* Converting multibyte to single-byte.
1293
1294 ??? Perhaps this conversion should be done in a special way
1295 by subtracting nonascii-insert-offset from each non-ASCII char,
1296 so that only the multibyte chars which really correspond to
1297 the chosen single-byte character set can possibly match. */
d5db4077
KR
1298 raw_pattern_size = SCHARS (string);
1299 raw_pattern_size_byte = SCHARS (string);
7276d3d8 1300 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
1301 copy_text (SDATA (string), raw_pattern,
1302 SBYTES (string), 1, 0);
facdc750
RS
1303 }
1304
1305 /* Copy and optionally translate the pattern. */
1306 len = raw_pattern_size;
1307 len_byte = raw_pattern_size_byte;
1308 patbuf = (unsigned char *) alloca (len_byte);
1309 pat = patbuf;
1310 base_pat = raw_pattern;
1311 if (multibyte)
1312 {
0190922f
KH
1313 /* Fill patbuf by translated characters in STRING while
1314 checking if we can use boyer-moore search. If TRT is
1315 non-nil, we can use boyer-moore search only if TRT can be
1316 represented by the byte array of 256 elements. For that,
1317 all non-ASCII case-equivalents of all case-senstive
1318 characters in STRING must belong to the same charset and
1319 row. */
1320
facdc750
RS
1321 while (--len >= 0)
1322 {
0190922f 1323 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
aff2ce94 1324 int c, translated, inverse;
facdc750
RS
1325 int in_charlen, charlen;
1326
1327 /* If we got here and the RE flag is set, it's because we're
1328 dealing with a regexp known to be trivial, so the backslash
1329 just quotes the next character. */
1330 if (RE && *base_pat == '\\')
1331 {
1332 len--;
0190922f 1333 raw_pattern_size--;
facdc750
RS
1334 len_byte--;
1335 base_pat++;
1336 }
1337
1338 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
040272ce 1339
0190922f 1340 if (NILP (trt))
facdc750 1341 {
0190922f
KH
1342 str = base_pat;
1343 charlen = in_charlen;
facdc750 1344 }
0190922f 1345 else
facdc750 1346 {
0190922f
KH
1347 /* Translate the character. */
1348 TRANSLATE (translated, trt, c);
1349 charlen = CHAR_STRING (translated, str_base);
1350 str = str_base;
1351
1352 /* Check if C has any other case-equivalents. */
1353 TRANSLATE (inverse, inverse_trt, c);
1354 /* If so, check if we can use boyer-moore. */
1355 if (c != inverse && boyer_moore_ok)
1356 {
1357 /* Check if all equivalents belong to the same
1358 charset & row. Note that the check of C
1359 itself is done by the last iteration. Note
1360 also that we don't have to check ASCII
1361 characters because boyer-moore search can
1362 always handle their translation. */
1363 while (1)
1364 {
fed91c38 1365 if (ASCII_BYTE_P (inverse))
0190922f 1366 {
fed91c38 1367 if (charset_base > 0)
0190922f
KH
1368 {
1369 boyer_moore_ok = 0;
1370 break;
1371 }
fed91c38
KH
1372 charset_base = 0;
1373 }
1374 else if (SINGLE_BYTE_CHAR_P (inverse))
1375 {
1376 /* Boyer-moore search can't handle a
1377 translation of an eight-bit
1378 character. */
1379 boyer_moore_ok = 0;
1380 break;
1381 }
1382 else if (charset_base < 0)
1383 charset_base = inverse & ~CHAR_FIELD3_MASK;
1384 else if ((inverse & ~CHAR_FIELD3_MASK)
1385 != charset_base)
1386 {
1387 boyer_moore_ok = 0;
1388 break;
0190922f
KH
1389 }
1390 if (c == inverse)
1391 break;
1392 TRANSLATE (inverse, inverse_trt, inverse);
1393 }
1394 }
aff2ce94 1395 }
fed91c38
KH
1396 if (charset_base < 0)
1397 charset_base = 0;
facdc750
RS
1398
1399 /* Store this character into the translated pattern. */
1400 bcopy (str, pat, charlen);
1401 pat += charlen;
1402 base_pat += in_charlen;
1403 len_byte -= in_charlen;
1404 }
1405 }
1406 else
1407 {
040272ce
KH
1408 /* Unibyte buffer. */
1409 charset_base = 0;
facdc750
RS
1410 while (--len >= 0)
1411 {
040272ce 1412 int c, translated;
facdc750
RS
1413
1414 /* If we got here and the RE flag is set, it's because we're
1415 dealing with a regexp known to be trivial, so the backslash
1416 just quotes the next character. */
1417 if (RE && *base_pat == '\\')
1418 {
1419 len--;
0190922f 1420 raw_pattern_size--;
facdc750
RS
1421 base_pat++;
1422 }
1423 c = *base_pat++;
aff2ce94 1424 TRANSLATE (translated, trt, c);
facdc750
RS
1425 *pat++ = translated;
1426 }
1427 }
1428
1429 len_byte = pat - patbuf;
1430 len = raw_pattern_size;
1431 pat = base_pat = patbuf;
1432
040272ce 1433 if (boyer_moore_ok)
facdc750 1434 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
aff2ce94
RS
1435 pos, pos_byte, lim, lim_byte,
1436 charset_base);
facdc750
RS
1437 else
1438 return simple_search (n, pat, len, len_byte, trt,
1439 pos, pos_byte, lim, lim_byte);
1440 }
1441}
1442\f
1443/* Do a simple string search N times for the string PAT,
1444 whose length is LEN/LEN_BYTE,
1445 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1446 TRT is the translation table.
f8bd51c4 1447
facdc750
RS
1448 Return the character position where the match is found.
1449 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1450
facdc750
RS
1451 This kind of search works regardless of what is in PAT and
1452 regardless of what is in TRT. It is used in cases where
1453 boyer_moore cannot work. */
1454
1455static int
1456simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1457 int n;
1458 unsigned char *pat;
1459 int len, len_byte;
1460 Lisp_Object trt;
1461 int pos, pos_byte;
1462 int lim, lim_byte;
1463{
1464 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
ab228c24 1465 int forward = n > 0;
facdc750
RS
1466
1467 if (lim > pos && multibyte)
1468 while (n > 0)
1469 {
1470 while (1)
f8bd51c4 1471 {
facdc750
RS
1472 /* Try matching at position POS. */
1473 int this_pos = pos;
1474 int this_pos_byte = pos_byte;
1475 int this_len = len;
1476 int this_len_byte = len_byte;
1477 unsigned char *p = pat;
1478 if (pos + len > lim)
1479 goto stop;
1480
1481 while (this_len > 0)
1482 {
1483 int charlen, buf_charlen;
ab228c24 1484 int pat_ch, buf_ch;
facdc750 1485
ab228c24 1486 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1487 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1488 ZV_BYTE - this_pos_byte,
1489 buf_charlen);
aff2ce94 1490 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1491
1492 if (buf_ch != pat_ch)
1493 break;
ab228c24
RS
1494
1495 this_len_byte -= charlen;
1496 this_len--;
1497 p += charlen;
1498
1499 this_pos_byte += buf_charlen;
1500 this_pos++;
facdc750
RS
1501 }
1502
1503 if (this_len == 0)
1504 {
1505 pos += len;
1506 pos_byte += len_byte;
1507 break;
1508 }
1509
1510 INC_BOTH (pos, pos_byte);
f8bd51c4 1511 }
facdc750
RS
1512
1513 n--;
1514 }
1515 else if (lim > pos)
1516 while (n > 0)
1517 {
1518 while (1)
f8bd51c4 1519 {
facdc750
RS
1520 /* Try matching at position POS. */
1521 int this_pos = pos;
1522 int this_len = len;
1523 unsigned char *p = pat;
1524
1525 if (pos + len > lim)
1526 goto stop;
1527
1528 while (this_len > 0)
1529 {
1530 int pat_ch = *p++;
1531 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1532 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1533
1534 if (buf_ch != pat_ch)
1535 break;
ab228c24
RS
1536
1537 this_len--;
1538 this_pos++;
facdc750
RS
1539 }
1540
1541 if (this_len == 0)
1542 {
1543 pos += len;
1544 break;
1545 }
1546
1547 pos++;
f8bd51c4 1548 }
facdc750
RS
1549
1550 n--;
1551 }
1552 /* Backwards search. */
1553 else if (lim < pos && multibyte)
1554 while (n < 0)
1555 {
1556 while (1)
f8bd51c4 1557 {
facdc750
RS
1558 /* Try matching at position POS. */
1559 int this_pos = pos - len;
1560 int this_pos_byte = pos_byte - len_byte;
1561 int this_len = len;
1562 int this_len_byte = len_byte;
1563 unsigned char *p = pat;
1564
347d9bbe 1565 if (this_pos < lim || this_pos_byte < lim_byte)
facdc750
RS
1566 goto stop;
1567
1568 while (this_len > 0)
1569 {
1570 int charlen, buf_charlen;
ab228c24 1571 int pat_ch, buf_ch;
facdc750 1572
ab228c24 1573 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1574 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1575 ZV_BYTE - this_pos_byte,
1576 buf_charlen);
aff2ce94 1577 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1578
1579 if (buf_ch != pat_ch)
1580 break;
ab228c24
RS
1581
1582 this_len_byte -= charlen;
1583 this_len--;
1584 p += charlen;
1585 this_pos_byte += buf_charlen;
1586 this_pos++;
facdc750
RS
1587 }
1588
1589 if (this_len == 0)
1590 {
1591 pos -= len;
1592 pos_byte -= len_byte;
1593 break;
1594 }
1595
1596 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1597 }
1598
facdc750
RS
1599 n++;
1600 }
1601 else if (lim < pos)
1602 while (n < 0)
1603 {
1604 while (1)
b6d6a51c 1605 {
facdc750
RS
1606 /* Try matching at position POS. */
1607 int this_pos = pos - len;
1608 int this_len = len;
1609 unsigned char *p = pat;
1610
1611 if (pos - len < lim)
1612 goto stop;
1613
1614 while (this_len > 0)
1615 {
1616 int pat_ch = *p++;
1617 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1618 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1619
1620 if (buf_ch != pat_ch)
1621 break;
ab228c24
RS
1622 this_len--;
1623 this_pos++;
facdc750
RS
1624 }
1625
1626 if (this_len == 0)
b6d6a51c 1627 {
facdc750
RS
1628 pos -= len;
1629 break;
b6d6a51c 1630 }
facdc750
RS
1631
1632 pos--;
b6d6a51c 1633 }
facdc750
RS
1634
1635 n++;
b6d6a51c 1636 }
facdc750
RS
1637
1638 stop:
1639 if (n == 0)
aff2ce94 1640 {
ab228c24
RS
1641 if (forward)
1642 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1643 else
1644 set_search_regs (multibyte ? pos_byte : pos, len_byte);
aff2ce94
RS
1645
1646 return pos;
1647 }
facdc750
RS
1648 else if (n > 0)
1649 return -n;
1650 else
1651 return n;
1652}
1653\f
0190922f 1654/* Do Boyer-Moore search N times for the string BASE_PAT,
facdc750
RS
1655 whose length is LEN/LEN_BYTE,
1656 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1657 DIRECTION says which direction we search in.
1658 TRT and INVERSE_TRT are translation tables.
0190922f 1659 Characters in PAT are already translated by TRT.
facdc750 1660
0190922f
KH
1661 This kind of search works if all the characters in BASE_PAT that
1662 have nontrivial translation are the same aside from the last byte.
1663 This makes it possible to translate just the last byte of a
1664 character, and do so after just a simple test of the context.
e0f24100 1665 CHARSET_BASE is nonzero if there is such a non-ASCII character.
facdc750
RS
1666
1667 If that criterion is not satisfied, do not call this function. */
1668
1669static int
1670boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
aff2ce94 1671 pos, pos_byte, lim, lim_byte, charset_base)
facdc750
RS
1672 int n;
1673 unsigned char *base_pat;
1674 int len, len_byte;
1675 Lisp_Object trt;
1676 Lisp_Object inverse_trt;
1677 int pos, pos_byte;
1678 int lim, lim_byte;
aff2ce94 1679 int charset_base;
facdc750
RS
1680{
1681 int direction = ((n > 0) ? 1 : -1);
1682 register int dirlen;
a968f437 1683 int infinity, limit, stride_for_teases = 0;
facdc750
RS
1684 register int *BM_tab;
1685 int *BM_tab_base;
177c0ea7 1686 register unsigned char *cursor, *p_limit;
facdc750 1687 register int i, j;
cb6792d2 1688 unsigned char *pat, *pat_end;
facdc750
RS
1689 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1690
1691 unsigned char simple_translate[0400];
0190922f
KH
1692 /* These are set to the preceding bytes of a byte to be translated
1693 if charset_base is nonzero. As the maximum byte length of a
1694 multibyte character is 4, we have to check at most three previous
1695 bytes. */
1696 int translate_prev_byte1 = 0;
1697 int translate_prev_byte2 = 0;
1698 int translate_prev_byte3 = 0;
facdc750 1699
facdc750 1700 BM_tab = (int *) alloca (0400 * sizeof (int));
e2a51aa8 1701
facdc750
RS
1702 /* The general approach is that we are going to maintain that we know */
1703 /* the first (closest to the present position, in whatever direction */
1704 /* we're searching) character that could possibly be the last */
1705 /* (furthest from present position) character of a valid match. We */
1706 /* advance the state of our knowledge by looking at that character */
1707 /* and seeing whether it indeed matches the last character of the */
1708 /* pattern. If it does, we take a closer look. If it does not, we */
1709 /* move our pointer (to putative last characters) as far as is */
1710 /* logically possible. This amount of movement, which I call a */
1711 /* stride, will be the length of the pattern if the actual character */
1712 /* appears nowhere in the pattern, otherwise it will be the distance */
1713 /* from the last occurrence of that character to the end of the */
1714 /* pattern. */
1715 /* As a coding trick, an enormous stride is coded into the table for */
1716 /* characters that match the last character. This allows use of only */
1717 /* a single test, a test for having gone past the end of the */
1718 /* permissible match region, to test for both possible matches (when */
1719 /* the stride goes past the end immediately) and failure to */
177c0ea7 1720 /* match (where you get nudged past the end one stride at a time). */
facdc750
RS
1721
1722 /* Here we make a "mickey mouse" BM table. The stride of the search */
1723 /* is determined only by the last character of the putative match. */
1724 /* If that character does not match, we will stride the proper */
1725 /* distance to propose a match that superimposes it on the last */
1726 /* instance of a character that matches it (per trt), or misses */
177c0ea7 1727 /* it entirely if there is none. */
facdc750
RS
1728
1729 dirlen = len_byte * direction;
1730 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
cb6792d2
RS
1731
1732 /* Record position after the end of the pattern. */
1733 pat_end = base_pat + len_byte;
1734 /* BASE_PAT points to a character that we start scanning from.
1735 It is the first character in a forward search,
1736 the last character in a backward search. */
facdc750 1737 if (direction < 0)
cb6792d2
RS
1738 base_pat = pat_end - 1;
1739
facdc750
RS
1740 BM_tab_base = BM_tab;
1741 BM_tab += 0400;
1742 j = dirlen; /* to get it in a register */
1743 /* A character that does not appear in the pattern induces a */
1744 /* stride equal to the pattern length. */
1745 while (BM_tab_base != BM_tab)
1746 {
1747 *--BM_tab = j;
1748 *--BM_tab = j;
1749 *--BM_tab = j;
1750 *--BM_tab = j;
1751 }
1752
1753 /* We use this for translation, instead of TRT itself.
1754 We fill this in to handle the characters that actually
1755 occur in the pattern. Others don't matter anyway! */
1756 bzero (simple_translate, sizeof simple_translate);
1757 for (i = 0; i < 0400; i++)
1758 simple_translate[i] = i;
1759
0190922f
KH
1760 if (charset_base)
1761 {
1762 /* Setup translate_prev_byte1/2/3 from CHARSET_BASE. Only a
1763 byte following them are the target of translation. */
1764 int sample_char = charset_base | 0x20;
1765 unsigned char str[MAX_MULTIBYTE_LENGTH];
1766 int len = CHAR_STRING (sample_char, str);
1767
1768 translate_prev_byte1 = str[len - 2];
1769 if (len > 2)
1770 {
1771 translate_prev_byte2 = str[len - 3];
1772 if (len > 3)
1773 translate_prev_byte3 = str[len - 4];
1774 }
1775 }
1776
facdc750
RS
1777 i = 0;
1778 while (i != infinity)
1779 {
cb6792d2 1780 unsigned char *ptr = base_pat + i;
facdc750
RS
1781 i += direction;
1782 if (i == dirlen)
1783 i = infinity;
1784 if (! NILP (trt))
ca1d1d23 1785 {
620a5178
KH
1786 /* If the byte currently looking at is the last of a
1787 character to check case-equivalents, set CH to that
1788 character. An ASCII character and a non-ASCII character
1789 matching with CHARSET_BASE are to be checked. */
0190922f
KH
1790 int ch = -1;
1791
1792 if (ASCII_BYTE_P (*ptr) || ! multibyte)
1793 ch = *ptr;
620a5178 1794 else if (charset_base
86dc6ccb 1795 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
ca1d1d23 1796 {
620a5178
KH
1797 unsigned char *charstart = ptr - 1;
1798
1799 while (! (CHAR_HEAD_P (*charstart)))
1800 charstart--;
1801 ch = STRING_CHAR (charstart, ptr - charstart + 1);
0190922f
KH
1802 if (charset_base != (ch & ~CHAR_FIELD3_MASK))
1803 ch = -1;
ca1d1d23 1804 }
facdc750 1805
6786a107 1806 if (ch >= 0400)
620a5178
KH
1807 j = ((unsigned char) ch) | 0200;
1808 else
1809 j = *ptr;
1810
facdc750
RS
1811 if (i == infinity)
1812 stride_for_teases = BM_tab[j];
ab228c24 1813
facdc750
RS
1814 BM_tab[j] = dirlen - i;
1815 /* A translation table is accompanied by its inverse -- see */
177c0ea7 1816 /* comment following downcase_table for details */
0190922f 1817 if (ch >= 0)
ab228c24
RS
1818 {
1819 int starting_ch = ch;
620a5178 1820 int starting_j = j;
0190922f 1821
ab228c24
RS
1822 while (1)
1823 {
1824 TRANSLATE (ch, inverse_trt, ch);
6786a107 1825 if (ch >= 0400)
ab228c24
RS
1826 j = ((unsigned char) ch) | 0200;
1827 else
1828 j = (unsigned char) ch;
1829
1830 /* For all the characters that map into CH,
1831 set up simple_translate to map the last byte
1832 into STARTING_J. */
1833 simple_translate[j] = starting_j;
1834 if (ch == starting_ch)
1835 break;
1836 BM_tab[j] = dirlen - i;
1837 }
1838 }
facdc750
RS
1839 }
1840 else
1841 {
1842 j = *ptr;
1843
1844 if (i == infinity)
1845 stride_for_teases = BM_tab[j];
1846 BM_tab[j] = dirlen - i;
ca1d1d23 1847 }
facdc750
RS
1848 /* stride_for_teases tells how much to stride if we get a */
1849 /* match on the far character but are subsequently */
1850 /* disappointed, by recording what the stride would have been */
1851 /* for that character if the last character had been */
1852 /* different. */
1853 }
1854 infinity = dirlen - infinity;
1855 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1856 /* loop invariant - POS_BYTE points at where last char (first
1857 char if reverse) of pattern would align in a possible match. */
1858 while (n != 0)
1859 {
1860 int tail_end;
1861 unsigned char *tail_end_ptr;
1862
1863 /* It's been reported that some (broken) compiler thinks that
1864 Boolean expressions in an arithmetic context are unsigned.
1865 Using an explicit ?1:0 prevents this. */
1866 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1867 < 0)
1868 return (n * (0 - direction));
1869 /* First we do the part we can by pointers (maybe nothing) */
1870 QUIT;
1871 pat = base_pat;
1872 limit = pos_byte - dirlen + direction;
67ce527d
KH
1873 if (direction > 0)
1874 {
1875 limit = BUFFER_CEILING_OF (limit);
1876 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1877 can take on without hitting edge of buffer or the gap. */
1878 limit = min (limit, pos_byte + 20000);
1879 limit = min (limit, lim_byte - 1);
1880 }
1881 else
1882 {
1883 limit = BUFFER_FLOOR_OF (limit);
1884 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1885 can take on without hitting edge of buffer or the gap. */
1886 limit = max (limit, pos_byte - 20000);
1887 limit = max (limit, lim_byte);
1888 }
facdc750
RS
1889 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1890 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1891
1892 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1893 {
facdc750
RS
1894 unsigned char *p2;
1895
1896 p_limit = BYTE_POS_ADDR (limit);
1897 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1898 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1899 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1900 {
facdc750 1901 if (direction > 0) /* worth duplicating */
ca1d1d23 1902 {
facdc750
RS
1903 /* Use signed comparison if appropriate
1904 to make cursor+infinity sure to be > p_limit.
1905 Assuming that the buffer lies in a range of addresses
1906 that are all "positive" (as ints) or all "negative",
1907 either kind of comparison will work as long
1908 as we don't step by infinity. So pick the kind
1909 that works when we do step by infinity. */
1910 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1911 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1912 cursor += BM_tab[*cursor];
ca1d1d23 1913 else
facdc750
RS
1914 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1915 cursor += BM_tab[*cursor];
1916 }
1917 else
1918 {
1919 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1920 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1921 cursor += BM_tab[*cursor];
1922 else
1923 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1924 cursor += BM_tab[*cursor];
1925 }
ca1d1d23 1926/* If you are here, cursor is beyond the end of the searched region. */
facdc750
RS
1927/* This can happen if you match on the far character of the pattern, */
1928/* because the "stride" of that character is infinity, a number able */
1929/* to throw you well beyond the end of the search. It can also */
1930/* happen if you fail to match within the permitted region and would */
1931/* otherwise try a character beyond that region */
1932 if ((cursor - p_limit) * direction <= len_byte)
1933 break; /* a small overrun is genuine */
1934 cursor -= infinity; /* large overrun = hit */
1935 i = dirlen - direction;
1936 if (! NILP (trt))
1937 {
1938 while ((i -= direction) + direction != 0)
ca1d1d23 1939 {
facdc750
RS
1940 int ch;
1941 cursor -= direction;
1942 /* Translate only the last byte of a character. */
1943 if (! multibyte
1944 || ((cursor == tail_end_ptr
1945 || CHAR_HEAD_P (cursor[1]))
1946 && (CHAR_HEAD_P (cursor[0])
0190922f
KH
1947 /* Check if this is the last byte of
1948 a translable character. */
1949 || (translate_prev_byte1 == cursor[-1]
1950 && (CHAR_HEAD_P (translate_prev_byte1)
1951 || (translate_prev_byte2 == cursor[-2]
1952 && (CHAR_HEAD_P (translate_prev_byte2)
1953 || (translate_prev_byte3 == cursor[-3]))))))))
facdc750
RS
1954 ch = simple_translate[*cursor];
1955 else
1956 ch = *cursor;
1957 if (pat[i] != ch)
1958 break;
ca1d1d23 1959 }
facdc750
RS
1960 }
1961 else
1962 {
1963 while ((i -= direction) + direction != 0)
ca1d1d23 1964 {
facdc750
RS
1965 cursor -= direction;
1966 if (pat[i] != *cursor)
1967 break;
ca1d1d23 1968 }
facdc750
RS
1969 }
1970 cursor += dirlen - i - direction; /* fix cursor */
1971 if (i + direction == 0)
1972 {
ef887810 1973 int position, start, end;
0c8533c6 1974
facdc750 1975 cursor -= direction;
1113d9db 1976
facdc750
RS
1977 position = pos_byte + cursor - p2 + ((direction > 0)
1978 ? 1 - len_byte : 0);
1979 set_search_regs (position, len_byte);
ca325161 1980
ef887810
RS
1981 if (NILP (Vinhibit_changing_match_data))
1982 {
1983 start = search_regs.start[0];
1984 end = search_regs.end[0];
1985 }
1986 else
1987 /* If Vinhibit_changing_match_data is non-nil,
1988 search_regs will not be changed. So let's
1989 compute start and end here. */
1990 {
1991 start = BYTE_TO_CHAR (position);
1992 end = BYTE_TO_CHAR (position + len_byte);
1993 }
1994
facdc750
RS
1995 if ((n -= direction) != 0)
1996 cursor += dirlen; /* to resume search */
ca1d1d23 1997 else
ef887810 1998 return direction > 0 ? end : start;
ca1d1d23 1999 }
facdc750
RS
2000 else
2001 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 2002 }
facdc750
RS
2003 pos_byte += cursor - p2;
2004 }
2005 else
2006 /* Now we'll pick up a clump that has to be done the hard */
2007 /* way because it covers a discontinuity */
2008 {
2009 limit = ((direction > 0)
2010 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
2011 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
2012 limit = ((direction > 0)
2013 ? min (limit + len_byte, lim_byte - 1)
2014 : max (limit - len_byte, lim_byte));
2015 /* LIMIT is now the last value POS_BYTE can have
2016 and still be valid for a possible match. */
2017 while (1)
ca1d1d23 2018 {
facdc750
RS
2019 /* This loop can be coded for space rather than */
2020 /* speed because it will usually run only once. */
2021 /* (the reach is at most len + 21, and typically */
177c0ea7 2022 /* does not exceed len) */
facdc750
RS
2023 while ((limit - pos_byte) * direction >= 0)
2024 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
2025 /* now run the same tests to distinguish going off the */
2026 /* end, a match or a phony match. */
2027 if ((pos_byte - limit) * direction <= len_byte)
2028 break; /* ran off the end */
2029 /* Found what might be a match.
2030 Set POS_BYTE back to last (first if reverse) pos. */
2031 pos_byte -= infinity;
2032 i = dirlen - direction;
2033 while ((i -= direction) + direction != 0)
ca1d1d23 2034 {
facdc750
RS
2035 int ch;
2036 unsigned char *ptr;
2037 pos_byte -= direction;
2038 ptr = BYTE_POS_ADDR (pos_byte);
2039 /* Translate only the last byte of a character. */
2040 if (! multibyte
2041 || ((ptr == tail_end_ptr
2042 || CHAR_HEAD_P (ptr[1]))
2043 && (CHAR_HEAD_P (ptr[0])
0190922f
KH
2044 /* Check if this is the last byte of a
2045 translable character. */
2046 || (translate_prev_byte1 == ptr[-1]
2047 && (CHAR_HEAD_P (translate_prev_byte1)
2048 || (translate_prev_byte2 == ptr[-2]
2049 && (CHAR_HEAD_P (translate_prev_byte2)
2050 || translate_prev_byte3 == ptr[-3])))))))
facdc750
RS
2051 ch = simple_translate[*ptr];
2052 else
2053 ch = *ptr;
2054 if (pat[i] != ch)
2055 break;
2056 }
2057 /* Above loop has moved POS_BYTE part or all the way
2058 back to the first pos (last pos if reverse).
2059 Set it once again at the last (first if reverse) char. */
2060 pos_byte += dirlen - i- direction;
2061 if (i + direction == 0)
2062 {
ef887810 2063 int position, start, end;
facdc750 2064 pos_byte -= direction;
1113d9db 2065
facdc750 2066 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
facdc750 2067 set_search_regs (position, len_byte);
ca325161 2068
ef887810
RS
2069 if (NILP (Vinhibit_changing_match_data))
2070 {
2071 start = search_regs.start[0];
2072 end = search_regs.end[0];
2073 }
2074 else
2075 /* If Vinhibit_changing_match_data is non-nil,
2076 search_regs will not be changed. So let's
2077 compute start and end here. */
2078 {
2079 start = BYTE_TO_CHAR (position);
2080 end = BYTE_TO_CHAR (position + len_byte);
2081 }
2082
facdc750
RS
2083 if ((n -= direction) != 0)
2084 pos_byte += dirlen; /* to resume search */
ca1d1d23 2085 else
ef887810 2086 return direction > 0 ? end : start;
ca1d1d23 2087 }
facdc750
RS
2088 else
2089 pos_byte += stride_for_teases;
2090 }
2091 }
2092 /* We have done one clump. Can we continue? */
2093 if ((lim_byte - pos_byte) * direction < 0)
2094 return ((0 - n) * direction);
ca1d1d23 2095 }
facdc750 2096 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 2097}
ca325161 2098
fa8ed3e0 2099/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
a7e4cdde
RS
2100 for the overall match just found in the current buffer.
2101 Also clear out the match data for registers 1 and up. */
ca325161
RS
2102
2103static void
fa8ed3e0
RS
2104set_search_regs (beg_byte, nbytes)
2105 int beg_byte, nbytes;
ca325161 2106{
a7e4cdde
RS
2107 int i;
2108
ef887810
RS
2109 if (!NILP (Vinhibit_changing_match_data))
2110 return;
2111
ca325161
RS
2112 /* Make sure we have registers in which to store
2113 the match position. */
2114 if (search_regs.num_regs == 0)
2115 {
2d4a771a
RS
2116 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2117 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 2118 search_regs.num_regs = 2;
ca325161
RS
2119 }
2120
a7e4cdde
RS
2121 /* Clear out the other registers. */
2122 for (i = 1; i < search_regs.num_regs; i++)
2123 {
2124 search_regs.start[i] = -1;
2125 search_regs.end[i] = -1;
2126 }
2127
fa8ed3e0
RS
2128 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2129 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 2130 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 2131}
ca1d1d23
JB
2132\f
2133/* Given a string of words separated by word delimiters,
2134 compute a regexp that matches those exact words
2135 separated by arbitrary punctuation. */
2136
2137static Lisp_Object
2138wordify (string)
2139 Lisp_Object string;
2140{
2141 register unsigned char *p, *o;
0c8533c6 2142 register int i, i_byte, len, punct_count = 0, word_count = 0;
ca1d1d23 2143 Lisp_Object val;
0c8533c6
RS
2144 int prev_c = 0;
2145 int adjust;
ca1d1d23 2146
b7826503 2147 CHECK_STRING (string);
d5db4077
KR
2148 p = SDATA (string);
2149 len = SCHARS (string);
ca1d1d23 2150
0c8533c6
RS
2151 for (i = 0, i_byte = 0; i < len; )
2152 {
2153 int c;
177c0ea7 2154
eb99a8dd 2155 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
0c8533c6
RS
2156
2157 if (SYNTAX (c) != Sword)
2158 {
2159 punct_count++;
2160 if (i > 0 && SYNTAX (prev_c) == Sword)
2161 word_count++;
2162 }
ca1d1d23 2163
0c8533c6
RS
2164 prev_c = c;
2165 }
2166
2167 if (SYNTAX (prev_c) == Sword)
2168 word_count++;
2169 if (!word_count)
c60416e0 2170 return empty_unibyte_string;
0c8533c6
RS
2171
2172 adjust = - punct_count + 5 * (word_count - 1) + 4;
8a2df937
RS
2173 if (STRING_MULTIBYTE (string))
2174 val = make_uninit_multibyte_string (len + adjust,
d5db4077 2175 SBYTES (string)
8a2df937
RS
2176 + adjust);
2177 else
2178 val = make_uninit_string (len + adjust);
ca1d1d23 2179
d5db4077 2180 o = SDATA (val);
ca1d1d23
JB
2181 *o++ = '\\';
2182 *o++ = 'b';
1e9582d4 2183 prev_c = 0;
ca1d1d23 2184
1e9582d4
RS
2185 for (i = 0, i_byte = 0; i < len; )
2186 {
2187 int c;
2188 int i_byte_orig = i_byte;
177c0ea7 2189
eb99a8dd 2190 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1e9582d4
RS
2191
2192 if (SYNTAX (c) == Sword)
2193 {
5d69fe10 2194 bcopy (SDATA (string) + i_byte_orig, o,
1e9582d4
RS
2195 i_byte - i_byte_orig);
2196 o += i_byte - i_byte_orig;
2197 }
2198 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2199 {
2200 *o++ = '\\';
2201 *o++ = 'W';
2202 *o++ = '\\';
2203 *o++ = 'W';
2204 *o++ = '*';
2205 }
2206
2207 prev_c = c;
2208 }
ca1d1d23
JB
2209
2210 *o++ = '\\';
2211 *o++ = 'b';
2212
2213 return val;
2214}
2215\f
2216DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
8c1a1077
PJ
2217 "MSearch backward: ",
2218 doc: /* Search backward from point for STRING.
2219Set point to the beginning of the occurrence found, and return point.
2220An optional second argument bounds the search; it is a buffer position.
2221The match found must not extend before that position.
2222Optional third argument, if t, means if fail just return nil (no error).
2223 If not nil and not t, position at limit of search and return nil.
2224Optional fourth argument is repeat count--search for successive occurrences.
2225
2226Search case-sensitivity is determined by the value of the variable
2227`case-fold-search', which see.
2228
2229See also the functions `match-beginning', `match-end' and `replace-match'. */)
2230 (string, bound, noerror, count)
ca1d1d23
JB
2231 Lisp_Object string, bound, noerror, count;
2232{
b819a390 2233 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
2234}
2235
6af43974 2236DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
8c1a1077
PJ
2237 doc: /* Search forward from point for STRING.
2238Set point to the end of the occurrence found, and return point.
2239An optional second argument bounds the search; it is a buffer position.
d6aacdcd 2240The match found must not extend after that position. A value of nil is
48740f01 2241 equivalent to (point-max).
8c1a1077
PJ
2242Optional third argument, if t, means if fail just return nil (no error).
2243 If not nil and not t, move to limit of search and return nil.
2244Optional fourth argument is repeat count--search for successive occurrences.
2245
2246Search case-sensitivity is determined by the value of the variable
2247`case-fold-search', which see.
2248
2249See also the functions `match-beginning', `match-end' and `replace-match'. */)
2250 (string, bound, noerror, count)
ca1d1d23
JB
2251 Lisp_Object string, bound, noerror, count;
2252{
b819a390 2253 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
2254}
2255
2256DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
8c1a1077
PJ
2257 "sWord search backward: ",
2258 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2259Set point to the beginning of the occurrence found, and return point.
2260An optional second argument bounds the search; it is a buffer position.
2261The match found must not extend before that position.
2262Optional third argument, if t, means if fail just return nil (no error).
2263 If not nil and not t, move to limit of search and return nil.
2264Optional fourth argument is repeat count--search for successive occurrences. */)
2265 (string, bound, noerror, count)
ca1d1d23
JB
2266 Lisp_Object string, bound, noerror, count;
2267{
b819a390 2268 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2269}
2270
2271DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
8c1a1077
PJ
2272 "sWord search: ",
2273 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2274Set point to the end of the occurrence found, and return point.
2275An optional second argument bounds the search; it is a buffer position.
2276The match found must not extend after that position.
2277Optional third argument, if t, means if fail just return nil (no error).
2278 If not nil and not t, move to limit of search and return nil.
2279Optional fourth argument is repeat count--search for successive occurrences. */)
2280 (string, bound, noerror, count)
ca1d1d23
JB
2281 Lisp_Object string, bound, noerror, count;
2282{
b819a390 2283 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
2284}
2285
2286DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
8c1a1077
PJ
2287 "sRE search backward: ",
2288 doc: /* Search backward from point for match for regular expression REGEXP.
2289Set point to the beginning of the match, and return point.
2290The match found is the one starting last in the buffer
2291and yet ending before the origin of the search.
2292An optional second argument bounds the search; it is a buffer position.
2293The match found must start at or after that position.
2294Optional third argument, if t, means if fail just return nil (no error).
2295 If not nil and not t, move to limit of search and return nil.
2296Optional fourth argument is repeat count--search for successive occurrences.
2297See also the functions `match-beginning', `match-end', `match-string',
2298and `replace-match'. */)
2299 (regexp, bound, noerror, count)
19c0a730 2300 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2301{
b819a390 2302 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2303}
2304
2305DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
8c1a1077
PJ
2306 "sRE search: ",
2307 doc: /* Search forward from point for regular expression REGEXP.
2308Set point to the end of the occurrence found, and return point.
2309An optional second argument bounds the search; it is a buffer position.
2310The match found must not extend after that position.
2311Optional third argument, if t, means if fail just return nil (no error).
2312 If not nil and not t, move to limit of search and return nil.
2313Optional fourth argument is repeat count--search for successive occurrences.
2314See also the functions `match-beginning', `match-end', `match-string',
2315and `replace-match'. */)
2316 (regexp, bound, noerror, count)
19c0a730 2317 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2318{
b819a390
RS
2319 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2320}
2321
2322DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
8c1a1077
PJ
2323 "sPosix search backward: ",
2324 doc: /* Search backward from point for match for regular expression REGEXP.
2325Find the longest match in accord with Posix regular expression rules.
2326Set point to the beginning of the match, and return point.
2327The match found is the one starting last in the buffer
2328and yet ending before the origin of the search.
2329An optional second argument bounds the search; it is a buffer position.
2330The match found must start at or after that position.
2331Optional third argument, if t, means if fail just return nil (no error).
2332 If not nil and not t, move to limit of search and return nil.
2333Optional fourth argument is repeat count--search for successive occurrences.
2334See also the functions `match-beginning', `match-end', `match-string',
2335and `replace-match'. */)
2336 (regexp, bound, noerror, count)
b819a390
RS
2337 Lisp_Object regexp, bound, noerror, count;
2338{
2339 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2340}
2341
2342DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
8c1a1077
PJ
2343 "sPosix search: ",
2344 doc: /* Search forward from point for regular expression REGEXP.
2345Find the longest match in accord with Posix regular expression rules.
2346Set point to the end of the occurrence found, and return point.
2347An optional second argument bounds the search; it is a buffer position.
2348The match found must not extend after that position.
2349Optional third argument, if t, means if fail just return nil (no error).
2350 If not nil and not t, move to limit of search and return nil.
2351Optional fourth argument is repeat count--search for successive occurrences.
2352See also the functions `match-beginning', `match-end', `match-string',
2353and `replace-match'. */)
2354 (regexp, bound, noerror, count)
b819a390
RS
2355 Lisp_Object regexp, bound, noerror, count;
2356{
2357 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2358}
2359\f
d7a5ad5f 2360DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
8c1a1077 2361 doc: /* Replace text matched by last search with NEWTEXT.
4dd0c271
RS
2362Leave point at the end of the replacement text.
2363
8c1a1077
PJ
2364If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2365Otherwise maybe capitalize the whole text, or maybe just word initials,
2366based on the replaced text.
2367If the replaced text has only capital letters
2368and has at least one multiletter word, convert NEWTEXT to all caps.
4dd0c271
RS
2369Otherwise if all words are capitalized in the replaced text,
2370capitalize each word in NEWTEXT.
8c1a1077
PJ
2371
2372If third arg LITERAL is non-nil, insert NEWTEXT literally.
2373Otherwise treat `\\' as special:
2374 `\\&' in NEWTEXT means substitute original matched text.
2375 `\\N' means substitute what matched the Nth `\\(...\\)'.
2376 If Nth parens didn't match, substitute nothing.
2377 `\\\\' means insert one `\\'.
4dd0c271
RS
2378Case conversion does not apply to these substitutions.
2379
8c1a1077 2380FIXEDCASE and LITERAL are optional arguments.
8c1a1077
PJ
2381
2382The optional fourth argument STRING can be a string to modify.
2383This is meaningful when the previous match was done against STRING,
2384using `string-match'. When used this way, `replace-match'
2385creates and returns a new string made by copying STRING and replacing
2386the part of STRING that was matched.
2387
2388The optional fifth argument SUBEXP specifies a subexpression;
2389it says to replace just that subexpression with NEWTEXT,
2390rather than replacing the entire matched text.
2391This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2392`\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2393NEWTEXT in place of subexp N.
2394This is useful only after a regular expression search or match,
2395since only regular expressions have distinguished subexpressions. */)
2396 (newtext, fixedcase, literal, string, subexp)
d7a5ad5f 2397 Lisp_Object newtext, fixedcase, literal, string, subexp;
ca1d1d23
JB
2398{
2399 enum { nochange, all_caps, cap_initial } case_action;
ac3b28b1 2400 register int pos, pos_byte;
ca1d1d23 2401 int some_multiletter_word;
97832bd0 2402 int some_lowercase;
73dc8771 2403 int some_uppercase;
208767c3 2404 int some_nonuppercase_initial;
ca1d1d23 2405 register int c, prevc;
d7a5ad5f 2406 int sub;
3e18eecf 2407 int opoint, newpoint;
ca1d1d23 2408
b7826503 2409 CHECK_STRING (newtext);
ca1d1d23 2410
080c45fd 2411 if (! NILP (string))
b7826503 2412 CHECK_STRING (string);
080c45fd 2413
ca1d1d23
JB
2414 case_action = nochange; /* We tried an initialization */
2415 /* but some C compilers blew it */
4746118a
JB
2416
2417 if (search_regs.num_regs <= 0)
d72cdbfc 2418 error ("`replace-match' called before any match found");
4746118a 2419
d7a5ad5f
RS
2420 if (NILP (subexp))
2421 sub = 0;
2422 else
2423 {
b7826503 2424 CHECK_NUMBER (subexp);
d7a5ad5f
RS
2425 sub = XINT (subexp);
2426 if (sub < 0 || sub >= search_regs.num_regs)
2427 args_out_of_range (subexp, make_number (search_regs.num_regs));
2428 }
2429
080c45fd
RS
2430 if (NILP (string))
2431 {
d7a5ad5f
RS
2432 if (search_regs.start[sub] < BEGV
2433 || search_regs.start[sub] > search_regs.end[sub]
2434 || search_regs.end[sub] > ZV)
2435 args_out_of_range (make_number (search_regs.start[sub]),
2436 make_number (search_regs.end[sub]));
080c45fd
RS
2437 }
2438 else
2439 {
d7a5ad5f
RS
2440 if (search_regs.start[sub] < 0
2441 || search_regs.start[sub] > search_regs.end[sub]
d5db4077 2442 || search_regs.end[sub] > SCHARS (string))
d7a5ad5f
RS
2443 args_out_of_range (make_number (search_regs.start[sub]),
2444 make_number (search_regs.end[sub]));
080c45fd 2445 }
ca1d1d23
JB
2446
2447 if (NILP (fixedcase))
2448 {
2449 /* Decide how to casify by examining the matched text. */
ac3b28b1 2450 int last;
ca1d1d23 2451
ac3b28b1
KH
2452 pos = search_regs.start[sub];
2453 last = search_regs.end[sub];
fa8ed3e0
RS
2454
2455 if (NILP (string))
ac3b28b1 2456 pos_byte = CHAR_TO_BYTE (pos);
fa8ed3e0 2457 else
ac3b28b1 2458 pos_byte = string_char_to_byte (string, pos);
fa8ed3e0 2459
ca1d1d23
JB
2460 prevc = '\n';
2461 case_action = all_caps;
2462
2463 /* some_multiletter_word is set nonzero if any original word
2464 is more than one letter long. */
2465 some_multiletter_word = 0;
97832bd0 2466 some_lowercase = 0;
208767c3 2467 some_nonuppercase_initial = 0;
73dc8771 2468 some_uppercase = 0;
ca1d1d23 2469
ac3b28b1 2470 while (pos < last)
ca1d1d23 2471 {
080c45fd 2472 if (NILP (string))
ac3b28b1
KH
2473 {
2474 c = FETCH_CHAR (pos_byte);
2475 INC_BOTH (pos, pos_byte);
2476 }
080c45fd 2477 else
ac3b28b1 2478 FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
080c45fd 2479
ca1d1d23
JB
2480 if (LOWERCASEP (c))
2481 {
2482 /* Cannot be all caps if any original char is lower case */
2483
97832bd0 2484 some_lowercase = 1;
ca1d1d23 2485 if (SYNTAX (prevc) != Sword)
208767c3 2486 some_nonuppercase_initial = 1;
ca1d1d23
JB
2487 else
2488 some_multiletter_word = 1;
2489 }
d16c2b66 2490 else if (UPPERCASEP (c))
ca1d1d23 2491 {
73dc8771 2492 some_uppercase = 1;
97832bd0 2493 if (SYNTAX (prevc) != Sword)
c4d460ce 2494 ;
97832bd0 2495 else
ca1d1d23
JB
2496 some_multiletter_word = 1;
2497 }
208767c3
RS
2498 else
2499 {
2500 /* If the initial is a caseless word constituent,
2501 treat that like a lowercase initial. */
2502 if (SYNTAX (prevc) != Sword)
2503 some_nonuppercase_initial = 1;
2504 }
ca1d1d23
JB
2505
2506 prevc = c;
2507 }
2508
97832bd0
RS
2509 /* Convert to all caps if the old text is all caps
2510 and has at least one multiletter word. */
2511 if (! some_lowercase && some_multiletter_word)
2512 case_action = all_caps;
c4d460ce 2513 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2514 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2515 case_action = cap_initial;
208767c3 2516 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2517 /* Should x -> yz, operating on X, give Yz or YZ?
2518 We'll assume the latter. */
2519 case_action = all_caps;
97832bd0
RS
2520 else
2521 case_action = nochange;
ca1d1d23
JB
2522 }
2523
080c45fd
RS
2524 /* Do replacement in a string. */
2525 if (!NILP (string))
2526 {
2527 Lisp_Object before, after;
2528
2529 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2530 make_number (search_regs.start[sub]));
2531 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2532
636a5e28
RS
2533 /* Substitute parts of the match into NEWTEXT
2534 if desired. */
080c45fd
RS
2535 if (NILP (literal))
2536 {
d131e79c
RS
2537 int lastpos = 0;
2538 int lastpos_byte = 0;
080c45fd
RS
2539 /* We build up the substituted string in ACCUM. */
2540 Lisp_Object accum;
2541 Lisp_Object middle;
d5db4077 2542 int length = SBYTES (newtext);
080c45fd
RS
2543
2544 accum = Qnil;
2545
ac3b28b1 2546 for (pos_byte = 0, pos = 0; pos_byte < length;)
080c45fd
RS
2547 {
2548 int substart = -1;
6bbd7a29 2549 int subend = 0;
1e79ec24 2550 int delbackslash = 0;
080c45fd 2551
0c8533c6
RS
2552 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2553
080c45fd
RS
2554 if (c == '\\')
2555 {
0c8533c6 2556 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
177c0ea7 2557
080c45fd
RS
2558 if (c == '&')
2559 {
d7a5ad5f
RS
2560 substart = search_regs.start[sub];
2561 subend = search_regs.end[sub];
080c45fd 2562 }
5fbbc83b 2563 else if (c >= '1' && c <= '9')
080c45fd 2564 {
5fbbc83b
RS
2565 if (search_regs.start[c - '0'] >= 0
2566 && c <= search_regs.num_regs + '0')
080c45fd
RS
2567 {
2568 substart = search_regs.start[c - '0'];
2569 subend = search_regs.end[c - '0'];
2570 }
5fbbc83b
RS
2571 else
2572 {
2573 /* If that subexp did not match,
2574 replace \\N with nothing. */
2575 substart = 0;
2576 subend = 0;
2577 }
080c45fd 2578 }
1e79ec24
KH
2579 else if (c == '\\')
2580 delbackslash = 1;
636a5e28
RS
2581 else
2582 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2583 }
2584 if (substart >= 0)
2585 {
d131e79c
RS
2586 if (pos - 2 != lastpos)
2587 middle = substring_both (newtext, lastpos,
2588 lastpos_byte,
2589 pos - 2, pos_byte - 2);
080c45fd
RS
2590 else
2591 middle = Qnil;
2592 accum = concat3 (accum, middle,
0c8533c6
RS
2593 Fsubstring (string,
2594 make_number (substart),
080c45fd
RS
2595 make_number (subend)));
2596 lastpos = pos;
0c8533c6 2597 lastpos_byte = pos_byte;
080c45fd 2598 }
1e79ec24
KH
2599 else if (delbackslash)
2600 {
d131e79c
RS
2601 middle = substring_both (newtext, lastpos,
2602 lastpos_byte,
2603 pos - 1, pos_byte - 1);
0c8533c6 2604
1e79ec24
KH
2605 accum = concat2 (accum, middle);
2606 lastpos = pos;
0c8533c6 2607 lastpos_byte = pos_byte;
1e79ec24 2608 }
080c45fd
RS
2609 }
2610
d131e79c
RS
2611 if (pos != lastpos)
2612 middle = substring_both (newtext, lastpos,
2613 lastpos_byte,
0c8533c6 2614 pos, pos_byte);
080c45fd
RS
2615 else
2616 middle = Qnil;
2617
2618 newtext = concat2 (accum, middle);
2619 }
2620
636a5e28 2621 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2622 if (case_action == all_caps)
2623 newtext = Fupcase (newtext);
2624 else if (case_action == cap_initial)
2b2eead9 2625 newtext = Fupcase_initials (newtext);
080c45fd
RS
2626
2627 return concat3 (before, newtext, after);
2628 }
2629
09c4719e 2630 /* Record point, then move (quietly) to the start of the match. */
9160906f 2631 if (PT >= search_regs.end[sub])
b0eba991 2632 opoint = PT - ZV;
9160906f
RS
2633 else if (PT > search_regs.start[sub])
2634 opoint = search_regs.end[sub] - ZV;
b0eba991
RS
2635 else
2636 opoint = PT;
2637
886ed6ec
RS
2638 /* If we want non-literal replacement,
2639 perform substitution on the replacement string. */
2640 if (NILP (literal))
ca1d1d23 2641 {
d5db4077 2642 int length = SBYTES (newtext);
68e69fbd
RS
2643 unsigned char *substed;
2644 int substed_alloc_size, substed_len;
3bc25e52
KH
2645 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2646 int str_multibyte = STRING_MULTIBYTE (newtext);
2647 Lisp_Object rev_tbl;
886ed6ec 2648 int really_changed = 0;
3bc25e52
KH
2649
2650 rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2651 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2652 make_number (0))
2653 : Qnil);
ac3b28b1 2654
68e69fbd
RS
2655 substed_alloc_size = length * 2 + 100;
2656 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2657 substed_len = 0;
2658
3bc25e52
KH
2659 /* Go thru NEWTEXT, producing the actual text to insert in
2660 SUBSTED while adjusting multibyteness to that of the current
2661 buffer. */
ca1d1d23 2662
ac3b28b1 2663 for (pos_byte = 0, pos = 0; pos_byte < length;)
ca1d1d23 2664 {
68e69fbd 2665 unsigned char str[MAX_MULTIBYTE_LENGTH];
f8ce8a0d
GM
2666 unsigned char *add_stuff = NULL;
2667 int add_len = 0;
68e69fbd 2668 int idx = -1;
9a76659d 2669
3bc25e52
KH
2670 if (str_multibyte)
2671 {
eb99a8dd 2672 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
3bc25e52
KH
2673 if (!buf_multibyte)
2674 c = multibyte_char_to_unibyte (c, rev_tbl);
2675 }
2676 else
2677 {
2678 /* Note that we don't have to increment POS. */
5d69fe10 2679 c = SREF (newtext, pos_byte++);
3bc25e52
KH
2680 if (buf_multibyte)
2681 c = unibyte_char_to_multibyte (c);
2682 }
ac3b28b1 2683
68e69fbd
RS
2684 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2685 or set IDX to a match index, which means put that part
2686 of the buffer text into SUBSTED. */
2687
ca1d1d23
JB
2688 if (c == '\\')
2689 {
886ed6ec
RS
2690 really_changed = 1;
2691
3bc25e52
KH
2692 if (str_multibyte)
2693 {
eb99a8dd
KH
2694 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2695 pos, pos_byte);
3bc25e52
KH
2696 if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2697 c = multibyte_char_to_unibyte (c, rev_tbl);
2698 }
2699 else
2700 {
d5db4077 2701 c = SREF (newtext, pos_byte++);
3bc25e52
KH
2702 if (buf_multibyte)
2703 c = unibyte_char_to_multibyte (c);
2704 }
2705
ca1d1d23 2706 if (c == '&')
68e69fbd 2707 idx = sub;
78445046 2708 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
2709 {
2710 if (search_regs.start[c - '0'] >= 1)
68e69fbd 2711 idx = c - '0';
ca1d1d23 2712 }
636a5e28 2713 else if (c == '\\')
68e69fbd 2714 add_len = 1, add_stuff = "\\";
636a5e28 2715 else
3bc25e52
KH
2716 {
2717 xfree (substed);
2718 error ("Invalid use of `\\' in replacement text");
2719 }
ca1d1d23
JB
2720 }
2721 else
68e69fbd
RS
2722 {
2723 add_len = CHAR_STRING (c, str);
2724 add_stuff = str;
2725 }
2726
2727 /* If we want to copy part of a previous match,
2728 set up ADD_STUFF and ADD_LEN to point to it. */
2729 if (idx >= 0)
2730 {
2731 int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2732 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2733 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2734 move_gap (search_regs.start[idx]);
2735 add_stuff = BYTE_POS_ADDR (begbyte);
2736 }
2737
2738 /* Now the stuff we want to add to SUBSTED
2739 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2740
2741 /* Make sure SUBSTED is big enough. */
2742 if (substed_len + add_len >= substed_alloc_size)
2743 {
2744 substed_alloc_size = substed_len + add_len + 500;
2745 substed = (unsigned char *) xrealloc (substed,
2746 substed_alloc_size + 1);
2747 }
2748
2749 /* Now add to the end of SUBSTED. */
f8ce8a0d
GM
2750 if (add_stuff)
2751 {
2752 bcopy (add_stuff, substed + substed_len, add_len);
2753 substed_len += add_len;
2754 }
ca1d1d23 2755 }
68e69fbd 2756
886ed6ec 2757 if (really_changed)
80460525
KH
2758 {
2759 if (buf_multibyte)
2760 {
2761 int nchars = multibyte_chars_in_text (substed, substed_len);
68e69fbd 2762
80460525
KH
2763 newtext = make_multibyte_string (substed, nchars, substed_len);
2764 }
2765 else
2766 newtext = make_unibyte_string (substed, substed_len);
2767 }
68e69fbd 2768 xfree (substed);
ca1d1d23
JB
2769 }
2770
886ed6ec
RS
2771 /* Replace the old text with the new in the cleanest possible way. */
2772 replace_range (search_regs.start[sub], search_regs.end[sub],
2773 newtext, 1, 0, 1);
d5db4077 2774 newpoint = search_regs.start[sub] + SCHARS (newtext);
ca1d1d23
JB
2775
2776 if (case_action == all_caps)
886ed6ec
RS
2777 Fupcase_region (make_number (search_regs.start[sub]),
2778 make_number (newpoint));
ca1d1d23 2779 else if (case_action == cap_initial)
886ed6ec
RS
2780 Fupcase_initials_region (make_number (search_regs.start[sub]),
2781 make_number (newpoint));
3e18eecf 2782
98e942e0
RS
2783 /* Adjust search data for this change. */
2784 {
5b88a2c5 2785 int oldend = search_regs.end[sub];
41c01205 2786 int oldstart = search_regs.start[sub];
98e942e0
RS
2787 int change = newpoint - search_regs.end[sub];
2788 int i;
2789
2790 for (i = 0; i < search_regs.num_regs; i++)
2791 {
41c01205 2792 if (search_regs.start[i] >= oldend)
98e942e0 2793 search_regs.start[i] += change;
41c01205
DK
2794 else if (search_regs.start[i] > oldstart)
2795 search_regs.start[i] = oldstart;
2796 if (search_regs.end[i] >= oldend)
98e942e0 2797 search_regs.end[i] += change;
41c01205
DK
2798 else if (search_regs.end[i] > oldstart)
2799 search_regs.end[i] = oldstart;
98e942e0
RS
2800 }
2801 }
2802
b0eba991 2803 /* Put point back where it was in the text. */
8d808a65 2804 if (opoint <= 0)
fa8ed3e0 2805 TEMP_SET_PT (opoint + ZV);
b0eba991 2806 else
fa8ed3e0 2807 TEMP_SET_PT (opoint);
b0eba991
RS
2808
2809 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2810 move_if_not_intangible (newpoint);
177c0ea7 2811
ca1d1d23
JB
2812 return Qnil;
2813}
2814\f
2815static Lisp_Object
2816match_limit (num, beginningp)
2817 Lisp_Object num;
2818 int beginningp;
2819{
2820 register int n;
2821
b7826503 2822 CHECK_NUMBER (num);
ca1d1d23 2823 n = XINT (num);
f90a5bf5 2824 if (n < 0)
bd2cbd56 2825 args_out_of_range (num, make_number (0));
f90a5bf5
RS
2826 if (search_regs.num_regs <= 0)
2827 error ("No match data, because no search succeeded");
9b9ceb61 2828 if (n >= search_regs.num_regs
4746118a 2829 || search_regs.start[n] < 0)
ca1d1d23
JB
2830 return Qnil;
2831 return (make_number ((beginningp) ? search_regs.start[n]
2832 : search_regs.end[n]));
2833}
2834
2835DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
8c1a1077
PJ
2836 doc: /* Return position of start of text matched by last search.
2837SUBEXP, a number, specifies which parenthesized expression in the last
2838 regexp.
2839Value is nil if SUBEXPth pair didn't match, or there were less than
2840 SUBEXP pairs.
2841Zero means the entire text matched by the whole regexp or whole string. */)
2842 (subexp)
5806161b 2843 Lisp_Object subexp;
ca1d1d23 2844{
5806161b 2845 return match_limit (subexp, 1);
ca1d1d23
JB
2846}
2847
2848DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
8c1a1077
PJ
2849 doc: /* Return position of end of text matched by last search.
2850SUBEXP, a number, specifies which parenthesized expression in the last
2851 regexp.
2852Value is nil if SUBEXPth pair didn't match, or there were less than
2853 SUBEXP pairs.
2854Zero means the entire text matched by the whole regexp or whole string. */)
2855 (subexp)
5806161b 2856 Lisp_Object subexp;
ca1d1d23 2857{
5806161b 2858 return match_limit (subexp, 0);
177c0ea7 2859}
ca1d1d23 2860
abd0071c 2861DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
8c1a1077
PJ
2862 doc: /* Return a list containing all info on what the last search matched.
2863Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2864All the elements are markers or nil (nil if the Nth pair didn't match)
2865if the last match was on a buffer; integers or nil if a string was matched.
2866Use `store-match-data' to reinstate the data in this list.
2867
41c01205
DK
2868If INTEGERS (the optional first argument) is non-nil, always use
2869integers \(rather than markers) to represent buffer positions. In
2870this case, and if the last match was in a buffer, the buffer will get
2871stored as one additional element at the end of the list.
2872
abd0071c
KS
2873If REUSE is a list, reuse it as part of the value. If REUSE is long
2874enough to hold all the values, and if INTEGERS is non-nil, no consing
2875is done.
2876
2877If optional third arg RESEAT is non-nil, any previous markers on the
2878REUSE list will be modified to point to nowhere.
2879
140a6b7e 2880Return value is undefined if the last search failed. */)
abd0071c
KS
2881 (integers, reuse, reseat)
2882 Lisp_Object integers, reuse, reseat;
ca1d1d23 2883{
56256c2a 2884 Lisp_Object tail, prev;
4746118a 2885 Lisp_Object *data;
ca1d1d23
JB
2886 int i, len;
2887
abd0071c
KS
2888 if (!NILP (reseat))
2889 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2890 if (MARKERP (XCAR (tail)))
2891 {
51f10faa 2892 unchain_marker (XMARKER (XCAR (tail)));
abd0071c
KS
2893 XSETCAR (tail, Qnil);
2894 }
2895
daa37602 2896 if (NILP (last_thing_searched))
c36bcf1b 2897 return Qnil;
daa37602 2898
6bbd7a29
GM
2899 prev = Qnil;
2900
41c01205 2901 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
4746118a
JB
2902 * sizeof (Lisp_Object));
2903
41c01205 2904 len = 0;
4746118a 2905 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2906 {
2907 int start = search_regs.start[i];
2908 if (start >= 0)
2909 {
56256c2a
RS
2910 if (EQ (last_thing_searched, Qt)
2911 || ! NILP (integers))
ca1d1d23 2912 {
c235cce7
KH
2913 XSETFASTINT (data[2 * i], start);
2914 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2915 }
0ed62dc7 2916 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2917 {
2918 data[2 * i] = Fmake_marker ();
daa37602
JB
2919 Fset_marker (data[2 * i],
2920 make_number (start),
2921 last_thing_searched);
ca1d1d23
JB
2922 data[2 * i + 1] = Fmake_marker ();
2923 Fset_marker (data[2 * i + 1],
177c0ea7 2924 make_number (search_regs.end[i]),
daa37602 2925 last_thing_searched);
ca1d1d23 2926 }
daa37602
JB
2927 else
2928 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2929 abort ();
2930
abd0071c 2931 len = 2 * i + 2;
ca1d1d23
JB
2932 }
2933 else
abd0071c 2934 data[2 * i] = data[2 * i + 1] = Qnil;
ca1d1d23 2935 }
56256c2a 2936
bd2cbd56 2937 if (BUFFERP (last_thing_searched) && !NILP (integers))
41c01205 2938 {
bd2cbd56 2939 data[len] = last_thing_searched;
41c01205
DK
2940 len++;
2941 }
2942
56256c2a
RS
2943 /* If REUSE is not usable, cons up the values and return them. */
2944 if (! CONSP (reuse))
41c01205 2945 return Flist (len, data);
56256c2a
RS
2946
2947 /* If REUSE is a list, store as many value elements as will fit
2948 into the elements of REUSE. */
2949 for (i = 0, tail = reuse; CONSP (tail);
c1d497be 2950 i++, tail = XCDR (tail))
56256c2a 2951 {
41c01205 2952 if (i < len)
f3fbd155 2953 XSETCAR (tail, data[i]);
56256c2a 2954 else
f3fbd155 2955 XSETCAR (tail, Qnil);
56256c2a
RS
2956 prev = tail;
2957 }
2958
2959 /* If we couldn't fit all value elements into REUSE,
2960 cons up the rest of them and add them to the end of REUSE. */
41c01205
DK
2961 if (i < len)
2962 XSETCDR (prev, Flist (len - i, data + i));
56256c2a
RS
2963
2964 return reuse;
ca1d1d23
JB
2965}
2966
51f10faa
KS
2967/* Internal usage only:
2968 If RESEAT is `evaporate', put the markers back on the free list
2969 immediately. No other references to the markers must exist in this case,
2970 so it is used only internally on the unwind stack and save-match-data from
2971 Lisp. */
ca1d1d23 2972
abd0071c 2973DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
8c1a1077 2974 doc: /* Set internal data on last search match from elements of LIST.
abd0071c
KS
2975LIST should have been created by calling `match-data' previously.
2976
51f10faa 2977If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
abd0071c
KS
2978 (list, reseat)
2979 register Lisp_Object list, reseat;
ca1d1d23
JB
2980{
2981 register int i;
2982 register Lisp_Object marker;
2983
7074fde6
FP
2984 if (running_asynch_code)
2985 save_search_regs ();
2986
29100cea 2987 CHECK_LIST (list);
ca1d1d23 2988
41c01205
DK
2989 /* Unless we find a marker with a buffer or an explicit buffer
2990 in LIST, assume that this match data came from a string. */
daa37602
JB
2991 last_thing_searched = Qt;
2992
4746118a
JB
2993 /* Allocate registers if they don't already exist. */
2994 {
d084e942 2995 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
2996
2997 if (length > search_regs.num_regs)
2998 {
1113d9db
JB
2999 if (search_regs.num_regs == 0)
3000 {
3001 search_regs.start
3002 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3003 search_regs.end
3004 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3005 }
4746118a 3006 else
1113d9db
JB
3007 {
3008 search_regs.start
3009 = (regoff_t *) xrealloc (search_regs.start,
3010 length * sizeof (regoff_t));
3011 search_regs.end
3012 = (regoff_t *) xrealloc (search_regs.end,
3013 length * sizeof (regoff_t));
3014 }
4746118a 3015
e62371e9
KH
3016 for (i = search_regs.num_regs; i < length; i++)
3017 search_regs.start[i] = -1;
3018
487282dc 3019 search_regs.num_regs = length;
4746118a 3020 }
ca1d1d23 3021
abd0071c 3022 for (i = 0; CONSP (list); i++)
41c01205 3023 {
abd0071c 3024 marker = XCAR (list);
bd2cbd56 3025 if (BUFFERP (marker))
c3762cbd 3026 {
bd2cbd56 3027 last_thing_searched = marker;
c3762cbd
DK
3028 break;
3029 }
3030 if (i >= length)
3031 break;
41c01205
DK
3032 if (NILP (marker))
3033 {
3034 search_regs.start[i] = -1;
abd0071c 3035 list = XCDR (list);
41c01205
DK
3036 }
3037 else
3038 {
3039 int from;
abd0071c 3040 Lisp_Object m;
e2811828 3041
abd0071c 3042 m = marker;
41c01205
DK
3043 if (MARKERP (marker))
3044 {
3045 if (XMARKER (marker)->buffer == 0)
3046 XSETFASTINT (marker, 0);
3047 else
3048 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3049 }
e2811828 3050
41c01205
DK
3051 CHECK_NUMBER_COERCE_MARKER (marker);
3052 from = XINT (marker);
e2811828 3053
abd0071c
KS
3054 if (!NILP (reseat) && MARKERP (m))
3055 {
3056 if (EQ (reseat, Qevaporate))
3057 free_marker (m);
3058 else
3059 unchain_marker (XMARKER (m));
9ad54a7e 3060 XSETCAR (list, Qnil);
abd0071c
KS
3061 }
3062
3063 if ((list = XCDR (list), !CONSP (list)))
3064 break;
3065
3066 m = marker = XCAR (list);
3067
41c01205
DK
3068 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3069 XSETFASTINT (marker, 0);
e2811828 3070
41c01205
DK
3071 CHECK_NUMBER_COERCE_MARKER (marker);
3072 search_regs.start[i] = from;
3073 search_regs.end[i] = XINT (marker);
abd0071c
KS
3074
3075 if (!NILP (reseat) && MARKERP (m))
3076 {
3077 if (EQ (reseat, Qevaporate))
3078 free_marker (m);
3079 else
3080 unchain_marker (XMARKER (m));
9ad54a7e 3081 XSETCAR (list, Qnil);
abd0071c 3082 }
41c01205 3083 }
abd0071c 3084 list = XCDR (list);
41c01205 3085 }
ca1d1d23 3086
41c01205
DK
3087 for (; i < search_regs.num_regs; i++)
3088 search_regs.start[i] = -1;
3089 }
ca1d1d23 3090
177c0ea7 3091 return Qnil;
ca1d1d23
JB
3092}
3093
7074fde6
FP
3094/* If non-zero the match data have been saved in saved_search_regs
3095 during the execution of a sentinel or filter. */
75ebf74b 3096static int search_regs_saved;
7074fde6 3097static struct re_registers saved_search_regs;
41c01205 3098static Lisp_Object saved_last_thing_searched;
7074fde6
FP
3099
3100/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3101 if asynchronous code (filter or sentinel) is running. */
3102static void
3103save_search_regs ()
3104{
3105 if (!search_regs_saved)
3106 {
3107 saved_search_regs.num_regs = search_regs.num_regs;
3108 saved_search_regs.start = search_regs.start;
3109 saved_search_regs.end = search_regs.end;
41c01205
DK
3110 saved_last_thing_searched = last_thing_searched;
3111 last_thing_searched = Qnil;
7074fde6 3112 search_regs.num_regs = 0;
2d4a771a
RS
3113 search_regs.start = 0;
3114 search_regs.end = 0;
7074fde6
FP
3115
3116 search_regs_saved = 1;
3117 }
3118}
3119
3120/* Called upon exit from filters and sentinels. */
3121void
abd0071c 3122restore_search_regs ()
7074fde6
FP
3123{
3124 if (search_regs_saved)
3125 {
3126 if (search_regs.num_regs > 0)
3127 {
3128 xfree (search_regs.start);
3129 xfree (search_regs.end);
3130 }
3131 search_regs.num_regs = saved_search_regs.num_regs;
3132 search_regs.start = saved_search_regs.start;
3133 search_regs.end = saved_search_regs.end;
41c01205
DK
3134 last_thing_searched = saved_last_thing_searched;
3135 saved_last_thing_searched = Qnil;
7074fde6
FP
3136 search_regs_saved = 0;
3137 }
3138}
3139
abd0071c
KS
3140static Lisp_Object
3141unwind_set_match_data (list)
3142 Lisp_Object list;
3143{
51f10faa 3144 /* It is safe to free (evaporate) the markers immediately. */
abd0071c
KS
3145 return Fset_match_data (list, Qevaporate);
3146}
3147
3148/* Called to unwind protect the match data. */
3149void
3150record_unwind_save_match_data ()
3151{
3152 record_unwind_protect (unwind_set_match_data,
3153 Fmatch_data (Qnil, Qnil, Qnil));
3154}
3155
ca1d1d23
JB
3156/* Quote a string to inactivate reg-expr chars */
3157
3158DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
8c1a1077
PJ
3159 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
3160 (string)
5806161b 3161 Lisp_Object string;
ca1d1d23
JB
3162{
3163 register unsigned char *in, *out, *end;
3164 register unsigned char *temp;
0c8533c6 3165 int backslashes_added = 0;
ca1d1d23 3166
b7826503 3167 CHECK_STRING (string);
ca1d1d23 3168
d5db4077 3169 temp = (unsigned char *) alloca (SBYTES (string) * 2);
ca1d1d23
JB
3170
3171 /* Now copy the data into the new string, inserting escapes. */
3172
d5db4077
KR
3173 in = SDATA (string);
3174 end = in + SBYTES (string);
177c0ea7 3175 out = temp;
ca1d1d23
JB
3176
3177 for (; in != end; in++)
3178 {
66bc6082 3179 if (*in == '['
ca1d1d23
JB
3180 || *in == '*' || *in == '.' || *in == '\\'
3181 || *in == '?' || *in == '+'
3182 || *in == '^' || *in == '$')
0c8533c6 3183 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
3184 *out++ = *in;
3185 }
3186
3f8100f1 3187 return make_specified_string (temp,
d5db4077 3188 SCHARS (string) + backslashes_added,
3f8100f1
RS
3189 out - temp,
3190 STRING_MULTIBYTE (string));
ca1d1d23 3191}
177c0ea7 3192\f
dfcf069d 3193void
ca1d1d23
JB
3194syms_of_search ()
3195{
3196 register int i;
3197
487282dc
KH
3198 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3199 {
3200 searchbufs[i].buf.allocated = 100;
b23c0a83 3201 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
487282dc
KH
3202 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3203 searchbufs[i].regexp = Qnil;
ecdb561e 3204 searchbufs[i].whitespace_regexp = Qnil;
b69e3c18 3205 searchbufs[i].syntax_table = Qnil;
487282dc 3206 staticpro (&searchbufs[i].regexp);
aa77b5ce 3207 staticpro (&searchbufs[i].whitespace_regexp);
b69e3c18 3208 staticpro (&searchbufs[i].syntax_table);
487282dc
KH
3209 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3210 }
3211 searchbuf_head = &searchbufs[0];
ca1d1d23
JB
3212
3213 Qsearch_failed = intern ("search-failed");
3214 staticpro (&Qsearch_failed);
3215 Qinvalid_regexp = intern ("invalid-regexp");
3216 staticpro (&Qinvalid_regexp);
3217
3218 Fput (Qsearch_failed, Qerror_conditions,
3219 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3220 Fput (Qsearch_failed, Qerror_message,
3221 build_string ("Search failed"));
3222
3223 Fput (Qinvalid_regexp, Qerror_conditions,
3224 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3225 Fput (Qinvalid_regexp, Qerror_message,
3226 build_string ("Invalid regexp"));
3227
daa37602
JB
3228 last_thing_searched = Qnil;
3229 staticpro (&last_thing_searched);
3230
0f6af254
DK
3231 saved_last_thing_searched = Qnil;
3232 staticpro (&saved_last_thing_searched);
3233
41a33295 3234 DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
e2811828 3235 doc: /* Regexp to substitute for bunches of spaces in regexp search.
f31a9a68
RS
3236Some commands use this for user-specified regexps.
3237Spaces that occur inside character classes or repetition operators
3238or other such regexp constructs are not replaced with this.
3239A value of nil (which is the normal value) means treat spaces literally. */);
41a33295 3240 Vsearch_spaces_regexp = Qnil;
f31a9a68 3241
ef887810
RS
3242 DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data,
3243 doc: /* Internal use only.
3244If non-nil, the match data will not be changed during call to searching or
3245matching functions, such as `looking-at', `string-match', `re-search-forward'
3246etc. */);
3247 Vinhibit_changing_match_data = Qnil;
3248
ca1d1d23 3249 defsubr (&Slooking_at);
b819a390
RS
3250 defsubr (&Sposix_looking_at);
3251 defsubr (&Sstring_match);
3252 defsubr (&Sposix_string_match);
ca1d1d23
JB
3253 defsubr (&Ssearch_forward);
3254 defsubr (&Ssearch_backward);
3255 defsubr (&Sword_search_forward);
3256 defsubr (&Sword_search_backward);
3257 defsubr (&Sre_search_forward);
3258 defsubr (&Sre_search_backward);
b819a390
RS
3259 defsubr (&Sposix_search_forward);
3260 defsubr (&Sposix_search_backward);
ca1d1d23
JB
3261 defsubr (&Sreplace_match);
3262 defsubr (&Smatch_beginning);
3263 defsubr (&Smatch_end);
3264 defsubr (&Smatch_data);
3f1c005b 3265 defsubr (&Sset_match_data);
ca1d1d23
JB
3266 defsubr (&Sregexp_quote);
3267}
ab5796a9
MB
3268
3269/* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3270 (do not change this comment) */