Regenerated
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
429ab54e 2 Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
76b6f707 3 2003, 2004, 2005, 2006, 2007, 2008, 2009
8cabe764 4 Free Software Foundation, Inc.
ca1d1d23
JB
5
6This file is part of GNU Emacs.
7
9ec0b715 8GNU Emacs is free software: you can redistribute it and/or modify
ca1d1d23 9it under the terms of the GNU General Public License as published by
9ec0b715
GM
10the Free Software Foundation, either version 3 of the License, or
11(at your option) any later version.
ca1d1d23
JB
12
13GNU Emacs is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
9ec0b715 19along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
ca1d1d23
JB
20
21
18160b98 22#include <config.h>
d7306fe6 23#include <setjmp.h>
ca1d1d23
JB
24#include "lisp.h"
25#include "syntax.h"
5679531d 26#include "category.h"
ca1d1d23 27#include "buffer.h"
76eb0881 28#include "character.h"
89ad6492 29#include "charset.h"
9169c321 30#include "region-cache.h"
ca1d1d23 31#include "commands.h"
9ac0d9e0 32#include "blockinput.h"
bf1760bb 33#include "intervals.h"
4746118a 34
ca1d1d23
JB
35#include <sys/types.h>
36#include "regex.h"
37
1d288aef 38#define REGEXP_CACHE_SIZE 20
ca1d1d23 39
487282dc
KH
40/* If the regexp is non-nil, then the buffer contains the compiled form
41 of that regexp, suitable for searching. */
1d288aef
RS
42struct regexp_cache
43{
487282dc 44 struct regexp_cache *next;
ecdb561e 45 Lisp_Object regexp, whitespace_regexp;
b69e3c18 46 /* Syntax table for which the regexp applies. We need this because
58e95211
SM
47 of character classes. If this is t, then the compiled pattern is valid
48 for any syntax-table. */
b69e3c18 49 Lisp_Object syntax_table;
487282dc
KH
50 struct re_pattern_buffer buf;
51 char fastmap[0400];
b819a390
RS
52 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
53 char posix;
487282dc 54};
ca1d1d23 55
487282dc
KH
56/* The instances of that struct. */
57struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 58
487282dc
KH
59/* The head of the linked list; points to the most recently used buffer. */
60struct regexp_cache *searchbuf_head;
ca1d1d23 61
ca1d1d23 62
4746118a
JB
63/* Every call to re_match, etc., must pass &search_regs as the regs
64 argument unless you can show it is unnecessary (i.e., if re_match
65 is certainly going to be called again before region-around-match
66 can be called).
67
68 Since the registers are now dynamically allocated, we need to make
69 sure not to refer to the Nth register before checking that it has
1113d9db
JB
70 been allocated by checking search_regs.num_regs.
71
72 The regex code keeps track of whether it has allocated the search
487282dc
KH
73 buffer using bits in the re_pattern_buffer. This means that whenever
74 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
75 allocated any registers, and will allocate new registers the next
76 time you call a searching or matching function. Therefore, we need
77 to call re_set_registers after compiling a new pattern or after
78 setting the match registers, so that the regex functions will be
79 able to free or re-allocate it properly. */
ca1d1d23
JB
80static struct re_registers search_regs;
81
daa37602
JB
82/* The buffer in which the last search was performed, or
83 Qt if the last search was done in a string;
84 Qnil if no searching has been done yet. */
85static Lisp_Object last_thing_searched;
ca1d1d23 86
8e6208c5 87/* error condition signaled when regexp compile_pattern fails */
ca1d1d23
JB
88
89Lisp_Object Qinvalid_regexp;
90
06f77a8a
KS
91/* Error condition used for failing searches */
92Lisp_Object Qsearch_failed;
93
41a33295 94Lisp_Object Vsearch_spaces_regexp;
f31a9a68 95
ef887810
RS
96/* If non-nil, the match data will not be changed during call to
97 searching or matching functions. This variable is for internal use
98 only. */
99Lisp_Object Vinhibit_changing_match_data;
100
e7deaab0
AS
101static void set_search_regs P_ ((EMACS_INT, EMACS_INT));
102static void save_search_regs P_ ((void));
103static EMACS_INT simple_search P_ ((int, unsigned char *, int, int,
104 Lisp_Object, EMACS_INT, EMACS_INT,
105 EMACS_INT, EMACS_INT));
106static EMACS_INT boyer_moore P_ ((int, unsigned char *, int, int,
107 Lisp_Object, Lisp_Object,
108 EMACS_INT, EMACS_INT,
109 EMACS_INT, EMACS_INT, int));
110static EMACS_INT search_buffer P_ ((Lisp_Object, EMACS_INT, EMACS_INT,
111 EMACS_INT, EMACS_INT, int, int,
112 Lisp_Object, Lisp_Object, int));
01d09305 113static void matcher_overflow () NO_RETURN;
b819a390 114
ca1d1d23
JB
115static void
116matcher_overflow ()
117{
118 error ("Stack overflow in regexp matcher");
119}
120
b819a390
RS
121/* Compile a regexp and signal a Lisp error if anything goes wrong.
122 PATTERN is the pattern to compile.
123 CP is the place to put the result.
facdc750 124 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
125 REGP is the structure that says where to store the "register"
126 values that will result from matching this pattern.
127 If it is 0, we should compile the pattern not to record any
128 subexpression bounds.
129 POSIX is nonzero if we want full backtracking (POSIX style)
5679531d 130 for this pattern. 0 means backtrack only enough to get a valid match.
bbc73b48
KH
131
132 The behavior also depends on Vsearch_spaces_regexp. */
ca1d1d23 133
487282dc 134static void
89ad6492 135compile_pattern_1 (cp, pattern, translate, regp, posix)
487282dc 136 struct regexp_cache *cp;
ca1d1d23 137 Lisp_Object pattern;
facdc750 138 Lisp_Object translate;
487282dc 139 struct re_registers *regp;
b819a390 140 int posix;
ca1d1d23 141{
d451e4db 142 char *val;
b819a390 143 reg_syntax_t old;
ca1d1d23 144
487282dc 145 cp->regexp = Qnil;
59fab369 146 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
b819a390 147 cp->posix = posix;
93daa011 148 cp->buf.multibyte = STRING_MULTIBYTE (pattern);
89ad6492 149 cp->buf.charset_unibyte = charset_unibyte;
d8c85250
CY
150 if (STRINGP (Vsearch_spaces_regexp))
151 cp->whitespace_regexp = Vsearch_spaces_regexp;
152 else
153 cp->whitespace_regexp = Qnil;
154
e7b2dd2e
RS
155 /* rms: I think BLOCK_INPUT is not needed here any more,
156 because regex.c defines malloc to call xmalloc.
157 Using BLOCK_INPUT here means the debugger won't run if an error occurs.
158 So let's turn it off. */
159 /* BLOCK_INPUT; */
fb4a568d 160 old = re_set_syntax (RE_SYNTAX_EMACS
b819a390 161 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
d8c85250
CY
162
163 if (STRINGP (Vsearch_spaces_regexp))
164 re_set_whitespace_regexp (SDATA (Vsearch_spaces_regexp));
165 else
166 re_set_whitespace_regexp (NULL);
bbc73b48 167
8f924df7
KH
168 val = (char *) re_compile_pattern ((char *) SDATA (pattern),
169 SBYTES (pattern), &cp->buf);
bbc73b48 170
58e95211
SM
171 /* If the compiled pattern hard codes some of the contents of the
172 syntax-table, it can only be reused with *this* syntax table. */
173 cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
174
bbc73b48
KH
175 re_set_whitespace_regexp (NULL);
176
b819a390 177 re_set_syntax (old);
e7b2dd2e 178 /* UNBLOCK_INPUT; */
ca1d1d23 179 if (val)
06f77a8a 180 xsignal1 (Qinvalid_regexp, build_string (val));
1113d9db 181
487282dc 182 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
183}
184
6efc7887
RS
185/* Shrink each compiled regexp buffer in the cache
186 to the size actually used right now.
187 This is called from garbage collection. */
188
189void
190shrink_regexp_cache ()
191{
a968f437 192 struct regexp_cache *cp;
6efc7887
RS
193
194 for (cp = searchbuf_head; cp != 0; cp = cp->next)
195 {
196 cp->buf.allocated = cp->buf.used;
197 cp->buf.buffer
b23c0a83 198 = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
6efc7887
RS
199 }
200}
201
54dd3310
SM
202/* Clear the regexp cache w.r.t. a particular syntax table,
203 because it was changed.
e5b94d44
CY
204 There is no danger of memory leak here because re_compile_pattern
205 automagically manages the memory in each re_pattern_buffer struct,
206 based on its `allocated' and `buffer' values. */
207void
208clear_regexp_cache ()
209{
210 int i;
211
212 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
920fd1fc
EZ
213 /* It's tempting to compare with the syntax-table we've actually changed,
214 but it's not sufficient because char-table inheritance means that
54dd3310
SM
215 modifying one syntax-table can change others at the same time. */
216 if (!EQ (searchbufs[i].syntax_table, Qt))
217 searchbufs[i].regexp = Qnil;
e5b94d44
CY
218}
219
487282dc 220/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
221 the cache.
222 PATTERN is the pattern to compile.
facdc750 223 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
224 REGP is the structure that says where to store the "register"
225 values that will result from matching this pattern.
226 If it is 0, we should compile the pattern not to record any
227 subexpression bounds.
228 POSIX is nonzero if we want full backtracking (POSIX style)
229 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
230
231struct re_pattern_buffer *
0c8533c6 232compile_pattern (pattern, regp, translate, posix, multibyte)
487282dc
KH
233 Lisp_Object pattern;
234 struct re_registers *regp;
facdc750 235 Lisp_Object translate;
0c8533c6 236 int posix, multibyte;
487282dc
KH
237{
238 struct regexp_cache *cp, **cpp;
239
240 for (cpp = &searchbuf_head; ; cpp = &cp->next)
241 {
242 cp = *cpp;
f1b9c7c1
KR
243 /* Entries are initialized to nil, and may be set to nil by
244 compile_pattern_1 if the pattern isn't valid. Don't apply
49a5f770
KR
245 string accessors in those cases. However, compile_pattern_1
246 is only applied to the cache entry we pick here to reuse. So
247 nil should never appear before a non-nil entry. */
7c752c80 248 if (NILP (cp->regexp))
f1b9c7c1 249 goto compile_it;
d5db4077 250 if (SCHARS (cp->regexp) == SCHARS (pattern)
cf69b13e 251 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
1d288aef 252 && !NILP (Fstring_equal (cp->regexp, pattern))
59fab369 253 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
5679531d 254 && cp->posix == posix
58e95211
SM
255 && (EQ (cp->syntax_table, Qt)
256 || EQ (cp->syntax_table, current_buffer->syntax_table))
89ad6492
KH
257 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
258 && cp->buf.charset_unibyte == charset_unibyte)
487282dc
KH
259 break;
260
f1b9c7c1
KR
261 /* If we're at the end of the cache, compile into the nil cell
262 we found, or the last (least recently used) cell with a
263 string value. */
487282dc
KH
264 if (cp->next == 0)
265 {
f1b9c7c1 266 compile_it:
89ad6492 267 compile_pattern_1 (cp, pattern, translate, regp, posix);
487282dc
KH
268 break;
269 }
270 }
271
272 /* When we get here, cp (aka *cpp) contains the compiled pattern,
273 either because we found it in the cache or because we just compiled it.
274 Move it to the front of the queue to mark it as most recently used. */
275 *cpp = cp->next;
276 cp->next = searchbuf_head;
277 searchbuf_head = cp;
1113d9db 278
6639708c
RS
279 /* Advise the searching functions about the space we have allocated
280 for register data. */
281 if (regp)
282 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
283
89ad6492
KH
284 /* The compiled pattern can be used both for mulitbyte and unibyte
285 target. But, we have to tell which the pattern is used for. */
286 cp->buf.target_multibyte = multibyte;
287
487282dc 288 return &cp->buf;
ca1d1d23
JB
289}
290
ca1d1d23 291\f
b819a390
RS
292static Lisp_Object
293looking_at_1 (string, posix)
ca1d1d23 294 Lisp_Object string;
b819a390 295 int posix;
ca1d1d23
JB
296{
297 Lisp_Object val;
298 unsigned char *p1, *p2;
e7deaab0 299 EMACS_INT s1, s2;
ca1d1d23 300 register int i;
487282dc 301 struct re_pattern_buffer *bufp;
ca1d1d23 302
7074fde6
FP
303 if (running_asynch_code)
304 save_search_regs ();
305
910c747a
RS
306 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
307 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
308 = current_buffer->case_eqv_table;
309
b7826503 310 CHECK_STRING (string);
ef887810
RS
311 bufp = compile_pattern (string,
312 (NILP (Vinhibit_changing_match_data)
313 ? &search_regs : NULL),
487282dc 314 (!NILP (current_buffer->case_fold_search)
0190922f 315 ? current_buffer->case_canon_table : Qnil),
0c8533c6
RS
316 posix,
317 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23
JB
318
319 immediate_quit = 1;
320 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
321
322 /* Get pointers and sizes of the two strings
323 that make up the visible portion of the buffer. */
324
325 p1 = BEGV_ADDR;
fa8ed3e0 326 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 327 p2 = GAP_END_ADDR;
fa8ed3e0 328 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
329 if (s1 < 0)
330 {
331 p2 = p1;
fa8ed3e0 332 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
333 s1 = 0;
334 }
335 if (s2 < 0)
336 {
fa8ed3e0 337 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
338 s2 = 0;
339 }
8bb43c28
RS
340
341 re_match_object = Qnil;
177c0ea7 342
487282dc 343 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
ef887810
RS
344 PT_BYTE - BEGV_BYTE,
345 (NILP (Vinhibit_changing_match_data)
346 ? &search_regs : NULL),
fa8ed3e0 347 ZV_BYTE - BEGV_BYTE);
de182d70 348 immediate_quit = 0;
177c0ea7 349
ca1d1d23
JB
350 if (i == -2)
351 matcher_overflow ();
352
353 val = (0 <= i ? Qt : Qnil);
ef887810 354 if (NILP (Vinhibit_changing_match_data) && i >= 0)
fa8ed3e0
RS
355 for (i = 0; i < search_regs.num_regs; i++)
356 if (search_regs.start[i] >= 0)
357 {
358 search_regs.start[i]
359 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
360 search_regs.end[i]
361 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
362 }
ef887810
RS
363
364 /* Set last_thing_searched only when match data is changed. */
365 if (NILP (Vinhibit_changing_match_data))
366 XSETBUFFER (last_thing_searched, current_buffer);
367
ca1d1d23
JB
368 return val;
369}
370
b819a390 371DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
8c1a1077
PJ
372 doc: /* Return t if text after point matches regular expression REGEXP.
373This function modifies the match data that `match-beginning',
374`match-end' and `match-data' access; save and restore the match
375data if you want to preserve them. */)
376 (regexp)
94f94972 377 Lisp_Object regexp;
b819a390 378{
94f94972 379 return looking_at_1 (regexp, 0);
b819a390
RS
380}
381
382DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
8c1a1077
PJ
383 doc: /* Return t if text after point matches regular expression REGEXP.
384Find the longest match, in accord with Posix regular expression rules.
385This function modifies the match data that `match-beginning',
386`match-end' and `match-data' access; save and restore the match
387data if you want to preserve them. */)
388 (regexp)
94f94972 389 Lisp_Object regexp;
b819a390 390{
94f94972 391 return looking_at_1 (regexp, 1);
b819a390
RS
392}
393\f
394static Lisp_Object
395string_match_1 (regexp, string, start, posix)
ca1d1d23 396 Lisp_Object regexp, string, start;
b819a390 397 int posix;
ca1d1d23
JB
398{
399 int val;
487282dc 400 struct re_pattern_buffer *bufp;
e7deaab0 401 EMACS_INT pos, pos_byte;
0c8533c6 402 int i;
ca1d1d23 403
7074fde6
FP
404 if (running_asynch_code)
405 save_search_regs ();
406
b7826503
PJ
407 CHECK_STRING (regexp);
408 CHECK_STRING (string);
ca1d1d23
JB
409
410 if (NILP (start))
0c8533c6 411 pos = 0, pos_byte = 0;
ca1d1d23
JB
412 else
413 {
d5db4077 414 int len = SCHARS (string);
ca1d1d23 415
b7826503 416 CHECK_NUMBER (start);
0c8533c6
RS
417 pos = XINT (start);
418 if (pos < 0 && -pos <= len)
419 pos = len + pos;
420 else if (0 > pos || pos > len)
ca1d1d23 421 args_out_of_range (string, start);
0c8533c6 422 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
423 }
424
910c747a
RS
425 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
426 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
427 = current_buffer->case_eqv_table;
428
ef887810
RS
429 bufp = compile_pattern (regexp,
430 (NILP (Vinhibit_changing_match_data)
431 ? &search_regs : NULL),
487282dc 432 (!NILP (current_buffer->case_fold_search)
0190922f 433 ? current_buffer->case_canon_table : Qnil),
0c8533c6
RS
434 posix,
435 STRING_MULTIBYTE (string));
ca1d1d23 436 immediate_quit = 1;
8bb43c28 437 re_match_object = string;
177c0ea7 438
d5db4077
KR
439 val = re_search (bufp, (char *) SDATA (string),
440 SBYTES (string), pos_byte,
441 SBYTES (string) - pos_byte,
ef887810
RS
442 (NILP (Vinhibit_changing_match_data)
443 ? &search_regs : NULL));
ca1d1d23 444 immediate_quit = 0;
ef887810
RS
445
446 /* Set last_thing_searched only when match data is changed. */
447 if (NILP (Vinhibit_changing_match_data))
448 last_thing_searched = Qt;
449
ca1d1d23
JB
450 if (val == -2)
451 matcher_overflow ();
452 if (val < 0) return Qnil;
0c8533c6 453
ef887810
RS
454 if (NILP (Vinhibit_changing_match_data))
455 for (i = 0; i < search_regs.num_regs; i++)
456 if (search_regs.start[i] >= 0)
457 {
458 search_regs.start[i]
459 = string_byte_to_char (string, search_regs.start[i]);
460 search_regs.end[i]
461 = string_byte_to_char (string, search_regs.end[i]);
462 }
0c8533c6
RS
463
464 return make_number (string_byte_to_char (string, val));
ca1d1d23 465}
e59a8453 466
b819a390 467DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
8c1a1077 468 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
b85acc4b 469Matching ignores case if `case-fold-search' is non-nil.
8c1a1077
PJ
470If third arg START is non-nil, start search at that index in STRING.
471For index of first char beyond the match, do (match-end 0).
472`match-end' and `match-beginning' also give indices of substrings
2bd2f32d
RS
473matched by parenthesis constructs in the pattern.
474
475You can use the function `match-string' to extract the substrings
476matched by the parenthesis constructions in REGEXP. */)
8c1a1077 477 (regexp, string, start)
b819a390
RS
478 Lisp_Object regexp, string, start;
479{
480 return string_match_1 (regexp, string, start, 0);
481}
482
483DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
8c1a1077
PJ
484 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
485Find the longest match, in accord with Posix regular expression rules.
486Case is ignored if `case-fold-search' is non-nil in the current buffer.
487If third arg START is non-nil, start search at that index in STRING.
488For index of first char beyond the match, do (match-end 0).
489`match-end' and `match-beginning' also give indices of substrings
490matched by parenthesis constructs in the pattern. */)
491 (regexp, string, start)
b819a390
RS
492 Lisp_Object regexp, string, start;
493{
494 return string_match_1 (regexp, string, start, 1);
495}
496
e59a8453
RS
497/* Match REGEXP against STRING, searching all of STRING,
498 and return the index of the match, or negative on failure.
499 This does not clobber the match data. */
500
501int
502fast_string_match (regexp, string)
503 Lisp_Object regexp, string;
504{
505 int val;
487282dc 506 struct re_pattern_buffer *bufp;
e59a8453 507
facdc750
RS
508 bufp = compile_pattern (regexp, 0, Qnil,
509 0, STRING_MULTIBYTE (string));
e59a8453 510 immediate_quit = 1;
8bb43c28 511 re_match_object = string;
177c0ea7 512
d5db4077
KR
513 val = re_search (bufp, (char *) SDATA (string),
514 SBYTES (string), 0,
515 SBYTES (string), 0);
e59a8453
RS
516 immediate_quit = 0;
517 return val;
518}
5679531d
KH
519
520/* Match REGEXP against STRING, searching all of STRING ignoring case,
521 and return the index of the match, or negative on failure.
0c8533c6
RS
522 This does not clobber the match data.
523 We assume that STRING contains single-byte characters. */
5679531d
KH
524
525extern Lisp_Object Vascii_downcase_table;
526
527int
b4577c63 528fast_c_string_match_ignore_case (regexp, string)
5679531d 529 Lisp_Object regexp;
96b80561 530 const char *string;
5679531d
KH
531{
532 int val;
533 struct re_pattern_buffer *bufp;
534 int len = strlen (string);
535
0c8533c6 536 regexp = string_make_unibyte (regexp);
b4577c63 537 re_match_object = Qt;
5679531d 538 bufp = compile_pattern (regexp, 0,
0190922f 539 Vascii_canon_table, 0,
f8bd51c4 540 0);
5679531d
KH
541 immediate_quit = 1;
542 val = re_search (bufp, string, len, 0, len, 0);
543 immediate_quit = 0;
544 return val;
545}
be5f4dfb
KH
546
547/* Like fast_string_match but ignore case. */
548
549int
550fast_string_match_ignore_case (regexp, string)
551 Lisp_Object regexp, string;
552{
553 int val;
554 struct re_pattern_buffer *bufp;
555
0190922f 556 bufp = compile_pattern (regexp, 0, Vascii_canon_table,
be5f4dfb
KH
557 0, STRING_MULTIBYTE (string));
558 immediate_quit = 1;
559 re_match_object = string;
560
561 val = re_search (bufp, (char *) SDATA (string),
562 SBYTES (string), 0,
563 SBYTES (string), 0);
564 immediate_quit = 0;
565 return val;
566}
ca1d1d23 567\f
a80ce213 568/* Match REGEXP against the characters after POS to LIMIT, and return
8b13507a
KH
569 the number of matched characters. If STRING is non-nil, match
570 against the characters in it. In that case, POS and LIMIT are
571 indices into the string. This function doesn't modify the match
572 data. */
573
574EMACS_INT
575fast_looking_at (regexp, pos, pos_byte, limit, limit_byte, string)
576 Lisp_Object regexp;
577 EMACS_INT pos, pos_byte, limit, limit_byte;
578 Lisp_Object string;
579{
580 int multibyte;
581 struct re_pattern_buffer *buf;
582 unsigned char *p1, *p2;
e7deaab0 583 EMACS_INT s1, s2;
8b13507a
KH
584 EMACS_INT len;
585
586 if (STRINGP (string))
587 {
588 if (pos_byte < 0)
589 pos_byte = string_char_to_byte (string, pos);
590 if (limit_byte < 0)
591 limit_byte = string_char_to_byte (string, limit);
592 p1 = NULL;
593 s1 = 0;
594 p2 = SDATA (string);
595 s2 = SBYTES (string);
596 re_match_object = string;
597 multibyte = STRING_MULTIBYTE (string);
598 }
599 else
600 {
601 if (pos_byte < 0)
602 pos_byte = CHAR_TO_BYTE (pos);
603 if (limit_byte < 0)
604 limit_byte = CHAR_TO_BYTE (limit);
605 pos_byte -= BEGV_BYTE;
606 limit_byte -= BEGV_BYTE;
607 p1 = BEGV_ADDR;
608 s1 = GPT_BYTE - BEGV_BYTE;
609 p2 = GAP_END_ADDR;
610 s2 = ZV_BYTE - GPT_BYTE;
611 if (s1 < 0)
612 {
613 p2 = p1;
614 s2 = ZV_BYTE - BEGV_BYTE;
615 s1 = 0;
616 }
617 if (s2 < 0)
618 {
619 s1 = ZV_BYTE - BEGV_BYTE;
620 s2 = 0;
621 }
622 re_match_object = Qnil;
623 multibyte = ! NILP (current_buffer->enable_multibyte_characters);
624 }
625
626 buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
627 immediate_quit = 1;
628 len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
629 pos_byte, NULL, limit_byte);
630 immediate_quit = 0;
631
632 return len;
633}
634
635\f
9169c321
JB
636/* The newline cache: remembering which sections of text have no newlines. */
637
638/* If the user has requested newline caching, make sure it's on.
639 Otherwise, make sure it's off.
640 This is our cheezy way of associating an action with the change of
641 state of a buffer-local variable. */
642static void
643newline_cache_on_off (buf)
644 struct buffer *buf;
645{
646 if (NILP (buf->cache_long_line_scans))
647 {
648 /* It should be off. */
649 if (buf->newline_cache)
650 {
651 free_region_cache (buf->newline_cache);
652 buf->newline_cache = 0;
653 }
654 }
655 else
656 {
657 /* It should be on. */
658 if (buf->newline_cache == 0)
659 buf->newline_cache = new_region_cache ();
660 }
661}
662
663\f
664/* Search for COUNT instances of the character TARGET between START and END.
665
666 If COUNT is positive, search forwards; END must be >= START.
667 If COUNT is negative, search backwards for the -COUNTth instance;
668 END must be <= START.
669 If COUNT is zero, do anything you please; run rogue, for all I care.
670
671 If END is zero, use BEGV or ZV instead, as appropriate for the
672 direction indicated by COUNT.
ffd56f97
JB
673
674 If we find COUNT instances, set *SHORTAGE to zero, and return the
a9f2a45f 675 position past the COUNTth match. Note that for reverse motion
5bfe95c9 676 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 677
9169c321
JB
678 If we don't find COUNT instances before reaching END, set *SHORTAGE
679 to the number of TARGETs left unfound, and return END.
ffd56f97 680
087a5f81
RS
681 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
682 except when inside redisplay. */
683
dfcf069d 684int
9169c321
JB
685scan_buffer (target, start, end, count, shortage, allow_quit)
686 register int target;
e7deaab0 687 EMACS_INT start, end;
9169c321
JB
688 int count;
689 int *shortage;
087a5f81 690 int allow_quit;
ca1d1d23 691{
9169c321 692 struct region_cache *newline_cache;
177c0ea7 693 int direction;
ffd56f97 694
9169c321
JB
695 if (count > 0)
696 {
697 direction = 1;
698 if (! end) end = ZV;
699 }
700 else
701 {
702 direction = -1;
703 if (! end) end = BEGV;
704 }
ffd56f97 705
9169c321
JB
706 newline_cache_on_off (current_buffer);
707 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
708
709 if (shortage != 0)
710 *shortage = 0;
711
087a5f81 712 immediate_quit = allow_quit;
ca1d1d23 713
ffd56f97 714 if (count > 0)
9169c321 715 while (start != end)
ca1d1d23 716 {
9169c321
JB
717 /* Our innermost scanning loop is very simple; it doesn't know
718 about gaps, buffer ends, or the newline cache. ceiling is
719 the position of the last character before the next such
720 obstacle --- the last character the dumb search loop should
721 examine. */
e7deaab0
AS
722 EMACS_INT ceiling_byte = CHAR_TO_BYTE (end) - 1;
723 EMACS_INT start_byte = CHAR_TO_BYTE (start);
724 EMACS_INT tem;
9169c321
JB
725
726 /* If we're looking for a newline, consult the newline cache
727 to see where we can avoid some scanning. */
728 if (target == '\n' && newline_cache)
729 {
730 int next_change;
731 immediate_quit = 0;
732 while (region_cache_forward
fa8ed3e0
RS
733 (current_buffer, newline_cache, start_byte, &next_change))
734 start_byte = next_change;
cbe0db0d 735 immediate_quit = allow_quit;
9169c321 736
fa8ed3e0
RS
737 /* START should never be after END. */
738 if (start_byte > ceiling_byte)
739 start_byte = ceiling_byte;
9169c321
JB
740
741 /* Now the text after start is an unknown region, and
742 next_change is the position of the next known region. */
fa8ed3e0 743 ceiling_byte = min (next_change - 1, ceiling_byte);
9169c321
JB
744 }
745
746 /* The dumb loop can only scan text stored in contiguous
747 bytes. BUFFER_CEILING_OF returns the last character
748 position that is contiguous, so the ceiling is the
749 position after that. */
67ce527d
KH
750 tem = BUFFER_CEILING_OF (start_byte);
751 ceiling_byte = min (tem, ceiling_byte);
9169c321
JB
752
753 {
177c0ea7 754 /* The termination address of the dumb loop. */
fa8ed3e0
RS
755 register unsigned char *ceiling_addr
756 = BYTE_POS_ADDR (ceiling_byte) + 1;
757 register unsigned char *cursor
758 = BYTE_POS_ADDR (start_byte);
9169c321
JB
759 unsigned char *base = cursor;
760
761 while (cursor < ceiling_addr)
762 {
763 unsigned char *scan_start = cursor;
764
765 /* The dumb loop. */
766 while (*cursor != target && ++cursor < ceiling_addr)
767 ;
768
769 /* If we're looking for newlines, cache the fact that
770 the region from start to cursor is free of them. */
771 if (target == '\n' && newline_cache)
772 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
773 start_byte + scan_start - base,
774 start_byte + cursor - base);
9169c321
JB
775
776 /* Did we find the target character? */
777 if (cursor < ceiling_addr)
778 {
779 if (--count == 0)
780 {
781 immediate_quit = 0;
fa8ed3e0 782 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
9169c321
JB
783 }
784 cursor++;
785 }
786 }
787
fa8ed3e0 788 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321 789 }
ca1d1d23
JB
790 }
791 else
9169c321
JB
792 while (start > end)
793 {
794 /* The last character to check before the next obstacle. */
e7deaab0
AS
795 EMACS_INT ceiling_byte = CHAR_TO_BYTE (end);
796 EMACS_INT start_byte = CHAR_TO_BYTE (start);
797 EMACS_INT tem;
9169c321
JB
798
799 /* Consult the newline cache, if appropriate. */
800 if (target == '\n' && newline_cache)
801 {
802 int next_change;
803 immediate_quit = 0;
804 while (region_cache_backward
fa8ed3e0
RS
805 (current_buffer, newline_cache, start_byte, &next_change))
806 start_byte = next_change;
cbe0db0d 807 immediate_quit = allow_quit;
9169c321
JB
808
809 /* Start should never be at or before end. */
fa8ed3e0
RS
810 if (start_byte <= ceiling_byte)
811 start_byte = ceiling_byte + 1;
9169c321
JB
812
813 /* Now the text before start is an unknown region, and
814 next_change is the position of the next known region. */
fa8ed3e0 815 ceiling_byte = max (next_change, ceiling_byte);
9169c321
JB
816 }
817
818 /* Stop scanning before the gap. */
67ce527d
KH
819 tem = BUFFER_FLOOR_OF (start_byte - 1);
820 ceiling_byte = max (tem, ceiling_byte);
9169c321
JB
821
822 {
823 /* The termination address of the dumb loop. */
fa8ed3e0
RS
824 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
825 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
826 unsigned char *base = cursor;
827
828 while (cursor >= ceiling_addr)
829 {
830 unsigned char *scan_start = cursor;
831
832 while (*cursor != target && --cursor >= ceiling_addr)
833 ;
834
835 /* If we're looking for newlines, cache the fact that
836 the region from after the cursor to start is free of them. */
837 if (target == '\n' && newline_cache)
838 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
839 start_byte + cursor - base,
840 start_byte + scan_start - base);
9169c321
JB
841
842 /* Did we find the target character? */
843 if (cursor >= ceiling_addr)
844 {
845 if (++count >= 0)
846 {
847 immediate_quit = 0;
fa8ed3e0 848 return BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
849 }
850 cursor--;
851 }
852 }
853
fa8ed3e0 854 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
855 }
856 }
857
ca1d1d23
JB
858 immediate_quit = 0;
859 if (shortage != 0)
ffd56f97 860 *shortage = count * direction;
9169c321 861 return start;
ca1d1d23 862}
fa8ed3e0
RS
863\f
864/* Search for COUNT instances of a line boundary, which means either a
865 newline or (if selective display enabled) a carriage return.
866 Start at START. If COUNT is negative, search backwards.
867
868 We report the resulting position by calling TEMP_SET_PT_BOTH.
869
870 If we find COUNT instances. we position after (always after,
871 even if scanning backwards) the COUNTth match, and return 0.
872
873 If we don't find COUNT instances before reaching the end of the
874 buffer (or the beginning, if scanning backwards), we return
875 the number of line boundaries left unfound, and position at
876 the limit we bumped up against.
877
878 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
d5d57b92 879 except in special cases. */
ca1d1d23 880
63fa018d 881int
fa8ed3e0 882scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
e7deaab0
AS
883 EMACS_INT start, start_byte;
884 EMACS_INT limit, limit_byte;
fa8ed3e0
RS
885 register int count;
886 int allow_quit;
63fa018d 887{
fa8ed3e0
RS
888 int direction = ((count > 0) ? 1 : -1);
889
890 register unsigned char *cursor;
891 unsigned char *base;
892
e7deaab0 893 EMACS_INT ceiling;
fa8ed3e0
RS
894 register unsigned char *ceiling_addr;
895
d5d57b92
RS
896 int old_immediate_quit = immediate_quit;
897
fa8ed3e0
RS
898 /* The code that follows is like scan_buffer
899 but checks for either newline or carriage return. */
900
d5d57b92
RS
901 if (allow_quit)
902 immediate_quit++;
fa8ed3e0
RS
903
904 start_byte = CHAR_TO_BYTE (start);
905
906 if (count > 0)
907 {
908 while (start_byte < limit_byte)
909 {
910 ceiling = BUFFER_CEILING_OF (start_byte);
911 ceiling = min (limit_byte - 1, ceiling);
912 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
913 base = (cursor = BYTE_POS_ADDR (start_byte));
914 while (1)
915 {
916 while (*cursor != '\n' && ++cursor != ceiling_addr)
917 ;
918
919 if (cursor != ceiling_addr)
920 {
921 if (--count == 0)
922 {
d5d57b92 923 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
924 start_byte = start_byte + cursor - base + 1;
925 start = BYTE_TO_CHAR (start_byte);
926 TEMP_SET_PT_BOTH (start, start_byte);
927 return 0;
928 }
929 else
930 if (++cursor == ceiling_addr)
931 break;
932 }
933 else
934 break;
935 }
936 start_byte += cursor - base;
937 }
938 }
939 else
940 {
fa8ed3e0
RS
941 while (start_byte > limit_byte)
942 {
943 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
944 ceiling = max (limit_byte, ceiling);
945 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
946 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
947 while (1)
948 {
949 while (--cursor != ceiling_addr && *cursor != '\n')
950 ;
951
952 if (cursor != ceiling_addr)
953 {
954 if (++count == 0)
955 {
d5d57b92 956 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
957 /* Return the position AFTER the match we found. */
958 start_byte = start_byte + cursor - base + 1;
959 start = BYTE_TO_CHAR (start_byte);
960 TEMP_SET_PT_BOTH (start, start_byte);
961 return 0;
962 }
963 }
964 else
965 break;
966 }
967 /* Here we add 1 to compensate for the last decrement
968 of CURSOR, which took it past the valid range. */
969 start_byte += cursor - base + 1;
970 }
971 }
972
973 TEMP_SET_PT_BOTH (limit, limit_byte);
d5d57b92 974 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
975
976 return count * direction;
63fa018d
RS
977}
978
ca1d1d23 979int
fa8ed3e0 980find_next_newline_no_quit (from, cnt)
e7deaab0
AS
981 EMACS_INT from;
982 int cnt;
ca1d1d23 983{
fa8ed3e0 984 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
9169c321
JB
985}
986
9169c321
JB
987/* Like find_next_newline, but returns position before the newline,
988 not after, and only search up to TO. This isn't just
989 find_next_newline (...)-1, because you might hit TO. */
fa8ed3e0 990
9169c321
JB
991int
992find_before_next_newline (from, to, cnt)
e7deaab0
AS
993 EMACS_INT from, to;
994 int cnt;
9169c321
JB
995{
996 int shortage;
997 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
998
999 if (shortage == 0)
1000 pos--;
177c0ea7 1001
9169c321 1002 return pos;
ca1d1d23
JB
1003}
1004\f
ca1d1d23
JB
1005/* Subroutines of Lisp buffer search functions. */
1006
1007static Lisp_Object
b819a390 1008search_command (string, bound, noerror, count, direction, RE, posix)
ca1d1d23
JB
1009 Lisp_Object string, bound, noerror, count;
1010 int direction;
1011 int RE;
b819a390 1012 int posix;
ca1d1d23
JB
1013{
1014 register int np;
9f43ad85 1015 int lim, lim_byte;
ca1d1d23
JB
1016 int n = direction;
1017
1018 if (!NILP (count))
1019 {
b7826503 1020 CHECK_NUMBER (count);
ca1d1d23
JB
1021 n *= XINT (count);
1022 }
1023
b7826503 1024 CHECK_STRING (string);
ca1d1d23 1025 if (NILP (bound))
9f43ad85
RS
1026 {
1027 if (n > 0)
1028 lim = ZV, lim_byte = ZV_BYTE;
1029 else
1030 lim = BEGV, lim_byte = BEGV_BYTE;
1031 }
ca1d1d23
JB
1032 else
1033 {
b7826503 1034 CHECK_NUMBER_COERCE_MARKER (bound);
ca1d1d23 1035 lim = XINT (bound);
6ec8bbd2 1036 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
1037 error ("Invalid search bound (wrong side of point)");
1038 if (lim > ZV)
9f43ad85 1039 lim = ZV, lim_byte = ZV_BYTE;
588d2fd5 1040 else if (lim < BEGV)
9f43ad85 1041 lim = BEGV, lim_byte = BEGV_BYTE;
588d2fd5
KH
1042 else
1043 lim_byte = CHAR_TO_BYTE (lim);
ca1d1d23
JB
1044 }
1045
910c747a
RS
1046 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
1047 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
1048 = current_buffer->case_eqv_table;
1049
9f43ad85 1050 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
ca1d1d23 1051 (!NILP (current_buffer->case_fold_search)
facdc750 1052 ? current_buffer->case_canon_table
3135e9fd 1053 : Qnil),
ca1d1d23 1054 (!NILP (current_buffer->case_fold_search)
facdc750 1055 ? current_buffer->case_eqv_table
3135e9fd 1056 : Qnil),
b819a390 1057 posix);
ca1d1d23
JB
1058 if (np <= 0)
1059 {
1060 if (NILP (noerror))
06f77a8a
KS
1061 xsignal1 (Qsearch_failed, string);
1062
ca1d1d23
JB
1063 if (!EQ (noerror, Qt))
1064 {
1065 if (lim < BEGV || lim > ZV)
1066 abort ();
9f43ad85 1067 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
1068 return Qnil;
1069#if 0 /* This would be clean, but maybe programs depend on
1070 a value of nil here. */
481399bf 1071 np = lim;
a5f217b8 1072#endif
ca1d1d23 1073 }
481399bf
RS
1074 else
1075 return Qnil;
ca1d1d23
JB
1076 }
1077
1078 if (np < BEGV || np > ZV)
1079 abort ();
1080
1081 SET_PT (np);
1082
1083 return make_number (np);
1084}
1085\f
fa8ed3e0
RS
1086/* Return 1 if REGEXP it matches just one constant string. */
1087
b6d6a51c
KH
1088static int
1089trivial_regexp_p (regexp)
1090 Lisp_Object regexp;
1091{
d5db4077
KR
1092 int len = SBYTES (regexp);
1093 unsigned char *s = SDATA (regexp);
b6d6a51c
KH
1094 while (--len >= 0)
1095 {
1096 switch (*s++)
1097 {
1098 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1099 return 0;
1100 case '\\':
1101 if (--len < 0)
1102 return 0;
1103 switch (*s++)
1104 {
1105 case '|': case '(': case ')': case '`': case '\'': case 'b':
1106 case 'B': case '<': case '>': case 'w': case 'W': case 's':
29f89fe7 1107 case 'S': case '=': case '{': case '}': case '_':
5679531d 1108 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 1109 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
1110 case '6': case '7': case '8': case '9':
1111 return 0;
1112 }
1113 }
1114 }
1115 return 1;
1116}
1117
ca325161 1118/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 1119 starting at position POS and stopping at position LIM,
b819a390 1120 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
1121 a regular expression if RE is true.
1122
1123 If N is positive, searching is forward and LIM must be greater than POS.
1124 If N is negative, searching is backward and LIM must be less than POS.
1125
facdc750 1126 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 1127 or else the position at the beginning of the Nth occurrence
b819a390
RS
1128 (if searching backward) or the end (if searching forward).
1129
1130 POSIX is nonzero if we want full backtracking (POSIX style)
1131 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 1132
aff2ce94
RS
1133#define TRANSLATE(out, trt, d) \
1134do \
1135 { \
1136 if (! NILP (trt)) \
1137 { \
1138 Lisp_Object temp; \
1139 temp = Faref (trt, make_number (d)); \
1140 if (INTEGERP (temp)) \
1141 out = XINT (temp); \
1142 else \
1143 out = d; \
1144 } \
1145 else \
1146 out = d; \
1147 } \
1148while (0)
facdc750 1149
ef887810
RS
1150/* Only used in search_buffer, to record the end position of the match
1151 when searching regexps and SEARCH_REGS should not be changed
1152 (i.e. Vinhibit_changing_match_data is non-nil). */
1153static struct re_registers search_regs_1;
1154
e7deaab0 1155static EMACS_INT
9f43ad85
RS
1156search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1157 RE, trt, inverse_trt, posix)
ca1d1d23 1158 Lisp_Object string;
e7deaab0
AS
1159 EMACS_INT pos;
1160 EMACS_INT pos_byte;
1161 EMACS_INT lim;
1162 EMACS_INT lim_byte;
ca1d1d23
JB
1163 int n;
1164 int RE;
facdc750
RS
1165 Lisp_Object trt;
1166 Lisp_Object inverse_trt;
b819a390 1167 int posix;
ca1d1d23 1168{
d5db4077
KR
1169 int len = SCHARS (string);
1170 int len_byte = SBYTES (string);
facdc750 1171 register int i;
ca1d1d23 1172
7074fde6
FP
1173 if (running_asynch_code)
1174 save_search_regs ();
1175
a7e4cdde 1176 /* Searching 0 times means don't move. */
ca1d1d23 1177 /* Null string is found at starting position. */
a7e4cdde 1178 if (len == 0 || n == 0)
ca325161 1179 {
0353b28f 1180 set_search_regs (pos_byte, 0);
ca325161
RS
1181 return pos;
1182 }
3f57a499 1183
41a33295 1184 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
ca1d1d23 1185 {
facdc750
RS
1186 unsigned char *p1, *p2;
1187 int s1, s2;
487282dc
KH
1188 struct re_pattern_buffer *bufp;
1189
ef887810
RS
1190 bufp = compile_pattern (string,
1191 (NILP (Vinhibit_changing_match_data)
1192 ? &search_regs : &search_regs_1),
1193 trt, posix,
0c8533c6 1194 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23 1195
ca1d1d23
JB
1196 immediate_quit = 1; /* Quit immediately if user types ^G,
1197 because letting this function finish
1198 can take too long. */
1199 QUIT; /* Do a pending quit right away,
1200 to avoid paradoxical behavior */
1201 /* Get pointers and sizes of the two strings
1202 that make up the visible portion of the buffer. */
1203
1204 p1 = BEGV_ADDR;
fa8ed3e0 1205 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1206 p2 = GAP_END_ADDR;
fa8ed3e0 1207 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1208 if (s1 < 0)
1209 {
1210 p2 = p1;
fa8ed3e0 1211 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1212 s1 = 0;
1213 }
1214 if (s2 < 0)
1215 {
fa8ed3e0 1216 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1217 s2 = 0;
1218 }
8bb43c28 1219 re_match_object = Qnil;
177c0ea7 1220
ca1d1d23
JB
1221 while (n < 0)
1222 {
42db823b 1223 int val;
487282dc 1224 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b 1225 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
ef887810
RS
1226 (NILP (Vinhibit_changing_match_data)
1227 ? &search_regs : &search_regs_1),
42db823b 1228 /* Don't allow match past current point */
4996330b 1229 pos_byte - BEGV_BYTE);
ca1d1d23 1230 if (val == -2)
b6d6a51c
KH
1231 {
1232 matcher_overflow ();
1233 }
ca1d1d23
JB
1234 if (val >= 0)
1235 {
ef887810
RS
1236 if (NILP (Vinhibit_changing_match_data))
1237 {
1238 pos_byte = search_regs.start[0] + BEGV_BYTE;
1239 for (i = 0; i < search_regs.num_regs; i++)
1240 if (search_regs.start[i] >= 0)
1241 {
1242 search_regs.start[i]
1243 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1244 search_regs.end[i]
1245 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1246 }
1247 XSETBUFFER (last_thing_searched, current_buffer);
1248 /* Set pos to the new position. */
1249 pos = search_regs.start[0];
1250 }
1251 else
1252 {
1253 pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1254 /* Set pos to the new position. */
1255 pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1256 }
ca1d1d23
JB
1257 }
1258 else
1259 {
1260 immediate_quit = 0;
1261 return (n);
1262 }
1263 n++;
1264 }
1265 while (n > 0)
1266 {
42db823b 1267 int val;
487282dc 1268 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b 1269 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
ef887810
RS
1270 (NILP (Vinhibit_changing_match_data)
1271 ? &search_regs : &search_regs_1),
4996330b 1272 lim_byte - BEGV_BYTE);
ca1d1d23 1273 if (val == -2)
b6d6a51c
KH
1274 {
1275 matcher_overflow ();
1276 }
ca1d1d23
JB
1277 if (val >= 0)
1278 {
ef887810
RS
1279 if (NILP (Vinhibit_changing_match_data))
1280 {
1281 pos_byte = search_regs.end[0] + BEGV_BYTE;
1282 for (i = 0; i < search_regs.num_regs; i++)
1283 if (search_regs.start[i] >= 0)
1284 {
1285 search_regs.start[i]
1286 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1287 search_regs.end[i]
1288 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1289 }
1290 XSETBUFFER (last_thing_searched, current_buffer);
1291 pos = search_regs.end[0];
1292 }
1293 else
1294 {
1295 pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1296 pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1297 }
ca1d1d23
JB
1298 }
1299 else
1300 {
1301 immediate_quit = 0;
1302 return (0 - n);
1303 }
1304 n--;
1305 }
1306 immediate_quit = 0;
1307 return (pos);
1308 }
1309 else /* non-RE case */
1310 {
facdc750
RS
1311 unsigned char *raw_pattern, *pat;
1312 int raw_pattern_size;
1313 int raw_pattern_size_byte;
1314 unsigned char *patbuf;
1315 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
a967ed62 1316 unsigned char *base_pat;
49e44e81
KH
1317 /* Set to positive if we find a non-ASCII char that need
1318 translation. Otherwise set to zero later. */
1319 int char_base = -1;
040272ce 1320 int boyer_moore_ok = 1;
facdc750
RS
1321
1322 /* MULTIBYTE says whether the text to be searched is multibyte.
1323 We must convert PATTERN to match that, or we will not really
1324 find things right. */
1325
1326 if (multibyte == STRING_MULTIBYTE (string))
1327 {
d5db4077
KR
1328 raw_pattern = (unsigned char *) SDATA (string);
1329 raw_pattern_size = SCHARS (string);
1330 raw_pattern_size_byte = SBYTES (string);
facdc750
RS
1331 }
1332 else if (multibyte)
1333 {
d5db4077 1334 raw_pattern_size = SCHARS (string);
facdc750 1335 raw_pattern_size_byte
d5db4077 1336 = count_size_as_multibyte (SDATA (string),
facdc750 1337 raw_pattern_size);
7276d3d8 1338 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
d5db4077
KR
1339 copy_text (SDATA (string), raw_pattern,
1340 SCHARS (string), 0, 1);
facdc750
RS
1341 }
1342 else
1343 {
1344 /* Converting multibyte to single-byte.
1345
1346 ??? Perhaps this conversion should be done in a special way
1347 by subtracting nonascii-insert-offset from each non-ASCII char,
1348 so that only the multibyte chars which really correspond to
1349 the chosen single-byte character set can possibly match. */
d5db4077
KR
1350 raw_pattern_size = SCHARS (string);
1351 raw_pattern_size_byte = SCHARS (string);
7276d3d8 1352 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
1353 copy_text (SDATA (string), raw_pattern,
1354 SBYTES (string), 1, 0);
facdc750
RS
1355 }
1356
1357 /* Copy and optionally translate the pattern. */
1358 len = raw_pattern_size;
1359 len_byte = raw_pattern_size_byte;
f5f7578b 1360 patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
facdc750
RS
1361 pat = patbuf;
1362 base_pat = raw_pattern;
1363 if (multibyte)
1364 {
a17ae96a
KH
1365 /* Fill patbuf by translated characters in STRING while
1366 checking if we can use boyer-moore search. If TRT is
1367 non-nil, we can use boyer-moore search only if TRT can be
1368 represented by the byte array of 256 elements. For that,
1369 all non-ASCII case-equivalents of all case-senstive
1370 characters in STRING must belong to the same charset and
1371 row. */
1372
facdc750
RS
1373 while (--len >= 0)
1374 {
a17ae96a 1375 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
aff2ce94 1376 int c, translated, inverse;
a17ae96a 1377 int in_charlen, charlen;
facdc750
RS
1378
1379 /* If we got here and the RE flag is set, it's because we're
1380 dealing with a regexp known to be trivial, so the backslash
1381 just quotes the next character. */
1382 if (RE && *base_pat == '\\')
1383 {
1384 len--;
62221f22 1385 raw_pattern_size--;
facdc750
RS
1386 len_byte--;
1387 base_pat++;
1388 }
1389
1390 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
040272ce 1391
a17ae96a 1392 if (NILP (trt))
facdc750 1393 {
a17ae96a
KH
1394 str = base_pat;
1395 charlen = in_charlen;
1396 }
1397 else
1398 {
1399 /* Translate the character. */
1400 TRANSLATE (translated, trt, c);
1401 charlen = CHAR_STRING (translated, str_base);
1402 str = str_base;
1403
1404 /* Check if C has any other case-equivalents. */
1405 TRANSLATE (inverse, inverse_trt, c);
1406 /* If so, check if we can use boyer-moore. */
1407 if (c != inverse && boyer_moore_ok)
1408 {
1409 /* Check if all equivalents belong to the same
1410 group of characters. Note that the check of C
49e44e81
KH
1411 itself is done by the last iteration. */
1412 int this_char_base = -1;
1413
1414 while (boyer_moore_ok)
a17ae96a 1415 {
49e44e81 1416 if (ASCII_BYTE_P (inverse))
a17ae96a 1417 {
49e44e81
KH
1418 if (this_char_base > 0)
1419 boyer_moore_ok = 0;
1420 else
dafaabd1 1421 this_char_base = 0;
a17ae96a 1422 }
49e44e81
KH
1423 else if (CHAR_BYTE8_P (inverse))
1424 /* Boyer-moore search can't handle a
1425 translation of an eight-bit
1426 character. */
1427 boyer_moore_ok = 0;
1428 else if (this_char_base < 0)
1429 {
1430 this_char_base = inverse & ~0x3F;
1431 if (char_base < 0)
1432 char_base = this_char_base;
dafaabd1 1433 else if (this_char_base != char_base)
49e44e81
KH
1434 boyer_moore_ok = 0;
1435 }
1436 else if ((inverse & ~0x3F) != this_char_base)
1437 boyer_moore_ok = 0;
a17ae96a
KH
1438 if (c == inverse)
1439 break;
1440 TRANSLATE (inverse, inverse_trt, inverse);
1441 }
1442 }
aff2ce94 1443 }
facdc750
RS
1444
1445 /* Store this character into the translated pattern. */
a17ae96a
KH
1446 bcopy (str, pat, charlen);
1447 pat += charlen;
facdc750
RS
1448 base_pat += in_charlen;
1449 len_byte -= in_charlen;
1450 }
dafaabd1
AS
1451
1452 /* If char_base is still negative we didn't find any translated
1453 non-ASCII characters. */
1454 if (char_base < 0)
1455 char_base = 0;
facdc750
RS
1456 }
1457 else
1458 {
040272ce 1459 /* Unibyte buffer. */
a17ae96a 1460 char_base = 0;
facdc750
RS
1461 while (--len >= 0)
1462 {
040272ce 1463 int c, translated;
facdc750
RS
1464
1465 /* If we got here and the RE flag is set, it's because we're
1466 dealing with a regexp known to be trivial, so the backslash
1467 just quotes the next character. */
1468 if (RE && *base_pat == '\\')
1469 {
1470 len--;
0190922f 1471 raw_pattern_size--;
facdc750
RS
1472 base_pat++;
1473 }
1474 c = *base_pat++;
aff2ce94 1475 TRANSLATE (translated, trt, c);
facdc750
RS
1476 *pat++ = translated;
1477 }
1478 }
1479
1480 len_byte = pat - patbuf;
1481 len = raw_pattern_size;
1482 pat = base_pat = patbuf;
1483
040272ce 1484 if (boyer_moore_ok)
facdc750 1485 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
aff2ce94 1486 pos, pos_byte, lim, lim_byte,
a17ae96a 1487 char_base);
facdc750
RS
1488 else
1489 return simple_search (n, pat, len, len_byte, trt,
1490 pos, pos_byte, lim, lim_byte);
1491 }
1492}
1493\f
1494/* Do a simple string search N times for the string PAT,
1495 whose length is LEN/LEN_BYTE,
1496 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1497 TRT is the translation table.
f8bd51c4 1498
facdc750
RS
1499 Return the character position where the match is found.
1500 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1501
facdc750
RS
1502 This kind of search works regardless of what is in PAT and
1503 regardless of what is in TRT. It is used in cases where
1504 boyer_moore cannot work. */
1505
e7deaab0 1506static EMACS_INT
facdc750
RS
1507simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1508 int n;
1509 unsigned char *pat;
1510 int len, len_byte;
1511 Lisp_Object trt;
e7deaab0
AS
1512 EMACS_INT pos, pos_byte;
1513 EMACS_INT lim, lim_byte;
facdc750
RS
1514{
1515 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
ab228c24 1516 int forward = n > 0;
49e44e81
KH
1517 /* Number of buffer bytes matched. Note that this may be different
1518 from len_byte in a multibyte buffer. */
1519 int match_byte;
facdc750
RS
1520
1521 if (lim > pos && multibyte)
1522 while (n > 0)
1523 {
1524 while (1)
f8bd51c4 1525 {
facdc750 1526 /* Try matching at position POS. */
e7deaab0
AS
1527 EMACS_INT this_pos = pos;
1528 EMACS_INT this_pos_byte = pos_byte;
facdc750
RS
1529 int this_len = len;
1530 int this_len_byte = len_byte;
1531 unsigned char *p = pat;
a9469498 1532 if (pos + len > lim || pos_byte + len_byte > lim_byte)
facdc750
RS
1533 goto stop;
1534
1535 while (this_len > 0)
1536 {
1537 int charlen, buf_charlen;
ab228c24 1538 int pat_ch, buf_ch;
facdc750 1539
ab228c24 1540 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1541 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1542 ZV_BYTE - this_pos_byte,
1543 buf_charlen);
aff2ce94 1544 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1545
1546 if (buf_ch != pat_ch)
1547 break;
ab228c24
RS
1548
1549 this_len_byte -= charlen;
1550 this_len--;
1551 p += charlen;
1552
1553 this_pos_byte += buf_charlen;
1554 this_pos++;
facdc750
RS
1555 }
1556
1557 if (this_len == 0)
1558 {
49e44e81 1559 match_byte = this_pos_byte - pos_byte;
facdc750 1560 pos += len;
49e44e81 1561 pos_byte += match_byte;
facdc750
RS
1562 break;
1563 }
1564
1565 INC_BOTH (pos, pos_byte);
f8bd51c4 1566 }
facdc750
RS
1567
1568 n--;
1569 }
1570 else if (lim > pos)
1571 while (n > 0)
1572 {
1573 while (1)
f8bd51c4 1574 {
facdc750 1575 /* Try matching at position POS. */
e7deaab0 1576 EMACS_INT this_pos = pos;
facdc750
RS
1577 int this_len = len;
1578 unsigned char *p = pat;
1579
1580 if (pos + len > lim)
1581 goto stop;
1582
1583 while (this_len > 0)
1584 {
1585 int pat_ch = *p++;
1586 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1587 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1588
1589 if (buf_ch != pat_ch)
1590 break;
ab228c24
RS
1591
1592 this_len--;
1593 this_pos++;
facdc750
RS
1594 }
1595
1596 if (this_len == 0)
1597 {
49e44e81 1598 match_byte = len;
facdc750
RS
1599 pos += len;
1600 break;
1601 }
1602
1603 pos++;
f8bd51c4 1604 }
facdc750
RS
1605
1606 n--;
1607 }
1608 /* Backwards search. */
1609 else if (lim < pos && multibyte)
1610 while (n < 0)
1611 {
1612 while (1)
f8bd51c4 1613 {
facdc750 1614 /* Try matching at position POS. */
e7deaab0
AS
1615 EMACS_INT this_pos = pos - len;
1616 EMACS_INT this_pos_byte;
facdc750
RS
1617 int this_len = len;
1618 int this_len_byte = len_byte;
1619 unsigned char *p = pat;
1620
3dcf0066 1621 if (this_pos < lim || (pos_byte - len_byte) < lim_byte)
facdc750 1622 goto stop;
3dcf0066 1623 this_pos_byte = CHAR_TO_BYTE (this_pos);
49e44e81 1624 match_byte = pos_byte - this_pos_byte;
facdc750
RS
1625
1626 while (this_len > 0)
1627 {
1628 int charlen, buf_charlen;
ab228c24 1629 int pat_ch, buf_ch;
facdc750 1630
ab228c24 1631 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1632 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1633 ZV_BYTE - this_pos_byte,
1634 buf_charlen);
aff2ce94 1635 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1636
1637 if (buf_ch != pat_ch)
1638 break;
ab228c24
RS
1639
1640 this_len_byte -= charlen;
1641 this_len--;
1642 p += charlen;
1643 this_pos_byte += buf_charlen;
1644 this_pos++;
facdc750
RS
1645 }
1646
1647 if (this_len == 0)
1648 {
1649 pos -= len;
49e44e81 1650 pos_byte -= match_byte;
facdc750
RS
1651 break;
1652 }
1653
1654 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1655 }
1656
facdc750
RS
1657 n++;
1658 }
1659 else if (lim < pos)
1660 while (n < 0)
1661 {
1662 while (1)
b6d6a51c 1663 {
facdc750 1664 /* Try matching at position POS. */
e7deaab0 1665 EMACS_INT this_pos = pos - len;
facdc750
RS
1666 int this_len = len;
1667 unsigned char *p = pat;
1668
a9469498 1669 if (this_pos < lim)
facdc750
RS
1670 goto stop;
1671
1672 while (this_len > 0)
1673 {
1674 int pat_ch = *p++;
1675 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1676 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1677
1678 if (buf_ch != pat_ch)
1679 break;
ab228c24
RS
1680 this_len--;
1681 this_pos++;
facdc750
RS
1682 }
1683
1684 if (this_len == 0)
b6d6a51c 1685 {
49e44e81 1686 match_byte = len;
facdc750
RS
1687 pos -= len;
1688 break;
b6d6a51c 1689 }
facdc750
RS
1690
1691 pos--;
b6d6a51c 1692 }
facdc750
RS
1693
1694 n++;
b6d6a51c 1695 }
facdc750
RS
1696
1697 stop:
1698 if (n == 0)
aff2ce94 1699 {
ab228c24 1700 if (forward)
49e44e81 1701 set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
ab228c24 1702 else
49e44e81 1703 set_search_regs (multibyte ? pos_byte : pos, match_byte);
aff2ce94
RS
1704
1705 return pos;
1706 }
facdc750
RS
1707 else if (n > 0)
1708 return -n;
1709 else
1710 return n;
1711}
1712\f
0190922f 1713/* Do Boyer-Moore search N times for the string BASE_PAT,
facdc750
RS
1714 whose length is LEN/LEN_BYTE,
1715 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1716 DIRECTION says which direction we search in.
1717 TRT and INVERSE_TRT are translation tables.
0190922f 1718 Characters in PAT are already translated by TRT.
facdc750 1719
0190922f
KH
1720 This kind of search works if all the characters in BASE_PAT that
1721 have nontrivial translation are the same aside from the last byte.
1722 This makes it possible to translate just the last byte of a
1723 character, and do so after just a simple test of the context.
b2e6b10f 1724 CHAR_BASE is nonzero if there is such a non-ASCII character.
facdc750
RS
1725
1726 If that criterion is not satisfied, do not call this function. */
1727
e7deaab0 1728static EMACS_INT
facdc750 1729boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
a17ae96a 1730 pos, pos_byte, lim, lim_byte, char_base)
facdc750
RS
1731 int n;
1732 unsigned char *base_pat;
1733 int len, len_byte;
1734 Lisp_Object trt;
1735 Lisp_Object inverse_trt;
e7deaab0
AS
1736 EMACS_INT pos, pos_byte;
1737 EMACS_INT lim, lim_byte;
a17ae96a 1738 int char_base;
facdc750
RS
1739{
1740 int direction = ((n > 0) ? 1 : -1);
1741 register int dirlen;
e7deaab0
AS
1742 EMACS_INT limit;
1743 int stride_for_teases = 0;
f4646fff 1744 int BM_tab[0400];
177c0ea7 1745 register unsigned char *cursor, *p_limit;
facdc750 1746 register int i, j;
cb6792d2 1747 unsigned char *pat, *pat_end;
facdc750
RS
1748 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1749
1750 unsigned char simple_translate[0400];
0190922f 1751 /* These are set to the preceding bytes of a byte to be translated
49e44e81 1752 if char_base is nonzero. As the maximum byte length of a
a17ae96a 1753 multibyte character is 5, we have to check at most four previous
0190922f
KH
1754 bytes. */
1755 int translate_prev_byte1 = 0;
1756 int translate_prev_byte2 = 0;
1757 int translate_prev_byte3 = 0;
a17ae96a 1758 int translate_prev_byte4 = 0;
facdc750 1759
f4646fff
AS
1760 /* The general approach is that we are going to maintain that we know
1761 the first (closest to the present position, in whatever direction
1762 we're searching) character that could possibly be the last
1763 (furthest from present position) character of a valid match. We
1764 advance the state of our knowledge by looking at that character
1765 and seeing whether it indeed matches the last character of the
1766 pattern. If it does, we take a closer look. If it does not, we
1767 move our pointer (to putative last characters) as far as is
1768 logically possible. This amount of movement, which I call a
1769 stride, will be the length of the pattern if the actual character
1770 appears nowhere in the pattern, otherwise it will be the distance
1771 from the last occurrence of that character to the end of the
1772 pattern. If the amount is zero we have a possible match. */
1773
1774 /* Here we make a "mickey mouse" BM table. The stride of the search
1775 is determined only by the last character of the putative match.
1776 If that character does not match, we will stride the proper
1777 distance to propose a match that superimposes it on the last
1778 instance of a character that matches it (per trt), or misses
1779 it entirely if there is none. */
facdc750
RS
1780
1781 dirlen = len_byte * direction;
cb6792d2
RS
1782
1783 /* Record position after the end of the pattern. */
1784 pat_end = base_pat + len_byte;
1785 /* BASE_PAT points to a character that we start scanning from.
1786 It is the first character in a forward search,
1787 the last character in a backward search. */
facdc750 1788 if (direction < 0)
cb6792d2
RS
1789 base_pat = pat_end - 1;
1790
f4646fff
AS
1791 /* A character that does not appear in the pattern induces a
1792 stride equal to the pattern length. */
1793 for (i = 0; i < 0400; i++)
1794 BM_tab[i] = dirlen;
facdc750
RS
1795
1796 /* We use this for translation, instead of TRT itself.
1797 We fill this in to handle the characters that actually
1798 occur in the pattern. Others don't matter anyway! */
facdc750
RS
1799 for (i = 0; i < 0400; i++)
1800 simple_translate[i] = i;
1801
a17ae96a 1802 if (char_base)
0190922f 1803 {
a17ae96a 1804 /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE. Only a
0190922f 1805 byte following them are the target of translation. */
0190922f 1806 unsigned char str[MAX_MULTIBYTE_LENGTH];
a17ae96a 1807 int len = CHAR_STRING (char_base, str);
0190922f
KH
1808
1809 translate_prev_byte1 = str[len - 2];
1810 if (len > 2)
1811 {
1812 translate_prev_byte2 = str[len - 3];
1813 if (len > 3)
a17ae96a
KH
1814 {
1815 translate_prev_byte3 = str[len - 4];
1816 if (len > 4)
1817 translate_prev_byte4 = str[len - 5];
1818 }
0190922f
KH
1819 }
1820 }
1821
facdc750 1822 i = 0;
f4646fff 1823 while (i != dirlen)
facdc750 1824 {
cb6792d2 1825 unsigned char *ptr = base_pat + i;
facdc750 1826 i += direction;
facdc750 1827 if (! NILP (trt))
ca1d1d23 1828 {
49e44e81
KH
1829 /* If the byte currently looking at is the last of a
1830 character to check case-equivalents, set CH to that
1831 character. An ASCII character and a non-ASCII character
1832 matching with CHAR_BASE are to be checked. */
a17ae96a
KH
1833 int ch = -1;
1834
1835 if (ASCII_BYTE_P (*ptr) || ! multibyte)
1836 ch = *ptr;
49e44e81 1837 else if (char_base
86dc6ccb 1838 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
ca1d1d23 1839 {
49e44e81
KH
1840 unsigned char *charstart = ptr - 1;
1841
1842 while (! (CHAR_HEAD_P (*charstart)))
1843 charstart--;
1844 ch = STRING_CHAR (charstart, ptr - charstart + 1);
a17ae96a
KH
1845 if (char_base != (ch & ~0x3F))
1846 ch = -1;
ca1d1d23 1847 }
facdc750 1848
f51995fa 1849 if (ch >= 0200)
49e44e81
KH
1850 j = (ch & 0x3F) | 0200;
1851 else
1852 j = *ptr;
1853
f4646fff 1854 if (i == dirlen)
facdc750 1855 stride_for_teases = BM_tab[j];
ab228c24 1856
facdc750
RS
1857 BM_tab[j] = dirlen - i;
1858 /* A translation table is accompanied by its inverse -- see */
177c0ea7 1859 /* comment following downcase_table for details */
a17ae96a 1860 if (ch >= 0)
ab228c24
RS
1861 {
1862 int starting_ch = ch;
49e44e81 1863 int starting_j = j;
a17ae96a 1864
ab228c24
RS
1865 while (1)
1866 {
1867 TRANSLATE (ch, inverse_trt, ch);
f51995fa 1868 if (ch >= 0200)
49e44e81 1869 j = (ch & 0x3F) | 0200;
ab228c24 1870 else
a17ae96a 1871 j = ch;
ab228c24
RS
1872
1873 /* For all the characters that map into CH,
1874 set up simple_translate to map the last byte
1875 into STARTING_J. */
1876 simple_translate[j] = starting_j;
1877 if (ch == starting_ch)
1878 break;
1879 BM_tab[j] = dirlen - i;
1880 }
1881 }
facdc750
RS
1882 }
1883 else
1884 {
1885 j = *ptr;
1886
f4646fff 1887 if (i == dirlen)
facdc750
RS
1888 stride_for_teases = BM_tab[j];
1889 BM_tab[j] = dirlen - i;
ca1d1d23 1890 }
f4646fff
AS
1891 /* stride_for_teases tells how much to stride if we get a
1892 match on the far character but are subsequently
1893 disappointed, by recording what the stride would have been
1894 for that character if the last character had been
1895 different. */
facdc750 1896 }
facdc750
RS
1897 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1898 /* loop invariant - POS_BYTE points at where last char (first
1899 char if reverse) of pattern would align in a possible match. */
1900 while (n != 0)
1901 {
e7deaab0 1902 EMACS_INT tail_end;
facdc750
RS
1903 unsigned char *tail_end_ptr;
1904
1905 /* It's been reported that some (broken) compiler thinks that
1906 Boolean expressions in an arithmetic context are unsigned.
1907 Using an explicit ?1:0 prevents this. */
1908 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1909 < 0)
1910 return (n * (0 - direction));
1911 /* First we do the part we can by pointers (maybe nothing) */
1912 QUIT;
1913 pat = base_pat;
1914 limit = pos_byte - dirlen + direction;
67ce527d
KH
1915 if (direction > 0)
1916 {
1917 limit = BUFFER_CEILING_OF (limit);
1918 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1919 can take on without hitting edge of buffer or the gap. */
1920 limit = min (limit, pos_byte + 20000);
1921 limit = min (limit, lim_byte - 1);
1922 }
1923 else
1924 {
1925 limit = BUFFER_FLOOR_OF (limit);
1926 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1927 can take on without hitting edge of buffer or the gap. */
1928 limit = max (limit, pos_byte - 20000);
1929 limit = max (limit, lim_byte);
1930 }
facdc750
RS
1931 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1932 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1933
1934 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1935 {
facdc750
RS
1936 unsigned char *p2;
1937
1938 p_limit = BYTE_POS_ADDR (limit);
1939 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
f4646fff 1940 /* In this loop, pos + cursor - p2 is the surrogate for pos. */
facdc750 1941 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1942 {
facdc750 1943 if (direction > 0) /* worth duplicating */
ca1d1d23 1944 {
f4646fff
AS
1945 while (cursor <= p_limit)
1946 {
1947 if (BM_tab[*cursor] == 0)
1948 goto hit;
facdc750 1949 cursor += BM_tab[*cursor];
f4646fff 1950 }
facdc750
RS
1951 }
1952 else
1953 {
f4646fff
AS
1954 while (cursor >= p_limit)
1955 {
1956 if (BM_tab[*cursor] == 0)
1957 goto hit;
facdc750 1958 cursor += BM_tab[*cursor];
f4646fff 1959 }
facdc750 1960 }
f4646fff
AS
1961 /* If you are here, cursor is beyond the end of the
1962 searched region. You fail to match within the
1963 permitted region and would otherwise try a character
1964 beyond that region. */
1965 break;
1966
1967 hit:
facdc750
RS
1968 i = dirlen - direction;
1969 if (! NILP (trt))
1970 {
1971 while ((i -= direction) + direction != 0)
ca1d1d23 1972 {
facdc750
RS
1973 int ch;
1974 cursor -= direction;
1975 /* Translate only the last byte of a character. */
1976 if (! multibyte
1977 || ((cursor == tail_end_ptr
1978 || CHAR_HEAD_P (cursor[1]))
1979 && (CHAR_HEAD_P (cursor[0])
0190922f
KH
1980 /* Check if this is the last byte of
1981 a translable character. */
1982 || (translate_prev_byte1 == cursor[-1]
1983 && (CHAR_HEAD_P (translate_prev_byte1)
1984 || (translate_prev_byte2 == cursor[-2]
1985 && (CHAR_HEAD_P (translate_prev_byte2)
1986 || (translate_prev_byte3 == cursor[-3]))))))))
facdc750
RS
1987 ch = simple_translate[*cursor];
1988 else
1989 ch = *cursor;
1990 if (pat[i] != ch)
1991 break;
ca1d1d23 1992 }
facdc750
RS
1993 }
1994 else
1995 {
1996 while ((i -= direction) + direction != 0)
ca1d1d23 1997 {
facdc750
RS
1998 cursor -= direction;
1999 if (pat[i] != *cursor)
2000 break;
ca1d1d23 2001 }
facdc750
RS
2002 }
2003 cursor += dirlen - i - direction; /* fix cursor */
2004 if (i + direction == 0)
2005 {
e7deaab0 2006 EMACS_INT position, start, end;
0c8533c6 2007
facdc750 2008 cursor -= direction;
1113d9db 2009
facdc750
RS
2010 position = pos_byte + cursor - p2 + ((direction > 0)
2011 ? 1 - len_byte : 0);
2012 set_search_regs (position, len_byte);
ca325161 2013
ef887810
RS
2014 if (NILP (Vinhibit_changing_match_data))
2015 {
2016 start = search_regs.start[0];
2017 end = search_regs.end[0];
2018 }
2019 else
2020 /* If Vinhibit_changing_match_data is non-nil,
2021 search_regs will not be changed. So let's
2022 compute start and end here. */
2023 {
2024 start = BYTE_TO_CHAR (position);
2025 end = BYTE_TO_CHAR (position + len_byte);
2026 }
2027
facdc750
RS
2028 if ((n -= direction) != 0)
2029 cursor += dirlen; /* to resume search */
ca1d1d23 2030 else
ef887810 2031 return direction > 0 ? end : start;
ca1d1d23 2032 }
facdc750
RS
2033 else
2034 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 2035 }
facdc750
RS
2036 pos_byte += cursor - p2;
2037 }
2038 else
f4646fff
AS
2039 /* Now we'll pick up a clump that has to be done the hard
2040 way because it covers a discontinuity. */
facdc750
RS
2041 {
2042 limit = ((direction > 0)
2043 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
2044 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
2045 limit = ((direction > 0)
2046 ? min (limit + len_byte, lim_byte - 1)
2047 : max (limit - len_byte, lim_byte));
2048 /* LIMIT is now the last value POS_BYTE can have
2049 and still be valid for a possible match. */
2050 while (1)
ca1d1d23 2051 {
f4646fff
AS
2052 /* This loop can be coded for space rather than
2053 speed because it will usually run only once.
2054 (the reach is at most len + 21, and typically
2055 does not exceed len). */
facdc750 2056 while ((limit - pos_byte) * direction >= 0)
f4646fff
AS
2057 {
2058 int ch = FETCH_BYTE (pos_byte);
2059 if (BM_tab[ch] == 0)
2060 goto hit2;
2061 pos_byte += BM_tab[ch];
2062 }
2063 break; /* ran off the end */
2064
2065 hit2:
2066 /* Found what might be a match. */
facdc750
RS
2067 i = dirlen - direction;
2068 while ((i -= direction) + direction != 0)
ca1d1d23 2069 {
facdc750
RS
2070 int ch;
2071 unsigned char *ptr;
2072 pos_byte -= direction;
2073 ptr = BYTE_POS_ADDR (pos_byte);
2074 /* Translate only the last byte of a character. */
2075 if (! multibyte
2076 || ((ptr == tail_end_ptr
2077 || CHAR_HEAD_P (ptr[1]))
2078 && (CHAR_HEAD_P (ptr[0])
0190922f
KH
2079 /* Check if this is the last byte of a
2080 translable character. */
2081 || (translate_prev_byte1 == ptr[-1]
2082 && (CHAR_HEAD_P (translate_prev_byte1)
2083 || (translate_prev_byte2 == ptr[-2]
2084 && (CHAR_HEAD_P (translate_prev_byte2)
2085 || translate_prev_byte3 == ptr[-3])))))))
facdc750
RS
2086 ch = simple_translate[*ptr];
2087 else
2088 ch = *ptr;
2089 if (pat[i] != ch)
2090 break;
2091 }
2092 /* Above loop has moved POS_BYTE part or all the way
2093 back to the first pos (last pos if reverse).
2094 Set it once again at the last (first if reverse) char. */
f4646fff 2095 pos_byte += dirlen - i - direction;
facdc750
RS
2096 if (i + direction == 0)
2097 {
e7deaab0 2098 EMACS_INT position, start, end;
facdc750 2099 pos_byte -= direction;
1113d9db 2100
facdc750 2101 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
facdc750 2102 set_search_regs (position, len_byte);
ca325161 2103
ef887810
RS
2104 if (NILP (Vinhibit_changing_match_data))
2105 {
2106 start = search_regs.start[0];
2107 end = search_regs.end[0];
2108 }
2109 else
2110 /* If Vinhibit_changing_match_data is non-nil,
2111 search_regs will not be changed. So let's
2112 compute start and end here. */
2113 {
2114 start = BYTE_TO_CHAR (position);
2115 end = BYTE_TO_CHAR (position + len_byte);
2116 }
2117
facdc750
RS
2118 if ((n -= direction) != 0)
2119 pos_byte += dirlen; /* to resume search */
ca1d1d23 2120 else
ef887810 2121 return direction > 0 ? end : start;
ca1d1d23 2122 }
facdc750
RS
2123 else
2124 pos_byte += stride_for_teases;
2125 }
2126 }
2127 /* We have done one clump. Can we continue? */
2128 if ((lim_byte - pos_byte) * direction < 0)
2129 return ((0 - n) * direction);
ca1d1d23 2130 }
facdc750 2131 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 2132}
ca325161 2133
fa8ed3e0 2134/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
a7e4cdde
RS
2135 for the overall match just found in the current buffer.
2136 Also clear out the match data for registers 1 and up. */
ca325161
RS
2137
2138static void
fa8ed3e0 2139set_search_regs (beg_byte, nbytes)
e7deaab0 2140 EMACS_INT beg_byte, nbytes;
ca325161 2141{
a7e4cdde
RS
2142 int i;
2143
ef887810
RS
2144 if (!NILP (Vinhibit_changing_match_data))
2145 return;
2146
ca325161
RS
2147 /* Make sure we have registers in which to store
2148 the match position. */
2149 if (search_regs.num_regs == 0)
2150 {
2d4a771a
RS
2151 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2152 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 2153 search_regs.num_regs = 2;
ca325161
RS
2154 }
2155
a7e4cdde
RS
2156 /* Clear out the other registers. */
2157 for (i = 1; i < search_regs.num_regs; i++)
2158 {
2159 search_regs.start[i] = -1;
2160 search_regs.end[i] = -1;
2161 }
2162
fa8ed3e0
RS
2163 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2164 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 2165 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 2166}
ca1d1d23 2167\f
c6e0e1cc
CY
2168/* Given STRING, a string of words separated by word delimiters,
2169 compute a regexp that matches those exact words separated by
2170 arbitrary punctuation. If LAX is nonzero, the end of the string
2171 need not match a word boundary unless it ends in whitespace. */
ca1d1d23
JB
2172
2173static Lisp_Object
c6e0e1cc 2174wordify (string, lax)
ca1d1d23 2175 Lisp_Object string;
c6e0e1cc 2176 int lax;
ca1d1d23
JB
2177{
2178 register unsigned char *p, *o;
0c8533c6 2179 register int i, i_byte, len, punct_count = 0, word_count = 0;
ca1d1d23 2180 Lisp_Object val;
0c8533c6 2181 int prev_c = 0;
c6e0e1cc 2182 int adjust, whitespace_at_end;
ca1d1d23 2183
b7826503 2184 CHECK_STRING (string);
d5db4077
KR
2185 p = SDATA (string);
2186 len = SCHARS (string);
ca1d1d23 2187
0c8533c6
RS
2188 for (i = 0, i_byte = 0; i < len; )
2189 {
2190 int c;
177c0ea7 2191
93daa011 2192 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
0c8533c6
RS
2193
2194 if (SYNTAX (c) != Sword)
2195 {
2196 punct_count++;
2197 if (i > 0 && SYNTAX (prev_c) == Sword)
2198 word_count++;
2199 }
ca1d1d23 2200
0c8533c6
RS
2201 prev_c = c;
2202 }
2203
2204 if (SYNTAX (prev_c) == Sword)
c6e0e1cc
CY
2205 {
2206 word_count++;
2207 whitespace_at_end = 0;
2208 }
2209 else
2210 whitespace_at_end = 1;
2211
0c8533c6 2212 if (!word_count)
c60416e0 2213 return empty_unibyte_string;
0c8533c6 2214
c6e0e1cc
CY
2215 adjust = - punct_count + 5 * (word_count - 1)
2216 + ((lax && !whitespace_at_end) ? 2 : 4);
8a2df937
RS
2217 if (STRING_MULTIBYTE (string))
2218 val = make_uninit_multibyte_string (len + adjust,
d5db4077 2219 SBYTES (string)
8a2df937
RS
2220 + adjust);
2221 else
2222 val = make_uninit_string (len + adjust);
ca1d1d23 2223
d5db4077 2224 o = SDATA (val);
ca1d1d23
JB
2225 *o++ = '\\';
2226 *o++ = 'b';
1e9582d4 2227 prev_c = 0;
ca1d1d23 2228
1e9582d4
RS
2229 for (i = 0, i_byte = 0; i < len; )
2230 {
2231 int c;
2232 int i_byte_orig = i_byte;
177c0ea7 2233
93daa011 2234 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
1e9582d4
RS
2235
2236 if (SYNTAX (c) == Sword)
2237 {
5d69fe10 2238 bcopy (SDATA (string) + i_byte_orig, o,
1e9582d4
RS
2239 i_byte - i_byte_orig);
2240 o += i_byte - i_byte_orig;
2241 }
2242 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2243 {
2244 *o++ = '\\';
2245 *o++ = 'W';
2246 *o++ = '\\';
2247 *o++ = 'W';
2248 *o++ = '*';
2249 }
2250
2251 prev_c = c;
2252 }
ca1d1d23 2253
c6e0e1cc
CY
2254 if (!lax || whitespace_at_end)
2255 {
2256 *o++ = '\\';
2257 *o++ = 'b';
2258 }
ca1d1d23
JB
2259
2260 return val;
2261}
2262\f
2263DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
8c1a1077
PJ
2264 "MSearch backward: ",
2265 doc: /* Search backward from point for STRING.
2266Set point to the beginning of the occurrence found, and return point.
2267An optional second argument bounds the search; it is a buffer position.
2268The match found must not extend before that position.
2269Optional third argument, if t, means if fail just return nil (no error).
2270 If not nil and not t, position at limit of search and return nil.
2271Optional fourth argument is repeat count--search for successive occurrences.
2272
2273Search case-sensitivity is determined by the value of the variable
2274`case-fold-search', which see.
2275
2276See also the functions `match-beginning', `match-end' and `replace-match'. */)
2277 (string, bound, noerror, count)
ca1d1d23
JB
2278 Lisp_Object string, bound, noerror, count;
2279{
b819a390 2280 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
2281}
2282
6af43974 2283DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
8c1a1077
PJ
2284 doc: /* Search forward from point for STRING.
2285Set point to the end of the occurrence found, and return point.
2286An optional second argument bounds the search; it is a buffer position.
d6aacdcd 2287The match found must not extend after that position. A value of nil is
48740f01 2288 equivalent to (point-max).
8c1a1077
PJ
2289Optional third argument, if t, means if fail just return nil (no error).
2290 If not nil and not t, move to limit of search and return nil.
2291Optional fourth argument is repeat count--search for successive occurrences.
2292
2293Search case-sensitivity is determined by the value of the variable
2294`case-fold-search', which see.
2295
2296See also the functions `match-beginning', `match-end' and `replace-match'. */)
2297 (string, bound, noerror, count)
ca1d1d23
JB
2298 Lisp_Object string, bound, noerror, count;
2299{
b819a390 2300 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
2301}
2302
2303DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
8c1a1077
PJ
2304 "sWord search backward: ",
2305 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2306Set point to the beginning of the occurrence found, and return point.
2307An optional second argument bounds the search; it is a buffer position.
2308The match found must not extend before that position.
2309Optional third argument, if t, means if fail just return nil (no error).
2310 If not nil and not t, move to limit of search and return nil.
2311Optional fourth argument is repeat count--search for successive occurrences. */)
2312 (string, bound, noerror, count)
ca1d1d23
JB
2313 Lisp_Object string, bound, noerror, count;
2314{
c6e0e1cc 2315 return search_command (wordify (string, 0), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2316}
2317
2318DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
8c1a1077
PJ
2319 "sWord search: ",
2320 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2321Set point to the end of the occurrence found, and return point.
2322An optional second argument bounds the search; it is a buffer position.
2323The match found must not extend after that position.
2324Optional third argument, if t, means if fail just return nil (no error).
2325 If not nil and not t, move to limit of search and return nil.
2326Optional fourth argument is repeat count--search for successive occurrences. */)
2327 (string, bound, noerror, count)
ca1d1d23
JB
2328 Lisp_Object string, bound, noerror, count;
2329{
c6e0e1cc
CY
2330 return search_command (wordify (string, 0), bound, noerror, count, 1, 1, 0);
2331}
2332
2333DEFUN ("word-search-backward-lax", Fword_search_backward_lax, Sword_search_backward_lax, 1, 4,
2334 "sWord search backward: ",
2335 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2336Set point to the beginning of the occurrence found, and return point.
2337
2338Unlike `word-search-backward', the end of STRING need not match a word
2339boundary unless it ends in whitespace.
2340
2341An optional second argument bounds the search; it is a buffer position.
2342The match found must not extend before that position.
2343Optional third argument, if t, means if fail just return nil (no error).
2344 If not nil and not t, move to limit of search and return nil.
2345Optional fourth argument is repeat count--search for successive occurrences. */)
2346 (string, bound, noerror, count)
2347 Lisp_Object string, bound, noerror, count;
2348{
2349 return search_command (wordify (string, 1), bound, noerror, count, -1, 1, 0);
2350}
2351
2352DEFUN ("word-search-forward-lax", Fword_search_forward_lax, Sword_search_forward_lax, 1, 4,
2353 "sWord search: ",
2354 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2355Set point to the end of the occurrence found, and return point.
2356
2357Unlike `word-search-forward', the end of STRING need not match a word
2358boundary unless it ends in whitespace.
2359
2360An optional second argument bounds the search; it is a buffer position.
2361The match found must not extend after that position.
2362Optional third argument, if t, means if fail just return nil (no error).
2363 If not nil and not t, move to limit of search and return nil.
2364Optional fourth argument is repeat count--search for successive occurrences. */)
2365 (string, bound, noerror, count)
2366 Lisp_Object string, bound, noerror, count;
2367{
2368 return search_command (wordify (string, 1), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
2369}
2370
2371DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
8c1a1077
PJ
2372 "sRE search backward: ",
2373 doc: /* Search backward from point for match for regular expression REGEXP.
2374Set point to the beginning of the match, and return point.
2375The match found is the one starting last in the buffer
2376and yet ending before the origin of the search.
2377An optional second argument bounds the search; it is a buffer position.
2378The match found must start at or after that position.
2379Optional third argument, if t, means if fail just return nil (no error).
2380 If not nil and not t, move to limit of search and return nil.
2381Optional fourth argument is repeat count--search for successive occurrences.
2382See also the functions `match-beginning', `match-end', `match-string',
2383and `replace-match'. */)
2384 (regexp, bound, noerror, count)
19c0a730 2385 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2386{
b819a390 2387 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2388}
2389
2390DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
8c1a1077
PJ
2391 "sRE search: ",
2392 doc: /* Search forward from point for regular expression REGEXP.
2393Set point to the end of the occurrence found, and return point.
2394An optional second argument bounds the search; it is a buffer position.
2395The match found must not extend after that position.
2396Optional third argument, if t, means if fail just return nil (no error).
2397 If not nil and not t, move to limit of search and return nil.
2398Optional fourth argument is repeat count--search for successive occurrences.
2399See also the functions `match-beginning', `match-end', `match-string',
2400and `replace-match'. */)
2401 (regexp, bound, noerror, count)
19c0a730 2402 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2403{
b819a390
RS
2404 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2405}
2406
2407DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
8c1a1077
PJ
2408 "sPosix search backward: ",
2409 doc: /* Search backward from point for match for regular expression REGEXP.
2410Find the longest match in accord with Posix regular expression rules.
2411Set point to the beginning of the match, and return point.
2412The match found is the one starting last in the buffer
2413and yet ending before the origin of the search.
2414An optional second argument bounds the search; it is a buffer position.
2415The match found must start at or after that position.
2416Optional third argument, if t, means if fail just return nil (no error).
2417 If not nil and not t, move to limit of search and return nil.
2418Optional fourth argument is repeat count--search for successive occurrences.
2419See also the functions `match-beginning', `match-end', `match-string',
2420and `replace-match'. */)
2421 (regexp, bound, noerror, count)
b819a390
RS
2422 Lisp_Object regexp, bound, noerror, count;
2423{
2424 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2425}
2426
2427DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
8c1a1077
PJ
2428 "sPosix search: ",
2429 doc: /* Search forward from point for regular expression REGEXP.
2430Find the longest match in accord with Posix regular expression rules.
2431Set point to the end of the occurrence found, and return point.
2432An optional second argument bounds the search; it is a buffer position.
2433The match found must not extend after that position.
2434Optional third argument, if t, means if fail just return nil (no error).
2435 If not nil and not t, move to limit of search and return nil.
2436Optional fourth argument is repeat count--search for successive occurrences.
2437See also the functions `match-beginning', `match-end', `match-string',
2438and `replace-match'. */)
2439 (regexp, bound, noerror, count)
b819a390
RS
2440 Lisp_Object regexp, bound, noerror, count;
2441{
2442 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2443}
2444\f
d7a5ad5f 2445DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
8c1a1077 2446 doc: /* Replace text matched by last search with NEWTEXT.
4dd0c271
RS
2447Leave point at the end of the replacement text.
2448
8c1a1077
PJ
2449If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2450Otherwise maybe capitalize the whole text, or maybe just word initials,
2451based on the replaced text.
2452If the replaced text has only capital letters
2453and has at least one multiletter word, convert NEWTEXT to all caps.
4dd0c271
RS
2454Otherwise if all words are capitalized in the replaced text,
2455capitalize each word in NEWTEXT.
8c1a1077
PJ
2456
2457If third arg LITERAL is non-nil, insert NEWTEXT literally.
2458Otherwise treat `\\' as special:
2459 `\\&' in NEWTEXT means substitute original matched text.
2460 `\\N' means substitute what matched the Nth `\\(...\\)'.
2461 If Nth parens didn't match, substitute nothing.
2462 `\\\\' means insert one `\\'.
4dd0c271
RS
2463Case conversion does not apply to these substitutions.
2464
8c1a1077 2465FIXEDCASE and LITERAL are optional arguments.
8c1a1077
PJ
2466
2467The optional fourth argument STRING can be a string to modify.
2468This is meaningful when the previous match was done against STRING,
2469using `string-match'. When used this way, `replace-match'
2470creates and returns a new string made by copying STRING and replacing
2471the part of STRING that was matched.
2472
2473The optional fifth argument SUBEXP specifies a subexpression;
2474it says to replace just that subexpression with NEWTEXT,
2475rather than replacing the entire matched text.
2476This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2477`\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2478NEWTEXT in place of subexp N.
2479This is useful only after a regular expression search or match,
2480since only regular expressions have distinguished subexpressions. */)
2481 (newtext, fixedcase, literal, string, subexp)
d7a5ad5f 2482 Lisp_Object newtext, fixedcase, literal, string, subexp;
ca1d1d23
JB
2483{
2484 enum { nochange, all_caps, cap_initial } case_action;
ac3b28b1 2485 register int pos, pos_byte;
ca1d1d23 2486 int some_multiletter_word;
97832bd0 2487 int some_lowercase;
73dc8771 2488 int some_uppercase;
208767c3 2489 int some_nonuppercase_initial;
ca1d1d23 2490 register int c, prevc;
d7a5ad5f 2491 int sub;
e7deaab0 2492 EMACS_INT opoint, newpoint;
ca1d1d23 2493
b7826503 2494 CHECK_STRING (newtext);
ca1d1d23 2495
080c45fd 2496 if (! NILP (string))
b7826503 2497 CHECK_STRING (string);
080c45fd 2498
ca1d1d23
JB
2499 case_action = nochange; /* We tried an initialization */
2500 /* but some C compilers blew it */
4746118a
JB
2501
2502 if (search_regs.num_regs <= 0)
d72cdbfc 2503 error ("`replace-match' called before any match found");
4746118a 2504
d7a5ad5f
RS
2505 if (NILP (subexp))
2506 sub = 0;
2507 else
2508 {
b7826503 2509 CHECK_NUMBER (subexp);
d7a5ad5f
RS
2510 sub = XINT (subexp);
2511 if (sub < 0 || sub >= search_regs.num_regs)
2512 args_out_of_range (subexp, make_number (search_regs.num_regs));
2513 }
2514
080c45fd
RS
2515 if (NILP (string))
2516 {
d7a5ad5f
RS
2517 if (search_regs.start[sub] < BEGV
2518 || search_regs.start[sub] > search_regs.end[sub]
2519 || search_regs.end[sub] > ZV)
2520 args_out_of_range (make_number (search_regs.start[sub]),
2521 make_number (search_regs.end[sub]));
080c45fd
RS
2522 }
2523 else
2524 {
d7a5ad5f
RS
2525 if (search_regs.start[sub] < 0
2526 || search_regs.start[sub] > search_regs.end[sub]
d5db4077 2527 || search_regs.end[sub] > SCHARS (string))
d7a5ad5f
RS
2528 args_out_of_range (make_number (search_regs.start[sub]),
2529 make_number (search_regs.end[sub]));
080c45fd 2530 }
ca1d1d23
JB
2531
2532 if (NILP (fixedcase))
2533 {
2534 /* Decide how to casify by examining the matched text. */
e7deaab0 2535 EMACS_INT last;
ca1d1d23 2536
ac3b28b1
KH
2537 pos = search_regs.start[sub];
2538 last = search_regs.end[sub];
fa8ed3e0
RS
2539
2540 if (NILP (string))
ac3b28b1 2541 pos_byte = CHAR_TO_BYTE (pos);
fa8ed3e0 2542 else
ac3b28b1 2543 pos_byte = string_char_to_byte (string, pos);
fa8ed3e0 2544
ca1d1d23
JB
2545 prevc = '\n';
2546 case_action = all_caps;
2547
2548 /* some_multiletter_word is set nonzero if any original word
2549 is more than one letter long. */
2550 some_multiletter_word = 0;
97832bd0 2551 some_lowercase = 0;
208767c3 2552 some_nonuppercase_initial = 0;
73dc8771 2553 some_uppercase = 0;
ca1d1d23 2554
ac3b28b1 2555 while (pos < last)
ca1d1d23 2556 {
080c45fd 2557 if (NILP (string))
ac3b28b1 2558 {
93daa011 2559 c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
ac3b28b1
KH
2560 INC_BOTH (pos, pos_byte);
2561 }
080c45fd 2562 else
93daa011 2563 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
080c45fd 2564
ca1d1d23
JB
2565 if (LOWERCASEP (c))
2566 {
2567 /* Cannot be all caps if any original char is lower case */
2568
97832bd0 2569 some_lowercase = 1;
ca1d1d23 2570 if (SYNTAX (prevc) != Sword)
208767c3 2571 some_nonuppercase_initial = 1;
ca1d1d23
JB
2572 else
2573 some_multiletter_word = 1;
2574 }
d16c2b66 2575 else if (UPPERCASEP (c))
ca1d1d23 2576 {
73dc8771 2577 some_uppercase = 1;
97832bd0 2578 if (SYNTAX (prevc) != Sword)
c4d460ce 2579 ;
97832bd0 2580 else
ca1d1d23
JB
2581 some_multiletter_word = 1;
2582 }
208767c3
RS
2583 else
2584 {
2585 /* If the initial is a caseless word constituent,
2586 treat that like a lowercase initial. */
2587 if (SYNTAX (prevc) != Sword)
2588 some_nonuppercase_initial = 1;
2589 }
ca1d1d23
JB
2590
2591 prevc = c;
2592 }
2593
97832bd0
RS
2594 /* Convert to all caps if the old text is all caps
2595 and has at least one multiletter word. */
2596 if (! some_lowercase && some_multiletter_word)
2597 case_action = all_caps;
c4d460ce 2598 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2599 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2600 case_action = cap_initial;
208767c3 2601 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2602 /* Should x -> yz, operating on X, give Yz or YZ?
2603 We'll assume the latter. */
2604 case_action = all_caps;
97832bd0
RS
2605 else
2606 case_action = nochange;
ca1d1d23
JB
2607 }
2608
080c45fd
RS
2609 /* Do replacement in a string. */
2610 if (!NILP (string))
2611 {
2612 Lisp_Object before, after;
2613
2614 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2615 make_number (search_regs.start[sub]));
2616 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2617
636a5e28
RS
2618 /* Substitute parts of the match into NEWTEXT
2619 if desired. */
080c45fd
RS
2620 if (NILP (literal))
2621 {
e7deaab0
AS
2622 EMACS_INT lastpos = 0;
2623 EMACS_INT lastpos_byte = 0;
080c45fd
RS
2624 /* We build up the substituted string in ACCUM. */
2625 Lisp_Object accum;
2626 Lisp_Object middle;
d5db4077 2627 int length = SBYTES (newtext);
080c45fd
RS
2628
2629 accum = Qnil;
2630
ac3b28b1 2631 for (pos_byte = 0, pos = 0; pos_byte < length;)
080c45fd
RS
2632 {
2633 int substart = -1;
6bbd7a29 2634 int subend = 0;
1e79ec24 2635 int delbackslash = 0;
080c45fd 2636
0c8533c6
RS
2637 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2638
080c45fd
RS
2639 if (c == '\\')
2640 {
0c8533c6 2641 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
177c0ea7 2642
080c45fd
RS
2643 if (c == '&')
2644 {
d7a5ad5f
RS
2645 substart = search_regs.start[sub];
2646 subend = search_regs.end[sub];
080c45fd 2647 }
76fc1ea2 2648 else if (c >= '1' && c <= '9')
080c45fd 2649 {
76fc1ea2
KH
2650 if (search_regs.start[c - '0'] >= 0
2651 && c <= search_regs.num_regs + '0')
080c45fd
RS
2652 {
2653 substart = search_regs.start[c - '0'];
2654 subend = search_regs.end[c - '0'];
2655 }
76fc1ea2
KH
2656 else
2657 {
2658 /* If that subexp did not match,
2659 replace \\N with nothing. */
2660 substart = 0;
2661 subend = 0;
2662 }
080c45fd 2663 }
1e79ec24
KH
2664 else if (c == '\\')
2665 delbackslash = 1;
636a5e28
RS
2666 else
2667 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2668 }
2669 if (substart >= 0)
2670 {
d131e79c
RS
2671 if (pos - 2 != lastpos)
2672 middle = substring_both (newtext, lastpos,
2673 lastpos_byte,
2674 pos - 2, pos_byte - 2);
080c45fd
RS
2675 else
2676 middle = Qnil;
2677 accum = concat3 (accum, middle,
0c8533c6
RS
2678 Fsubstring (string,
2679 make_number (substart),
080c45fd
RS
2680 make_number (subend)));
2681 lastpos = pos;
0c8533c6 2682 lastpos_byte = pos_byte;
080c45fd 2683 }
1e79ec24
KH
2684 else if (delbackslash)
2685 {
d131e79c
RS
2686 middle = substring_both (newtext, lastpos,
2687 lastpos_byte,
2688 pos - 1, pos_byte - 1);
0c8533c6 2689
1e79ec24
KH
2690 accum = concat2 (accum, middle);
2691 lastpos = pos;
0c8533c6 2692 lastpos_byte = pos_byte;
1e79ec24 2693 }
080c45fd
RS
2694 }
2695
d131e79c
RS
2696 if (pos != lastpos)
2697 middle = substring_both (newtext, lastpos,
2698 lastpos_byte,
0c8533c6 2699 pos, pos_byte);
080c45fd
RS
2700 else
2701 middle = Qnil;
2702
2703 newtext = concat2 (accum, middle);
2704 }
2705
636a5e28 2706 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2707 if (case_action == all_caps)
2708 newtext = Fupcase (newtext);
2709 else if (case_action == cap_initial)
2b2eead9 2710 newtext = Fupcase_initials (newtext);
080c45fd
RS
2711
2712 return concat3 (before, newtext, after);
2713 }
2714
09c4719e 2715 /* Record point, then move (quietly) to the start of the match. */
9160906f 2716 if (PT >= search_regs.end[sub])
b0eba991 2717 opoint = PT - ZV;
9160906f
RS
2718 else if (PT > search_regs.start[sub])
2719 opoint = search_regs.end[sub] - ZV;
b0eba991
RS
2720 else
2721 opoint = PT;
2722
886ed6ec
RS
2723 /* If we want non-literal replacement,
2724 perform substitution on the replacement string. */
2725 if (NILP (literal))
ca1d1d23 2726 {
d5db4077 2727 int length = SBYTES (newtext);
68e69fbd
RS
2728 unsigned char *substed;
2729 int substed_alloc_size, substed_len;
3bc25e52
KH
2730 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2731 int str_multibyte = STRING_MULTIBYTE (newtext);
2732 Lisp_Object rev_tbl;
886ed6ec 2733 int really_changed = 0;
3bc25e52 2734
8f924df7 2735 rev_tbl = Qnil;
ac3b28b1 2736
68e69fbd
RS
2737 substed_alloc_size = length * 2 + 100;
2738 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2739 substed_len = 0;
2740
3bc25e52
KH
2741 /* Go thru NEWTEXT, producing the actual text to insert in
2742 SUBSTED while adjusting multibyteness to that of the current
2743 buffer. */
ca1d1d23 2744
ac3b28b1 2745 for (pos_byte = 0, pos = 0; pos_byte < length;)
ca1d1d23 2746 {
68e69fbd 2747 unsigned char str[MAX_MULTIBYTE_LENGTH];
f8ce8a0d
GM
2748 unsigned char *add_stuff = NULL;
2749 int add_len = 0;
68e69fbd 2750 int idx = -1;
9a76659d 2751
3bc25e52
KH
2752 if (str_multibyte)
2753 {
eb99a8dd 2754 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
3bc25e52
KH
2755 if (!buf_multibyte)
2756 c = multibyte_char_to_unibyte (c, rev_tbl);
2757 }
2758 else
2759 {
2760 /* Note that we don't have to increment POS. */
5d69fe10 2761 c = SREF (newtext, pos_byte++);
3bc25e52 2762 if (buf_multibyte)
4c0354d7 2763 MAKE_CHAR_MULTIBYTE (c);
3bc25e52 2764 }
ac3b28b1 2765
68e69fbd
RS
2766 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2767 or set IDX to a match index, which means put that part
2768 of the buffer text into SUBSTED. */
2769
ca1d1d23
JB
2770 if (c == '\\')
2771 {
886ed6ec
RS
2772 really_changed = 1;
2773
3bc25e52
KH
2774 if (str_multibyte)
2775 {
eb99a8dd
KH
2776 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2777 pos, pos_byte);
071ce769 2778 if (!buf_multibyte && !ASCII_CHAR_P (c))
3bc25e52
KH
2779 c = multibyte_char_to_unibyte (c, rev_tbl);
2780 }
2781 else
2782 {
d5db4077 2783 c = SREF (newtext, pos_byte++);
3bc25e52 2784 if (buf_multibyte)
4c0354d7 2785 MAKE_CHAR_MULTIBYTE (c);
3bc25e52
KH
2786 }
2787
ca1d1d23 2788 if (c == '&')
68e69fbd 2789 idx = sub;
78445046 2790 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
2791 {
2792 if (search_regs.start[c - '0'] >= 1)
68e69fbd 2793 idx = c - '0';
ca1d1d23 2794 }
636a5e28 2795 else if (c == '\\')
68e69fbd 2796 add_len = 1, add_stuff = "\\";
636a5e28 2797 else
3bc25e52
KH
2798 {
2799 xfree (substed);
2800 error ("Invalid use of `\\' in replacement text");
2801 }
ca1d1d23
JB
2802 }
2803 else
68e69fbd
RS
2804 {
2805 add_len = CHAR_STRING (c, str);
2806 add_stuff = str;
2807 }
2808
2809 /* If we want to copy part of a previous match,
2810 set up ADD_STUFF and ADD_LEN to point to it. */
2811 if (idx >= 0)
2812 {
e7deaab0 2813 EMACS_INT begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
68e69fbd
RS
2814 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2815 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2816 move_gap (search_regs.start[idx]);
2817 add_stuff = BYTE_POS_ADDR (begbyte);
2818 }
2819
2820 /* Now the stuff we want to add to SUBSTED
2821 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2822
2823 /* Make sure SUBSTED is big enough. */
2824 if (substed_len + add_len >= substed_alloc_size)
2825 {
2826 substed_alloc_size = substed_len + add_len + 500;
2827 substed = (unsigned char *) xrealloc (substed,
2828 substed_alloc_size + 1);
2829 }
2830
2831 /* Now add to the end of SUBSTED. */
f8ce8a0d
GM
2832 if (add_stuff)
2833 {
2834 bcopy (add_stuff, substed + substed_len, add_len);
2835 substed_len += add_len;
2836 }
ca1d1d23 2837 }
68e69fbd 2838
886ed6ec 2839 if (really_changed)
76fc1ea2
KH
2840 {
2841 if (buf_multibyte)
2842 {
2843 int nchars = multibyte_chars_in_text (substed, substed_len);
68e69fbd 2844
76fc1ea2
KH
2845 newtext = make_multibyte_string (substed, nchars, substed_len);
2846 }
2847 else
2848 newtext = make_unibyte_string (substed, substed_len);
2849 }
68e69fbd 2850 xfree (substed);
ca1d1d23
JB
2851 }
2852
886ed6ec
RS
2853 /* Replace the old text with the new in the cleanest possible way. */
2854 replace_range (search_regs.start[sub], search_regs.end[sub],
2855 newtext, 1, 0, 1);
d5db4077 2856 newpoint = search_regs.start[sub] + SCHARS (newtext);
ca1d1d23
JB
2857
2858 if (case_action == all_caps)
886ed6ec
RS
2859 Fupcase_region (make_number (search_regs.start[sub]),
2860 make_number (newpoint));
ca1d1d23 2861 else if (case_action == cap_initial)
886ed6ec
RS
2862 Fupcase_initials_region (make_number (search_regs.start[sub]),
2863 make_number (newpoint));
3e18eecf 2864
98e942e0
RS
2865 /* Adjust search data for this change. */
2866 {
e7deaab0
AS
2867 EMACS_INT oldend = search_regs.end[sub];
2868 EMACS_INT oldstart = search_regs.start[sub];
2869 EMACS_INT change = newpoint - search_regs.end[sub];
98e942e0
RS
2870 int i;
2871
2872 for (i = 0; i < search_regs.num_regs; i++)
2873 {
41c01205 2874 if (search_regs.start[i] >= oldend)
98e942e0 2875 search_regs.start[i] += change;
41c01205
DK
2876 else if (search_regs.start[i] > oldstart)
2877 search_regs.start[i] = oldstart;
2878 if (search_regs.end[i] >= oldend)
98e942e0 2879 search_regs.end[i] += change;
41c01205
DK
2880 else if (search_regs.end[i] > oldstart)
2881 search_regs.end[i] = oldstart;
98e942e0
RS
2882 }
2883 }
2884
b0eba991 2885 /* Put point back where it was in the text. */
8d808a65 2886 if (opoint <= 0)
fa8ed3e0 2887 TEMP_SET_PT (opoint + ZV);
b0eba991 2888 else
fa8ed3e0 2889 TEMP_SET_PT (opoint);
b0eba991
RS
2890
2891 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2892 move_if_not_intangible (newpoint);
177c0ea7 2893
ca1d1d23
JB
2894 return Qnil;
2895}
2896\f
2897static Lisp_Object
2898match_limit (num, beginningp)
2899 Lisp_Object num;
2900 int beginningp;
2901{
2902 register int n;
2903
b7826503 2904 CHECK_NUMBER (num);
ca1d1d23 2905 n = XINT (num);
f90a5bf5 2906 if (n < 0)
bd2cbd56 2907 args_out_of_range (num, make_number (0));
f90a5bf5
RS
2908 if (search_regs.num_regs <= 0)
2909 error ("No match data, because no search succeeded");
9b9ceb61 2910 if (n >= search_regs.num_regs
4746118a 2911 || search_regs.start[n] < 0)
ca1d1d23
JB
2912 return Qnil;
2913 return (make_number ((beginningp) ? search_regs.start[n]
2914 : search_regs.end[n]));
2915}
2916
2917DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
8c1a1077
PJ
2918 doc: /* Return position of start of text matched by last search.
2919SUBEXP, a number, specifies which parenthesized expression in the last
2920 regexp.
2921Value is nil if SUBEXPth pair didn't match, or there were less than
2922 SUBEXP pairs.
2923Zero means the entire text matched by the whole regexp or whole string. */)
2924 (subexp)
5806161b 2925 Lisp_Object subexp;
ca1d1d23 2926{
5806161b 2927 return match_limit (subexp, 1);
ca1d1d23
JB
2928}
2929
2930DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
8c1a1077
PJ
2931 doc: /* Return position of end of text matched by last search.
2932SUBEXP, a number, specifies which parenthesized expression in the last
2933 regexp.
2934Value is nil if SUBEXPth pair didn't match, or there were less than
2935 SUBEXP pairs.
2936Zero means the entire text matched by the whole regexp or whole string. */)
2937 (subexp)
5806161b 2938 Lisp_Object subexp;
ca1d1d23 2939{
5806161b 2940 return match_limit (subexp, 0);
177c0ea7 2941}
ca1d1d23 2942
abd0071c 2943DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
8c1a1077
PJ
2944 doc: /* Return a list containing all info on what the last search matched.
2945Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2946All the elements are markers or nil (nil if the Nth pair didn't match)
2947if the last match was on a buffer; integers or nil if a string was matched.
febb8aba 2948Use `set-match-data' to reinstate the data in this list.
8c1a1077 2949
41c01205
DK
2950If INTEGERS (the optional first argument) is non-nil, always use
2951integers \(rather than markers) to represent buffer positions. In
2952this case, and if the last match was in a buffer, the buffer will get
2953stored as one additional element at the end of the list.
2954
abd0071c
KS
2955If REUSE is a list, reuse it as part of the value. If REUSE is long
2956enough to hold all the values, and if INTEGERS is non-nil, no consing
2957is done.
2958
2959If optional third arg RESEAT is non-nil, any previous markers on the
2960REUSE list will be modified to point to nowhere.
2961
140a6b7e 2962Return value is undefined if the last search failed. */)
abd0071c
KS
2963 (integers, reuse, reseat)
2964 Lisp_Object integers, reuse, reseat;
ca1d1d23 2965{
56256c2a 2966 Lisp_Object tail, prev;
4746118a 2967 Lisp_Object *data;
ca1d1d23
JB
2968 int i, len;
2969
abd0071c
KS
2970 if (!NILP (reseat))
2971 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2972 if (MARKERP (XCAR (tail)))
2973 {
51f10faa 2974 unchain_marker (XMARKER (XCAR (tail)));
abd0071c
KS
2975 XSETCAR (tail, Qnil);
2976 }
2977
daa37602 2978 if (NILP (last_thing_searched))
c36bcf1b 2979 return Qnil;
daa37602 2980
6bbd7a29
GM
2981 prev = Qnil;
2982
41c01205 2983 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
4746118a
JB
2984 * sizeof (Lisp_Object));
2985
41c01205 2986 len = 0;
4746118a 2987 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2988 {
2989 int start = search_regs.start[i];
2990 if (start >= 0)
2991 {
56256c2a
RS
2992 if (EQ (last_thing_searched, Qt)
2993 || ! NILP (integers))
ca1d1d23 2994 {
c235cce7
KH
2995 XSETFASTINT (data[2 * i], start);
2996 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2997 }
0ed62dc7 2998 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2999 {
3000 data[2 * i] = Fmake_marker ();
daa37602
JB
3001 Fset_marker (data[2 * i],
3002 make_number (start),
3003 last_thing_searched);
ca1d1d23
JB
3004 data[2 * i + 1] = Fmake_marker ();
3005 Fset_marker (data[2 * i + 1],
177c0ea7 3006 make_number (search_regs.end[i]),
daa37602 3007 last_thing_searched);
ca1d1d23 3008 }
daa37602
JB
3009 else
3010 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
3011 abort ();
3012
abd0071c 3013 len = 2 * i + 2;
ca1d1d23
JB
3014 }
3015 else
abd0071c 3016 data[2 * i] = data[2 * i + 1] = Qnil;
ca1d1d23 3017 }
56256c2a 3018
bd2cbd56 3019 if (BUFFERP (last_thing_searched) && !NILP (integers))
41c01205 3020 {
bd2cbd56 3021 data[len] = last_thing_searched;
41c01205
DK
3022 len++;
3023 }
3024
56256c2a
RS
3025 /* If REUSE is not usable, cons up the values and return them. */
3026 if (! CONSP (reuse))
41c01205 3027 return Flist (len, data);
56256c2a
RS
3028
3029 /* If REUSE is a list, store as many value elements as will fit
3030 into the elements of REUSE. */
3031 for (i = 0, tail = reuse; CONSP (tail);
c1d497be 3032 i++, tail = XCDR (tail))
56256c2a 3033 {
41c01205 3034 if (i < len)
f3fbd155 3035 XSETCAR (tail, data[i]);
56256c2a 3036 else
f3fbd155 3037 XSETCAR (tail, Qnil);
56256c2a
RS
3038 prev = tail;
3039 }
3040
3041 /* If we couldn't fit all value elements into REUSE,
3042 cons up the rest of them and add them to the end of REUSE. */
41c01205
DK
3043 if (i < len)
3044 XSETCDR (prev, Flist (len - i, data + i));
56256c2a
RS
3045
3046 return reuse;
ca1d1d23
JB
3047}
3048
b51d6c92
SM
3049/* We used to have an internal use variant of `reseat' described as:
3050
3051 If RESEAT is `evaporate', put the markers back on the free list
3052 immediately. No other references to the markers must exist in this
3053 case, so it is used only internally on the unwind stack and
3054 save-match-data from Lisp.
3055
3056 But it was ill-conceived: those supposedly-internal markers get exposed via
3057 the undo-list, so freeing them here is unsafe. */
ca1d1d23 3058
abd0071c 3059DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
8c1a1077 3060 doc: /* Set internal data on last search match from elements of LIST.
abd0071c
KS
3061LIST should have been created by calling `match-data' previously.
3062
51f10faa 3063If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
abd0071c
KS
3064 (list, reseat)
3065 register Lisp_Object list, reseat;
ca1d1d23
JB
3066{
3067 register int i;
3068 register Lisp_Object marker;
3069
7074fde6
FP
3070 if (running_asynch_code)
3071 save_search_regs ();
3072
29100cea 3073 CHECK_LIST (list);
ca1d1d23 3074
41c01205
DK
3075 /* Unless we find a marker with a buffer or an explicit buffer
3076 in LIST, assume that this match data came from a string. */
daa37602
JB
3077 last_thing_searched = Qt;
3078
4746118a
JB
3079 /* Allocate registers if they don't already exist. */
3080 {
d084e942 3081 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
3082
3083 if (length > search_regs.num_regs)
3084 {
1113d9db
JB
3085 if (search_regs.num_regs == 0)
3086 {
3087 search_regs.start
3088 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3089 search_regs.end
3090 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3091 }
4746118a 3092 else
1113d9db
JB
3093 {
3094 search_regs.start
3095 = (regoff_t *) xrealloc (search_regs.start,
3096 length * sizeof (regoff_t));
3097 search_regs.end
3098 = (regoff_t *) xrealloc (search_regs.end,
3099 length * sizeof (regoff_t));
3100 }
4746118a 3101
e62371e9
KH
3102 for (i = search_regs.num_regs; i < length; i++)
3103 search_regs.start[i] = -1;
3104
487282dc 3105 search_regs.num_regs = length;
4746118a 3106 }
ca1d1d23 3107
abd0071c 3108 for (i = 0; CONSP (list); i++)
41c01205 3109 {
abd0071c 3110 marker = XCAR (list);
bd2cbd56 3111 if (BUFFERP (marker))
c3762cbd 3112 {
bd2cbd56 3113 last_thing_searched = marker;
c3762cbd
DK
3114 break;
3115 }
3116 if (i >= length)
3117 break;
41c01205
DK
3118 if (NILP (marker))
3119 {
3120 search_regs.start[i] = -1;
abd0071c 3121 list = XCDR (list);
41c01205
DK
3122 }
3123 else
3124 {
e7deaab0 3125 EMACS_INT from;
abd0071c 3126 Lisp_Object m;
e2811828 3127
abd0071c 3128 m = marker;
41c01205
DK
3129 if (MARKERP (marker))
3130 {
3131 if (XMARKER (marker)->buffer == 0)
3132 XSETFASTINT (marker, 0);
3133 else
3134 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3135 }
e2811828 3136
41c01205
DK
3137 CHECK_NUMBER_COERCE_MARKER (marker);
3138 from = XINT (marker);
e2811828 3139
abd0071c
KS
3140 if (!NILP (reseat) && MARKERP (m))
3141 {
b51d6c92 3142 unchain_marker (XMARKER (m));
9ad54a7e 3143 XSETCAR (list, Qnil);
abd0071c
KS
3144 }
3145
3146 if ((list = XCDR (list), !CONSP (list)))
3147 break;
3148
3149 m = marker = XCAR (list);
3150
41c01205
DK
3151 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3152 XSETFASTINT (marker, 0);
e2811828 3153
41c01205
DK
3154 CHECK_NUMBER_COERCE_MARKER (marker);
3155 search_regs.start[i] = from;
3156 search_regs.end[i] = XINT (marker);
abd0071c
KS
3157
3158 if (!NILP (reseat) && MARKERP (m))
3159 {
b51d6c92 3160 unchain_marker (XMARKER (m));
9ad54a7e 3161 XSETCAR (list, Qnil);
abd0071c 3162 }
41c01205 3163 }
abd0071c 3164 list = XCDR (list);
41c01205 3165 }
ca1d1d23 3166
41c01205
DK
3167 for (; i < search_regs.num_regs; i++)
3168 search_regs.start[i] = -1;
3169 }
ca1d1d23 3170
177c0ea7 3171 return Qnil;
ca1d1d23
JB
3172}
3173
7074fde6
FP
3174/* If non-zero the match data have been saved in saved_search_regs
3175 during the execution of a sentinel or filter. */
75ebf74b 3176static int search_regs_saved;
7074fde6 3177static struct re_registers saved_search_regs;
41c01205 3178static Lisp_Object saved_last_thing_searched;
7074fde6
FP
3179
3180/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3181 if asynchronous code (filter or sentinel) is running. */
3182static void
3183save_search_regs ()
3184{
3185 if (!search_regs_saved)
3186 {
3187 saved_search_regs.num_regs = search_regs.num_regs;
3188 saved_search_regs.start = search_regs.start;
3189 saved_search_regs.end = search_regs.end;
41c01205
DK
3190 saved_last_thing_searched = last_thing_searched;
3191 last_thing_searched = Qnil;
7074fde6 3192 search_regs.num_regs = 0;
2d4a771a
RS
3193 search_regs.start = 0;
3194 search_regs.end = 0;
7074fde6
FP
3195
3196 search_regs_saved = 1;
3197 }
3198}
3199
3200/* Called upon exit from filters and sentinels. */
3201void
abd0071c 3202restore_search_regs ()
7074fde6
FP
3203{
3204 if (search_regs_saved)
3205 {
3206 if (search_regs.num_regs > 0)
3207 {
3208 xfree (search_regs.start);
3209 xfree (search_regs.end);
3210 }
3211 search_regs.num_regs = saved_search_regs.num_regs;
3212 search_regs.start = saved_search_regs.start;
3213 search_regs.end = saved_search_regs.end;
41c01205
DK
3214 last_thing_searched = saved_last_thing_searched;
3215 saved_last_thing_searched = Qnil;
7074fde6
FP
3216 search_regs_saved = 0;
3217 }
3218}
3219
abd0071c
KS
3220static Lisp_Object
3221unwind_set_match_data (list)
3222 Lisp_Object list;
3223{
b51d6c92
SM
3224 /* It is NOT ALWAYS safe to free (evaporate) the markers immediately. */
3225 return Fset_match_data (list, Qt);
abd0071c
KS
3226}
3227
3228/* Called to unwind protect the match data. */
3229void
3230record_unwind_save_match_data ()
3231{
3232 record_unwind_protect (unwind_set_match_data,
3233 Fmatch_data (Qnil, Qnil, Qnil));
3234}
3235
ca1d1d23
JB
3236/* Quote a string to inactivate reg-expr chars */
3237
3238DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
8c1a1077
PJ
3239 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
3240 (string)
5806161b 3241 Lisp_Object string;
ca1d1d23
JB
3242{
3243 register unsigned char *in, *out, *end;
3244 register unsigned char *temp;
0c8533c6 3245 int backslashes_added = 0;
ca1d1d23 3246
b7826503 3247 CHECK_STRING (string);
ca1d1d23 3248
d5db4077 3249 temp = (unsigned char *) alloca (SBYTES (string) * 2);
ca1d1d23
JB
3250
3251 /* Now copy the data into the new string, inserting escapes. */
3252
d5db4077
KR
3253 in = SDATA (string);
3254 end = in + SBYTES (string);
177c0ea7 3255 out = temp;
ca1d1d23
JB
3256
3257 for (; in != end; in++)
3258 {
66bc6082 3259 if (*in == '['
ca1d1d23
JB
3260 || *in == '*' || *in == '.' || *in == '\\'
3261 || *in == '?' || *in == '+'
3262 || *in == '^' || *in == '$')
0c8533c6 3263 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
3264 *out++ = *in;
3265 }
3266
3f8100f1 3267 return make_specified_string (temp,
d5db4077 3268 SCHARS (string) + backslashes_added,
3f8100f1
RS
3269 out - temp,
3270 STRING_MULTIBYTE (string));
ca1d1d23 3271}
177c0ea7 3272\f
dfcf069d 3273void
ca1d1d23
JB
3274syms_of_search ()
3275{
3276 register int i;
3277
487282dc
KH
3278 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3279 {
3280 searchbufs[i].buf.allocated = 100;
b23c0a83 3281 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
487282dc
KH
3282 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3283 searchbufs[i].regexp = Qnil;
ecdb561e 3284 searchbufs[i].whitespace_regexp = Qnil;
b69e3c18 3285 searchbufs[i].syntax_table = Qnil;
487282dc 3286 staticpro (&searchbufs[i].regexp);
aa77b5ce 3287 staticpro (&searchbufs[i].whitespace_regexp);
b69e3c18 3288 staticpro (&searchbufs[i].syntax_table);
487282dc
KH
3289 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3290 }
3291 searchbuf_head = &searchbufs[0];
ca1d1d23
JB
3292
3293 Qsearch_failed = intern ("search-failed");
3294 staticpro (&Qsearch_failed);
3295 Qinvalid_regexp = intern ("invalid-regexp");
3296 staticpro (&Qinvalid_regexp);
3297
3298 Fput (Qsearch_failed, Qerror_conditions,
3299 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3300 Fput (Qsearch_failed, Qerror_message,
3301 build_string ("Search failed"));
3302
3303 Fput (Qinvalid_regexp, Qerror_conditions,
3304 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3305 Fput (Qinvalid_regexp, Qerror_message,
3306 build_string ("Invalid regexp"));
3307
daa37602
JB
3308 last_thing_searched = Qnil;
3309 staticpro (&last_thing_searched);
3310
0f6af254
DK
3311 saved_last_thing_searched = Qnil;
3312 staticpro (&saved_last_thing_searched);
3313
41a33295 3314 DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
e2811828 3315 doc: /* Regexp to substitute for bunches of spaces in regexp search.
f31a9a68
RS
3316Some commands use this for user-specified regexps.
3317Spaces that occur inside character classes or repetition operators
3318or other such regexp constructs are not replaced with this.
3319A value of nil (which is the normal value) means treat spaces literally. */);
41a33295 3320 Vsearch_spaces_regexp = Qnil;
f31a9a68 3321
ef887810
RS
3322 DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data,
3323 doc: /* Internal use only.
39d0bf74
RS
3324If non-nil, the primitive searching and matching functions
3325such as `looking-at', `string-match', `re-search-forward', etc.,
3326do not set the match data. The proper way to use this variable
3327is to bind it with `let' around a small expression. */);
ef887810
RS
3328 Vinhibit_changing_match_data = Qnil;
3329
ca1d1d23 3330 defsubr (&Slooking_at);
b819a390
RS
3331 defsubr (&Sposix_looking_at);
3332 defsubr (&Sstring_match);
3333 defsubr (&Sposix_string_match);
ca1d1d23
JB
3334 defsubr (&Ssearch_forward);
3335 defsubr (&Ssearch_backward);
3336 defsubr (&Sword_search_forward);
3337 defsubr (&Sword_search_backward);
c6e0e1cc
CY
3338 defsubr (&Sword_search_forward_lax);
3339 defsubr (&Sword_search_backward_lax);
ca1d1d23
JB
3340 defsubr (&Sre_search_forward);
3341 defsubr (&Sre_search_backward);
b819a390
RS
3342 defsubr (&Sposix_search_forward);
3343 defsubr (&Sposix_search_backward);
ca1d1d23
JB
3344 defsubr (&Sreplace_match);
3345 defsubr (&Smatch_beginning);
3346 defsubr (&Smatch_end);
3347 defsubr (&Smatch_data);
3f1c005b 3348 defsubr (&Sset_match_data);
ca1d1d23
JB
3349 defsubr (&Sregexp_quote);
3350}
76fc1ea2
KH
3351
3352/* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3353 (do not change this comment) */