I spell gud.
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
429ab54e 2 Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
76b6f707 3 2003, 2004, 2005, 2006, 2007, 2008, 2009
8cabe764 4 Free Software Foundation, Inc.
ca1d1d23
JB
5
6This file is part of GNU Emacs.
7
9ec0b715 8GNU Emacs is free software: you can redistribute it and/or modify
ca1d1d23 9it under the terms of the GNU General Public License as published by
9ec0b715
GM
10the Free Software Foundation, either version 3 of the License, or
11(at your option) any later version.
ca1d1d23
JB
12
13GNU Emacs is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
9ec0b715 19along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
ca1d1d23
JB
20
21
18160b98 22#include <config.h>
ca1d1d23
JB
23#include "lisp.h"
24#include "syntax.h"
5679531d 25#include "category.h"
ca1d1d23 26#include "buffer.h"
76eb0881 27#include "character.h"
89ad6492 28#include "charset.h"
9169c321 29#include "region-cache.h"
ca1d1d23 30#include "commands.h"
9ac0d9e0 31#include "blockinput.h"
bf1760bb 32#include "intervals.h"
4746118a 33
ca1d1d23
JB
34#include <sys/types.h>
35#include "regex.h"
36
1d288aef 37#define REGEXP_CACHE_SIZE 20
ca1d1d23 38
487282dc
KH
39/* If the regexp is non-nil, then the buffer contains the compiled form
40 of that regexp, suitable for searching. */
1d288aef
RS
41struct regexp_cache
42{
487282dc 43 struct regexp_cache *next;
ecdb561e 44 Lisp_Object regexp, whitespace_regexp;
b69e3c18 45 /* Syntax table for which the regexp applies. We need this because
58e95211
SM
46 of character classes. If this is t, then the compiled pattern is valid
47 for any syntax-table. */
b69e3c18 48 Lisp_Object syntax_table;
487282dc
KH
49 struct re_pattern_buffer buf;
50 char fastmap[0400];
b819a390
RS
51 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
52 char posix;
487282dc 53};
ca1d1d23 54
487282dc
KH
55/* The instances of that struct. */
56struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 57
487282dc
KH
58/* The head of the linked list; points to the most recently used buffer. */
59struct regexp_cache *searchbuf_head;
ca1d1d23 60
ca1d1d23 61
4746118a
JB
62/* Every call to re_match, etc., must pass &search_regs as the regs
63 argument unless you can show it is unnecessary (i.e., if re_match
64 is certainly going to be called again before region-around-match
65 can be called).
66
67 Since the registers are now dynamically allocated, we need to make
68 sure not to refer to the Nth register before checking that it has
1113d9db
JB
69 been allocated by checking search_regs.num_regs.
70
71 The regex code keeps track of whether it has allocated the search
487282dc
KH
72 buffer using bits in the re_pattern_buffer. This means that whenever
73 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
74 allocated any registers, and will allocate new registers the next
75 time you call a searching or matching function. Therefore, we need
76 to call re_set_registers after compiling a new pattern or after
77 setting the match registers, so that the regex functions will be
78 able to free or re-allocate it properly. */
ca1d1d23
JB
79static struct re_registers search_regs;
80
daa37602
JB
81/* The buffer in which the last search was performed, or
82 Qt if the last search was done in a string;
83 Qnil if no searching has been done yet. */
84static Lisp_Object last_thing_searched;
ca1d1d23 85
8e6208c5 86/* error condition signaled when regexp compile_pattern fails */
ca1d1d23
JB
87
88Lisp_Object Qinvalid_regexp;
89
06f77a8a
KS
90/* Error condition used for failing searches */
91Lisp_Object Qsearch_failed;
92
41a33295 93Lisp_Object Vsearch_spaces_regexp;
f31a9a68 94
ef887810
RS
95/* If non-nil, the match data will not be changed during call to
96 searching or matching functions. This variable is for internal use
97 only. */
98Lisp_Object Vinhibit_changing_match_data;
99
e7deaab0
AS
100static void set_search_regs P_ ((EMACS_INT, EMACS_INT));
101static void save_search_regs P_ ((void));
102static EMACS_INT simple_search P_ ((int, unsigned char *, int, int,
103 Lisp_Object, EMACS_INT, EMACS_INT,
104 EMACS_INT, EMACS_INT));
105static EMACS_INT boyer_moore P_ ((int, unsigned char *, int, int,
106 Lisp_Object, Lisp_Object,
107 EMACS_INT, EMACS_INT,
108 EMACS_INT, EMACS_INT, int));
109static EMACS_INT search_buffer P_ ((Lisp_Object, EMACS_INT, EMACS_INT,
110 EMACS_INT, EMACS_INT, int, int,
111 Lisp_Object, Lisp_Object, int));
01d09305 112static void matcher_overflow () NO_RETURN;
b819a390 113
ca1d1d23
JB
114static void
115matcher_overflow ()
116{
117 error ("Stack overflow in regexp matcher");
118}
119
b819a390
RS
120/* Compile a regexp and signal a Lisp error if anything goes wrong.
121 PATTERN is the pattern to compile.
122 CP is the place to put the result.
facdc750 123 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
124 REGP is the structure that says where to store the "register"
125 values that will result from matching this pattern.
126 If it is 0, we should compile the pattern not to record any
127 subexpression bounds.
128 POSIX is nonzero if we want full backtracking (POSIX style)
5679531d 129 for this pattern. 0 means backtrack only enough to get a valid match.
bbc73b48
KH
130
131 The behavior also depends on Vsearch_spaces_regexp. */
ca1d1d23 132
487282dc 133static void
89ad6492 134compile_pattern_1 (cp, pattern, translate, regp, posix)
487282dc 135 struct regexp_cache *cp;
ca1d1d23 136 Lisp_Object pattern;
facdc750 137 Lisp_Object translate;
487282dc 138 struct re_registers *regp;
b819a390 139 int posix;
ca1d1d23 140{
d451e4db 141 char *val;
b819a390 142 reg_syntax_t old;
ca1d1d23 143
487282dc 144 cp->regexp = Qnil;
59fab369 145 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
b819a390 146 cp->posix = posix;
93daa011 147 cp->buf.multibyte = STRING_MULTIBYTE (pattern);
89ad6492 148 cp->buf.charset_unibyte = charset_unibyte;
d8c85250
CY
149 if (STRINGP (Vsearch_spaces_regexp))
150 cp->whitespace_regexp = Vsearch_spaces_regexp;
151 else
152 cp->whitespace_regexp = Qnil;
153
e7b2dd2e
RS
154 /* rms: I think BLOCK_INPUT is not needed here any more,
155 because regex.c defines malloc to call xmalloc.
156 Using BLOCK_INPUT here means the debugger won't run if an error occurs.
157 So let's turn it off. */
158 /* BLOCK_INPUT; */
fb4a568d 159 old = re_set_syntax (RE_SYNTAX_EMACS
b819a390 160 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
d8c85250
CY
161
162 if (STRINGP (Vsearch_spaces_regexp))
163 re_set_whitespace_regexp (SDATA (Vsearch_spaces_regexp));
164 else
165 re_set_whitespace_regexp (NULL);
bbc73b48 166
8f924df7
KH
167 val = (char *) re_compile_pattern ((char *) SDATA (pattern),
168 SBYTES (pattern), &cp->buf);
bbc73b48 169
58e95211
SM
170 /* If the compiled pattern hard codes some of the contents of the
171 syntax-table, it can only be reused with *this* syntax table. */
172 cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
173
bbc73b48
KH
174 re_set_whitespace_regexp (NULL);
175
b819a390 176 re_set_syntax (old);
e7b2dd2e 177 /* UNBLOCK_INPUT; */
ca1d1d23 178 if (val)
06f77a8a 179 xsignal1 (Qinvalid_regexp, build_string (val));
1113d9db 180
487282dc 181 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
182}
183
6efc7887
RS
184/* Shrink each compiled regexp buffer in the cache
185 to the size actually used right now.
186 This is called from garbage collection. */
187
188void
189shrink_regexp_cache ()
190{
a968f437 191 struct regexp_cache *cp;
6efc7887
RS
192
193 for (cp = searchbuf_head; cp != 0; cp = cp->next)
194 {
195 cp->buf.allocated = cp->buf.used;
196 cp->buf.buffer
b23c0a83 197 = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
6efc7887
RS
198 }
199}
200
54dd3310
SM
201/* Clear the regexp cache w.r.t. a particular syntax table,
202 because it was changed.
e5b94d44
CY
203 There is no danger of memory leak here because re_compile_pattern
204 automagically manages the memory in each re_pattern_buffer struct,
205 based on its `allocated' and `buffer' values. */
206void
207clear_regexp_cache ()
208{
209 int i;
210
211 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
920fd1fc
EZ
212 /* It's tempting to compare with the syntax-table we've actually changed,
213 but it's not sufficient because char-table inheritance means that
54dd3310
SM
214 modifying one syntax-table can change others at the same time. */
215 if (!EQ (searchbufs[i].syntax_table, Qt))
216 searchbufs[i].regexp = Qnil;
e5b94d44
CY
217}
218
487282dc 219/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
220 the cache.
221 PATTERN is the pattern to compile.
facdc750 222 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
223 REGP is the structure that says where to store the "register"
224 values that will result from matching this pattern.
225 If it is 0, we should compile the pattern not to record any
226 subexpression bounds.
227 POSIX is nonzero if we want full backtracking (POSIX style)
228 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
229
230struct re_pattern_buffer *
0c8533c6 231compile_pattern (pattern, regp, translate, posix, multibyte)
487282dc
KH
232 Lisp_Object pattern;
233 struct re_registers *regp;
facdc750 234 Lisp_Object translate;
0c8533c6 235 int posix, multibyte;
487282dc
KH
236{
237 struct regexp_cache *cp, **cpp;
238
239 for (cpp = &searchbuf_head; ; cpp = &cp->next)
240 {
241 cp = *cpp;
f1b9c7c1
KR
242 /* Entries are initialized to nil, and may be set to nil by
243 compile_pattern_1 if the pattern isn't valid. Don't apply
49a5f770
KR
244 string accessors in those cases. However, compile_pattern_1
245 is only applied to the cache entry we pick here to reuse. So
246 nil should never appear before a non-nil entry. */
7c752c80 247 if (NILP (cp->regexp))
f1b9c7c1 248 goto compile_it;
d5db4077 249 if (SCHARS (cp->regexp) == SCHARS (pattern)
cf69b13e 250 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
1d288aef 251 && !NILP (Fstring_equal (cp->regexp, pattern))
59fab369 252 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
5679531d 253 && cp->posix == posix
58e95211
SM
254 && (EQ (cp->syntax_table, Qt)
255 || EQ (cp->syntax_table, current_buffer->syntax_table))
89ad6492
KH
256 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
257 && cp->buf.charset_unibyte == charset_unibyte)
487282dc
KH
258 break;
259
f1b9c7c1
KR
260 /* If we're at the end of the cache, compile into the nil cell
261 we found, or the last (least recently used) cell with a
262 string value. */
487282dc
KH
263 if (cp->next == 0)
264 {
f1b9c7c1 265 compile_it:
89ad6492 266 compile_pattern_1 (cp, pattern, translate, regp, posix);
487282dc
KH
267 break;
268 }
269 }
270
271 /* When we get here, cp (aka *cpp) contains the compiled pattern,
272 either because we found it in the cache or because we just compiled it.
273 Move it to the front of the queue to mark it as most recently used. */
274 *cpp = cp->next;
275 cp->next = searchbuf_head;
276 searchbuf_head = cp;
1113d9db 277
6639708c
RS
278 /* Advise the searching functions about the space we have allocated
279 for register data. */
280 if (regp)
281 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
282
89ad6492
KH
283 /* The compiled pattern can be used both for mulitbyte and unibyte
284 target. But, we have to tell which the pattern is used for. */
285 cp->buf.target_multibyte = multibyte;
286
487282dc 287 return &cp->buf;
ca1d1d23
JB
288}
289
ca1d1d23 290\f
b819a390
RS
291static Lisp_Object
292looking_at_1 (string, posix)
ca1d1d23 293 Lisp_Object string;
b819a390 294 int posix;
ca1d1d23
JB
295{
296 Lisp_Object val;
297 unsigned char *p1, *p2;
e7deaab0 298 EMACS_INT s1, s2;
ca1d1d23 299 register int i;
487282dc 300 struct re_pattern_buffer *bufp;
ca1d1d23 301
7074fde6
FP
302 if (running_asynch_code)
303 save_search_regs ();
304
910c747a
RS
305 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
306 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
307 = current_buffer->case_eqv_table;
308
b7826503 309 CHECK_STRING (string);
ef887810
RS
310 bufp = compile_pattern (string,
311 (NILP (Vinhibit_changing_match_data)
312 ? &search_regs : NULL),
487282dc 313 (!NILP (current_buffer->case_fold_search)
0190922f 314 ? current_buffer->case_canon_table : Qnil),
0c8533c6
RS
315 posix,
316 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23
JB
317
318 immediate_quit = 1;
319 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
320
321 /* Get pointers and sizes of the two strings
322 that make up the visible portion of the buffer. */
323
324 p1 = BEGV_ADDR;
fa8ed3e0 325 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 326 p2 = GAP_END_ADDR;
fa8ed3e0 327 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
328 if (s1 < 0)
329 {
330 p2 = p1;
fa8ed3e0 331 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
332 s1 = 0;
333 }
334 if (s2 < 0)
335 {
fa8ed3e0 336 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
337 s2 = 0;
338 }
8bb43c28
RS
339
340 re_match_object = Qnil;
177c0ea7 341
487282dc 342 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
ef887810
RS
343 PT_BYTE - BEGV_BYTE,
344 (NILP (Vinhibit_changing_match_data)
345 ? &search_regs : NULL),
fa8ed3e0 346 ZV_BYTE - BEGV_BYTE);
de182d70 347 immediate_quit = 0;
177c0ea7 348
ca1d1d23
JB
349 if (i == -2)
350 matcher_overflow ();
351
352 val = (0 <= i ? Qt : Qnil);
ef887810 353 if (NILP (Vinhibit_changing_match_data) && i >= 0)
fa8ed3e0
RS
354 for (i = 0; i < search_regs.num_regs; i++)
355 if (search_regs.start[i] >= 0)
356 {
357 search_regs.start[i]
358 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
359 search_regs.end[i]
360 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
361 }
ef887810
RS
362
363 /* Set last_thing_searched only when match data is changed. */
364 if (NILP (Vinhibit_changing_match_data))
365 XSETBUFFER (last_thing_searched, current_buffer);
366
ca1d1d23
JB
367 return val;
368}
369
b819a390 370DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
8c1a1077
PJ
371 doc: /* Return t if text after point matches regular expression REGEXP.
372This function modifies the match data that `match-beginning',
373`match-end' and `match-data' access; save and restore the match
374data if you want to preserve them. */)
375 (regexp)
94f94972 376 Lisp_Object regexp;
b819a390 377{
94f94972 378 return looking_at_1 (regexp, 0);
b819a390
RS
379}
380
381DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
8c1a1077
PJ
382 doc: /* Return t if text after point matches regular expression REGEXP.
383Find the longest match, in accord with Posix regular expression rules.
384This function modifies the match data that `match-beginning',
385`match-end' and `match-data' access; save and restore the match
386data if you want to preserve them. */)
387 (regexp)
94f94972 388 Lisp_Object regexp;
b819a390 389{
94f94972 390 return looking_at_1 (regexp, 1);
b819a390
RS
391}
392\f
393static Lisp_Object
394string_match_1 (regexp, string, start, posix)
ca1d1d23 395 Lisp_Object regexp, string, start;
b819a390 396 int posix;
ca1d1d23
JB
397{
398 int val;
487282dc 399 struct re_pattern_buffer *bufp;
e7deaab0 400 EMACS_INT pos, pos_byte;
0c8533c6 401 int i;
ca1d1d23 402
7074fde6
FP
403 if (running_asynch_code)
404 save_search_regs ();
405
b7826503
PJ
406 CHECK_STRING (regexp);
407 CHECK_STRING (string);
ca1d1d23
JB
408
409 if (NILP (start))
0c8533c6 410 pos = 0, pos_byte = 0;
ca1d1d23
JB
411 else
412 {
d5db4077 413 int len = SCHARS (string);
ca1d1d23 414
b7826503 415 CHECK_NUMBER (start);
0c8533c6
RS
416 pos = XINT (start);
417 if (pos < 0 && -pos <= len)
418 pos = len + pos;
419 else if (0 > pos || pos > len)
ca1d1d23 420 args_out_of_range (string, start);
0c8533c6 421 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
422 }
423
910c747a
RS
424 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
425 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
426 = current_buffer->case_eqv_table;
427
ef887810
RS
428 bufp = compile_pattern (regexp,
429 (NILP (Vinhibit_changing_match_data)
430 ? &search_regs : NULL),
487282dc 431 (!NILP (current_buffer->case_fold_search)
0190922f 432 ? current_buffer->case_canon_table : Qnil),
0c8533c6
RS
433 posix,
434 STRING_MULTIBYTE (string));
ca1d1d23 435 immediate_quit = 1;
8bb43c28 436 re_match_object = string;
177c0ea7 437
d5db4077
KR
438 val = re_search (bufp, (char *) SDATA (string),
439 SBYTES (string), pos_byte,
440 SBYTES (string) - pos_byte,
ef887810
RS
441 (NILP (Vinhibit_changing_match_data)
442 ? &search_regs : NULL));
ca1d1d23 443 immediate_quit = 0;
ef887810
RS
444
445 /* Set last_thing_searched only when match data is changed. */
446 if (NILP (Vinhibit_changing_match_data))
447 last_thing_searched = Qt;
448
ca1d1d23
JB
449 if (val == -2)
450 matcher_overflow ();
451 if (val < 0) return Qnil;
0c8533c6 452
ef887810
RS
453 if (NILP (Vinhibit_changing_match_data))
454 for (i = 0; i < search_regs.num_regs; i++)
455 if (search_regs.start[i] >= 0)
456 {
457 search_regs.start[i]
458 = string_byte_to_char (string, search_regs.start[i]);
459 search_regs.end[i]
460 = string_byte_to_char (string, search_regs.end[i]);
461 }
0c8533c6
RS
462
463 return make_number (string_byte_to_char (string, val));
ca1d1d23 464}
e59a8453 465
b819a390 466DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
8c1a1077 467 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
b85acc4b 468Matching ignores case if `case-fold-search' is non-nil.
8c1a1077
PJ
469If third arg START is non-nil, start search at that index in STRING.
470For index of first char beyond the match, do (match-end 0).
471`match-end' and `match-beginning' also give indices of substrings
2bd2f32d
RS
472matched by parenthesis constructs in the pattern.
473
474You can use the function `match-string' to extract the substrings
475matched by the parenthesis constructions in REGEXP. */)
8c1a1077 476 (regexp, string, start)
b819a390
RS
477 Lisp_Object regexp, string, start;
478{
479 return string_match_1 (regexp, string, start, 0);
480}
481
482DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
8c1a1077
PJ
483 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
484Find the longest match, in accord with Posix regular expression rules.
485Case is ignored if `case-fold-search' is non-nil in the current buffer.
486If third arg START is non-nil, start search at that index in STRING.
487For index of first char beyond the match, do (match-end 0).
488`match-end' and `match-beginning' also give indices of substrings
489matched by parenthesis constructs in the pattern. */)
490 (regexp, string, start)
b819a390
RS
491 Lisp_Object regexp, string, start;
492{
493 return string_match_1 (regexp, string, start, 1);
494}
495
e59a8453
RS
496/* Match REGEXP against STRING, searching all of STRING,
497 and return the index of the match, or negative on failure.
498 This does not clobber the match data. */
499
500int
501fast_string_match (regexp, string)
502 Lisp_Object regexp, string;
503{
504 int val;
487282dc 505 struct re_pattern_buffer *bufp;
e59a8453 506
facdc750
RS
507 bufp = compile_pattern (regexp, 0, Qnil,
508 0, STRING_MULTIBYTE (string));
e59a8453 509 immediate_quit = 1;
8bb43c28 510 re_match_object = string;
177c0ea7 511
d5db4077
KR
512 val = re_search (bufp, (char *) SDATA (string),
513 SBYTES (string), 0,
514 SBYTES (string), 0);
e59a8453
RS
515 immediate_quit = 0;
516 return val;
517}
5679531d
KH
518
519/* Match REGEXP against STRING, searching all of STRING ignoring case,
520 and return the index of the match, or negative on failure.
0c8533c6
RS
521 This does not clobber the match data.
522 We assume that STRING contains single-byte characters. */
5679531d
KH
523
524extern Lisp_Object Vascii_downcase_table;
525
526int
b4577c63 527fast_c_string_match_ignore_case (regexp, string)
5679531d 528 Lisp_Object regexp;
96b80561 529 const char *string;
5679531d
KH
530{
531 int val;
532 struct re_pattern_buffer *bufp;
533 int len = strlen (string);
534
0c8533c6 535 regexp = string_make_unibyte (regexp);
b4577c63 536 re_match_object = Qt;
5679531d 537 bufp = compile_pattern (regexp, 0,
0190922f 538 Vascii_canon_table, 0,
f8bd51c4 539 0);
5679531d
KH
540 immediate_quit = 1;
541 val = re_search (bufp, string, len, 0, len, 0);
542 immediate_quit = 0;
543 return val;
544}
be5f4dfb
KH
545
546/* Like fast_string_match but ignore case. */
547
548int
549fast_string_match_ignore_case (regexp, string)
550 Lisp_Object regexp, string;
551{
552 int val;
553 struct re_pattern_buffer *bufp;
554
0190922f 555 bufp = compile_pattern (regexp, 0, Vascii_canon_table,
be5f4dfb
KH
556 0, STRING_MULTIBYTE (string));
557 immediate_quit = 1;
558 re_match_object = string;
559
560 val = re_search (bufp, (char *) SDATA (string),
561 SBYTES (string), 0,
562 SBYTES (string), 0);
563 immediate_quit = 0;
564 return val;
565}
ca1d1d23 566\f
a80ce213 567/* Match REGEXP against the characters after POS to LIMIT, and return
8b13507a
KH
568 the number of matched characters. If STRING is non-nil, match
569 against the characters in it. In that case, POS and LIMIT are
570 indices into the string. This function doesn't modify the match
571 data. */
572
573EMACS_INT
574fast_looking_at (regexp, pos, pos_byte, limit, limit_byte, string)
575 Lisp_Object regexp;
576 EMACS_INT pos, pos_byte, limit, limit_byte;
577 Lisp_Object string;
578{
579 int multibyte;
580 struct re_pattern_buffer *buf;
581 unsigned char *p1, *p2;
e7deaab0 582 EMACS_INT s1, s2;
8b13507a
KH
583 EMACS_INT len;
584
585 if (STRINGP (string))
586 {
587 if (pos_byte < 0)
588 pos_byte = string_char_to_byte (string, pos);
589 if (limit_byte < 0)
590 limit_byte = string_char_to_byte (string, limit);
591 p1 = NULL;
592 s1 = 0;
593 p2 = SDATA (string);
594 s2 = SBYTES (string);
595 re_match_object = string;
596 multibyte = STRING_MULTIBYTE (string);
597 }
598 else
599 {
600 if (pos_byte < 0)
601 pos_byte = CHAR_TO_BYTE (pos);
602 if (limit_byte < 0)
603 limit_byte = CHAR_TO_BYTE (limit);
604 pos_byte -= BEGV_BYTE;
605 limit_byte -= BEGV_BYTE;
606 p1 = BEGV_ADDR;
607 s1 = GPT_BYTE - BEGV_BYTE;
608 p2 = GAP_END_ADDR;
609 s2 = ZV_BYTE - GPT_BYTE;
610 if (s1 < 0)
611 {
612 p2 = p1;
613 s2 = ZV_BYTE - BEGV_BYTE;
614 s1 = 0;
615 }
616 if (s2 < 0)
617 {
618 s1 = ZV_BYTE - BEGV_BYTE;
619 s2 = 0;
620 }
621 re_match_object = Qnil;
622 multibyte = ! NILP (current_buffer->enable_multibyte_characters);
623 }
624
625 buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
626 immediate_quit = 1;
627 len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
628 pos_byte, NULL, limit_byte);
629 immediate_quit = 0;
630
631 return len;
632}
633
634\f
9169c321
JB
635/* The newline cache: remembering which sections of text have no newlines. */
636
637/* If the user has requested newline caching, make sure it's on.
638 Otherwise, make sure it's off.
639 This is our cheezy way of associating an action with the change of
640 state of a buffer-local variable. */
641static void
642newline_cache_on_off (buf)
643 struct buffer *buf;
644{
645 if (NILP (buf->cache_long_line_scans))
646 {
647 /* It should be off. */
648 if (buf->newline_cache)
649 {
650 free_region_cache (buf->newline_cache);
651 buf->newline_cache = 0;
652 }
653 }
654 else
655 {
656 /* It should be on. */
657 if (buf->newline_cache == 0)
658 buf->newline_cache = new_region_cache ();
659 }
660}
661
662\f
663/* Search for COUNT instances of the character TARGET between START and END.
664
665 If COUNT is positive, search forwards; END must be >= START.
666 If COUNT is negative, search backwards for the -COUNTth instance;
667 END must be <= START.
668 If COUNT is zero, do anything you please; run rogue, for all I care.
669
670 If END is zero, use BEGV or ZV instead, as appropriate for the
671 direction indicated by COUNT.
ffd56f97
JB
672
673 If we find COUNT instances, set *SHORTAGE to zero, and return the
a9f2a45f 674 position past the COUNTth match. Note that for reverse motion
5bfe95c9 675 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 676
9169c321
JB
677 If we don't find COUNT instances before reaching END, set *SHORTAGE
678 to the number of TARGETs left unfound, and return END.
ffd56f97 679
087a5f81
RS
680 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
681 except when inside redisplay. */
682
dfcf069d 683int
9169c321
JB
684scan_buffer (target, start, end, count, shortage, allow_quit)
685 register int target;
e7deaab0 686 EMACS_INT start, end;
9169c321
JB
687 int count;
688 int *shortage;
087a5f81 689 int allow_quit;
ca1d1d23 690{
9169c321 691 struct region_cache *newline_cache;
177c0ea7 692 int direction;
ffd56f97 693
9169c321
JB
694 if (count > 0)
695 {
696 direction = 1;
697 if (! end) end = ZV;
698 }
699 else
700 {
701 direction = -1;
702 if (! end) end = BEGV;
703 }
ffd56f97 704
9169c321
JB
705 newline_cache_on_off (current_buffer);
706 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
707
708 if (shortage != 0)
709 *shortage = 0;
710
087a5f81 711 immediate_quit = allow_quit;
ca1d1d23 712
ffd56f97 713 if (count > 0)
9169c321 714 while (start != end)
ca1d1d23 715 {
9169c321
JB
716 /* Our innermost scanning loop is very simple; it doesn't know
717 about gaps, buffer ends, or the newline cache. ceiling is
718 the position of the last character before the next such
719 obstacle --- the last character the dumb search loop should
720 examine. */
e7deaab0
AS
721 EMACS_INT ceiling_byte = CHAR_TO_BYTE (end) - 1;
722 EMACS_INT start_byte = CHAR_TO_BYTE (start);
723 EMACS_INT tem;
9169c321
JB
724
725 /* If we're looking for a newline, consult the newline cache
726 to see where we can avoid some scanning. */
727 if (target == '\n' && newline_cache)
728 {
729 int next_change;
730 immediate_quit = 0;
731 while (region_cache_forward
fa8ed3e0
RS
732 (current_buffer, newline_cache, start_byte, &next_change))
733 start_byte = next_change;
cbe0db0d 734 immediate_quit = allow_quit;
9169c321 735
fa8ed3e0
RS
736 /* START should never be after END. */
737 if (start_byte > ceiling_byte)
738 start_byte = ceiling_byte;
9169c321
JB
739
740 /* Now the text after start is an unknown region, and
741 next_change is the position of the next known region. */
fa8ed3e0 742 ceiling_byte = min (next_change - 1, ceiling_byte);
9169c321
JB
743 }
744
745 /* The dumb loop can only scan text stored in contiguous
746 bytes. BUFFER_CEILING_OF returns the last character
747 position that is contiguous, so the ceiling is the
748 position after that. */
67ce527d
KH
749 tem = BUFFER_CEILING_OF (start_byte);
750 ceiling_byte = min (tem, ceiling_byte);
9169c321
JB
751
752 {
177c0ea7 753 /* The termination address of the dumb loop. */
fa8ed3e0
RS
754 register unsigned char *ceiling_addr
755 = BYTE_POS_ADDR (ceiling_byte) + 1;
756 register unsigned char *cursor
757 = BYTE_POS_ADDR (start_byte);
9169c321
JB
758 unsigned char *base = cursor;
759
760 while (cursor < ceiling_addr)
761 {
762 unsigned char *scan_start = cursor;
763
764 /* The dumb loop. */
765 while (*cursor != target && ++cursor < ceiling_addr)
766 ;
767
768 /* If we're looking for newlines, cache the fact that
769 the region from start to cursor is free of them. */
770 if (target == '\n' && newline_cache)
771 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
772 start_byte + scan_start - base,
773 start_byte + cursor - base);
9169c321
JB
774
775 /* Did we find the target character? */
776 if (cursor < ceiling_addr)
777 {
778 if (--count == 0)
779 {
780 immediate_quit = 0;
fa8ed3e0 781 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
9169c321
JB
782 }
783 cursor++;
784 }
785 }
786
fa8ed3e0 787 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321 788 }
ca1d1d23
JB
789 }
790 else
9169c321
JB
791 while (start > end)
792 {
793 /* The last character to check before the next obstacle. */
e7deaab0
AS
794 EMACS_INT ceiling_byte = CHAR_TO_BYTE (end);
795 EMACS_INT start_byte = CHAR_TO_BYTE (start);
796 EMACS_INT tem;
9169c321
JB
797
798 /* Consult the newline cache, if appropriate. */
799 if (target == '\n' && newline_cache)
800 {
801 int next_change;
802 immediate_quit = 0;
803 while (region_cache_backward
fa8ed3e0
RS
804 (current_buffer, newline_cache, start_byte, &next_change))
805 start_byte = next_change;
cbe0db0d 806 immediate_quit = allow_quit;
9169c321
JB
807
808 /* Start should never be at or before end. */
fa8ed3e0
RS
809 if (start_byte <= ceiling_byte)
810 start_byte = ceiling_byte + 1;
9169c321
JB
811
812 /* Now the text before start is an unknown region, and
813 next_change is the position of the next known region. */
fa8ed3e0 814 ceiling_byte = max (next_change, ceiling_byte);
9169c321
JB
815 }
816
817 /* Stop scanning before the gap. */
67ce527d
KH
818 tem = BUFFER_FLOOR_OF (start_byte - 1);
819 ceiling_byte = max (tem, ceiling_byte);
9169c321
JB
820
821 {
822 /* The termination address of the dumb loop. */
fa8ed3e0
RS
823 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
824 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
825 unsigned char *base = cursor;
826
827 while (cursor >= ceiling_addr)
828 {
829 unsigned char *scan_start = cursor;
830
831 while (*cursor != target && --cursor >= ceiling_addr)
832 ;
833
834 /* If we're looking for newlines, cache the fact that
835 the region from after the cursor to start is free of them. */
836 if (target == '\n' && newline_cache)
837 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
838 start_byte + cursor - base,
839 start_byte + scan_start - base);
9169c321
JB
840
841 /* Did we find the target character? */
842 if (cursor >= ceiling_addr)
843 {
844 if (++count >= 0)
845 {
846 immediate_quit = 0;
fa8ed3e0 847 return BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
848 }
849 cursor--;
850 }
851 }
852
fa8ed3e0 853 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
854 }
855 }
856
ca1d1d23
JB
857 immediate_quit = 0;
858 if (shortage != 0)
ffd56f97 859 *shortage = count * direction;
9169c321 860 return start;
ca1d1d23 861}
fa8ed3e0
RS
862\f
863/* Search for COUNT instances of a line boundary, which means either a
864 newline or (if selective display enabled) a carriage return.
865 Start at START. If COUNT is negative, search backwards.
866
867 We report the resulting position by calling TEMP_SET_PT_BOTH.
868
869 If we find COUNT instances. we position after (always after,
870 even if scanning backwards) the COUNTth match, and return 0.
871
872 If we don't find COUNT instances before reaching the end of the
873 buffer (or the beginning, if scanning backwards), we return
874 the number of line boundaries left unfound, and position at
875 the limit we bumped up against.
876
877 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
d5d57b92 878 except in special cases. */
ca1d1d23 879
63fa018d 880int
fa8ed3e0 881scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
e7deaab0
AS
882 EMACS_INT start, start_byte;
883 EMACS_INT limit, limit_byte;
fa8ed3e0
RS
884 register int count;
885 int allow_quit;
63fa018d 886{
fa8ed3e0
RS
887 int direction = ((count > 0) ? 1 : -1);
888
889 register unsigned char *cursor;
890 unsigned char *base;
891
e7deaab0 892 EMACS_INT ceiling;
fa8ed3e0
RS
893 register unsigned char *ceiling_addr;
894
d5d57b92
RS
895 int old_immediate_quit = immediate_quit;
896
fa8ed3e0
RS
897 /* The code that follows is like scan_buffer
898 but checks for either newline or carriage return. */
899
d5d57b92
RS
900 if (allow_quit)
901 immediate_quit++;
fa8ed3e0
RS
902
903 start_byte = CHAR_TO_BYTE (start);
904
905 if (count > 0)
906 {
907 while (start_byte < limit_byte)
908 {
909 ceiling = BUFFER_CEILING_OF (start_byte);
910 ceiling = min (limit_byte - 1, ceiling);
911 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
912 base = (cursor = BYTE_POS_ADDR (start_byte));
913 while (1)
914 {
915 while (*cursor != '\n' && ++cursor != ceiling_addr)
916 ;
917
918 if (cursor != ceiling_addr)
919 {
920 if (--count == 0)
921 {
d5d57b92 922 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
923 start_byte = start_byte + cursor - base + 1;
924 start = BYTE_TO_CHAR (start_byte);
925 TEMP_SET_PT_BOTH (start, start_byte);
926 return 0;
927 }
928 else
929 if (++cursor == ceiling_addr)
930 break;
931 }
932 else
933 break;
934 }
935 start_byte += cursor - base;
936 }
937 }
938 else
939 {
fa8ed3e0
RS
940 while (start_byte > limit_byte)
941 {
942 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
943 ceiling = max (limit_byte, ceiling);
944 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
945 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
946 while (1)
947 {
948 while (--cursor != ceiling_addr && *cursor != '\n')
949 ;
950
951 if (cursor != ceiling_addr)
952 {
953 if (++count == 0)
954 {
d5d57b92 955 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
956 /* Return the position AFTER the match we found. */
957 start_byte = start_byte + cursor - base + 1;
958 start = BYTE_TO_CHAR (start_byte);
959 TEMP_SET_PT_BOTH (start, start_byte);
960 return 0;
961 }
962 }
963 else
964 break;
965 }
966 /* Here we add 1 to compensate for the last decrement
967 of CURSOR, which took it past the valid range. */
968 start_byte += cursor - base + 1;
969 }
970 }
971
972 TEMP_SET_PT_BOTH (limit, limit_byte);
d5d57b92 973 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
974
975 return count * direction;
63fa018d
RS
976}
977
ca1d1d23 978int
fa8ed3e0 979find_next_newline_no_quit (from, cnt)
e7deaab0
AS
980 EMACS_INT from;
981 int cnt;
ca1d1d23 982{
fa8ed3e0 983 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
9169c321
JB
984}
985
9169c321
JB
986/* Like find_next_newline, but returns position before the newline,
987 not after, and only search up to TO. This isn't just
988 find_next_newline (...)-1, because you might hit TO. */
fa8ed3e0 989
9169c321
JB
990int
991find_before_next_newline (from, to, cnt)
e7deaab0
AS
992 EMACS_INT from, to;
993 int cnt;
9169c321
JB
994{
995 int shortage;
996 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
997
998 if (shortage == 0)
999 pos--;
177c0ea7 1000
9169c321 1001 return pos;
ca1d1d23
JB
1002}
1003\f
ca1d1d23
JB
1004/* Subroutines of Lisp buffer search functions. */
1005
1006static Lisp_Object
b819a390 1007search_command (string, bound, noerror, count, direction, RE, posix)
ca1d1d23
JB
1008 Lisp_Object string, bound, noerror, count;
1009 int direction;
1010 int RE;
b819a390 1011 int posix;
ca1d1d23
JB
1012{
1013 register int np;
9f43ad85 1014 int lim, lim_byte;
ca1d1d23
JB
1015 int n = direction;
1016
1017 if (!NILP (count))
1018 {
b7826503 1019 CHECK_NUMBER (count);
ca1d1d23
JB
1020 n *= XINT (count);
1021 }
1022
b7826503 1023 CHECK_STRING (string);
ca1d1d23 1024 if (NILP (bound))
9f43ad85
RS
1025 {
1026 if (n > 0)
1027 lim = ZV, lim_byte = ZV_BYTE;
1028 else
1029 lim = BEGV, lim_byte = BEGV_BYTE;
1030 }
ca1d1d23
JB
1031 else
1032 {
b7826503 1033 CHECK_NUMBER_COERCE_MARKER (bound);
ca1d1d23 1034 lim = XINT (bound);
6ec8bbd2 1035 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
1036 error ("Invalid search bound (wrong side of point)");
1037 if (lim > ZV)
9f43ad85 1038 lim = ZV, lim_byte = ZV_BYTE;
588d2fd5 1039 else if (lim < BEGV)
9f43ad85 1040 lim = BEGV, lim_byte = BEGV_BYTE;
588d2fd5
KH
1041 else
1042 lim_byte = CHAR_TO_BYTE (lim);
ca1d1d23
JB
1043 }
1044
910c747a
RS
1045 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
1046 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
1047 = current_buffer->case_eqv_table;
1048
9f43ad85 1049 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
ca1d1d23 1050 (!NILP (current_buffer->case_fold_search)
facdc750 1051 ? current_buffer->case_canon_table
3135e9fd 1052 : Qnil),
ca1d1d23 1053 (!NILP (current_buffer->case_fold_search)
facdc750 1054 ? current_buffer->case_eqv_table
3135e9fd 1055 : Qnil),
b819a390 1056 posix);
ca1d1d23
JB
1057 if (np <= 0)
1058 {
1059 if (NILP (noerror))
06f77a8a
KS
1060 xsignal1 (Qsearch_failed, string);
1061
ca1d1d23
JB
1062 if (!EQ (noerror, Qt))
1063 {
1064 if (lim < BEGV || lim > ZV)
1065 abort ();
9f43ad85 1066 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
1067 return Qnil;
1068#if 0 /* This would be clean, but maybe programs depend on
1069 a value of nil here. */
481399bf 1070 np = lim;
a5f217b8 1071#endif
ca1d1d23 1072 }
481399bf
RS
1073 else
1074 return Qnil;
ca1d1d23
JB
1075 }
1076
1077 if (np < BEGV || np > ZV)
1078 abort ();
1079
1080 SET_PT (np);
1081
1082 return make_number (np);
1083}
1084\f
fa8ed3e0
RS
1085/* Return 1 if REGEXP it matches just one constant string. */
1086
b6d6a51c
KH
1087static int
1088trivial_regexp_p (regexp)
1089 Lisp_Object regexp;
1090{
d5db4077
KR
1091 int len = SBYTES (regexp);
1092 unsigned char *s = SDATA (regexp);
b6d6a51c
KH
1093 while (--len >= 0)
1094 {
1095 switch (*s++)
1096 {
1097 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1098 return 0;
1099 case '\\':
1100 if (--len < 0)
1101 return 0;
1102 switch (*s++)
1103 {
1104 case '|': case '(': case ')': case '`': case '\'': case 'b':
1105 case 'B': case '<': case '>': case 'w': case 'W': case 's':
29f89fe7 1106 case 'S': case '=': case '{': case '}': case '_':
5679531d 1107 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 1108 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
1109 case '6': case '7': case '8': case '9':
1110 return 0;
1111 }
1112 }
1113 }
1114 return 1;
1115}
1116
ca325161 1117/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 1118 starting at position POS and stopping at position LIM,
b819a390 1119 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
1120 a regular expression if RE is true.
1121
1122 If N is positive, searching is forward and LIM must be greater than POS.
1123 If N is negative, searching is backward and LIM must be less than POS.
1124
facdc750 1125 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 1126 or else the position at the beginning of the Nth occurrence
b819a390
RS
1127 (if searching backward) or the end (if searching forward).
1128
1129 POSIX is nonzero if we want full backtracking (POSIX style)
1130 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 1131
aff2ce94
RS
1132#define TRANSLATE(out, trt, d) \
1133do \
1134 { \
1135 if (! NILP (trt)) \
1136 { \
1137 Lisp_Object temp; \
1138 temp = Faref (trt, make_number (d)); \
1139 if (INTEGERP (temp)) \
1140 out = XINT (temp); \
1141 else \
1142 out = d; \
1143 } \
1144 else \
1145 out = d; \
1146 } \
1147while (0)
facdc750 1148
ef887810
RS
1149/* Only used in search_buffer, to record the end position of the match
1150 when searching regexps and SEARCH_REGS should not be changed
1151 (i.e. Vinhibit_changing_match_data is non-nil). */
1152static struct re_registers search_regs_1;
1153
e7deaab0 1154static EMACS_INT
9f43ad85
RS
1155search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1156 RE, trt, inverse_trt, posix)
ca1d1d23 1157 Lisp_Object string;
e7deaab0
AS
1158 EMACS_INT pos;
1159 EMACS_INT pos_byte;
1160 EMACS_INT lim;
1161 EMACS_INT lim_byte;
ca1d1d23
JB
1162 int n;
1163 int RE;
facdc750
RS
1164 Lisp_Object trt;
1165 Lisp_Object inverse_trt;
b819a390 1166 int posix;
ca1d1d23 1167{
d5db4077
KR
1168 int len = SCHARS (string);
1169 int len_byte = SBYTES (string);
facdc750 1170 register int i;
ca1d1d23 1171
7074fde6
FP
1172 if (running_asynch_code)
1173 save_search_regs ();
1174
a7e4cdde 1175 /* Searching 0 times means don't move. */
ca1d1d23 1176 /* Null string is found at starting position. */
a7e4cdde 1177 if (len == 0 || n == 0)
ca325161 1178 {
0353b28f 1179 set_search_regs (pos_byte, 0);
ca325161
RS
1180 return pos;
1181 }
3f57a499 1182
41a33295 1183 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
ca1d1d23 1184 {
facdc750
RS
1185 unsigned char *p1, *p2;
1186 int s1, s2;
487282dc
KH
1187 struct re_pattern_buffer *bufp;
1188
ef887810
RS
1189 bufp = compile_pattern (string,
1190 (NILP (Vinhibit_changing_match_data)
1191 ? &search_regs : &search_regs_1),
1192 trt, posix,
0c8533c6 1193 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23 1194
ca1d1d23
JB
1195 immediate_quit = 1; /* Quit immediately if user types ^G,
1196 because letting this function finish
1197 can take too long. */
1198 QUIT; /* Do a pending quit right away,
1199 to avoid paradoxical behavior */
1200 /* Get pointers and sizes of the two strings
1201 that make up the visible portion of the buffer. */
1202
1203 p1 = BEGV_ADDR;
fa8ed3e0 1204 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1205 p2 = GAP_END_ADDR;
fa8ed3e0 1206 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1207 if (s1 < 0)
1208 {
1209 p2 = p1;
fa8ed3e0 1210 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1211 s1 = 0;
1212 }
1213 if (s2 < 0)
1214 {
fa8ed3e0 1215 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1216 s2 = 0;
1217 }
8bb43c28 1218 re_match_object = Qnil;
177c0ea7 1219
ca1d1d23
JB
1220 while (n < 0)
1221 {
42db823b 1222 int val;
487282dc 1223 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b 1224 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
ef887810
RS
1225 (NILP (Vinhibit_changing_match_data)
1226 ? &search_regs : &search_regs_1),
42db823b 1227 /* Don't allow match past current point */
4996330b 1228 pos_byte - BEGV_BYTE);
ca1d1d23 1229 if (val == -2)
b6d6a51c
KH
1230 {
1231 matcher_overflow ();
1232 }
ca1d1d23
JB
1233 if (val >= 0)
1234 {
ef887810
RS
1235 if (NILP (Vinhibit_changing_match_data))
1236 {
1237 pos_byte = search_regs.start[0] + BEGV_BYTE;
1238 for (i = 0; i < search_regs.num_regs; i++)
1239 if (search_regs.start[i] >= 0)
1240 {
1241 search_regs.start[i]
1242 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1243 search_regs.end[i]
1244 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1245 }
1246 XSETBUFFER (last_thing_searched, current_buffer);
1247 /* Set pos to the new position. */
1248 pos = search_regs.start[0];
1249 }
1250 else
1251 {
1252 pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1253 /* Set pos to the new position. */
1254 pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1255 }
ca1d1d23
JB
1256 }
1257 else
1258 {
1259 immediate_quit = 0;
1260 return (n);
1261 }
1262 n++;
1263 }
1264 while (n > 0)
1265 {
42db823b 1266 int val;
487282dc 1267 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b 1268 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
ef887810
RS
1269 (NILP (Vinhibit_changing_match_data)
1270 ? &search_regs : &search_regs_1),
4996330b 1271 lim_byte - BEGV_BYTE);
ca1d1d23 1272 if (val == -2)
b6d6a51c
KH
1273 {
1274 matcher_overflow ();
1275 }
ca1d1d23
JB
1276 if (val >= 0)
1277 {
ef887810
RS
1278 if (NILP (Vinhibit_changing_match_data))
1279 {
1280 pos_byte = search_regs.end[0] + BEGV_BYTE;
1281 for (i = 0; i < search_regs.num_regs; i++)
1282 if (search_regs.start[i] >= 0)
1283 {
1284 search_regs.start[i]
1285 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1286 search_regs.end[i]
1287 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1288 }
1289 XSETBUFFER (last_thing_searched, current_buffer);
1290 pos = search_regs.end[0];
1291 }
1292 else
1293 {
1294 pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1295 pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1296 }
ca1d1d23
JB
1297 }
1298 else
1299 {
1300 immediate_quit = 0;
1301 return (0 - n);
1302 }
1303 n--;
1304 }
1305 immediate_quit = 0;
1306 return (pos);
1307 }
1308 else /* non-RE case */
1309 {
facdc750
RS
1310 unsigned char *raw_pattern, *pat;
1311 int raw_pattern_size;
1312 int raw_pattern_size_byte;
1313 unsigned char *patbuf;
1314 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
a967ed62 1315 unsigned char *base_pat;
49e44e81
KH
1316 /* Set to positive if we find a non-ASCII char that need
1317 translation. Otherwise set to zero later. */
1318 int char_base = -1;
040272ce 1319 int boyer_moore_ok = 1;
facdc750
RS
1320
1321 /* MULTIBYTE says whether the text to be searched is multibyte.
1322 We must convert PATTERN to match that, or we will not really
1323 find things right. */
1324
1325 if (multibyte == STRING_MULTIBYTE (string))
1326 {
d5db4077
KR
1327 raw_pattern = (unsigned char *) SDATA (string);
1328 raw_pattern_size = SCHARS (string);
1329 raw_pattern_size_byte = SBYTES (string);
facdc750
RS
1330 }
1331 else if (multibyte)
1332 {
d5db4077 1333 raw_pattern_size = SCHARS (string);
facdc750 1334 raw_pattern_size_byte
d5db4077 1335 = count_size_as_multibyte (SDATA (string),
facdc750 1336 raw_pattern_size);
7276d3d8 1337 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
d5db4077
KR
1338 copy_text (SDATA (string), raw_pattern,
1339 SCHARS (string), 0, 1);
facdc750
RS
1340 }
1341 else
1342 {
1343 /* Converting multibyte to single-byte.
1344
1345 ??? Perhaps this conversion should be done in a special way
1346 by subtracting nonascii-insert-offset from each non-ASCII char,
1347 so that only the multibyte chars which really correspond to
1348 the chosen single-byte character set can possibly match. */
d5db4077
KR
1349 raw_pattern_size = SCHARS (string);
1350 raw_pattern_size_byte = SCHARS (string);
7276d3d8 1351 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
1352 copy_text (SDATA (string), raw_pattern,
1353 SBYTES (string), 1, 0);
facdc750
RS
1354 }
1355
1356 /* Copy and optionally translate the pattern. */
1357 len = raw_pattern_size;
1358 len_byte = raw_pattern_size_byte;
f5f7578b 1359 patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
facdc750
RS
1360 pat = patbuf;
1361 base_pat = raw_pattern;
1362 if (multibyte)
1363 {
a17ae96a
KH
1364 /* Fill patbuf by translated characters in STRING while
1365 checking if we can use boyer-moore search. If TRT is
1366 non-nil, we can use boyer-moore search only if TRT can be
1367 represented by the byte array of 256 elements. For that,
1368 all non-ASCII case-equivalents of all case-senstive
1369 characters in STRING must belong to the same charset and
1370 row. */
1371
facdc750
RS
1372 while (--len >= 0)
1373 {
a17ae96a 1374 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
aff2ce94 1375 int c, translated, inverse;
a17ae96a 1376 int in_charlen, charlen;
facdc750
RS
1377
1378 /* If we got here and the RE flag is set, it's because we're
1379 dealing with a regexp known to be trivial, so the backslash
1380 just quotes the next character. */
1381 if (RE && *base_pat == '\\')
1382 {
1383 len--;
62221f22 1384 raw_pattern_size--;
facdc750
RS
1385 len_byte--;
1386 base_pat++;
1387 }
1388
1389 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
040272ce 1390
a17ae96a 1391 if (NILP (trt))
facdc750 1392 {
a17ae96a
KH
1393 str = base_pat;
1394 charlen = in_charlen;
1395 }
1396 else
1397 {
1398 /* Translate the character. */
1399 TRANSLATE (translated, trt, c);
1400 charlen = CHAR_STRING (translated, str_base);
1401 str = str_base;
1402
1403 /* Check if C has any other case-equivalents. */
1404 TRANSLATE (inverse, inverse_trt, c);
1405 /* If so, check if we can use boyer-moore. */
1406 if (c != inverse && boyer_moore_ok)
1407 {
1408 /* Check if all equivalents belong to the same
1409 group of characters. Note that the check of C
49e44e81
KH
1410 itself is done by the last iteration. */
1411 int this_char_base = -1;
1412
1413 while (boyer_moore_ok)
a17ae96a 1414 {
49e44e81 1415 if (ASCII_BYTE_P (inverse))
a17ae96a 1416 {
49e44e81
KH
1417 if (this_char_base > 0)
1418 boyer_moore_ok = 0;
1419 else
dafaabd1 1420 this_char_base = 0;
a17ae96a 1421 }
49e44e81
KH
1422 else if (CHAR_BYTE8_P (inverse))
1423 /* Boyer-moore search can't handle a
1424 translation of an eight-bit
1425 character. */
1426 boyer_moore_ok = 0;
1427 else if (this_char_base < 0)
1428 {
1429 this_char_base = inverse & ~0x3F;
1430 if (char_base < 0)
1431 char_base = this_char_base;
dafaabd1 1432 else if (this_char_base != char_base)
49e44e81
KH
1433 boyer_moore_ok = 0;
1434 }
1435 else if ((inverse & ~0x3F) != this_char_base)
1436 boyer_moore_ok = 0;
a17ae96a
KH
1437 if (c == inverse)
1438 break;
1439 TRANSLATE (inverse, inverse_trt, inverse);
1440 }
1441 }
aff2ce94 1442 }
facdc750
RS
1443
1444 /* Store this character into the translated pattern. */
a17ae96a
KH
1445 bcopy (str, pat, charlen);
1446 pat += charlen;
facdc750
RS
1447 base_pat += in_charlen;
1448 len_byte -= in_charlen;
1449 }
dafaabd1
AS
1450
1451 /* If char_base is still negative we didn't find any translated
1452 non-ASCII characters. */
1453 if (char_base < 0)
1454 char_base = 0;
facdc750
RS
1455 }
1456 else
1457 {
040272ce 1458 /* Unibyte buffer. */
a17ae96a 1459 char_base = 0;
facdc750
RS
1460 while (--len >= 0)
1461 {
040272ce 1462 int c, translated;
facdc750
RS
1463
1464 /* If we got here and the RE flag is set, it's because we're
1465 dealing with a regexp known to be trivial, so the backslash
1466 just quotes the next character. */
1467 if (RE && *base_pat == '\\')
1468 {
1469 len--;
0190922f 1470 raw_pattern_size--;
facdc750
RS
1471 base_pat++;
1472 }
1473 c = *base_pat++;
aff2ce94 1474 TRANSLATE (translated, trt, c);
facdc750
RS
1475 *pat++ = translated;
1476 }
1477 }
1478
1479 len_byte = pat - patbuf;
1480 len = raw_pattern_size;
1481 pat = base_pat = patbuf;
1482
040272ce 1483 if (boyer_moore_ok)
facdc750 1484 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
aff2ce94 1485 pos, pos_byte, lim, lim_byte,
a17ae96a 1486 char_base);
facdc750
RS
1487 else
1488 return simple_search (n, pat, len, len_byte, trt,
1489 pos, pos_byte, lim, lim_byte);
1490 }
1491}
1492\f
1493/* Do a simple string search N times for the string PAT,
1494 whose length is LEN/LEN_BYTE,
1495 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1496 TRT is the translation table.
f8bd51c4 1497
facdc750
RS
1498 Return the character position where the match is found.
1499 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1500
facdc750
RS
1501 This kind of search works regardless of what is in PAT and
1502 regardless of what is in TRT. It is used in cases where
1503 boyer_moore cannot work. */
1504
e7deaab0 1505static EMACS_INT
facdc750
RS
1506simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1507 int n;
1508 unsigned char *pat;
1509 int len, len_byte;
1510 Lisp_Object trt;
e7deaab0
AS
1511 EMACS_INT pos, pos_byte;
1512 EMACS_INT lim, lim_byte;
facdc750
RS
1513{
1514 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
ab228c24 1515 int forward = n > 0;
49e44e81
KH
1516 /* Number of buffer bytes matched. Note that this may be different
1517 from len_byte in a multibyte buffer. */
1518 int match_byte;
facdc750
RS
1519
1520 if (lim > pos && multibyte)
1521 while (n > 0)
1522 {
1523 while (1)
f8bd51c4 1524 {
facdc750 1525 /* Try matching at position POS. */
e7deaab0
AS
1526 EMACS_INT this_pos = pos;
1527 EMACS_INT this_pos_byte = pos_byte;
facdc750
RS
1528 int this_len = len;
1529 int this_len_byte = len_byte;
1530 unsigned char *p = pat;
a9469498 1531 if (pos + len > lim || pos_byte + len_byte > lim_byte)
facdc750
RS
1532 goto stop;
1533
1534 while (this_len > 0)
1535 {
1536 int charlen, buf_charlen;
ab228c24 1537 int pat_ch, buf_ch;
facdc750 1538
ab228c24 1539 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1540 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1541 ZV_BYTE - this_pos_byte,
1542 buf_charlen);
aff2ce94 1543 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1544
1545 if (buf_ch != pat_ch)
1546 break;
ab228c24
RS
1547
1548 this_len_byte -= charlen;
1549 this_len--;
1550 p += charlen;
1551
1552 this_pos_byte += buf_charlen;
1553 this_pos++;
facdc750
RS
1554 }
1555
1556 if (this_len == 0)
1557 {
49e44e81 1558 match_byte = this_pos_byte - pos_byte;
facdc750 1559 pos += len;
49e44e81 1560 pos_byte += match_byte;
facdc750
RS
1561 break;
1562 }
1563
1564 INC_BOTH (pos, pos_byte);
f8bd51c4 1565 }
facdc750
RS
1566
1567 n--;
1568 }
1569 else if (lim > pos)
1570 while (n > 0)
1571 {
1572 while (1)
f8bd51c4 1573 {
facdc750 1574 /* Try matching at position POS. */
e7deaab0 1575 EMACS_INT this_pos = pos;
facdc750
RS
1576 int this_len = len;
1577 unsigned char *p = pat;
1578
1579 if (pos + len > lim)
1580 goto stop;
1581
1582 while (this_len > 0)
1583 {
1584 int pat_ch = *p++;
1585 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1586 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1587
1588 if (buf_ch != pat_ch)
1589 break;
ab228c24
RS
1590
1591 this_len--;
1592 this_pos++;
facdc750
RS
1593 }
1594
1595 if (this_len == 0)
1596 {
49e44e81 1597 match_byte = len;
facdc750
RS
1598 pos += len;
1599 break;
1600 }
1601
1602 pos++;
f8bd51c4 1603 }
facdc750
RS
1604
1605 n--;
1606 }
1607 /* Backwards search. */
1608 else if (lim < pos && multibyte)
1609 while (n < 0)
1610 {
1611 while (1)
f8bd51c4 1612 {
facdc750 1613 /* Try matching at position POS. */
e7deaab0
AS
1614 EMACS_INT this_pos = pos - len;
1615 EMACS_INT this_pos_byte;
facdc750
RS
1616 int this_len = len;
1617 int this_len_byte = len_byte;
1618 unsigned char *p = pat;
1619
3dcf0066 1620 if (this_pos < lim || (pos_byte - len_byte) < lim_byte)
facdc750 1621 goto stop;
3dcf0066 1622 this_pos_byte = CHAR_TO_BYTE (this_pos);
49e44e81 1623 match_byte = pos_byte - this_pos_byte;
facdc750
RS
1624
1625 while (this_len > 0)
1626 {
1627 int charlen, buf_charlen;
ab228c24 1628 int pat_ch, buf_ch;
facdc750 1629
ab228c24 1630 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1631 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1632 ZV_BYTE - this_pos_byte,
1633 buf_charlen);
aff2ce94 1634 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1635
1636 if (buf_ch != pat_ch)
1637 break;
ab228c24
RS
1638
1639 this_len_byte -= charlen;
1640 this_len--;
1641 p += charlen;
1642 this_pos_byte += buf_charlen;
1643 this_pos++;
facdc750
RS
1644 }
1645
1646 if (this_len == 0)
1647 {
1648 pos -= len;
49e44e81 1649 pos_byte -= match_byte;
facdc750
RS
1650 break;
1651 }
1652
1653 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1654 }
1655
facdc750
RS
1656 n++;
1657 }
1658 else if (lim < pos)
1659 while (n < 0)
1660 {
1661 while (1)
b6d6a51c 1662 {
facdc750 1663 /* Try matching at position POS. */
e7deaab0 1664 EMACS_INT this_pos = pos - len;
facdc750
RS
1665 int this_len = len;
1666 unsigned char *p = pat;
1667
a9469498 1668 if (this_pos < lim)
facdc750
RS
1669 goto stop;
1670
1671 while (this_len > 0)
1672 {
1673 int pat_ch = *p++;
1674 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1675 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1676
1677 if (buf_ch != pat_ch)
1678 break;
ab228c24
RS
1679 this_len--;
1680 this_pos++;
facdc750
RS
1681 }
1682
1683 if (this_len == 0)
b6d6a51c 1684 {
49e44e81 1685 match_byte = len;
facdc750
RS
1686 pos -= len;
1687 break;
b6d6a51c 1688 }
facdc750
RS
1689
1690 pos--;
b6d6a51c 1691 }
facdc750
RS
1692
1693 n++;
b6d6a51c 1694 }
facdc750
RS
1695
1696 stop:
1697 if (n == 0)
aff2ce94 1698 {
ab228c24 1699 if (forward)
49e44e81 1700 set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
ab228c24 1701 else
49e44e81 1702 set_search_regs (multibyte ? pos_byte : pos, match_byte);
aff2ce94
RS
1703
1704 return pos;
1705 }
facdc750
RS
1706 else if (n > 0)
1707 return -n;
1708 else
1709 return n;
1710}
1711\f
0190922f 1712/* Do Boyer-Moore search N times for the string BASE_PAT,
facdc750
RS
1713 whose length is LEN/LEN_BYTE,
1714 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1715 DIRECTION says which direction we search in.
1716 TRT and INVERSE_TRT are translation tables.
0190922f 1717 Characters in PAT are already translated by TRT.
facdc750 1718
0190922f
KH
1719 This kind of search works if all the characters in BASE_PAT that
1720 have nontrivial translation are the same aside from the last byte.
1721 This makes it possible to translate just the last byte of a
1722 character, and do so after just a simple test of the context.
b2e6b10f 1723 CHAR_BASE is nonzero if there is such a non-ASCII character.
facdc750
RS
1724
1725 If that criterion is not satisfied, do not call this function. */
1726
e7deaab0 1727static EMACS_INT
facdc750 1728boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
a17ae96a 1729 pos, pos_byte, lim, lim_byte, char_base)
facdc750
RS
1730 int n;
1731 unsigned char *base_pat;
1732 int len, len_byte;
1733 Lisp_Object trt;
1734 Lisp_Object inverse_trt;
e7deaab0
AS
1735 EMACS_INT pos, pos_byte;
1736 EMACS_INT lim, lim_byte;
a17ae96a 1737 int char_base;
facdc750
RS
1738{
1739 int direction = ((n > 0) ? 1 : -1);
1740 register int dirlen;
e7deaab0
AS
1741 EMACS_INT limit;
1742 int stride_for_teases = 0;
f4646fff 1743 int BM_tab[0400];
177c0ea7 1744 register unsigned char *cursor, *p_limit;
facdc750 1745 register int i, j;
cb6792d2 1746 unsigned char *pat, *pat_end;
facdc750
RS
1747 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1748
1749 unsigned char simple_translate[0400];
0190922f 1750 /* These are set to the preceding bytes of a byte to be translated
49e44e81 1751 if char_base is nonzero. As the maximum byte length of a
a17ae96a 1752 multibyte character is 5, we have to check at most four previous
0190922f
KH
1753 bytes. */
1754 int translate_prev_byte1 = 0;
1755 int translate_prev_byte2 = 0;
1756 int translate_prev_byte3 = 0;
a17ae96a 1757 int translate_prev_byte4 = 0;
facdc750 1758
f4646fff
AS
1759 /* The general approach is that we are going to maintain that we know
1760 the first (closest to the present position, in whatever direction
1761 we're searching) character that could possibly be the last
1762 (furthest from present position) character of a valid match. We
1763 advance the state of our knowledge by looking at that character
1764 and seeing whether it indeed matches the last character of the
1765 pattern. If it does, we take a closer look. If it does not, we
1766 move our pointer (to putative last characters) as far as is
1767 logically possible. This amount of movement, which I call a
1768 stride, will be the length of the pattern if the actual character
1769 appears nowhere in the pattern, otherwise it will be the distance
1770 from the last occurrence of that character to the end of the
1771 pattern. If the amount is zero we have a possible match. */
1772
1773 /* Here we make a "mickey mouse" BM table. The stride of the search
1774 is determined only by the last character of the putative match.
1775 If that character does not match, we will stride the proper
1776 distance to propose a match that superimposes it on the last
1777 instance of a character that matches it (per trt), or misses
1778 it entirely if there is none. */
facdc750
RS
1779
1780 dirlen = len_byte * direction;
cb6792d2
RS
1781
1782 /* Record position after the end of the pattern. */
1783 pat_end = base_pat + len_byte;
1784 /* BASE_PAT points to a character that we start scanning from.
1785 It is the first character in a forward search,
1786 the last character in a backward search. */
facdc750 1787 if (direction < 0)
cb6792d2
RS
1788 base_pat = pat_end - 1;
1789
f4646fff
AS
1790 /* A character that does not appear in the pattern induces a
1791 stride equal to the pattern length. */
1792 for (i = 0; i < 0400; i++)
1793 BM_tab[i] = dirlen;
facdc750
RS
1794
1795 /* We use this for translation, instead of TRT itself.
1796 We fill this in to handle the characters that actually
1797 occur in the pattern. Others don't matter anyway! */
facdc750
RS
1798 for (i = 0; i < 0400; i++)
1799 simple_translate[i] = i;
1800
a17ae96a 1801 if (char_base)
0190922f 1802 {
a17ae96a 1803 /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE. Only a
0190922f 1804 byte following them are the target of translation. */
0190922f 1805 unsigned char str[MAX_MULTIBYTE_LENGTH];
a17ae96a 1806 int len = CHAR_STRING (char_base, str);
0190922f
KH
1807
1808 translate_prev_byte1 = str[len - 2];
1809 if (len > 2)
1810 {
1811 translate_prev_byte2 = str[len - 3];
1812 if (len > 3)
a17ae96a
KH
1813 {
1814 translate_prev_byte3 = str[len - 4];
1815 if (len > 4)
1816 translate_prev_byte4 = str[len - 5];
1817 }
0190922f
KH
1818 }
1819 }
1820
facdc750 1821 i = 0;
f4646fff 1822 while (i != dirlen)
facdc750 1823 {
cb6792d2 1824 unsigned char *ptr = base_pat + i;
facdc750 1825 i += direction;
facdc750 1826 if (! NILP (trt))
ca1d1d23 1827 {
49e44e81
KH
1828 /* If the byte currently looking at is the last of a
1829 character to check case-equivalents, set CH to that
1830 character. An ASCII character and a non-ASCII character
1831 matching with CHAR_BASE are to be checked. */
a17ae96a
KH
1832 int ch = -1;
1833
1834 if (ASCII_BYTE_P (*ptr) || ! multibyte)
1835 ch = *ptr;
49e44e81 1836 else if (char_base
86dc6ccb 1837 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
ca1d1d23 1838 {
49e44e81
KH
1839 unsigned char *charstart = ptr - 1;
1840
1841 while (! (CHAR_HEAD_P (*charstart)))
1842 charstart--;
1843 ch = STRING_CHAR (charstart, ptr - charstart + 1);
a17ae96a
KH
1844 if (char_base != (ch & ~0x3F))
1845 ch = -1;
ca1d1d23 1846 }
facdc750 1847
f51995fa 1848 if (ch >= 0200)
49e44e81
KH
1849 j = (ch & 0x3F) | 0200;
1850 else
1851 j = *ptr;
1852
f4646fff 1853 if (i == dirlen)
facdc750 1854 stride_for_teases = BM_tab[j];
ab228c24 1855
facdc750
RS
1856 BM_tab[j] = dirlen - i;
1857 /* A translation table is accompanied by its inverse -- see */
177c0ea7 1858 /* comment following downcase_table for details */
a17ae96a 1859 if (ch >= 0)
ab228c24
RS
1860 {
1861 int starting_ch = ch;
49e44e81 1862 int starting_j = j;
a17ae96a 1863
ab228c24
RS
1864 while (1)
1865 {
1866 TRANSLATE (ch, inverse_trt, ch);
f51995fa 1867 if (ch >= 0200)
49e44e81 1868 j = (ch & 0x3F) | 0200;
ab228c24 1869 else
a17ae96a 1870 j = ch;
ab228c24
RS
1871
1872 /* For all the characters that map into CH,
1873 set up simple_translate to map the last byte
1874 into STARTING_J. */
1875 simple_translate[j] = starting_j;
1876 if (ch == starting_ch)
1877 break;
1878 BM_tab[j] = dirlen - i;
1879 }
1880 }
facdc750
RS
1881 }
1882 else
1883 {
1884 j = *ptr;
1885
f4646fff 1886 if (i == dirlen)
facdc750
RS
1887 stride_for_teases = BM_tab[j];
1888 BM_tab[j] = dirlen - i;
ca1d1d23 1889 }
f4646fff
AS
1890 /* stride_for_teases tells how much to stride if we get a
1891 match on the far character but are subsequently
1892 disappointed, by recording what the stride would have been
1893 for that character if the last character had been
1894 different. */
facdc750 1895 }
facdc750
RS
1896 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1897 /* loop invariant - POS_BYTE points at where last char (first
1898 char if reverse) of pattern would align in a possible match. */
1899 while (n != 0)
1900 {
e7deaab0 1901 EMACS_INT tail_end;
facdc750
RS
1902 unsigned char *tail_end_ptr;
1903
1904 /* It's been reported that some (broken) compiler thinks that
1905 Boolean expressions in an arithmetic context are unsigned.
1906 Using an explicit ?1:0 prevents this. */
1907 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1908 < 0)
1909 return (n * (0 - direction));
1910 /* First we do the part we can by pointers (maybe nothing) */
1911 QUIT;
1912 pat = base_pat;
1913 limit = pos_byte - dirlen + direction;
67ce527d
KH
1914 if (direction > 0)
1915 {
1916 limit = BUFFER_CEILING_OF (limit);
1917 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1918 can take on without hitting edge of buffer or the gap. */
1919 limit = min (limit, pos_byte + 20000);
1920 limit = min (limit, lim_byte - 1);
1921 }
1922 else
1923 {
1924 limit = BUFFER_FLOOR_OF (limit);
1925 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1926 can take on without hitting edge of buffer or the gap. */
1927 limit = max (limit, pos_byte - 20000);
1928 limit = max (limit, lim_byte);
1929 }
facdc750
RS
1930 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1931 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1932
1933 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1934 {
facdc750
RS
1935 unsigned char *p2;
1936
1937 p_limit = BYTE_POS_ADDR (limit);
1938 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
f4646fff 1939 /* In this loop, pos + cursor - p2 is the surrogate for pos. */
facdc750 1940 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1941 {
facdc750 1942 if (direction > 0) /* worth duplicating */
ca1d1d23 1943 {
f4646fff
AS
1944 while (cursor <= p_limit)
1945 {
1946 if (BM_tab[*cursor] == 0)
1947 goto hit;
facdc750 1948 cursor += BM_tab[*cursor];
f4646fff 1949 }
facdc750
RS
1950 }
1951 else
1952 {
f4646fff
AS
1953 while (cursor >= p_limit)
1954 {
1955 if (BM_tab[*cursor] == 0)
1956 goto hit;
facdc750 1957 cursor += BM_tab[*cursor];
f4646fff 1958 }
facdc750 1959 }
f4646fff
AS
1960 /* If you are here, cursor is beyond the end of the
1961 searched region. You fail to match within the
1962 permitted region and would otherwise try a character
1963 beyond that region. */
1964 break;
1965
1966 hit:
facdc750
RS
1967 i = dirlen - direction;
1968 if (! NILP (trt))
1969 {
1970 while ((i -= direction) + direction != 0)
ca1d1d23 1971 {
facdc750
RS
1972 int ch;
1973 cursor -= direction;
1974 /* Translate only the last byte of a character. */
1975 if (! multibyte
1976 || ((cursor == tail_end_ptr
1977 || CHAR_HEAD_P (cursor[1]))
1978 && (CHAR_HEAD_P (cursor[0])
0190922f
KH
1979 /* Check if this is the last byte of
1980 a translable character. */
1981 || (translate_prev_byte1 == cursor[-1]
1982 && (CHAR_HEAD_P (translate_prev_byte1)
1983 || (translate_prev_byte2 == cursor[-2]
1984 && (CHAR_HEAD_P (translate_prev_byte2)
1985 || (translate_prev_byte3 == cursor[-3]))))))))
facdc750
RS
1986 ch = simple_translate[*cursor];
1987 else
1988 ch = *cursor;
1989 if (pat[i] != ch)
1990 break;
ca1d1d23 1991 }
facdc750
RS
1992 }
1993 else
1994 {
1995 while ((i -= direction) + direction != 0)
ca1d1d23 1996 {
facdc750
RS
1997 cursor -= direction;
1998 if (pat[i] != *cursor)
1999 break;
ca1d1d23 2000 }
facdc750
RS
2001 }
2002 cursor += dirlen - i - direction; /* fix cursor */
2003 if (i + direction == 0)
2004 {
e7deaab0 2005 EMACS_INT position, start, end;
0c8533c6 2006
facdc750 2007 cursor -= direction;
1113d9db 2008
facdc750
RS
2009 position = pos_byte + cursor - p2 + ((direction > 0)
2010 ? 1 - len_byte : 0);
2011 set_search_regs (position, len_byte);
ca325161 2012
ef887810
RS
2013 if (NILP (Vinhibit_changing_match_data))
2014 {
2015 start = search_regs.start[0];
2016 end = search_regs.end[0];
2017 }
2018 else
2019 /* If Vinhibit_changing_match_data is non-nil,
2020 search_regs will not be changed. So let's
2021 compute start and end here. */
2022 {
2023 start = BYTE_TO_CHAR (position);
2024 end = BYTE_TO_CHAR (position + len_byte);
2025 }
2026
facdc750
RS
2027 if ((n -= direction) != 0)
2028 cursor += dirlen; /* to resume search */
ca1d1d23 2029 else
ef887810 2030 return direction > 0 ? end : start;
ca1d1d23 2031 }
facdc750
RS
2032 else
2033 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 2034 }
facdc750
RS
2035 pos_byte += cursor - p2;
2036 }
2037 else
f4646fff
AS
2038 /* Now we'll pick up a clump that has to be done the hard
2039 way because it covers a discontinuity. */
facdc750
RS
2040 {
2041 limit = ((direction > 0)
2042 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
2043 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
2044 limit = ((direction > 0)
2045 ? min (limit + len_byte, lim_byte - 1)
2046 : max (limit - len_byte, lim_byte));
2047 /* LIMIT is now the last value POS_BYTE can have
2048 and still be valid for a possible match. */
2049 while (1)
ca1d1d23 2050 {
f4646fff
AS
2051 /* This loop can be coded for space rather than
2052 speed because it will usually run only once.
2053 (the reach is at most len + 21, and typically
2054 does not exceed len). */
facdc750 2055 while ((limit - pos_byte) * direction >= 0)
f4646fff
AS
2056 {
2057 int ch = FETCH_BYTE (pos_byte);
2058 if (BM_tab[ch] == 0)
2059 goto hit2;
2060 pos_byte += BM_tab[ch];
2061 }
2062 break; /* ran off the end */
2063
2064 hit2:
2065 /* Found what might be a match. */
facdc750
RS
2066 i = dirlen - direction;
2067 while ((i -= direction) + direction != 0)
ca1d1d23 2068 {
facdc750
RS
2069 int ch;
2070 unsigned char *ptr;
2071 pos_byte -= direction;
2072 ptr = BYTE_POS_ADDR (pos_byte);
2073 /* Translate only the last byte of a character. */
2074 if (! multibyte
2075 || ((ptr == tail_end_ptr
2076 || CHAR_HEAD_P (ptr[1]))
2077 && (CHAR_HEAD_P (ptr[0])
0190922f
KH
2078 /* Check if this is the last byte of a
2079 translable character. */
2080 || (translate_prev_byte1 == ptr[-1]
2081 && (CHAR_HEAD_P (translate_prev_byte1)
2082 || (translate_prev_byte2 == ptr[-2]
2083 && (CHAR_HEAD_P (translate_prev_byte2)
2084 || translate_prev_byte3 == ptr[-3])))))))
facdc750
RS
2085 ch = simple_translate[*ptr];
2086 else
2087 ch = *ptr;
2088 if (pat[i] != ch)
2089 break;
2090 }
2091 /* Above loop has moved POS_BYTE part or all the way
2092 back to the first pos (last pos if reverse).
2093 Set it once again at the last (first if reverse) char. */
f4646fff 2094 pos_byte += dirlen - i - direction;
facdc750
RS
2095 if (i + direction == 0)
2096 {
e7deaab0 2097 EMACS_INT position, start, end;
facdc750 2098 pos_byte -= direction;
1113d9db 2099
facdc750 2100 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
facdc750 2101 set_search_regs (position, len_byte);
ca325161 2102
ef887810
RS
2103 if (NILP (Vinhibit_changing_match_data))
2104 {
2105 start = search_regs.start[0];
2106 end = search_regs.end[0];
2107 }
2108 else
2109 /* If Vinhibit_changing_match_data is non-nil,
2110 search_regs will not be changed. So let's
2111 compute start and end here. */
2112 {
2113 start = BYTE_TO_CHAR (position);
2114 end = BYTE_TO_CHAR (position + len_byte);
2115 }
2116
facdc750
RS
2117 if ((n -= direction) != 0)
2118 pos_byte += dirlen; /* to resume search */
ca1d1d23 2119 else
ef887810 2120 return direction > 0 ? end : start;
ca1d1d23 2121 }
facdc750
RS
2122 else
2123 pos_byte += stride_for_teases;
2124 }
2125 }
2126 /* We have done one clump. Can we continue? */
2127 if ((lim_byte - pos_byte) * direction < 0)
2128 return ((0 - n) * direction);
ca1d1d23 2129 }
facdc750 2130 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 2131}
ca325161 2132
fa8ed3e0 2133/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
a7e4cdde
RS
2134 for the overall match just found in the current buffer.
2135 Also clear out the match data for registers 1 and up. */
ca325161
RS
2136
2137static void
fa8ed3e0 2138set_search_regs (beg_byte, nbytes)
e7deaab0 2139 EMACS_INT beg_byte, nbytes;
ca325161 2140{
a7e4cdde
RS
2141 int i;
2142
ef887810
RS
2143 if (!NILP (Vinhibit_changing_match_data))
2144 return;
2145
ca325161
RS
2146 /* Make sure we have registers in which to store
2147 the match position. */
2148 if (search_regs.num_regs == 0)
2149 {
2d4a771a
RS
2150 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2151 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 2152 search_regs.num_regs = 2;
ca325161
RS
2153 }
2154
a7e4cdde
RS
2155 /* Clear out the other registers. */
2156 for (i = 1; i < search_regs.num_regs; i++)
2157 {
2158 search_regs.start[i] = -1;
2159 search_regs.end[i] = -1;
2160 }
2161
fa8ed3e0
RS
2162 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2163 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 2164 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 2165}
ca1d1d23 2166\f
c6e0e1cc
CY
2167/* Given STRING, a string of words separated by word delimiters,
2168 compute a regexp that matches those exact words separated by
2169 arbitrary punctuation. If LAX is nonzero, the end of the string
2170 need not match a word boundary unless it ends in whitespace. */
ca1d1d23
JB
2171
2172static Lisp_Object
c6e0e1cc 2173wordify (string, lax)
ca1d1d23 2174 Lisp_Object string;
c6e0e1cc 2175 int lax;
ca1d1d23
JB
2176{
2177 register unsigned char *p, *o;
0c8533c6 2178 register int i, i_byte, len, punct_count = 0, word_count = 0;
ca1d1d23 2179 Lisp_Object val;
0c8533c6 2180 int prev_c = 0;
c6e0e1cc 2181 int adjust, whitespace_at_end;
ca1d1d23 2182
b7826503 2183 CHECK_STRING (string);
d5db4077
KR
2184 p = SDATA (string);
2185 len = SCHARS (string);
ca1d1d23 2186
0c8533c6
RS
2187 for (i = 0, i_byte = 0; i < len; )
2188 {
2189 int c;
177c0ea7 2190
93daa011 2191 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
0c8533c6
RS
2192
2193 if (SYNTAX (c) != Sword)
2194 {
2195 punct_count++;
2196 if (i > 0 && SYNTAX (prev_c) == Sword)
2197 word_count++;
2198 }
ca1d1d23 2199
0c8533c6
RS
2200 prev_c = c;
2201 }
2202
2203 if (SYNTAX (prev_c) == Sword)
c6e0e1cc
CY
2204 {
2205 word_count++;
2206 whitespace_at_end = 0;
2207 }
2208 else
2209 whitespace_at_end = 1;
2210
0c8533c6 2211 if (!word_count)
c60416e0 2212 return empty_unibyte_string;
0c8533c6 2213
c6e0e1cc
CY
2214 adjust = - punct_count + 5 * (word_count - 1)
2215 + ((lax && !whitespace_at_end) ? 2 : 4);
8a2df937
RS
2216 if (STRING_MULTIBYTE (string))
2217 val = make_uninit_multibyte_string (len + adjust,
d5db4077 2218 SBYTES (string)
8a2df937
RS
2219 + adjust);
2220 else
2221 val = make_uninit_string (len + adjust);
ca1d1d23 2222
d5db4077 2223 o = SDATA (val);
ca1d1d23
JB
2224 *o++ = '\\';
2225 *o++ = 'b';
1e9582d4 2226 prev_c = 0;
ca1d1d23 2227
1e9582d4
RS
2228 for (i = 0, i_byte = 0; i < len; )
2229 {
2230 int c;
2231 int i_byte_orig = i_byte;
177c0ea7 2232
93daa011 2233 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
1e9582d4
RS
2234
2235 if (SYNTAX (c) == Sword)
2236 {
5d69fe10 2237 bcopy (SDATA (string) + i_byte_orig, o,
1e9582d4
RS
2238 i_byte - i_byte_orig);
2239 o += i_byte - i_byte_orig;
2240 }
2241 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2242 {
2243 *o++ = '\\';
2244 *o++ = 'W';
2245 *o++ = '\\';
2246 *o++ = 'W';
2247 *o++ = '*';
2248 }
2249
2250 prev_c = c;
2251 }
ca1d1d23 2252
c6e0e1cc
CY
2253 if (!lax || whitespace_at_end)
2254 {
2255 *o++ = '\\';
2256 *o++ = 'b';
2257 }
ca1d1d23
JB
2258
2259 return val;
2260}
2261\f
2262DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
8c1a1077
PJ
2263 "MSearch backward: ",
2264 doc: /* Search backward from point for STRING.
2265Set point to the beginning of the occurrence found, and return point.
2266An optional second argument bounds the search; it is a buffer position.
2267The match found must not extend before that position.
2268Optional third argument, if t, means if fail just return nil (no error).
2269 If not nil and not t, position at limit of search and return nil.
2270Optional fourth argument is repeat count--search for successive occurrences.
2271
2272Search case-sensitivity is determined by the value of the variable
2273`case-fold-search', which see.
2274
2275See also the functions `match-beginning', `match-end' and `replace-match'. */)
2276 (string, bound, noerror, count)
ca1d1d23
JB
2277 Lisp_Object string, bound, noerror, count;
2278{
b819a390 2279 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
2280}
2281
6af43974 2282DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
8c1a1077
PJ
2283 doc: /* Search forward from point for STRING.
2284Set point to the end of the occurrence found, and return point.
2285An optional second argument bounds the search; it is a buffer position.
d6aacdcd 2286The match found must not extend after that position. A value of nil is
48740f01 2287 equivalent to (point-max).
8c1a1077
PJ
2288Optional third argument, if t, means if fail just return nil (no error).
2289 If not nil and not t, move to limit of search and return nil.
2290Optional fourth argument is repeat count--search for successive occurrences.
2291
2292Search case-sensitivity is determined by the value of the variable
2293`case-fold-search', which see.
2294
2295See also the functions `match-beginning', `match-end' and `replace-match'. */)
2296 (string, bound, noerror, count)
ca1d1d23
JB
2297 Lisp_Object string, bound, noerror, count;
2298{
b819a390 2299 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
2300}
2301
2302DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
8c1a1077
PJ
2303 "sWord search backward: ",
2304 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2305Set point to the beginning of the occurrence found, and return point.
2306An optional second argument bounds the search; it is a buffer position.
2307The match found must not extend before that position.
2308Optional third argument, if t, means if fail just return nil (no error).
2309 If not nil and not t, move to limit of search and return nil.
2310Optional fourth argument is repeat count--search for successive occurrences. */)
2311 (string, bound, noerror, count)
ca1d1d23
JB
2312 Lisp_Object string, bound, noerror, count;
2313{
c6e0e1cc 2314 return search_command (wordify (string, 0), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2315}
2316
2317DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
8c1a1077
PJ
2318 "sWord search: ",
2319 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2320Set point to the end of the occurrence found, and return point.
2321An optional second argument bounds the search; it is a buffer position.
2322The match found must not extend after that position.
2323Optional third argument, if t, means if fail just return nil (no error).
2324 If not nil and not t, move to limit of search and return nil.
2325Optional fourth argument is repeat count--search for successive occurrences. */)
2326 (string, bound, noerror, count)
ca1d1d23
JB
2327 Lisp_Object string, bound, noerror, count;
2328{
c6e0e1cc
CY
2329 return search_command (wordify (string, 0), bound, noerror, count, 1, 1, 0);
2330}
2331
2332DEFUN ("word-search-backward-lax", Fword_search_backward_lax, Sword_search_backward_lax, 1, 4,
2333 "sWord search backward: ",
2334 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2335Set point to the beginning of the occurrence found, and return point.
2336
2337Unlike `word-search-backward', the end of STRING need not match a word
2338boundary unless it ends in whitespace.
2339
2340An optional second argument bounds the search; it is a buffer position.
2341The match found must not extend before that position.
2342Optional third argument, if t, means if fail just return nil (no error).
2343 If not nil and not t, move to limit of search and return nil.
2344Optional fourth argument is repeat count--search for successive occurrences. */)
2345 (string, bound, noerror, count)
2346 Lisp_Object string, bound, noerror, count;
2347{
2348 return search_command (wordify (string, 1), bound, noerror, count, -1, 1, 0);
2349}
2350
2351DEFUN ("word-search-forward-lax", Fword_search_forward_lax, Sword_search_forward_lax, 1, 4,
2352 "sWord search: ",
2353 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2354Set point to the end of the occurrence found, and return point.
2355
2356Unlike `word-search-forward', the end of STRING need not match a word
2357boundary unless it ends in whitespace.
2358
2359An optional second argument bounds the search; it is a buffer position.
2360The match found must not extend after that position.
2361Optional third argument, if t, means if fail just return nil (no error).
2362 If not nil and not t, move to limit of search and return nil.
2363Optional fourth argument is repeat count--search for successive occurrences. */)
2364 (string, bound, noerror, count)
2365 Lisp_Object string, bound, noerror, count;
2366{
2367 return search_command (wordify (string, 1), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
2368}
2369
2370DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
8c1a1077
PJ
2371 "sRE search backward: ",
2372 doc: /* Search backward from point for match for regular expression REGEXP.
2373Set point to the beginning of the match, and return point.
2374The match found is the one starting last in the buffer
2375and yet ending before the origin of the search.
2376An optional second argument bounds the search; it is a buffer position.
2377The match found must start at or after that position.
2378Optional third argument, if t, means if fail just return nil (no error).
2379 If not nil and not t, move to limit of search and return nil.
2380Optional fourth argument is repeat count--search for successive occurrences.
2381See also the functions `match-beginning', `match-end', `match-string',
2382and `replace-match'. */)
2383 (regexp, bound, noerror, count)
19c0a730 2384 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2385{
b819a390 2386 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2387}
2388
2389DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
8c1a1077
PJ
2390 "sRE search: ",
2391 doc: /* Search forward from point for regular expression REGEXP.
2392Set point to the end of the occurrence found, and return point.
2393An optional second argument bounds the search; it is a buffer position.
2394The match found must not extend after that position.
2395Optional third argument, if t, means if fail just return nil (no error).
2396 If not nil and not t, move to limit of search and return nil.
2397Optional fourth argument is repeat count--search for successive occurrences.
2398See also the functions `match-beginning', `match-end', `match-string',
2399and `replace-match'. */)
2400 (regexp, bound, noerror, count)
19c0a730 2401 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2402{
b819a390
RS
2403 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2404}
2405
2406DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
8c1a1077
PJ
2407 "sPosix search backward: ",
2408 doc: /* Search backward from point for match for regular expression REGEXP.
2409Find the longest match in accord with Posix regular expression rules.
2410Set point to the beginning of the match, and return point.
2411The match found is the one starting last in the buffer
2412and yet ending before the origin of the search.
2413An optional second argument bounds the search; it is a buffer position.
2414The match found must start at or after that position.
2415Optional third argument, if t, means if fail just return nil (no error).
2416 If not nil and not t, move to limit of search and return nil.
2417Optional fourth argument is repeat count--search for successive occurrences.
2418See also the functions `match-beginning', `match-end', `match-string',
2419and `replace-match'. */)
2420 (regexp, bound, noerror, count)
b819a390
RS
2421 Lisp_Object regexp, bound, noerror, count;
2422{
2423 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2424}
2425
2426DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
8c1a1077
PJ
2427 "sPosix search: ",
2428 doc: /* Search forward from point for regular expression REGEXP.
2429Find the longest match in accord with Posix regular expression rules.
2430Set point to the end of the occurrence found, and return point.
2431An optional second argument bounds the search; it is a buffer position.
2432The match found must not extend after that position.
2433Optional third argument, if t, means if fail just return nil (no error).
2434 If not nil and not t, move to limit of search and return nil.
2435Optional fourth argument is repeat count--search for successive occurrences.
2436See also the functions `match-beginning', `match-end', `match-string',
2437and `replace-match'. */)
2438 (regexp, bound, noerror, count)
b819a390
RS
2439 Lisp_Object regexp, bound, noerror, count;
2440{
2441 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2442}
2443\f
d7a5ad5f 2444DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
8c1a1077 2445 doc: /* Replace text matched by last search with NEWTEXT.
4dd0c271
RS
2446Leave point at the end of the replacement text.
2447
8c1a1077
PJ
2448If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2449Otherwise maybe capitalize the whole text, or maybe just word initials,
2450based on the replaced text.
2451If the replaced text has only capital letters
2452and has at least one multiletter word, convert NEWTEXT to all caps.
4dd0c271
RS
2453Otherwise if all words are capitalized in the replaced text,
2454capitalize each word in NEWTEXT.
8c1a1077
PJ
2455
2456If third arg LITERAL is non-nil, insert NEWTEXT literally.
2457Otherwise treat `\\' as special:
2458 `\\&' in NEWTEXT means substitute original matched text.
2459 `\\N' means substitute what matched the Nth `\\(...\\)'.
2460 If Nth parens didn't match, substitute nothing.
2461 `\\\\' means insert one `\\'.
4dd0c271
RS
2462Case conversion does not apply to these substitutions.
2463
8c1a1077 2464FIXEDCASE and LITERAL are optional arguments.
8c1a1077
PJ
2465
2466The optional fourth argument STRING can be a string to modify.
2467This is meaningful when the previous match was done against STRING,
2468using `string-match'. When used this way, `replace-match'
2469creates and returns a new string made by copying STRING and replacing
2470the part of STRING that was matched.
2471
2472The optional fifth argument SUBEXP specifies a subexpression;
2473it says to replace just that subexpression with NEWTEXT,
2474rather than replacing the entire matched text.
2475This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2476`\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2477NEWTEXT in place of subexp N.
2478This is useful only after a regular expression search or match,
2479since only regular expressions have distinguished subexpressions. */)
2480 (newtext, fixedcase, literal, string, subexp)
d7a5ad5f 2481 Lisp_Object newtext, fixedcase, literal, string, subexp;
ca1d1d23
JB
2482{
2483 enum { nochange, all_caps, cap_initial } case_action;
ac3b28b1 2484 register int pos, pos_byte;
ca1d1d23 2485 int some_multiletter_word;
97832bd0 2486 int some_lowercase;
73dc8771 2487 int some_uppercase;
208767c3 2488 int some_nonuppercase_initial;
ca1d1d23 2489 register int c, prevc;
d7a5ad5f 2490 int sub;
e7deaab0 2491 EMACS_INT opoint, newpoint;
ca1d1d23 2492
b7826503 2493 CHECK_STRING (newtext);
ca1d1d23 2494
080c45fd 2495 if (! NILP (string))
b7826503 2496 CHECK_STRING (string);
080c45fd 2497
ca1d1d23
JB
2498 case_action = nochange; /* We tried an initialization */
2499 /* but some C compilers blew it */
4746118a
JB
2500
2501 if (search_regs.num_regs <= 0)
d72cdbfc 2502 error ("`replace-match' called before any match found");
4746118a 2503
d7a5ad5f
RS
2504 if (NILP (subexp))
2505 sub = 0;
2506 else
2507 {
b7826503 2508 CHECK_NUMBER (subexp);
d7a5ad5f
RS
2509 sub = XINT (subexp);
2510 if (sub < 0 || sub >= search_regs.num_regs)
2511 args_out_of_range (subexp, make_number (search_regs.num_regs));
2512 }
2513
080c45fd
RS
2514 if (NILP (string))
2515 {
d7a5ad5f
RS
2516 if (search_regs.start[sub] < BEGV
2517 || search_regs.start[sub] > search_regs.end[sub]
2518 || search_regs.end[sub] > ZV)
2519 args_out_of_range (make_number (search_regs.start[sub]),
2520 make_number (search_regs.end[sub]));
080c45fd
RS
2521 }
2522 else
2523 {
d7a5ad5f
RS
2524 if (search_regs.start[sub] < 0
2525 || search_regs.start[sub] > search_regs.end[sub]
d5db4077 2526 || search_regs.end[sub] > SCHARS (string))
d7a5ad5f
RS
2527 args_out_of_range (make_number (search_regs.start[sub]),
2528 make_number (search_regs.end[sub]));
080c45fd 2529 }
ca1d1d23
JB
2530
2531 if (NILP (fixedcase))
2532 {
2533 /* Decide how to casify by examining the matched text. */
e7deaab0 2534 EMACS_INT last;
ca1d1d23 2535
ac3b28b1
KH
2536 pos = search_regs.start[sub];
2537 last = search_regs.end[sub];
fa8ed3e0
RS
2538
2539 if (NILP (string))
ac3b28b1 2540 pos_byte = CHAR_TO_BYTE (pos);
fa8ed3e0 2541 else
ac3b28b1 2542 pos_byte = string_char_to_byte (string, pos);
fa8ed3e0 2543
ca1d1d23
JB
2544 prevc = '\n';
2545 case_action = all_caps;
2546
2547 /* some_multiletter_word is set nonzero if any original word
2548 is more than one letter long. */
2549 some_multiletter_word = 0;
97832bd0 2550 some_lowercase = 0;
208767c3 2551 some_nonuppercase_initial = 0;
73dc8771 2552 some_uppercase = 0;
ca1d1d23 2553
ac3b28b1 2554 while (pos < last)
ca1d1d23 2555 {
080c45fd 2556 if (NILP (string))
ac3b28b1 2557 {
93daa011 2558 c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
ac3b28b1
KH
2559 INC_BOTH (pos, pos_byte);
2560 }
080c45fd 2561 else
93daa011 2562 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
080c45fd 2563
ca1d1d23
JB
2564 if (LOWERCASEP (c))
2565 {
2566 /* Cannot be all caps if any original char is lower case */
2567
97832bd0 2568 some_lowercase = 1;
ca1d1d23 2569 if (SYNTAX (prevc) != Sword)
208767c3 2570 some_nonuppercase_initial = 1;
ca1d1d23
JB
2571 else
2572 some_multiletter_word = 1;
2573 }
d16c2b66 2574 else if (UPPERCASEP (c))
ca1d1d23 2575 {
73dc8771 2576 some_uppercase = 1;
97832bd0 2577 if (SYNTAX (prevc) != Sword)
c4d460ce 2578 ;
97832bd0 2579 else
ca1d1d23
JB
2580 some_multiletter_word = 1;
2581 }
208767c3
RS
2582 else
2583 {
2584 /* If the initial is a caseless word constituent,
2585 treat that like a lowercase initial. */
2586 if (SYNTAX (prevc) != Sword)
2587 some_nonuppercase_initial = 1;
2588 }
ca1d1d23
JB
2589
2590 prevc = c;
2591 }
2592
97832bd0
RS
2593 /* Convert to all caps if the old text is all caps
2594 and has at least one multiletter word. */
2595 if (! some_lowercase && some_multiletter_word)
2596 case_action = all_caps;
c4d460ce 2597 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2598 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2599 case_action = cap_initial;
208767c3 2600 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2601 /* Should x -> yz, operating on X, give Yz or YZ?
2602 We'll assume the latter. */
2603 case_action = all_caps;
97832bd0
RS
2604 else
2605 case_action = nochange;
ca1d1d23
JB
2606 }
2607
080c45fd
RS
2608 /* Do replacement in a string. */
2609 if (!NILP (string))
2610 {
2611 Lisp_Object before, after;
2612
2613 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2614 make_number (search_regs.start[sub]));
2615 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2616
636a5e28
RS
2617 /* Substitute parts of the match into NEWTEXT
2618 if desired. */
080c45fd
RS
2619 if (NILP (literal))
2620 {
e7deaab0
AS
2621 EMACS_INT lastpos = 0;
2622 EMACS_INT lastpos_byte = 0;
080c45fd
RS
2623 /* We build up the substituted string in ACCUM. */
2624 Lisp_Object accum;
2625 Lisp_Object middle;
d5db4077 2626 int length = SBYTES (newtext);
080c45fd
RS
2627
2628 accum = Qnil;
2629
ac3b28b1 2630 for (pos_byte = 0, pos = 0; pos_byte < length;)
080c45fd
RS
2631 {
2632 int substart = -1;
6bbd7a29 2633 int subend = 0;
1e79ec24 2634 int delbackslash = 0;
080c45fd 2635
0c8533c6
RS
2636 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2637
080c45fd
RS
2638 if (c == '\\')
2639 {
0c8533c6 2640 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
177c0ea7 2641
080c45fd
RS
2642 if (c == '&')
2643 {
d7a5ad5f
RS
2644 substart = search_regs.start[sub];
2645 subend = search_regs.end[sub];
080c45fd 2646 }
76fc1ea2 2647 else if (c >= '1' && c <= '9')
080c45fd 2648 {
76fc1ea2
KH
2649 if (search_regs.start[c - '0'] >= 0
2650 && c <= search_regs.num_regs + '0')
080c45fd
RS
2651 {
2652 substart = search_regs.start[c - '0'];
2653 subend = search_regs.end[c - '0'];
2654 }
76fc1ea2
KH
2655 else
2656 {
2657 /* If that subexp did not match,
2658 replace \\N with nothing. */
2659 substart = 0;
2660 subend = 0;
2661 }
080c45fd 2662 }
1e79ec24
KH
2663 else if (c == '\\')
2664 delbackslash = 1;
636a5e28
RS
2665 else
2666 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2667 }
2668 if (substart >= 0)
2669 {
d131e79c
RS
2670 if (pos - 2 != lastpos)
2671 middle = substring_both (newtext, lastpos,
2672 lastpos_byte,
2673 pos - 2, pos_byte - 2);
080c45fd
RS
2674 else
2675 middle = Qnil;
2676 accum = concat3 (accum, middle,
0c8533c6
RS
2677 Fsubstring (string,
2678 make_number (substart),
080c45fd
RS
2679 make_number (subend)));
2680 lastpos = pos;
0c8533c6 2681 lastpos_byte = pos_byte;
080c45fd 2682 }
1e79ec24
KH
2683 else if (delbackslash)
2684 {
d131e79c
RS
2685 middle = substring_both (newtext, lastpos,
2686 lastpos_byte,
2687 pos - 1, pos_byte - 1);
0c8533c6 2688
1e79ec24
KH
2689 accum = concat2 (accum, middle);
2690 lastpos = pos;
0c8533c6 2691 lastpos_byte = pos_byte;
1e79ec24 2692 }
080c45fd
RS
2693 }
2694
d131e79c
RS
2695 if (pos != lastpos)
2696 middle = substring_both (newtext, lastpos,
2697 lastpos_byte,
0c8533c6 2698 pos, pos_byte);
080c45fd
RS
2699 else
2700 middle = Qnil;
2701
2702 newtext = concat2 (accum, middle);
2703 }
2704
636a5e28 2705 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2706 if (case_action == all_caps)
2707 newtext = Fupcase (newtext);
2708 else if (case_action == cap_initial)
2b2eead9 2709 newtext = Fupcase_initials (newtext);
080c45fd
RS
2710
2711 return concat3 (before, newtext, after);
2712 }
2713
09c4719e 2714 /* Record point, then move (quietly) to the start of the match. */
9160906f 2715 if (PT >= search_regs.end[sub])
b0eba991 2716 opoint = PT - ZV;
9160906f
RS
2717 else if (PT > search_regs.start[sub])
2718 opoint = search_regs.end[sub] - ZV;
b0eba991
RS
2719 else
2720 opoint = PT;
2721
886ed6ec
RS
2722 /* If we want non-literal replacement,
2723 perform substitution on the replacement string. */
2724 if (NILP (literal))
ca1d1d23 2725 {
d5db4077 2726 int length = SBYTES (newtext);
68e69fbd
RS
2727 unsigned char *substed;
2728 int substed_alloc_size, substed_len;
3bc25e52
KH
2729 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2730 int str_multibyte = STRING_MULTIBYTE (newtext);
2731 Lisp_Object rev_tbl;
886ed6ec 2732 int really_changed = 0;
3bc25e52 2733
8f924df7 2734 rev_tbl = Qnil;
ac3b28b1 2735
68e69fbd
RS
2736 substed_alloc_size = length * 2 + 100;
2737 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2738 substed_len = 0;
2739
3bc25e52
KH
2740 /* Go thru NEWTEXT, producing the actual text to insert in
2741 SUBSTED while adjusting multibyteness to that of the current
2742 buffer. */
ca1d1d23 2743
ac3b28b1 2744 for (pos_byte = 0, pos = 0; pos_byte < length;)
ca1d1d23 2745 {
68e69fbd 2746 unsigned char str[MAX_MULTIBYTE_LENGTH];
f8ce8a0d
GM
2747 unsigned char *add_stuff = NULL;
2748 int add_len = 0;
68e69fbd 2749 int idx = -1;
9a76659d 2750
3bc25e52
KH
2751 if (str_multibyte)
2752 {
eb99a8dd 2753 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
3bc25e52
KH
2754 if (!buf_multibyte)
2755 c = multibyte_char_to_unibyte (c, rev_tbl);
2756 }
2757 else
2758 {
2759 /* Note that we don't have to increment POS. */
5d69fe10 2760 c = SREF (newtext, pos_byte++);
3bc25e52
KH
2761 if (buf_multibyte)
2762 c = unibyte_char_to_multibyte (c);
2763 }
ac3b28b1 2764
68e69fbd
RS
2765 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2766 or set IDX to a match index, which means put that part
2767 of the buffer text into SUBSTED. */
2768
ca1d1d23
JB
2769 if (c == '\\')
2770 {
886ed6ec
RS
2771 really_changed = 1;
2772
3bc25e52
KH
2773 if (str_multibyte)
2774 {
eb99a8dd
KH
2775 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2776 pos, pos_byte);
071ce769 2777 if (!buf_multibyte && !ASCII_CHAR_P (c))
3bc25e52
KH
2778 c = multibyte_char_to_unibyte (c, rev_tbl);
2779 }
2780 else
2781 {
d5db4077 2782 c = SREF (newtext, pos_byte++);
3bc25e52
KH
2783 if (buf_multibyte)
2784 c = unibyte_char_to_multibyte (c);
2785 }
2786
ca1d1d23 2787 if (c == '&')
68e69fbd 2788 idx = sub;
78445046 2789 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
2790 {
2791 if (search_regs.start[c - '0'] >= 1)
68e69fbd 2792 idx = c - '0';
ca1d1d23 2793 }
636a5e28 2794 else if (c == '\\')
68e69fbd 2795 add_len = 1, add_stuff = "\\";
636a5e28 2796 else
3bc25e52
KH
2797 {
2798 xfree (substed);
2799 error ("Invalid use of `\\' in replacement text");
2800 }
ca1d1d23
JB
2801 }
2802 else
68e69fbd
RS
2803 {
2804 add_len = CHAR_STRING (c, str);
2805 add_stuff = str;
2806 }
2807
2808 /* If we want to copy part of a previous match,
2809 set up ADD_STUFF and ADD_LEN to point to it. */
2810 if (idx >= 0)
2811 {
e7deaab0 2812 EMACS_INT begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
68e69fbd
RS
2813 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2814 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2815 move_gap (search_regs.start[idx]);
2816 add_stuff = BYTE_POS_ADDR (begbyte);
2817 }
2818
2819 /* Now the stuff we want to add to SUBSTED
2820 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2821
2822 /* Make sure SUBSTED is big enough. */
2823 if (substed_len + add_len >= substed_alloc_size)
2824 {
2825 substed_alloc_size = substed_len + add_len + 500;
2826 substed = (unsigned char *) xrealloc (substed,
2827 substed_alloc_size + 1);
2828 }
2829
2830 /* Now add to the end of SUBSTED. */
f8ce8a0d
GM
2831 if (add_stuff)
2832 {
2833 bcopy (add_stuff, substed + substed_len, add_len);
2834 substed_len += add_len;
2835 }
ca1d1d23 2836 }
68e69fbd 2837
886ed6ec 2838 if (really_changed)
76fc1ea2
KH
2839 {
2840 if (buf_multibyte)
2841 {
2842 int nchars = multibyte_chars_in_text (substed, substed_len);
68e69fbd 2843
76fc1ea2
KH
2844 newtext = make_multibyte_string (substed, nchars, substed_len);
2845 }
2846 else
2847 newtext = make_unibyte_string (substed, substed_len);
2848 }
68e69fbd 2849 xfree (substed);
ca1d1d23
JB
2850 }
2851
886ed6ec
RS
2852 /* Replace the old text with the new in the cleanest possible way. */
2853 replace_range (search_regs.start[sub], search_regs.end[sub],
2854 newtext, 1, 0, 1);
d5db4077 2855 newpoint = search_regs.start[sub] + SCHARS (newtext);
ca1d1d23
JB
2856
2857 if (case_action == all_caps)
886ed6ec
RS
2858 Fupcase_region (make_number (search_regs.start[sub]),
2859 make_number (newpoint));
ca1d1d23 2860 else if (case_action == cap_initial)
886ed6ec
RS
2861 Fupcase_initials_region (make_number (search_regs.start[sub]),
2862 make_number (newpoint));
3e18eecf 2863
98e942e0
RS
2864 /* Adjust search data for this change. */
2865 {
e7deaab0
AS
2866 EMACS_INT oldend = search_regs.end[sub];
2867 EMACS_INT oldstart = search_regs.start[sub];
2868 EMACS_INT change = newpoint - search_regs.end[sub];
98e942e0
RS
2869 int i;
2870
2871 for (i = 0; i < search_regs.num_regs; i++)
2872 {
41c01205 2873 if (search_regs.start[i] >= oldend)
98e942e0 2874 search_regs.start[i] += change;
41c01205
DK
2875 else if (search_regs.start[i] > oldstart)
2876 search_regs.start[i] = oldstart;
2877 if (search_regs.end[i] >= oldend)
98e942e0 2878 search_regs.end[i] += change;
41c01205
DK
2879 else if (search_regs.end[i] > oldstart)
2880 search_regs.end[i] = oldstart;
98e942e0
RS
2881 }
2882 }
2883
b0eba991 2884 /* Put point back where it was in the text. */
8d808a65 2885 if (opoint <= 0)
fa8ed3e0 2886 TEMP_SET_PT (opoint + ZV);
b0eba991 2887 else
fa8ed3e0 2888 TEMP_SET_PT (opoint);
b0eba991
RS
2889
2890 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2891 move_if_not_intangible (newpoint);
177c0ea7 2892
ca1d1d23
JB
2893 return Qnil;
2894}
2895\f
2896static Lisp_Object
2897match_limit (num, beginningp)
2898 Lisp_Object num;
2899 int beginningp;
2900{
2901 register int n;
2902
b7826503 2903 CHECK_NUMBER (num);
ca1d1d23 2904 n = XINT (num);
f90a5bf5 2905 if (n < 0)
bd2cbd56 2906 args_out_of_range (num, make_number (0));
f90a5bf5
RS
2907 if (search_regs.num_regs <= 0)
2908 error ("No match data, because no search succeeded");
9b9ceb61 2909 if (n >= search_regs.num_regs
4746118a 2910 || search_regs.start[n] < 0)
ca1d1d23
JB
2911 return Qnil;
2912 return (make_number ((beginningp) ? search_regs.start[n]
2913 : search_regs.end[n]));
2914}
2915
2916DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
8c1a1077
PJ
2917 doc: /* Return position of start of text matched by last search.
2918SUBEXP, a number, specifies which parenthesized expression in the last
2919 regexp.
2920Value is nil if SUBEXPth pair didn't match, or there were less than
2921 SUBEXP pairs.
2922Zero means the entire text matched by the whole regexp or whole string. */)
2923 (subexp)
5806161b 2924 Lisp_Object subexp;
ca1d1d23 2925{
5806161b 2926 return match_limit (subexp, 1);
ca1d1d23
JB
2927}
2928
2929DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
8c1a1077
PJ
2930 doc: /* Return position of end of text matched by last search.
2931SUBEXP, a number, specifies which parenthesized expression in the last
2932 regexp.
2933Value is nil if SUBEXPth pair didn't match, or there were less than
2934 SUBEXP pairs.
2935Zero means the entire text matched by the whole regexp or whole string. */)
2936 (subexp)
5806161b 2937 Lisp_Object subexp;
ca1d1d23 2938{
5806161b 2939 return match_limit (subexp, 0);
177c0ea7 2940}
ca1d1d23 2941
abd0071c 2942DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
8c1a1077
PJ
2943 doc: /* Return a list containing all info on what the last search matched.
2944Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2945All the elements are markers or nil (nil if the Nth pair didn't match)
2946if the last match was on a buffer; integers or nil if a string was matched.
febb8aba 2947Use `set-match-data' to reinstate the data in this list.
8c1a1077 2948
41c01205
DK
2949If INTEGERS (the optional first argument) is non-nil, always use
2950integers \(rather than markers) to represent buffer positions. In
2951this case, and if the last match was in a buffer, the buffer will get
2952stored as one additional element at the end of the list.
2953
abd0071c
KS
2954If REUSE is a list, reuse it as part of the value. If REUSE is long
2955enough to hold all the values, and if INTEGERS is non-nil, no consing
2956is done.
2957
2958If optional third arg RESEAT is non-nil, any previous markers on the
2959REUSE list will be modified to point to nowhere.
2960
140a6b7e 2961Return value is undefined if the last search failed. */)
abd0071c
KS
2962 (integers, reuse, reseat)
2963 Lisp_Object integers, reuse, reseat;
ca1d1d23 2964{
56256c2a 2965 Lisp_Object tail, prev;
4746118a 2966 Lisp_Object *data;
ca1d1d23
JB
2967 int i, len;
2968
abd0071c
KS
2969 if (!NILP (reseat))
2970 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2971 if (MARKERP (XCAR (tail)))
2972 {
51f10faa 2973 unchain_marker (XMARKER (XCAR (tail)));
abd0071c
KS
2974 XSETCAR (tail, Qnil);
2975 }
2976
daa37602 2977 if (NILP (last_thing_searched))
c36bcf1b 2978 return Qnil;
daa37602 2979
6bbd7a29
GM
2980 prev = Qnil;
2981
41c01205 2982 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
4746118a
JB
2983 * sizeof (Lisp_Object));
2984
41c01205 2985 len = 0;
4746118a 2986 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2987 {
2988 int start = search_regs.start[i];
2989 if (start >= 0)
2990 {
56256c2a
RS
2991 if (EQ (last_thing_searched, Qt)
2992 || ! NILP (integers))
ca1d1d23 2993 {
c235cce7
KH
2994 XSETFASTINT (data[2 * i], start);
2995 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2996 }
0ed62dc7 2997 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2998 {
2999 data[2 * i] = Fmake_marker ();
daa37602
JB
3000 Fset_marker (data[2 * i],
3001 make_number (start),
3002 last_thing_searched);
ca1d1d23
JB
3003 data[2 * i + 1] = Fmake_marker ();
3004 Fset_marker (data[2 * i + 1],
177c0ea7 3005 make_number (search_regs.end[i]),
daa37602 3006 last_thing_searched);
ca1d1d23 3007 }
daa37602
JB
3008 else
3009 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
3010 abort ();
3011
abd0071c 3012 len = 2 * i + 2;
ca1d1d23
JB
3013 }
3014 else
abd0071c 3015 data[2 * i] = data[2 * i + 1] = Qnil;
ca1d1d23 3016 }
56256c2a 3017
bd2cbd56 3018 if (BUFFERP (last_thing_searched) && !NILP (integers))
41c01205 3019 {
bd2cbd56 3020 data[len] = last_thing_searched;
41c01205
DK
3021 len++;
3022 }
3023
56256c2a
RS
3024 /* If REUSE is not usable, cons up the values and return them. */
3025 if (! CONSP (reuse))
41c01205 3026 return Flist (len, data);
56256c2a
RS
3027
3028 /* If REUSE is a list, store as many value elements as will fit
3029 into the elements of REUSE. */
3030 for (i = 0, tail = reuse; CONSP (tail);
c1d497be 3031 i++, tail = XCDR (tail))
56256c2a 3032 {
41c01205 3033 if (i < len)
f3fbd155 3034 XSETCAR (tail, data[i]);
56256c2a 3035 else
f3fbd155 3036 XSETCAR (tail, Qnil);
56256c2a
RS
3037 prev = tail;
3038 }
3039
3040 /* If we couldn't fit all value elements into REUSE,
3041 cons up the rest of them and add them to the end of REUSE. */
41c01205
DK
3042 if (i < len)
3043 XSETCDR (prev, Flist (len - i, data + i));
56256c2a
RS
3044
3045 return reuse;
ca1d1d23
JB
3046}
3047
b51d6c92
SM
3048/* We used to have an internal use variant of `reseat' described as:
3049
3050 If RESEAT is `evaporate', put the markers back on the free list
3051 immediately. No other references to the markers must exist in this
3052 case, so it is used only internally on the unwind stack and
3053 save-match-data from Lisp.
3054
3055 But it was ill-conceived: those supposedly-internal markers get exposed via
3056 the undo-list, so freeing them here is unsafe. */
ca1d1d23 3057
abd0071c 3058DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
8c1a1077 3059 doc: /* Set internal data on last search match from elements of LIST.
abd0071c
KS
3060LIST should have been created by calling `match-data' previously.
3061
51f10faa 3062If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
abd0071c
KS
3063 (list, reseat)
3064 register Lisp_Object list, reseat;
ca1d1d23
JB
3065{
3066 register int i;
3067 register Lisp_Object marker;
3068
7074fde6
FP
3069 if (running_asynch_code)
3070 save_search_regs ();
3071
29100cea 3072 CHECK_LIST (list);
ca1d1d23 3073
41c01205
DK
3074 /* Unless we find a marker with a buffer or an explicit buffer
3075 in LIST, assume that this match data came from a string. */
daa37602
JB
3076 last_thing_searched = Qt;
3077
4746118a
JB
3078 /* Allocate registers if they don't already exist. */
3079 {
d084e942 3080 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
3081
3082 if (length > search_regs.num_regs)
3083 {
1113d9db
JB
3084 if (search_regs.num_regs == 0)
3085 {
3086 search_regs.start
3087 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3088 search_regs.end
3089 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3090 }
4746118a 3091 else
1113d9db
JB
3092 {
3093 search_regs.start
3094 = (regoff_t *) xrealloc (search_regs.start,
3095 length * sizeof (regoff_t));
3096 search_regs.end
3097 = (regoff_t *) xrealloc (search_regs.end,
3098 length * sizeof (regoff_t));
3099 }
4746118a 3100
e62371e9
KH
3101 for (i = search_regs.num_regs; i < length; i++)
3102 search_regs.start[i] = -1;
3103
487282dc 3104 search_regs.num_regs = length;
4746118a 3105 }
ca1d1d23 3106
abd0071c 3107 for (i = 0; CONSP (list); i++)
41c01205 3108 {
abd0071c 3109 marker = XCAR (list);
bd2cbd56 3110 if (BUFFERP (marker))
c3762cbd 3111 {
bd2cbd56 3112 last_thing_searched = marker;
c3762cbd
DK
3113 break;
3114 }
3115 if (i >= length)
3116 break;
41c01205
DK
3117 if (NILP (marker))
3118 {
3119 search_regs.start[i] = -1;
abd0071c 3120 list = XCDR (list);
41c01205
DK
3121 }
3122 else
3123 {
e7deaab0 3124 EMACS_INT from;
abd0071c 3125 Lisp_Object m;
e2811828 3126
abd0071c 3127 m = marker;
41c01205
DK
3128 if (MARKERP (marker))
3129 {
3130 if (XMARKER (marker)->buffer == 0)
3131 XSETFASTINT (marker, 0);
3132 else
3133 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3134 }
e2811828 3135
41c01205
DK
3136 CHECK_NUMBER_COERCE_MARKER (marker);
3137 from = XINT (marker);
e2811828 3138
abd0071c
KS
3139 if (!NILP (reseat) && MARKERP (m))
3140 {
b51d6c92 3141 unchain_marker (XMARKER (m));
9ad54a7e 3142 XSETCAR (list, Qnil);
abd0071c
KS
3143 }
3144
3145 if ((list = XCDR (list), !CONSP (list)))
3146 break;
3147
3148 m = marker = XCAR (list);
3149
41c01205
DK
3150 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3151 XSETFASTINT (marker, 0);
e2811828 3152
41c01205
DK
3153 CHECK_NUMBER_COERCE_MARKER (marker);
3154 search_regs.start[i] = from;
3155 search_regs.end[i] = XINT (marker);
abd0071c
KS
3156
3157 if (!NILP (reseat) && MARKERP (m))
3158 {
b51d6c92 3159 unchain_marker (XMARKER (m));
9ad54a7e 3160 XSETCAR (list, Qnil);
abd0071c 3161 }
41c01205 3162 }
abd0071c 3163 list = XCDR (list);
41c01205 3164 }
ca1d1d23 3165
41c01205
DK
3166 for (; i < search_regs.num_regs; i++)
3167 search_regs.start[i] = -1;
3168 }
ca1d1d23 3169
177c0ea7 3170 return Qnil;
ca1d1d23
JB
3171}
3172
7074fde6
FP
3173/* If non-zero the match data have been saved in saved_search_regs
3174 during the execution of a sentinel or filter. */
75ebf74b 3175static int search_regs_saved;
7074fde6 3176static struct re_registers saved_search_regs;
41c01205 3177static Lisp_Object saved_last_thing_searched;
7074fde6
FP
3178
3179/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3180 if asynchronous code (filter or sentinel) is running. */
3181static void
3182save_search_regs ()
3183{
3184 if (!search_regs_saved)
3185 {
3186 saved_search_regs.num_regs = search_regs.num_regs;
3187 saved_search_regs.start = search_regs.start;
3188 saved_search_regs.end = search_regs.end;
41c01205
DK
3189 saved_last_thing_searched = last_thing_searched;
3190 last_thing_searched = Qnil;
7074fde6 3191 search_regs.num_regs = 0;
2d4a771a
RS
3192 search_regs.start = 0;
3193 search_regs.end = 0;
7074fde6
FP
3194
3195 search_regs_saved = 1;
3196 }
3197}
3198
3199/* Called upon exit from filters and sentinels. */
3200void
abd0071c 3201restore_search_regs ()
7074fde6
FP
3202{
3203 if (search_regs_saved)
3204 {
3205 if (search_regs.num_regs > 0)
3206 {
3207 xfree (search_regs.start);
3208 xfree (search_regs.end);
3209 }
3210 search_regs.num_regs = saved_search_regs.num_regs;
3211 search_regs.start = saved_search_regs.start;
3212 search_regs.end = saved_search_regs.end;
41c01205
DK
3213 last_thing_searched = saved_last_thing_searched;
3214 saved_last_thing_searched = Qnil;
7074fde6
FP
3215 search_regs_saved = 0;
3216 }
3217}
3218
abd0071c
KS
3219static Lisp_Object
3220unwind_set_match_data (list)
3221 Lisp_Object list;
3222{
b51d6c92
SM
3223 /* It is NOT ALWAYS safe to free (evaporate) the markers immediately. */
3224 return Fset_match_data (list, Qt);
abd0071c
KS
3225}
3226
3227/* Called to unwind protect the match data. */
3228void
3229record_unwind_save_match_data ()
3230{
3231 record_unwind_protect (unwind_set_match_data,
3232 Fmatch_data (Qnil, Qnil, Qnil));
3233}
3234
ca1d1d23
JB
3235/* Quote a string to inactivate reg-expr chars */
3236
3237DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
8c1a1077
PJ
3238 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
3239 (string)
5806161b 3240 Lisp_Object string;
ca1d1d23
JB
3241{
3242 register unsigned char *in, *out, *end;
3243 register unsigned char *temp;
0c8533c6 3244 int backslashes_added = 0;
ca1d1d23 3245
b7826503 3246 CHECK_STRING (string);
ca1d1d23 3247
d5db4077 3248 temp = (unsigned char *) alloca (SBYTES (string) * 2);
ca1d1d23
JB
3249
3250 /* Now copy the data into the new string, inserting escapes. */
3251
d5db4077
KR
3252 in = SDATA (string);
3253 end = in + SBYTES (string);
177c0ea7 3254 out = temp;
ca1d1d23
JB
3255
3256 for (; in != end; in++)
3257 {
66bc6082 3258 if (*in == '['
ca1d1d23
JB
3259 || *in == '*' || *in == '.' || *in == '\\'
3260 || *in == '?' || *in == '+'
3261 || *in == '^' || *in == '$')
0c8533c6 3262 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
3263 *out++ = *in;
3264 }
3265
3f8100f1 3266 return make_specified_string (temp,
d5db4077 3267 SCHARS (string) + backslashes_added,
3f8100f1
RS
3268 out - temp,
3269 STRING_MULTIBYTE (string));
ca1d1d23 3270}
177c0ea7 3271\f
dfcf069d 3272void
ca1d1d23
JB
3273syms_of_search ()
3274{
3275 register int i;
3276
487282dc
KH
3277 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3278 {
3279 searchbufs[i].buf.allocated = 100;
b23c0a83 3280 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
487282dc
KH
3281 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3282 searchbufs[i].regexp = Qnil;
ecdb561e 3283 searchbufs[i].whitespace_regexp = Qnil;
b69e3c18 3284 searchbufs[i].syntax_table = Qnil;
487282dc 3285 staticpro (&searchbufs[i].regexp);
aa77b5ce 3286 staticpro (&searchbufs[i].whitespace_regexp);
b69e3c18 3287 staticpro (&searchbufs[i].syntax_table);
487282dc
KH
3288 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3289 }
3290 searchbuf_head = &searchbufs[0];
ca1d1d23
JB
3291
3292 Qsearch_failed = intern ("search-failed");
3293 staticpro (&Qsearch_failed);
3294 Qinvalid_regexp = intern ("invalid-regexp");
3295 staticpro (&Qinvalid_regexp);
3296
3297 Fput (Qsearch_failed, Qerror_conditions,
3298 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3299 Fput (Qsearch_failed, Qerror_message,
3300 build_string ("Search failed"));
3301
3302 Fput (Qinvalid_regexp, Qerror_conditions,
3303 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3304 Fput (Qinvalid_regexp, Qerror_message,
3305 build_string ("Invalid regexp"));
3306
daa37602
JB
3307 last_thing_searched = Qnil;
3308 staticpro (&last_thing_searched);
3309
0f6af254
DK
3310 saved_last_thing_searched = Qnil;
3311 staticpro (&saved_last_thing_searched);
3312
41a33295 3313 DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
e2811828 3314 doc: /* Regexp to substitute for bunches of spaces in regexp search.
f31a9a68
RS
3315Some commands use this for user-specified regexps.
3316Spaces that occur inside character classes or repetition operators
3317or other such regexp constructs are not replaced with this.
3318A value of nil (which is the normal value) means treat spaces literally. */);
41a33295 3319 Vsearch_spaces_regexp = Qnil;
f31a9a68 3320
ef887810
RS
3321 DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data,
3322 doc: /* Internal use only.
39d0bf74
RS
3323If non-nil, the primitive searching and matching functions
3324such as `looking-at', `string-match', `re-search-forward', etc.,
3325do not set the match data. The proper way to use this variable
3326is to bind it with `let' around a small expression. */);
ef887810
RS
3327 Vinhibit_changing_match_data = Qnil;
3328
ca1d1d23 3329 defsubr (&Slooking_at);
b819a390
RS
3330 defsubr (&Sposix_looking_at);
3331 defsubr (&Sstring_match);
3332 defsubr (&Sposix_string_match);
ca1d1d23
JB
3333 defsubr (&Ssearch_forward);
3334 defsubr (&Ssearch_backward);
3335 defsubr (&Sword_search_forward);
3336 defsubr (&Sword_search_backward);
c6e0e1cc
CY
3337 defsubr (&Sword_search_forward_lax);
3338 defsubr (&Sword_search_backward_lax);
ca1d1d23
JB
3339 defsubr (&Sre_search_forward);
3340 defsubr (&Sre_search_backward);
b819a390
RS
3341 defsubr (&Sposix_search_forward);
3342 defsubr (&Sposix_search_backward);
ca1d1d23
JB
3343 defsubr (&Sreplace_match);
3344 defsubr (&Smatch_beginning);
3345 defsubr (&Smatch_end);
3346 defsubr (&Smatch_data);
3f1c005b 3347 defsubr (&Sset_match_data);
ca1d1d23
JB
3348 defsubr (&Sregexp_quote);
3349}
76fc1ea2
KH
3350
3351/* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3352 (do not change this comment) */