Promote SSDATA macro from gtkutil.c and xsmfns.c to lisp.h.
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
429ab54e 2 Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
5df4f04c 3 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
8cabe764 4 Free Software Foundation, Inc.
ca1d1d23
JB
5
6This file is part of GNU Emacs.
7
9ec0b715 8GNU Emacs is free software: you can redistribute it and/or modify
ca1d1d23 9it under the terms of the GNU General Public License as published by
9ec0b715
GM
10the Free Software Foundation, either version 3 of the License, or
11(at your option) any later version.
ca1d1d23
JB
12
13GNU Emacs is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
9ec0b715 19along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
ca1d1d23
JB
20
21
18160b98 22#include <config.h>
d7306fe6 23#include <setjmp.h>
ca1d1d23
JB
24#include "lisp.h"
25#include "syntax.h"
5679531d 26#include "category.h"
ca1d1d23 27#include "buffer.h"
76eb0881 28#include "character.h"
89ad6492 29#include "charset.h"
9169c321 30#include "region-cache.h"
ca1d1d23 31#include "commands.h"
9ac0d9e0 32#include "blockinput.h"
bf1760bb 33#include "intervals.h"
4746118a 34
ca1d1d23
JB
35#include <sys/types.h>
36#include "regex.h"
37
1d288aef 38#define REGEXP_CACHE_SIZE 20
ca1d1d23 39
487282dc
KH
40/* If the regexp is non-nil, then the buffer contains the compiled form
41 of that regexp, suitable for searching. */
1d288aef
RS
42struct regexp_cache
43{
487282dc 44 struct regexp_cache *next;
ecdb561e 45 Lisp_Object regexp, whitespace_regexp;
b69e3c18 46 /* Syntax table for which the regexp applies. We need this because
58e95211
SM
47 of character classes. If this is t, then the compiled pattern is valid
48 for any syntax-table. */
b69e3c18 49 Lisp_Object syntax_table;
487282dc
KH
50 struct re_pattern_buffer buf;
51 char fastmap[0400];
b819a390
RS
52 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
53 char posix;
487282dc 54};
ca1d1d23 55
487282dc
KH
56/* The instances of that struct. */
57struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 58
487282dc
KH
59/* The head of the linked list; points to the most recently used buffer. */
60struct regexp_cache *searchbuf_head;
ca1d1d23 61
ca1d1d23 62
4746118a
JB
63/* Every call to re_match, etc., must pass &search_regs as the regs
64 argument unless you can show it is unnecessary (i.e., if re_match
65 is certainly going to be called again before region-around-match
66 can be called).
67
68 Since the registers are now dynamically allocated, we need to make
69 sure not to refer to the Nth register before checking that it has
1113d9db
JB
70 been allocated by checking search_regs.num_regs.
71
72 The regex code keeps track of whether it has allocated the search
487282dc
KH
73 buffer using bits in the re_pattern_buffer. This means that whenever
74 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
75 allocated any registers, and will allocate new registers the next
76 time you call a searching or matching function. Therefore, we need
77 to call re_set_registers after compiling a new pattern or after
78 setting the match registers, so that the regex functions will be
79 able to free or re-allocate it properly. */
ca1d1d23
JB
80static struct re_registers search_regs;
81
daa37602
JB
82/* The buffer in which the last search was performed, or
83 Qt if the last search was done in a string;
84 Qnil if no searching has been done yet. */
85static Lisp_Object last_thing_searched;
ca1d1d23 86
8e6208c5 87/* error condition signaled when regexp compile_pattern fails */
ca1d1d23
JB
88
89Lisp_Object Qinvalid_regexp;
90
06f77a8a
KS
91/* Error condition used for failing searches */
92Lisp_Object Qsearch_failed;
93
f57e2426
J
94static void set_search_regs (EMACS_INT, EMACS_INT);
95static void save_search_regs (void);
c098fdb8
EZ
96static EMACS_INT simple_search (EMACS_INT, unsigned char *, EMACS_INT,
97 EMACS_INT, Lisp_Object, EMACS_INT, EMACS_INT,
f57e2426 98 EMACS_INT, EMACS_INT);
c098fdb8 99static EMACS_INT boyer_moore (EMACS_INT, unsigned char *, EMACS_INT, EMACS_INT,
f57e2426
J
100 Lisp_Object, Lisp_Object,
101 EMACS_INT, EMACS_INT,
102 EMACS_INT, EMACS_INT, int);
103static EMACS_INT search_buffer (Lisp_Object, EMACS_INT, EMACS_INT,
c098fdb8 104 EMACS_INT, EMACS_INT, EMACS_INT, int,
f57e2426 105 Lisp_Object, Lisp_Object, int);
971de7fb 106static void matcher_overflow (void) NO_RETURN;
b819a390 107
ca1d1d23 108static void
971de7fb 109matcher_overflow (void)
ca1d1d23
JB
110{
111 error ("Stack overflow in regexp matcher");
112}
113
b819a390
RS
114/* Compile a regexp and signal a Lisp error if anything goes wrong.
115 PATTERN is the pattern to compile.
116 CP is the place to put the result.
facdc750 117 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
118 REGP is the structure that says where to store the "register"
119 values that will result from matching this pattern.
120 If it is 0, we should compile the pattern not to record any
121 subexpression bounds.
122 POSIX is nonzero if we want full backtracking (POSIX style)
5679531d 123 for this pattern. 0 means backtrack only enough to get a valid match.
bbc73b48
KH
124
125 The behavior also depends on Vsearch_spaces_regexp. */
ca1d1d23 126
487282dc 127static void
971de7fb 128compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, Lisp_Object translate, struct re_registers *regp, int posix)
ca1d1d23 129{
d451e4db 130 char *val;
b819a390 131 reg_syntax_t old;
ca1d1d23 132
487282dc 133 cp->regexp = Qnil;
59fab369 134 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
b819a390 135 cp->posix = posix;
93daa011 136 cp->buf.multibyte = STRING_MULTIBYTE (pattern);
89ad6492 137 cp->buf.charset_unibyte = charset_unibyte;
d8c85250
CY
138 if (STRINGP (Vsearch_spaces_regexp))
139 cp->whitespace_regexp = Vsearch_spaces_regexp;
140 else
141 cp->whitespace_regexp = Qnil;
142
e7b2dd2e
RS
143 /* rms: I think BLOCK_INPUT is not needed here any more,
144 because regex.c defines malloc to call xmalloc.
145 Using BLOCK_INPUT here means the debugger won't run if an error occurs.
146 So let's turn it off. */
147 /* BLOCK_INPUT; */
fb4a568d 148 old = re_set_syntax (RE_SYNTAX_EMACS
b819a390 149 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
d8c85250
CY
150
151 if (STRINGP (Vsearch_spaces_regexp))
152 re_set_whitespace_regexp (SDATA (Vsearch_spaces_regexp));
153 else
154 re_set_whitespace_regexp (NULL);
bbc73b48 155
51b59d79 156 val = (char *) re_compile_pattern (SSDATA (pattern),
8f924df7 157 SBYTES (pattern), &cp->buf);
bbc73b48 158
58e95211
SM
159 /* If the compiled pattern hard codes some of the contents of the
160 syntax-table, it can only be reused with *this* syntax table. */
161 cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
162
bbc73b48
KH
163 re_set_whitespace_regexp (NULL);
164
b819a390 165 re_set_syntax (old);
e7b2dd2e 166 /* UNBLOCK_INPUT; */
ca1d1d23 167 if (val)
06f77a8a 168 xsignal1 (Qinvalid_regexp, build_string (val));
1113d9db 169
487282dc 170 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
171}
172
6efc7887
RS
173/* Shrink each compiled regexp buffer in the cache
174 to the size actually used right now.
175 This is called from garbage collection. */
176
177void
971de7fb 178shrink_regexp_cache (void)
6efc7887 179{
a968f437 180 struct regexp_cache *cp;
6efc7887
RS
181
182 for (cp = searchbuf_head; cp != 0; cp = cp->next)
183 {
184 cp->buf.allocated = cp->buf.used;
185 cp->buf.buffer
b23c0a83 186 = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
6efc7887
RS
187 }
188}
189
54dd3310
SM
190/* Clear the regexp cache w.r.t. a particular syntax table,
191 because it was changed.
e5b94d44
CY
192 There is no danger of memory leak here because re_compile_pattern
193 automagically manages the memory in each re_pattern_buffer struct,
194 based on its `allocated' and `buffer' values. */
195void
971de7fb 196clear_regexp_cache (void)
e5b94d44
CY
197{
198 int i;
199
200 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
920fd1fc
EZ
201 /* It's tempting to compare with the syntax-table we've actually changed,
202 but it's not sufficient because char-table inheritance means that
54dd3310
SM
203 modifying one syntax-table can change others at the same time. */
204 if (!EQ (searchbufs[i].syntax_table, Qt))
205 searchbufs[i].regexp = Qnil;
e5b94d44
CY
206}
207
487282dc 208/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
209 the cache.
210 PATTERN is the pattern to compile.
facdc750 211 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
212 REGP is the structure that says where to store the "register"
213 values that will result from matching this pattern.
214 If it is 0, we should compile the pattern not to record any
215 subexpression bounds.
216 POSIX is nonzero if we want full backtracking (POSIX style)
217 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
218
219struct re_pattern_buffer *
971de7fb 220compile_pattern (Lisp_Object pattern, struct re_registers *regp, Lisp_Object translate, int posix, int multibyte)
487282dc
KH
221{
222 struct regexp_cache *cp, **cpp;
223
224 for (cpp = &searchbuf_head; ; cpp = &cp->next)
225 {
226 cp = *cpp;
f1b9c7c1
KR
227 /* Entries are initialized to nil, and may be set to nil by
228 compile_pattern_1 if the pattern isn't valid. Don't apply
49a5f770
KR
229 string accessors in those cases. However, compile_pattern_1
230 is only applied to the cache entry we pick here to reuse. So
231 nil should never appear before a non-nil entry. */
7c752c80 232 if (NILP (cp->regexp))
f1b9c7c1 233 goto compile_it;
d5db4077 234 if (SCHARS (cp->regexp) == SCHARS (pattern)
cf69b13e 235 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
1d288aef 236 && !NILP (Fstring_equal (cp->regexp, pattern))
59fab369 237 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
5679531d 238 && cp->posix == posix
58e95211
SM
239 && (EQ (cp->syntax_table, Qt)
240 || EQ (cp->syntax_table, current_buffer->syntax_table))
89ad6492
KH
241 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
242 && cp->buf.charset_unibyte == charset_unibyte)
487282dc
KH
243 break;
244
f1b9c7c1
KR
245 /* If we're at the end of the cache, compile into the nil cell
246 we found, or the last (least recently used) cell with a
247 string value. */
487282dc
KH
248 if (cp->next == 0)
249 {
f1b9c7c1 250 compile_it:
89ad6492 251 compile_pattern_1 (cp, pattern, translate, regp, posix);
487282dc
KH
252 break;
253 }
254 }
255
256 /* When we get here, cp (aka *cpp) contains the compiled pattern,
257 either because we found it in the cache or because we just compiled it.
258 Move it to the front of the queue to mark it as most recently used. */
259 *cpp = cp->next;
260 cp->next = searchbuf_head;
261 searchbuf_head = cp;
1113d9db 262
6639708c
RS
263 /* Advise the searching functions about the space we have allocated
264 for register data. */
265 if (regp)
266 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
267
78edd3b7 268 /* The compiled pattern can be used both for multibyte and unibyte
89ad6492
KH
269 target. But, we have to tell which the pattern is used for. */
270 cp->buf.target_multibyte = multibyte;
271
487282dc 272 return &cp->buf;
ca1d1d23
JB
273}
274
ca1d1d23 275\f
b819a390 276static Lisp_Object
971de7fb 277looking_at_1 (Lisp_Object string, int posix)
ca1d1d23
JB
278{
279 Lisp_Object val;
280 unsigned char *p1, *p2;
e7deaab0 281 EMACS_INT s1, s2;
c098fdb8 282 register EMACS_INT i;
487282dc 283 struct re_pattern_buffer *bufp;
ca1d1d23 284
7074fde6
FP
285 if (running_asynch_code)
286 save_search_regs ();
287
910c747a
RS
288 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
289 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
290 = current_buffer->case_eqv_table;
291
b7826503 292 CHECK_STRING (string);
ef887810
RS
293 bufp = compile_pattern (string,
294 (NILP (Vinhibit_changing_match_data)
295 ? &search_regs : NULL),
487282dc 296 (!NILP (current_buffer->case_fold_search)
0190922f 297 ? current_buffer->case_canon_table : Qnil),
0c8533c6
RS
298 posix,
299 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23
JB
300
301 immediate_quit = 1;
302 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
303
304 /* Get pointers and sizes of the two strings
305 that make up the visible portion of the buffer. */
306
307 p1 = BEGV_ADDR;
fa8ed3e0 308 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 309 p2 = GAP_END_ADDR;
fa8ed3e0 310 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
311 if (s1 < 0)
312 {
313 p2 = p1;
fa8ed3e0 314 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
315 s1 = 0;
316 }
317 if (s2 < 0)
318 {
fa8ed3e0 319 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
320 s2 = 0;
321 }
8bb43c28
RS
322
323 re_match_object = Qnil;
177c0ea7 324
487282dc 325 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
ef887810
RS
326 PT_BYTE - BEGV_BYTE,
327 (NILP (Vinhibit_changing_match_data)
328 ? &search_regs : NULL),
fa8ed3e0 329 ZV_BYTE - BEGV_BYTE);
de182d70 330 immediate_quit = 0;
177c0ea7 331
ca1d1d23
JB
332 if (i == -2)
333 matcher_overflow ();
334
335 val = (0 <= i ? Qt : Qnil);
ef887810 336 if (NILP (Vinhibit_changing_match_data) && i >= 0)
fa8ed3e0
RS
337 for (i = 0; i < search_regs.num_regs; i++)
338 if (search_regs.start[i] >= 0)
339 {
340 search_regs.start[i]
341 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
342 search_regs.end[i]
343 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
344 }
ef887810
RS
345
346 /* Set last_thing_searched only when match data is changed. */
347 if (NILP (Vinhibit_changing_match_data))
348 XSETBUFFER (last_thing_searched, current_buffer);
349
ca1d1d23
JB
350 return val;
351}
352
b819a390 353DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
8c1a1077
PJ
354 doc: /* Return t if text after point matches regular expression REGEXP.
355This function modifies the match data that `match-beginning',
356`match-end' and `match-data' access; save and restore the match
357data if you want to preserve them. */)
5842a27b 358 (Lisp_Object regexp)
b819a390 359{
94f94972 360 return looking_at_1 (regexp, 0);
b819a390
RS
361}
362
363DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
8c1a1077
PJ
364 doc: /* Return t if text after point matches regular expression REGEXP.
365Find the longest match, in accord with Posix regular expression rules.
366This function modifies the match data that `match-beginning',
367`match-end' and `match-data' access; save and restore the match
368data if you want to preserve them. */)
5842a27b 369 (Lisp_Object regexp)
b819a390 370{
94f94972 371 return looking_at_1 (regexp, 1);
b819a390
RS
372}
373\f
374static Lisp_Object
971de7fb 375string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, int posix)
ca1d1d23
JB
376{
377 int val;
487282dc 378 struct re_pattern_buffer *bufp;
e7deaab0 379 EMACS_INT pos, pos_byte;
0c8533c6 380 int i;
ca1d1d23 381
7074fde6
FP
382 if (running_asynch_code)
383 save_search_regs ();
384
b7826503
PJ
385 CHECK_STRING (regexp);
386 CHECK_STRING (string);
ca1d1d23
JB
387
388 if (NILP (start))
0c8533c6 389 pos = 0, pos_byte = 0;
ca1d1d23
JB
390 else
391 {
c098fdb8 392 EMACS_INT len = SCHARS (string);
ca1d1d23 393
b7826503 394 CHECK_NUMBER (start);
0c8533c6
RS
395 pos = XINT (start);
396 if (pos < 0 && -pos <= len)
397 pos = len + pos;
398 else if (0 > pos || pos > len)
ca1d1d23 399 args_out_of_range (string, start);
0c8533c6 400 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
401 }
402
910c747a
RS
403 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
404 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
405 = current_buffer->case_eqv_table;
406
ef887810
RS
407 bufp = compile_pattern (regexp,
408 (NILP (Vinhibit_changing_match_data)
409 ? &search_regs : NULL),
487282dc 410 (!NILP (current_buffer->case_fold_search)
0190922f 411 ? current_buffer->case_canon_table : Qnil),
0c8533c6
RS
412 posix,
413 STRING_MULTIBYTE (string));
ca1d1d23 414 immediate_quit = 1;
8bb43c28 415 re_match_object = string;
177c0ea7 416
51b59d79 417 val = re_search (bufp, SSDATA (string),
d5db4077
KR
418 SBYTES (string), pos_byte,
419 SBYTES (string) - pos_byte,
ef887810
RS
420 (NILP (Vinhibit_changing_match_data)
421 ? &search_regs : NULL));
ca1d1d23 422 immediate_quit = 0;
ef887810
RS
423
424 /* Set last_thing_searched only when match data is changed. */
425 if (NILP (Vinhibit_changing_match_data))
426 last_thing_searched = Qt;
427
ca1d1d23
JB
428 if (val == -2)
429 matcher_overflow ();
430 if (val < 0) return Qnil;
0c8533c6 431
ef887810
RS
432 if (NILP (Vinhibit_changing_match_data))
433 for (i = 0; i < search_regs.num_regs; i++)
434 if (search_regs.start[i] >= 0)
435 {
436 search_regs.start[i]
437 = string_byte_to_char (string, search_regs.start[i]);
438 search_regs.end[i]
439 = string_byte_to_char (string, search_regs.end[i]);
440 }
0c8533c6
RS
441
442 return make_number (string_byte_to_char (string, val));
ca1d1d23 443}
e59a8453 444
b819a390 445DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
8c1a1077 446 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
b85acc4b 447Matching ignores case if `case-fold-search' is non-nil.
8c1a1077
PJ
448If third arg START is non-nil, start search at that index in STRING.
449For index of first char beyond the match, do (match-end 0).
450`match-end' and `match-beginning' also give indices of substrings
2bd2f32d
RS
451matched by parenthesis constructs in the pattern.
452
453You can use the function `match-string' to extract the substrings
454matched by the parenthesis constructions in REGEXP. */)
5842a27b 455 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
b819a390
RS
456{
457 return string_match_1 (regexp, string, start, 0);
458}
459
460DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
8c1a1077
PJ
461 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
462Find the longest match, in accord with Posix regular expression rules.
463Case is ignored if `case-fold-search' is non-nil in the current buffer.
464If third arg START is non-nil, start search at that index in STRING.
465For index of first char beyond the match, do (match-end 0).
466`match-end' and `match-beginning' also give indices of substrings
467matched by parenthesis constructs in the pattern. */)
5842a27b 468 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
b819a390
RS
469{
470 return string_match_1 (regexp, string, start, 1);
471}
472
e59a8453
RS
473/* Match REGEXP against STRING, searching all of STRING,
474 and return the index of the match, or negative on failure.
475 This does not clobber the match data. */
476
477int
971de7fb 478fast_string_match (Lisp_Object regexp, Lisp_Object string)
e59a8453
RS
479{
480 int val;
487282dc 481 struct re_pattern_buffer *bufp;
e59a8453 482
facdc750
RS
483 bufp = compile_pattern (regexp, 0, Qnil,
484 0, STRING_MULTIBYTE (string));
e59a8453 485 immediate_quit = 1;
8bb43c28 486 re_match_object = string;
177c0ea7 487
51b59d79 488 val = re_search (bufp, SSDATA (string),
d5db4077
KR
489 SBYTES (string), 0,
490 SBYTES (string), 0);
e59a8453
RS
491 immediate_quit = 0;
492 return val;
493}
5679531d
KH
494
495/* Match REGEXP against STRING, searching all of STRING ignoring case,
496 and return the index of the match, or negative on failure.
0c8533c6
RS
497 This does not clobber the match data.
498 We assume that STRING contains single-byte characters. */
5679531d 499
5679531d 500int
971de7fb 501fast_c_string_match_ignore_case (Lisp_Object regexp, const char *string)
5679531d
KH
502{
503 int val;
504 struct re_pattern_buffer *bufp;
c098fdb8 505 size_t len = strlen (string);
5679531d 506
0c8533c6 507 regexp = string_make_unibyte (regexp);
b4577c63 508 re_match_object = Qt;
5679531d 509 bufp = compile_pattern (regexp, 0,
0190922f 510 Vascii_canon_table, 0,
f8bd51c4 511 0);
5679531d
KH
512 immediate_quit = 1;
513 val = re_search (bufp, string, len, 0, len, 0);
514 immediate_quit = 0;
515 return val;
516}
be5f4dfb
KH
517
518/* Like fast_string_match but ignore case. */
519
520int
971de7fb 521fast_string_match_ignore_case (Lisp_Object regexp, Lisp_Object string)
be5f4dfb
KH
522{
523 int val;
524 struct re_pattern_buffer *bufp;
525
0190922f 526 bufp = compile_pattern (regexp, 0, Vascii_canon_table,
be5f4dfb
KH
527 0, STRING_MULTIBYTE (string));
528 immediate_quit = 1;
529 re_match_object = string;
530
51b59d79 531 val = re_search (bufp, SSDATA (string),
be5f4dfb
KH
532 SBYTES (string), 0,
533 SBYTES (string), 0);
534 immediate_quit = 0;
535 return val;
536}
ca1d1d23 537\f
a80ce213 538/* Match REGEXP against the characters after POS to LIMIT, and return
8b13507a
KH
539 the number of matched characters. If STRING is non-nil, match
540 against the characters in it. In that case, POS and LIMIT are
541 indices into the string. This function doesn't modify the match
542 data. */
543
544EMACS_INT
971de7fb 545fast_looking_at (Lisp_Object regexp, EMACS_INT pos, EMACS_INT pos_byte, EMACS_INT limit, EMACS_INT limit_byte, Lisp_Object string)
8b13507a
KH
546{
547 int multibyte;
548 struct re_pattern_buffer *buf;
549 unsigned char *p1, *p2;
e7deaab0 550 EMACS_INT s1, s2;
8b13507a 551 EMACS_INT len;
78edd3b7 552
8b13507a
KH
553 if (STRINGP (string))
554 {
555 if (pos_byte < 0)
556 pos_byte = string_char_to_byte (string, pos);
557 if (limit_byte < 0)
558 limit_byte = string_char_to_byte (string, limit);
559 p1 = NULL;
560 s1 = 0;
561 p2 = SDATA (string);
562 s2 = SBYTES (string);
563 re_match_object = string;
564 multibyte = STRING_MULTIBYTE (string);
565 }
566 else
567 {
568 if (pos_byte < 0)
569 pos_byte = CHAR_TO_BYTE (pos);
570 if (limit_byte < 0)
571 limit_byte = CHAR_TO_BYTE (limit);
572 pos_byte -= BEGV_BYTE;
573 limit_byte -= BEGV_BYTE;
574 p1 = BEGV_ADDR;
575 s1 = GPT_BYTE - BEGV_BYTE;
576 p2 = GAP_END_ADDR;
577 s2 = ZV_BYTE - GPT_BYTE;
578 if (s1 < 0)
579 {
580 p2 = p1;
581 s2 = ZV_BYTE - BEGV_BYTE;
582 s1 = 0;
583 }
584 if (s2 < 0)
585 {
586 s1 = ZV_BYTE - BEGV_BYTE;
587 s2 = 0;
588 }
589 re_match_object = Qnil;
590 multibyte = ! NILP (current_buffer->enable_multibyte_characters);
591 }
592
593 buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
594 immediate_quit = 1;
595 len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
596 pos_byte, NULL, limit_byte);
597 immediate_quit = 0;
598
599 return len;
600}
601
602\f
9169c321
JB
603/* The newline cache: remembering which sections of text have no newlines. */
604
605/* If the user has requested newline caching, make sure it's on.
606 Otherwise, make sure it's off.
607 This is our cheezy way of associating an action with the change of
608 state of a buffer-local variable. */
609static void
971de7fb 610newline_cache_on_off (struct buffer *buf)
9169c321
JB
611{
612 if (NILP (buf->cache_long_line_scans))
613 {
614 /* It should be off. */
615 if (buf->newline_cache)
616 {
617 free_region_cache (buf->newline_cache);
618 buf->newline_cache = 0;
619 }
620 }
621 else
622 {
623 /* It should be on. */
624 if (buf->newline_cache == 0)
625 buf->newline_cache = new_region_cache ();
626 }
627}
628
629\f
630/* Search for COUNT instances of the character TARGET between START and END.
631
632 If COUNT is positive, search forwards; END must be >= START.
633 If COUNT is negative, search backwards for the -COUNTth instance;
634 END must be <= START.
635 If COUNT is zero, do anything you please; run rogue, for all I care.
636
637 If END is zero, use BEGV or ZV instead, as appropriate for the
638 direction indicated by COUNT.
ffd56f97
JB
639
640 If we find COUNT instances, set *SHORTAGE to zero, and return the
a9f2a45f 641 position past the COUNTth match. Note that for reverse motion
5bfe95c9 642 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 643
9169c321
JB
644 If we don't find COUNT instances before reaching END, set *SHORTAGE
645 to the number of TARGETs left unfound, and return END.
ffd56f97 646
087a5f81
RS
647 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
648 except when inside redisplay. */
649
c098fdb8
EZ
650EMACS_INT
651scan_buffer (register int target, EMACS_INT start, EMACS_INT end,
652 EMACS_INT count, int *shortage, int allow_quit)
ca1d1d23 653{
9169c321 654 struct region_cache *newline_cache;
177c0ea7 655 int direction;
ffd56f97 656
9169c321
JB
657 if (count > 0)
658 {
659 direction = 1;
660 if (! end) end = ZV;
661 }
662 else
663 {
664 direction = -1;
665 if (! end) end = BEGV;
666 }
ffd56f97 667
9169c321
JB
668 newline_cache_on_off (current_buffer);
669 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
670
671 if (shortage != 0)
672 *shortage = 0;
673
087a5f81 674 immediate_quit = allow_quit;
ca1d1d23 675
ffd56f97 676 if (count > 0)
9169c321 677 while (start != end)
ca1d1d23 678 {
9169c321
JB
679 /* Our innermost scanning loop is very simple; it doesn't know
680 about gaps, buffer ends, or the newline cache. ceiling is
681 the position of the last character before the next such
682 obstacle --- the last character the dumb search loop should
683 examine. */
e7deaab0
AS
684 EMACS_INT ceiling_byte = CHAR_TO_BYTE (end) - 1;
685 EMACS_INT start_byte = CHAR_TO_BYTE (start);
686 EMACS_INT tem;
9169c321
JB
687
688 /* If we're looking for a newline, consult the newline cache
689 to see where we can avoid some scanning. */
690 if (target == '\n' && newline_cache)
691 {
c098fdb8 692 EMACS_INT next_change;
9169c321
JB
693 immediate_quit = 0;
694 while (region_cache_forward
fa8ed3e0
RS
695 (current_buffer, newline_cache, start_byte, &next_change))
696 start_byte = next_change;
cbe0db0d 697 immediate_quit = allow_quit;
9169c321 698
fa8ed3e0
RS
699 /* START should never be after END. */
700 if (start_byte > ceiling_byte)
701 start_byte = ceiling_byte;
9169c321
JB
702
703 /* Now the text after start is an unknown region, and
704 next_change is the position of the next known region. */
fa8ed3e0 705 ceiling_byte = min (next_change - 1, ceiling_byte);
9169c321
JB
706 }
707
708 /* The dumb loop can only scan text stored in contiguous
709 bytes. BUFFER_CEILING_OF returns the last character
710 position that is contiguous, so the ceiling is the
711 position after that. */
67ce527d
KH
712 tem = BUFFER_CEILING_OF (start_byte);
713 ceiling_byte = min (tem, ceiling_byte);
9169c321
JB
714
715 {
177c0ea7 716 /* The termination address of the dumb loop. */
fa8ed3e0
RS
717 register unsigned char *ceiling_addr
718 = BYTE_POS_ADDR (ceiling_byte) + 1;
719 register unsigned char *cursor
720 = BYTE_POS_ADDR (start_byte);
9169c321
JB
721 unsigned char *base = cursor;
722
723 while (cursor < ceiling_addr)
724 {
725 unsigned char *scan_start = cursor;
726
727 /* The dumb loop. */
728 while (*cursor != target && ++cursor < ceiling_addr)
729 ;
730
731 /* If we're looking for newlines, cache the fact that
732 the region from start to cursor is free of them. */
733 if (target == '\n' && newline_cache)
734 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
735 start_byte + scan_start - base,
736 start_byte + cursor - base);
9169c321
JB
737
738 /* Did we find the target character? */
739 if (cursor < ceiling_addr)
740 {
741 if (--count == 0)
742 {
743 immediate_quit = 0;
fa8ed3e0 744 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
9169c321
JB
745 }
746 cursor++;
747 }
748 }
749
fa8ed3e0 750 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321 751 }
ca1d1d23
JB
752 }
753 else
9169c321
JB
754 while (start > end)
755 {
756 /* The last character to check before the next obstacle. */
e7deaab0
AS
757 EMACS_INT ceiling_byte = CHAR_TO_BYTE (end);
758 EMACS_INT start_byte = CHAR_TO_BYTE (start);
759 EMACS_INT tem;
9169c321
JB
760
761 /* Consult the newline cache, if appropriate. */
762 if (target == '\n' && newline_cache)
763 {
c098fdb8 764 EMACS_INT next_change;
9169c321
JB
765 immediate_quit = 0;
766 while (region_cache_backward
fa8ed3e0
RS
767 (current_buffer, newline_cache, start_byte, &next_change))
768 start_byte = next_change;
cbe0db0d 769 immediate_quit = allow_quit;
9169c321
JB
770
771 /* Start should never be at or before end. */
fa8ed3e0
RS
772 if (start_byte <= ceiling_byte)
773 start_byte = ceiling_byte + 1;
9169c321
JB
774
775 /* Now the text before start is an unknown region, and
776 next_change is the position of the next known region. */
fa8ed3e0 777 ceiling_byte = max (next_change, ceiling_byte);
9169c321
JB
778 }
779
780 /* Stop scanning before the gap. */
67ce527d
KH
781 tem = BUFFER_FLOOR_OF (start_byte - 1);
782 ceiling_byte = max (tem, ceiling_byte);
9169c321
JB
783
784 {
785 /* The termination address of the dumb loop. */
fa8ed3e0
RS
786 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
787 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
788 unsigned char *base = cursor;
789
790 while (cursor >= ceiling_addr)
791 {
792 unsigned char *scan_start = cursor;
793
794 while (*cursor != target && --cursor >= ceiling_addr)
795 ;
796
797 /* If we're looking for newlines, cache the fact that
798 the region from after the cursor to start is free of them. */
799 if (target == '\n' && newline_cache)
800 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
801 start_byte + cursor - base,
802 start_byte + scan_start - base);
9169c321
JB
803
804 /* Did we find the target character? */
805 if (cursor >= ceiling_addr)
806 {
807 if (++count >= 0)
808 {
809 immediate_quit = 0;
fa8ed3e0 810 return BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
811 }
812 cursor--;
813 }
814 }
815
fa8ed3e0 816 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
817 }
818 }
819
ca1d1d23
JB
820 immediate_quit = 0;
821 if (shortage != 0)
ffd56f97 822 *shortage = count * direction;
9169c321 823 return start;
ca1d1d23 824}
fa8ed3e0
RS
825\f
826/* Search for COUNT instances of a line boundary, which means either a
827 newline or (if selective display enabled) a carriage return.
828 Start at START. If COUNT is negative, search backwards.
829
830 We report the resulting position by calling TEMP_SET_PT_BOTH.
831
832 If we find COUNT instances. we position after (always after,
833 even if scanning backwards) the COUNTth match, and return 0.
834
835 If we don't find COUNT instances before reaching the end of the
836 buffer (or the beginning, if scanning backwards), we return
837 the number of line boundaries left unfound, and position at
838 the limit we bumped up against.
839
840 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
d5d57b92 841 except in special cases. */
ca1d1d23 842
c098fdb8
EZ
843EMACS_INT
844scan_newline (EMACS_INT start, EMACS_INT start_byte,
845 EMACS_INT limit, EMACS_INT limit_byte,
846 register EMACS_INT count, int allow_quit)
63fa018d 847{
fa8ed3e0
RS
848 int direction = ((count > 0) ? 1 : -1);
849
850 register unsigned char *cursor;
851 unsigned char *base;
852
e7deaab0 853 EMACS_INT ceiling;
fa8ed3e0
RS
854 register unsigned char *ceiling_addr;
855
d5d57b92
RS
856 int old_immediate_quit = immediate_quit;
857
fa8ed3e0
RS
858 /* The code that follows is like scan_buffer
859 but checks for either newline or carriage return. */
860
d5d57b92
RS
861 if (allow_quit)
862 immediate_quit++;
fa8ed3e0
RS
863
864 start_byte = CHAR_TO_BYTE (start);
865
866 if (count > 0)
867 {
868 while (start_byte < limit_byte)
869 {
870 ceiling = BUFFER_CEILING_OF (start_byte);
871 ceiling = min (limit_byte - 1, ceiling);
872 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
873 base = (cursor = BYTE_POS_ADDR (start_byte));
874 while (1)
875 {
876 while (*cursor != '\n' && ++cursor != ceiling_addr)
877 ;
878
879 if (cursor != ceiling_addr)
880 {
881 if (--count == 0)
882 {
d5d57b92 883 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
884 start_byte = start_byte + cursor - base + 1;
885 start = BYTE_TO_CHAR (start_byte);
886 TEMP_SET_PT_BOTH (start, start_byte);
887 return 0;
888 }
889 else
890 if (++cursor == ceiling_addr)
891 break;
892 }
893 else
894 break;
895 }
896 start_byte += cursor - base;
897 }
898 }
899 else
900 {
fa8ed3e0
RS
901 while (start_byte > limit_byte)
902 {
903 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
904 ceiling = max (limit_byte, ceiling);
905 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
906 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
907 while (1)
908 {
909 while (--cursor != ceiling_addr && *cursor != '\n')
910 ;
911
912 if (cursor != ceiling_addr)
913 {
914 if (++count == 0)
915 {
d5d57b92 916 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
917 /* Return the position AFTER the match we found. */
918 start_byte = start_byte + cursor - base + 1;
919 start = BYTE_TO_CHAR (start_byte);
920 TEMP_SET_PT_BOTH (start, start_byte);
921 return 0;
922 }
923 }
924 else
925 break;
926 }
927 /* Here we add 1 to compensate for the last decrement
928 of CURSOR, which took it past the valid range. */
929 start_byte += cursor - base + 1;
930 }
931 }
932
933 TEMP_SET_PT_BOTH (limit, limit_byte);
d5d57b92 934 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
935
936 return count * direction;
63fa018d
RS
937}
938
c098fdb8
EZ
939EMACS_INT
940find_next_newline_no_quit (EMACS_INT from, EMACS_INT cnt)
ca1d1d23 941{
fa8ed3e0 942 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
9169c321
JB
943}
944
9169c321
JB
945/* Like find_next_newline, but returns position before the newline,
946 not after, and only search up to TO. This isn't just
947 find_next_newline (...)-1, because you might hit TO. */
fa8ed3e0 948
c098fdb8
EZ
949EMACS_INT
950find_before_next_newline (EMACS_INT from, EMACS_INT to, EMACS_INT cnt)
9169c321
JB
951{
952 int shortage;
c098fdb8 953 EMACS_INT pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
9169c321
JB
954
955 if (shortage == 0)
956 pos--;
177c0ea7 957
9169c321 958 return pos;
ca1d1d23
JB
959}
960\f
ca1d1d23
JB
961/* Subroutines of Lisp buffer search functions. */
962
963static Lisp_Object
c098fdb8
EZ
964search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror,
965 Lisp_Object count, int direction, int RE, int posix)
ca1d1d23
JB
966{
967 register int np;
c098fdb8 968 EMACS_INT lim, lim_byte;
ca1d1d23
JB
969 int n = direction;
970
971 if (!NILP (count))
972 {
b7826503 973 CHECK_NUMBER (count);
ca1d1d23
JB
974 n *= XINT (count);
975 }
976
b7826503 977 CHECK_STRING (string);
ca1d1d23 978 if (NILP (bound))
9f43ad85
RS
979 {
980 if (n > 0)
981 lim = ZV, lim_byte = ZV_BYTE;
982 else
983 lim = BEGV, lim_byte = BEGV_BYTE;
984 }
ca1d1d23
JB
985 else
986 {
b7826503 987 CHECK_NUMBER_COERCE_MARKER (bound);
ca1d1d23 988 lim = XINT (bound);
6ec8bbd2 989 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
990 error ("Invalid search bound (wrong side of point)");
991 if (lim > ZV)
9f43ad85 992 lim = ZV, lim_byte = ZV_BYTE;
588d2fd5 993 else if (lim < BEGV)
9f43ad85 994 lim = BEGV, lim_byte = BEGV_BYTE;
588d2fd5
KH
995 else
996 lim_byte = CHAR_TO_BYTE (lim);
ca1d1d23
JB
997 }
998
910c747a
RS
999 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
1000 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
1001 = current_buffer->case_eqv_table;
1002
9f43ad85 1003 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
ca1d1d23 1004 (!NILP (current_buffer->case_fold_search)
facdc750 1005 ? current_buffer->case_canon_table
3135e9fd 1006 : Qnil),
ca1d1d23 1007 (!NILP (current_buffer->case_fold_search)
facdc750 1008 ? current_buffer->case_eqv_table
3135e9fd 1009 : Qnil),
b819a390 1010 posix);
ca1d1d23
JB
1011 if (np <= 0)
1012 {
1013 if (NILP (noerror))
06f77a8a
KS
1014 xsignal1 (Qsearch_failed, string);
1015
ca1d1d23
JB
1016 if (!EQ (noerror, Qt))
1017 {
1018 if (lim < BEGV || lim > ZV)
1019 abort ();
9f43ad85 1020 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
1021 return Qnil;
1022#if 0 /* This would be clean, but maybe programs depend on
1023 a value of nil here. */
481399bf 1024 np = lim;
a5f217b8 1025#endif
ca1d1d23 1026 }
481399bf
RS
1027 else
1028 return Qnil;
ca1d1d23
JB
1029 }
1030
1031 if (np < BEGV || np > ZV)
1032 abort ();
1033
1034 SET_PT (np);
1035
1036 return make_number (np);
1037}
1038\f
fa8ed3e0
RS
1039/* Return 1 if REGEXP it matches just one constant string. */
1040
b6d6a51c 1041static int
971de7fb 1042trivial_regexp_p (Lisp_Object regexp)
b6d6a51c 1043{
c098fdb8 1044 EMACS_INT len = SBYTES (regexp);
d5db4077 1045 unsigned char *s = SDATA (regexp);
b6d6a51c
KH
1046 while (--len >= 0)
1047 {
1048 switch (*s++)
1049 {
1050 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1051 return 0;
1052 case '\\':
1053 if (--len < 0)
1054 return 0;
1055 switch (*s++)
1056 {
1057 case '|': case '(': case ')': case '`': case '\'': case 'b':
1058 case 'B': case '<': case '>': case 'w': case 'W': case 's':
29f89fe7 1059 case 'S': case '=': case '{': case '}': case '_':
5679531d 1060 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 1061 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
1062 case '6': case '7': case '8': case '9':
1063 return 0;
1064 }
1065 }
1066 }
1067 return 1;
1068}
1069
ca325161 1070/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 1071 starting at position POS and stopping at position LIM,
b819a390 1072 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
1073 a regular expression if RE is true.
1074
1075 If N is positive, searching is forward and LIM must be greater than POS.
1076 If N is negative, searching is backward and LIM must be less than POS.
1077
facdc750 1078 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 1079 or else the position at the beginning of the Nth occurrence
b819a390
RS
1080 (if searching backward) or the end (if searching forward).
1081
1082 POSIX is nonzero if we want full backtracking (POSIX style)
1083 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 1084
aff2ce94
RS
1085#define TRANSLATE(out, trt, d) \
1086do \
1087 { \
1088 if (! NILP (trt)) \
1089 { \
1090 Lisp_Object temp; \
1091 temp = Faref (trt, make_number (d)); \
1092 if (INTEGERP (temp)) \
1093 out = XINT (temp); \
1094 else \
1095 out = d; \
1096 } \
1097 else \
1098 out = d; \
1099 } \
1100while (0)
facdc750 1101
ef887810
RS
1102/* Only used in search_buffer, to record the end position of the match
1103 when searching regexps and SEARCH_REGS should not be changed
1104 (i.e. Vinhibit_changing_match_data is non-nil). */
1105static struct re_registers search_regs_1;
1106
e7deaab0 1107static EMACS_INT
d5a3eaaf 1108search_buffer (Lisp_Object string, EMACS_INT pos, EMACS_INT pos_byte,
c098fdb8 1109 EMACS_INT lim, EMACS_INT lim_byte, EMACS_INT n,
d5a3eaaf 1110 int RE, Lisp_Object trt, Lisp_Object inverse_trt, int posix)
ca1d1d23 1111{
c098fdb8
EZ
1112 EMACS_INT len = SCHARS (string);
1113 EMACS_INT len_byte = SBYTES (string);
facdc750 1114 register int i;
ca1d1d23 1115
7074fde6
FP
1116 if (running_asynch_code)
1117 save_search_regs ();
1118
a7e4cdde 1119 /* Searching 0 times means don't move. */
ca1d1d23 1120 /* Null string is found at starting position. */
a7e4cdde 1121 if (len == 0 || n == 0)
ca325161 1122 {
0353b28f 1123 set_search_regs (pos_byte, 0);
ca325161
RS
1124 return pos;
1125 }
3f57a499 1126
41a33295 1127 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
ca1d1d23 1128 {
facdc750 1129 unsigned char *p1, *p2;
c098fdb8 1130 EMACS_INT s1, s2;
487282dc
KH
1131 struct re_pattern_buffer *bufp;
1132
ef887810
RS
1133 bufp = compile_pattern (string,
1134 (NILP (Vinhibit_changing_match_data)
1135 ? &search_regs : &search_regs_1),
1136 trt, posix,
0c8533c6 1137 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23 1138
ca1d1d23
JB
1139 immediate_quit = 1; /* Quit immediately if user types ^G,
1140 because letting this function finish
1141 can take too long. */
1142 QUIT; /* Do a pending quit right away,
1143 to avoid paradoxical behavior */
1144 /* Get pointers and sizes of the two strings
1145 that make up the visible portion of the buffer. */
1146
1147 p1 = BEGV_ADDR;
fa8ed3e0 1148 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1149 p2 = GAP_END_ADDR;
fa8ed3e0 1150 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1151 if (s1 < 0)
1152 {
1153 p2 = p1;
fa8ed3e0 1154 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1155 s1 = 0;
1156 }
1157 if (s2 < 0)
1158 {
fa8ed3e0 1159 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1160 s2 = 0;
1161 }
8bb43c28 1162 re_match_object = Qnil;
177c0ea7 1163
ca1d1d23
JB
1164 while (n < 0)
1165 {
c098fdb8 1166 EMACS_INT val;
487282dc 1167 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b 1168 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
ef887810
RS
1169 (NILP (Vinhibit_changing_match_data)
1170 ? &search_regs : &search_regs_1),
42db823b 1171 /* Don't allow match past current point */
4996330b 1172 pos_byte - BEGV_BYTE);
ca1d1d23 1173 if (val == -2)
b6d6a51c
KH
1174 {
1175 matcher_overflow ();
1176 }
ca1d1d23
JB
1177 if (val >= 0)
1178 {
ef887810
RS
1179 if (NILP (Vinhibit_changing_match_data))
1180 {
1181 pos_byte = search_regs.start[0] + BEGV_BYTE;
1182 for (i = 0; i < search_regs.num_regs; i++)
1183 if (search_regs.start[i] >= 0)
1184 {
1185 search_regs.start[i]
1186 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1187 search_regs.end[i]
1188 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1189 }
1190 XSETBUFFER (last_thing_searched, current_buffer);
1191 /* Set pos to the new position. */
1192 pos = search_regs.start[0];
1193 }
1194 else
1195 {
1196 pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1197 /* Set pos to the new position. */
1198 pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1199 }
ca1d1d23
JB
1200 }
1201 else
1202 {
1203 immediate_quit = 0;
1204 return (n);
1205 }
1206 n++;
1207 }
1208 while (n > 0)
1209 {
c098fdb8 1210 EMACS_INT val;
487282dc 1211 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b 1212 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
ef887810
RS
1213 (NILP (Vinhibit_changing_match_data)
1214 ? &search_regs : &search_regs_1),
4996330b 1215 lim_byte - BEGV_BYTE);
ca1d1d23 1216 if (val == -2)
b6d6a51c
KH
1217 {
1218 matcher_overflow ();
1219 }
ca1d1d23
JB
1220 if (val >= 0)
1221 {
ef887810
RS
1222 if (NILP (Vinhibit_changing_match_data))
1223 {
1224 pos_byte = search_regs.end[0] + BEGV_BYTE;
1225 for (i = 0; i < search_regs.num_regs; i++)
1226 if (search_regs.start[i] >= 0)
1227 {
1228 search_regs.start[i]
1229 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1230 search_regs.end[i]
1231 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1232 }
1233 XSETBUFFER (last_thing_searched, current_buffer);
1234 pos = search_regs.end[0];
1235 }
1236 else
1237 {
1238 pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1239 pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1240 }
ca1d1d23
JB
1241 }
1242 else
1243 {
1244 immediate_quit = 0;
1245 return (0 - n);
1246 }
1247 n--;
1248 }
1249 immediate_quit = 0;
1250 return (pos);
1251 }
1252 else /* non-RE case */
1253 {
facdc750 1254 unsigned char *raw_pattern, *pat;
c098fdb8
EZ
1255 EMACS_INT raw_pattern_size;
1256 EMACS_INT raw_pattern_size_byte;
facdc750
RS
1257 unsigned char *patbuf;
1258 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
a967ed62 1259 unsigned char *base_pat;
49e44e81
KH
1260 /* Set to positive if we find a non-ASCII char that need
1261 translation. Otherwise set to zero later. */
1262 int char_base = -1;
040272ce 1263 int boyer_moore_ok = 1;
facdc750
RS
1264
1265 /* MULTIBYTE says whether the text to be searched is multibyte.
1266 We must convert PATTERN to match that, or we will not really
1267 find things right. */
1268
1269 if (multibyte == STRING_MULTIBYTE (string))
1270 {
51b59d79 1271 raw_pattern = SDATA (string);
d5db4077
KR
1272 raw_pattern_size = SCHARS (string);
1273 raw_pattern_size_byte = SBYTES (string);
facdc750
RS
1274 }
1275 else if (multibyte)
1276 {
d5db4077 1277 raw_pattern_size = SCHARS (string);
facdc750 1278 raw_pattern_size_byte
d5db4077 1279 = count_size_as_multibyte (SDATA (string),
facdc750 1280 raw_pattern_size);
7276d3d8 1281 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
d5db4077
KR
1282 copy_text (SDATA (string), raw_pattern,
1283 SCHARS (string), 0, 1);
facdc750
RS
1284 }
1285 else
1286 {
1287 /* Converting multibyte to single-byte.
1288
1289 ??? Perhaps this conversion should be done in a special way
1290 by subtracting nonascii-insert-offset from each non-ASCII char,
1291 so that only the multibyte chars which really correspond to
1292 the chosen single-byte character set can possibly match. */
d5db4077
KR
1293 raw_pattern_size = SCHARS (string);
1294 raw_pattern_size_byte = SCHARS (string);
7276d3d8 1295 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
1296 copy_text (SDATA (string), raw_pattern,
1297 SBYTES (string), 1, 0);
facdc750
RS
1298 }
1299
1300 /* Copy and optionally translate the pattern. */
1301 len = raw_pattern_size;
1302 len_byte = raw_pattern_size_byte;
f5f7578b 1303 patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
facdc750
RS
1304 pat = patbuf;
1305 base_pat = raw_pattern;
1306 if (multibyte)
1307 {
a17ae96a
KH
1308 /* Fill patbuf by translated characters in STRING while
1309 checking if we can use boyer-moore search. If TRT is
1310 non-nil, we can use boyer-moore search only if TRT can be
1311 represented by the byte array of 256 elements. For that,
1312 all non-ASCII case-equivalents of all case-senstive
1313 characters in STRING must belong to the same charset and
1314 row. */
1315
facdc750
RS
1316 while (--len >= 0)
1317 {
a17ae96a 1318 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
aff2ce94 1319 int c, translated, inverse;
a17ae96a 1320 int in_charlen, charlen;
facdc750
RS
1321
1322 /* If we got here and the RE flag is set, it's because we're
1323 dealing with a regexp known to be trivial, so the backslash
1324 just quotes the next character. */
1325 if (RE && *base_pat == '\\')
1326 {
1327 len--;
62221f22 1328 raw_pattern_size--;
facdc750
RS
1329 len_byte--;
1330 base_pat++;
1331 }
1332
62a6e103 1333 c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
040272ce 1334
a17ae96a 1335 if (NILP (trt))
facdc750 1336 {
a17ae96a
KH
1337 str = base_pat;
1338 charlen = in_charlen;
1339 }
1340 else
1341 {
1342 /* Translate the character. */
1343 TRANSLATE (translated, trt, c);
1344 charlen = CHAR_STRING (translated, str_base);
1345 str = str_base;
1346
1347 /* Check if C has any other case-equivalents. */
1348 TRANSLATE (inverse, inverse_trt, c);
1349 /* If so, check if we can use boyer-moore. */
1350 if (c != inverse && boyer_moore_ok)
1351 {
1352 /* Check if all equivalents belong to the same
1353 group of characters. Note that the check of C
49e44e81
KH
1354 itself is done by the last iteration. */
1355 int this_char_base = -1;
1356
1357 while (boyer_moore_ok)
a17ae96a 1358 {
49e44e81 1359 if (ASCII_BYTE_P (inverse))
a17ae96a 1360 {
49e44e81
KH
1361 if (this_char_base > 0)
1362 boyer_moore_ok = 0;
1363 else
dafaabd1 1364 this_char_base = 0;
a17ae96a 1365 }
49e44e81
KH
1366 else if (CHAR_BYTE8_P (inverse))
1367 /* Boyer-moore search can't handle a
1368 translation of an eight-bit
1369 character. */
1370 boyer_moore_ok = 0;
1371 else if (this_char_base < 0)
1372 {
1373 this_char_base = inverse & ~0x3F;
1374 if (char_base < 0)
1375 char_base = this_char_base;
dafaabd1 1376 else if (this_char_base != char_base)
49e44e81
KH
1377 boyer_moore_ok = 0;
1378 }
1379 else if ((inverse & ~0x3F) != this_char_base)
1380 boyer_moore_ok = 0;
a17ae96a
KH
1381 if (c == inverse)
1382 break;
1383 TRANSLATE (inverse, inverse_trt, inverse);
1384 }
1385 }
aff2ce94 1386 }
facdc750
RS
1387
1388 /* Store this character into the translated pattern. */
72af86bd 1389 memcpy (pat, str, charlen);
a17ae96a 1390 pat += charlen;
facdc750
RS
1391 base_pat += in_charlen;
1392 len_byte -= in_charlen;
1393 }
dafaabd1
AS
1394
1395 /* If char_base is still negative we didn't find any translated
1396 non-ASCII characters. */
1397 if (char_base < 0)
1398 char_base = 0;
facdc750
RS
1399 }
1400 else
1401 {
040272ce 1402 /* Unibyte buffer. */
a17ae96a 1403 char_base = 0;
facdc750
RS
1404 while (--len >= 0)
1405 {
040272ce 1406 int c, translated;
facdc750
RS
1407
1408 /* If we got here and the RE flag is set, it's because we're
1409 dealing with a regexp known to be trivial, so the backslash
1410 just quotes the next character. */
1411 if (RE && *base_pat == '\\')
1412 {
1413 len--;
0190922f 1414 raw_pattern_size--;
facdc750
RS
1415 base_pat++;
1416 }
1417 c = *base_pat++;
aff2ce94 1418 TRANSLATE (translated, trt, c);
facdc750
RS
1419 *pat++ = translated;
1420 }
1421 }
1422
1423 len_byte = pat - patbuf;
1424 len = raw_pattern_size;
1425 pat = base_pat = patbuf;
1426
040272ce 1427 if (boyer_moore_ok)
facdc750 1428 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
aff2ce94 1429 pos, pos_byte, lim, lim_byte,
a17ae96a 1430 char_base);
facdc750
RS
1431 else
1432 return simple_search (n, pat, len, len_byte, trt,
1433 pos, pos_byte, lim, lim_byte);
1434 }
1435}
1436\f
1437/* Do a simple string search N times for the string PAT,
1438 whose length is LEN/LEN_BYTE,
1439 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1440 TRT is the translation table.
f8bd51c4 1441
facdc750
RS
1442 Return the character position where the match is found.
1443 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1444
facdc750
RS
1445 This kind of search works regardless of what is in PAT and
1446 regardless of what is in TRT. It is used in cases where
1447 boyer_moore cannot work. */
1448
e7deaab0 1449static EMACS_INT
c098fdb8
EZ
1450simple_search (EMACS_INT n, unsigned char *pat,
1451 EMACS_INT len, EMACS_INT len_byte, Lisp_Object trt,
1452 EMACS_INT pos, EMACS_INT pos_byte,
1453 EMACS_INT lim, EMACS_INT lim_byte)
facdc750
RS
1454{
1455 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
ab228c24 1456 int forward = n > 0;
49e44e81
KH
1457 /* Number of buffer bytes matched. Note that this may be different
1458 from len_byte in a multibyte buffer. */
c098fdb8 1459 EMACS_INT match_byte;
facdc750
RS
1460
1461 if (lim > pos && multibyte)
1462 while (n > 0)
1463 {
1464 while (1)
f8bd51c4 1465 {
facdc750 1466 /* Try matching at position POS. */
e7deaab0
AS
1467 EMACS_INT this_pos = pos;
1468 EMACS_INT this_pos_byte = pos_byte;
c098fdb8 1469 EMACS_INT this_len = len;
facdc750 1470 unsigned char *p = pat;
a9469498 1471 if (pos + len > lim || pos_byte + len_byte > lim_byte)
facdc750
RS
1472 goto stop;
1473
1474 while (this_len > 0)
1475 {
1476 int charlen, buf_charlen;
ab228c24 1477 int pat_ch, buf_ch;
facdc750 1478
62a6e103 1479 pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
facdc750 1480 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
facdc750 1481 buf_charlen);
aff2ce94 1482 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1483
1484 if (buf_ch != pat_ch)
1485 break;
ab228c24 1486
ab228c24
RS
1487 this_len--;
1488 p += charlen;
1489
1490 this_pos_byte += buf_charlen;
1491 this_pos++;
facdc750
RS
1492 }
1493
1494 if (this_len == 0)
1495 {
49e44e81 1496 match_byte = this_pos_byte - pos_byte;
facdc750 1497 pos += len;
49e44e81 1498 pos_byte += match_byte;
facdc750
RS
1499 break;
1500 }
1501
1502 INC_BOTH (pos, pos_byte);
f8bd51c4 1503 }
facdc750
RS
1504
1505 n--;
1506 }
1507 else if (lim > pos)
1508 while (n > 0)
1509 {
1510 while (1)
f8bd51c4 1511 {
facdc750 1512 /* Try matching at position POS. */
e7deaab0 1513 EMACS_INT this_pos = pos;
c098fdb8 1514 EMACS_INT this_len = len;
facdc750
RS
1515 unsigned char *p = pat;
1516
1517 if (pos + len > lim)
1518 goto stop;
1519
1520 while (this_len > 0)
1521 {
1522 int pat_ch = *p++;
1523 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1524 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1525
1526 if (buf_ch != pat_ch)
1527 break;
ab228c24
RS
1528
1529 this_len--;
1530 this_pos++;
facdc750
RS
1531 }
1532
1533 if (this_len == 0)
1534 {
49e44e81 1535 match_byte = len;
facdc750
RS
1536 pos += len;
1537 break;
1538 }
1539
1540 pos++;
f8bd51c4 1541 }
facdc750
RS
1542
1543 n--;
1544 }
1545 /* Backwards search. */
1546 else if (lim < pos && multibyte)
1547 while (n < 0)
1548 {
1549 while (1)
f8bd51c4 1550 {
facdc750 1551 /* Try matching at position POS. */
8b264ecb
AS
1552 EMACS_INT this_pos = pos;
1553 EMACS_INT this_pos_byte = pos_byte;
c098fdb8 1554 EMACS_INT this_len = len;
c525b3f2 1555 const unsigned char *p = pat + len_byte;
facdc750 1556
8b264ecb 1557 if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
facdc750
RS
1558 goto stop;
1559
1560 while (this_len > 0)
1561 {
8b264ecb 1562 int charlen;
ab228c24 1563 int pat_ch, buf_ch;
facdc750 1564
8b264ecb
AS
1565 DEC_BOTH (this_pos, this_pos_byte);
1566 PREV_CHAR_BOUNDARY (p, pat);
1567 pat_ch = STRING_CHAR (p);
1568 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
aff2ce94 1569 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1570
1571 if (buf_ch != pat_ch)
1572 break;
ab228c24 1573
ab228c24 1574 this_len--;
facdc750
RS
1575 }
1576
1577 if (this_len == 0)
1578 {
8b264ecb
AS
1579 match_byte = pos_byte - this_pos_byte;
1580 pos = this_pos;
1581 pos_byte = this_pos_byte;
facdc750
RS
1582 break;
1583 }
1584
1585 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1586 }
1587
facdc750
RS
1588 n++;
1589 }
1590 else if (lim < pos)
1591 while (n < 0)
1592 {
1593 while (1)
b6d6a51c 1594 {
facdc750 1595 /* Try matching at position POS. */
e7deaab0 1596 EMACS_INT this_pos = pos - len;
c098fdb8 1597 EMACS_INT this_len = len;
facdc750
RS
1598 unsigned char *p = pat;
1599
a9469498 1600 if (this_pos < lim)
facdc750
RS
1601 goto stop;
1602
1603 while (this_len > 0)
1604 {
1605 int pat_ch = *p++;
1606 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1607 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1608
1609 if (buf_ch != pat_ch)
1610 break;
ab228c24
RS
1611 this_len--;
1612 this_pos++;
facdc750
RS
1613 }
1614
1615 if (this_len == 0)
b6d6a51c 1616 {
49e44e81 1617 match_byte = len;
facdc750
RS
1618 pos -= len;
1619 break;
b6d6a51c 1620 }
facdc750
RS
1621
1622 pos--;
b6d6a51c 1623 }
facdc750
RS
1624
1625 n++;
b6d6a51c 1626 }
facdc750
RS
1627
1628 stop:
1629 if (n == 0)
aff2ce94 1630 {
ab228c24 1631 if (forward)
49e44e81 1632 set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
ab228c24 1633 else
49e44e81 1634 set_search_regs (multibyte ? pos_byte : pos, match_byte);
aff2ce94
RS
1635
1636 return pos;
1637 }
facdc750
RS
1638 else if (n > 0)
1639 return -n;
1640 else
1641 return n;
1642}
1643\f
0190922f 1644/* Do Boyer-Moore search N times for the string BASE_PAT,
facdc750
RS
1645 whose length is LEN/LEN_BYTE,
1646 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1647 DIRECTION says which direction we search in.
1648 TRT and INVERSE_TRT are translation tables.
0190922f 1649 Characters in PAT are already translated by TRT.
facdc750 1650
0190922f
KH
1651 This kind of search works if all the characters in BASE_PAT that
1652 have nontrivial translation are the same aside from the last byte.
1653 This makes it possible to translate just the last byte of a
1654 character, and do so after just a simple test of the context.
b2e6b10f 1655 CHAR_BASE is nonzero if there is such a non-ASCII character.
facdc750
RS
1656
1657 If that criterion is not satisfied, do not call this function. */
1658
e7deaab0 1659static EMACS_INT
c098fdb8
EZ
1660boyer_moore (EMACS_INT n, unsigned char *base_pat,
1661 EMACS_INT len, EMACS_INT len_byte,
d5a3eaaf
AS
1662 Lisp_Object trt, Lisp_Object inverse_trt,
1663 EMACS_INT pos, EMACS_INT pos_byte,
1664 EMACS_INT lim, EMACS_INT lim_byte, int char_base)
facdc750
RS
1665{
1666 int direction = ((n > 0) ? 1 : -1);
c098fdb8 1667 register EMACS_INT dirlen;
e7deaab0
AS
1668 EMACS_INT limit;
1669 int stride_for_teases = 0;
f4646fff 1670 int BM_tab[0400];
177c0ea7 1671 register unsigned char *cursor, *p_limit;
c098fdb8
EZ
1672 register EMACS_INT i;
1673 register int j;
cb6792d2 1674 unsigned char *pat, *pat_end;
facdc750
RS
1675 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1676
1677 unsigned char simple_translate[0400];
0190922f 1678 /* These are set to the preceding bytes of a byte to be translated
49e44e81 1679 if char_base is nonzero. As the maximum byte length of a
a17ae96a 1680 multibyte character is 5, we have to check at most four previous
0190922f
KH
1681 bytes. */
1682 int translate_prev_byte1 = 0;
1683 int translate_prev_byte2 = 0;
1684 int translate_prev_byte3 = 0;
a17ae96a 1685 int translate_prev_byte4 = 0;
facdc750 1686
f4646fff
AS
1687 /* The general approach is that we are going to maintain that we know
1688 the first (closest to the present position, in whatever direction
1689 we're searching) character that could possibly be the last
1690 (furthest from present position) character of a valid match. We
1691 advance the state of our knowledge by looking at that character
1692 and seeing whether it indeed matches the last character of the
1693 pattern. If it does, we take a closer look. If it does not, we
1694 move our pointer (to putative last characters) as far as is
1695 logically possible. This amount of movement, which I call a
1696 stride, will be the length of the pattern if the actual character
1697 appears nowhere in the pattern, otherwise it will be the distance
1698 from the last occurrence of that character to the end of the
1699 pattern. If the amount is zero we have a possible match. */
1700
1701 /* Here we make a "mickey mouse" BM table. The stride of the search
1702 is determined only by the last character of the putative match.
1703 If that character does not match, we will stride the proper
1704 distance to propose a match that superimposes it on the last
1705 instance of a character that matches it (per trt), or misses
1706 it entirely if there is none. */
facdc750
RS
1707
1708 dirlen = len_byte * direction;
cb6792d2
RS
1709
1710 /* Record position after the end of the pattern. */
1711 pat_end = base_pat + len_byte;
1712 /* BASE_PAT points to a character that we start scanning from.
1713 It is the first character in a forward search,
1714 the last character in a backward search. */
facdc750 1715 if (direction < 0)
cb6792d2
RS
1716 base_pat = pat_end - 1;
1717
f4646fff
AS
1718 /* A character that does not appear in the pattern induces a
1719 stride equal to the pattern length. */
1720 for (i = 0; i < 0400; i++)
1721 BM_tab[i] = dirlen;
facdc750
RS
1722
1723 /* We use this for translation, instead of TRT itself.
1724 We fill this in to handle the characters that actually
1725 occur in the pattern. Others don't matter anyway! */
facdc750
RS
1726 for (i = 0; i < 0400; i++)
1727 simple_translate[i] = i;
1728
a17ae96a 1729 if (char_base)
0190922f 1730 {
a17ae96a 1731 /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE. Only a
0190922f 1732 byte following them are the target of translation. */
0190922f 1733 unsigned char str[MAX_MULTIBYTE_LENGTH];
a17ae96a 1734 int len = CHAR_STRING (char_base, str);
0190922f
KH
1735
1736 translate_prev_byte1 = str[len - 2];
1737 if (len > 2)
1738 {
1739 translate_prev_byte2 = str[len - 3];
1740 if (len > 3)
a17ae96a
KH
1741 {
1742 translate_prev_byte3 = str[len - 4];
1743 if (len > 4)
1744 translate_prev_byte4 = str[len - 5];
1745 }
0190922f
KH
1746 }
1747 }
1748
facdc750 1749 i = 0;
f4646fff 1750 while (i != dirlen)
facdc750 1751 {
cb6792d2 1752 unsigned char *ptr = base_pat + i;
facdc750 1753 i += direction;
facdc750 1754 if (! NILP (trt))
ca1d1d23 1755 {
49e44e81
KH
1756 /* If the byte currently looking at is the last of a
1757 character to check case-equivalents, set CH to that
1758 character. An ASCII character and a non-ASCII character
1759 matching with CHAR_BASE are to be checked. */
a17ae96a
KH
1760 int ch = -1;
1761
1762 if (ASCII_BYTE_P (*ptr) || ! multibyte)
1763 ch = *ptr;
49e44e81 1764 else if (char_base
86dc6ccb 1765 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
ca1d1d23 1766 {
49e44e81
KH
1767 unsigned char *charstart = ptr - 1;
1768
1769 while (! (CHAR_HEAD_P (*charstart)))
1770 charstart--;
62a6e103 1771 ch = STRING_CHAR (charstart);
a17ae96a
KH
1772 if (char_base != (ch & ~0x3F))
1773 ch = -1;
ca1d1d23 1774 }
facdc750 1775
f51995fa 1776 if (ch >= 0200)
49e44e81
KH
1777 j = (ch & 0x3F) | 0200;
1778 else
1779 j = *ptr;
1780
f4646fff 1781 if (i == dirlen)
facdc750 1782 stride_for_teases = BM_tab[j];
ab228c24 1783
facdc750
RS
1784 BM_tab[j] = dirlen - i;
1785 /* A translation table is accompanied by its inverse -- see */
177c0ea7 1786 /* comment following downcase_table for details */
a17ae96a 1787 if (ch >= 0)
ab228c24
RS
1788 {
1789 int starting_ch = ch;
49e44e81 1790 int starting_j = j;
a17ae96a 1791
ab228c24
RS
1792 while (1)
1793 {
1794 TRANSLATE (ch, inverse_trt, ch);
f51995fa 1795 if (ch >= 0200)
49e44e81 1796 j = (ch & 0x3F) | 0200;
ab228c24 1797 else
a17ae96a 1798 j = ch;
ab228c24
RS
1799
1800 /* For all the characters that map into CH,
1801 set up simple_translate to map the last byte
1802 into STARTING_J. */
1803 simple_translate[j] = starting_j;
1804 if (ch == starting_ch)
1805 break;
1806 BM_tab[j] = dirlen - i;
1807 }
1808 }
facdc750
RS
1809 }
1810 else
1811 {
1812 j = *ptr;
1813
f4646fff 1814 if (i == dirlen)
facdc750
RS
1815 stride_for_teases = BM_tab[j];
1816 BM_tab[j] = dirlen - i;
ca1d1d23 1817 }
f4646fff
AS
1818 /* stride_for_teases tells how much to stride if we get a
1819 match on the far character but are subsequently
1820 disappointed, by recording what the stride would have been
1821 for that character if the last character had been
1822 different. */
facdc750 1823 }
facdc750
RS
1824 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1825 /* loop invariant - POS_BYTE points at where last char (first
1826 char if reverse) of pattern would align in a possible match. */
1827 while (n != 0)
1828 {
e7deaab0 1829 EMACS_INT tail_end;
facdc750
RS
1830 unsigned char *tail_end_ptr;
1831
1832 /* It's been reported that some (broken) compiler thinks that
1833 Boolean expressions in an arithmetic context are unsigned.
1834 Using an explicit ?1:0 prevents this. */
1835 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1836 < 0)
1837 return (n * (0 - direction));
1838 /* First we do the part we can by pointers (maybe nothing) */
1839 QUIT;
1840 pat = base_pat;
1841 limit = pos_byte - dirlen + direction;
67ce527d
KH
1842 if (direction > 0)
1843 {
1844 limit = BUFFER_CEILING_OF (limit);
1845 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1846 can take on without hitting edge of buffer or the gap. */
1847 limit = min (limit, pos_byte + 20000);
1848 limit = min (limit, lim_byte - 1);
1849 }
1850 else
1851 {
1852 limit = BUFFER_FLOOR_OF (limit);
1853 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1854 can take on without hitting edge of buffer or the gap. */
1855 limit = max (limit, pos_byte - 20000);
1856 limit = max (limit, lim_byte);
1857 }
facdc750
RS
1858 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1859 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1860
1861 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1862 {
facdc750
RS
1863 unsigned char *p2;
1864
1865 p_limit = BYTE_POS_ADDR (limit);
1866 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
f4646fff 1867 /* In this loop, pos + cursor - p2 is the surrogate for pos. */
facdc750 1868 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1869 {
facdc750 1870 if (direction > 0) /* worth duplicating */
ca1d1d23 1871 {
f4646fff
AS
1872 while (cursor <= p_limit)
1873 {
1874 if (BM_tab[*cursor] == 0)
1875 goto hit;
facdc750 1876 cursor += BM_tab[*cursor];
f4646fff 1877 }
facdc750
RS
1878 }
1879 else
1880 {
f4646fff
AS
1881 while (cursor >= p_limit)
1882 {
1883 if (BM_tab[*cursor] == 0)
1884 goto hit;
facdc750 1885 cursor += BM_tab[*cursor];
f4646fff 1886 }
facdc750 1887 }
f4646fff
AS
1888 /* If you are here, cursor is beyond the end of the
1889 searched region. You fail to match within the
1890 permitted region and would otherwise try a character
1891 beyond that region. */
1892 break;
1893
1894 hit:
facdc750
RS
1895 i = dirlen - direction;
1896 if (! NILP (trt))
1897 {
1898 while ((i -= direction) + direction != 0)
ca1d1d23 1899 {
facdc750
RS
1900 int ch;
1901 cursor -= direction;
1902 /* Translate only the last byte of a character. */
1903 if (! multibyte
1904 || ((cursor == tail_end_ptr
1905 || CHAR_HEAD_P (cursor[1]))
1906 && (CHAR_HEAD_P (cursor[0])
0190922f
KH
1907 /* Check if this is the last byte of
1908 a translable character. */
1909 || (translate_prev_byte1 == cursor[-1]
1910 && (CHAR_HEAD_P (translate_prev_byte1)
1911 || (translate_prev_byte2 == cursor[-2]
1912 && (CHAR_HEAD_P (translate_prev_byte2)
1913 || (translate_prev_byte3 == cursor[-3]))))))))
facdc750
RS
1914 ch = simple_translate[*cursor];
1915 else
1916 ch = *cursor;
1917 if (pat[i] != ch)
1918 break;
ca1d1d23 1919 }
facdc750
RS
1920 }
1921 else
1922 {
1923 while ((i -= direction) + direction != 0)
ca1d1d23 1924 {
facdc750
RS
1925 cursor -= direction;
1926 if (pat[i] != *cursor)
1927 break;
ca1d1d23 1928 }
facdc750
RS
1929 }
1930 cursor += dirlen - i - direction; /* fix cursor */
1931 if (i + direction == 0)
1932 {
e7deaab0 1933 EMACS_INT position, start, end;
0c8533c6 1934
facdc750 1935 cursor -= direction;
1113d9db 1936
facdc750
RS
1937 position = pos_byte + cursor - p2 + ((direction > 0)
1938 ? 1 - len_byte : 0);
1939 set_search_regs (position, len_byte);
ca325161 1940
ef887810
RS
1941 if (NILP (Vinhibit_changing_match_data))
1942 {
1943 start = search_regs.start[0];
1944 end = search_regs.end[0];
1945 }
1946 else
1947 /* If Vinhibit_changing_match_data is non-nil,
1948 search_regs will not be changed. So let's
1949 compute start and end here. */
1950 {
1951 start = BYTE_TO_CHAR (position);
1952 end = BYTE_TO_CHAR (position + len_byte);
1953 }
1954
facdc750
RS
1955 if ((n -= direction) != 0)
1956 cursor += dirlen; /* to resume search */
ca1d1d23 1957 else
ef887810 1958 return direction > 0 ? end : start;
ca1d1d23 1959 }
facdc750
RS
1960 else
1961 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 1962 }
facdc750
RS
1963 pos_byte += cursor - p2;
1964 }
1965 else
f4646fff
AS
1966 /* Now we'll pick up a clump that has to be done the hard
1967 way because it covers a discontinuity. */
facdc750
RS
1968 {
1969 limit = ((direction > 0)
1970 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1971 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1972 limit = ((direction > 0)
1973 ? min (limit + len_byte, lim_byte - 1)
1974 : max (limit - len_byte, lim_byte));
1975 /* LIMIT is now the last value POS_BYTE can have
1976 and still be valid for a possible match. */
1977 while (1)
ca1d1d23 1978 {
f4646fff
AS
1979 /* This loop can be coded for space rather than
1980 speed because it will usually run only once.
1981 (the reach is at most len + 21, and typically
1982 does not exceed len). */
facdc750 1983 while ((limit - pos_byte) * direction >= 0)
f4646fff
AS
1984 {
1985 int ch = FETCH_BYTE (pos_byte);
1986 if (BM_tab[ch] == 0)
1987 goto hit2;
1988 pos_byte += BM_tab[ch];
1989 }
1990 break; /* ran off the end */
1991
1992 hit2:
1993 /* Found what might be a match. */
facdc750
RS
1994 i = dirlen - direction;
1995 while ((i -= direction) + direction != 0)
ca1d1d23 1996 {
facdc750
RS
1997 int ch;
1998 unsigned char *ptr;
1999 pos_byte -= direction;
2000 ptr = BYTE_POS_ADDR (pos_byte);
2001 /* Translate only the last byte of a character. */
2002 if (! multibyte
2003 || ((ptr == tail_end_ptr
2004 || CHAR_HEAD_P (ptr[1]))
2005 && (CHAR_HEAD_P (ptr[0])
0190922f
KH
2006 /* Check if this is the last byte of a
2007 translable character. */
2008 || (translate_prev_byte1 == ptr[-1]
2009 && (CHAR_HEAD_P (translate_prev_byte1)
2010 || (translate_prev_byte2 == ptr[-2]
2011 && (CHAR_HEAD_P (translate_prev_byte2)
2012 || translate_prev_byte3 == ptr[-3])))))))
facdc750
RS
2013 ch = simple_translate[*ptr];
2014 else
2015 ch = *ptr;
2016 if (pat[i] != ch)
2017 break;
2018 }
2019 /* Above loop has moved POS_BYTE part or all the way
2020 back to the first pos (last pos if reverse).
2021 Set it once again at the last (first if reverse) char. */
f4646fff 2022 pos_byte += dirlen - i - direction;
facdc750
RS
2023 if (i + direction == 0)
2024 {
e7deaab0 2025 EMACS_INT position, start, end;
facdc750 2026 pos_byte -= direction;
1113d9db 2027
facdc750 2028 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
facdc750 2029 set_search_regs (position, len_byte);
ca325161 2030
ef887810
RS
2031 if (NILP (Vinhibit_changing_match_data))
2032 {
2033 start = search_regs.start[0];
2034 end = search_regs.end[0];
2035 }
2036 else
2037 /* If Vinhibit_changing_match_data is non-nil,
2038 search_regs will not be changed. So let's
2039 compute start and end here. */
2040 {
2041 start = BYTE_TO_CHAR (position);
2042 end = BYTE_TO_CHAR (position + len_byte);
2043 }
2044
facdc750
RS
2045 if ((n -= direction) != 0)
2046 pos_byte += dirlen; /* to resume search */
ca1d1d23 2047 else
ef887810 2048 return direction > 0 ? end : start;
ca1d1d23 2049 }
facdc750
RS
2050 else
2051 pos_byte += stride_for_teases;
2052 }
2053 }
2054 /* We have done one clump. Can we continue? */
2055 if ((lim_byte - pos_byte) * direction < 0)
2056 return ((0 - n) * direction);
ca1d1d23 2057 }
facdc750 2058 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 2059}
ca325161 2060
fa8ed3e0 2061/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
a7e4cdde
RS
2062 for the overall match just found in the current buffer.
2063 Also clear out the match data for registers 1 and up. */
ca325161
RS
2064
2065static void
971de7fb 2066set_search_regs (EMACS_INT beg_byte, EMACS_INT nbytes)
ca325161 2067{
a7e4cdde
RS
2068 int i;
2069
ef887810
RS
2070 if (!NILP (Vinhibit_changing_match_data))
2071 return;
2072
ca325161
RS
2073 /* Make sure we have registers in which to store
2074 the match position. */
2075 if (search_regs.num_regs == 0)
2076 {
2d4a771a
RS
2077 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2078 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 2079 search_regs.num_regs = 2;
ca325161
RS
2080 }
2081
a7e4cdde
RS
2082 /* Clear out the other registers. */
2083 for (i = 1; i < search_regs.num_regs; i++)
2084 {
2085 search_regs.start[i] = -1;
2086 search_regs.end[i] = -1;
2087 }
2088
fa8ed3e0
RS
2089 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2090 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 2091 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 2092}
ca1d1d23 2093\f
c6e0e1cc
CY
2094/* Given STRING, a string of words separated by word delimiters,
2095 compute a regexp that matches those exact words separated by
2096 arbitrary punctuation. If LAX is nonzero, the end of the string
2097 need not match a word boundary unless it ends in whitespace. */
ca1d1d23
JB
2098
2099static Lisp_Object
971de7fb 2100wordify (Lisp_Object string, int lax)
ca1d1d23
JB
2101{
2102 register unsigned char *p, *o;
c098fdb8 2103 register EMACS_INT i, i_byte, len, punct_count = 0, word_count = 0;
ca1d1d23 2104 Lisp_Object val;
0c8533c6 2105 int prev_c = 0;
c098fdb8
EZ
2106 EMACS_INT adjust;
2107 int whitespace_at_end;
ca1d1d23 2108
b7826503 2109 CHECK_STRING (string);
d5db4077
KR
2110 p = SDATA (string);
2111 len = SCHARS (string);
ca1d1d23 2112
0c8533c6
RS
2113 for (i = 0, i_byte = 0; i < len; )
2114 {
2115 int c;
177c0ea7 2116
93daa011 2117 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
0c8533c6
RS
2118
2119 if (SYNTAX (c) != Sword)
2120 {
2121 punct_count++;
2122 if (i > 0 && SYNTAX (prev_c) == Sword)
2123 word_count++;
2124 }
ca1d1d23 2125
0c8533c6
RS
2126 prev_c = c;
2127 }
2128
2129 if (SYNTAX (prev_c) == Sword)
c6e0e1cc
CY
2130 {
2131 word_count++;
2132 whitespace_at_end = 0;
2133 }
2134 else
2135 whitespace_at_end = 1;
2136
0c8533c6 2137 if (!word_count)
c60416e0 2138 return empty_unibyte_string;
0c8533c6 2139
c6e0e1cc
CY
2140 adjust = - punct_count + 5 * (word_count - 1)
2141 + ((lax && !whitespace_at_end) ? 2 : 4);
8a2df937
RS
2142 if (STRING_MULTIBYTE (string))
2143 val = make_uninit_multibyte_string (len + adjust,
d5db4077 2144 SBYTES (string)
8a2df937
RS
2145 + adjust);
2146 else
2147 val = make_uninit_string (len + adjust);
ca1d1d23 2148
d5db4077 2149 o = SDATA (val);
ca1d1d23
JB
2150 *o++ = '\\';
2151 *o++ = 'b';
1e9582d4 2152 prev_c = 0;
ca1d1d23 2153
1e9582d4
RS
2154 for (i = 0, i_byte = 0; i < len; )
2155 {
2156 int c;
c098fdb8 2157 EMACS_INT i_byte_orig = i_byte;
177c0ea7 2158
93daa011 2159 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
1e9582d4
RS
2160
2161 if (SYNTAX (c) == Sword)
2162 {
72af86bd 2163 memcpy (o, SDATA (string) + i_byte_orig, i_byte - i_byte_orig);
1e9582d4
RS
2164 o += i_byte - i_byte_orig;
2165 }
2166 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2167 {
2168 *o++ = '\\';
2169 *o++ = 'W';
2170 *o++ = '\\';
2171 *o++ = 'W';
2172 *o++ = '*';
2173 }
2174
2175 prev_c = c;
2176 }
ca1d1d23 2177
c6e0e1cc
CY
2178 if (!lax || whitespace_at_end)
2179 {
2180 *o++ = '\\';
2181 *o++ = 'b';
2182 }
ca1d1d23
JB
2183
2184 return val;
2185}
2186\f
2187DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
8c1a1077
PJ
2188 "MSearch backward: ",
2189 doc: /* Search backward from point for STRING.
2190Set point to the beginning of the occurrence found, and return point.
2191An optional second argument bounds the search; it is a buffer position.
2192The match found must not extend before that position.
2193Optional third argument, if t, means if fail just return nil (no error).
2194 If not nil and not t, position at limit of search and return nil.
2195Optional fourth argument is repeat count--search for successive occurrences.
2196
2197Search case-sensitivity is determined by the value of the variable
2198`case-fold-search', which see.
2199
2200See also the functions `match-beginning', `match-end' and `replace-match'. */)
5842a27b 2201 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
ca1d1d23 2202{
b819a390 2203 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
2204}
2205
6af43974 2206DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
8c1a1077
PJ
2207 doc: /* Search forward from point for STRING.
2208Set point to the end of the occurrence found, and return point.
2209An optional second argument bounds the search; it is a buffer position.
d6aacdcd 2210The match found must not extend after that position. A value of nil is
48740f01 2211 equivalent to (point-max).
8c1a1077
PJ
2212Optional third argument, if t, means if fail just return nil (no error).
2213 If not nil and not t, move to limit of search and return nil.
2214Optional fourth argument is repeat count--search for successive occurrences.
2215
2216Search case-sensitivity is determined by the value of the variable
2217`case-fold-search', which see.
2218
2219See also the functions `match-beginning', `match-end' and `replace-match'. */)
5842a27b 2220 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
ca1d1d23 2221{
b819a390 2222 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
2223}
2224
2225DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
8c1a1077
PJ
2226 "sWord search backward: ",
2227 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2228Set point to the beginning of the occurrence found, and return point.
2229An optional second argument bounds the search; it is a buffer position.
2230The match found must not extend before that position.
2231Optional third argument, if t, means if fail just return nil (no error).
2232 If not nil and not t, move to limit of search and return nil.
2233Optional fourth argument is repeat count--search for successive occurrences. */)
5842a27b 2234 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
ca1d1d23 2235{
c6e0e1cc 2236 return search_command (wordify (string, 0), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2237}
2238
2239DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
8c1a1077
PJ
2240 "sWord search: ",
2241 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2242Set point to the end of the occurrence found, and return point.
2243An optional second argument bounds the search; it is a buffer position.
2244The match found must not extend after that position.
2245Optional third argument, if t, means if fail just return nil (no error).
2246 If not nil and not t, move to limit of search and return nil.
2247Optional fourth argument is repeat count--search for successive occurrences. */)
5842a27b 2248 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
ca1d1d23 2249{
c6e0e1cc
CY
2250 return search_command (wordify (string, 0), bound, noerror, count, 1, 1, 0);
2251}
2252
2253DEFUN ("word-search-backward-lax", Fword_search_backward_lax, Sword_search_backward_lax, 1, 4,
2254 "sWord search backward: ",
2255 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2256Set point to the beginning of the occurrence found, and return point.
2257
2258Unlike `word-search-backward', the end of STRING need not match a word
2259boundary unless it ends in whitespace.
2260
2261An optional second argument bounds the search; it is a buffer position.
2262The match found must not extend before that position.
2263Optional third argument, if t, means if fail just return nil (no error).
2264 If not nil and not t, move to limit of search and return nil.
2265Optional fourth argument is repeat count--search for successive occurrences. */)
5842a27b 2266 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
c6e0e1cc
CY
2267{
2268 return search_command (wordify (string, 1), bound, noerror, count, -1, 1, 0);
2269}
2270
2271DEFUN ("word-search-forward-lax", Fword_search_forward_lax, Sword_search_forward_lax, 1, 4,
2272 "sWord search: ",
2273 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2274Set point to the end of the occurrence found, and return point.
2275
2276Unlike `word-search-forward', the end of STRING need not match a word
2277boundary unless it ends in whitespace.
2278
2279An optional second argument bounds the search; it is a buffer position.
2280The match found must not extend after that position.
2281Optional third argument, if t, means if fail just return nil (no error).
2282 If not nil and not t, move to limit of search and return nil.
2283Optional fourth argument is repeat count--search for successive occurrences. */)
5842a27b 2284 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
c6e0e1cc
CY
2285{
2286 return search_command (wordify (string, 1), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
2287}
2288
2289DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
8c1a1077
PJ
2290 "sRE search backward: ",
2291 doc: /* Search backward from point for match for regular expression REGEXP.
2292Set point to the beginning of the match, and return point.
2293The match found is the one starting last in the buffer
2294and yet ending before the origin of the search.
2295An optional second argument bounds the search; it is a buffer position.
2296The match found must start at or after that position.
2297Optional third argument, if t, means if fail just return nil (no error).
2298 If not nil and not t, move to limit of search and return nil.
2299Optional fourth argument is repeat count--search for successive occurrences.
2300See also the functions `match-beginning', `match-end', `match-string',
2301and `replace-match'. */)
5842a27b 2302 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
ca1d1d23 2303{
b819a390 2304 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2305}
2306
2307DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
8c1a1077
PJ
2308 "sRE search: ",
2309 doc: /* Search forward from point for regular expression REGEXP.
2310Set point to the end of the occurrence found, and return point.
2311An optional second argument bounds the search; it is a buffer position.
2312The match found must not extend after that position.
2313Optional third argument, if t, means if fail just return nil (no error).
2314 If not nil and not t, move to limit of search and return nil.
2315Optional fourth argument is repeat count--search for successive occurrences.
2316See also the functions `match-beginning', `match-end', `match-string',
2317and `replace-match'. */)
5842a27b 2318 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
ca1d1d23 2319{
b819a390
RS
2320 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2321}
2322
2323DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
8c1a1077
PJ
2324 "sPosix search backward: ",
2325 doc: /* Search backward from point for match for regular expression REGEXP.
2326Find the longest match in accord with Posix regular expression rules.
2327Set point to the beginning of the match, and return point.
2328The match found is the one starting last in the buffer
2329and yet ending before the origin of the search.
2330An optional second argument bounds the search; it is a buffer position.
2331The match found must start at or after that position.
2332Optional third argument, if t, means if fail just return nil (no error).
2333 If not nil and not t, move to limit of search and return nil.
2334Optional fourth argument is repeat count--search for successive occurrences.
2335See also the functions `match-beginning', `match-end', `match-string',
2336and `replace-match'. */)
5842a27b 2337 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
b819a390
RS
2338{
2339 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2340}
2341
2342DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
8c1a1077
PJ
2343 "sPosix search: ",
2344 doc: /* Search forward from point for regular expression REGEXP.
2345Find the longest match in accord with Posix regular expression rules.
2346Set point to the end of the occurrence found, and return point.
2347An optional second argument bounds the search; it is a buffer position.
2348The match found must not extend after that position.
2349Optional third argument, if t, means if fail just return nil (no error).
2350 If not nil and not t, move to limit of search and return nil.
2351Optional fourth argument is repeat count--search for successive occurrences.
2352See also the functions `match-beginning', `match-end', `match-string',
2353and `replace-match'. */)
5842a27b 2354 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
b819a390
RS
2355{
2356 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2357}
2358\f
d7a5ad5f 2359DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
8c1a1077 2360 doc: /* Replace text matched by last search with NEWTEXT.
4dd0c271
RS
2361Leave point at the end of the replacement text.
2362
8c1a1077
PJ
2363If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2364Otherwise maybe capitalize the whole text, or maybe just word initials,
2365based on the replaced text.
2366If the replaced text has only capital letters
2367and has at least one multiletter word, convert NEWTEXT to all caps.
4dd0c271
RS
2368Otherwise if all words are capitalized in the replaced text,
2369capitalize each word in NEWTEXT.
8c1a1077
PJ
2370
2371If third arg LITERAL is non-nil, insert NEWTEXT literally.
2372Otherwise treat `\\' as special:
2373 `\\&' in NEWTEXT means substitute original matched text.
2374 `\\N' means substitute what matched the Nth `\\(...\\)'.
2375 If Nth parens didn't match, substitute nothing.
2376 `\\\\' means insert one `\\'.
4dd0c271
RS
2377Case conversion does not apply to these substitutions.
2378
8c1a1077 2379FIXEDCASE and LITERAL are optional arguments.
8c1a1077
PJ
2380
2381The optional fourth argument STRING can be a string to modify.
2382This is meaningful when the previous match was done against STRING,
2383using `string-match'. When used this way, `replace-match'
2384creates and returns a new string made by copying STRING and replacing
2385the part of STRING that was matched.
2386
2387The optional fifth argument SUBEXP specifies a subexpression;
2388it says to replace just that subexpression with NEWTEXT,
2389rather than replacing the entire matched text.
2390This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2391`\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2392NEWTEXT in place of subexp N.
2393This is useful only after a regular expression search or match,
2394since only regular expressions have distinguished subexpressions. */)
5842a27b 2395 (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp)
ca1d1d23
JB
2396{
2397 enum { nochange, all_caps, cap_initial } case_action;
c098fdb8 2398 register EMACS_INT pos, pos_byte;
ca1d1d23 2399 int some_multiletter_word;
97832bd0 2400 int some_lowercase;
73dc8771 2401 int some_uppercase;
208767c3 2402 int some_nonuppercase_initial;
ca1d1d23 2403 register int c, prevc;
d7a5ad5f 2404 int sub;
e7deaab0 2405 EMACS_INT opoint, newpoint;
ca1d1d23 2406
b7826503 2407 CHECK_STRING (newtext);
ca1d1d23 2408
080c45fd 2409 if (! NILP (string))
b7826503 2410 CHECK_STRING (string);
080c45fd 2411
ca1d1d23
JB
2412 case_action = nochange; /* We tried an initialization */
2413 /* but some C compilers blew it */
4746118a
JB
2414
2415 if (search_regs.num_regs <= 0)
d72cdbfc 2416 error ("`replace-match' called before any match found");
4746118a 2417
d7a5ad5f
RS
2418 if (NILP (subexp))
2419 sub = 0;
2420 else
2421 {
b7826503 2422 CHECK_NUMBER (subexp);
d7a5ad5f
RS
2423 sub = XINT (subexp);
2424 if (sub < 0 || sub >= search_regs.num_regs)
2425 args_out_of_range (subexp, make_number (search_regs.num_regs));
2426 }
2427
080c45fd
RS
2428 if (NILP (string))
2429 {
d7a5ad5f
RS
2430 if (search_regs.start[sub] < BEGV
2431 || search_regs.start[sub] > search_regs.end[sub]
2432 || search_regs.end[sub] > ZV)
2433 args_out_of_range (make_number (search_regs.start[sub]),
2434 make_number (search_regs.end[sub]));
080c45fd
RS
2435 }
2436 else
2437 {
d7a5ad5f
RS
2438 if (search_regs.start[sub] < 0
2439 || search_regs.start[sub] > search_regs.end[sub]
d5db4077 2440 || search_regs.end[sub] > SCHARS (string))
d7a5ad5f
RS
2441 args_out_of_range (make_number (search_regs.start[sub]),
2442 make_number (search_regs.end[sub]));
080c45fd 2443 }
ca1d1d23
JB
2444
2445 if (NILP (fixedcase))
2446 {
2447 /* Decide how to casify by examining the matched text. */
e7deaab0 2448 EMACS_INT last;
ca1d1d23 2449
ac3b28b1
KH
2450 pos = search_regs.start[sub];
2451 last = search_regs.end[sub];
fa8ed3e0
RS
2452
2453 if (NILP (string))
ac3b28b1 2454 pos_byte = CHAR_TO_BYTE (pos);
fa8ed3e0 2455 else
ac3b28b1 2456 pos_byte = string_char_to_byte (string, pos);
fa8ed3e0 2457
ca1d1d23
JB
2458 prevc = '\n';
2459 case_action = all_caps;
2460
2461 /* some_multiletter_word is set nonzero if any original word
2462 is more than one letter long. */
2463 some_multiletter_word = 0;
97832bd0 2464 some_lowercase = 0;
208767c3 2465 some_nonuppercase_initial = 0;
73dc8771 2466 some_uppercase = 0;
ca1d1d23 2467
ac3b28b1 2468 while (pos < last)
ca1d1d23 2469 {
080c45fd 2470 if (NILP (string))
ac3b28b1 2471 {
93daa011 2472 c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
ac3b28b1
KH
2473 INC_BOTH (pos, pos_byte);
2474 }
080c45fd 2475 else
93daa011 2476 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
080c45fd 2477
ca1d1d23
JB
2478 if (LOWERCASEP (c))
2479 {
2480 /* Cannot be all caps if any original char is lower case */
2481
97832bd0 2482 some_lowercase = 1;
ca1d1d23 2483 if (SYNTAX (prevc) != Sword)
208767c3 2484 some_nonuppercase_initial = 1;
ca1d1d23
JB
2485 else
2486 some_multiletter_word = 1;
2487 }
d16c2b66 2488 else if (UPPERCASEP (c))
ca1d1d23 2489 {
73dc8771 2490 some_uppercase = 1;
97832bd0 2491 if (SYNTAX (prevc) != Sword)
c4d460ce 2492 ;
97832bd0 2493 else
ca1d1d23
JB
2494 some_multiletter_word = 1;
2495 }
208767c3
RS
2496 else
2497 {
2498 /* If the initial is a caseless word constituent,
2499 treat that like a lowercase initial. */
2500 if (SYNTAX (prevc) != Sword)
2501 some_nonuppercase_initial = 1;
2502 }
ca1d1d23
JB
2503
2504 prevc = c;
2505 }
2506
97832bd0
RS
2507 /* Convert to all caps if the old text is all caps
2508 and has at least one multiletter word. */
2509 if (! some_lowercase && some_multiletter_word)
2510 case_action = all_caps;
c4d460ce 2511 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2512 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2513 case_action = cap_initial;
208767c3 2514 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2515 /* Should x -> yz, operating on X, give Yz or YZ?
2516 We'll assume the latter. */
2517 case_action = all_caps;
97832bd0
RS
2518 else
2519 case_action = nochange;
ca1d1d23
JB
2520 }
2521
080c45fd
RS
2522 /* Do replacement in a string. */
2523 if (!NILP (string))
2524 {
2525 Lisp_Object before, after;
2526
2527 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2528 make_number (search_regs.start[sub]));
2529 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2530
636a5e28
RS
2531 /* Substitute parts of the match into NEWTEXT
2532 if desired. */
080c45fd
RS
2533 if (NILP (literal))
2534 {
e7deaab0
AS
2535 EMACS_INT lastpos = 0;
2536 EMACS_INT lastpos_byte = 0;
080c45fd
RS
2537 /* We build up the substituted string in ACCUM. */
2538 Lisp_Object accum;
2539 Lisp_Object middle;
d5db4077 2540 int length = SBYTES (newtext);
080c45fd
RS
2541
2542 accum = Qnil;
2543
ac3b28b1 2544 for (pos_byte = 0, pos = 0; pos_byte < length;)
080c45fd 2545 {
c098fdb8
EZ
2546 EMACS_INT substart = -1;
2547 EMACS_INT subend = 0;
1e79ec24 2548 int delbackslash = 0;
080c45fd 2549
0c8533c6
RS
2550 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2551
080c45fd
RS
2552 if (c == '\\')
2553 {
0c8533c6 2554 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
177c0ea7 2555
080c45fd
RS
2556 if (c == '&')
2557 {
d7a5ad5f
RS
2558 substart = search_regs.start[sub];
2559 subend = search_regs.end[sub];
080c45fd 2560 }
76fc1ea2 2561 else if (c >= '1' && c <= '9')
080c45fd 2562 {
76fc1ea2
KH
2563 if (search_regs.start[c - '0'] >= 0
2564 && c <= search_regs.num_regs + '0')
080c45fd
RS
2565 {
2566 substart = search_regs.start[c - '0'];
2567 subend = search_regs.end[c - '0'];
2568 }
76fc1ea2
KH
2569 else
2570 {
2571 /* If that subexp did not match,
2572 replace \\N with nothing. */
2573 substart = 0;
2574 subend = 0;
2575 }
080c45fd 2576 }
1e79ec24
KH
2577 else if (c == '\\')
2578 delbackslash = 1;
636a5e28
RS
2579 else
2580 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2581 }
2582 if (substart >= 0)
2583 {
d131e79c
RS
2584 if (pos - 2 != lastpos)
2585 middle = substring_both (newtext, lastpos,
2586 lastpos_byte,
2587 pos - 2, pos_byte - 2);
080c45fd
RS
2588 else
2589 middle = Qnil;
2590 accum = concat3 (accum, middle,
0c8533c6
RS
2591 Fsubstring (string,
2592 make_number (substart),
080c45fd
RS
2593 make_number (subend)));
2594 lastpos = pos;
0c8533c6 2595 lastpos_byte = pos_byte;
080c45fd 2596 }
1e79ec24
KH
2597 else if (delbackslash)
2598 {
d131e79c
RS
2599 middle = substring_both (newtext, lastpos,
2600 lastpos_byte,
2601 pos - 1, pos_byte - 1);
0c8533c6 2602
1e79ec24
KH
2603 accum = concat2 (accum, middle);
2604 lastpos = pos;
0c8533c6 2605 lastpos_byte = pos_byte;
1e79ec24 2606 }
080c45fd
RS
2607 }
2608
d131e79c
RS
2609 if (pos != lastpos)
2610 middle = substring_both (newtext, lastpos,
2611 lastpos_byte,
0c8533c6 2612 pos, pos_byte);
080c45fd
RS
2613 else
2614 middle = Qnil;
2615
2616 newtext = concat2 (accum, middle);
2617 }
2618
636a5e28 2619 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2620 if (case_action == all_caps)
2621 newtext = Fupcase (newtext);
2622 else if (case_action == cap_initial)
2b2eead9 2623 newtext = Fupcase_initials (newtext);
080c45fd
RS
2624
2625 return concat3 (before, newtext, after);
2626 }
2627
09c4719e 2628 /* Record point, then move (quietly) to the start of the match. */
9160906f 2629 if (PT >= search_regs.end[sub])
b0eba991 2630 opoint = PT - ZV;
9160906f
RS
2631 else if (PT > search_regs.start[sub])
2632 opoint = search_regs.end[sub] - ZV;
b0eba991
RS
2633 else
2634 opoint = PT;
2635
886ed6ec
RS
2636 /* If we want non-literal replacement,
2637 perform substitution on the replacement string. */
2638 if (NILP (literal))
ca1d1d23 2639 {
c098fdb8 2640 EMACS_INT length = SBYTES (newtext);
68e69fbd 2641 unsigned char *substed;
c098fdb8 2642 EMACS_INT substed_alloc_size, substed_len;
3bc25e52
KH
2643 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2644 int str_multibyte = STRING_MULTIBYTE (newtext);
2645 Lisp_Object rev_tbl;
886ed6ec 2646 int really_changed = 0;
3bc25e52 2647
8f924df7 2648 rev_tbl = Qnil;
ac3b28b1 2649
68e69fbd
RS
2650 substed_alloc_size = length * 2 + 100;
2651 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2652 substed_len = 0;
2653
3bc25e52
KH
2654 /* Go thru NEWTEXT, producing the actual text to insert in
2655 SUBSTED while adjusting multibyteness to that of the current
2656 buffer. */
ca1d1d23 2657
ac3b28b1 2658 for (pos_byte = 0, pos = 0; pos_byte < length;)
ca1d1d23 2659 {
68e69fbd 2660 unsigned char str[MAX_MULTIBYTE_LENGTH];
8ea90aa3 2661 const unsigned char *add_stuff = NULL;
c098fdb8 2662 EMACS_INT add_len = 0;
68e69fbd 2663 int idx = -1;
9a76659d 2664
3bc25e52
KH
2665 if (str_multibyte)
2666 {
eb99a8dd 2667 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
3bc25e52
KH
2668 if (!buf_multibyte)
2669 c = multibyte_char_to_unibyte (c, rev_tbl);
2670 }
2671 else
2672 {
2673 /* Note that we don't have to increment POS. */
5d69fe10 2674 c = SREF (newtext, pos_byte++);
3bc25e52 2675 if (buf_multibyte)
4c0354d7 2676 MAKE_CHAR_MULTIBYTE (c);
3bc25e52 2677 }
ac3b28b1 2678
68e69fbd
RS
2679 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2680 or set IDX to a match index, which means put that part
2681 of the buffer text into SUBSTED. */
2682
ca1d1d23
JB
2683 if (c == '\\')
2684 {
886ed6ec
RS
2685 really_changed = 1;
2686
3bc25e52
KH
2687 if (str_multibyte)
2688 {
eb99a8dd
KH
2689 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2690 pos, pos_byte);
071ce769 2691 if (!buf_multibyte && !ASCII_CHAR_P (c))
3bc25e52
KH
2692 c = multibyte_char_to_unibyte (c, rev_tbl);
2693 }
2694 else
2695 {
d5db4077 2696 c = SREF (newtext, pos_byte++);
3bc25e52 2697 if (buf_multibyte)
4c0354d7 2698 MAKE_CHAR_MULTIBYTE (c);
3bc25e52
KH
2699 }
2700
ca1d1d23 2701 if (c == '&')
68e69fbd 2702 idx = sub;
78445046 2703 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
2704 {
2705 if (search_regs.start[c - '0'] >= 1)
68e69fbd 2706 idx = c - '0';
ca1d1d23 2707 }
636a5e28 2708 else if (c == '\\')
68e69fbd 2709 add_len = 1, add_stuff = "\\";
636a5e28 2710 else
3bc25e52
KH
2711 {
2712 xfree (substed);
2713 error ("Invalid use of `\\' in replacement text");
2714 }
ca1d1d23
JB
2715 }
2716 else
68e69fbd
RS
2717 {
2718 add_len = CHAR_STRING (c, str);
2719 add_stuff = str;
2720 }
2721
2722 /* If we want to copy part of a previous match,
2723 set up ADD_STUFF and ADD_LEN to point to it. */
2724 if (idx >= 0)
2725 {
e7deaab0 2726 EMACS_INT begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
68e69fbd
RS
2727 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2728 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2729 move_gap (search_regs.start[idx]);
2730 add_stuff = BYTE_POS_ADDR (begbyte);
2731 }
2732
2733 /* Now the stuff we want to add to SUBSTED
2734 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2735
2736 /* Make sure SUBSTED is big enough. */
2737 if (substed_len + add_len >= substed_alloc_size)
2738 {
2739 substed_alloc_size = substed_len + add_len + 500;
2740 substed = (unsigned char *) xrealloc (substed,
2741 substed_alloc_size + 1);
2742 }
2743
2744 /* Now add to the end of SUBSTED. */
f8ce8a0d
GM
2745 if (add_stuff)
2746 {
72af86bd 2747 memcpy (substed + substed_len, add_stuff, add_len);
f8ce8a0d
GM
2748 substed_len += add_len;
2749 }
ca1d1d23 2750 }
68e69fbd 2751
886ed6ec 2752 if (really_changed)
76fc1ea2
KH
2753 {
2754 if (buf_multibyte)
2755 {
c098fdb8
EZ
2756 EMACS_INT nchars =
2757 multibyte_chars_in_text (substed, substed_len);
68e69fbd 2758
76fc1ea2
KH
2759 newtext = make_multibyte_string (substed, nchars, substed_len);
2760 }
2761 else
2762 newtext = make_unibyte_string (substed, substed_len);
2763 }
68e69fbd 2764 xfree (substed);
ca1d1d23
JB
2765 }
2766
886ed6ec
RS
2767 /* Replace the old text with the new in the cleanest possible way. */
2768 replace_range (search_regs.start[sub], search_regs.end[sub],
2769 newtext, 1, 0, 1);
d5db4077 2770 newpoint = search_regs.start[sub] + SCHARS (newtext);
ca1d1d23
JB
2771
2772 if (case_action == all_caps)
886ed6ec
RS
2773 Fupcase_region (make_number (search_regs.start[sub]),
2774 make_number (newpoint));
ca1d1d23 2775 else if (case_action == cap_initial)
886ed6ec
RS
2776 Fupcase_initials_region (make_number (search_regs.start[sub]),
2777 make_number (newpoint));
3e18eecf 2778
98e942e0
RS
2779 /* Adjust search data for this change. */
2780 {
e7deaab0
AS
2781 EMACS_INT oldend = search_regs.end[sub];
2782 EMACS_INT oldstart = search_regs.start[sub];
2783 EMACS_INT change = newpoint - search_regs.end[sub];
98e942e0
RS
2784 int i;
2785
2786 for (i = 0; i < search_regs.num_regs; i++)
2787 {
41c01205 2788 if (search_regs.start[i] >= oldend)
98e942e0 2789 search_regs.start[i] += change;
41c01205
DK
2790 else if (search_regs.start[i] > oldstart)
2791 search_regs.start[i] = oldstart;
2792 if (search_regs.end[i] >= oldend)
98e942e0 2793 search_regs.end[i] += change;
41c01205
DK
2794 else if (search_regs.end[i] > oldstart)
2795 search_regs.end[i] = oldstart;
98e942e0
RS
2796 }
2797 }
2798
b0eba991 2799 /* Put point back where it was in the text. */
8d808a65 2800 if (opoint <= 0)
fa8ed3e0 2801 TEMP_SET_PT (opoint + ZV);
b0eba991 2802 else
fa8ed3e0 2803 TEMP_SET_PT (opoint);
b0eba991
RS
2804
2805 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2806 move_if_not_intangible (newpoint);
177c0ea7 2807
ca1d1d23
JB
2808 return Qnil;
2809}
2810\f
2811static Lisp_Object
971de7fb 2812match_limit (Lisp_Object num, int beginningp)
ca1d1d23
JB
2813{
2814 register int n;
2815
b7826503 2816 CHECK_NUMBER (num);
ca1d1d23 2817 n = XINT (num);
f90a5bf5 2818 if (n < 0)
bd2cbd56 2819 args_out_of_range (num, make_number (0));
f90a5bf5
RS
2820 if (search_regs.num_regs <= 0)
2821 error ("No match data, because no search succeeded");
9b9ceb61 2822 if (n >= search_regs.num_regs
4746118a 2823 || search_regs.start[n] < 0)
ca1d1d23
JB
2824 return Qnil;
2825 return (make_number ((beginningp) ? search_regs.start[n]
2826 : search_regs.end[n]));
2827}
2828
2829DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
8c1a1077
PJ
2830 doc: /* Return position of start of text matched by last search.
2831SUBEXP, a number, specifies which parenthesized expression in the last
2832 regexp.
2833Value is nil if SUBEXPth pair didn't match, or there were less than
2834 SUBEXP pairs.
2835Zero means the entire text matched by the whole regexp or whole string. */)
5842a27b 2836 (Lisp_Object subexp)
ca1d1d23 2837{
5806161b 2838 return match_limit (subexp, 1);
ca1d1d23
JB
2839}
2840
2841DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
8c1a1077
PJ
2842 doc: /* Return position of end of text matched by last search.
2843SUBEXP, a number, specifies which parenthesized expression in the last
2844 regexp.
2845Value is nil if SUBEXPth pair didn't match, or there were less than
2846 SUBEXP pairs.
2847Zero means the entire text matched by the whole regexp or whole string. */)
5842a27b 2848 (Lisp_Object subexp)
ca1d1d23 2849{
5806161b 2850 return match_limit (subexp, 0);
177c0ea7 2851}
ca1d1d23 2852
abd0071c 2853DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
8c1a1077
PJ
2854 doc: /* Return a list containing all info on what the last search matched.
2855Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2856All the elements are markers or nil (nil if the Nth pair didn't match)
2857if the last match was on a buffer; integers or nil if a string was matched.
febb8aba 2858Use `set-match-data' to reinstate the data in this list.
8c1a1077 2859
41c01205
DK
2860If INTEGERS (the optional first argument) is non-nil, always use
2861integers \(rather than markers) to represent buffer positions. In
2862this case, and if the last match was in a buffer, the buffer will get
2863stored as one additional element at the end of the list.
2864
abd0071c
KS
2865If REUSE is a list, reuse it as part of the value. If REUSE is long
2866enough to hold all the values, and if INTEGERS is non-nil, no consing
2867is done.
2868
2869If optional third arg RESEAT is non-nil, any previous markers on the
2870REUSE list will be modified to point to nowhere.
2871
140a6b7e 2872Return value is undefined if the last search failed. */)
5842a27b 2873 (Lisp_Object integers, Lisp_Object reuse, Lisp_Object reseat)
ca1d1d23 2874{
56256c2a 2875 Lisp_Object tail, prev;
4746118a 2876 Lisp_Object *data;
ca1d1d23
JB
2877 int i, len;
2878
abd0071c
KS
2879 if (!NILP (reseat))
2880 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2881 if (MARKERP (XCAR (tail)))
2882 {
51f10faa 2883 unchain_marker (XMARKER (XCAR (tail)));
abd0071c
KS
2884 XSETCAR (tail, Qnil);
2885 }
2886
daa37602 2887 if (NILP (last_thing_searched))
c36bcf1b 2888 return Qnil;
daa37602 2889
6bbd7a29
GM
2890 prev = Qnil;
2891
41c01205 2892 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
4746118a
JB
2893 * sizeof (Lisp_Object));
2894
41c01205 2895 len = 0;
4746118a 2896 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2897 {
2898 int start = search_regs.start[i];
2899 if (start >= 0)
2900 {
56256c2a
RS
2901 if (EQ (last_thing_searched, Qt)
2902 || ! NILP (integers))
ca1d1d23 2903 {
c235cce7
KH
2904 XSETFASTINT (data[2 * i], start);
2905 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2906 }
0ed62dc7 2907 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2908 {
2909 data[2 * i] = Fmake_marker ();
daa37602
JB
2910 Fset_marker (data[2 * i],
2911 make_number (start),
2912 last_thing_searched);
ca1d1d23
JB
2913 data[2 * i + 1] = Fmake_marker ();
2914 Fset_marker (data[2 * i + 1],
177c0ea7 2915 make_number (search_regs.end[i]),
daa37602 2916 last_thing_searched);
ca1d1d23 2917 }
daa37602
JB
2918 else
2919 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2920 abort ();
2921
abd0071c 2922 len = 2 * i + 2;
ca1d1d23
JB
2923 }
2924 else
abd0071c 2925 data[2 * i] = data[2 * i + 1] = Qnil;
ca1d1d23 2926 }
56256c2a 2927
bd2cbd56 2928 if (BUFFERP (last_thing_searched) && !NILP (integers))
41c01205 2929 {
bd2cbd56 2930 data[len] = last_thing_searched;
41c01205
DK
2931 len++;
2932 }
2933
56256c2a
RS
2934 /* If REUSE is not usable, cons up the values and return them. */
2935 if (! CONSP (reuse))
41c01205 2936 return Flist (len, data);
56256c2a
RS
2937
2938 /* If REUSE is a list, store as many value elements as will fit
2939 into the elements of REUSE. */
2940 for (i = 0, tail = reuse; CONSP (tail);
c1d497be 2941 i++, tail = XCDR (tail))
56256c2a 2942 {
41c01205 2943 if (i < len)
f3fbd155 2944 XSETCAR (tail, data[i]);
56256c2a 2945 else
f3fbd155 2946 XSETCAR (tail, Qnil);
56256c2a
RS
2947 prev = tail;
2948 }
2949
2950 /* If we couldn't fit all value elements into REUSE,
2951 cons up the rest of them and add them to the end of REUSE. */
41c01205
DK
2952 if (i < len)
2953 XSETCDR (prev, Flist (len - i, data + i));
56256c2a
RS
2954
2955 return reuse;
ca1d1d23
JB
2956}
2957
b51d6c92
SM
2958/* We used to have an internal use variant of `reseat' described as:
2959
2960 If RESEAT is `evaporate', put the markers back on the free list
2961 immediately. No other references to the markers must exist in this
2962 case, so it is used only internally on the unwind stack and
2963 save-match-data from Lisp.
2964
2965 But it was ill-conceived: those supposedly-internal markers get exposed via
2966 the undo-list, so freeing them here is unsafe. */
ca1d1d23 2967
abd0071c 2968DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
8c1a1077 2969 doc: /* Set internal data on last search match from elements of LIST.
abd0071c
KS
2970LIST should have been created by calling `match-data' previously.
2971
51f10faa 2972If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
5842a27b 2973 (register Lisp_Object list, Lisp_Object reseat)
ca1d1d23
JB
2974{
2975 register int i;
2976 register Lisp_Object marker;
2977
7074fde6
FP
2978 if (running_asynch_code)
2979 save_search_regs ();
2980
29100cea 2981 CHECK_LIST (list);
ca1d1d23 2982
41c01205
DK
2983 /* Unless we find a marker with a buffer or an explicit buffer
2984 in LIST, assume that this match data came from a string. */
daa37602
JB
2985 last_thing_searched = Qt;
2986
4746118a
JB
2987 /* Allocate registers if they don't already exist. */
2988 {
d084e942 2989 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
2990
2991 if (length > search_regs.num_regs)
2992 {
1113d9db
JB
2993 if (search_regs.num_regs == 0)
2994 {
2995 search_regs.start
2996 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2997 search_regs.end
2998 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2999 }
4746118a 3000 else
1113d9db
JB
3001 {
3002 search_regs.start
3003 = (regoff_t *) xrealloc (search_regs.start,
3004 length * sizeof (regoff_t));
3005 search_regs.end
3006 = (regoff_t *) xrealloc (search_regs.end,
3007 length * sizeof (regoff_t));
3008 }
4746118a 3009
e62371e9
KH
3010 for (i = search_regs.num_regs; i < length; i++)
3011 search_regs.start[i] = -1;
3012
487282dc 3013 search_regs.num_regs = length;
4746118a 3014 }
ca1d1d23 3015
abd0071c 3016 for (i = 0; CONSP (list); i++)
41c01205 3017 {
abd0071c 3018 marker = XCAR (list);
bd2cbd56 3019 if (BUFFERP (marker))
c3762cbd 3020 {
bd2cbd56 3021 last_thing_searched = marker;
c3762cbd
DK
3022 break;
3023 }
3024 if (i >= length)
3025 break;
41c01205
DK
3026 if (NILP (marker))
3027 {
3028 search_regs.start[i] = -1;
abd0071c 3029 list = XCDR (list);
41c01205
DK
3030 }
3031 else
3032 {
e7deaab0 3033 EMACS_INT from;
abd0071c 3034 Lisp_Object m;
e2811828 3035
abd0071c 3036 m = marker;
41c01205
DK
3037 if (MARKERP (marker))
3038 {
3039 if (XMARKER (marker)->buffer == 0)
3040 XSETFASTINT (marker, 0);
3041 else
3042 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3043 }
e2811828 3044
41c01205
DK
3045 CHECK_NUMBER_COERCE_MARKER (marker);
3046 from = XINT (marker);
e2811828 3047
abd0071c
KS
3048 if (!NILP (reseat) && MARKERP (m))
3049 {
b51d6c92 3050 unchain_marker (XMARKER (m));
9ad54a7e 3051 XSETCAR (list, Qnil);
abd0071c
KS
3052 }
3053
3054 if ((list = XCDR (list), !CONSP (list)))
3055 break;
3056
3057 m = marker = XCAR (list);
3058
41c01205
DK
3059 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3060 XSETFASTINT (marker, 0);
e2811828 3061
41c01205
DK
3062 CHECK_NUMBER_COERCE_MARKER (marker);
3063 search_regs.start[i] = from;
3064 search_regs.end[i] = XINT (marker);
abd0071c
KS
3065
3066 if (!NILP (reseat) && MARKERP (m))
3067 {
b51d6c92 3068 unchain_marker (XMARKER (m));
9ad54a7e 3069 XSETCAR (list, Qnil);
abd0071c 3070 }
41c01205 3071 }
abd0071c 3072 list = XCDR (list);
41c01205 3073 }
ca1d1d23 3074
41c01205
DK
3075 for (; i < search_regs.num_regs; i++)
3076 search_regs.start[i] = -1;
3077 }
ca1d1d23 3078
177c0ea7 3079 return Qnil;
ca1d1d23
JB
3080}
3081
7074fde6
FP
3082/* If non-zero the match data have been saved in saved_search_regs
3083 during the execution of a sentinel or filter. */
75ebf74b 3084static int search_regs_saved;
7074fde6 3085static struct re_registers saved_search_regs;
41c01205 3086static Lisp_Object saved_last_thing_searched;
7074fde6
FP
3087
3088/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3089 if asynchronous code (filter or sentinel) is running. */
3090static void
971de7fb 3091save_search_regs (void)
7074fde6
FP
3092{
3093 if (!search_regs_saved)
3094 {
3095 saved_search_regs.num_regs = search_regs.num_regs;
3096 saved_search_regs.start = search_regs.start;
3097 saved_search_regs.end = search_regs.end;
41c01205
DK
3098 saved_last_thing_searched = last_thing_searched;
3099 last_thing_searched = Qnil;
7074fde6 3100 search_regs.num_regs = 0;
2d4a771a
RS
3101 search_regs.start = 0;
3102 search_regs.end = 0;
7074fde6
FP
3103
3104 search_regs_saved = 1;
3105 }
3106}
3107
3108/* Called upon exit from filters and sentinels. */
3109void
971de7fb 3110restore_search_regs (void)
7074fde6
FP
3111{
3112 if (search_regs_saved)
3113 {
3114 if (search_regs.num_regs > 0)
3115 {
3116 xfree (search_regs.start);
3117 xfree (search_regs.end);
3118 }
3119 search_regs.num_regs = saved_search_regs.num_regs;
3120 search_regs.start = saved_search_regs.start;
3121 search_regs.end = saved_search_regs.end;
41c01205
DK
3122 last_thing_searched = saved_last_thing_searched;
3123 saved_last_thing_searched = Qnil;
7074fde6
FP
3124 search_regs_saved = 0;
3125 }
3126}
3127
abd0071c 3128static Lisp_Object
971de7fb 3129unwind_set_match_data (Lisp_Object list)
abd0071c 3130{
b51d6c92
SM
3131 /* It is NOT ALWAYS safe to free (evaporate) the markers immediately. */
3132 return Fset_match_data (list, Qt);
abd0071c
KS
3133}
3134
3135/* Called to unwind protect the match data. */
3136void
971de7fb 3137record_unwind_save_match_data (void)
abd0071c
KS
3138{
3139 record_unwind_protect (unwind_set_match_data,
3140 Fmatch_data (Qnil, Qnil, Qnil));
3141}
3142
ca1d1d23
JB
3143/* Quote a string to inactivate reg-expr chars */
3144
3145DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
8c1a1077 3146 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
5842a27b 3147 (Lisp_Object string)
ca1d1d23
JB
3148{
3149 register unsigned char *in, *out, *end;
3150 register unsigned char *temp;
0c8533c6 3151 int backslashes_added = 0;
ca1d1d23 3152
b7826503 3153 CHECK_STRING (string);
ca1d1d23 3154
d5db4077 3155 temp = (unsigned char *) alloca (SBYTES (string) * 2);
ca1d1d23
JB
3156
3157 /* Now copy the data into the new string, inserting escapes. */
3158
d5db4077
KR
3159 in = SDATA (string);
3160 end = in + SBYTES (string);
177c0ea7 3161 out = temp;
ca1d1d23
JB
3162
3163 for (; in != end; in++)
3164 {
66bc6082 3165 if (*in == '['
ca1d1d23
JB
3166 || *in == '*' || *in == '.' || *in == '\\'
3167 || *in == '?' || *in == '+'
3168 || *in == '^' || *in == '$')
0c8533c6 3169 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
3170 *out++ = *in;
3171 }
3172
3f8100f1 3173 return make_specified_string (temp,
d5db4077 3174 SCHARS (string) + backslashes_added,
3f8100f1
RS
3175 out - temp,
3176 STRING_MULTIBYTE (string));
ca1d1d23 3177}
177c0ea7 3178\f
dfcf069d 3179void
971de7fb 3180syms_of_search (void)
ca1d1d23
JB
3181{
3182 register int i;
3183
487282dc
KH
3184 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3185 {
3186 searchbufs[i].buf.allocated = 100;
b23c0a83 3187 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
487282dc
KH
3188 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3189 searchbufs[i].regexp = Qnil;
ecdb561e 3190 searchbufs[i].whitespace_regexp = Qnil;
b69e3c18 3191 searchbufs[i].syntax_table = Qnil;
487282dc 3192 staticpro (&searchbufs[i].regexp);
aa77b5ce 3193 staticpro (&searchbufs[i].whitespace_regexp);
b69e3c18 3194 staticpro (&searchbufs[i].syntax_table);
487282dc
KH
3195 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3196 }
3197 searchbuf_head = &searchbufs[0];
ca1d1d23 3198
d67b4f80 3199 Qsearch_failed = intern_c_string ("search-failed");
ca1d1d23 3200 staticpro (&Qsearch_failed);
d67b4f80 3201 Qinvalid_regexp = intern_c_string ("invalid-regexp");
ca1d1d23
JB
3202 staticpro (&Qinvalid_regexp);
3203
3204 Fput (Qsearch_failed, Qerror_conditions,
d67b4f80 3205 pure_cons (Qsearch_failed, pure_cons (Qerror, Qnil)));
ca1d1d23 3206 Fput (Qsearch_failed, Qerror_message,
d67b4f80 3207 make_pure_c_string ("Search failed"));
ca1d1d23
JB
3208
3209 Fput (Qinvalid_regexp, Qerror_conditions,
d67b4f80 3210 pure_cons (Qinvalid_regexp, pure_cons (Qerror, Qnil)));
ca1d1d23 3211 Fput (Qinvalid_regexp, Qerror_message,
d67b4f80 3212 make_pure_c_string ("Invalid regexp"));
ca1d1d23 3213
daa37602
JB
3214 last_thing_searched = Qnil;
3215 staticpro (&last_thing_searched);
3216
0f6af254
DK
3217 saved_last_thing_searched = Qnil;
3218 staticpro (&saved_last_thing_searched);
3219
29208e82 3220 DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp,
e2811828 3221 doc: /* Regexp to substitute for bunches of spaces in regexp search.
f31a9a68
RS
3222Some commands use this for user-specified regexps.
3223Spaces that occur inside character classes or repetition operators
3224or other such regexp constructs are not replaced with this.
3225A value of nil (which is the normal value) means treat spaces literally. */);
41a33295 3226 Vsearch_spaces_regexp = Qnil;
f31a9a68 3227
29208e82 3228 DEFVAR_LISP ("inhibit-changing-match-data", Vinhibit_changing_match_data,
ef887810 3229 doc: /* Internal use only.
39d0bf74
RS
3230If non-nil, the primitive searching and matching functions
3231such as `looking-at', `string-match', `re-search-forward', etc.,
3232do not set the match data. The proper way to use this variable
3233is to bind it with `let' around a small expression. */);
ef887810
RS
3234 Vinhibit_changing_match_data = Qnil;
3235
ca1d1d23 3236 defsubr (&Slooking_at);
b819a390
RS
3237 defsubr (&Sposix_looking_at);
3238 defsubr (&Sstring_match);
3239 defsubr (&Sposix_string_match);
ca1d1d23
JB
3240 defsubr (&Ssearch_forward);
3241 defsubr (&Ssearch_backward);
3242 defsubr (&Sword_search_forward);
3243 defsubr (&Sword_search_backward);
c6e0e1cc
CY
3244 defsubr (&Sword_search_forward_lax);
3245 defsubr (&Sword_search_backward_lax);
ca1d1d23
JB
3246 defsubr (&Sre_search_forward);
3247 defsubr (&Sre_search_backward);
b819a390
RS
3248 defsubr (&Sposix_search_forward);
3249 defsubr (&Sposix_search_backward);
ca1d1d23
JB
3250 defsubr (&Sreplace_match);
3251 defsubr (&Smatch_beginning);
3252 defsubr (&Smatch_end);
3253 defsubr (&Smatch_data);
3f1c005b 3254 defsubr (&Sset_match_data);
ca1d1d23
JB
3255 defsubr (&Sregexp_quote);
3256}