Convert (most) functions in src to standard C.
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
429ab54e 2 Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
114f9c96 3 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
8cabe764 4 Free Software Foundation, Inc.
ca1d1d23
JB
5
6This file is part of GNU Emacs.
7
9ec0b715 8GNU Emacs is free software: you can redistribute it and/or modify
ca1d1d23 9it under the terms of the GNU General Public License as published by
9ec0b715
GM
10the Free Software Foundation, either version 3 of the License, or
11(at your option) any later version.
ca1d1d23
JB
12
13GNU Emacs is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
9ec0b715 19along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
ca1d1d23
JB
20
21
18160b98 22#include <config.h>
d7306fe6 23#include <setjmp.h>
ca1d1d23
JB
24#include "lisp.h"
25#include "syntax.h"
5679531d 26#include "category.h"
ca1d1d23 27#include "buffer.h"
76eb0881 28#include "character.h"
89ad6492 29#include "charset.h"
9169c321 30#include "region-cache.h"
ca1d1d23 31#include "commands.h"
9ac0d9e0 32#include "blockinput.h"
bf1760bb 33#include "intervals.h"
4746118a 34
ca1d1d23
JB
35#include <sys/types.h>
36#include "regex.h"
37
1d288aef 38#define REGEXP_CACHE_SIZE 20
ca1d1d23 39
487282dc
KH
40/* If the regexp is non-nil, then the buffer contains the compiled form
41 of that regexp, suitable for searching. */
1d288aef
RS
42struct regexp_cache
43{
487282dc 44 struct regexp_cache *next;
ecdb561e 45 Lisp_Object regexp, whitespace_regexp;
b69e3c18 46 /* Syntax table for which the regexp applies. We need this because
58e95211
SM
47 of character classes. If this is t, then the compiled pattern is valid
48 for any syntax-table. */
b69e3c18 49 Lisp_Object syntax_table;
487282dc
KH
50 struct re_pattern_buffer buf;
51 char fastmap[0400];
b819a390
RS
52 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
53 char posix;
487282dc 54};
ca1d1d23 55
487282dc
KH
56/* The instances of that struct. */
57struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 58
487282dc
KH
59/* The head of the linked list; points to the most recently used buffer. */
60struct regexp_cache *searchbuf_head;
ca1d1d23 61
ca1d1d23 62
4746118a
JB
63/* Every call to re_match, etc., must pass &search_regs as the regs
64 argument unless you can show it is unnecessary (i.e., if re_match
65 is certainly going to be called again before region-around-match
66 can be called).
67
68 Since the registers are now dynamically allocated, we need to make
69 sure not to refer to the Nth register before checking that it has
1113d9db
JB
70 been allocated by checking search_regs.num_regs.
71
72 The regex code keeps track of whether it has allocated the search
487282dc
KH
73 buffer using bits in the re_pattern_buffer. This means that whenever
74 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
75 allocated any registers, and will allocate new registers the next
76 time you call a searching or matching function. Therefore, we need
77 to call re_set_registers after compiling a new pattern or after
78 setting the match registers, so that the regex functions will be
79 able to free or re-allocate it properly. */
ca1d1d23
JB
80static struct re_registers search_regs;
81
daa37602
JB
82/* The buffer in which the last search was performed, or
83 Qt if the last search was done in a string;
84 Qnil if no searching has been done yet. */
85static Lisp_Object last_thing_searched;
ca1d1d23 86
8e6208c5 87/* error condition signaled when regexp compile_pattern fails */
ca1d1d23
JB
88
89Lisp_Object Qinvalid_regexp;
90
06f77a8a
KS
91/* Error condition used for failing searches */
92Lisp_Object Qsearch_failed;
93
41a33295 94Lisp_Object Vsearch_spaces_regexp;
f31a9a68 95
ef887810
RS
96/* If non-nil, the match data will not be changed during call to
97 searching or matching functions. This variable is for internal use
98 only. */
99Lisp_Object Vinhibit_changing_match_data;
100
f57e2426
J
101static void set_search_regs (EMACS_INT, EMACS_INT);
102static void save_search_regs (void);
103static EMACS_INT simple_search (int, unsigned char *, int, int,
104 Lisp_Object, EMACS_INT, EMACS_INT,
105 EMACS_INT, EMACS_INT);
106static EMACS_INT boyer_moore (int, unsigned char *, int, int,
107 Lisp_Object, Lisp_Object,
108 EMACS_INT, EMACS_INT,
109 EMACS_INT, EMACS_INT, int);
110static EMACS_INT search_buffer (Lisp_Object, EMACS_INT, EMACS_INT,
111 EMACS_INT, EMACS_INT, int, int,
112 Lisp_Object, Lisp_Object, int);
971de7fb 113static void matcher_overflow (void) NO_RETURN;
b819a390 114
ca1d1d23 115static void
971de7fb 116matcher_overflow (void)
ca1d1d23
JB
117{
118 error ("Stack overflow in regexp matcher");
119}
120
b819a390
RS
121/* Compile a regexp and signal a Lisp error if anything goes wrong.
122 PATTERN is the pattern to compile.
123 CP is the place to put the result.
facdc750 124 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
125 REGP is the structure that says where to store the "register"
126 values that will result from matching this pattern.
127 If it is 0, we should compile the pattern not to record any
128 subexpression bounds.
129 POSIX is nonzero if we want full backtracking (POSIX style)
5679531d 130 for this pattern. 0 means backtrack only enough to get a valid match.
bbc73b48
KH
131
132 The behavior also depends on Vsearch_spaces_regexp. */
ca1d1d23 133
487282dc 134static void
971de7fb 135compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, Lisp_Object translate, struct re_registers *regp, int posix)
ca1d1d23 136{
d451e4db 137 char *val;
b819a390 138 reg_syntax_t old;
ca1d1d23 139
487282dc 140 cp->regexp = Qnil;
59fab369 141 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
b819a390 142 cp->posix = posix;
93daa011 143 cp->buf.multibyte = STRING_MULTIBYTE (pattern);
89ad6492 144 cp->buf.charset_unibyte = charset_unibyte;
d8c85250
CY
145 if (STRINGP (Vsearch_spaces_regexp))
146 cp->whitespace_regexp = Vsearch_spaces_regexp;
147 else
148 cp->whitespace_regexp = Qnil;
149
e7b2dd2e
RS
150 /* rms: I think BLOCK_INPUT is not needed here any more,
151 because regex.c defines malloc to call xmalloc.
152 Using BLOCK_INPUT here means the debugger won't run if an error occurs.
153 So let's turn it off. */
154 /* BLOCK_INPUT; */
fb4a568d 155 old = re_set_syntax (RE_SYNTAX_EMACS
b819a390 156 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
d8c85250
CY
157
158 if (STRINGP (Vsearch_spaces_regexp))
159 re_set_whitespace_regexp (SDATA (Vsearch_spaces_regexp));
160 else
161 re_set_whitespace_regexp (NULL);
bbc73b48 162
8f924df7
KH
163 val = (char *) re_compile_pattern ((char *) SDATA (pattern),
164 SBYTES (pattern), &cp->buf);
bbc73b48 165
58e95211
SM
166 /* If the compiled pattern hard codes some of the contents of the
167 syntax-table, it can only be reused with *this* syntax table. */
168 cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
169
bbc73b48
KH
170 re_set_whitespace_regexp (NULL);
171
b819a390 172 re_set_syntax (old);
e7b2dd2e 173 /* UNBLOCK_INPUT; */
ca1d1d23 174 if (val)
06f77a8a 175 xsignal1 (Qinvalid_regexp, build_string (val));
1113d9db 176
487282dc 177 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
178}
179
6efc7887
RS
180/* Shrink each compiled regexp buffer in the cache
181 to the size actually used right now.
182 This is called from garbage collection. */
183
184void
971de7fb 185shrink_regexp_cache (void)
6efc7887 186{
a968f437 187 struct regexp_cache *cp;
6efc7887
RS
188
189 for (cp = searchbuf_head; cp != 0; cp = cp->next)
190 {
191 cp->buf.allocated = cp->buf.used;
192 cp->buf.buffer
b23c0a83 193 = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
6efc7887
RS
194 }
195}
196
54dd3310
SM
197/* Clear the regexp cache w.r.t. a particular syntax table,
198 because it was changed.
e5b94d44
CY
199 There is no danger of memory leak here because re_compile_pattern
200 automagically manages the memory in each re_pattern_buffer struct,
201 based on its `allocated' and `buffer' values. */
202void
971de7fb 203clear_regexp_cache (void)
e5b94d44
CY
204{
205 int i;
206
207 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
920fd1fc
EZ
208 /* It's tempting to compare with the syntax-table we've actually changed,
209 but it's not sufficient because char-table inheritance means that
54dd3310
SM
210 modifying one syntax-table can change others at the same time. */
211 if (!EQ (searchbufs[i].syntax_table, Qt))
212 searchbufs[i].regexp = Qnil;
e5b94d44
CY
213}
214
487282dc 215/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
216 the cache.
217 PATTERN is the pattern to compile.
facdc750 218 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
219 REGP is the structure that says where to store the "register"
220 values that will result from matching this pattern.
221 If it is 0, we should compile the pattern not to record any
222 subexpression bounds.
223 POSIX is nonzero if we want full backtracking (POSIX style)
224 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
225
226struct re_pattern_buffer *
971de7fb 227compile_pattern (Lisp_Object pattern, struct re_registers *regp, Lisp_Object translate, int posix, int multibyte)
487282dc
KH
228{
229 struct regexp_cache *cp, **cpp;
230
231 for (cpp = &searchbuf_head; ; cpp = &cp->next)
232 {
233 cp = *cpp;
f1b9c7c1
KR
234 /* Entries are initialized to nil, and may be set to nil by
235 compile_pattern_1 if the pattern isn't valid. Don't apply
49a5f770
KR
236 string accessors in those cases. However, compile_pattern_1
237 is only applied to the cache entry we pick here to reuse. So
238 nil should never appear before a non-nil entry. */
7c752c80 239 if (NILP (cp->regexp))
f1b9c7c1 240 goto compile_it;
d5db4077 241 if (SCHARS (cp->regexp) == SCHARS (pattern)
cf69b13e 242 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
1d288aef 243 && !NILP (Fstring_equal (cp->regexp, pattern))
59fab369 244 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
5679531d 245 && cp->posix == posix
58e95211
SM
246 && (EQ (cp->syntax_table, Qt)
247 || EQ (cp->syntax_table, current_buffer->syntax_table))
89ad6492
KH
248 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
249 && cp->buf.charset_unibyte == charset_unibyte)
487282dc
KH
250 break;
251
f1b9c7c1
KR
252 /* If we're at the end of the cache, compile into the nil cell
253 we found, or the last (least recently used) cell with a
254 string value. */
487282dc
KH
255 if (cp->next == 0)
256 {
f1b9c7c1 257 compile_it:
89ad6492 258 compile_pattern_1 (cp, pattern, translate, regp, posix);
487282dc
KH
259 break;
260 }
261 }
262
263 /* When we get here, cp (aka *cpp) contains the compiled pattern,
264 either because we found it in the cache or because we just compiled it.
265 Move it to the front of the queue to mark it as most recently used. */
266 *cpp = cp->next;
267 cp->next = searchbuf_head;
268 searchbuf_head = cp;
1113d9db 269
6639708c
RS
270 /* Advise the searching functions about the space we have allocated
271 for register data. */
272 if (regp)
273 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
274
78edd3b7 275 /* The compiled pattern can be used both for multibyte and unibyte
89ad6492
KH
276 target. But, we have to tell which the pattern is used for. */
277 cp->buf.target_multibyte = multibyte;
278
487282dc 279 return &cp->buf;
ca1d1d23
JB
280}
281
ca1d1d23 282\f
b819a390 283static Lisp_Object
971de7fb 284looking_at_1 (Lisp_Object string, int posix)
ca1d1d23
JB
285{
286 Lisp_Object val;
287 unsigned char *p1, *p2;
e7deaab0 288 EMACS_INT s1, s2;
ca1d1d23 289 register int i;
487282dc 290 struct re_pattern_buffer *bufp;
ca1d1d23 291
7074fde6
FP
292 if (running_asynch_code)
293 save_search_regs ();
294
910c747a
RS
295 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
296 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
297 = current_buffer->case_eqv_table;
298
b7826503 299 CHECK_STRING (string);
ef887810
RS
300 bufp = compile_pattern (string,
301 (NILP (Vinhibit_changing_match_data)
302 ? &search_regs : NULL),
487282dc 303 (!NILP (current_buffer->case_fold_search)
0190922f 304 ? current_buffer->case_canon_table : Qnil),
0c8533c6
RS
305 posix,
306 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23
JB
307
308 immediate_quit = 1;
309 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
310
311 /* Get pointers and sizes of the two strings
312 that make up the visible portion of the buffer. */
313
314 p1 = BEGV_ADDR;
fa8ed3e0 315 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 316 p2 = GAP_END_ADDR;
fa8ed3e0 317 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
318 if (s1 < 0)
319 {
320 p2 = p1;
fa8ed3e0 321 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
322 s1 = 0;
323 }
324 if (s2 < 0)
325 {
fa8ed3e0 326 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
327 s2 = 0;
328 }
8bb43c28
RS
329
330 re_match_object = Qnil;
177c0ea7 331
487282dc 332 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
ef887810
RS
333 PT_BYTE - BEGV_BYTE,
334 (NILP (Vinhibit_changing_match_data)
335 ? &search_regs : NULL),
fa8ed3e0 336 ZV_BYTE - BEGV_BYTE);
de182d70 337 immediate_quit = 0;
177c0ea7 338
ca1d1d23
JB
339 if (i == -2)
340 matcher_overflow ();
341
342 val = (0 <= i ? Qt : Qnil);
ef887810 343 if (NILP (Vinhibit_changing_match_data) && i >= 0)
fa8ed3e0
RS
344 for (i = 0; i < search_regs.num_regs; i++)
345 if (search_regs.start[i] >= 0)
346 {
347 search_regs.start[i]
348 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
349 search_regs.end[i]
350 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
351 }
ef887810
RS
352
353 /* Set last_thing_searched only when match data is changed. */
354 if (NILP (Vinhibit_changing_match_data))
355 XSETBUFFER (last_thing_searched, current_buffer);
356
ca1d1d23
JB
357 return val;
358}
359
b819a390 360DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
8c1a1077
PJ
361 doc: /* Return t if text after point matches regular expression REGEXP.
362This function modifies the match data that `match-beginning',
363`match-end' and `match-data' access; save and restore the match
364data if you want to preserve them. */)
365 (regexp)
94f94972 366 Lisp_Object regexp;
b819a390 367{
94f94972 368 return looking_at_1 (regexp, 0);
b819a390
RS
369}
370
371DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
8c1a1077
PJ
372 doc: /* Return t if text after point matches regular expression REGEXP.
373Find the longest match, in accord with Posix regular expression rules.
374This function modifies the match data that `match-beginning',
375`match-end' and `match-data' access; save and restore the match
376data if you want to preserve them. */)
377 (regexp)
94f94972 378 Lisp_Object regexp;
b819a390 379{
94f94972 380 return looking_at_1 (regexp, 1);
b819a390
RS
381}
382\f
383static Lisp_Object
971de7fb 384string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, int posix)
ca1d1d23
JB
385{
386 int val;
487282dc 387 struct re_pattern_buffer *bufp;
e7deaab0 388 EMACS_INT pos, pos_byte;
0c8533c6 389 int i;
ca1d1d23 390
7074fde6
FP
391 if (running_asynch_code)
392 save_search_regs ();
393
b7826503
PJ
394 CHECK_STRING (regexp);
395 CHECK_STRING (string);
ca1d1d23
JB
396
397 if (NILP (start))
0c8533c6 398 pos = 0, pos_byte = 0;
ca1d1d23
JB
399 else
400 {
d5db4077 401 int len = SCHARS (string);
ca1d1d23 402
b7826503 403 CHECK_NUMBER (start);
0c8533c6
RS
404 pos = XINT (start);
405 if (pos < 0 && -pos <= len)
406 pos = len + pos;
407 else if (0 > pos || pos > len)
ca1d1d23 408 args_out_of_range (string, start);
0c8533c6 409 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
410 }
411
910c747a
RS
412 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
413 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
414 = current_buffer->case_eqv_table;
415
ef887810
RS
416 bufp = compile_pattern (regexp,
417 (NILP (Vinhibit_changing_match_data)
418 ? &search_regs : NULL),
487282dc 419 (!NILP (current_buffer->case_fold_search)
0190922f 420 ? current_buffer->case_canon_table : Qnil),
0c8533c6
RS
421 posix,
422 STRING_MULTIBYTE (string));
ca1d1d23 423 immediate_quit = 1;
8bb43c28 424 re_match_object = string;
177c0ea7 425
d5db4077
KR
426 val = re_search (bufp, (char *) SDATA (string),
427 SBYTES (string), pos_byte,
428 SBYTES (string) - pos_byte,
ef887810
RS
429 (NILP (Vinhibit_changing_match_data)
430 ? &search_regs : NULL));
ca1d1d23 431 immediate_quit = 0;
ef887810
RS
432
433 /* Set last_thing_searched only when match data is changed. */
434 if (NILP (Vinhibit_changing_match_data))
435 last_thing_searched = Qt;
436
ca1d1d23
JB
437 if (val == -2)
438 matcher_overflow ();
439 if (val < 0) return Qnil;
0c8533c6 440
ef887810
RS
441 if (NILP (Vinhibit_changing_match_data))
442 for (i = 0; i < search_regs.num_regs; i++)
443 if (search_regs.start[i] >= 0)
444 {
445 search_regs.start[i]
446 = string_byte_to_char (string, search_regs.start[i]);
447 search_regs.end[i]
448 = string_byte_to_char (string, search_regs.end[i]);
449 }
0c8533c6
RS
450
451 return make_number (string_byte_to_char (string, val));
ca1d1d23 452}
e59a8453 453
b819a390 454DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
8c1a1077 455 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
b85acc4b 456Matching ignores case if `case-fold-search' is non-nil.
8c1a1077
PJ
457If third arg START is non-nil, start search at that index in STRING.
458For index of first char beyond the match, do (match-end 0).
459`match-end' and `match-beginning' also give indices of substrings
2bd2f32d
RS
460matched by parenthesis constructs in the pattern.
461
462You can use the function `match-string' to extract the substrings
463matched by the parenthesis constructions in REGEXP. */)
8c1a1077 464 (regexp, string, start)
b819a390
RS
465 Lisp_Object regexp, string, start;
466{
467 return string_match_1 (regexp, string, start, 0);
468}
469
470DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
8c1a1077
PJ
471 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
472Find the longest match, in accord with Posix regular expression rules.
473Case is ignored if `case-fold-search' is non-nil in the current buffer.
474If third arg START is non-nil, start search at that index in STRING.
475For index of first char beyond the match, do (match-end 0).
476`match-end' and `match-beginning' also give indices of substrings
477matched by parenthesis constructs in the pattern. */)
478 (regexp, string, start)
b819a390
RS
479 Lisp_Object regexp, string, start;
480{
481 return string_match_1 (regexp, string, start, 1);
482}
483
e59a8453
RS
484/* Match REGEXP against STRING, searching all of STRING,
485 and return the index of the match, or negative on failure.
486 This does not clobber the match data. */
487
488int
971de7fb 489fast_string_match (Lisp_Object regexp, Lisp_Object string)
e59a8453
RS
490{
491 int val;
487282dc 492 struct re_pattern_buffer *bufp;
e59a8453 493
facdc750
RS
494 bufp = compile_pattern (regexp, 0, Qnil,
495 0, STRING_MULTIBYTE (string));
e59a8453 496 immediate_quit = 1;
8bb43c28 497 re_match_object = string;
177c0ea7 498
d5db4077
KR
499 val = re_search (bufp, (char *) SDATA (string),
500 SBYTES (string), 0,
501 SBYTES (string), 0);
e59a8453
RS
502 immediate_quit = 0;
503 return val;
504}
5679531d
KH
505
506/* Match REGEXP against STRING, searching all of STRING ignoring case,
507 and return the index of the match, or negative on failure.
0c8533c6
RS
508 This does not clobber the match data.
509 We assume that STRING contains single-byte characters. */
5679531d
KH
510
511extern Lisp_Object Vascii_downcase_table;
512
513int
971de7fb 514fast_c_string_match_ignore_case (Lisp_Object regexp, const char *string)
5679531d
KH
515{
516 int val;
517 struct re_pattern_buffer *bufp;
518 int len = strlen (string);
519
0c8533c6 520 regexp = string_make_unibyte (regexp);
b4577c63 521 re_match_object = Qt;
5679531d 522 bufp = compile_pattern (regexp, 0,
0190922f 523 Vascii_canon_table, 0,
f8bd51c4 524 0);
5679531d
KH
525 immediate_quit = 1;
526 val = re_search (bufp, string, len, 0, len, 0);
527 immediate_quit = 0;
528 return val;
529}
be5f4dfb
KH
530
531/* Like fast_string_match but ignore case. */
532
533int
971de7fb 534fast_string_match_ignore_case (Lisp_Object regexp, Lisp_Object string)
be5f4dfb
KH
535{
536 int val;
537 struct re_pattern_buffer *bufp;
538
0190922f 539 bufp = compile_pattern (regexp, 0, Vascii_canon_table,
be5f4dfb
KH
540 0, STRING_MULTIBYTE (string));
541 immediate_quit = 1;
542 re_match_object = string;
543
544 val = re_search (bufp, (char *) SDATA (string),
545 SBYTES (string), 0,
546 SBYTES (string), 0);
547 immediate_quit = 0;
548 return val;
549}
ca1d1d23 550\f
a80ce213 551/* Match REGEXP against the characters after POS to LIMIT, and return
8b13507a
KH
552 the number of matched characters. If STRING is non-nil, match
553 against the characters in it. In that case, POS and LIMIT are
554 indices into the string. This function doesn't modify the match
555 data. */
556
557EMACS_INT
971de7fb 558fast_looking_at (Lisp_Object regexp, EMACS_INT pos, EMACS_INT pos_byte, EMACS_INT limit, EMACS_INT limit_byte, Lisp_Object string)
8b13507a
KH
559{
560 int multibyte;
561 struct re_pattern_buffer *buf;
562 unsigned char *p1, *p2;
e7deaab0 563 EMACS_INT s1, s2;
8b13507a 564 EMACS_INT len;
78edd3b7 565
8b13507a
KH
566 if (STRINGP (string))
567 {
568 if (pos_byte < 0)
569 pos_byte = string_char_to_byte (string, pos);
570 if (limit_byte < 0)
571 limit_byte = string_char_to_byte (string, limit);
572 p1 = NULL;
573 s1 = 0;
574 p2 = SDATA (string);
575 s2 = SBYTES (string);
576 re_match_object = string;
577 multibyte = STRING_MULTIBYTE (string);
578 }
579 else
580 {
581 if (pos_byte < 0)
582 pos_byte = CHAR_TO_BYTE (pos);
583 if (limit_byte < 0)
584 limit_byte = CHAR_TO_BYTE (limit);
585 pos_byte -= BEGV_BYTE;
586 limit_byte -= BEGV_BYTE;
587 p1 = BEGV_ADDR;
588 s1 = GPT_BYTE - BEGV_BYTE;
589 p2 = GAP_END_ADDR;
590 s2 = ZV_BYTE - GPT_BYTE;
591 if (s1 < 0)
592 {
593 p2 = p1;
594 s2 = ZV_BYTE - BEGV_BYTE;
595 s1 = 0;
596 }
597 if (s2 < 0)
598 {
599 s1 = ZV_BYTE - BEGV_BYTE;
600 s2 = 0;
601 }
602 re_match_object = Qnil;
603 multibyte = ! NILP (current_buffer->enable_multibyte_characters);
604 }
605
606 buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
607 immediate_quit = 1;
608 len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
609 pos_byte, NULL, limit_byte);
610 immediate_quit = 0;
611
612 return len;
613}
614
615\f
9169c321
JB
616/* The newline cache: remembering which sections of text have no newlines. */
617
618/* If the user has requested newline caching, make sure it's on.
619 Otherwise, make sure it's off.
620 This is our cheezy way of associating an action with the change of
621 state of a buffer-local variable. */
622static void
971de7fb 623newline_cache_on_off (struct buffer *buf)
9169c321
JB
624{
625 if (NILP (buf->cache_long_line_scans))
626 {
627 /* It should be off. */
628 if (buf->newline_cache)
629 {
630 free_region_cache (buf->newline_cache);
631 buf->newline_cache = 0;
632 }
633 }
634 else
635 {
636 /* It should be on. */
637 if (buf->newline_cache == 0)
638 buf->newline_cache = new_region_cache ();
639 }
640}
641
642\f
643/* Search for COUNT instances of the character TARGET between START and END.
644
645 If COUNT is positive, search forwards; END must be >= START.
646 If COUNT is negative, search backwards for the -COUNTth instance;
647 END must be <= START.
648 If COUNT is zero, do anything you please; run rogue, for all I care.
649
650 If END is zero, use BEGV or ZV instead, as appropriate for the
651 direction indicated by COUNT.
ffd56f97
JB
652
653 If we find COUNT instances, set *SHORTAGE to zero, and return the
a9f2a45f 654 position past the COUNTth match. Note that for reverse motion
5bfe95c9 655 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 656
9169c321
JB
657 If we don't find COUNT instances before reaching END, set *SHORTAGE
658 to the number of TARGETs left unfound, and return END.
ffd56f97 659
087a5f81
RS
660 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
661 except when inside redisplay. */
662
dfcf069d 663int
971de7fb 664scan_buffer (register int target, EMACS_INT start, EMACS_INT end, int count, int *shortage, int allow_quit)
ca1d1d23 665{
9169c321 666 struct region_cache *newline_cache;
177c0ea7 667 int direction;
ffd56f97 668
9169c321
JB
669 if (count > 0)
670 {
671 direction = 1;
672 if (! end) end = ZV;
673 }
674 else
675 {
676 direction = -1;
677 if (! end) end = BEGV;
678 }
ffd56f97 679
9169c321
JB
680 newline_cache_on_off (current_buffer);
681 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
682
683 if (shortage != 0)
684 *shortage = 0;
685
087a5f81 686 immediate_quit = allow_quit;
ca1d1d23 687
ffd56f97 688 if (count > 0)
9169c321 689 while (start != end)
ca1d1d23 690 {
9169c321
JB
691 /* Our innermost scanning loop is very simple; it doesn't know
692 about gaps, buffer ends, or the newline cache. ceiling is
693 the position of the last character before the next such
694 obstacle --- the last character the dumb search loop should
695 examine. */
e7deaab0
AS
696 EMACS_INT ceiling_byte = CHAR_TO_BYTE (end) - 1;
697 EMACS_INT start_byte = CHAR_TO_BYTE (start);
698 EMACS_INT tem;
9169c321
JB
699
700 /* If we're looking for a newline, consult the newline cache
701 to see where we can avoid some scanning. */
702 if (target == '\n' && newline_cache)
703 {
704 int next_change;
705 immediate_quit = 0;
706 while (region_cache_forward
fa8ed3e0
RS
707 (current_buffer, newline_cache, start_byte, &next_change))
708 start_byte = next_change;
cbe0db0d 709 immediate_quit = allow_quit;
9169c321 710
fa8ed3e0
RS
711 /* START should never be after END. */
712 if (start_byte > ceiling_byte)
713 start_byte = ceiling_byte;
9169c321
JB
714
715 /* Now the text after start is an unknown region, and
716 next_change is the position of the next known region. */
fa8ed3e0 717 ceiling_byte = min (next_change - 1, ceiling_byte);
9169c321
JB
718 }
719
720 /* The dumb loop can only scan text stored in contiguous
721 bytes. BUFFER_CEILING_OF returns the last character
722 position that is contiguous, so the ceiling is the
723 position after that. */
67ce527d
KH
724 tem = BUFFER_CEILING_OF (start_byte);
725 ceiling_byte = min (tem, ceiling_byte);
9169c321
JB
726
727 {
177c0ea7 728 /* The termination address of the dumb loop. */
fa8ed3e0
RS
729 register unsigned char *ceiling_addr
730 = BYTE_POS_ADDR (ceiling_byte) + 1;
731 register unsigned char *cursor
732 = BYTE_POS_ADDR (start_byte);
9169c321
JB
733 unsigned char *base = cursor;
734
735 while (cursor < ceiling_addr)
736 {
737 unsigned char *scan_start = cursor;
738
739 /* The dumb loop. */
740 while (*cursor != target && ++cursor < ceiling_addr)
741 ;
742
743 /* If we're looking for newlines, cache the fact that
744 the region from start to cursor is free of them. */
745 if (target == '\n' && newline_cache)
746 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
747 start_byte + scan_start - base,
748 start_byte + cursor - base);
9169c321
JB
749
750 /* Did we find the target character? */
751 if (cursor < ceiling_addr)
752 {
753 if (--count == 0)
754 {
755 immediate_quit = 0;
fa8ed3e0 756 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
9169c321
JB
757 }
758 cursor++;
759 }
760 }
761
fa8ed3e0 762 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321 763 }
ca1d1d23
JB
764 }
765 else
9169c321
JB
766 while (start > end)
767 {
768 /* The last character to check before the next obstacle. */
e7deaab0
AS
769 EMACS_INT ceiling_byte = CHAR_TO_BYTE (end);
770 EMACS_INT start_byte = CHAR_TO_BYTE (start);
771 EMACS_INT tem;
9169c321
JB
772
773 /* Consult the newline cache, if appropriate. */
774 if (target == '\n' && newline_cache)
775 {
776 int next_change;
777 immediate_quit = 0;
778 while (region_cache_backward
fa8ed3e0
RS
779 (current_buffer, newline_cache, start_byte, &next_change))
780 start_byte = next_change;
cbe0db0d 781 immediate_quit = allow_quit;
9169c321
JB
782
783 /* Start should never be at or before end. */
fa8ed3e0
RS
784 if (start_byte <= ceiling_byte)
785 start_byte = ceiling_byte + 1;
9169c321
JB
786
787 /* Now the text before start is an unknown region, and
788 next_change is the position of the next known region. */
fa8ed3e0 789 ceiling_byte = max (next_change, ceiling_byte);
9169c321
JB
790 }
791
792 /* Stop scanning before the gap. */
67ce527d
KH
793 tem = BUFFER_FLOOR_OF (start_byte - 1);
794 ceiling_byte = max (tem, ceiling_byte);
9169c321
JB
795
796 {
797 /* The termination address of the dumb loop. */
fa8ed3e0
RS
798 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
799 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
800 unsigned char *base = cursor;
801
802 while (cursor >= ceiling_addr)
803 {
804 unsigned char *scan_start = cursor;
805
806 while (*cursor != target && --cursor >= ceiling_addr)
807 ;
808
809 /* If we're looking for newlines, cache the fact that
810 the region from after the cursor to start is free of them. */
811 if (target == '\n' && newline_cache)
812 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
813 start_byte + cursor - base,
814 start_byte + scan_start - base);
9169c321
JB
815
816 /* Did we find the target character? */
817 if (cursor >= ceiling_addr)
818 {
819 if (++count >= 0)
820 {
821 immediate_quit = 0;
fa8ed3e0 822 return BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
823 }
824 cursor--;
825 }
826 }
827
fa8ed3e0 828 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
829 }
830 }
831
ca1d1d23
JB
832 immediate_quit = 0;
833 if (shortage != 0)
ffd56f97 834 *shortage = count * direction;
9169c321 835 return start;
ca1d1d23 836}
fa8ed3e0
RS
837\f
838/* Search for COUNT instances of a line boundary, which means either a
839 newline or (if selective display enabled) a carriage return.
840 Start at START. If COUNT is negative, search backwards.
841
842 We report the resulting position by calling TEMP_SET_PT_BOTH.
843
844 If we find COUNT instances. we position after (always after,
845 even if scanning backwards) the COUNTth match, and return 0.
846
847 If we don't find COUNT instances before reaching the end of the
848 buffer (or the beginning, if scanning backwards), we return
849 the number of line boundaries left unfound, and position at
850 the limit we bumped up against.
851
852 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
d5d57b92 853 except in special cases. */
ca1d1d23 854
63fa018d 855int
971de7fb 856scan_newline (EMACS_INT start, EMACS_INT start_byte, EMACS_INT limit, EMACS_INT limit_byte, register int count, int allow_quit)
63fa018d 857{
fa8ed3e0
RS
858 int direction = ((count > 0) ? 1 : -1);
859
860 register unsigned char *cursor;
861 unsigned char *base;
862
e7deaab0 863 EMACS_INT ceiling;
fa8ed3e0
RS
864 register unsigned char *ceiling_addr;
865
d5d57b92
RS
866 int old_immediate_quit = immediate_quit;
867
fa8ed3e0
RS
868 /* The code that follows is like scan_buffer
869 but checks for either newline or carriage return. */
870
d5d57b92
RS
871 if (allow_quit)
872 immediate_quit++;
fa8ed3e0
RS
873
874 start_byte = CHAR_TO_BYTE (start);
875
876 if (count > 0)
877 {
878 while (start_byte < limit_byte)
879 {
880 ceiling = BUFFER_CEILING_OF (start_byte);
881 ceiling = min (limit_byte - 1, ceiling);
882 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
883 base = (cursor = BYTE_POS_ADDR (start_byte));
884 while (1)
885 {
886 while (*cursor != '\n' && ++cursor != ceiling_addr)
887 ;
888
889 if (cursor != ceiling_addr)
890 {
891 if (--count == 0)
892 {
d5d57b92 893 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
894 start_byte = start_byte + cursor - base + 1;
895 start = BYTE_TO_CHAR (start_byte);
896 TEMP_SET_PT_BOTH (start, start_byte);
897 return 0;
898 }
899 else
900 if (++cursor == ceiling_addr)
901 break;
902 }
903 else
904 break;
905 }
906 start_byte += cursor - base;
907 }
908 }
909 else
910 {
fa8ed3e0
RS
911 while (start_byte > limit_byte)
912 {
913 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
914 ceiling = max (limit_byte, ceiling);
915 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
916 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
917 while (1)
918 {
919 while (--cursor != ceiling_addr && *cursor != '\n')
920 ;
921
922 if (cursor != ceiling_addr)
923 {
924 if (++count == 0)
925 {
d5d57b92 926 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
927 /* Return the position AFTER the match we found. */
928 start_byte = start_byte + cursor - base + 1;
929 start = BYTE_TO_CHAR (start_byte);
930 TEMP_SET_PT_BOTH (start, start_byte);
931 return 0;
932 }
933 }
934 else
935 break;
936 }
937 /* Here we add 1 to compensate for the last decrement
938 of CURSOR, which took it past the valid range. */
939 start_byte += cursor - base + 1;
940 }
941 }
942
943 TEMP_SET_PT_BOTH (limit, limit_byte);
d5d57b92 944 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
945
946 return count * direction;
63fa018d
RS
947}
948
ca1d1d23 949int
971de7fb 950find_next_newline_no_quit (EMACS_INT from, int cnt)
ca1d1d23 951{
fa8ed3e0 952 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
9169c321
JB
953}
954
9169c321
JB
955/* Like find_next_newline, but returns position before the newline,
956 not after, and only search up to TO. This isn't just
957 find_next_newline (...)-1, because you might hit TO. */
fa8ed3e0 958
9169c321 959int
971de7fb 960find_before_next_newline (EMACS_INT from, EMACS_INT to, int cnt)
9169c321
JB
961{
962 int shortage;
963 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
964
965 if (shortage == 0)
966 pos--;
177c0ea7 967
9169c321 968 return pos;
ca1d1d23
JB
969}
970\f
ca1d1d23
JB
971/* Subroutines of Lisp buffer search functions. */
972
973static Lisp_Object
971de7fb 974search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count, int direction, int RE, int posix)
ca1d1d23
JB
975{
976 register int np;
9f43ad85 977 int lim, lim_byte;
ca1d1d23
JB
978 int n = direction;
979
980 if (!NILP (count))
981 {
b7826503 982 CHECK_NUMBER (count);
ca1d1d23
JB
983 n *= XINT (count);
984 }
985
b7826503 986 CHECK_STRING (string);
ca1d1d23 987 if (NILP (bound))
9f43ad85
RS
988 {
989 if (n > 0)
990 lim = ZV, lim_byte = ZV_BYTE;
991 else
992 lim = BEGV, lim_byte = BEGV_BYTE;
993 }
ca1d1d23
JB
994 else
995 {
b7826503 996 CHECK_NUMBER_COERCE_MARKER (bound);
ca1d1d23 997 lim = XINT (bound);
6ec8bbd2 998 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
999 error ("Invalid search bound (wrong side of point)");
1000 if (lim > ZV)
9f43ad85 1001 lim = ZV, lim_byte = ZV_BYTE;
588d2fd5 1002 else if (lim < BEGV)
9f43ad85 1003 lim = BEGV, lim_byte = BEGV_BYTE;
588d2fd5
KH
1004 else
1005 lim_byte = CHAR_TO_BYTE (lim);
ca1d1d23
JB
1006 }
1007
910c747a
RS
1008 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
1009 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
1010 = current_buffer->case_eqv_table;
1011
9f43ad85 1012 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
ca1d1d23 1013 (!NILP (current_buffer->case_fold_search)
facdc750 1014 ? current_buffer->case_canon_table
3135e9fd 1015 : Qnil),
ca1d1d23 1016 (!NILP (current_buffer->case_fold_search)
facdc750 1017 ? current_buffer->case_eqv_table
3135e9fd 1018 : Qnil),
b819a390 1019 posix);
ca1d1d23
JB
1020 if (np <= 0)
1021 {
1022 if (NILP (noerror))
06f77a8a
KS
1023 xsignal1 (Qsearch_failed, string);
1024
ca1d1d23
JB
1025 if (!EQ (noerror, Qt))
1026 {
1027 if (lim < BEGV || lim > ZV)
1028 abort ();
9f43ad85 1029 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
1030 return Qnil;
1031#if 0 /* This would be clean, but maybe programs depend on
1032 a value of nil here. */
481399bf 1033 np = lim;
a5f217b8 1034#endif
ca1d1d23 1035 }
481399bf
RS
1036 else
1037 return Qnil;
ca1d1d23
JB
1038 }
1039
1040 if (np < BEGV || np > ZV)
1041 abort ();
1042
1043 SET_PT (np);
1044
1045 return make_number (np);
1046}
1047\f
fa8ed3e0
RS
1048/* Return 1 if REGEXP it matches just one constant string. */
1049
b6d6a51c 1050static int
971de7fb 1051trivial_regexp_p (Lisp_Object regexp)
b6d6a51c 1052{
d5db4077
KR
1053 int len = SBYTES (regexp);
1054 unsigned char *s = SDATA (regexp);
b6d6a51c
KH
1055 while (--len >= 0)
1056 {
1057 switch (*s++)
1058 {
1059 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1060 return 0;
1061 case '\\':
1062 if (--len < 0)
1063 return 0;
1064 switch (*s++)
1065 {
1066 case '|': case '(': case ')': case '`': case '\'': case 'b':
1067 case 'B': case '<': case '>': case 'w': case 'W': case 's':
29f89fe7 1068 case 'S': case '=': case '{': case '}': case '_':
5679531d 1069 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 1070 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
1071 case '6': case '7': case '8': case '9':
1072 return 0;
1073 }
1074 }
1075 }
1076 return 1;
1077}
1078
ca325161 1079/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 1080 starting at position POS and stopping at position LIM,
b819a390 1081 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
1082 a regular expression if RE is true.
1083
1084 If N is positive, searching is forward and LIM must be greater than POS.
1085 If N is negative, searching is backward and LIM must be less than POS.
1086
facdc750 1087 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 1088 or else the position at the beginning of the Nth occurrence
b819a390
RS
1089 (if searching backward) or the end (if searching forward).
1090
1091 POSIX is nonzero if we want full backtracking (POSIX style)
1092 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 1093
aff2ce94
RS
1094#define TRANSLATE(out, trt, d) \
1095do \
1096 { \
1097 if (! NILP (trt)) \
1098 { \
1099 Lisp_Object temp; \
1100 temp = Faref (trt, make_number (d)); \
1101 if (INTEGERP (temp)) \
1102 out = XINT (temp); \
1103 else \
1104 out = d; \
1105 } \
1106 else \
1107 out = d; \
1108 } \
1109while (0)
facdc750 1110
ef887810
RS
1111/* Only used in search_buffer, to record the end position of the match
1112 when searching regexps and SEARCH_REGS should not be changed
1113 (i.e. Vinhibit_changing_match_data is non-nil). */
1114static struct re_registers search_regs_1;
1115
e7deaab0 1116static EMACS_INT
9f43ad85
RS
1117search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1118 RE, trt, inverse_trt, posix)
ca1d1d23 1119 Lisp_Object string;
e7deaab0
AS
1120 EMACS_INT pos;
1121 EMACS_INT pos_byte;
1122 EMACS_INT lim;
1123 EMACS_INT lim_byte;
ca1d1d23
JB
1124 int n;
1125 int RE;
facdc750
RS
1126 Lisp_Object trt;
1127 Lisp_Object inverse_trt;
b819a390 1128 int posix;
ca1d1d23 1129{
d5db4077
KR
1130 int len = SCHARS (string);
1131 int len_byte = SBYTES (string);
facdc750 1132 register int i;
ca1d1d23 1133
7074fde6
FP
1134 if (running_asynch_code)
1135 save_search_regs ();
1136
a7e4cdde 1137 /* Searching 0 times means don't move. */
ca1d1d23 1138 /* Null string is found at starting position. */
a7e4cdde 1139 if (len == 0 || n == 0)
ca325161 1140 {
0353b28f 1141 set_search_regs (pos_byte, 0);
ca325161
RS
1142 return pos;
1143 }
3f57a499 1144
41a33295 1145 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
ca1d1d23 1146 {
facdc750
RS
1147 unsigned char *p1, *p2;
1148 int s1, s2;
487282dc
KH
1149 struct re_pattern_buffer *bufp;
1150
ef887810
RS
1151 bufp = compile_pattern (string,
1152 (NILP (Vinhibit_changing_match_data)
1153 ? &search_regs : &search_regs_1),
1154 trt, posix,
0c8533c6 1155 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23 1156
ca1d1d23
JB
1157 immediate_quit = 1; /* Quit immediately if user types ^G,
1158 because letting this function finish
1159 can take too long. */
1160 QUIT; /* Do a pending quit right away,
1161 to avoid paradoxical behavior */
1162 /* Get pointers and sizes of the two strings
1163 that make up the visible portion of the buffer. */
1164
1165 p1 = BEGV_ADDR;
fa8ed3e0 1166 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1167 p2 = GAP_END_ADDR;
fa8ed3e0 1168 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1169 if (s1 < 0)
1170 {
1171 p2 = p1;
fa8ed3e0 1172 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1173 s1 = 0;
1174 }
1175 if (s2 < 0)
1176 {
fa8ed3e0 1177 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1178 s2 = 0;
1179 }
8bb43c28 1180 re_match_object = Qnil;
177c0ea7 1181
ca1d1d23
JB
1182 while (n < 0)
1183 {
42db823b 1184 int val;
487282dc 1185 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b 1186 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
ef887810
RS
1187 (NILP (Vinhibit_changing_match_data)
1188 ? &search_regs : &search_regs_1),
42db823b 1189 /* Don't allow match past current point */
4996330b 1190 pos_byte - BEGV_BYTE);
ca1d1d23 1191 if (val == -2)
b6d6a51c
KH
1192 {
1193 matcher_overflow ();
1194 }
ca1d1d23
JB
1195 if (val >= 0)
1196 {
ef887810
RS
1197 if (NILP (Vinhibit_changing_match_data))
1198 {
1199 pos_byte = search_regs.start[0] + BEGV_BYTE;
1200 for (i = 0; i < search_regs.num_regs; i++)
1201 if (search_regs.start[i] >= 0)
1202 {
1203 search_regs.start[i]
1204 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1205 search_regs.end[i]
1206 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1207 }
1208 XSETBUFFER (last_thing_searched, current_buffer);
1209 /* Set pos to the new position. */
1210 pos = search_regs.start[0];
1211 }
1212 else
1213 {
1214 pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1215 /* Set pos to the new position. */
1216 pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1217 }
ca1d1d23
JB
1218 }
1219 else
1220 {
1221 immediate_quit = 0;
1222 return (n);
1223 }
1224 n++;
1225 }
1226 while (n > 0)
1227 {
42db823b 1228 int val;
487282dc 1229 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b 1230 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
ef887810
RS
1231 (NILP (Vinhibit_changing_match_data)
1232 ? &search_regs : &search_regs_1),
4996330b 1233 lim_byte - BEGV_BYTE);
ca1d1d23 1234 if (val == -2)
b6d6a51c
KH
1235 {
1236 matcher_overflow ();
1237 }
ca1d1d23
JB
1238 if (val >= 0)
1239 {
ef887810
RS
1240 if (NILP (Vinhibit_changing_match_data))
1241 {
1242 pos_byte = search_regs.end[0] + BEGV_BYTE;
1243 for (i = 0; i < search_regs.num_regs; i++)
1244 if (search_regs.start[i] >= 0)
1245 {
1246 search_regs.start[i]
1247 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1248 search_regs.end[i]
1249 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1250 }
1251 XSETBUFFER (last_thing_searched, current_buffer);
1252 pos = search_regs.end[0];
1253 }
1254 else
1255 {
1256 pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1257 pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1258 }
ca1d1d23
JB
1259 }
1260 else
1261 {
1262 immediate_quit = 0;
1263 return (0 - n);
1264 }
1265 n--;
1266 }
1267 immediate_quit = 0;
1268 return (pos);
1269 }
1270 else /* non-RE case */
1271 {
facdc750
RS
1272 unsigned char *raw_pattern, *pat;
1273 int raw_pattern_size;
1274 int raw_pattern_size_byte;
1275 unsigned char *patbuf;
1276 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
a967ed62 1277 unsigned char *base_pat;
49e44e81
KH
1278 /* Set to positive if we find a non-ASCII char that need
1279 translation. Otherwise set to zero later. */
1280 int char_base = -1;
040272ce 1281 int boyer_moore_ok = 1;
facdc750
RS
1282
1283 /* MULTIBYTE says whether the text to be searched is multibyte.
1284 We must convert PATTERN to match that, or we will not really
1285 find things right. */
1286
1287 if (multibyte == STRING_MULTIBYTE (string))
1288 {
d5db4077
KR
1289 raw_pattern = (unsigned char *) SDATA (string);
1290 raw_pattern_size = SCHARS (string);
1291 raw_pattern_size_byte = SBYTES (string);
facdc750
RS
1292 }
1293 else if (multibyte)
1294 {
d5db4077 1295 raw_pattern_size = SCHARS (string);
facdc750 1296 raw_pattern_size_byte
d5db4077 1297 = count_size_as_multibyte (SDATA (string),
facdc750 1298 raw_pattern_size);
7276d3d8 1299 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
d5db4077
KR
1300 copy_text (SDATA (string), raw_pattern,
1301 SCHARS (string), 0, 1);
facdc750
RS
1302 }
1303 else
1304 {
1305 /* Converting multibyte to single-byte.
1306
1307 ??? Perhaps this conversion should be done in a special way
1308 by subtracting nonascii-insert-offset from each non-ASCII char,
1309 so that only the multibyte chars which really correspond to
1310 the chosen single-byte character set can possibly match. */
d5db4077
KR
1311 raw_pattern_size = SCHARS (string);
1312 raw_pattern_size_byte = SCHARS (string);
7276d3d8 1313 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
1314 copy_text (SDATA (string), raw_pattern,
1315 SBYTES (string), 1, 0);
facdc750
RS
1316 }
1317
1318 /* Copy and optionally translate the pattern. */
1319 len = raw_pattern_size;
1320 len_byte = raw_pattern_size_byte;
f5f7578b 1321 patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
facdc750
RS
1322 pat = patbuf;
1323 base_pat = raw_pattern;
1324 if (multibyte)
1325 {
a17ae96a
KH
1326 /* Fill patbuf by translated characters in STRING while
1327 checking if we can use boyer-moore search. If TRT is
1328 non-nil, we can use boyer-moore search only if TRT can be
1329 represented by the byte array of 256 elements. For that,
1330 all non-ASCII case-equivalents of all case-senstive
1331 characters in STRING must belong to the same charset and
1332 row. */
1333
facdc750
RS
1334 while (--len >= 0)
1335 {
a17ae96a 1336 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
aff2ce94 1337 int c, translated, inverse;
a17ae96a 1338 int in_charlen, charlen;
facdc750
RS
1339
1340 /* If we got here and the RE flag is set, it's because we're
1341 dealing with a regexp known to be trivial, so the backslash
1342 just quotes the next character. */
1343 if (RE && *base_pat == '\\')
1344 {
1345 len--;
62221f22 1346 raw_pattern_size--;
facdc750
RS
1347 len_byte--;
1348 base_pat++;
1349 }
1350
62a6e103 1351 c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
040272ce 1352
a17ae96a 1353 if (NILP (trt))
facdc750 1354 {
a17ae96a
KH
1355 str = base_pat;
1356 charlen = in_charlen;
1357 }
1358 else
1359 {
1360 /* Translate the character. */
1361 TRANSLATE (translated, trt, c);
1362 charlen = CHAR_STRING (translated, str_base);
1363 str = str_base;
1364
1365 /* Check if C has any other case-equivalents. */
1366 TRANSLATE (inverse, inverse_trt, c);
1367 /* If so, check if we can use boyer-moore. */
1368 if (c != inverse && boyer_moore_ok)
1369 {
1370 /* Check if all equivalents belong to the same
1371 group of characters. Note that the check of C
49e44e81
KH
1372 itself is done by the last iteration. */
1373 int this_char_base = -1;
1374
1375 while (boyer_moore_ok)
a17ae96a 1376 {
49e44e81 1377 if (ASCII_BYTE_P (inverse))
a17ae96a 1378 {
49e44e81
KH
1379 if (this_char_base > 0)
1380 boyer_moore_ok = 0;
1381 else
dafaabd1 1382 this_char_base = 0;
a17ae96a 1383 }
49e44e81
KH
1384 else if (CHAR_BYTE8_P (inverse))
1385 /* Boyer-moore search can't handle a
1386 translation of an eight-bit
1387 character. */
1388 boyer_moore_ok = 0;
1389 else if (this_char_base < 0)
1390 {
1391 this_char_base = inverse & ~0x3F;
1392 if (char_base < 0)
1393 char_base = this_char_base;
dafaabd1 1394 else if (this_char_base != char_base)
49e44e81
KH
1395 boyer_moore_ok = 0;
1396 }
1397 else if ((inverse & ~0x3F) != this_char_base)
1398 boyer_moore_ok = 0;
a17ae96a
KH
1399 if (c == inverse)
1400 break;
1401 TRANSLATE (inverse, inverse_trt, inverse);
1402 }
1403 }
aff2ce94 1404 }
facdc750
RS
1405
1406 /* Store this character into the translated pattern. */
a17ae96a
KH
1407 bcopy (str, pat, charlen);
1408 pat += charlen;
facdc750
RS
1409 base_pat += in_charlen;
1410 len_byte -= in_charlen;
1411 }
dafaabd1
AS
1412
1413 /* If char_base is still negative we didn't find any translated
1414 non-ASCII characters. */
1415 if (char_base < 0)
1416 char_base = 0;
facdc750
RS
1417 }
1418 else
1419 {
040272ce 1420 /* Unibyte buffer. */
a17ae96a 1421 char_base = 0;
facdc750
RS
1422 while (--len >= 0)
1423 {
040272ce 1424 int c, translated;
facdc750
RS
1425
1426 /* If we got here and the RE flag is set, it's because we're
1427 dealing with a regexp known to be trivial, so the backslash
1428 just quotes the next character. */
1429 if (RE && *base_pat == '\\')
1430 {
1431 len--;
0190922f 1432 raw_pattern_size--;
facdc750
RS
1433 base_pat++;
1434 }
1435 c = *base_pat++;
aff2ce94 1436 TRANSLATE (translated, trt, c);
facdc750
RS
1437 *pat++ = translated;
1438 }
1439 }
1440
1441 len_byte = pat - patbuf;
1442 len = raw_pattern_size;
1443 pat = base_pat = patbuf;
1444
040272ce 1445 if (boyer_moore_ok)
facdc750 1446 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
aff2ce94 1447 pos, pos_byte, lim, lim_byte,
a17ae96a 1448 char_base);
facdc750
RS
1449 else
1450 return simple_search (n, pat, len, len_byte, trt,
1451 pos, pos_byte, lim, lim_byte);
1452 }
1453}
1454\f
1455/* Do a simple string search N times for the string PAT,
1456 whose length is LEN/LEN_BYTE,
1457 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1458 TRT is the translation table.
f8bd51c4 1459
facdc750
RS
1460 Return the character position where the match is found.
1461 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1462
facdc750
RS
1463 This kind of search works regardless of what is in PAT and
1464 regardless of what is in TRT. It is used in cases where
1465 boyer_moore cannot work. */
1466
e7deaab0 1467static EMACS_INT
971de7fb 1468simple_search (int n, unsigned char *pat, int len, int len_byte, Lisp_Object trt, EMACS_INT pos, EMACS_INT pos_byte, EMACS_INT lim, EMACS_INT lim_byte)
facdc750
RS
1469{
1470 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
ab228c24 1471 int forward = n > 0;
49e44e81
KH
1472 /* Number of buffer bytes matched. Note that this may be different
1473 from len_byte in a multibyte buffer. */
1474 int match_byte;
facdc750
RS
1475
1476 if (lim > pos && multibyte)
1477 while (n > 0)
1478 {
1479 while (1)
f8bd51c4 1480 {
facdc750 1481 /* Try matching at position POS. */
e7deaab0
AS
1482 EMACS_INT this_pos = pos;
1483 EMACS_INT this_pos_byte = pos_byte;
facdc750 1484 int this_len = len;
facdc750 1485 unsigned char *p = pat;
a9469498 1486 if (pos + len > lim || pos_byte + len_byte > lim_byte)
facdc750
RS
1487 goto stop;
1488
1489 while (this_len > 0)
1490 {
1491 int charlen, buf_charlen;
ab228c24 1492 int pat_ch, buf_ch;
facdc750 1493
62a6e103 1494 pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
facdc750 1495 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
facdc750 1496 buf_charlen);
aff2ce94 1497 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1498
1499 if (buf_ch != pat_ch)
1500 break;
ab228c24 1501
ab228c24
RS
1502 this_len--;
1503 p += charlen;
1504
1505 this_pos_byte += buf_charlen;
1506 this_pos++;
facdc750
RS
1507 }
1508
1509 if (this_len == 0)
1510 {
49e44e81 1511 match_byte = this_pos_byte - pos_byte;
facdc750 1512 pos += len;
49e44e81 1513 pos_byte += match_byte;
facdc750
RS
1514 break;
1515 }
1516
1517 INC_BOTH (pos, pos_byte);
f8bd51c4 1518 }
facdc750
RS
1519
1520 n--;
1521 }
1522 else if (lim > pos)
1523 while (n > 0)
1524 {
1525 while (1)
f8bd51c4 1526 {
facdc750 1527 /* Try matching at position POS. */
e7deaab0 1528 EMACS_INT this_pos = pos;
facdc750
RS
1529 int this_len = len;
1530 unsigned char *p = pat;
1531
1532 if (pos + len > lim)
1533 goto stop;
1534
1535 while (this_len > 0)
1536 {
1537 int pat_ch = *p++;
1538 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1539 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1540
1541 if (buf_ch != pat_ch)
1542 break;
ab228c24
RS
1543
1544 this_len--;
1545 this_pos++;
facdc750
RS
1546 }
1547
1548 if (this_len == 0)
1549 {
49e44e81 1550 match_byte = len;
facdc750
RS
1551 pos += len;
1552 break;
1553 }
1554
1555 pos++;
f8bd51c4 1556 }
facdc750
RS
1557
1558 n--;
1559 }
1560 /* Backwards search. */
1561 else if (lim < pos && multibyte)
1562 while (n < 0)
1563 {
1564 while (1)
f8bd51c4 1565 {
facdc750 1566 /* Try matching at position POS. */
8b264ecb
AS
1567 EMACS_INT this_pos = pos;
1568 EMACS_INT this_pos_byte = pos_byte;
facdc750 1569 int this_len = len;
c525b3f2 1570 const unsigned char *p = pat + len_byte;
facdc750 1571
8b264ecb 1572 if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
facdc750
RS
1573 goto stop;
1574
1575 while (this_len > 0)
1576 {
8b264ecb 1577 int charlen;
ab228c24 1578 int pat_ch, buf_ch;
facdc750 1579
8b264ecb
AS
1580 DEC_BOTH (this_pos, this_pos_byte);
1581 PREV_CHAR_BOUNDARY (p, pat);
1582 pat_ch = STRING_CHAR (p);
1583 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
aff2ce94 1584 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1585
1586 if (buf_ch != pat_ch)
1587 break;
ab228c24 1588
ab228c24 1589 this_len--;
facdc750
RS
1590 }
1591
1592 if (this_len == 0)
1593 {
8b264ecb
AS
1594 match_byte = pos_byte - this_pos_byte;
1595 pos = this_pos;
1596 pos_byte = this_pos_byte;
facdc750
RS
1597 break;
1598 }
1599
1600 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1601 }
1602
facdc750
RS
1603 n++;
1604 }
1605 else if (lim < pos)
1606 while (n < 0)
1607 {
1608 while (1)
b6d6a51c 1609 {
facdc750 1610 /* Try matching at position POS. */
e7deaab0 1611 EMACS_INT this_pos = pos - len;
facdc750
RS
1612 int this_len = len;
1613 unsigned char *p = pat;
1614
a9469498 1615 if (this_pos < lim)
facdc750
RS
1616 goto stop;
1617
1618 while (this_len > 0)
1619 {
1620 int pat_ch = *p++;
1621 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1622 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1623
1624 if (buf_ch != pat_ch)
1625 break;
ab228c24
RS
1626 this_len--;
1627 this_pos++;
facdc750
RS
1628 }
1629
1630 if (this_len == 0)
b6d6a51c 1631 {
49e44e81 1632 match_byte = len;
facdc750
RS
1633 pos -= len;
1634 break;
b6d6a51c 1635 }
facdc750
RS
1636
1637 pos--;
b6d6a51c 1638 }
facdc750
RS
1639
1640 n++;
b6d6a51c 1641 }
facdc750
RS
1642
1643 stop:
1644 if (n == 0)
aff2ce94 1645 {
ab228c24 1646 if (forward)
49e44e81 1647 set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
ab228c24 1648 else
49e44e81 1649 set_search_regs (multibyte ? pos_byte : pos, match_byte);
aff2ce94
RS
1650
1651 return pos;
1652 }
facdc750
RS
1653 else if (n > 0)
1654 return -n;
1655 else
1656 return n;
1657}
1658\f
0190922f 1659/* Do Boyer-Moore search N times for the string BASE_PAT,
facdc750
RS
1660 whose length is LEN/LEN_BYTE,
1661 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1662 DIRECTION says which direction we search in.
1663 TRT and INVERSE_TRT are translation tables.
0190922f 1664 Characters in PAT are already translated by TRT.
facdc750 1665
0190922f
KH
1666 This kind of search works if all the characters in BASE_PAT that
1667 have nontrivial translation are the same aside from the last byte.
1668 This makes it possible to translate just the last byte of a
1669 character, and do so after just a simple test of the context.
b2e6b10f 1670 CHAR_BASE is nonzero if there is such a non-ASCII character.
facdc750
RS
1671
1672 If that criterion is not satisfied, do not call this function. */
1673
e7deaab0 1674static EMACS_INT
facdc750 1675boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
a17ae96a 1676 pos, pos_byte, lim, lim_byte, char_base)
facdc750
RS
1677 int n;
1678 unsigned char *base_pat;
1679 int len, len_byte;
1680 Lisp_Object trt;
1681 Lisp_Object inverse_trt;
e7deaab0
AS
1682 EMACS_INT pos, pos_byte;
1683 EMACS_INT lim, lim_byte;
a17ae96a 1684 int char_base;
facdc750
RS
1685{
1686 int direction = ((n > 0) ? 1 : -1);
1687 register int dirlen;
e7deaab0
AS
1688 EMACS_INT limit;
1689 int stride_for_teases = 0;
f4646fff 1690 int BM_tab[0400];
177c0ea7 1691 register unsigned char *cursor, *p_limit;
facdc750 1692 register int i, j;
cb6792d2 1693 unsigned char *pat, *pat_end;
facdc750
RS
1694 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1695
1696 unsigned char simple_translate[0400];
0190922f 1697 /* These are set to the preceding bytes of a byte to be translated
49e44e81 1698 if char_base is nonzero. As the maximum byte length of a
a17ae96a 1699 multibyte character is 5, we have to check at most four previous
0190922f
KH
1700 bytes. */
1701 int translate_prev_byte1 = 0;
1702 int translate_prev_byte2 = 0;
1703 int translate_prev_byte3 = 0;
a17ae96a 1704 int translate_prev_byte4 = 0;
facdc750 1705
f4646fff
AS
1706 /* The general approach is that we are going to maintain that we know
1707 the first (closest to the present position, in whatever direction
1708 we're searching) character that could possibly be the last
1709 (furthest from present position) character of a valid match. We
1710 advance the state of our knowledge by looking at that character
1711 and seeing whether it indeed matches the last character of the
1712 pattern. If it does, we take a closer look. If it does not, we
1713 move our pointer (to putative last characters) as far as is
1714 logically possible. This amount of movement, which I call a
1715 stride, will be the length of the pattern if the actual character
1716 appears nowhere in the pattern, otherwise it will be the distance
1717 from the last occurrence of that character to the end of the
1718 pattern. If the amount is zero we have a possible match. */
1719
1720 /* Here we make a "mickey mouse" BM table. The stride of the search
1721 is determined only by the last character of the putative match.
1722 If that character does not match, we will stride the proper
1723 distance to propose a match that superimposes it on the last
1724 instance of a character that matches it (per trt), or misses
1725 it entirely if there is none. */
facdc750
RS
1726
1727 dirlen = len_byte * direction;
cb6792d2
RS
1728
1729 /* Record position after the end of the pattern. */
1730 pat_end = base_pat + len_byte;
1731 /* BASE_PAT points to a character that we start scanning from.
1732 It is the first character in a forward search,
1733 the last character in a backward search. */
facdc750 1734 if (direction < 0)
cb6792d2
RS
1735 base_pat = pat_end - 1;
1736
f4646fff
AS
1737 /* A character that does not appear in the pattern induces a
1738 stride equal to the pattern length. */
1739 for (i = 0; i < 0400; i++)
1740 BM_tab[i] = dirlen;
facdc750
RS
1741
1742 /* We use this for translation, instead of TRT itself.
1743 We fill this in to handle the characters that actually
1744 occur in the pattern. Others don't matter anyway! */
facdc750
RS
1745 for (i = 0; i < 0400; i++)
1746 simple_translate[i] = i;
1747
a17ae96a 1748 if (char_base)
0190922f 1749 {
a17ae96a 1750 /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE. Only a
0190922f 1751 byte following them are the target of translation. */
0190922f 1752 unsigned char str[MAX_MULTIBYTE_LENGTH];
a17ae96a 1753 int len = CHAR_STRING (char_base, str);
0190922f
KH
1754
1755 translate_prev_byte1 = str[len - 2];
1756 if (len > 2)
1757 {
1758 translate_prev_byte2 = str[len - 3];
1759 if (len > 3)
a17ae96a
KH
1760 {
1761 translate_prev_byte3 = str[len - 4];
1762 if (len > 4)
1763 translate_prev_byte4 = str[len - 5];
1764 }
0190922f
KH
1765 }
1766 }
1767
facdc750 1768 i = 0;
f4646fff 1769 while (i != dirlen)
facdc750 1770 {
cb6792d2 1771 unsigned char *ptr = base_pat + i;
facdc750 1772 i += direction;
facdc750 1773 if (! NILP (trt))
ca1d1d23 1774 {
49e44e81
KH
1775 /* If the byte currently looking at is the last of a
1776 character to check case-equivalents, set CH to that
1777 character. An ASCII character and a non-ASCII character
1778 matching with CHAR_BASE are to be checked. */
a17ae96a
KH
1779 int ch = -1;
1780
1781 if (ASCII_BYTE_P (*ptr) || ! multibyte)
1782 ch = *ptr;
49e44e81 1783 else if (char_base
86dc6ccb 1784 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
ca1d1d23 1785 {
49e44e81
KH
1786 unsigned char *charstart = ptr - 1;
1787
1788 while (! (CHAR_HEAD_P (*charstart)))
1789 charstart--;
62a6e103 1790 ch = STRING_CHAR (charstart);
a17ae96a
KH
1791 if (char_base != (ch & ~0x3F))
1792 ch = -1;
ca1d1d23 1793 }
facdc750 1794
f51995fa 1795 if (ch >= 0200)
49e44e81
KH
1796 j = (ch & 0x3F) | 0200;
1797 else
1798 j = *ptr;
1799
f4646fff 1800 if (i == dirlen)
facdc750 1801 stride_for_teases = BM_tab[j];
ab228c24 1802
facdc750
RS
1803 BM_tab[j] = dirlen - i;
1804 /* A translation table is accompanied by its inverse -- see */
177c0ea7 1805 /* comment following downcase_table for details */
a17ae96a 1806 if (ch >= 0)
ab228c24
RS
1807 {
1808 int starting_ch = ch;
49e44e81 1809 int starting_j = j;
a17ae96a 1810
ab228c24
RS
1811 while (1)
1812 {
1813 TRANSLATE (ch, inverse_trt, ch);
f51995fa 1814 if (ch >= 0200)
49e44e81 1815 j = (ch & 0x3F) | 0200;
ab228c24 1816 else
a17ae96a 1817 j = ch;
ab228c24
RS
1818
1819 /* For all the characters that map into CH,
1820 set up simple_translate to map the last byte
1821 into STARTING_J. */
1822 simple_translate[j] = starting_j;
1823 if (ch == starting_ch)
1824 break;
1825 BM_tab[j] = dirlen - i;
1826 }
1827 }
facdc750
RS
1828 }
1829 else
1830 {
1831 j = *ptr;
1832
f4646fff 1833 if (i == dirlen)
facdc750
RS
1834 stride_for_teases = BM_tab[j];
1835 BM_tab[j] = dirlen - i;
ca1d1d23 1836 }
f4646fff
AS
1837 /* stride_for_teases tells how much to stride if we get a
1838 match on the far character but are subsequently
1839 disappointed, by recording what the stride would have been
1840 for that character if the last character had been
1841 different. */
facdc750 1842 }
facdc750
RS
1843 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1844 /* loop invariant - POS_BYTE points at where last char (first
1845 char if reverse) of pattern would align in a possible match. */
1846 while (n != 0)
1847 {
e7deaab0 1848 EMACS_INT tail_end;
facdc750
RS
1849 unsigned char *tail_end_ptr;
1850
1851 /* It's been reported that some (broken) compiler thinks that
1852 Boolean expressions in an arithmetic context are unsigned.
1853 Using an explicit ?1:0 prevents this. */
1854 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1855 < 0)
1856 return (n * (0 - direction));
1857 /* First we do the part we can by pointers (maybe nothing) */
1858 QUIT;
1859 pat = base_pat;
1860 limit = pos_byte - dirlen + direction;
67ce527d
KH
1861 if (direction > 0)
1862 {
1863 limit = BUFFER_CEILING_OF (limit);
1864 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1865 can take on without hitting edge of buffer or the gap. */
1866 limit = min (limit, pos_byte + 20000);
1867 limit = min (limit, lim_byte - 1);
1868 }
1869 else
1870 {
1871 limit = BUFFER_FLOOR_OF (limit);
1872 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1873 can take on without hitting edge of buffer or the gap. */
1874 limit = max (limit, pos_byte - 20000);
1875 limit = max (limit, lim_byte);
1876 }
facdc750
RS
1877 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1878 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1879
1880 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1881 {
facdc750
RS
1882 unsigned char *p2;
1883
1884 p_limit = BYTE_POS_ADDR (limit);
1885 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
f4646fff 1886 /* In this loop, pos + cursor - p2 is the surrogate for pos. */
facdc750 1887 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1888 {
facdc750 1889 if (direction > 0) /* worth duplicating */
ca1d1d23 1890 {
f4646fff
AS
1891 while (cursor <= p_limit)
1892 {
1893 if (BM_tab[*cursor] == 0)
1894 goto hit;
facdc750 1895 cursor += BM_tab[*cursor];
f4646fff 1896 }
facdc750
RS
1897 }
1898 else
1899 {
f4646fff
AS
1900 while (cursor >= p_limit)
1901 {
1902 if (BM_tab[*cursor] == 0)
1903 goto hit;
facdc750 1904 cursor += BM_tab[*cursor];
f4646fff 1905 }
facdc750 1906 }
f4646fff
AS
1907 /* If you are here, cursor is beyond the end of the
1908 searched region. You fail to match within the
1909 permitted region and would otherwise try a character
1910 beyond that region. */
1911 break;
1912
1913 hit:
facdc750
RS
1914 i = dirlen - direction;
1915 if (! NILP (trt))
1916 {
1917 while ((i -= direction) + direction != 0)
ca1d1d23 1918 {
facdc750
RS
1919 int ch;
1920 cursor -= direction;
1921 /* Translate only the last byte of a character. */
1922 if (! multibyte
1923 || ((cursor == tail_end_ptr
1924 || CHAR_HEAD_P (cursor[1]))
1925 && (CHAR_HEAD_P (cursor[0])
0190922f
KH
1926 /* Check if this is the last byte of
1927 a translable character. */
1928 || (translate_prev_byte1 == cursor[-1]
1929 && (CHAR_HEAD_P (translate_prev_byte1)
1930 || (translate_prev_byte2 == cursor[-2]
1931 && (CHAR_HEAD_P (translate_prev_byte2)
1932 || (translate_prev_byte3 == cursor[-3]))))))))
facdc750
RS
1933 ch = simple_translate[*cursor];
1934 else
1935 ch = *cursor;
1936 if (pat[i] != ch)
1937 break;
ca1d1d23 1938 }
facdc750
RS
1939 }
1940 else
1941 {
1942 while ((i -= direction) + direction != 0)
ca1d1d23 1943 {
facdc750
RS
1944 cursor -= direction;
1945 if (pat[i] != *cursor)
1946 break;
ca1d1d23 1947 }
facdc750
RS
1948 }
1949 cursor += dirlen - i - direction; /* fix cursor */
1950 if (i + direction == 0)
1951 {
e7deaab0 1952 EMACS_INT position, start, end;
0c8533c6 1953
facdc750 1954 cursor -= direction;
1113d9db 1955
facdc750
RS
1956 position = pos_byte + cursor - p2 + ((direction > 0)
1957 ? 1 - len_byte : 0);
1958 set_search_regs (position, len_byte);
ca325161 1959
ef887810
RS
1960 if (NILP (Vinhibit_changing_match_data))
1961 {
1962 start = search_regs.start[0];
1963 end = search_regs.end[0];
1964 }
1965 else
1966 /* If Vinhibit_changing_match_data is non-nil,
1967 search_regs will not be changed. So let's
1968 compute start and end here. */
1969 {
1970 start = BYTE_TO_CHAR (position);
1971 end = BYTE_TO_CHAR (position + len_byte);
1972 }
1973
facdc750
RS
1974 if ((n -= direction) != 0)
1975 cursor += dirlen; /* to resume search */
ca1d1d23 1976 else
ef887810 1977 return direction > 0 ? end : start;
ca1d1d23 1978 }
facdc750
RS
1979 else
1980 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 1981 }
facdc750
RS
1982 pos_byte += cursor - p2;
1983 }
1984 else
f4646fff
AS
1985 /* Now we'll pick up a clump that has to be done the hard
1986 way because it covers a discontinuity. */
facdc750
RS
1987 {
1988 limit = ((direction > 0)
1989 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1990 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1991 limit = ((direction > 0)
1992 ? min (limit + len_byte, lim_byte - 1)
1993 : max (limit - len_byte, lim_byte));
1994 /* LIMIT is now the last value POS_BYTE can have
1995 and still be valid for a possible match. */
1996 while (1)
ca1d1d23 1997 {
f4646fff
AS
1998 /* This loop can be coded for space rather than
1999 speed because it will usually run only once.
2000 (the reach is at most len + 21, and typically
2001 does not exceed len). */
facdc750 2002 while ((limit - pos_byte) * direction >= 0)
f4646fff
AS
2003 {
2004 int ch = FETCH_BYTE (pos_byte);
2005 if (BM_tab[ch] == 0)
2006 goto hit2;
2007 pos_byte += BM_tab[ch];
2008 }
2009 break; /* ran off the end */
2010
2011 hit2:
2012 /* Found what might be a match. */
facdc750
RS
2013 i = dirlen - direction;
2014 while ((i -= direction) + direction != 0)
ca1d1d23 2015 {
facdc750
RS
2016 int ch;
2017 unsigned char *ptr;
2018 pos_byte -= direction;
2019 ptr = BYTE_POS_ADDR (pos_byte);
2020 /* Translate only the last byte of a character. */
2021 if (! multibyte
2022 || ((ptr == tail_end_ptr
2023 || CHAR_HEAD_P (ptr[1]))
2024 && (CHAR_HEAD_P (ptr[0])
0190922f
KH
2025 /* Check if this is the last byte of a
2026 translable character. */
2027 || (translate_prev_byte1 == ptr[-1]
2028 && (CHAR_HEAD_P (translate_prev_byte1)
2029 || (translate_prev_byte2 == ptr[-2]
2030 && (CHAR_HEAD_P (translate_prev_byte2)
2031 || translate_prev_byte3 == ptr[-3])))))))
facdc750
RS
2032 ch = simple_translate[*ptr];
2033 else
2034 ch = *ptr;
2035 if (pat[i] != ch)
2036 break;
2037 }
2038 /* Above loop has moved POS_BYTE part or all the way
2039 back to the first pos (last pos if reverse).
2040 Set it once again at the last (first if reverse) char. */
f4646fff 2041 pos_byte += dirlen - i - direction;
facdc750
RS
2042 if (i + direction == 0)
2043 {
e7deaab0 2044 EMACS_INT position, start, end;
facdc750 2045 pos_byte -= direction;
1113d9db 2046
facdc750 2047 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
facdc750 2048 set_search_regs (position, len_byte);
ca325161 2049
ef887810
RS
2050 if (NILP (Vinhibit_changing_match_data))
2051 {
2052 start = search_regs.start[0];
2053 end = search_regs.end[0];
2054 }
2055 else
2056 /* If Vinhibit_changing_match_data is non-nil,
2057 search_regs will not be changed. So let's
2058 compute start and end here. */
2059 {
2060 start = BYTE_TO_CHAR (position);
2061 end = BYTE_TO_CHAR (position + len_byte);
2062 }
2063
facdc750
RS
2064 if ((n -= direction) != 0)
2065 pos_byte += dirlen; /* to resume search */
ca1d1d23 2066 else
ef887810 2067 return direction > 0 ? end : start;
ca1d1d23 2068 }
facdc750
RS
2069 else
2070 pos_byte += stride_for_teases;
2071 }
2072 }
2073 /* We have done one clump. Can we continue? */
2074 if ((lim_byte - pos_byte) * direction < 0)
2075 return ((0 - n) * direction);
ca1d1d23 2076 }
facdc750 2077 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 2078}
ca325161 2079
fa8ed3e0 2080/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
a7e4cdde
RS
2081 for the overall match just found in the current buffer.
2082 Also clear out the match data for registers 1 and up. */
ca325161
RS
2083
2084static void
971de7fb 2085set_search_regs (EMACS_INT beg_byte, EMACS_INT nbytes)
ca325161 2086{
a7e4cdde
RS
2087 int i;
2088
ef887810
RS
2089 if (!NILP (Vinhibit_changing_match_data))
2090 return;
2091
ca325161
RS
2092 /* Make sure we have registers in which to store
2093 the match position. */
2094 if (search_regs.num_regs == 0)
2095 {
2d4a771a
RS
2096 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2097 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 2098 search_regs.num_regs = 2;
ca325161
RS
2099 }
2100
a7e4cdde
RS
2101 /* Clear out the other registers. */
2102 for (i = 1; i < search_regs.num_regs; i++)
2103 {
2104 search_regs.start[i] = -1;
2105 search_regs.end[i] = -1;
2106 }
2107
fa8ed3e0
RS
2108 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2109 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 2110 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 2111}
ca1d1d23 2112\f
c6e0e1cc
CY
2113/* Given STRING, a string of words separated by word delimiters,
2114 compute a regexp that matches those exact words separated by
2115 arbitrary punctuation. If LAX is nonzero, the end of the string
2116 need not match a word boundary unless it ends in whitespace. */
ca1d1d23
JB
2117
2118static Lisp_Object
971de7fb 2119wordify (Lisp_Object string, int lax)
ca1d1d23
JB
2120{
2121 register unsigned char *p, *o;
0c8533c6 2122 register int i, i_byte, len, punct_count = 0, word_count = 0;
ca1d1d23 2123 Lisp_Object val;
0c8533c6 2124 int prev_c = 0;
c6e0e1cc 2125 int adjust, whitespace_at_end;
ca1d1d23 2126
b7826503 2127 CHECK_STRING (string);
d5db4077
KR
2128 p = SDATA (string);
2129 len = SCHARS (string);
ca1d1d23 2130
0c8533c6
RS
2131 for (i = 0, i_byte = 0; i < len; )
2132 {
2133 int c;
177c0ea7 2134
93daa011 2135 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
0c8533c6
RS
2136
2137 if (SYNTAX (c) != Sword)
2138 {
2139 punct_count++;
2140 if (i > 0 && SYNTAX (prev_c) == Sword)
2141 word_count++;
2142 }
ca1d1d23 2143
0c8533c6
RS
2144 prev_c = c;
2145 }
2146
2147 if (SYNTAX (prev_c) == Sword)
c6e0e1cc
CY
2148 {
2149 word_count++;
2150 whitespace_at_end = 0;
2151 }
2152 else
2153 whitespace_at_end = 1;
2154
0c8533c6 2155 if (!word_count)
c60416e0 2156 return empty_unibyte_string;
0c8533c6 2157
c6e0e1cc
CY
2158 adjust = - punct_count + 5 * (word_count - 1)
2159 + ((lax && !whitespace_at_end) ? 2 : 4);
8a2df937
RS
2160 if (STRING_MULTIBYTE (string))
2161 val = make_uninit_multibyte_string (len + adjust,
d5db4077 2162 SBYTES (string)
8a2df937
RS
2163 + adjust);
2164 else
2165 val = make_uninit_string (len + adjust);
ca1d1d23 2166
d5db4077 2167 o = SDATA (val);
ca1d1d23
JB
2168 *o++ = '\\';
2169 *o++ = 'b';
1e9582d4 2170 prev_c = 0;
ca1d1d23 2171
1e9582d4
RS
2172 for (i = 0, i_byte = 0; i < len; )
2173 {
2174 int c;
2175 int i_byte_orig = i_byte;
177c0ea7 2176
93daa011 2177 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
1e9582d4
RS
2178
2179 if (SYNTAX (c) == Sword)
2180 {
5d69fe10 2181 bcopy (SDATA (string) + i_byte_orig, o,
1e9582d4
RS
2182 i_byte - i_byte_orig);
2183 o += i_byte - i_byte_orig;
2184 }
2185 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2186 {
2187 *o++ = '\\';
2188 *o++ = 'W';
2189 *o++ = '\\';
2190 *o++ = 'W';
2191 *o++ = '*';
2192 }
2193
2194 prev_c = c;
2195 }
ca1d1d23 2196
c6e0e1cc
CY
2197 if (!lax || whitespace_at_end)
2198 {
2199 *o++ = '\\';
2200 *o++ = 'b';
2201 }
ca1d1d23
JB
2202
2203 return val;
2204}
2205\f
2206DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
8c1a1077
PJ
2207 "MSearch backward: ",
2208 doc: /* Search backward from point for STRING.
2209Set point to the beginning of the occurrence found, and return point.
2210An optional second argument bounds the search; it is a buffer position.
2211The match found must not extend before that position.
2212Optional third argument, if t, means if fail just return nil (no error).
2213 If not nil and not t, position at limit of search and return nil.
2214Optional fourth argument is repeat count--search for successive occurrences.
2215
2216Search case-sensitivity is determined by the value of the variable
2217`case-fold-search', which see.
2218
2219See also the functions `match-beginning', `match-end' and `replace-match'. */)
2220 (string, bound, noerror, count)
ca1d1d23
JB
2221 Lisp_Object string, bound, noerror, count;
2222{
b819a390 2223 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
2224}
2225
6af43974 2226DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
8c1a1077
PJ
2227 doc: /* Search forward from point for STRING.
2228Set point to the end of the occurrence found, and return point.
2229An optional second argument bounds the search; it is a buffer position.
d6aacdcd 2230The match found must not extend after that position. A value of nil is
48740f01 2231 equivalent to (point-max).
8c1a1077
PJ
2232Optional third argument, if t, means if fail just return nil (no error).
2233 If not nil and not t, move to limit of search and return nil.
2234Optional fourth argument is repeat count--search for successive occurrences.
2235
2236Search case-sensitivity is determined by the value of the variable
2237`case-fold-search', which see.
2238
2239See also the functions `match-beginning', `match-end' and `replace-match'. */)
2240 (string, bound, noerror, count)
ca1d1d23
JB
2241 Lisp_Object string, bound, noerror, count;
2242{
b819a390 2243 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
2244}
2245
2246DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
8c1a1077
PJ
2247 "sWord search backward: ",
2248 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2249Set point to the beginning of the occurrence found, and return point.
2250An optional second argument bounds the search; it is a buffer position.
2251The match found must not extend before that position.
2252Optional third argument, if t, means if fail just return nil (no error).
2253 If not nil and not t, move to limit of search and return nil.
2254Optional fourth argument is repeat count--search for successive occurrences. */)
2255 (string, bound, noerror, count)
ca1d1d23
JB
2256 Lisp_Object string, bound, noerror, count;
2257{
c6e0e1cc 2258 return search_command (wordify (string, 0), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2259}
2260
2261DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
8c1a1077
PJ
2262 "sWord search: ",
2263 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2264Set point to the end of the occurrence found, and return point.
2265An optional second argument bounds the search; it is a buffer position.
2266The match found must not extend after that position.
2267Optional third argument, if t, means if fail just return nil (no error).
2268 If not nil and not t, move to limit of search and return nil.
2269Optional fourth argument is repeat count--search for successive occurrences. */)
2270 (string, bound, noerror, count)
ca1d1d23
JB
2271 Lisp_Object string, bound, noerror, count;
2272{
c6e0e1cc
CY
2273 return search_command (wordify (string, 0), bound, noerror, count, 1, 1, 0);
2274}
2275
2276DEFUN ("word-search-backward-lax", Fword_search_backward_lax, Sword_search_backward_lax, 1, 4,
2277 "sWord search backward: ",
2278 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2279Set point to the beginning of the occurrence found, and return point.
2280
2281Unlike `word-search-backward', the end of STRING need not match a word
2282boundary unless it ends in whitespace.
2283
2284An optional second argument bounds the search; it is a buffer position.
2285The match found must not extend before that position.
2286Optional third argument, if t, means if fail just return nil (no error).
2287 If not nil and not t, move to limit of search and return nil.
2288Optional fourth argument is repeat count--search for successive occurrences. */)
2289 (string, bound, noerror, count)
2290 Lisp_Object string, bound, noerror, count;
2291{
2292 return search_command (wordify (string, 1), bound, noerror, count, -1, 1, 0);
2293}
2294
2295DEFUN ("word-search-forward-lax", Fword_search_forward_lax, Sword_search_forward_lax, 1, 4,
2296 "sWord search: ",
2297 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2298Set point to the end of the occurrence found, and return point.
2299
2300Unlike `word-search-forward', the end of STRING need not match a word
2301boundary unless it ends in whitespace.
2302
2303An optional second argument bounds the search; it is a buffer position.
2304The match found must not extend after that position.
2305Optional third argument, if t, means if fail just return nil (no error).
2306 If not nil and not t, move to limit of search and return nil.
2307Optional fourth argument is repeat count--search for successive occurrences. */)
2308 (string, bound, noerror, count)
2309 Lisp_Object string, bound, noerror, count;
2310{
2311 return search_command (wordify (string, 1), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
2312}
2313
2314DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
8c1a1077
PJ
2315 "sRE search backward: ",
2316 doc: /* Search backward from point for match for regular expression REGEXP.
2317Set point to the beginning of the match, and return point.
2318The match found is the one starting last in the buffer
2319and yet ending before the origin of the search.
2320An optional second argument bounds the search; it is a buffer position.
2321The match found must start at or after that position.
2322Optional third argument, if t, means if fail just return nil (no error).
2323 If not nil and not t, move to limit of search and return nil.
2324Optional fourth argument is repeat count--search for successive occurrences.
2325See also the functions `match-beginning', `match-end', `match-string',
2326and `replace-match'. */)
2327 (regexp, bound, noerror, count)
19c0a730 2328 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2329{
b819a390 2330 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2331}
2332
2333DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
8c1a1077
PJ
2334 "sRE search: ",
2335 doc: /* Search forward from point for regular expression REGEXP.
2336Set point to the end of the occurrence found, and return point.
2337An optional second argument bounds the search; it is a buffer position.
2338The match found must not extend after that position.
2339Optional third argument, if t, means if fail just return nil (no error).
2340 If not nil and not t, move to limit of search and return nil.
2341Optional fourth argument is repeat count--search for successive occurrences.
2342See also the functions `match-beginning', `match-end', `match-string',
2343and `replace-match'. */)
2344 (regexp, bound, noerror, count)
19c0a730 2345 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2346{
b819a390
RS
2347 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2348}
2349
2350DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
8c1a1077
PJ
2351 "sPosix search backward: ",
2352 doc: /* Search backward from point for match for regular expression REGEXP.
2353Find the longest match in accord with Posix regular expression rules.
2354Set point to the beginning of the match, and return point.
2355The match found is the one starting last in the buffer
2356and yet ending before the origin of the search.
2357An optional second argument bounds the search; it is a buffer position.
2358The match found must start at or after that position.
2359Optional third argument, if t, means if fail just return nil (no error).
2360 If not nil and not t, move to limit of search and return nil.
2361Optional fourth argument is repeat count--search for successive occurrences.
2362See also the functions `match-beginning', `match-end', `match-string',
2363and `replace-match'. */)
2364 (regexp, bound, noerror, count)
b819a390
RS
2365 Lisp_Object regexp, bound, noerror, count;
2366{
2367 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2368}
2369
2370DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
8c1a1077
PJ
2371 "sPosix search: ",
2372 doc: /* Search forward from point for regular expression REGEXP.
2373Find the longest match in accord with Posix regular expression rules.
2374Set point to the end of the occurrence found, and return point.
2375An optional second argument bounds the search; it is a buffer position.
2376The match found must not extend after that position.
2377Optional third argument, if t, means if fail just return nil (no error).
2378 If not nil and not t, move to limit of search and return nil.
2379Optional fourth argument is repeat count--search for successive occurrences.
2380See also the functions `match-beginning', `match-end', `match-string',
2381and `replace-match'. */)
2382 (regexp, bound, noerror, count)
b819a390
RS
2383 Lisp_Object regexp, bound, noerror, count;
2384{
2385 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2386}
2387\f
d7a5ad5f 2388DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
8c1a1077 2389 doc: /* Replace text matched by last search with NEWTEXT.
4dd0c271
RS
2390Leave point at the end of the replacement text.
2391
8c1a1077
PJ
2392If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2393Otherwise maybe capitalize the whole text, or maybe just word initials,
2394based on the replaced text.
2395If the replaced text has only capital letters
2396and has at least one multiletter word, convert NEWTEXT to all caps.
4dd0c271
RS
2397Otherwise if all words are capitalized in the replaced text,
2398capitalize each word in NEWTEXT.
8c1a1077
PJ
2399
2400If third arg LITERAL is non-nil, insert NEWTEXT literally.
2401Otherwise treat `\\' as special:
2402 `\\&' in NEWTEXT means substitute original matched text.
2403 `\\N' means substitute what matched the Nth `\\(...\\)'.
2404 If Nth parens didn't match, substitute nothing.
2405 `\\\\' means insert one `\\'.
4dd0c271
RS
2406Case conversion does not apply to these substitutions.
2407
8c1a1077 2408FIXEDCASE and LITERAL are optional arguments.
8c1a1077
PJ
2409
2410The optional fourth argument STRING can be a string to modify.
2411This is meaningful when the previous match was done against STRING,
2412using `string-match'. When used this way, `replace-match'
2413creates and returns a new string made by copying STRING and replacing
2414the part of STRING that was matched.
2415
2416The optional fifth argument SUBEXP specifies a subexpression;
2417it says to replace just that subexpression with NEWTEXT,
2418rather than replacing the entire matched text.
2419This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2420`\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2421NEWTEXT in place of subexp N.
2422This is useful only after a regular expression search or match,
2423since only regular expressions have distinguished subexpressions. */)
2424 (newtext, fixedcase, literal, string, subexp)
d7a5ad5f 2425 Lisp_Object newtext, fixedcase, literal, string, subexp;
ca1d1d23
JB
2426{
2427 enum { nochange, all_caps, cap_initial } case_action;
ac3b28b1 2428 register int pos, pos_byte;
ca1d1d23 2429 int some_multiletter_word;
97832bd0 2430 int some_lowercase;
73dc8771 2431 int some_uppercase;
208767c3 2432 int some_nonuppercase_initial;
ca1d1d23 2433 register int c, prevc;
d7a5ad5f 2434 int sub;
e7deaab0 2435 EMACS_INT opoint, newpoint;
ca1d1d23 2436
b7826503 2437 CHECK_STRING (newtext);
ca1d1d23 2438
080c45fd 2439 if (! NILP (string))
b7826503 2440 CHECK_STRING (string);
080c45fd 2441
ca1d1d23
JB
2442 case_action = nochange; /* We tried an initialization */
2443 /* but some C compilers blew it */
4746118a
JB
2444
2445 if (search_regs.num_regs <= 0)
d72cdbfc 2446 error ("`replace-match' called before any match found");
4746118a 2447
d7a5ad5f
RS
2448 if (NILP (subexp))
2449 sub = 0;
2450 else
2451 {
b7826503 2452 CHECK_NUMBER (subexp);
d7a5ad5f
RS
2453 sub = XINT (subexp);
2454 if (sub < 0 || sub >= search_regs.num_regs)
2455 args_out_of_range (subexp, make_number (search_regs.num_regs));
2456 }
2457
080c45fd
RS
2458 if (NILP (string))
2459 {
d7a5ad5f
RS
2460 if (search_regs.start[sub] < BEGV
2461 || search_regs.start[sub] > search_regs.end[sub]
2462 || search_regs.end[sub] > ZV)
2463 args_out_of_range (make_number (search_regs.start[sub]),
2464 make_number (search_regs.end[sub]));
080c45fd
RS
2465 }
2466 else
2467 {
d7a5ad5f
RS
2468 if (search_regs.start[sub] < 0
2469 || search_regs.start[sub] > search_regs.end[sub]
d5db4077 2470 || search_regs.end[sub] > SCHARS (string))
d7a5ad5f
RS
2471 args_out_of_range (make_number (search_regs.start[sub]),
2472 make_number (search_regs.end[sub]));
080c45fd 2473 }
ca1d1d23
JB
2474
2475 if (NILP (fixedcase))
2476 {
2477 /* Decide how to casify by examining the matched text. */
e7deaab0 2478 EMACS_INT last;
ca1d1d23 2479
ac3b28b1
KH
2480 pos = search_regs.start[sub];
2481 last = search_regs.end[sub];
fa8ed3e0
RS
2482
2483 if (NILP (string))
ac3b28b1 2484 pos_byte = CHAR_TO_BYTE (pos);
fa8ed3e0 2485 else
ac3b28b1 2486 pos_byte = string_char_to_byte (string, pos);
fa8ed3e0 2487
ca1d1d23
JB
2488 prevc = '\n';
2489 case_action = all_caps;
2490
2491 /* some_multiletter_word is set nonzero if any original word
2492 is more than one letter long. */
2493 some_multiletter_word = 0;
97832bd0 2494 some_lowercase = 0;
208767c3 2495 some_nonuppercase_initial = 0;
73dc8771 2496 some_uppercase = 0;
ca1d1d23 2497
ac3b28b1 2498 while (pos < last)
ca1d1d23 2499 {
080c45fd 2500 if (NILP (string))
ac3b28b1 2501 {
93daa011 2502 c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
ac3b28b1
KH
2503 INC_BOTH (pos, pos_byte);
2504 }
080c45fd 2505 else
93daa011 2506 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
080c45fd 2507
ca1d1d23
JB
2508 if (LOWERCASEP (c))
2509 {
2510 /* Cannot be all caps if any original char is lower case */
2511
97832bd0 2512 some_lowercase = 1;
ca1d1d23 2513 if (SYNTAX (prevc) != Sword)
208767c3 2514 some_nonuppercase_initial = 1;
ca1d1d23
JB
2515 else
2516 some_multiletter_word = 1;
2517 }
d16c2b66 2518 else if (UPPERCASEP (c))
ca1d1d23 2519 {
73dc8771 2520 some_uppercase = 1;
97832bd0 2521 if (SYNTAX (prevc) != Sword)
c4d460ce 2522 ;
97832bd0 2523 else
ca1d1d23
JB
2524 some_multiletter_word = 1;
2525 }
208767c3
RS
2526 else
2527 {
2528 /* If the initial is a caseless word constituent,
2529 treat that like a lowercase initial. */
2530 if (SYNTAX (prevc) != Sword)
2531 some_nonuppercase_initial = 1;
2532 }
ca1d1d23
JB
2533
2534 prevc = c;
2535 }
2536
97832bd0
RS
2537 /* Convert to all caps if the old text is all caps
2538 and has at least one multiletter word. */
2539 if (! some_lowercase && some_multiletter_word)
2540 case_action = all_caps;
c4d460ce 2541 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2542 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2543 case_action = cap_initial;
208767c3 2544 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2545 /* Should x -> yz, operating on X, give Yz or YZ?
2546 We'll assume the latter. */
2547 case_action = all_caps;
97832bd0
RS
2548 else
2549 case_action = nochange;
ca1d1d23
JB
2550 }
2551
080c45fd
RS
2552 /* Do replacement in a string. */
2553 if (!NILP (string))
2554 {
2555 Lisp_Object before, after;
2556
2557 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2558 make_number (search_regs.start[sub]));
2559 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2560
636a5e28
RS
2561 /* Substitute parts of the match into NEWTEXT
2562 if desired. */
080c45fd
RS
2563 if (NILP (literal))
2564 {
e7deaab0
AS
2565 EMACS_INT lastpos = 0;
2566 EMACS_INT lastpos_byte = 0;
080c45fd
RS
2567 /* We build up the substituted string in ACCUM. */
2568 Lisp_Object accum;
2569 Lisp_Object middle;
d5db4077 2570 int length = SBYTES (newtext);
080c45fd
RS
2571
2572 accum = Qnil;
2573
ac3b28b1 2574 for (pos_byte = 0, pos = 0; pos_byte < length;)
080c45fd
RS
2575 {
2576 int substart = -1;
6bbd7a29 2577 int subend = 0;
1e79ec24 2578 int delbackslash = 0;
080c45fd 2579
0c8533c6
RS
2580 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2581
080c45fd
RS
2582 if (c == '\\')
2583 {
0c8533c6 2584 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
177c0ea7 2585
080c45fd
RS
2586 if (c == '&')
2587 {
d7a5ad5f
RS
2588 substart = search_regs.start[sub];
2589 subend = search_regs.end[sub];
080c45fd 2590 }
76fc1ea2 2591 else if (c >= '1' && c <= '9')
080c45fd 2592 {
76fc1ea2
KH
2593 if (search_regs.start[c - '0'] >= 0
2594 && c <= search_regs.num_regs + '0')
080c45fd
RS
2595 {
2596 substart = search_regs.start[c - '0'];
2597 subend = search_regs.end[c - '0'];
2598 }
76fc1ea2
KH
2599 else
2600 {
2601 /* If that subexp did not match,
2602 replace \\N with nothing. */
2603 substart = 0;
2604 subend = 0;
2605 }
080c45fd 2606 }
1e79ec24
KH
2607 else if (c == '\\')
2608 delbackslash = 1;
636a5e28
RS
2609 else
2610 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2611 }
2612 if (substart >= 0)
2613 {
d131e79c
RS
2614 if (pos - 2 != lastpos)
2615 middle = substring_both (newtext, lastpos,
2616 lastpos_byte,
2617 pos - 2, pos_byte - 2);
080c45fd
RS
2618 else
2619 middle = Qnil;
2620 accum = concat3 (accum, middle,
0c8533c6
RS
2621 Fsubstring (string,
2622 make_number (substart),
080c45fd
RS
2623 make_number (subend)));
2624 lastpos = pos;
0c8533c6 2625 lastpos_byte = pos_byte;
080c45fd 2626 }
1e79ec24
KH
2627 else if (delbackslash)
2628 {
d131e79c
RS
2629 middle = substring_both (newtext, lastpos,
2630 lastpos_byte,
2631 pos - 1, pos_byte - 1);
0c8533c6 2632
1e79ec24
KH
2633 accum = concat2 (accum, middle);
2634 lastpos = pos;
0c8533c6 2635 lastpos_byte = pos_byte;
1e79ec24 2636 }
080c45fd
RS
2637 }
2638
d131e79c
RS
2639 if (pos != lastpos)
2640 middle = substring_both (newtext, lastpos,
2641 lastpos_byte,
0c8533c6 2642 pos, pos_byte);
080c45fd
RS
2643 else
2644 middle = Qnil;
2645
2646 newtext = concat2 (accum, middle);
2647 }
2648
636a5e28 2649 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2650 if (case_action == all_caps)
2651 newtext = Fupcase (newtext);
2652 else if (case_action == cap_initial)
2b2eead9 2653 newtext = Fupcase_initials (newtext);
080c45fd
RS
2654
2655 return concat3 (before, newtext, after);
2656 }
2657
09c4719e 2658 /* Record point, then move (quietly) to the start of the match. */
9160906f 2659 if (PT >= search_regs.end[sub])
b0eba991 2660 opoint = PT - ZV;
9160906f
RS
2661 else if (PT > search_regs.start[sub])
2662 opoint = search_regs.end[sub] - ZV;
b0eba991
RS
2663 else
2664 opoint = PT;
2665
886ed6ec
RS
2666 /* If we want non-literal replacement,
2667 perform substitution on the replacement string. */
2668 if (NILP (literal))
ca1d1d23 2669 {
d5db4077 2670 int length = SBYTES (newtext);
68e69fbd
RS
2671 unsigned char *substed;
2672 int substed_alloc_size, substed_len;
3bc25e52
KH
2673 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2674 int str_multibyte = STRING_MULTIBYTE (newtext);
2675 Lisp_Object rev_tbl;
886ed6ec 2676 int really_changed = 0;
3bc25e52 2677
8f924df7 2678 rev_tbl = Qnil;
ac3b28b1 2679
68e69fbd
RS
2680 substed_alloc_size = length * 2 + 100;
2681 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2682 substed_len = 0;
2683
3bc25e52
KH
2684 /* Go thru NEWTEXT, producing the actual text to insert in
2685 SUBSTED while adjusting multibyteness to that of the current
2686 buffer. */
ca1d1d23 2687
ac3b28b1 2688 for (pos_byte = 0, pos = 0; pos_byte < length;)
ca1d1d23 2689 {
68e69fbd 2690 unsigned char str[MAX_MULTIBYTE_LENGTH];
f8ce8a0d
GM
2691 unsigned char *add_stuff = NULL;
2692 int add_len = 0;
68e69fbd 2693 int idx = -1;
9a76659d 2694
3bc25e52
KH
2695 if (str_multibyte)
2696 {
eb99a8dd 2697 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
3bc25e52
KH
2698 if (!buf_multibyte)
2699 c = multibyte_char_to_unibyte (c, rev_tbl);
2700 }
2701 else
2702 {
2703 /* Note that we don't have to increment POS. */
5d69fe10 2704 c = SREF (newtext, pos_byte++);
3bc25e52 2705 if (buf_multibyte)
4c0354d7 2706 MAKE_CHAR_MULTIBYTE (c);
3bc25e52 2707 }
ac3b28b1 2708
68e69fbd
RS
2709 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2710 or set IDX to a match index, which means put that part
2711 of the buffer text into SUBSTED. */
2712
ca1d1d23
JB
2713 if (c == '\\')
2714 {
886ed6ec
RS
2715 really_changed = 1;
2716
3bc25e52
KH
2717 if (str_multibyte)
2718 {
eb99a8dd
KH
2719 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2720 pos, pos_byte);
071ce769 2721 if (!buf_multibyte && !ASCII_CHAR_P (c))
3bc25e52
KH
2722 c = multibyte_char_to_unibyte (c, rev_tbl);
2723 }
2724 else
2725 {
d5db4077 2726 c = SREF (newtext, pos_byte++);
3bc25e52 2727 if (buf_multibyte)
4c0354d7 2728 MAKE_CHAR_MULTIBYTE (c);
3bc25e52
KH
2729 }
2730
ca1d1d23 2731 if (c == '&')
68e69fbd 2732 idx = sub;
78445046 2733 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
2734 {
2735 if (search_regs.start[c - '0'] >= 1)
68e69fbd 2736 idx = c - '0';
ca1d1d23 2737 }
636a5e28 2738 else if (c == '\\')
68e69fbd 2739 add_len = 1, add_stuff = "\\";
636a5e28 2740 else
3bc25e52
KH
2741 {
2742 xfree (substed);
2743 error ("Invalid use of `\\' in replacement text");
2744 }
ca1d1d23
JB
2745 }
2746 else
68e69fbd
RS
2747 {
2748 add_len = CHAR_STRING (c, str);
2749 add_stuff = str;
2750 }
2751
2752 /* If we want to copy part of a previous match,
2753 set up ADD_STUFF and ADD_LEN to point to it. */
2754 if (idx >= 0)
2755 {
e7deaab0 2756 EMACS_INT begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
68e69fbd
RS
2757 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2758 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2759 move_gap (search_regs.start[idx]);
2760 add_stuff = BYTE_POS_ADDR (begbyte);
2761 }
2762
2763 /* Now the stuff we want to add to SUBSTED
2764 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2765
2766 /* Make sure SUBSTED is big enough. */
2767 if (substed_len + add_len >= substed_alloc_size)
2768 {
2769 substed_alloc_size = substed_len + add_len + 500;
2770 substed = (unsigned char *) xrealloc (substed,
2771 substed_alloc_size + 1);
2772 }
2773
2774 /* Now add to the end of SUBSTED. */
f8ce8a0d
GM
2775 if (add_stuff)
2776 {
2777 bcopy (add_stuff, substed + substed_len, add_len);
2778 substed_len += add_len;
2779 }
ca1d1d23 2780 }
68e69fbd 2781
886ed6ec 2782 if (really_changed)
76fc1ea2
KH
2783 {
2784 if (buf_multibyte)
2785 {
2786 int nchars = multibyte_chars_in_text (substed, substed_len);
68e69fbd 2787
76fc1ea2
KH
2788 newtext = make_multibyte_string (substed, nchars, substed_len);
2789 }
2790 else
2791 newtext = make_unibyte_string (substed, substed_len);
2792 }
68e69fbd 2793 xfree (substed);
ca1d1d23
JB
2794 }
2795
886ed6ec
RS
2796 /* Replace the old text with the new in the cleanest possible way. */
2797 replace_range (search_regs.start[sub], search_regs.end[sub],
2798 newtext, 1, 0, 1);
d5db4077 2799 newpoint = search_regs.start[sub] + SCHARS (newtext);
ca1d1d23
JB
2800
2801 if (case_action == all_caps)
886ed6ec
RS
2802 Fupcase_region (make_number (search_regs.start[sub]),
2803 make_number (newpoint));
ca1d1d23 2804 else if (case_action == cap_initial)
886ed6ec
RS
2805 Fupcase_initials_region (make_number (search_regs.start[sub]),
2806 make_number (newpoint));
3e18eecf 2807
98e942e0
RS
2808 /* Adjust search data for this change. */
2809 {
e7deaab0
AS
2810 EMACS_INT oldend = search_regs.end[sub];
2811 EMACS_INT oldstart = search_regs.start[sub];
2812 EMACS_INT change = newpoint - search_regs.end[sub];
98e942e0
RS
2813 int i;
2814
2815 for (i = 0; i < search_regs.num_regs; i++)
2816 {
41c01205 2817 if (search_regs.start[i] >= oldend)
98e942e0 2818 search_regs.start[i] += change;
41c01205
DK
2819 else if (search_regs.start[i] > oldstart)
2820 search_regs.start[i] = oldstart;
2821 if (search_regs.end[i] >= oldend)
98e942e0 2822 search_regs.end[i] += change;
41c01205
DK
2823 else if (search_regs.end[i] > oldstart)
2824 search_regs.end[i] = oldstart;
98e942e0
RS
2825 }
2826 }
2827
b0eba991 2828 /* Put point back where it was in the text. */
8d808a65 2829 if (opoint <= 0)
fa8ed3e0 2830 TEMP_SET_PT (opoint + ZV);
b0eba991 2831 else
fa8ed3e0 2832 TEMP_SET_PT (opoint);
b0eba991
RS
2833
2834 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2835 move_if_not_intangible (newpoint);
177c0ea7 2836
ca1d1d23
JB
2837 return Qnil;
2838}
2839\f
2840static Lisp_Object
971de7fb 2841match_limit (Lisp_Object num, int beginningp)
ca1d1d23
JB
2842{
2843 register int n;
2844
b7826503 2845 CHECK_NUMBER (num);
ca1d1d23 2846 n = XINT (num);
f90a5bf5 2847 if (n < 0)
bd2cbd56 2848 args_out_of_range (num, make_number (0));
f90a5bf5
RS
2849 if (search_regs.num_regs <= 0)
2850 error ("No match data, because no search succeeded");
9b9ceb61 2851 if (n >= search_regs.num_regs
4746118a 2852 || search_regs.start[n] < 0)
ca1d1d23
JB
2853 return Qnil;
2854 return (make_number ((beginningp) ? search_regs.start[n]
2855 : search_regs.end[n]));
2856}
2857
2858DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
8c1a1077
PJ
2859 doc: /* Return position of start of text matched by last search.
2860SUBEXP, a number, specifies which parenthesized expression in the last
2861 regexp.
2862Value is nil if SUBEXPth pair didn't match, or there were less than
2863 SUBEXP pairs.
2864Zero means the entire text matched by the whole regexp or whole string. */)
2865 (subexp)
5806161b 2866 Lisp_Object subexp;
ca1d1d23 2867{
5806161b 2868 return match_limit (subexp, 1);
ca1d1d23
JB
2869}
2870
2871DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
8c1a1077
PJ
2872 doc: /* Return position of end of text matched by last search.
2873SUBEXP, a number, specifies which parenthesized expression in the last
2874 regexp.
2875Value is nil if SUBEXPth pair didn't match, or there were less than
2876 SUBEXP pairs.
2877Zero means the entire text matched by the whole regexp or whole string. */)
2878 (subexp)
5806161b 2879 Lisp_Object subexp;
ca1d1d23 2880{
5806161b 2881 return match_limit (subexp, 0);
177c0ea7 2882}
ca1d1d23 2883
abd0071c 2884DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
8c1a1077
PJ
2885 doc: /* Return a list containing all info on what the last search matched.
2886Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2887All the elements are markers or nil (nil if the Nth pair didn't match)
2888if the last match was on a buffer; integers or nil if a string was matched.
febb8aba 2889Use `set-match-data' to reinstate the data in this list.
8c1a1077 2890
41c01205
DK
2891If INTEGERS (the optional first argument) is non-nil, always use
2892integers \(rather than markers) to represent buffer positions. In
2893this case, and if the last match was in a buffer, the buffer will get
2894stored as one additional element at the end of the list.
2895
abd0071c
KS
2896If REUSE is a list, reuse it as part of the value. If REUSE is long
2897enough to hold all the values, and if INTEGERS is non-nil, no consing
2898is done.
2899
2900If optional third arg RESEAT is non-nil, any previous markers on the
2901REUSE list will be modified to point to nowhere.
2902
140a6b7e 2903Return value is undefined if the last search failed. */)
abd0071c
KS
2904 (integers, reuse, reseat)
2905 Lisp_Object integers, reuse, reseat;
ca1d1d23 2906{
56256c2a 2907 Lisp_Object tail, prev;
4746118a 2908 Lisp_Object *data;
ca1d1d23
JB
2909 int i, len;
2910
abd0071c
KS
2911 if (!NILP (reseat))
2912 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2913 if (MARKERP (XCAR (tail)))
2914 {
51f10faa 2915 unchain_marker (XMARKER (XCAR (tail)));
abd0071c
KS
2916 XSETCAR (tail, Qnil);
2917 }
2918
daa37602 2919 if (NILP (last_thing_searched))
c36bcf1b 2920 return Qnil;
daa37602 2921
6bbd7a29
GM
2922 prev = Qnil;
2923
41c01205 2924 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
4746118a
JB
2925 * sizeof (Lisp_Object));
2926
41c01205 2927 len = 0;
4746118a 2928 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2929 {
2930 int start = search_regs.start[i];
2931 if (start >= 0)
2932 {
56256c2a
RS
2933 if (EQ (last_thing_searched, Qt)
2934 || ! NILP (integers))
ca1d1d23 2935 {
c235cce7
KH
2936 XSETFASTINT (data[2 * i], start);
2937 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2938 }
0ed62dc7 2939 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2940 {
2941 data[2 * i] = Fmake_marker ();
daa37602
JB
2942 Fset_marker (data[2 * i],
2943 make_number (start),
2944 last_thing_searched);
ca1d1d23
JB
2945 data[2 * i + 1] = Fmake_marker ();
2946 Fset_marker (data[2 * i + 1],
177c0ea7 2947 make_number (search_regs.end[i]),
daa37602 2948 last_thing_searched);
ca1d1d23 2949 }
daa37602
JB
2950 else
2951 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2952 abort ();
2953
abd0071c 2954 len = 2 * i + 2;
ca1d1d23
JB
2955 }
2956 else
abd0071c 2957 data[2 * i] = data[2 * i + 1] = Qnil;
ca1d1d23 2958 }
56256c2a 2959
bd2cbd56 2960 if (BUFFERP (last_thing_searched) && !NILP (integers))
41c01205 2961 {
bd2cbd56 2962 data[len] = last_thing_searched;
41c01205
DK
2963 len++;
2964 }
2965
56256c2a
RS
2966 /* If REUSE is not usable, cons up the values and return them. */
2967 if (! CONSP (reuse))
41c01205 2968 return Flist (len, data);
56256c2a
RS
2969
2970 /* If REUSE is a list, store as many value elements as will fit
2971 into the elements of REUSE. */
2972 for (i = 0, tail = reuse; CONSP (tail);
c1d497be 2973 i++, tail = XCDR (tail))
56256c2a 2974 {
41c01205 2975 if (i < len)
f3fbd155 2976 XSETCAR (tail, data[i]);
56256c2a 2977 else
f3fbd155 2978 XSETCAR (tail, Qnil);
56256c2a
RS
2979 prev = tail;
2980 }
2981
2982 /* If we couldn't fit all value elements into REUSE,
2983 cons up the rest of them and add them to the end of REUSE. */
41c01205
DK
2984 if (i < len)
2985 XSETCDR (prev, Flist (len - i, data + i));
56256c2a
RS
2986
2987 return reuse;
ca1d1d23
JB
2988}
2989
b51d6c92
SM
2990/* We used to have an internal use variant of `reseat' described as:
2991
2992 If RESEAT is `evaporate', put the markers back on the free list
2993 immediately. No other references to the markers must exist in this
2994 case, so it is used only internally on the unwind stack and
2995 save-match-data from Lisp.
2996
2997 But it was ill-conceived: those supposedly-internal markers get exposed via
2998 the undo-list, so freeing them here is unsafe. */
ca1d1d23 2999
abd0071c 3000DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
8c1a1077 3001 doc: /* Set internal data on last search match from elements of LIST.
abd0071c
KS
3002LIST should have been created by calling `match-data' previously.
3003
51f10faa 3004If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
abd0071c
KS
3005 (list, reseat)
3006 register Lisp_Object list, reseat;
ca1d1d23
JB
3007{
3008 register int i;
3009 register Lisp_Object marker;
3010
7074fde6
FP
3011 if (running_asynch_code)
3012 save_search_regs ();
3013
29100cea 3014 CHECK_LIST (list);
ca1d1d23 3015
41c01205
DK
3016 /* Unless we find a marker with a buffer or an explicit buffer
3017 in LIST, assume that this match data came from a string. */
daa37602
JB
3018 last_thing_searched = Qt;
3019
4746118a
JB
3020 /* Allocate registers if they don't already exist. */
3021 {
d084e942 3022 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
3023
3024 if (length > search_regs.num_regs)
3025 {
1113d9db
JB
3026 if (search_regs.num_regs == 0)
3027 {
3028 search_regs.start
3029 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3030 search_regs.end
3031 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3032 }
4746118a 3033 else
1113d9db
JB
3034 {
3035 search_regs.start
3036 = (regoff_t *) xrealloc (search_regs.start,
3037 length * sizeof (regoff_t));
3038 search_regs.end
3039 = (regoff_t *) xrealloc (search_regs.end,
3040 length * sizeof (regoff_t));
3041 }
4746118a 3042
e62371e9
KH
3043 for (i = search_regs.num_regs; i < length; i++)
3044 search_regs.start[i] = -1;
3045
487282dc 3046 search_regs.num_regs = length;
4746118a 3047 }
ca1d1d23 3048
abd0071c 3049 for (i = 0; CONSP (list); i++)
41c01205 3050 {
abd0071c 3051 marker = XCAR (list);
bd2cbd56 3052 if (BUFFERP (marker))
c3762cbd 3053 {
bd2cbd56 3054 last_thing_searched = marker;
c3762cbd
DK
3055 break;
3056 }
3057 if (i >= length)
3058 break;
41c01205
DK
3059 if (NILP (marker))
3060 {
3061 search_regs.start[i] = -1;
abd0071c 3062 list = XCDR (list);
41c01205
DK
3063 }
3064 else
3065 {
e7deaab0 3066 EMACS_INT from;
abd0071c 3067 Lisp_Object m;
e2811828 3068
abd0071c 3069 m = marker;
41c01205
DK
3070 if (MARKERP (marker))
3071 {
3072 if (XMARKER (marker)->buffer == 0)
3073 XSETFASTINT (marker, 0);
3074 else
3075 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3076 }
e2811828 3077
41c01205
DK
3078 CHECK_NUMBER_COERCE_MARKER (marker);
3079 from = XINT (marker);
e2811828 3080
abd0071c
KS
3081 if (!NILP (reseat) && MARKERP (m))
3082 {
b51d6c92 3083 unchain_marker (XMARKER (m));
9ad54a7e 3084 XSETCAR (list, Qnil);
abd0071c
KS
3085 }
3086
3087 if ((list = XCDR (list), !CONSP (list)))
3088 break;
3089
3090 m = marker = XCAR (list);
3091
41c01205
DK
3092 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3093 XSETFASTINT (marker, 0);
e2811828 3094
41c01205
DK
3095 CHECK_NUMBER_COERCE_MARKER (marker);
3096 search_regs.start[i] = from;
3097 search_regs.end[i] = XINT (marker);
abd0071c
KS
3098
3099 if (!NILP (reseat) && MARKERP (m))
3100 {
b51d6c92 3101 unchain_marker (XMARKER (m));
9ad54a7e 3102 XSETCAR (list, Qnil);
abd0071c 3103 }
41c01205 3104 }
abd0071c 3105 list = XCDR (list);
41c01205 3106 }
ca1d1d23 3107
41c01205
DK
3108 for (; i < search_regs.num_regs; i++)
3109 search_regs.start[i] = -1;
3110 }
ca1d1d23 3111
177c0ea7 3112 return Qnil;
ca1d1d23
JB
3113}
3114
7074fde6
FP
3115/* If non-zero the match data have been saved in saved_search_regs
3116 during the execution of a sentinel or filter. */
75ebf74b 3117static int search_regs_saved;
7074fde6 3118static struct re_registers saved_search_regs;
41c01205 3119static Lisp_Object saved_last_thing_searched;
7074fde6
FP
3120
3121/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3122 if asynchronous code (filter or sentinel) is running. */
3123static void
971de7fb 3124save_search_regs (void)
7074fde6
FP
3125{
3126 if (!search_regs_saved)
3127 {
3128 saved_search_regs.num_regs = search_regs.num_regs;
3129 saved_search_regs.start = search_regs.start;
3130 saved_search_regs.end = search_regs.end;
41c01205
DK
3131 saved_last_thing_searched = last_thing_searched;
3132 last_thing_searched = Qnil;
7074fde6 3133 search_regs.num_regs = 0;
2d4a771a
RS
3134 search_regs.start = 0;
3135 search_regs.end = 0;
7074fde6
FP
3136
3137 search_regs_saved = 1;
3138 }
3139}
3140
3141/* Called upon exit from filters and sentinels. */
3142void
971de7fb 3143restore_search_regs (void)
7074fde6
FP
3144{
3145 if (search_regs_saved)
3146 {
3147 if (search_regs.num_regs > 0)
3148 {
3149 xfree (search_regs.start);
3150 xfree (search_regs.end);
3151 }
3152 search_regs.num_regs = saved_search_regs.num_regs;
3153 search_regs.start = saved_search_regs.start;
3154 search_regs.end = saved_search_regs.end;
41c01205
DK
3155 last_thing_searched = saved_last_thing_searched;
3156 saved_last_thing_searched = Qnil;
7074fde6
FP
3157 search_regs_saved = 0;
3158 }
3159}
3160
abd0071c 3161static Lisp_Object
971de7fb 3162unwind_set_match_data (Lisp_Object list)
abd0071c 3163{
b51d6c92
SM
3164 /* It is NOT ALWAYS safe to free (evaporate) the markers immediately. */
3165 return Fset_match_data (list, Qt);
abd0071c
KS
3166}
3167
3168/* Called to unwind protect the match data. */
3169void
971de7fb 3170record_unwind_save_match_data (void)
abd0071c
KS
3171{
3172 record_unwind_protect (unwind_set_match_data,
3173 Fmatch_data (Qnil, Qnil, Qnil));
3174}
3175
ca1d1d23
JB
3176/* Quote a string to inactivate reg-expr chars */
3177
3178DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
8c1a1077
PJ
3179 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
3180 (string)
5806161b 3181 Lisp_Object string;
ca1d1d23
JB
3182{
3183 register unsigned char *in, *out, *end;
3184 register unsigned char *temp;
0c8533c6 3185 int backslashes_added = 0;
ca1d1d23 3186
b7826503 3187 CHECK_STRING (string);
ca1d1d23 3188
d5db4077 3189 temp = (unsigned char *) alloca (SBYTES (string) * 2);
ca1d1d23
JB
3190
3191 /* Now copy the data into the new string, inserting escapes. */
3192
d5db4077
KR
3193 in = SDATA (string);
3194 end = in + SBYTES (string);
177c0ea7 3195 out = temp;
ca1d1d23
JB
3196
3197 for (; in != end; in++)
3198 {
66bc6082 3199 if (*in == '['
ca1d1d23
JB
3200 || *in == '*' || *in == '.' || *in == '\\'
3201 || *in == '?' || *in == '+'
3202 || *in == '^' || *in == '$')
0c8533c6 3203 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
3204 *out++ = *in;
3205 }
3206
3f8100f1 3207 return make_specified_string (temp,
d5db4077 3208 SCHARS (string) + backslashes_added,
3f8100f1
RS
3209 out - temp,
3210 STRING_MULTIBYTE (string));
ca1d1d23 3211}
177c0ea7 3212\f
dfcf069d 3213void
971de7fb 3214syms_of_search (void)
ca1d1d23
JB
3215{
3216 register int i;
3217
487282dc
KH
3218 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3219 {
3220 searchbufs[i].buf.allocated = 100;
b23c0a83 3221 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
487282dc
KH
3222 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3223 searchbufs[i].regexp = Qnil;
ecdb561e 3224 searchbufs[i].whitespace_regexp = Qnil;
b69e3c18 3225 searchbufs[i].syntax_table = Qnil;
487282dc 3226 staticpro (&searchbufs[i].regexp);
aa77b5ce 3227 staticpro (&searchbufs[i].whitespace_regexp);
b69e3c18 3228 staticpro (&searchbufs[i].syntax_table);
487282dc
KH
3229 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3230 }
3231 searchbuf_head = &searchbufs[0];
ca1d1d23 3232
d67b4f80 3233 Qsearch_failed = intern_c_string ("search-failed");
ca1d1d23 3234 staticpro (&Qsearch_failed);
d67b4f80 3235 Qinvalid_regexp = intern_c_string ("invalid-regexp");
ca1d1d23
JB
3236 staticpro (&Qinvalid_regexp);
3237
3238 Fput (Qsearch_failed, Qerror_conditions,
d67b4f80 3239 pure_cons (Qsearch_failed, pure_cons (Qerror, Qnil)));
ca1d1d23 3240 Fput (Qsearch_failed, Qerror_message,
d67b4f80 3241 make_pure_c_string ("Search failed"));
ca1d1d23
JB
3242
3243 Fput (Qinvalid_regexp, Qerror_conditions,
d67b4f80 3244 pure_cons (Qinvalid_regexp, pure_cons (Qerror, Qnil)));
ca1d1d23 3245 Fput (Qinvalid_regexp, Qerror_message,
d67b4f80 3246 make_pure_c_string ("Invalid regexp"));
ca1d1d23 3247
daa37602
JB
3248 last_thing_searched = Qnil;
3249 staticpro (&last_thing_searched);
3250
0f6af254
DK
3251 saved_last_thing_searched = Qnil;
3252 staticpro (&saved_last_thing_searched);
3253
41a33295 3254 DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
e2811828 3255 doc: /* Regexp to substitute for bunches of spaces in regexp search.
f31a9a68
RS
3256Some commands use this for user-specified regexps.
3257Spaces that occur inside character classes or repetition operators
3258or other such regexp constructs are not replaced with this.
3259A value of nil (which is the normal value) means treat spaces literally. */);
41a33295 3260 Vsearch_spaces_regexp = Qnil;
f31a9a68 3261
ef887810
RS
3262 DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data,
3263 doc: /* Internal use only.
39d0bf74
RS
3264If non-nil, the primitive searching and matching functions
3265such as `looking-at', `string-match', `re-search-forward', etc.,
3266do not set the match data. The proper way to use this variable
3267is to bind it with `let' around a small expression. */);
ef887810
RS
3268 Vinhibit_changing_match_data = Qnil;
3269
ca1d1d23 3270 defsubr (&Slooking_at);
b819a390
RS
3271 defsubr (&Sposix_looking_at);
3272 defsubr (&Sstring_match);
3273 defsubr (&Sposix_string_match);
ca1d1d23
JB
3274 defsubr (&Ssearch_forward);
3275 defsubr (&Ssearch_backward);
3276 defsubr (&Sword_search_forward);
3277 defsubr (&Sword_search_backward);
c6e0e1cc
CY
3278 defsubr (&Sword_search_forward_lax);
3279 defsubr (&Sword_search_backward_lax);
ca1d1d23
JB
3280 defsubr (&Sre_search_forward);
3281 defsubr (&Sre_search_backward);
b819a390
RS
3282 defsubr (&Sposix_search_forward);
3283 defsubr (&Sposix_search_backward);
ca1d1d23
JB
3284 defsubr (&Sreplace_match);
3285 defsubr (&Smatch_beginning);
3286 defsubr (&Smatch_end);
3287 defsubr (&Smatch_data);
3f1c005b 3288 defsubr (&Sset_match_data);
ca1d1d23
JB
3289 defsubr (&Sregexp_quote);
3290}
76fc1ea2
KH
3291
3292/* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3293 (do not change this comment) */