Update for font-backend changes.
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
429ab54e 2 Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
8cabe764
GM
3 2003, 2004, 2005, 2006, 2007, 2008
4 Free Software Foundation, Inc.
ca1d1d23
JB
5
6This file is part of GNU Emacs.
7
8GNU Emacs is free software; you can redistribute it and/or modify
9it under the terms of the GNU General Public License as published by
684d6f5b 10the Free Software Foundation; either version 3, or (at your option)
ca1d1d23
JB
11any later version.
12
13GNU Emacs is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with GNU Emacs; see the file COPYING. If not, write to
4fc5845f
LK
20the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21Boston, MA 02110-1301, USA. */
ca1d1d23
JB
22
23
18160b98 24#include <config.h>
ca1d1d23
JB
25#include "lisp.h"
26#include "syntax.h"
5679531d 27#include "category.h"
ca1d1d23 28#include "buffer.h"
76eb0881 29#include "character.h"
89ad6492 30#include "charset.h"
9169c321 31#include "region-cache.h"
ca1d1d23 32#include "commands.h"
9ac0d9e0 33#include "blockinput.h"
bf1760bb 34#include "intervals.h"
4746118a 35
ca1d1d23
JB
36#include <sys/types.h>
37#include "regex.h"
38
1d288aef 39#define REGEXP_CACHE_SIZE 20
ca1d1d23 40
487282dc
KH
41/* If the regexp is non-nil, then the buffer contains the compiled form
42 of that regexp, suitable for searching. */
1d288aef
RS
43struct regexp_cache
44{
487282dc 45 struct regexp_cache *next;
ecdb561e 46 Lisp_Object regexp, whitespace_regexp;
b69e3c18 47 /* Syntax table for which the regexp applies. We need this because
58e95211
SM
48 of character classes. If this is t, then the compiled pattern is valid
49 for any syntax-table. */
b69e3c18 50 Lisp_Object syntax_table;
487282dc
KH
51 struct re_pattern_buffer buf;
52 char fastmap[0400];
b819a390
RS
53 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
54 char posix;
487282dc 55};
ca1d1d23 56
487282dc
KH
57/* The instances of that struct. */
58struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 59
487282dc
KH
60/* The head of the linked list; points to the most recently used buffer. */
61struct regexp_cache *searchbuf_head;
ca1d1d23 62
ca1d1d23 63
4746118a
JB
64/* Every call to re_match, etc., must pass &search_regs as the regs
65 argument unless you can show it is unnecessary (i.e., if re_match
66 is certainly going to be called again before region-around-match
67 can be called).
68
69 Since the registers are now dynamically allocated, we need to make
70 sure not to refer to the Nth register before checking that it has
1113d9db
JB
71 been allocated by checking search_regs.num_regs.
72
73 The regex code keeps track of whether it has allocated the search
487282dc
KH
74 buffer using bits in the re_pattern_buffer. This means that whenever
75 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
76 allocated any registers, and will allocate new registers the next
77 time you call a searching or matching function. Therefore, we need
78 to call re_set_registers after compiling a new pattern or after
79 setting the match registers, so that the regex functions will be
80 able to free or re-allocate it properly. */
ca1d1d23
JB
81static struct re_registers search_regs;
82
daa37602
JB
83/* The buffer in which the last search was performed, or
84 Qt if the last search was done in a string;
85 Qnil if no searching has been done yet. */
86static Lisp_Object last_thing_searched;
ca1d1d23 87
8e6208c5 88/* error condition signaled when regexp compile_pattern fails */
ca1d1d23
JB
89
90Lisp_Object Qinvalid_regexp;
91
06f77a8a
KS
92/* Error condition used for failing searches */
93Lisp_Object Qsearch_failed;
94
41a33295 95Lisp_Object Vsearch_spaces_regexp;
f31a9a68 96
ef887810
RS
97/* If non-nil, the match data will not be changed during call to
98 searching or matching functions. This variable is for internal use
99 only. */
100Lisp_Object Vinhibit_changing_match_data;
101
ca325161 102static void set_search_regs ();
044f81f1 103static void save_search_regs ();
facdc750
RS
104static int simple_search ();
105static int boyer_moore ();
b819a390 106static int search_buffer ();
01d09305 107static void matcher_overflow () NO_RETURN;
b819a390 108
ca1d1d23
JB
109static void
110matcher_overflow ()
111{
112 error ("Stack overflow in regexp matcher");
113}
114
b819a390
RS
115/* Compile a regexp and signal a Lisp error if anything goes wrong.
116 PATTERN is the pattern to compile.
117 CP is the place to put the result.
facdc750 118 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
119 REGP is the structure that says where to store the "register"
120 values that will result from matching this pattern.
121 If it is 0, we should compile the pattern not to record any
122 subexpression bounds.
123 POSIX is nonzero if we want full backtracking (POSIX style)
5679531d 124 for this pattern. 0 means backtrack only enough to get a valid match.
bbc73b48
KH
125
126 The behavior also depends on Vsearch_spaces_regexp. */
ca1d1d23 127
487282dc 128static void
89ad6492 129compile_pattern_1 (cp, pattern, translate, regp, posix)
487282dc 130 struct regexp_cache *cp;
ca1d1d23 131 Lisp_Object pattern;
facdc750 132 Lisp_Object translate;
487282dc 133 struct re_registers *regp;
b819a390 134 int posix;
ca1d1d23 135{
d451e4db 136 char *val;
b819a390 137 reg_syntax_t old;
ca1d1d23 138
487282dc 139 cp->regexp = Qnil;
59fab369 140 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
b819a390 141 cp->posix = posix;
93daa011 142 cp->buf.multibyte = STRING_MULTIBYTE (pattern);
89ad6492 143 cp->buf.charset_unibyte = charset_unibyte;
d8c85250
CY
144 if (STRINGP (Vsearch_spaces_regexp))
145 cp->whitespace_regexp = Vsearch_spaces_regexp;
146 else
147 cp->whitespace_regexp = Qnil;
148
e7b2dd2e
RS
149 /* rms: I think BLOCK_INPUT is not needed here any more,
150 because regex.c defines malloc to call xmalloc.
151 Using BLOCK_INPUT here means the debugger won't run if an error occurs.
152 So let's turn it off. */
153 /* BLOCK_INPUT; */
fb4a568d 154 old = re_set_syntax (RE_SYNTAX_EMACS
b819a390 155 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
d8c85250
CY
156
157 if (STRINGP (Vsearch_spaces_regexp))
158 re_set_whitespace_regexp (SDATA (Vsearch_spaces_regexp));
159 else
160 re_set_whitespace_regexp (NULL);
bbc73b48 161
8f924df7
KH
162 val = (char *) re_compile_pattern ((char *) SDATA (pattern),
163 SBYTES (pattern), &cp->buf);
bbc73b48 164
58e95211
SM
165 /* If the compiled pattern hard codes some of the contents of the
166 syntax-table, it can only be reused with *this* syntax table. */
167 cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
168
bbc73b48
KH
169 re_set_whitespace_regexp (NULL);
170
b819a390 171 re_set_syntax (old);
e7b2dd2e 172 /* UNBLOCK_INPUT; */
ca1d1d23 173 if (val)
06f77a8a 174 xsignal1 (Qinvalid_regexp, build_string (val));
1113d9db 175
487282dc 176 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
177}
178
6efc7887
RS
179/* Shrink each compiled regexp buffer in the cache
180 to the size actually used right now.
181 This is called from garbage collection. */
182
183void
184shrink_regexp_cache ()
185{
a968f437 186 struct regexp_cache *cp;
6efc7887
RS
187
188 for (cp = searchbuf_head; cp != 0; cp = cp->next)
189 {
190 cp->buf.allocated = cp->buf.used;
191 cp->buf.buffer
b23c0a83 192 = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
6efc7887
RS
193 }
194}
195
54dd3310
SM
196/* Clear the regexp cache w.r.t. a particular syntax table,
197 because it was changed.
e5b94d44
CY
198 There is no danger of memory leak here because re_compile_pattern
199 automagically manages the memory in each re_pattern_buffer struct,
200 based on its `allocated' and `buffer' values. */
201void
202clear_regexp_cache ()
203{
204 int i;
205
206 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
54dd3310
SM
207 /* It's tempting to compare with the syntax-table we've actually changd,
208 but it's not sufficient because char-table inheritance mewans that
209 modifying one syntax-table can change others at the same time. */
210 if (!EQ (searchbufs[i].syntax_table, Qt))
211 searchbufs[i].regexp = Qnil;
e5b94d44
CY
212}
213
487282dc 214/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
215 the cache.
216 PATTERN is the pattern to compile.
facdc750 217 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
218 REGP is the structure that says where to store the "register"
219 values that will result from matching this pattern.
220 If it is 0, we should compile the pattern not to record any
221 subexpression bounds.
222 POSIX is nonzero if we want full backtracking (POSIX style)
223 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
224
225struct re_pattern_buffer *
0c8533c6 226compile_pattern (pattern, regp, translate, posix, multibyte)
487282dc
KH
227 Lisp_Object pattern;
228 struct re_registers *regp;
facdc750 229 Lisp_Object translate;
0c8533c6 230 int posix, multibyte;
487282dc
KH
231{
232 struct regexp_cache *cp, **cpp;
233
234 for (cpp = &searchbuf_head; ; cpp = &cp->next)
235 {
236 cp = *cpp;
f1b9c7c1
KR
237 /* Entries are initialized to nil, and may be set to nil by
238 compile_pattern_1 if the pattern isn't valid. Don't apply
49a5f770
KR
239 string accessors in those cases. However, compile_pattern_1
240 is only applied to the cache entry we pick here to reuse. So
241 nil should never appear before a non-nil entry. */
7c752c80 242 if (NILP (cp->regexp))
f1b9c7c1 243 goto compile_it;
d5db4077 244 if (SCHARS (cp->regexp) == SCHARS (pattern)
cf69b13e 245 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
1d288aef 246 && !NILP (Fstring_equal (cp->regexp, pattern))
59fab369 247 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
5679531d 248 && cp->posix == posix
58e95211
SM
249 && (EQ (cp->syntax_table, Qt)
250 || EQ (cp->syntax_table, current_buffer->syntax_table))
89ad6492
KH
251 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
252 && cp->buf.charset_unibyte == charset_unibyte)
487282dc
KH
253 break;
254
f1b9c7c1
KR
255 /* If we're at the end of the cache, compile into the nil cell
256 we found, or the last (least recently used) cell with a
257 string value. */
487282dc
KH
258 if (cp->next == 0)
259 {
f1b9c7c1 260 compile_it:
89ad6492 261 compile_pattern_1 (cp, pattern, translate, regp, posix);
487282dc
KH
262 break;
263 }
264 }
265
266 /* When we get here, cp (aka *cpp) contains the compiled pattern,
267 either because we found it in the cache or because we just compiled it.
268 Move it to the front of the queue to mark it as most recently used. */
269 *cpp = cp->next;
270 cp->next = searchbuf_head;
271 searchbuf_head = cp;
1113d9db 272
6639708c
RS
273 /* Advise the searching functions about the space we have allocated
274 for register data. */
275 if (regp)
276 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
277
89ad6492
KH
278 /* The compiled pattern can be used both for mulitbyte and unibyte
279 target. But, we have to tell which the pattern is used for. */
280 cp->buf.target_multibyte = multibyte;
281
487282dc 282 return &cp->buf;
ca1d1d23
JB
283}
284
ca1d1d23 285\f
b819a390
RS
286static Lisp_Object
287looking_at_1 (string, posix)
ca1d1d23 288 Lisp_Object string;
b819a390 289 int posix;
ca1d1d23
JB
290{
291 Lisp_Object val;
292 unsigned char *p1, *p2;
293 int s1, s2;
294 register int i;
487282dc 295 struct re_pattern_buffer *bufp;
ca1d1d23 296
7074fde6
FP
297 if (running_asynch_code)
298 save_search_regs ();
299
910c747a
RS
300 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
301 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
302 = current_buffer->case_eqv_table;
303
b7826503 304 CHECK_STRING (string);
ef887810
RS
305 bufp = compile_pattern (string,
306 (NILP (Vinhibit_changing_match_data)
307 ? &search_regs : NULL),
487282dc 308 (!NILP (current_buffer->case_fold_search)
0190922f 309 ? current_buffer->case_canon_table : Qnil),
0c8533c6
RS
310 posix,
311 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23
JB
312
313 immediate_quit = 1;
314 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
315
316 /* Get pointers and sizes of the two strings
317 that make up the visible portion of the buffer. */
318
319 p1 = BEGV_ADDR;
fa8ed3e0 320 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 321 p2 = GAP_END_ADDR;
fa8ed3e0 322 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
323 if (s1 < 0)
324 {
325 p2 = p1;
fa8ed3e0 326 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
327 s1 = 0;
328 }
329 if (s2 < 0)
330 {
fa8ed3e0 331 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
332 s2 = 0;
333 }
8bb43c28
RS
334
335 re_match_object = Qnil;
177c0ea7 336
487282dc 337 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
ef887810
RS
338 PT_BYTE - BEGV_BYTE,
339 (NILP (Vinhibit_changing_match_data)
340 ? &search_regs : NULL),
fa8ed3e0 341 ZV_BYTE - BEGV_BYTE);
de182d70 342 immediate_quit = 0;
177c0ea7 343
ca1d1d23
JB
344 if (i == -2)
345 matcher_overflow ();
346
347 val = (0 <= i ? Qt : Qnil);
ef887810 348 if (NILP (Vinhibit_changing_match_data) && i >= 0)
fa8ed3e0
RS
349 for (i = 0; i < search_regs.num_regs; i++)
350 if (search_regs.start[i] >= 0)
351 {
352 search_regs.start[i]
353 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
354 search_regs.end[i]
355 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
356 }
ef887810
RS
357
358 /* Set last_thing_searched only when match data is changed. */
359 if (NILP (Vinhibit_changing_match_data))
360 XSETBUFFER (last_thing_searched, current_buffer);
361
ca1d1d23
JB
362 return val;
363}
364
b819a390 365DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
8c1a1077
PJ
366 doc: /* Return t if text after point matches regular expression REGEXP.
367This function modifies the match data that `match-beginning',
368`match-end' and `match-data' access; save and restore the match
369data if you want to preserve them. */)
370 (regexp)
94f94972 371 Lisp_Object regexp;
b819a390 372{
94f94972 373 return looking_at_1 (regexp, 0);
b819a390
RS
374}
375
376DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
8c1a1077
PJ
377 doc: /* Return t if text after point matches regular expression REGEXP.
378Find the longest match, in accord with Posix regular expression rules.
379This function modifies the match data that `match-beginning',
380`match-end' and `match-data' access; save and restore the match
381data if you want to preserve them. */)
382 (regexp)
94f94972 383 Lisp_Object regexp;
b819a390 384{
94f94972 385 return looking_at_1 (regexp, 1);
b819a390
RS
386}
387\f
388static Lisp_Object
389string_match_1 (regexp, string, start, posix)
ca1d1d23 390 Lisp_Object regexp, string, start;
b819a390 391 int posix;
ca1d1d23
JB
392{
393 int val;
487282dc 394 struct re_pattern_buffer *bufp;
0c8533c6
RS
395 int pos, pos_byte;
396 int i;
ca1d1d23 397
7074fde6
FP
398 if (running_asynch_code)
399 save_search_regs ();
400
b7826503
PJ
401 CHECK_STRING (regexp);
402 CHECK_STRING (string);
ca1d1d23
JB
403
404 if (NILP (start))
0c8533c6 405 pos = 0, pos_byte = 0;
ca1d1d23
JB
406 else
407 {
d5db4077 408 int len = SCHARS (string);
ca1d1d23 409
b7826503 410 CHECK_NUMBER (start);
0c8533c6
RS
411 pos = XINT (start);
412 if (pos < 0 && -pos <= len)
413 pos = len + pos;
414 else if (0 > pos || pos > len)
ca1d1d23 415 args_out_of_range (string, start);
0c8533c6 416 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
417 }
418
910c747a
RS
419 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
420 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
421 = current_buffer->case_eqv_table;
422
ef887810
RS
423 bufp = compile_pattern (regexp,
424 (NILP (Vinhibit_changing_match_data)
425 ? &search_regs : NULL),
487282dc 426 (!NILP (current_buffer->case_fold_search)
0190922f 427 ? current_buffer->case_canon_table : Qnil),
0c8533c6
RS
428 posix,
429 STRING_MULTIBYTE (string));
ca1d1d23 430 immediate_quit = 1;
8bb43c28 431 re_match_object = string;
177c0ea7 432
d5db4077
KR
433 val = re_search (bufp, (char *) SDATA (string),
434 SBYTES (string), pos_byte,
435 SBYTES (string) - pos_byte,
ef887810
RS
436 (NILP (Vinhibit_changing_match_data)
437 ? &search_regs : NULL));
ca1d1d23 438 immediate_quit = 0;
ef887810
RS
439
440 /* Set last_thing_searched only when match data is changed. */
441 if (NILP (Vinhibit_changing_match_data))
442 last_thing_searched = Qt;
443
ca1d1d23
JB
444 if (val == -2)
445 matcher_overflow ();
446 if (val < 0) return Qnil;
0c8533c6 447
ef887810
RS
448 if (NILP (Vinhibit_changing_match_data))
449 for (i = 0; i < search_regs.num_regs; i++)
450 if (search_regs.start[i] >= 0)
451 {
452 search_regs.start[i]
453 = string_byte_to_char (string, search_regs.start[i]);
454 search_regs.end[i]
455 = string_byte_to_char (string, search_regs.end[i]);
456 }
0c8533c6
RS
457
458 return make_number (string_byte_to_char (string, val));
ca1d1d23 459}
e59a8453 460
b819a390 461DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
8c1a1077 462 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
b85acc4b 463Matching ignores case if `case-fold-search' is non-nil.
8c1a1077
PJ
464If third arg START is non-nil, start search at that index in STRING.
465For index of first char beyond the match, do (match-end 0).
466`match-end' and `match-beginning' also give indices of substrings
2bd2f32d
RS
467matched by parenthesis constructs in the pattern.
468
469You can use the function `match-string' to extract the substrings
470matched by the parenthesis constructions in REGEXP. */)
8c1a1077 471 (regexp, string, start)
b819a390
RS
472 Lisp_Object regexp, string, start;
473{
474 return string_match_1 (regexp, string, start, 0);
475}
476
477DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
8c1a1077
PJ
478 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
479Find the longest match, in accord with Posix regular expression rules.
480Case is ignored if `case-fold-search' is non-nil in the current buffer.
481If third arg START is non-nil, start search at that index in STRING.
482For index of first char beyond the match, do (match-end 0).
483`match-end' and `match-beginning' also give indices of substrings
484matched by parenthesis constructs in the pattern. */)
485 (regexp, string, start)
b819a390
RS
486 Lisp_Object regexp, string, start;
487{
488 return string_match_1 (regexp, string, start, 1);
489}
490
e59a8453
RS
491/* Match REGEXP against STRING, searching all of STRING,
492 and return the index of the match, or negative on failure.
493 This does not clobber the match data. */
494
495int
496fast_string_match (regexp, string)
497 Lisp_Object regexp, string;
498{
499 int val;
487282dc 500 struct re_pattern_buffer *bufp;
e59a8453 501
facdc750
RS
502 bufp = compile_pattern (regexp, 0, Qnil,
503 0, STRING_MULTIBYTE (string));
e59a8453 504 immediate_quit = 1;
8bb43c28 505 re_match_object = string;
177c0ea7 506
d5db4077
KR
507 val = re_search (bufp, (char *) SDATA (string),
508 SBYTES (string), 0,
509 SBYTES (string), 0);
e59a8453
RS
510 immediate_quit = 0;
511 return val;
512}
5679531d
KH
513
514/* Match REGEXP against STRING, searching all of STRING ignoring case,
515 and return the index of the match, or negative on failure.
0c8533c6
RS
516 This does not clobber the match data.
517 We assume that STRING contains single-byte characters. */
5679531d
KH
518
519extern Lisp_Object Vascii_downcase_table;
520
521int
b4577c63 522fast_c_string_match_ignore_case (regexp, string)
5679531d 523 Lisp_Object regexp;
96b80561 524 const char *string;
5679531d
KH
525{
526 int val;
527 struct re_pattern_buffer *bufp;
528 int len = strlen (string);
529
0c8533c6 530 regexp = string_make_unibyte (regexp);
b4577c63 531 re_match_object = Qt;
5679531d 532 bufp = compile_pattern (regexp, 0,
0190922f 533 Vascii_canon_table, 0,
f8bd51c4 534 0);
5679531d
KH
535 immediate_quit = 1;
536 val = re_search (bufp, string, len, 0, len, 0);
537 immediate_quit = 0;
538 return val;
539}
be5f4dfb
KH
540
541/* Like fast_string_match but ignore case. */
542
543int
544fast_string_match_ignore_case (regexp, string)
545 Lisp_Object regexp, string;
546{
547 int val;
548 struct re_pattern_buffer *bufp;
549
0190922f 550 bufp = compile_pattern (regexp, 0, Vascii_canon_table,
be5f4dfb
KH
551 0, STRING_MULTIBYTE (string));
552 immediate_quit = 1;
553 re_match_object = string;
554
555 val = re_search (bufp, (char *) SDATA (string),
556 SBYTES (string), 0,
557 SBYTES (string), 0);
558 immediate_quit = 0;
559 return val;
560}
ca1d1d23 561\f
9169c321
JB
562/* The newline cache: remembering which sections of text have no newlines. */
563
564/* If the user has requested newline caching, make sure it's on.
565 Otherwise, make sure it's off.
566 This is our cheezy way of associating an action with the change of
567 state of a buffer-local variable. */
568static void
569newline_cache_on_off (buf)
570 struct buffer *buf;
571{
572 if (NILP (buf->cache_long_line_scans))
573 {
574 /* It should be off. */
575 if (buf->newline_cache)
576 {
577 free_region_cache (buf->newline_cache);
578 buf->newline_cache = 0;
579 }
580 }
581 else
582 {
583 /* It should be on. */
584 if (buf->newline_cache == 0)
585 buf->newline_cache = new_region_cache ();
586 }
587}
588
589\f
590/* Search for COUNT instances of the character TARGET between START and END.
591
592 If COUNT is positive, search forwards; END must be >= START.
593 If COUNT is negative, search backwards for the -COUNTth instance;
594 END must be <= START.
595 If COUNT is zero, do anything you please; run rogue, for all I care.
596
597 If END is zero, use BEGV or ZV instead, as appropriate for the
598 direction indicated by COUNT.
ffd56f97
JB
599
600 If we find COUNT instances, set *SHORTAGE to zero, and return the
a9f2a45f 601 position past the COUNTth match. Note that for reverse motion
5bfe95c9 602 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 603
9169c321
JB
604 If we don't find COUNT instances before reaching END, set *SHORTAGE
605 to the number of TARGETs left unfound, and return END.
ffd56f97 606
087a5f81
RS
607 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
608 except when inside redisplay. */
609
dfcf069d 610int
9169c321
JB
611scan_buffer (target, start, end, count, shortage, allow_quit)
612 register int target;
613 int start, end;
614 int count;
615 int *shortage;
087a5f81 616 int allow_quit;
ca1d1d23 617{
9169c321 618 struct region_cache *newline_cache;
177c0ea7 619 int direction;
ffd56f97 620
9169c321
JB
621 if (count > 0)
622 {
623 direction = 1;
624 if (! end) end = ZV;
625 }
626 else
627 {
628 direction = -1;
629 if (! end) end = BEGV;
630 }
ffd56f97 631
9169c321
JB
632 newline_cache_on_off (current_buffer);
633 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
634
635 if (shortage != 0)
636 *shortage = 0;
637
087a5f81 638 immediate_quit = allow_quit;
ca1d1d23 639
ffd56f97 640 if (count > 0)
9169c321 641 while (start != end)
ca1d1d23 642 {
9169c321
JB
643 /* Our innermost scanning loop is very simple; it doesn't know
644 about gaps, buffer ends, or the newline cache. ceiling is
645 the position of the last character before the next such
646 obstacle --- the last character the dumb search loop should
647 examine. */
fa8ed3e0
RS
648 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
649 int start_byte = CHAR_TO_BYTE (start);
67ce527d 650 int tem;
9169c321
JB
651
652 /* If we're looking for a newline, consult the newline cache
653 to see where we can avoid some scanning. */
654 if (target == '\n' && newline_cache)
655 {
656 int next_change;
657 immediate_quit = 0;
658 while (region_cache_forward
fa8ed3e0
RS
659 (current_buffer, newline_cache, start_byte, &next_change))
660 start_byte = next_change;
cbe0db0d 661 immediate_quit = allow_quit;
9169c321 662
fa8ed3e0
RS
663 /* START should never be after END. */
664 if (start_byte > ceiling_byte)
665 start_byte = ceiling_byte;
9169c321
JB
666
667 /* Now the text after start is an unknown region, and
668 next_change is the position of the next known region. */
fa8ed3e0 669 ceiling_byte = min (next_change - 1, ceiling_byte);
9169c321
JB
670 }
671
672 /* The dumb loop can only scan text stored in contiguous
673 bytes. BUFFER_CEILING_OF returns the last character
674 position that is contiguous, so the ceiling is the
675 position after that. */
67ce527d
KH
676 tem = BUFFER_CEILING_OF (start_byte);
677 ceiling_byte = min (tem, ceiling_byte);
9169c321
JB
678
679 {
177c0ea7 680 /* The termination address of the dumb loop. */
fa8ed3e0
RS
681 register unsigned char *ceiling_addr
682 = BYTE_POS_ADDR (ceiling_byte) + 1;
683 register unsigned char *cursor
684 = BYTE_POS_ADDR (start_byte);
9169c321
JB
685 unsigned char *base = cursor;
686
687 while (cursor < ceiling_addr)
688 {
689 unsigned char *scan_start = cursor;
690
691 /* The dumb loop. */
692 while (*cursor != target && ++cursor < ceiling_addr)
693 ;
694
695 /* If we're looking for newlines, cache the fact that
696 the region from start to cursor is free of them. */
697 if (target == '\n' && newline_cache)
698 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
699 start_byte + scan_start - base,
700 start_byte + cursor - base);
9169c321
JB
701
702 /* Did we find the target character? */
703 if (cursor < ceiling_addr)
704 {
705 if (--count == 0)
706 {
707 immediate_quit = 0;
fa8ed3e0 708 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
9169c321
JB
709 }
710 cursor++;
711 }
712 }
713
fa8ed3e0 714 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321 715 }
ca1d1d23
JB
716 }
717 else
9169c321
JB
718 while (start > end)
719 {
720 /* The last character to check before the next obstacle. */
fa8ed3e0
RS
721 int ceiling_byte = CHAR_TO_BYTE (end);
722 int start_byte = CHAR_TO_BYTE (start);
67ce527d 723 int tem;
9169c321
JB
724
725 /* Consult the newline cache, if appropriate. */
726 if (target == '\n' && newline_cache)
727 {
728 int next_change;
729 immediate_quit = 0;
730 while (region_cache_backward
fa8ed3e0
RS
731 (current_buffer, newline_cache, start_byte, &next_change))
732 start_byte = next_change;
cbe0db0d 733 immediate_quit = allow_quit;
9169c321
JB
734
735 /* Start should never be at or before end. */
fa8ed3e0
RS
736 if (start_byte <= ceiling_byte)
737 start_byte = ceiling_byte + 1;
9169c321
JB
738
739 /* Now the text before start is an unknown region, and
740 next_change is the position of the next known region. */
fa8ed3e0 741 ceiling_byte = max (next_change, ceiling_byte);
9169c321
JB
742 }
743
744 /* Stop scanning before the gap. */
67ce527d
KH
745 tem = BUFFER_FLOOR_OF (start_byte - 1);
746 ceiling_byte = max (tem, ceiling_byte);
9169c321
JB
747
748 {
749 /* The termination address of the dumb loop. */
fa8ed3e0
RS
750 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
751 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
752 unsigned char *base = cursor;
753
754 while (cursor >= ceiling_addr)
755 {
756 unsigned char *scan_start = cursor;
757
758 while (*cursor != target && --cursor >= ceiling_addr)
759 ;
760
761 /* If we're looking for newlines, cache the fact that
762 the region from after the cursor to start is free of them. */
763 if (target == '\n' && newline_cache)
764 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
765 start_byte + cursor - base,
766 start_byte + scan_start - base);
9169c321
JB
767
768 /* Did we find the target character? */
769 if (cursor >= ceiling_addr)
770 {
771 if (++count >= 0)
772 {
773 immediate_quit = 0;
fa8ed3e0 774 return BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
775 }
776 cursor--;
777 }
778 }
779
fa8ed3e0 780 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
781 }
782 }
783
ca1d1d23
JB
784 immediate_quit = 0;
785 if (shortage != 0)
ffd56f97 786 *shortage = count * direction;
9169c321 787 return start;
ca1d1d23 788}
fa8ed3e0
RS
789\f
790/* Search for COUNT instances of a line boundary, which means either a
791 newline or (if selective display enabled) a carriage return.
792 Start at START. If COUNT is negative, search backwards.
793
794 We report the resulting position by calling TEMP_SET_PT_BOTH.
795
796 If we find COUNT instances. we position after (always after,
797 even if scanning backwards) the COUNTth match, and return 0.
798
799 If we don't find COUNT instances before reaching the end of the
800 buffer (or the beginning, if scanning backwards), we return
801 the number of line boundaries left unfound, and position at
802 the limit we bumped up against.
803
804 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
d5d57b92 805 except in special cases. */
ca1d1d23 806
63fa018d 807int
fa8ed3e0
RS
808scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
809 int start, start_byte;
810 int limit, limit_byte;
811 register int count;
812 int allow_quit;
63fa018d 813{
fa8ed3e0
RS
814 int direction = ((count > 0) ? 1 : -1);
815
816 register unsigned char *cursor;
817 unsigned char *base;
818
819 register int ceiling;
820 register unsigned char *ceiling_addr;
821
d5d57b92
RS
822 int old_immediate_quit = immediate_quit;
823
fa8ed3e0
RS
824 /* The code that follows is like scan_buffer
825 but checks for either newline or carriage return. */
826
d5d57b92
RS
827 if (allow_quit)
828 immediate_quit++;
fa8ed3e0
RS
829
830 start_byte = CHAR_TO_BYTE (start);
831
832 if (count > 0)
833 {
834 while (start_byte < limit_byte)
835 {
836 ceiling = BUFFER_CEILING_OF (start_byte);
837 ceiling = min (limit_byte - 1, ceiling);
838 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
839 base = (cursor = BYTE_POS_ADDR (start_byte));
840 while (1)
841 {
842 while (*cursor != '\n' && ++cursor != ceiling_addr)
843 ;
844
845 if (cursor != ceiling_addr)
846 {
847 if (--count == 0)
848 {
d5d57b92 849 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
850 start_byte = start_byte + cursor - base + 1;
851 start = BYTE_TO_CHAR (start_byte);
852 TEMP_SET_PT_BOTH (start, start_byte);
853 return 0;
854 }
855 else
856 if (++cursor == ceiling_addr)
857 break;
858 }
859 else
860 break;
861 }
862 start_byte += cursor - base;
863 }
864 }
865 else
866 {
fa8ed3e0
RS
867 while (start_byte > limit_byte)
868 {
869 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
870 ceiling = max (limit_byte, ceiling);
871 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
872 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
873 while (1)
874 {
875 while (--cursor != ceiling_addr && *cursor != '\n')
876 ;
877
878 if (cursor != ceiling_addr)
879 {
880 if (++count == 0)
881 {
d5d57b92 882 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
883 /* Return the position AFTER the match we found. */
884 start_byte = start_byte + cursor - base + 1;
885 start = BYTE_TO_CHAR (start_byte);
886 TEMP_SET_PT_BOTH (start, start_byte);
887 return 0;
888 }
889 }
890 else
891 break;
892 }
893 /* Here we add 1 to compensate for the last decrement
894 of CURSOR, which took it past the valid range. */
895 start_byte += cursor - base + 1;
896 }
897 }
898
899 TEMP_SET_PT_BOTH (limit, limit_byte);
d5d57b92 900 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
901
902 return count * direction;
63fa018d
RS
903}
904
ca1d1d23 905int
fa8ed3e0 906find_next_newline_no_quit (from, cnt)
ca1d1d23
JB
907 register int from, cnt;
908{
fa8ed3e0 909 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
9169c321
JB
910}
911
9169c321
JB
912/* Like find_next_newline, but returns position before the newline,
913 not after, and only search up to TO. This isn't just
914 find_next_newline (...)-1, because you might hit TO. */
fa8ed3e0 915
9169c321
JB
916int
917find_before_next_newline (from, to, cnt)
cbe0db0d 918 int from, to, cnt;
9169c321
JB
919{
920 int shortage;
921 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
922
923 if (shortage == 0)
924 pos--;
177c0ea7 925
9169c321 926 return pos;
ca1d1d23
JB
927}
928\f
ca1d1d23
JB
929/* Subroutines of Lisp buffer search functions. */
930
931static Lisp_Object
b819a390 932search_command (string, bound, noerror, count, direction, RE, posix)
ca1d1d23
JB
933 Lisp_Object string, bound, noerror, count;
934 int direction;
935 int RE;
b819a390 936 int posix;
ca1d1d23
JB
937{
938 register int np;
9f43ad85 939 int lim, lim_byte;
ca1d1d23
JB
940 int n = direction;
941
942 if (!NILP (count))
943 {
b7826503 944 CHECK_NUMBER (count);
ca1d1d23
JB
945 n *= XINT (count);
946 }
947
b7826503 948 CHECK_STRING (string);
ca1d1d23 949 if (NILP (bound))
9f43ad85
RS
950 {
951 if (n > 0)
952 lim = ZV, lim_byte = ZV_BYTE;
953 else
954 lim = BEGV, lim_byte = BEGV_BYTE;
955 }
ca1d1d23
JB
956 else
957 {
b7826503 958 CHECK_NUMBER_COERCE_MARKER (bound);
ca1d1d23 959 lim = XINT (bound);
6ec8bbd2 960 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
961 error ("Invalid search bound (wrong side of point)");
962 if (lim > ZV)
9f43ad85 963 lim = ZV, lim_byte = ZV_BYTE;
588d2fd5 964 else if (lim < BEGV)
9f43ad85 965 lim = BEGV, lim_byte = BEGV_BYTE;
588d2fd5
KH
966 else
967 lim_byte = CHAR_TO_BYTE (lim);
ca1d1d23
JB
968 }
969
910c747a
RS
970 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
971 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
972 = current_buffer->case_eqv_table;
973
9f43ad85 974 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
ca1d1d23 975 (!NILP (current_buffer->case_fold_search)
facdc750 976 ? current_buffer->case_canon_table
3135e9fd 977 : Qnil),
ca1d1d23 978 (!NILP (current_buffer->case_fold_search)
facdc750 979 ? current_buffer->case_eqv_table
3135e9fd 980 : Qnil),
b819a390 981 posix);
ca1d1d23
JB
982 if (np <= 0)
983 {
984 if (NILP (noerror))
06f77a8a
KS
985 xsignal1 (Qsearch_failed, string);
986
ca1d1d23
JB
987 if (!EQ (noerror, Qt))
988 {
989 if (lim < BEGV || lim > ZV)
990 abort ();
9f43ad85 991 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
992 return Qnil;
993#if 0 /* This would be clean, but maybe programs depend on
994 a value of nil here. */
481399bf 995 np = lim;
a5f217b8 996#endif
ca1d1d23 997 }
481399bf
RS
998 else
999 return Qnil;
ca1d1d23
JB
1000 }
1001
1002 if (np < BEGV || np > ZV)
1003 abort ();
1004
1005 SET_PT (np);
1006
1007 return make_number (np);
1008}
1009\f
fa8ed3e0
RS
1010/* Return 1 if REGEXP it matches just one constant string. */
1011
b6d6a51c
KH
1012static int
1013trivial_regexp_p (regexp)
1014 Lisp_Object regexp;
1015{
d5db4077
KR
1016 int len = SBYTES (regexp);
1017 unsigned char *s = SDATA (regexp);
b6d6a51c
KH
1018 while (--len >= 0)
1019 {
1020 switch (*s++)
1021 {
1022 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1023 return 0;
1024 case '\\':
1025 if (--len < 0)
1026 return 0;
1027 switch (*s++)
1028 {
1029 case '|': case '(': case ')': case '`': case '\'': case 'b':
1030 case 'B': case '<': case '>': case 'w': case 'W': case 's':
29f89fe7 1031 case 'S': case '=': case '{': case '}': case '_':
5679531d 1032 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 1033 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
1034 case '6': case '7': case '8': case '9':
1035 return 0;
1036 }
1037 }
1038 }
1039 return 1;
1040}
1041
ca325161 1042/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 1043 starting at position POS and stopping at position LIM,
b819a390 1044 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
1045 a regular expression if RE is true.
1046
1047 If N is positive, searching is forward and LIM must be greater than POS.
1048 If N is negative, searching is backward and LIM must be less than POS.
1049
facdc750 1050 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 1051 or else the position at the beginning of the Nth occurrence
b819a390
RS
1052 (if searching backward) or the end (if searching forward).
1053
1054 POSIX is nonzero if we want full backtracking (POSIX style)
1055 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 1056
aff2ce94
RS
1057#define TRANSLATE(out, trt, d) \
1058do \
1059 { \
1060 if (! NILP (trt)) \
1061 { \
1062 Lisp_Object temp; \
1063 temp = Faref (trt, make_number (d)); \
1064 if (INTEGERP (temp)) \
1065 out = XINT (temp); \
1066 else \
1067 out = d; \
1068 } \
1069 else \
1070 out = d; \
1071 } \
1072while (0)
facdc750 1073
ef887810
RS
1074/* Only used in search_buffer, to record the end position of the match
1075 when searching regexps and SEARCH_REGS should not be changed
1076 (i.e. Vinhibit_changing_match_data is non-nil). */
1077static struct re_registers search_regs_1;
1078
b819a390 1079static int
9f43ad85
RS
1080search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1081 RE, trt, inverse_trt, posix)
ca1d1d23
JB
1082 Lisp_Object string;
1083 int pos;
9f43ad85 1084 int pos_byte;
ca1d1d23 1085 int lim;
9f43ad85 1086 int lim_byte;
ca1d1d23
JB
1087 int n;
1088 int RE;
facdc750
RS
1089 Lisp_Object trt;
1090 Lisp_Object inverse_trt;
b819a390 1091 int posix;
ca1d1d23 1092{
d5db4077
KR
1093 int len = SCHARS (string);
1094 int len_byte = SBYTES (string);
facdc750 1095 register int i;
ca1d1d23 1096
7074fde6
FP
1097 if (running_asynch_code)
1098 save_search_regs ();
1099
a7e4cdde 1100 /* Searching 0 times means don't move. */
ca1d1d23 1101 /* Null string is found at starting position. */
a7e4cdde 1102 if (len == 0 || n == 0)
ca325161 1103 {
0353b28f 1104 set_search_regs (pos_byte, 0);
ca325161
RS
1105 return pos;
1106 }
3f57a499 1107
41a33295 1108 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
ca1d1d23 1109 {
facdc750
RS
1110 unsigned char *p1, *p2;
1111 int s1, s2;
487282dc
KH
1112 struct re_pattern_buffer *bufp;
1113
ef887810
RS
1114 bufp = compile_pattern (string,
1115 (NILP (Vinhibit_changing_match_data)
1116 ? &search_regs : &search_regs_1),
1117 trt, posix,
0c8533c6 1118 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23 1119
ca1d1d23
JB
1120 immediate_quit = 1; /* Quit immediately if user types ^G,
1121 because letting this function finish
1122 can take too long. */
1123 QUIT; /* Do a pending quit right away,
1124 to avoid paradoxical behavior */
1125 /* Get pointers and sizes of the two strings
1126 that make up the visible portion of the buffer. */
1127
1128 p1 = BEGV_ADDR;
fa8ed3e0 1129 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1130 p2 = GAP_END_ADDR;
fa8ed3e0 1131 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1132 if (s1 < 0)
1133 {
1134 p2 = p1;
fa8ed3e0 1135 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1136 s1 = 0;
1137 }
1138 if (s2 < 0)
1139 {
fa8ed3e0 1140 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1141 s2 = 0;
1142 }
8bb43c28 1143 re_match_object = Qnil;
177c0ea7 1144
ca1d1d23
JB
1145 while (n < 0)
1146 {
42db823b 1147 int val;
487282dc 1148 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b 1149 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
ef887810
RS
1150 (NILP (Vinhibit_changing_match_data)
1151 ? &search_regs : &search_regs_1),
42db823b 1152 /* Don't allow match past current point */
4996330b 1153 pos_byte - BEGV_BYTE);
ca1d1d23 1154 if (val == -2)
b6d6a51c
KH
1155 {
1156 matcher_overflow ();
1157 }
ca1d1d23
JB
1158 if (val >= 0)
1159 {
ef887810
RS
1160 if (NILP (Vinhibit_changing_match_data))
1161 {
1162 pos_byte = search_regs.start[0] + BEGV_BYTE;
1163 for (i = 0; i < search_regs.num_regs; i++)
1164 if (search_regs.start[i] >= 0)
1165 {
1166 search_regs.start[i]
1167 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1168 search_regs.end[i]
1169 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1170 }
1171 XSETBUFFER (last_thing_searched, current_buffer);
1172 /* Set pos to the new position. */
1173 pos = search_regs.start[0];
1174 }
1175 else
1176 {
1177 pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1178 /* Set pos to the new position. */
1179 pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1180 }
ca1d1d23
JB
1181 }
1182 else
1183 {
1184 immediate_quit = 0;
1185 return (n);
1186 }
1187 n++;
1188 }
1189 while (n > 0)
1190 {
42db823b 1191 int val;
487282dc 1192 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b 1193 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
ef887810
RS
1194 (NILP (Vinhibit_changing_match_data)
1195 ? &search_regs : &search_regs_1),
4996330b 1196 lim_byte - BEGV_BYTE);
ca1d1d23 1197 if (val == -2)
b6d6a51c
KH
1198 {
1199 matcher_overflow ();
1200 }
ca1d1d23
JB
1201 if (val >= 0)
1202 {
ef887810
RS
1203 if (NILP (Vinhibit_changing_match_data))
1204 {
1205 pos_byte = search_regs.end[0] + BEGV_BYTE;
1206 for (i = 0; i < search_regs.num_regs; i++)
1207 if (search_regs.start[i] >= 0)
1208 {
1209 search_regs.start[i]
1210 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1211 search_regs.end[i]
1212 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1213 }
1214 XSETBUFFER (last_thing_searched, current_buffer);
1215 pos = search_regs.end[0];
1216 }
1217 else
1218 {
1219 pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1220 pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1221 }
ca1d1d23
JB
1222 }
1223 else
1224 {
1225 immediate_quit = 0;
1226 return (0 - n);
1227 }
1228 n--;
1229 }
1230 immediate_quit = 0;
1231 return (pos);
1232 }
1233 else /* non-RE case */
1234 {
facdc750
RS
1235 unsigned char *raw_pattern, *pat;
1236 int raw_pattern_size;
1237 int raw_pattern_size_byte;
1238 unsigned char *patbuf;
1239 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
a967ed62 1240 unsigned char *base_pat;
49e44e81
KH
1241 /* Set to positive if we find a non-ASCII char that need
1242 translation. Otherwise set to zero later. */
1243 int char_base = -1;
040272ce 1244 int boyer_moore_ok = 1;
facdc750
RS
1245
1246 /* MULTIBYTE says whether the text to be searched is multibyte.
1247 We must convert PATTERN to match that, or we will not really
1248 find things right. */
1249
1250 if (multibyte == STRING_MULTIBYTE (string))
1251 {
d5db4077
KR
1252 raw_pattern = (unsigned char *) SDATA (string);
1253 raw_pattern_size = SCHARS (string);
1254 raw_pattern_size_byte = SBYTES (string);
facdc750
RS
1255 }
1256 else if (multibyte)
1257 {
d5db4077 1258 raw_pattern_size = SCHARS (string);
facdc750 1259 raw_pattern_size_byte
d5db4077 1260 = count_size_as_multibyte (SDATA (string),
facdc750 1261 raw_pattern_size);
7276d3d8 1262 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
d5db4077
KR
1263 copy_text (SDATA (string), raw_pattern,
1264 SCHARS (string), 0, 1);
facdc750
RS
1265 }
1266 else
1267 {
1268 /* Converting multibyte to single-byte.
1269
1270 ??? Perhaps this conversion should be done in a special way
1271 by subtracting nonascii-insert-offset from each non-ASCII char,
1272 so that only the multibyte chars which really correspond to
1273 the chosen single-byte character set can possibly match. */
d5db4077
KR
1274 raw_pattern_size = SCHARS (string);
1275 raw_pattern_size_byte = SCHARS (string);
7276d3d8 1276 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
1277 copy_text (SDATA (string), raw_pattern,
1278 SBYTES (string), 1, 0);
facdc750
RS
1279 }
1280
1281 /* Copy and optionally translate the pattern. */
1282 len = raw_pattern_size;
1283 len_byte = raw_pattern_size_byte;
f5f7578b 1284 patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
facdc750
RS
1285 pat = patbuf;
1286 base_pat = raw_pattern;
1287 if (multibyte)
1288 {
a17ae96a
KH
1289 /* Fill patbuf by translated characters in STRING while
1290 checking if we can use boyer-moore search. If TRT is
1291 non-nil, we can use boyer-moore search only if TRT can be
1292 represented by the byte array of 256 elements. For that,
1293 all non-ASCII case-equivalents of all case-senstive
1294 characters in STRING must belong to the same charset and
1295 row. */
1296
facdc750
RS
1297 while (--len >= 0)
1298 {
a17ae96a 1299 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
aff2ce94 1300 int c, translated, inverse;
a17ae96a 1301 int in_charlen, charlen;
facdc750
RS
1302
1303 /* If we got here and the RE flag is set, it's because we're
1304 dealing with a regexp known to be trivial, so the backslash
1305 just quotes the next character. */
1306 if (RE && *base_pat == '\\')
1307 {
1308 len--;
62221f22 1309 raw_pattern_size--;
facdc750
RS
1310 len_byte--;
1311 base_pat++;
1312 }
1313
1314 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
040272ce 1315
a17ae96a 1316 if (NILP (trt))
facdc750 1317 {
a17ae96a
KH
1318 str = base_pat;
1319 charlen = in_charlen;
1320 }
1321 else
1322 {
1323 /* Translate the character. */
1324 TRANSLATE (translated, trt, c);
1325 charlen = CHAR_STRING (translated, str_base);
1326 str = str_base;
1327
1328 /* Check if C has any other case-equivalents. */
1329 TRANSLATE (inverse, inverse_trt, c);
1330 /* If so, check if we can use boyer-moore. */
1331 if (c != inverse && boyer_moore_ok)
1332 {
1333 /* Check if all equivalents belong to the same
1334 group of characters. Note that the check of C
49e44e81
KH
1335 itself is done by the last iteration. */
1336 int this_char_base = -1;
1337
1338 while (boyer_moore_ok)
a17ae96a 1339 {
49e44e81 1340 if (ASCII_BYTE_P (inverse))
a17ae96a 1341 {
49e44e81
KH
1342 if (this_char_base > 0)
1343 boyer_moore_ok = 0;
1344 else
a17ae96a 1345 {
49e44e81
KH
1346 this_char_base = 0;
1347 if (char_base < 0)
1348 char_base = this_char_base;
a17ae96a
KH
1349 }
1350 }
49e44e81
KH
1351 else if (CHAR_BYTE8_P (inverse))
1352 /* Boyer-moore search can't handle a
1353 translation of an eight-bit
1354 character. */
1355 boyer_moore_ok = 0;
1356 else if (this_char_base < 0)
1357 {
1358 this_char_base = inverse & ~0x3F;
1359 if (char_base < 0)
1360 char_base = this_char_base;
1361 else if (char_base > 0
1362 && this_char_base != char_base)
1363 boyer_moore_ok = 0;
1364 }
1365 else if ((inverse & ~0x3F) != this_char_base)
1366 boyer_moore_ok = 0;
a17ae96a
KH
1367 if (c == inverse)
1368 break;
1369 TRANSLATE (inverse, inverse_trt, inverse);
1370 }
1371 }
aff2ce94 1372 }
49e44e81
KH
1373 if (char_base < 0)
1374 char_base = 0;
facdc750
RS
1375
1376 /* Store this character into the translated pattern. */
a17ae96a
KH
1377 bcopy (str, pat, charlen);
1378 pat += charlen;
facdc750
RS
1379 base_pat += in_charlen;
1380 len_byte -= in_charlen;
1381 }
1382 }
1383 else
1384 {
040272ce 1385 /* Unibyte buffer. */
a17ae96a 1386 char_base = 0;
facdc750
RS
1387 while (--len >= 0)
1388 {
040272ce 1389 int c, translated;
facdc750
RS
1390
1391 /* If we got here and the RE flag is set, it's because we're
1392 dealing with a regexp known to be trivial, so the backslash
1393 just quotes the next character. */
1394 if (RE && *base_pat == '\\')
1395 {
1396 len--;
0190922f 1397 raw_pattern_size--;
facdc750
RS
1398 base_pat++;
1399 }
1400 c = *base_pat++;
aff2ce94 1401 TRANSLATE (translated, trt, c);
facdc750
RS
1402 *pat++ = translated;
1403 }
1404 }
1405
1406 len_byte = pat - patbuf;
1407 len = raw_pattern_size;
1408 pat = base_pat = patbuf;
1409
040272ce 1410 if (boyer_moore_ok)
facdc750 1411 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
aff2ce94 1412 pos, pos_byte, lim, lim_byte,
a17ae96a 1413 char_base);
facdc750
RS
1414 else
1415 return simple_search (n, pat, len, len_byte, trt,
1416 pos, pos_byte, lim, lim_byte);
1417 }
1418}
1419\f
1420/* Do a simple string search N times for the string PAT,
1421 whose length is LEN/LEN_BYTE,
1422 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1423 TRT is the translation table.
f8bd51c4 1424
facdc750
RS
1425 Return the character position where the match is found.
1426 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1427
facdc750
RS
1428 This kind of search works regardless of what is in PAT and
1429 regardless of what is in TRT. It is used in cases where
1430 boyer_moore cannot work. */
1431
1432static int
1433simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1434 int n;
1435 unsigned char *pat;
1436 int len, len_byte;
1437 Lisp_Object trt;
1438 int pos, pos_byte;
1439 int lim, lim_byte;
1440{
1441 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
ab228c24 1442 int forward = n > 0;
49e44e81
KH
1443 /* Number of buffer bytes matched. Note that this may be different
1444 from len_byte in a multibyte buffer. */
1445 int match_byte;
facdc750
RS
1446
1447 if (lim > pos && multibyte)
1448 while (n > 0)
1449 {
1450 while (1)
f8bd51c4 1451 {
facdc750
RS
1452 /* Try matching at position POS. */
1453 int this_pos = pos;
1454 int this_pos_byte = pos_byte;
1455 int this_len = len;
1456 int this_len_byte = len_byte;
1457 unsigned char *p = pat;
a9469498 1458 if (pos + len > lim || pos_byte + len_byte > lim_byte)
facdc750
RS
1459 goto stop;
1460
1461 while (this_len > 0)
1462 {
1463 int charlen, buf_charlen;
ab228c24 1464 int pat_ch, buf_ch;
facdc750 1465
ab228c24 1466 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1467 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1468 ZV_BYTE - this_pos_byte,
1469 buf_charlen);
aff2ce94 1470 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1471
1472 if (buf_ch != pat_ch)
1473 break;
ab228c24
RS
1474
1475 this_len_byte -= charlen;
1476 this_len--;
1477 p += charlen;
1478
1479 this_pos_byte += buf_charlen;
1480 this_pos++;
facdc750
RS
1481 }
1482
1483 if (this_len == 0)
1484 {
49e44e81 1485 match_byte = this_pos_byte - pos_byte;
facdc750 1486 pos += len;
49e44e81 1487 pos_byte += match_byte;
facdc750
RS
1488 break;
1489 }
1490
1491 INC_BOTH (pos, pos_byte);
f8bd51c4 1492 }
facdc750
RS
1493
1494 n--;
1495 }
1496 else if (lim > pos)
1497 while (n > 0)
1498 {
1499 while (1)
f8bd51c4 1500 {
facdc750
RS
1501 /* Try matching at position POS. */
1502 int this_pos = pos;
1503 int this_len = len;
1504 unsigned char *p = pat;
1505
1506 if (pos + len > lim)
1507 goto stop;
1508
1509 while (this_len > 0)
1510 {
1511 int pat_ch = *p++;
1512 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1513 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1514
1515 if (buf_ch != pat_ch)
1516 break;
ab228c24
RS
1517
1518 this_len--;
1519 this_pos++;
facdc750
RS
1520 }
1521
1522 if (this_len == 0)
1523 {
49e44e81 1524 match_byte = len;
facdc750
RS
1525 pos += len;
1526 break;
1527 }
1528
1529 pos++;
f8bd51c4 1530 }
facdc750
RS
1531
1532 n--;
1533 }
1534 /* Backwards search. */
1535 else if (lim < pos && multibyte)
1536 while (n < 0)
1537 {
1538 while (1)
f8bd51c4 1539 {
facdc750
RS
1540 /* Try matching at position POS. */
1541 int this_pos = pos - len;
3dcf0066 1542 int this_pos_byte;
facdc750
RS
1543 int this_len = len;
1544 int this_len_byte = len_byte;
1545 unsigned char *p = pat;
1546
3dcf0066 1547 if (this_pos < lim || (pos_byte - len_byte) < lim_byte)
facdc750 1548 goto stop;
3dcf0066 1549 this_pos_byte = CHAR_TO_BYTE (this_pos);
49e44e81 1550 match_byte = pos_byte - this_pos_byte;
facdc750
RS
1551
1552 while (this_len > 0)
1553 {
1554 int charlen, buf_charlen;
ab228c24 1555 int pat_ch, buf_ch;
facdc750 1556
ab228c24 1557 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1558 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1559 ZV_BYTE - this_pos_byte,
1560 buf_charlen);
aff2ce94 1561 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1562
1563 if (buf_ch != pat_ch)
1564 break;
ab228c24
RS
1565
1566 this_len_byte -= charlen;
1567 this_len--;
1568 p += charlen;
1569 this_pos_byte += buf_charlen;
1570 this_pos++;
facdc750
RS
1571 }
1572
1573 if (this_len == 0)
1574 {
1575 pos -= len;
49e44e81 1576 pos_byte -= match_byte;
facdc750
RS
1577 break;
1578 }
1579
1580 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1581 }
1582
facdc750
RS
1583 n++;
1584 }
1585 else if (lim < pos)
1586 while (n < 0)
1587 {
1588 while (1)
b6d6a51c 1589 {
facdc750
RS
1590 /* Try matching at position POS. */
1591 int this_pos = pos - len;
1592 int this_len = len;
1593 unsigned char *p = pat;
1594
a9469498 1595 if (this_pos < lim)
facdc750
RS
1596 goto stop;
1597
1598 while (this_len > 0)
1599 {
1600 int pat_ch = *p++;
1601 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1602 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1603
1604 if (buf_ch != pat_ch)
1605 break;
ab228c24
RS
1606 this_len--;
1607 this_pos++;
facdc750
RS
1608 }
1609
1610 if (this_len == 0)
b6d6a51c 1611 {
49e44e81 1612 match_byte = len;
facdc750
RS
1613 pos -= len;
1614 break;
b6d6a51c 1615 }
facdc750
RS
1616
1617 pos--;
b6d6a51c 1618 }
facdc750
RS
1619
1620 n++;
b6d6a51c 1621 }
facdc750
RS
1622
1623 stop:
1624 if (n == 0)
aff2ce94 1625 {
ab228c24 1626 if (forward)
49e44e81 1627 set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
ab228c24 1628 else
49e44e81 1629 set_search_regs (multibyte ? pos_byte : pos, match_byte);
aff2ce94
RS
1630
1631 return pos;
1632 }
facdc750
RS
1633 else if (n > 0)
1634 return -n;
1635 else
1636 return n;
1637}
1638\f
0190922f 1639/* Do Boyer-Moore search N times for the string BASE_PAT,
facdc750
RS
1640 whose length is LEN/LEN_BYTE,
1641 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1642 DIRECTION says which direction we search in.
1643 TRT and INVERSE_TRT are translation tables.
0190922f 1644 Characters in PAT are already translated by TRT.
facdc750 1645
0190922f
KH
1646 This kind of search works if all the characters in BASE_PAT that
1647 have nontrivial translation are the same aside from the last byte.
1648 This makes it possible to translate just the last byte of a
1649 character, and do so after just a simple test of the context.
b2e6b10f 1650 CHAR_BASE is nonzero if there is such a non-ASCII character.
facdc750
RS
1651
1652 If that criterion is not satisfied, do not call this function. */
1653
1654static int
1655boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
a17ae96a 1656 pos, pos_byte, lim, lim_byte, char_base)
facdc750
RS
1657 int n;
1658 unsigned char *base_pat;
1659 int len, len_byte;
1660 Lisp_Object trt;
1661 Lisp_Object inverse_trt;
1662 int pos, pos_byte;
1663 int lim, lim_byte;
a17ae96a 1664 int char_base;
facdc750
RS
1665{
1666 int direction = ((n > 0) ? 1 : -1);
1667 register int dirlen;
a968f437 1668 int infinity, limit, stride_for_teases = 0;
facdc750
RS
1669 register int *BM_tab;
1670 int *BM_tab_base;
177c0ea7 1671 register unsigned char *cursor, *p_limit;
facdc750 1672 register int i, j;
cb6792d2 1673 unsigned char *pat, *pat_end;
facdc750
RS
1674 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1675
1676 unsigned char simple_translate[0400];
0190922f 1677 /* These are set to the preceding bytes of a byte to be translated
49e44e81 1678 if char_base is nonzero. As the maximum byte length of a
a17ae96a 1679 multibyte character is 5, we have to check at most four previous
0190922f
KH
1680 bytes. */
1681 int translate_prev_byte1 = 0;
1682 int translate_prev_byte2 = 0;
1683 int translate_prev_byte3 = 0;
a17ae96a 1684 int translate_prev_byte4 = 0;
facdc750 1685
facdc750 1686 BM_tab = (int *) alloca (0400 * sizeof (int));
e2a51aa8 1687
facdc750
RS
1688 /* The general approach is that we are going to maintain that we know */
1689 /* the first (closest to the present position, in whatever direction */
1690 /* we're searching) character that could possibly be the last */
1691 /* (furthest from present position) character of a valid match. We */
1692 /* advance the state of our knowledge by looking at that character */
1693 /* and seeing whether it indeed matches the last character of the */
1694 /* pattern. If it does, we take a closer look. If it does not, we */
1695 /* move our pointer (to putative last characters) as far as is */
1696 /* logically possible. This amount of movement, which I call a */
1697 /* stride, will be the length of the pattern if the actual character */
1698 /* appears nowhere in the pattern, otherwise it will be the distance */
1699 /* from the last occurrence of that character to the end of the */
1700 /* pattern. */
1701 /* As a coding trick, an enormous stride is coded into the table for */
1702 /* characters that match the last character. This allows use of only */
1703 /* a single test, a test for having gone past the end of the */
1704 /* permissible match region, to test for both possible matches (when */
1705 /* the stride goes past the end immediately) and failure to */
177c0ea7 1706 /* match (where you get nudged past the end one stride at a time). */
facdc750
RS
1707
1708 /* Here we make a "mickey mouse" BM table. The stride of the search */
1709 /* is determined only by the last character of the putative match. */
1710 /* If that character does not match, we will stride the proper */
1711 /* distance to propose a match that superimposes it on the last */
1712 /* instance of a character that matches it (per trt), or misses */
177c0ea7 1713 /* it entirely if there is none. */
facdc750
RS
1714
1715 dirlen = len_byte * direction;
1716 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
cb6792d2
RS
1717
1718 /* Record position after the end of the pattern. */
1719 pat_end = base_pat + len_byte;
1720 /* BASE_PAT points to a character that we start scanning from.
1721 It is the first character in a forward search,
1722 the last character in a backward search. */
facdc750 1723 if (direction < 0)
cb6792d2
RS
1724 base_pat = pat_end - 1;
1725
facdc750
RS
1726 BM_tab_base = BM_tab;
1727 BM_tab += 0400;
1728 j = dirlen; /* to get it in a register */
1729 /* A character that does not appear in the pattern induces a */
1730 /* stride equal to the pattern length. */
1731 while (BM_tab_base != BM_tab)
1732 {
1733 *--BM_tab = j;
1734 *--BM_tab = j;
1735 *--BM_tab = j;
1736 *--BM_tab = j;
1737 }
1738
1739 /* We use this for translation, instead of TRT itself.
1740 We fill this in to handle the characters that actually
1741 occur in the pattern. Others don't matter anyway! */
1742 bzero (simple_translate, sizeof simple_translate);
1743 for (i = 0; i < 0400; i++)
1744 simple_translate[i] = i;
1745
a17ae96a 1746 if (char_base)
0190922f 1747 {
a17ae96a 1748 /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE. Only a
0190922f 1749 byte following them are the target of translation. */
0190922f 1750 unsigned char str[MAX_MULTIBYTE_LENGTH];
a17ae96a 1751 int len = CHAR_STRING (char_base, str);
0190922f
KH
1752
1753 translate_prev_byte1 = str[len - 2];
1754 if (len > 2)
1755 {
1756 translate_prev_byte2 = str[len - 3];
1757 if (len > 3)
a17ae96a
KH
1758 {
1759 translate_prev_byte3 = str[len - 4];
1760 if (len > 4)
1761 translate_prev_byte4 = str[len - 5];
1762 }
0190922f
KH
1763 }
1764 }
1765
facdc750
RS
1766 i = 0;
1767 while (i != infinity)
1768 {
cb6792d2 1769 unsigned char *ptr = base_pat + i;
facdc750
RS
1770 i += direction;
1771 if (i == dirlen)
1772 i = infinity;
1773 if (! NILP (trt))
ca1d1d23 1774 {
49e44e81
KH
1775 /* If the byte currently looking at is the last of a
1776 character to check case-equivalents, set CH to that
1777 character. An ASCII character and a non-ASCII character
1778 matching with CHAR_BASE are to be checked. */
a17ae96a
KH
1779 int ch = -1;
1780
1781 if (ASCII_BYTE_P (*ptr) || ! multibyte)
1782 ch = *ptr;
49e44e81 1783 else if (char_base
86dc6ccb 1784 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
ca1d1d23 1785 {
49e44e81
KH
1786 unsigned char *charstart = ptr - 1;
1787
1788 while (! (CHAR_HEAD_P (*charstart)))
1789 charstart--;
1790 ch = STRING_CHAR (charstart, ptr - charstart + 1);
a17ae96a
KH
1791 if (char_base != (ch & ~0x3F))
1792 ch = -1;
ca1d1d23 1793 }
facdc750 1794
f51995fa 1795 if (ch >= 0200)
49e44e81
KH
1796 j = (ch & 0x3F) | 0200;
1797 else
1798 j = *ptr;
1799
facdc750
RS
1800 if (i == infinity)
1801 stride_for_teases = BM_tab[j];
ab228c24 1802
facdc750
RS
1803 BM_tab[j] = dirlen - i;
1804 /* A translation table is accompanied by its inverse -- see */
177c0ea7 1805 /* comment following downcase_table for details */
a17ae96a 1806 if (ch >= 0)
ab228c24
RS
1807 {
1808 int starting_ch = ch;
49e44e81 1809 int starting_j = j;
a17ae96a 1810
ab228c24
RS
1811 while (1)
1812 {
1813 TRANSLATE (ch, inverse_trt, ch);
f51995fa 1814 if (ch >= 0200)
49e44e81 1815 j = (ch & 0x3F) | 0200;
ab228c24 1816 else
a17ae96a 1817 j = ch;
ab228c24
RS
1818
1819 /* For all the characters that map into CH,
1820 set up simple_translate to map the last byte
1821 into STARTING_J. */
1822 simple_translate[j] = starting_j;
1823 if (ch == starting_ch)
1824 break;
1825 BM_tab[j] = dirlen - i;
1826 }
1827 }
facdc750
RS
1828 }
1829 else
1830 {
1831 j = *ptr;
1832
1833 if (i == infinity)
1834 stride_for_teases = BM_tab[j];
1835 BM_tab[j] = dirlen - i;
ca1d1d23 1836 }
facdc750
RS
1837 /* stride_for_teases tells how much to stride if we get a */
1838 /* match on the far character but are subsequently */
1839 /* disappointed, by recording what the stride would have been */
1840 /* for that character if the last character had been */
1841 /* different. */
1842 }
1843 infinity = dirlen - infinity;
1844 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1845 /* loop invariant - POS_BYTE points at where last char (first
1846 char if reverse) of pattern would align in a possible match. */
1847 while (n != 0)
1848 {
1849 int tail_end;
1850 unsigned char *tail_end_ptr;
1851
1852 /* It's been reported that some (broken) compiler thinks that
1853 Boolean expressions in an arithmetic context are unsigned.
1854 Using an explicit ?1:0 prevents this. */
1855 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1856 < 0)
1857 return (n * (0 - direction));
1858 /* First we do the part we can by pointers (maybe nothing) */
1859 QUIT;
1860 pat = base_pat;
1861 limit = pos_byte - dirlen + direction;
67ce527d
KH
1862 if (direction > 0)
1863 {
1864 limit = BUFFER_CEILING_OF (limit);
1865 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1866 can take on without hitting edge of buffer or the gap. */
1867 limit = min (limit, pos_byte + 20000);
1868 limit = min (limit, lim_byte - 1);
1869 }
1870 else
1871 {
1872 limit = BUFFER_FLOOR_OF (limit);
1873 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1874 can take on without hitting edge of buffer or the gap. */
1875 limit = max (limit, pos_byte - 20000);
1876 limit = max (limit, lim_byte);
1877 }
facdc750
RS
1878 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1879 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1880
1881 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1882 {
facdc750
RS
1883 unsigned char *p2;
1884
1885 p_limit = BYTE_POS_ADDR (limit);
1886 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1887 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1888 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1889 {
facdc750 1890 if (direction > 0) /* worth duplicating */
ca1d1d23 1891 {
facdc750
RS
1892 /* Use signed comparison if appropriate
1893 to make cursor+infinity sure to be > p_limit.
1894 Assuming that the buffer lies in a range of addresses
1895 that are all "positive" (as ints) or all "negative",
1896 either kind of comparison will work as long
1897 as we don't step by infinity. So pick the kind
1898 that works when we do step by infinity. */
1899 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1900 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1901 cursor += BM_tab[*cursor];
ca1d1d23 1902 else
facdc750
RS
1903 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1904 cursor += BM_tab[*cursor];
1905 }
1906 else
1907 {
1908 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1909 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1910 cursor += BM_tab[*cursor];
1911 else
1912 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1913 cursor += BM_tab[*cursor];
1914 }
ca1d1d23 1915/* If you are here, cursor is beyond the end of the searched region. */
facdc750
RS
1916/* This can happen if you match on the far character of the pattern, */
1917/* because the "stride" of that character is infinity, a number able */
1918/* to throw you well beyond the end of the search. It can also */
1919/* happen if you fail to match within the permitted region and would */
1920/* otherwise try a character beyond that region */
1921 if ((cursor - p_limit) * direction <= len_byte)
1922 break; /* a small overrun is genuine */
1923 cursor -= infinity; /* large overrun = hit */
1924 i = dirlen - direction;
1925 if (! NILP (trt))
1926 {
1927 while ((i -= direction) + direction != 0)
ca1d1d23 1928 {
facdc750
RS
1929 int ch;
1930 cursor -= direction;
1931 /* Translate only the last byte of a character. */
1932 if (! multibyte
1933 || ((cursor == tail_end_ptr
1934 || CHAR_HEAD_P (cursor[1]))
1935 && (CHAR_HEAD_P (cursor[0])
0190922f
KH
1936 /* Check if this is the last byte of
1937 a translable character. */
1938 || (translate_prev_byte1 == cursor[-1]
1939 && (CHAR_HEAD_P (translate_prev_byte1)
1940 || (translate_prev_byte2 == cursor[-2]
1941 && (CHAR_HEAD_P (translate_prev_byte2)
1942 || (translate_prev_byte3 == cursor[-3]))))))))
facdc750
RS
1943 ch = simple_translate[*cursor];
1944 else
1945 ch = *cursor;
1946 if (pat[i] != ch)
1947 break;
ca1d1d23 1948 }
facdc750
RS
1949 }
1950 else
1951 {
1952 while ((i -= direction) + direction != 0)
ca1d1d23 1953 {
facdc750
RS
1954 cursor -= direction;
1955 if (pat[i] != *cursor)
1956 break;
ca1d1d23 1957 }
facdc750
RS
1958 }
1959 cursor += dirlen - i - direction; /* fix cursor */
1960 if (i + direction == 0)
1961 {
ef887810 1962 int position, start, end;
0c8533c6 1963
facdc750 1964 cursor -= direction;
1113d9db 1965
facdc750
RS
1966 position = pos_byte + cursor - p2 + ((direction > 0)
1967 ? 1 - len_byte : 0);
1968 set_search_regs (position, len_byte);
ca325161 1969
ef887810
RS
1970 if (NILP (Vinhibit_changing_match_data))
1971 {
1972 start = search_regs.start[0];
1973 end = search_regs.end[0];
1974 }
1975 else
1976 /* If Vinhibit_changing_match_data is non-nil,
1977 search_regs will not be changed. So let's
1978 compute start and end here. */
1979 {
1980 start = BYTE_TO_CHAR (position);
1981 end = BYTE_TO_CHAR (position + len_byte);
1982 }
1983
facdc750
RS
1984 if ((n -= direction) != 0)
1985 cursor += dirlen; /* to resume search */
ca1d1d23 1986 else
ef887810 1987 return direction > 0 ? end : start;
ca1d1d23 1988 }
facdc750
RS
1989 else
1990 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 1991 }
facdc750
RS
1992 pos_byte += cursor - p2;
1993 }
1994 else
1995 /* Now we'll pick up a clump that has to be done the hard */
1996 /* way because it covers a discontinuity */
1997 {
1998 limit = ((direction > 0)
1999 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
2000 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
2001 limit = ((direction > 0)
2002 ? min (limit + len_byte, lim_byte - 1)
2003 : max (limit - len_byte, lim_byte));
2004 /* LIMIT is now the last value POS_BYTE can have
2005 and still be valid for a possible match. */
2006 while (1)
ca1d1d23 2007 {
facdc750
RS
2008 /* This loop can be coded for space rather than */
2009 /* speed because it will usually run only once. */
2010 /* (the reach is at most len + 21, and typically */
177c0ea7 2011 /* does not exceed len) */
facdc750
RS
2012 while ((limit - pos_byte) * direction >= 0)
2013 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
2014 /* now run the same tests to distinguish going off the */
2015 /* end, a match or a phony match. */
2016 if ((pos_byte - limit) * direction <= len_byte)
2017 break; /* ran off the end */
2018 /* Found what might be a match.
2019 Set POS_BYTE back to last (first if reverse) pos. */
2020 pos_byte -= infinity;
2021 i = dirlen - direction;
2022 while ((i -= direction) + direction != 0)
ca1d1d23 2023 {
facdc750
RS
2024 int ch;
2025 unsigned char *ptr;
2026 pos_byte -= direction;
2027 ptr = BYTE_POS_ADDR (pos_byte);
2028 /* Translate only the last byte of a character. */
2029 if (! multibyte
2030 || ((ptr == tail_end_ptr
2031 || CHAR_HEAD_P (ptr[1]))
2032 && (CHAR_HEAD_P (ptr[0])
0190922f
KH
2033 /* Check if this is the last byte of a
2034 translable character. */
2035 || (translate_prev_byte1 == ptr[-1]
2036 && (CHAR_HEAD_P (translate_prev_byte1)
2037 || (translate_prev_byte2 == ptr[-2]
2038 && (CHAR_HEAD_P (translate_prev_byte2)
2039 || translate_prev_byte3 == ptr[-3])))))))
facdc750
RS
2040 ch = simple_translate[*ptr];
2041 else
2042 ch = *ptr;
2043 if (pat[i] != ch)
2044 break;
2045 }
2046 /* Above loop has moved POS_BYTE part or all the way
2047 back to the first pos (last pos if reverse).
2048 Set it once again at the last (first if reverse) char. */
2049 pos_byte += dirlen - i- direction;
2050 if (i + direction == 0)
2051 {
ef887810 2052 int position, start, end;
facdc750 2053 pos_byte -= direction;
1113d9db 2054
facdc750 2055 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
facdc750 2056 set_search_regs (position, len_byte);
ca325161 2057
ef887810
RS
2058 if (NILP (Vinhibit_changing_match_data))
2059 {
2060 start = search_regs.start[0];
2061 end = search_regs.end[0];
2062 }
2063 else
2064 /* If Vinhibit_changing_match_data is non-nil,
2065 search_regs will not be changed. So let's
2066 compute start and end here. */
2067 {
2068 start = BYTE_TO_CHAR (position);
2069 end = BYTE_TO_CHAR (position + len_byte);
2070 }
2071
facdc750
RS
2072 if ((n -= direction) != 0)
2073 pos_byte += dirlen; /* to resume search */
ca1d1d23 2074 else
ef887810 2075 return direction > 0 ? end : start;
ca1d1d23 2076 }
facdc750
RS
2077 else
2078 pos_byte += stride_for_teases;
2079 }
2080 }
2081 /* We have done one clump. Can we continue? */
2082 if ((lim_byte - pos_byte) * direction < 0)
2083 return ((0 - n) * direction);
ca1d1d23 2084 }
facdc750 2085 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 2086}
ca325161 2087
fa8ed3e0 2088/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
a7e4cdde
RS
2089 for the overall match just found in the current buffer.
2090 Also clear out the match data for registers 1 and up. */
ca325161
RS
2091
2092static void
fa8ed3e0
RS
2093set_search_regs (beg_byte, nbytes)
2094 int beg_byte, nbytes;
ca325161 2095{
a7e4cdde
RS
2096 int i;
2097
ef887810
RS
2098 if (!NILP (Vinhibit_changing_match_data))
2099 return;
2100
ca325161
RS
2101 /* Make sure we have registers in which to store
2102 the match position. */
2103 if (search_regs.num_regs == 0)
2104 {
2d4a771a
RS
2105 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2106 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 2107 search_regs.num_regs = 2;
ca325161
RS
2108 }
2109
a7e4cdde
RS
2110 /* Clear out the other registers. */
2111 for (i = 1; i < search_regs.num_regs; i++)
2112 {
2113 search_regs.start[i] = -1;
2114 search_regs.end[i] = -1;
2115 }
2116
fa8ed3e0
RS
2117 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2118 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 2119 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 2120}
ca1d1d23
JB
2121\f
2122/* Given a string of words separated by word delimiters,
2123 compute a regexp that matches those exact words
2124 separated by arbitrary punctuation. */
2125
2126static Lisp_Object
2127wordify (string)
2128 Lisp_Object string;
2129{
2130 register unsigned char *p, *o;
0c8533c6 2131 register int i, i_byte, len, punct_count = 0, word_count = 0;
ca1d1d23 2132 Lisp_Object val;
0c8533c6
RS
2133 int prev_c = 0;
2134 int adjust;
ca1d1d23 2135
b7826503 2136 CHECK_STRING (string);
d5db4077
KR
2137 p = SDATA (string);
2138 len = SCHARS (string);
ca1d1d23 2139
0c8533c6
RS
2140 for (i = 0, i_byte = 0; i < len; )
2141 {
2142 int c;
177c0ea7 2143
93daa011 2144 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
0c8533c6
RS
2145
2146 if (SYNTAX (c) != Sword)
2147 {
2148 punct_count++;
2149 if (i > 0 && SYNTAX (prev_c) == Sword)
2150 word_count++;
2151 }
ca1d1d23 2152
0c8533c6
RS
2153 prev_c = c;
2154 }
2155
2156 if (SYNTAX (prev_c) == Sword)
2157 word_count++;
2158 if (!word_count)
c60416e0 2159 return empty_unibyte_string;
0c8533c6
RS
2160
2161 adjust = - punct_count + 5 * (word_count - 1) + 4;
8a2df937
RS
2162 if (STRING_MULTIBYTE (string))
2163 val = make_uninit_multibyte_string (len + adjust,
d5db4077 2164 SBYTES (string)
8a2df937
RS
2165 + adjust);
2166 else
2167 val = make_uninit_string (len + adjust);
ca1d1d23 2168
d5db4077 2169 o = SDATA (val);
ca1d1d23
JB
2170 *o++ = '\\';
2171 *o++ = 'b';
1e9582d4 2172 prev_c = 0;
ca1d1d23 2173
1e9582d4
RS
2174 for (i = 0, i_byte = 0; i < len; )
2175 {
2176 int c;
2177 int i_byte_orig = i_byte;
177c0ea7 2178
93daa011 2179 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
1e9582d4
RS
2180
2181 if (SYNTAX (c) == Sword)
2182 {
5d69fe10 2183 bcopy (SDATA (string) + i_byte_orig, o,
1e9582d4
RS
2184 i_byte - i_byte_orig);
2185 o += i_byte - i_byte_orig;
2186 }
2187 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2188 {
2189 *o++ = '\\';
2190 *o++ = 'W';
2191 *o++ = '\\';
2192 *o++ = 'W';
2193 *o++ = '*';
2194 }
2195
2196 prev_c = c;
2197 }
ca1d1d23
JB
2198
2199 *o++ = '\\';
2200 *o++ = 'b';
2201
2202 return val;
2203}
2204\f
2205DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
8c1a1077
PJ
2206 "MSearch backward: ",
2207 doc: /* Search backward from point for STRING.
2208Set point to the beginning of the occurrence found, and return point.
2209An optional second argument bounds the search; it is a buffer position.
2210The match found must not extend before that position.
2211Optional third argument, if t, means if fail just return nil (no error).
2212 If not nil and not t, position at limit of search and return nil.
2213Optional fourth argument is repeat count--search for successive occurrences.
2214
2215Search case-sensitivity is determined by the value of the variable
2216`case-fold-search', which see.
2217
2218See also the functions `match-beginning', `match-end' and `replace-match'. */)
2219 (string, bound, noerror, count)
ca1d1d23
JB
2220 Lisp_Object string, bound, noerror, count;
2221{
b819a390 2222 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
2223}
2224
6af43974 2225DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
8c1a1077
PJ
2226 doc: /* Search forward from point for STRING.
2227Set point to the end of the occurrence found, and return point.
2228An optional second argument bounds the search; it is a buffer position.
d6aacdcd 2229The match found must not extend after that position. A value of nil is
48740f01 2230 equivalent to (point-max).
8c1a1077
PJ
2231Optional third argument, if t, means if fail just return nil (no error).
2232 If not nil and not t, move to limit of search and return nil.
2233Optional fourth argument is repeat count--search for successive occurrences.
2234
2235Search case-sensitivity is determined by the value of the variable
2236`case-fold-search', which see.
2237
2238See also the functions `match-beginning', `match-end' and `replace-match'. */)
2239 (string, bound, noerror, count)
ca1d1d23
JB
2240 Lisp_Object string, bound, noerror, count;
2241{
b819a390 2242 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
2243}
2244
2245DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
8c1a1077
PJ
2246 "sWord search backward: ",
2247 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2248Set point to the beginning of the occurrence found, and return point.
2249An optional second argument bounds the search; it is a buffer position.
2250The match found must not extend before that position.
2251Optional third argument, if t, means if fail just return nil (no error).
2252 If not nil and not t, move to limit of search and return nil.
2253Optional fourth argument is repeat count--search for successive occurrences. */)
2254 (string, bound, noerror, count)
ca1d1d23
JB
2255 Lisp_Object string, bound, noerror, count;
2256{
b819a390 2257 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2258}
2259
2260DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
8c1a1077
PJ
2261 "sWord search: ",
2262 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2263Set point to the end of the occurrence found, and return point.
2264An optional second argument bounds the search; it is a buffer position.
2265The match found must not extend after that position.
2266Optional third argument, if t, means if fail just return nil (no error).
2267 If not nil and not t, move to limit of search and return nil.
2268Optional fourth argument is repeat count--search for successive occurrences. */)
2269 (string, bound, noerror, count)
ca1d1d23
JB
2270 Lisp_Object string, bound, noerror, count;
2271{
b819a390 2272 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
2273}
2274
2275DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
8c1a1077
PJ
2276 "sRE search backward: ",
2277 doc: /* Search backward from point for match for regular expression REGEXP.
2278Set point to the beginning of the match, and return point.
2279The match found is the one starting last in the buffer
2280and yet ending before the origin of the search.
2281An optional second argument bounds the search; it is a buffer position.
2282The match found must start at or after that position.
2283Optional third argument, if t, means if fail just return nil (no error).
2284 If not nil and not t, move to limit of search and return nil.
2285Optional fourth argument is repeat count--search for successive occurrences.
2286See also the functions `match-beginning', `match-end', `match-string',
2287and `replace-match'. */)
2288 (regexp, bound, noerror, count)
19c0a730 2289 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2290{
b819a390 2291 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2292}
2293
2294DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
8c1a1077
PJ
2295 "sRE search: ",
2296 doc: /* Search forward from point for regular expression REGEXP.
2297Set point to the end of the occurrence found, and return point.
2298An optional second argument bounds the search; it is a buffer position.
2299The match found must not extend after that position.
2300Optional third argument, if t, means if fail just return nil (no error).
2301 If not nil and not t, move to limit of search and return nil.
2302Optional fourth argument is repeat count--search for successive occurrences.
2303See also the functions `match-beginning', `match-end', `match-string',
2304and `replace-match'. */)
2305 (regexp, bound, noerror, count)
19c0a730 2306 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2307{
b819a390
RS
2308 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2309}
2310
2311DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
8c1a1077
PJ
2312 "sPosix search backward: ",
2313 doc: /* Search backward from point for match for regular expression REGEXP.
2314Find the longest match in accord with Posix regular expression rules.
2315Set point to the beginning of the match, and return point.
2316The match found is the one starting last in the buffer
2317and yet ending before the origin of the search.
2318An optional second argument bounds the search; it is a buffer position.
2319The match found must start at or after that position.
2320Optional third argument, if t, means if fail just return nil (no error).
2321 If not nil and not t, move to limit of search and return nil.
2322Optional fourth argument is repeat count--search for successive occurrences.
2323See also the functions `match-beginning', `match-end', `match-string',
2324and `replace-match'. */)
2325 (regexp, bound, noerror, count)
b819a390
RS
2326 Lisp_Object regexp, bound, noerror, count;
2327{
2328 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2329}
2330
2331DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
8c1a1077
PJ
2332 "sPosix search: ",
2333 doc: /* Search forward from point for regular expression REGEXP.
2334Find the longest match in accord with Posix regular expression rules.
2335Set point to the end of the occurrence found, and return point.
2336An optional second argument bounds the search; it is a buffer position.
2337The match found must not extend after that position.
2338Optional third argument, if t, means if fail just return nil (no error).
2339 If not nil and not t, move to limit of search and return nil.
2340Optional fourth argument is repeat count--search for successive occurrences.
2341See also the functions `match-beginning', `match-end', `match-string',
2342and `replace-match'. */)
2343 (regexp, bound, noerror, count)
b819a390
RS
2344 Lisp_Object regexp, bound, noerror, count;
2345{
2346 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2347}
2348\f
d7a5ad5f 2349DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
8c1a1077 2350 doc: /* Replace text matched by last search with NEWTEXT.
4dd0c271
RS
2351Leave point at the end of the replacement text.
2352
8c1a1077
PJ
2353If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2354Otherwise maybe capitalize the whole text, or maybe just word initials,
2355based on the replaced text.
2356If the replaced text has only capital letters
2357and has at least one multiletter word, convert NEWTEXT to all caps.
4dd0c271
RS
2358Otherwise if all words are capitalized in the replaced text,
2359capitalize each word in NEWTEXT.
8c1a1077
PJ
2360
2361If third arg LITERAL is non-nil, insert NEWTEXT literally.
2362Otherwise treat `\\' as special:
2363 `\\&' in NEWTEXT means substitute original matched text.
2364 `\\N' means substitute what matched the Nth `\\(...\\)'.
2365 If Nth parens didn't match, substitute nothing.
2366 `\\\\' means insert one `\\'.
4dd0c271
RS
2367Case conversion does not apply to these substitutions.
2368
8c1a1077 2369FIXEDCASE and LITERAL are optional arguments.
8c1a1077
PJ
2370
2371The optional fourth argument STRING can be a string to modify.
2372This is meaningful when the previous match was done against STRING,
2373using `string-match'. When used this way, `replace-match'
2374creates and returns a new string made by copying STRING and replacing
2375the part of STRING that was matched.
2376
2377The optional fifth argument SUBEXP specifies a subexpression;
2378it says to replace just that subexpression with NEWTEXT,
2379rather than replacing the entire matched text.
2380This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2381`\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2382NEWTEXT in place of subexp N.
2383This is useful only after a regular expression search or match,
2384since only regular expressions have distinguished subexpressions. */)
2385 (newtext, fixedcase, literal, string, subexp)
d7a5ad5f 2386 Lisp_Object newtext, fixedcase, literal, string, subexp;
ca1d1d23
JB
2387{
2388 enum { nochange, all_caps, cap_initial } case_action;
ac3b28b1 2389 register int pos, pos_byte;
ca1d1d23 2390 int some_multiletter_word;
97832bd0 2391 int some_lowercase;
73dc8771 2392 int some_uppercase;
208767c3 2393 int some_nonuppercase_initial;
ca1d1d23 2394 register int c, prevc;
d7a5ad5f 2395 int sub;
3e18eecf 2396 int opoint, newpoint;
ca1d1d23 2397
b7826503 2398 CHECK_STRING (newtext);
ca1d1d23 2399
080c45fd 2400 if (! NILP (string))
b7826503 2401 CHECK_STRING (string);
080c45fd 2402
ca1d1d23
JB
2403 case_action = nochange; /* We tried an initialization */
2404 /* but some C compilers blew it */
4746118a
JB
2405
2406 if (search_regs.num_regs <= 0)
d72cdbfc 2407 error ("`replace-match' called before any match found");
4746118a 2408
d7a5ad5f
RS
2409 if (NILP (subexp))
2410 sub = 0;
2411 else
2412 {
b7826503 2413 CHECK_NUMBER (subexp);
d7a5ad5f
RS
2414 sub = XINT (subexp);
2415 if (sub < 0 || sub >= search_regs.num_regs)
2416 args_out_of_range (subexp, make_number (search_regs.num_regs));
2417 }
2418
080c45fd
RS
2419 if (NILP (string))
2420 {
d7a5ad5f
RS
2421 if (search_regs.start[sub] < BEGV
2422 || search_regs.start[sub] > search_regs.end[sub]
2423 || search_regs.end[sub] > ZV)
2424 args_out_of_range (make_number (search_regs.start[sub]),
2425 make_number (search_regs.end[sub]));
080c45fd
RS
2426 }
2427 else
2428 {
d7a5ad5f
RS
2429 if (search_regs.start[sub] < 0
2430 || search_regs.start[sub] > search_regs.end[sub]
d5db4077 2431 || search_regs.end[sub] > SCHARS (string))
d7a5ad5f
RS
2432 args_out_of_range (make_number (search_regs.start[sub]),
2433 make_number (search_regs.end[sub]));
080c45fd 2434 }
ca1d1d23
JB
2435
2436 if (NILP (fixedcase))
2437 {
2438 /* Decide how to casify by examining the matched text. */
ac3b28b1 2439 int last;
ca1d1d23 2440
ac3b28b1
KH
2441 pos = search_regs.start[sub];
2442 last = search_regs.end[sub];
fa8ed3e0
RS
2443
2444 if (NILP (string))
ac3b28b1 2445 pos_byte = CHAR_TO_BYTE (pos);
fa8ed3e0 2446 else
ac3b28b1 2447 pos_byte = string_char_to_byte (string, pos);
fa8ed3e0 2448
ca1d1d23
JB
2449 prevc = '\n';
2450 case_action = all_caps;
2451
2452 /* some_multiletter_word is set nonzero if any original word
2453 is more than one letter long. */
2454 some_multiletter_word = 0;
97832bd0 2455 some_lowercase = 0;
208767c3 2456 some_nonuppercase_initial = 0;
73dc8771 2457 some_uppercase = 0;
ca1d1d23 2458
ac3b28b1 2459 while (pos < last)
ca1d1d23 2460 {
080c45fd 2461 if (NILP (string))
ac3b28b1 2462 {
93daa011 2463 c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
ac3b28b1
KH
2464 INC_BOTH (pos, pos_byte);
2465 }
080c45fd 2466 else
93daa011 2467 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
080c45fd 2468
ca1d1d23
JB
2469 if (LOWERCASEP (c))
2470 {
2471 /* Cannot be all caps if any original char is lower case */
2472
97832bd0 2473 some_lowercase = 1;
ca1d1d23 2474 if (SYNTAX (prevc) != Sword)
208767c3 2475 some_nonuppercase_initial = 1;
ca1d1d23
JB
2476 else
2477 some_multiletter_word = 1;
2478 }
d16c2b66 2479 else if (UPPERCASEP (c))
ca1d1d23 2480 {
73dc8771 2481 some_uppercase = 1;
97832bd0 2482 if (SYNTAX (prevc) != Sword)
c4d460ce 2483 ;
97832bd0 2484 else
ca1d1d23
JB
2485 some_multiletter_word = 1;
2486 }
208767c3
RS
2487 else
2488 {
2489 /* If the initial is a caseless word constituent,
2490 treat that like a lowercase initial. */
2491 if (SYNTAX (prevc) != Sword)
2492 some_nonuppercase_initial = 1;
2493 }
ca1d1d23
JB
2494
2495 prevc = c;
2496 }
2497
97832bd0
RS
2498 /* Convert to all caps if the old text is all caps
2499 and has at least one multiletter word. */
2500 if (! some_lowercase && some_multiletter_word)
2501 case_action = all_caps;
c4d460ce 2502 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2503 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2504 case_action = cap_initial;
208767c3 2505 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2506 /* Should x -> yz, operating on X, give Yz or YZ?
2507 We'll assume the latter. */
2508 case_action = all_caps;
97832bd0
RS
2509 else
2510 case_action = nochange;
ca1d1d23
JB
2511 }
2512
080c45fd
RS
2513 /* Do replacement in a string. */
2514 if (!NILP (string))
2515 {
2516 Lisp_Object before, after;
2517
2518 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2519 make_number (search_regs.start[sub]));
2520 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2521
636a5e28
RS
2522 /* Substitute parts of the match into NEWTEXT
2523 if desired. */
080c45fd
RS
2524 if (NILP (literal))
2525 {
d131e79c
RS
2526 int lastpos = 0;
2527 int lastpos_byte = 0;
080c45fd
RS
2528 /* We build up the substituted string in ACCUM. */
2529 Lisp_Object accum;
2530 Lisp_Object middle;
d5db4077 2531 int length = SBYTES (newtext);
080c45fd
RS
2532
2533 accum = Qnil;
2534
ac3b28b1 2535 for (pos_byte = 0, pos = 0; pos_byte < length;)
080c45fd
RS
2536 {
2537 int substart = -1;
6bbd7a29 2538 int subend = 0;
1e79ec24 2539 int delbackslash = 0;
080c45fd 2540
0c8533c6
RS
2541 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2542
080c45fd
RS
2543 if (c == '\\')
2544 {
0c8533c6 2545 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
177c0ea7 2546
080c45fd
RS
2547 if (c == '&')
2548 {
d7a5ad5f
RS
2549 substart = search_regs.start[sub];
2550 subend = search_regs.end[sub];
080c45fd 2551 }
76fc1ea2 2552 else if (c >= '1' && c <= '9')
080c45fd 2553 {
76fc1ea2
KH
2554 if (search_regs.start[c - '0'] >= 0
2555 && c <= search_regs.num_regs + '0')
080c45fd
RS
2556 {
2557 substart = search_regs.start[c - '0'];
2558 subend = search_regs.end[c - '0'];
2559 }
76fc1ea2
KH
2560 else
2561 {
2562 /* If that subexp did not match,
2563 replace \\N with nothing. */
2564 substart = 0;
2565 subend = 0;
2566 }
080c45fd 2567 }
1e79ec24
KH
2568 else if (c == '\\')
2569 delbackslash = 1;
636a5e28
RS
2570 else
2571 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2572 }
2573 if (substart >= 0)
2574 {
d131e79c
RS
2575 if (pos - 2 != lastpos)
2576 middle = substring_both (newtext, lastpos,
2577 lastpos_byte,
2578 pos - 2, pos_byte - 2);
080c45fd
RS
2579 else
2580 middle = Qnil;
2581 accum = concat3 (accum, middle,
0c8533c6
RS
2582 Fsubstring (string,
2583 make_number (substart),
080c45fd
RS
2584 make_number (subend)));
2585 lastpos = pos;
0c8533c6 2586 lastpos_byte = pos_byte;
080c45fd 2587 }
1e79ec24
KH
2588 else if (delbackslash)
2589 {
d131e79c
RS
2590 middle = substring_both (newtext, lastpos,
2591 lastpos_byte,
2592 pos - 1, pos_byte - 1);
0c8533c6 2593
1e79ec24
KH
2594 accum = concat2 (accum, middle);
2595 lastpos = pos;
0c8533c6 2596 lastpos_byte = pos_byte;
1e79ec24 2597 }
080c45fd
RS
2598 }
2599
d131e79c
RS
2600 if (pos != lastpos)
2601 middle = substring_both (newtext, lastpos,
2602 lastpos_byte,
0c8533c6 2603 pos, pos_byte);
080c45fd
RS
2604 else
2605 middle = Qnil;
2606
2607 newtext = concat2 (accum, middle);
2608 }
2609
636a5e28 2610 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2611 if (case_action == all_caps)
2612 newtext = Fupcase (newtext);
2613 else if (case_action == cap_initial)
2b2eead9 2614 newtext = Fupcase_initials (newtext);
080c45fd
RS
2615
2616 return concat3 (before, newtext, after);
2617 }
2618
09c4719e 2619 /* Record point, then move (quietly) to the start of the match. */
9160906f 2620 if (PT >= search_regs.end[sub])
b0eba991 2621 opoint = PT - ZV;
9160906f
RS
2622 else if (PT > search_regs.start[sub])
2623 opoint = search_regs.end[sub] - ZV;
b0eba991
RS
2624 else
2625 opoint = PT;
2626
886ed6ec
RS
2627 /* If we want non-literal replacement,
2628 perform substitution on the replacement string. */
2629 if (NILP (literal))
ca1d1d23 2630 {
d5db4077 2631 int length = SBYTES (newtext);
68e69fbd
RS
2632 unsigned char *substed;
2633 int substed_alloc_size, substed_len;
3bc25e52
KH
2634 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2635 int str_multibyte = STRING_MULTIBYTE (newtext);
2636 Lisp_Object rev_tbl;
886ed6ec 2637 int really_changed = 0;
3bc25e52 2638
8f924df7 2639 rev_tbl = Qnil;
ac3b28b1 2640
68e69fbd
RS
2641 substed_alloc_size = length * 2 + 100;
2642 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2643 substed_len = 0;
2644
3bc25e52
KH
2645 /* Go thru NEWTEXT, producing the actual text to insert in
2646 SUBSTED while adjusting multibyteness to that of the current
2647 buffer. */
ca1d1d23 2648
ac3b28b1 2649 for (pos_byte = 0, pos = 0; pos_byte < length;)
ca1d1d23 2650 {
68e69fbd 2651 unsigned char str[MAX_MULTIBYTE_LENGTH];
f8ce8a0d
GM
2652 unsigned char *add_stuff = NULL;
2653 int add_len = 0;
68e69fbd 2654 int idx = -1;
9a76659d 2655
3bc25e52
KH
2656 if (str_multibyte)
2657 {
eb99a8dd 2658 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
3bc25e52
KH
2659 if (!buf_multibyte)
2660 c = multibyte_char_to_unibyte (c, rev_tbl);
2661 }
2662 else
2663 {
2664 /* Note that we don't have to increment POS. */
5d69fe10 2665 c = SREF (newtext, pos_byte++);
3bc25e52
KH
2666 if (buf_multibyte)
2667 c = unibyte_char_to_multibyte (c);
2668 }
ac3b28b1 2669
68e69fbd
RS
2670 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2671 or set IDX to a match index, which means put that part
2672 of the buffer text into SUBSTED. */
2673
ca1d1d23
JB
2674 if (c == '\\')
2675 {
886ed6ec
RS
2676 really_changed = 1;
2677
3bc25e52
KH
2678 if (str_multibyte)
2679 {
eb99a8dd
KH
2680 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2681 pos, pos_byte);
071ce769 2682 if (!buf_multibyte && !ASCII_CHAR_P (c))
3bc25e52
KH
2683 c = multibyte_char_to_unibyte (c, rev_tbl);
2684 }
2685 else
2686 {
d5db4077 2687 c = SREF (newtext, pos_byte++);
3bc25e52
KH
2688 if (buf_multibyte)
2689 c = unibyte_char_to_multibyte (c);
2690 }
2691
ca1d1d23 2692 if (c == '&')
68e69fbd 2693 idx = sub;
78445046 2694 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
2695 {
2696 if (search_regs.start[c - '0'] >= 1)
68e69fbd 2697 idx = c - '0';
ca1d1d23 2698 }
636a5e28 2699 else if (c == '\\')
68e69fbd 2700 add_len = 1, add_stuff = "\\";
636a5e28 2701 else
3bc25e52
KH
2702 {
2703 xfree (substed);
2704 error ("Invalid use of `\\' in replacement text");
2705 }
ca1d1d23
JB
2706 }
2707 else
68e69fbd
RS
2708 {
2709 add_len = CHAR_STRING (c, str);
2710 add_stuff = str;
2711 }
2712
2713 /* If we want to copy part of a previous match,
2714 set up ADD_STUFF and ADD_LEN to point to it. */
2715 if (idx >= 0)
2716 {
2717 int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2718 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2719 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2720 move_gap (search_regs.start[idx]);
2721 add_stuff = BYTE_POS_ADDR (begbyte);
2722 }
2723
2724 /* Now the stuff we want to add to SUBSTED
2725 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2726
2727 /* Make sure SUBSTED is big enough. */
2728 if (substed_len + add_len >= substed_alloc_size)
2729 {
2730 substed_alloc_size = substed_len + add_len + 500;
2731 substed = (unsigned char *) xrealloc (substed,
2732 substed_alloc_size + 1);
2733 }
2734
2735 /* Now add to the end of SUBSTED. */
f8ce8a0d
GM
2736 if (add_stuff)
2737 {
2738 bcopy (add_stuff, substed + substed_len, add_len);
2739 substed_len += add_len;
2740 }
ca1d1d23 2741 }
68e69fbd 2742
886ed6ec 2743 if (really_changed)
76fc1ea2
KH
2744 {
2745 if (buf_multibyte)
2746 {
2747 int nchars = multibyte_chars_in_text (substed, substed_len);
68e69fbd 2748
76fc1ea2
KH
2749 newtext = make_multibyte_string (substed, nchars, substed_len);
2750 }
2751 else
2752 newtext = make_unibyte_string (substed, substed_len);
2753 }
68e69fbd 2754 xfree (substed);
ca1d1d23
JB
2755 }
2756
886ed6ec
RS
2757 /* Replace the old text with the new in the cleanest possible way. */
2758 replace_range (search_regs.start[sub], search_regs.end[sub],
2759 newtext, 1, 0, 1);
d5db4077 2760 newpoint = search_regs.start[sub] + SCHARS (newtext);
ca1d1d23
JB
2761
2762 if (case_action == all_caps)
886ed6ec
RS
2763 Fupcase_region (make_number (search_regs.start[sub]),
2764 make_number (newpoint));
ca1d1d23 2765 else if (case_action == cap_initial)
886ed6ec
RS
2766 Fupcase_initials_region (make_number (search_regs.start[sub]),
2767 make_number (newpoint));
3e18eecf 2768
98e942e0
RS
2769 /* Adjust search data for this change. */
2770 {
5b88a2c5 2771 int oldend = search_regs.end[sub];
41c01205 2772 int oldstart = search_regs.start[sub];
98e942e0
RS
2773 int change = newpoint - search_regs.end[sub];
2774 int i;
2775
2776 for (i = 0; i < search_regs.num_regs; i++)
2777 {
41c01205 2778 if (search_regs.start[i] >= oldend)
98e942e0 2779 search_regs.start[i] += change;
41c01205
DK
2780 else if (search_regs.start[i] > oldstart)
2781 search_regs.start[i] = oldstart;
2782 if (search_regs.end[i] >= oldend)
98e942e0 2783 search_regs.end[i] += change;
41c01205
DK
2784 else if (search_regs.end[i] > oldstart)
2785 search_regs.end[i] = oldstart;
98e942e0
RS
2786 }
2787 }
2788
b0eba991 2789 /* Put point back where it was in the text. */
8d808a65 2790 if (opoint <= 0)
fa8ed3e0 2791 TEMP_SET_PT (opoint + ZV);
b0eba991 2792 else
fa8ed3e0 2793 TEMP_SET_PT (opoint);
b0eba991
RS
2794
2795 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2796 move_if_not_intangible (newpoint);
177c0ea7 2797
ca1d1d23
JB
2798 return Qnil;
2799}
2800\f
2801static Lisp_Object
2802match_limit (num, beginningp)
2803 Lisp_Object num;
2804 int beginningp;
2805{
2806 register int n;
2807
b7826503 2808 CHECK_NUMBER (num);
ca1d1d23 2809 n = XINT (num);
f90a5bf5 2810 if (n < 0)
bd2cbd56 2811 args_out_of_range (num, make_number (0));
f90a5bf5
RS
2812 if (search_regs.num_regs <= 0)
2813 error ("No match data, because no search succeeded");
9b9ceb61 2814 if (n >= search_regs.num_regs
4746118a 2815 || search_regs.start[n] < 0)
ca1d1d23
JB
2816 return Qnil;
2817 return (make_number ((beginningp) ? search_regs.start[n]
2818 : search_regs.end[n]));
2819}
2820
2821DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
8c1a1077
PJ
2822 doc: /* Return position of start of text matched by last search.
2823SUBEXP, a number, specifies which parenthesized expression in the last
2824 regexp.
2825Value is nil if SUBEXPth pair didn't match, or there were less than
2826 SUBEXP pairs.
2827Zero means the entire text matched by the whole regexp or whole string. */)
2828 (subexp)
5806161b 2829 Lisp_Object subexp;
ca1d1d23 2830{
5806161b 2831 return match_limit (subexp, 1);
ca1d1d23
JB
2832}
2833
2834DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
8c1a1077
PJ
2835 doc: /* Return position of end of text matched by last search.
2836SUBEXP, a number, specifies which parenthesized expression in the last
2837 regexp.
2838Value is nil if SUBEXPth pair didn't match, or there were less than
2839 SUBEXP pairs.
2840Zero means the entire text matched by the whole regexp or whole string. */)
2841 (subexp)
5806161b 2842 Lisp_Object subexp;
ca1d1d23 2843{
5806161b 2844 return match_limit (subexp, 0);
177c0ea7 2845}
ca1d1d23 2846
abd0071c 2847DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
8c1a1077
PJ
2848 doc: /* Return a list containing all info on what the last search matched.
2849Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2850All the elements are markers or nil (nil if the Nth pair didn't match)
2851if the last match was on a buffer; integers or nil if a string was matched.
2852Use `store-match-data' to reinstate the data in this list.
2853
41c01205
DK
2854If INTEGERS (the optional first argument) is non-nil, always use
2855integers \(rather than markers) to represent buffer positions. In
2856this case, and if the last match was in a buffer, the buffer will get
2857stored as one additional element at the end of the list.
2858
abd0071c
KS
2859If REUSE is a list, reuse it as part of the value. If REUSE is long
2860enough to hold all the values, and if INTEGERS is non-nil, no consing
2861is done.
2862
2863If optional third arg RESEAT is non-nil, any previous markers on the
2864REUSE list will be modified to point to nowhere.
2865
140a6b7e 2866Return value is undefined if the last search failed. */)
abd0071c
KS
2867 (integers, reuse, reseat)
2868 Lisp_Object integers, reuse, reseat;
ca1d1d23 2869{
56256c2a 2870 Lisp_Object tail, prev;
4746118a 2871 Lisp_Object *data;
ca1d1d23
JB
2872 int i, len;
2873
abd0071c
KS
2874 if (!NILP (reseat))
2875 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2876 if (MARKERP (XCAR (tail)))
2877 {
51f10faa 2878 unchain_marker (XMARKER (XCAR (tail)));
abd0071c
KS
2879 XSETCAR (tail, Qnil);
2880 }
2881
daa37602 2882 if (NILP (last_thing_searched))
c36bcf1b 2883 return Qnil;
daa37602 2884
6bbd7a29
GM
2885 prev = Qnil;
2886
41c01205 2887 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
4746118a
JB
2888 * sizeof (Lisp_Object));
2889
41c01205 2890 len = 0;
4746118a 2891 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2892 {
2893 int start = search_regs.start[i];
2894 if (start >= 0)
2895 {
56256c2a
RS
2896 if (EQ (last_thing_searched, Qt)
2897 || ! NILP (integers))
ca1d1d23 2898 {
c235cce7
KH
2899 XSETFASTINT (data[2 * i], start);
2900 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2901 }
0ed62dc7 2902 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2903 {
2904 data[2 * i] = Fmake_marker ();
daa37602
JB
2905 Fset_marker (data[2 * i],
2906 make_number (start),
2907 last_thing_searched);
ca1d1d23
JB
2908 data[2 * i + 1] = Fmake_marker ();
2909 Fset_marker (data[2 * i + 1],
177c0ea7 2910 make_number (search_regs.end[i]),
daa37602 2911 last_thing_searched);
ca1d1d23 2912 }
daa37602
JB
2913 else
2914 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2915 abort ();
2916
abd0071c 2917 len = 2 * i + 2;
ca1d1d23
JB
2918 }
2919 else
abd0071c 2920 data[2 * i] = data[2 * i + 1] = Qnil;
ca1d1d23 2921 }
56256c2a 2922
bd2cbd56 2923 if (BUFFERP (last_thing_searched) && !NILP (integers))
41c01205 2924 {
bd2cbd56 2925 data[len] = last_thing_searched;
41c01205
DK
2926 len++;
2927 }
2928
56256c2a
RS
2929 /* If REUSE is not usable, cons up the values and return them. */
2930 if (! CONSP (reuse))
41c01205 2931 return Flist (len, data);
56256c2a
RS
2932
2933 /* If REUSE is a list, store as many value elements as will fit
2934 into the elements of REUSE. */
2935 for (i = 0, tail = reuse; CONSP (tail);
c1d497be 2936 i++, tail = XCDR (tail))
56256c2a 2937 {
41c01205 2938 if (i < len)
f3fbd155 2939 XSETCAR (tail, data[i]);
56256c2a 2940 else
f3fbd155 2941 XSETCAR (tail, Qnil);
56256c2a
RS
2942 prev = tail;
2943 }
2944
2945 /* If we couldn't fit all value elements into REUSE,
2946 cons up the rest of them and add them to the end of REUSE. */
41c01205
DK
2947 if (i < len)
2948 XSETCDR (prev, Flist (len - i, data + i));
56256c2a
RS
2949
2950 return reuse;
ca1d1d23
JB
2951}
2952
b51d6c92
SM
2953/* We used to have an internal use variant of `reseat' described as:
2954
2955 If RESEAT is `evaporate', put the markers back on the free list
2956 immediately. No other references to the markers must exist in this
2957 case, so it is used only internally on the unwind stack and
2958 save-match-data from Lisp.
2959
2960 But it was ill-conceived: those supposedly-internal markers get exposed via
2961 the undo-list, so freeing them here is unsafe. */
ca1d1d23 2962
abd0071c 2963DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
8c1a1077 2964 doc: /* Set internal data on last search match from elements of LIST.
abd0071c
KS
2965LIST should have been created by calling `match-data' previously.
2966
51f10faa 2967If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
abd0071c
KS
2968 (list, reseat)
2969 register Lisp_Object list, reseat;
ca1d1d23
JB
2970{
2971 register int i;
2972 register Lisp_Object marker;
2973
7074fde6
FP
2974 if (running_asynch_code)
2975 save_search_regs ();
2976
29100cea 2977 CHECK_LIST (list);
ca1d1d23 2978
41c01205
DK
2979 /* Unless we find a marker with a buffer or an explicit buffer
2980 in LIST, assume that this match data came from a string. */
daa37602
JB
2981 last_thing_searched = Qt;
2982
4746118a
JB
2983 /* Allocate registers if they don't already exist. */
2984 {
d084e942 2985 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
2986
2987 if (length > search_regs.num_regs)
2988 {
1113d9db
JB
2989 if (search_regs.num_regs == 0)
2990 {
2991 search_regs.start
2992 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2993 search_regs.end
2994 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2995 }
4746118a 2996 else
1113d9db
JB
2997 {
2998 search_regs.start
2999 = (regoff_t *) xrealloc (search_regs.start,
3000 length * sizeof (regoff_t));
3001 search_regs.end
3002 = (regoff_t *) xrealloc (search_regs.end,
3003 length * sizeof (regoff_t));
3004 }
4746118a 3005
e62371e9
KH
3006 for (i = search_regs.num_regs; i < length; i++)
3007 search_regs.start[i] = -1;
3008
487282dc 3009 search_regs.num_regs = length;
4746118a 3010 }
ca1d1d23 3011
abd0071c 3012 for (i = 0; CONSP (list); i++)
41c01205 3013 {
abd0071c 3014 marker = XCAR (list);
bd2cbd56 3015 if (BUFFERP (marker))
c3762cbd 3016 {
bd2cbd56 3017 last_thing_searched = marker;
c3762cbd
DK
3018 break;
3019 }
3020 if (i >= length)
3021 break;
41c01205
DK
3022 if (NILP (marker))
3023 {
3024 search_regs.start[i] = -1;
abd0071c 3025 list = XCDR (list);
41c01205
DK
3026 }
3027 else
3028 {
3029 int from;
abd0071c 3030 Lisp_Object m;
e2811828 3031
abd0071c 3032 m = marker;
41c01205
DK
3033 if (MARKERP (marker))
3034 {
3035 if (XMARKER (marker)->buffer == 0)
3036 XSETFASTINT (marker, 0);
3037 else
3038 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3039 }
e2811828 3040
41c01205
DK
3041 CHECK_NUMBER_COERCE_MARKER (marker);
3042 from = XINT (marker);
e2811828 3043
abd0071c
KS
3044 if (!NILP (reseat) && MARKERP (m))
3045 {
b51d6c92 3046 unchain_marker (XMARKER (m));
9ad54a7e 3047 XSETCAR (list, Qnil);
abd0071c
KS
3048 }
3049
3050 if ((list = XCDR (list), !CONSP (list)))
3051 break;
3052
3053 m = marker = XCAR (list);
3054
41c01205
DK
3055 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3056 XSETFASTINT (marker, 0);
e2811828 3057
41c01205
DK
3058 CHECK_NUMBER_COERCE_MARKER (marker);
3059 search_regs.start[i] = from;
3060 search_regs.end[i] = XINT (marker);
abd0071c
KS
3061
3062 if (!NILP (reseat) && MARKERP (m))
3063 {
b51d6c92 3064 unchain_marker (XMARKER (m));
9ad54a7e 3065 XSETCAR (list, Qnil);
abd0071c 3066 }
41c01205 3067 }
abd0071c 3068 list = XCDR (list);
41c01205 3069 }
ca1d1d23 3070
41c01205
DK
3071 for (; i < search_regs.num_regs; i++)
3072 search_regs.start[i] = -1;
3073 }
ca1d1d23 3074
177c0ea7 3075 return Qnil;
ca1d1d23
JB
3076}
3077
7074fde6
FP
3078/* If non-zero the match data have been saved in saved_search_regs
3079 during the execution of a sentinel or filter. */
75ebf74b 3080static int search_regs_saved;
7074fde6 3081static struct re_registers saved_search_regs;
41c01205 3082static Lisp_Object saved_last_thing_searched;
7074fde6
FP
3083
3084/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3085 if asynchronous code (filter or sentinel) is running. */
3086static void
3087save_search_regs ()
3088{
3089 if (!search_regs_saved)
3090 {
3091 saved_search_regs.num_regs = search_regs.num_regs;
3092 saved_search_regs.start = search_regs.start;
3093 saved_search_regs.end = search_regs.end;
41c01205
DK
3094 saved_last_thing_searched = last_thing_searched;
3095 last_thing_searched = Qnil;
7074fde6 3096 search_regs.num_regs = 0;
2d4a771a
RS
3097 search_regs.start = 0;
3098 search_regs.end = 0;
7074fde6
FP
3099
3100 search_regs_saved = 1;
3101 }
3102}
3103
3104/* Called upon exit from filters and sentinels. */
3105void
abd0071c 3106restore_search_regs ()
7074fde6
FP
3107{
3108 if (search_regs_saved)
3109 {
3110 if (search_regs.num_regs > 0)
3111 {
3112 xfree (search_regs.start);
3113 xfree (search_regs.end);
3114 }
3115 search_regs.num_regs = saved_search_regs.num_regs;
3116 search_regs.start = saved_search_regs.start;
3117 search_regs.end = saved_search_regs.end;
41c01205
DK
3118 last_thing_searched = saved_last_thing_searched;
3119 saved_last_thing_searched = Qnil;
7074fde6
FP
3120 search_regs_saved = 0;
3121 }
3122}
3123
abd0071c
KS
3124static Lisp_Object
3125unwind_set_match_data (list)
3126 Lisp_Object list;
3127{
b51d6c92
SM
3128 /* It is NOT ALWAYS safe to free (evaporate) the markers immediately. */
3129 return Fset_match_data (list, Qt);
abd0071c
KS
3130}
3131
3132/* Called to unwind protect the match data. */
3133void
3134record_unwind_save_match_data ()
3135{
3136 record_unwind_protect (unwind_set_match_data,
3137 Fmatch_data (Qnil, Qnil, Qnil));
3138}
3139
ca1d1d23
JB
3140/* Quote a string to inactivate reg-expr chars */
3141
3142DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
8c1a1077
PJ
3143 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
3144 (string)
5806161b 3145 Lisp_Object string;
ca1d1d23
JB
3146{
3147 register unsigned char *in, *out, *end;
3148 register unsigned char *temp;
0c8533c6 3149 int backslashes_added = 0;
ca1d1d23 3150
b7826503 3151 CHECK_STRING (string);
ca1d1d23 3152
d5db4077 3153 temp = (unsigned char *) alloca (SBYTES (string) * 2);
ca1d1d23
JB
3154
3155 /* Now copy the data into the new string, inserting escapes. */
3156
d5db4077
KR
3157 in = SDATA (string);
3158 end = in + SBYTES (string);
177c0ea7 3159 out = temp;
ca1d1d23
JB
3160
3161 for (; in != end; in++)
3162 {
66bc6082 3163 if (*in == '['
ca1d1d23
JB
3164 || *in == '*' || *in == '.' || *in == '\\'
3165 || *in == '?' || *in == '+'
3166 || *in == '^' || *in == '$')
0c8533c6 3167 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
3168 *out++ = *in;
3169 }
3170
3f8100f1 3171 return make_specified_string (temp,
d5db4077 3172 SCHARS (string) + backslashes_added,
3f8100f1
RS
3173 out - temp,
3174 STRING_MULTIBYTE (string));
ca1d1d23 3175}
177c0ea7 3176\f
dfcf069d 3177void
ca1d1d23
JB
3178syms_of_search ()
3179{
3180 register int i;
3181
487282dc
KH
3182 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3183 {
3184 searchbufs[i].buf.allocated = 100;
b23c0a83 3185 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
487282dc
KH
3186 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3187 searchbufs[i].regexp = Qnil;
ecdb561e 3188 searchbufs[i].whitespace_regexp = Qnil;
b69e3c18 3189 searchbufs[i].syntax_table = Qnil;
487282dc 3190 staticpro (&searchbufs[i].regexp);
aa77b5ce 3191 staticpro (&searchbufs[i].whitespace_regexp);
b69e3c18 3192 staticpro (&searchbufs[i].syntax_table);
487282dc
KH
3193 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3194 }
3195 searchbuf_head = &searchbufs[0];
ca1d1d23
JB
3196
3197 Qsearch_failed = intern ("search-failed");
3198 staticpro (&Qsearch_failed);
3199 Qinvalid_regexp = intern ("invalid-regexp");
3200 staticpro (&Qinvalid_regexp);
3201
3202 Fput (Qsearch_failed, Qerror_conditions,
3203 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3204 Fput (Qsearch_failed, Qerror_message,
3205 build_string ("Search failed"));
3206
3207 Fput (Qinvalid_regexp, Qerror_conditions,
3208 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3209 Fput (Qinvalid_regexp, Qerror_message,
3210 build_string ("Invalid regexp"));
3211
daa37602
JB
3212 last_thing_searched = Qnil;
3213 staticpro (&last_thing_searched);
3214
0f6af254
DK
3215 saved_last_thing_searched = Qnil;
3216 staticpro (&saved_last_thing_searched);
3217
41a33295 3218 DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
e2811828 3219 doc: /* Regexp to substitute for bunches of spaces in regexp search.
f31a9a68
RS
3220Some commands use this for user-specified regexps.
3221Spaces that occur inside character classes or repetition operators
3222or other such regexp constructs are not replaced with this.
3223A value of nil (which is the normal value) means treat spaces literally. */);
41a33295 3224 Vsearch_spaces_regexp = Qnil;
f31a9a68 3225
ef887810
RS
3226 DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data,
3227 doc: /* Internal use only.
39d0bf74
RS
3228If non-nil, the primitive searching and matching functions
3229such as `looking-at', `string-match', `re-search-forward', etc.,
3230do not set the match data. The proper way to use this variable
3231is to bind it with `let' around a small expression. */);
ef887810
RS
3232 Vinhibit_changing_match_data = Qnil;
3233
ca1d1d23 3234 defsubr (&Slooking_at);
b819a390
RS
3235 defsubr (&Sposix_looking_at);
3236 defsubr (&Sstring_match);
3237 defsubr (&Sposix_string_match);
ca1d1d23
JB
3238 defsubr (&Ssearch_forward);
3239 defsubr (&Ssearch_backward);
3240 defsubr (&Sword_search_forward);
3241 defsubr (&Sword_search_backward);
3242 defsubr (&Sre_search_forward);
3243 defsubr (&Sre_search_backward);
b819a390
RS
3244 defsubr (&Sposix_search_forward);
3245 defsubr (&Sposix_search_backward);
ca1d1d23
JB
3246 defsubr (&Sreplace_match);
3247 defsubr (&Smatch_beginning);
3248 defsubr (&Smatch_end);
3249 defsubr (&Smatch_data);
3f1c005b 3250 defsubr (&Sset_match_data);
ca1d1d23
JB
3251 defsubr (&Sregexp_quote);
3252}
76fc1ea2
KH
3253
3254/* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3255 (do not change this comment) */