(syms_of_search): staticpro
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
bd2cbd56
SM
2 Copyright (C) 1985, 86,87,93,94,97,98, 1999, 2004
3 Free Software Foundation, Inc.
ca1d1d23
JB
4
5This file is part of GNU Emacs.
6
7GNU Emacs is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
7c938215 9the Free Software Foundation; either version 2, or (at your option)
ca1d1d23
JB
10any later version.
11
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU Emacs; see the file COPYING. If not, write to
3b7ad313
EN
19the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20Boston, MA 02111-1307, USA. */
ca1d1d23
JB
21
22
18160b98 23#include <config.h>
ca1d1d23
JB
24#include "lisp.h"
25#include "syntax.h"
5679531d 26#include "category.h"
ca1d1d23 27#include "buffer.h"
5679531d 28#include "charset.h"
9169c321 29#include "region-cache.h"
ca1d1d23 30#include "commands.h"
9ac0d9e0 31#include "blockinput.h"
bf1760bb 32#include "intervals.h"
4746118a 33
ca1d1d23
JB
34#include <sys/types.h>
35#include "regex.h"
36
1d288aef 37#define REGEXP_CACHE_SIZE 20
ca1d1d23 38
487282dc
KH
39/* If the regexp is non-nil, then the buffer contains the compiled form
40 of that regexp, suitable for searching. */
1d288aef
RS
41struct regexp_cache
42{
487282dc
KH
43 struct regexp_cache *next;
44 Lisp_Object regexp;
45 struct re_pattern_buffer buf;
46 char fastmap[0400];
b819a390
RS
47 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
48 char posix;
487282dc 49};
ca1d1d23 50
487282dc
KH
51/* The instances of that struct. */
52struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 53
487282dc
KH
54/* The head of the linked list; points to the most recently used buffer. */
55struct regexp_cache *searchbuf_head;
ca1d1d23 56
ca1d1d23 57
4746118a
JB
58/* Every call to re_match, etc., must pass &search_regs as the regs
59 argument unless you can show it is unnecessary (i.e., if re_match
60 is certainly going to be called again before region-around-match
61 can be called).
62
63 Since the registers are now dynamically allocated, we need to make
64 sure not to refer to the Nth register before checking that it has
1113d9db
JB
65 been allocated by checking search_regs.num_regs.
66
67 The regex code keeps track of whether it has allocated the search
487282dc
KH
68 buffer using bits in the re_pattern_buffer. This means that whenever
69 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
70 allocated any registers, and will allocate new registers the next
71 time you call a searching or matching function. Therefore, we need
72 to call re_set_registers after compiling a new pattern or after
73 setting the match registers, so that the regex functions will be
74 able to free or re-allocate it properly. */
ca1d1d23
JB
75static struct re_registers search_regs;
76
daa37602
JB
77/* The buffer in which the last search was performed, or
78 Qt if the last search was done in a string;
79 Qnil if no searching has been done yet. */
80static Lisp_Object last_thing_searched;
ca1d1d23 81
8e6208c5 82/* error condition signaled when regexp compile_pattern fails */
ca1d1d23
JB
83
84Lisp_Object Qinvalid_regexp;
85
ca325161 86static void set_search_regs ();
044f81f1 87static void save_search_regs ();
facdc750
RS
88static int simple_search ();
89static int boyer_moore ();
b819a390
RS
90static int search_buffer ();
91
ca1d1d23
JB
/* Report, as a Lisp error, that the regexp matcher overflowed its
   stack.  Callers invoke this when a match function returns -2.  */
static void
matcher_overflow ()
{
  error ("Stack overflow in regexp matcher");
}
97
b819a390
RS
98/* Compile a regexp and signal a Lisp error if anything goes wrong.
99 PATTERN is the pattern to compile.
100 CP is the place to put the result.
facdc750 101 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
102 REGP is the structure that says where to store the "register"
103 values that will result from matching this pattern.
104 If it is 0, we should compile the pattern not to record any
105 subexpression bounds.
106 POSIX is nonzero if we want full backtracking (POSIX style)
5679531d
KH
107 for this pattern. 0 means backtrack only enough to get a valid match.
108 MULTIBYTE is nonzero if we want to handle multibyte characters in
109 PATTERN. 0 means all multibyte characters are recognized just as
110 sequences of binary data. */
ca1d1d23 111
487282dc 112static void
5679531d 113compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
487282dc 114 struct regexp_cache *cp;
ca1d1d23 115 Lisp_Object pattern;
facdc750 116 Lisp_Object translate;
487282dc 117 struct re_registers *regp;
b819a390 118 int posix;
5679531d 119 int multibyte;
ca1d1d23 120{
7276d3d8 121 unsigned char *raw_pattern;
f8bd51c4 122 int raw_pattern_size;
d451e4db 123 char *val;
b819a390 124 reg_syntax_t old;
ca1d1d23 125
f8bd51c4
KH
126 /* MULTIBYTE says whether the text to be searched is multibyte.
127 We must convert PATTERN to match that, or we will not really
128 find things right. */
129
130 if (multibyte == STRING_MULTIBYTE (pattern))
131 {
d5db4077
KR
132 raw_pattern = (unsigned char *) SDATA (pattern);
133 raw_pattern_size = SBYTES (pattern);
f8bd51c4
KH
134 }
135 else if (multibyte)
136 {
d5db4077
KR
137 raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
138 SCHARS (pattern));
7276d3d8 139 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
140 copy_text (SDATA (pattern), raw_pattern,
141 SCHARS (pattern), 0, 1);
f8bd51c4
KH
142 }
143 else
144 {
145 /* Converting multibyte to single-byte.
146
147 ??? Perhaps this conversion should be done in a special way
148 by subtracting nonascii-insert-offset from each non-ASCII char,
149 so that only the multibyte chars which really correspond to
150 the chosen single-byte character set can possibly match. */
d5db4077 151 raw_pattern_size = SCHARS (pattern);
7276d3d8 152 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
153 copy_text (SDATA (pattern), raw_pattern,
154 SBYTES (pattern), 1, 0);
f8bd51c4
KH
155 }
156
487282dc 157 cp->regexp = Qnil;
59fab369 158 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
b819a390 159 cp->posix = posix;
5679531d 160 cp->buf.multibyte = multibyte;
9ac0d9e0 161 BLOCK_INPUT;
fb4a568d 162 old = re_set_syntax (RE_SYNTAX_EMACS
b819a390 163 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
7276d3d8
RS
164 val = (char *) re_compile_pattern ((char *)raw_pattern,
165 raw_pattern_size, &cp->buf);
b819a390 166 re_set_syntax (old);
9ac0d9e0 167 UNBLOCK_INPUT;
ca1d1d23 168 if (val)
487282dc 169 Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
1113d9db 170
487282dc 171 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
172}
173
6efc7887
RS
174/* Shrink each compiled regexp buffer in the cache
175 to the size actually used right now.
176 This is called from garbage collection. */
177
178void
179shrink_regexp_cache ()
180{
a968f437 181 struct regexp_cache *cp;
6efc7887
RS
182
183 for (cp = searchbuf_head; cp != 0; cp = cp->next)
184 {
185 cp->buf.allocated = cp->buf.used;
186 cp->buf.buffer
b23c0a83 187 = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
6efc7887
RS
188 }
189}
190
487282dc 191/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
192 the cache.
193 PATTERN is the pattern to compile.
facdc750 194 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
195 REGP is the structure that says where to store the "register"
196 values that will result from matching this pattern.
197 If it is 0, we should compile the pattern not to record any
198 subexpression bounds.
199 POSIX is nonzero if we want full backtracking (POSIX style)
200 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
201
202struct re_pattern_buffer *
0c8533c6 203compile_pattern (pattern, regp, translate, posix, multibyte)
487282dc
KH
204 Lisp_Object pattern;
205 struct re_registers *regp;
facdc750 206 Lisp_Object translate;
0c8533c6 207 int posix, multibyte;
487282dc
KH
208{
209 struct regexp_cache *cp, **cpp;
210
211 for (cpp = &searchbuf_head; ; cpp = &cp->next)
212 {
213 cp = *cpp;
f1b9c7c1
KR
214 /* Entries are initialized to nil, and may be set to nil by
215 compile_pattern_1 if the pattern isn't valid. Don't apply
49a5f770
KR
216 string accessors in those cases. However, compile_pattern_1
217 is only applied to the cache entry we pick here to reuse. So
218 nil should never appear before a non-nil entry. */
7c752c80 219 if (NILP (cp->regexp))
f1b9c7c1 220 goto compile_it;
d5db4077 221 if (SCHARS (cp->regexp) == SCHARS (pattern)
cf69b13e 222 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
1d288aef 223 && !NILP (Fstring_equal (cp->regexp, pattern))
59fab369 224 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
5679531d
KH
225 && cp->posix == posix
226 && cp->buf.multibyte == multibyte)
487282dc
KH
227 break;
228
f1b9c7c1
KR
229 /* If we're at the end of the cache, compile into the nil cell
230 we found, or the last (least recently used) cell with a
231 string value. */
487282dc
KH
232 if (cp->next == 0)
233 {
f1b9c7c1 234 compile_it:
5679531d 235 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
487282dc
KH
236 break;
237 }
238 }
239
240 /* When we get here, cp (aka *cpp) contains the compiled pattern,
241 either because we found it in the cache or because we just compiled it.
242 Move it to the front of the queue to mark it as most recently used. */
243 *cpp = cp->next;
244 cp->next = searchbuf_head;
245 searchbuf_head = cp;
1113d9db 246
6639708c
RS
247 /* Advise the searching functions about the space we have allocated
248 for register data. */
249 if (regp)
250 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
251
487282dc 252 return &cp->buf;
ca1d1d23
JB
253}
254
255/* Error condition used for failing searches */
256Lisp_Object Qsearch_failed;
257
258Lisp_Object
259signal_failure (arg)
260 Lisp_Object arg;
261{
262 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
263 return Qnil;
264}
265\f
b819a390
RS
266static Lisp_Object
267looking_at_1 (string, posix)
ca1d1d23 268 Lisp_Object string;
b819a390 269 int posix;
ca1d1d23
JB
270{
271 Lisp_Object val;
272 unsigned char *p1, *p2;
273 int s1, s2;
274 register int i;
487282dc 275 struct re_pattern_buffer *bufp;
ca1d1d23 276
7074fde6
FP
277 if (running_asynch_code)
278 save_search_regs ();
279
b7826503 280 CHECK_STRING (string);
487282dc
KH
281 bufp = compile_pattern (string, &search_regs,
282 (!NILP (current_buffer->case_fold_search)
facdc750 283 ? DOWNCASE_TABLE : Qnil),
0c8533c6
RS
284 posix,
285 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23
JB
286
287 immediate_quit = 1;
288 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
289
290 /* Get pointers and sizes of the two strings
291 that make up the visible portion of the buffer. */
292
293 p1 = BEGV_ADDR;
fa8ed3e0 294 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 295 p2 = GAP_END_ADDR;
fa8ed3e0 296 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
297 if (s1 < 0)
298 {
299 p2 = p1;
fa8ed3e0 300 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
301 s1 = 0;
302 }
303 if (s2 < 0)
304 {
fa8ed3e0 305 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
306 s2 = 0;
307 }
8bb43c28
RS
308
309 re_match_object = Qnil;
177c0ea7 310
487282dc 311 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
fa8ed3e0
RS
312 PT_BYTE - BEGV_BYTE, &search_regs,
313 ZV_BYTE - BEGV_BYTE);
de182d70 314 immediate_quit = 0;
177c0ea7 315
ca1d1d23
JB
316 if (i == -2)
317 matcher_overflow ();
318
319 val = (0 <= i ? Qt : Qnil);
fa8ed3e0
RS
320 if (i >= 0)
321 for (i = 0; i < search_regs.num_regs; i++)
322 if (search_regs.start[i] >= 0)
323 {
324 search_regs.start[i]
325 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
326 search_regs.end[i]
327 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
328 }
a3668d92 329 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
330 return val;
331}
332
b819a390 333DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
8c1a1077
PJ
334 doc: /* Return t if text after point matches regular expression REGEXP.
335This function modifies the match data that `match-beginning',
336`match-end' and `match-data' access; save and restore the match
337data if you want to preserve them. */)
338 (regexp)
94f94972 339 Lisp_Object regexp;
b819a390 340{
94f94972 341 return looking_at_1 (regexp, 0);
b819a390
RS
342}
343
344DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
8c1a1077
PJ
345 doc: /* Return t if text after point matches regular expression REGEXP.
346Find the longest match, in accord with Posix regular expression rules.
347This function modifies the match data that `match-beginning',
348`match-end' and `match-data' access; save and restore the match
349data if you want to preserve them. */)
350 (regexp)
94f94972 351 Lisp_Object regexp;
b819a390 352{
94f94972 353 return looking_at_1 (regexp, 1);
b819a390
RS
354}
355\f
356static Lisp_Object
357string_match_1 (regexp, string, start, posix)
ca1d1d23 358 Lisp_Object regexp, string, start;
b819a390 359 int posix;
ca1d1d23
JB
360{
361 int val;
487282dc 362 struct re_pattern_buffer *bufp;
0c8533c6
RS
363 int pos, pos_byte;
364 int i;
ca1d1d23 365
7074fde6
FP
366 if (running_asynch_code)
367 save_search_regs ();
368
b7826503
PJ
369 CHECK_STRING (regexp);
370 CHECK_STRING (string);
ca1d1d23
JB
371
372 if (NILP (start))
0c8533c6 373 pos = 0, pos_byte = 0;
ca1d1d23
JB
374 else
375 {
d5db4077 376 int len = SCHARS (string);
ca1d1d23 377
b7826503 378 CHECK_NUMBER (start);
0c8533c6
RS
379 pos = XINT (start);
380 if (pos < 0 && -pos <= len)
381 pos = len + pos;
382 else if (0 > pos || pos > len)
ca1d1d23 383 args_out_of_range (string, start);
0c8533c6 384 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
385 }
386
487282dc
KH
387 bufp = compile_pattern (regexp, &search_regs,
388 (!NILP (current_buffer->case_fold_search)
facdc750 389 ? DOWNCASE_TABLE : Qnil),
0c8533c6
RS
390 posix,
391 STRING_MULTIBYTE (string));
ca1d1d23 392 immediate_quit = 1;
8bb43c28 393 re_match_object = string;
177c0ea7 394
d5db4077
KR
395 val = re_search (bufp, (char *) SDATA (string),
396 SBYTES (string), pos_byte,
397 SBYTES (string) - pos_byte,
ca1d1d23
JB
398 &search_regs);
399 immediate_quit = 0;
daa37602 400 last_thing_searched = Qt;
ca1d1d23
JB
401 if (val == -2)
402 matcher_overflow ();
403 if (val < 0) return Qnil;
0c8533c6
RS
404
405 for (i = 0; i < search_regs.num_regs; i++)
406 if (search_regs.start[i] >= 0)
407 {
408 search_regs.start[i]
409 = string_byte_to_char (string, search_regs.start[i]);
410 search_regs.end[i]
411 = string_byte_to_char (string, search_regs.end[i]);
412 }
413
414 return make_number (string_byte_to_char (string, val));
ca1d1d23 415}
e59a8453 416
b819a390 417DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
8c1a1077
PJ
418 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
419Case is ignored if `case-fold-search' is non-nil in the current buffer.
420If third arg START is non-nil, start search at that index in STRING.
421For index of first char beyond the match, do (match-end 0).
422`match-end' and `match-beginning' also give indices of substrings
2bd2f32d
RS
423matched by parenthesis constructs in the pattern.
424
425You can use the function `match-string' to extract the substrings
426matched by the parenthesis constructions in REGEXP. */)
8c1a1077 427 (regexp, string, start)
b819a390
RS
428 Lisp_Object regexp, string, start;
429{
430 return string_match_1 (regexp, string, start, 0);
431}
432
433DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
8c1a1077
PJ
434 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
435Find the longest match, in accord with Posix regular expression rules.
436Case is ignored if `case-fold-search' is non-nil in the current buffer.
437If third arg START is non-nil, start search at that index in STRING.
438For index of first char beyond the match, do (match-end 0).
439`match-end' and `match-beginning' also give indices of substrings
440matched by parenthesis constructs in the pattern. */)
441 (regexp, string, start)
b819a390
RS
442 Lisp_Object regexp, string, start;
443{
444 return string_match_1 (regexp, string, start, 1);
445}
446
e59a8453
RS
447/* Match REGEXP against STRING, searching all of STRING,
448 and return the index of the match, or negative on failure.
449 This does not clobber the match data. */
450
451int
452fast_string_match (regexp, string)
453 Lisp_Object regexp, string;
454{
455 int val;
487282dc 456 struct re_pattern_buffer *bufp;
e59a8453 457
facdc750
RS
458 bufp = compile_pattern (regexp, 0, Qnil,
459 0, STRING_MULTIBYTE (string));
e59a8453 460 immediate_quit = 1;
8bb43c28 461 re_match_object = string;
177c0ea7 462
d5db4077
KR
463 val = re_search (bufp, (char *) SDATA (string),
464 SBYTES (string), 0,
465 SBYTES (string), 0);
e59a8453
RS
466 immediate_quit = 0;
467 return val;
468}
5679531d
KH
469
470/* Match REGEXP against STRING, searching all of STRING ignoring case,
471 and return the index of the match, or negative on failure.
0c8533c6
RS
472 This does not clobber the match data.
473 We assume that STRING contains single-byte characters. */
5679531d
KH
474
475extern Lisp_Object Vascii_downcase_table;
476
477int
b4577c63 478fast_c_string_match_ignore_case (regexp, string)
5679531d 479 Lisp_Object regexp;
96b80561 480 const char *string;
5679531d
KH
481{
482 int val;
483 struct re_pattern_buffer *bufp;
484 int len = strlen (string);
485
0c8533c6 486 regexp = string_make_unibyte (regexp);
b4577c63 487 re_match_object = Qt;
5679531d 488 bufp = compile_pattern (regexp, 0,
facdc750 489 Vascii_downcase_table, 0,
f8bd51c4 490 0);
5679531d
KH
491 immediate_quit = 1;
492 val = re_search (bufp, string, len, 0, len, 0);
493 immediate_quit = 0;
494 return val;
495}
ca1d1d23 496\f
9169c321
JB
497/* The newline cache: remembering which sections of text have no newlines. */
498
499/* If the user has requested newline caching, make sure it's on.
500 Otherwise, make sure it's off.
501 This is our cheezy way of associating an action with the change of
502 state of a buffer-local variable. */
503static void
504newline_cache_on_off (buf)
505 struct buffer *buf;
506{
507 if (NILP (buf->cache_long_line_scans))
508 {
509 /* It should be off. */
510 if (buf->newline_cache)
511 {
512 free_region_cache (buf->newline_cache);
513 buf->newline_cache = 0;
514 }
515 }
516 else
517 {
518 /* It should be on. */
519 if (buf->newline_cache == 0)
520 buf->newline_cache = new_region_cache ();
521 }
522}
523
524\f
525/* Search for COUNT instances of the character TARGET between START and END.
526
527 If COUNT is positive, search forwards; END must be >= START.
528 If COUNT is negative, search backwards for the -COUNTth instance;
529 END must be <= START.
530 If COUNT is zero, do anything you please; run rogue, for all I care.
531
532 If END is zero, use BEGV or ZV instead, as appropriate for the
533 direction indicated by COUNT.
ffd56f97
JB
534
535 If we find COUNT instances, set *SHORTAGE to zero, and return the
5bfe95c9
RS
536 position after the COUNTth match. Note that for reverse motion
537 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 538
9169c321
JB
539 If we don't find COUNT instances before reaching END, set *SHORTAGE
540 to the number of TARGETs left unfound, and return END.
ffd56f97 541
087a5f81
RS
542 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
543 except when inside redisplay. */
544
dfcf069d 545int
9169c321
JB
546scan_buffer (target, start, end, count, shortage, allow_quit)
547 register int target;
548 int start, end;
549 int count;
550 int *shortage;
087a5f81 551 int allow_quit;
ca1d1d23 552{
9169c321 553 struct region_cache *newline_cache;
177c0ea7 554 int direction;
ffd56f97 555
9169c321
JB
556 if (count > 0)
557 {
558 direction = 1;
559 if (! end) end = ZV;
560 }
561 else
562 {
563 direction = -1;
564 if (! end) end = BEGV;
565 }
ffd56f97 566
9169c321
JB
567 newline_cache_on_off (current_buffer);
568 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
569
570 if (shortage != 0)
571 *shortage = 0;
572
087a5f81 573 immediate_quit = allow_quit;
ca1d1d23 574
ffd56f97 575 if (count > 0)
9169c321 576 while (start != end)
ca1d1d23 577 {
9169c321
JB
578 /* Our innermost scanning loop is very simple; it doesn't know
579 about gaps, buffer ends, or the newline cache. ceiling is
580 the position of the last character before the next such
581 obstacle --- the last character the dumb search loop should
582 examine. */
fa8ed3e0
RS
583 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
584 int start_byte = CHAR_TO_BYTE (start);
67ce527d 585 int tem;
9169c321
JB
586
587 /* If we're looking for a newline, consult the newline cache
588 to see where we can avoid some scanning. */
589 if (target == '\n' && newline_cache)
590 {
591 int next_change;
592 immediate_quit = 0;
593 while (region_cache_forward
fa8ed3e0
RS
594 (current_buffer, newline_cache, start_byte, &next_change))
595 start_byte = next_change;
cbe0db0d 596 immediate_quit = allow_quit;
9169c321 597
fa8ed3e0
RS
598 /* START should never be after END. */
599 if (start_byte > ceiling_byte)
600 start_byte = ceiling_byte;
9169c321
JB
601
602 /* Now the text after start is an unknown region, and
603 next_change is the position of the next known region. */
fa8ed3e0 604 ceiling_byte = min (next_change - 1, ceiling_byte);
9169c321
JB
605 }
606
607 /* The dumb loop can only scan text stored in contiguous
608 bytes. BUFFER_CEILING_OF returns the last character
609 position that is contiguous, so the ceiling is the
610 position after that. */
67ce527d
KH
611 tem = BUFFER_CEILING_OF (start_byte);
612 ceiling_byte = min (tem, ceiling_byte);
9169c321
JB
613
614 {
177c0ea7 615 /* The termination address of the dumb loop. */
fa8ed3e0
RS
616 register unsigned char *ceiling_addr
617 = BYTE_POS_ADDR (ceiling_byte) + 1;
618 register unsigned char *cursor
619 = BYTE_POS_ADDR (start_byte);
9169c321
JB
620 unsigned char *base = cursor;
621
622 while (cursor < ceiling_addr)
623 {
624 unsigned char *scan_start = cursor;
625
626 /* The dumb loop. */
627 while (*cursor != target && ++cursor < ceiling_addr)
628 ;
629
630 /* If we're looking for newlines, cache the fact that
631 the region from start to cursor is free of them. */
632 if (target == '\n' && newline_cache)
633 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
634 start_byte + scan_start - base,
635 start_byte + cursor - base);
9169c321
JB
636
637 /* Did we find the target character? */
638 if (cursor < ceiling_addr)
639 {
640 if (--count == 0)
641 {
642 immediate_quit = 0;
fa8ed3e0 643 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
9169c321
JB
644 }
645 cursor++;
646 }
647 }
648
fa8ed3e0 649 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321 650 }
ca1d1d23
JB
651 }
652 else
9169c321
JB
653 while (start > end)
654 {
655 /* The last character to check before the next obstacle. */
fa8ed3e0
RS
656 int ceiling_byte = CHAR_TO_BYTE (end);
657 int start_byte = CHAR_TO_BYTE (start);
67ce527d 658 int tem;
9169c321
JB
659
660 /* Consult the newline cache, if appropriate. */
661 if (target == '\n' && newline_cache)
662 {
663 int next_change;
664 immediate_quit = 0;
665 while (region_cache_backward
fa8ed3e0
RS
666 (current_buffer, newline_cache, start_byte, &next_change))
667 start_byte = next_change;
cbe0db0d 668 immediate_quit = allow_quit;
9169c321
JB
669
670 /* Start should never be at or before end. */
fa8ed3e0
RS
671 if (start_byte <= ceiling_byte)
672 start_byte = ceiling_byte + 1;
9169c321
JB
673
674 /* Now the text before start is an unknown region, and
675 next_change is the position of the next known region. */
fa8ed3e0 676 ceiling_byte = max (next_change, ceiling_byte);
9169c321
JB
677 }
678
679 /* Stop scanning before the gap. */
67ce527d
KH
680 tem = BUFFER_FLOOR_OF (start_byte - 1);
681 ceiling_byte = max (tem, ceiling_byte);
9169c321
JB
682
683 {
684 /* The termination address of the dumb loop. */
fa8ed3e0
RS
685 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
686 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
687 unsigned char *base = cursor;
688
689 while (cursor >= ceiling_addr)
690 {
691 unsigned char *scan_start = cursor;
692
693 while (*cursor != target && --cursor >= ceiling_addr)
694 ;
695
696 /* If we're looking for newlines, cache the fact that
697 the region from after the cursor to start is free of them. */
698 if (target == '\n' && newline_cache)
699 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
700 start_byte + cursor - base,
701 start_byte + scan_start - base);
9169c321
JB
702
703 /* Did we find the target character? */
704 if (cursor >= ceiling_addr)
705 {
706 if (++count >= 0)
707 {
708 immediate_quit = 0;
fa8ed3e0 709 return BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
710 }
711 cursor--;
712 }
713 }
714
fa8ed3e0 715 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
716 }
717 }
718
ca1d1d23
JB
719 immediate_quit = 0;
720 if (shortage != 0)
ffd56f97 721 *shortage = count * direction;
9169c321 722 return start;
ca1d1d23 723}
fa8ed3e0
RS
724\f
725/* Search for COUNT instances of a line boundary, which means either a
726 newline or (if selective display enabled) a carriage return.
727 Start at START. If COUNT is negative, search backwards.
728
729 We report the resulting position by calling TEMP_SET_PT_BOTH.
730
731 If we find COUNT instances. we position after (always after,
732 even if scanning backwards) the COUNTth match, and return 0.
733
734 If we don't find COUNT instances before reaching the end of the
735 buffer (or the beginning, if scanning backwards), we return
736 the number of line boundaries left unfound, and position at
737 the limit we bumped up against.
738
739 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
d5d57b92 740 except in special cases. */
ca1d1d23 741
63fa018d 742int
fa8ed3e0
RS
743scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
744 int start, start_byte;
745 int limit, limit_byte;
746 register int count;
747 int allow_quit;
63fa018d 748{
fa8ed3e0
RS
749 int direction = ((count > 0) ? 1 : -1);
750
751 register unsigned char *cursor;
752 unsigned char *base;
753
754 register int ceiling;
755 register unsigned char *ceiling_addr;
756
d5d57b92
RS
757 int old_immediate_quit = immediate_quit;
758
fa8ed3e0
RS
759 /* The code that follows is like scan_buffer
760 but checks for either newline or carriage return. */
761
d5d57b92
RS
762 if (allow_quit)
763 immediate_quit++;
fa8ed3e0
RS
764
765 start_byte = CHAR_TO_BYTE (start);
766
767 if (count > 0)
768 {
769 while (start_byte < limit_byte)
770 {
771 ceiling = BUFFER_CEILING_OF (start_byte);
772 ceiling = min (limit_byte - 1, ceiling);
773 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
774 base = (cursor = BYTE_POS_ADDR (start_byte));
775 while (1)
776 {
777 while (*cursor != '\n' && ++cursor != ceiling_addr)
778 ;
779
780 if (cursor != ceiling_addr)
781 {
782 if (--count == 0)
783 {
d5d57b92 784 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
785 start_byte = start_byte + cursor - base + 1;
786 start = BYTE_TO_CHAR (start_byte);
787 TEMP_SET_PT_BOTH (start, start_byte);
788 return 0;
789 }
790 else
791 if (++cursor == ceiling_addr)
792 break;
793 }
794 else
795 break;
796 }
797 start_byte += cursor - base;
798 }
799 }
800 else
801 {
fa8ed3e0
RS
802 while (start_byte > limit_byte)
803 {
804 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
805 ceiling = max (limit_byte, ceiling);
806 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
807 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
808 while (1)
809 {
810 while (--cursor != ceiling_addr && *cursor != '\n')
811 ;
812
813 if (cursor != ceiling_addr)
814 {
815 if (++count == 0)
816 {
d5d57b92 817 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
818 /* Return the position AFTER the match we found. */
819 start_byte = start_byte + cursor - base + 1;
820 start = BYTE_TO_CHAR (start_byte);
821 TEMP_SET_PT_BOTH (start, start_byte);
822 return 0;
823 }
824 }
825 else
826 break;
827 }
828 /* Here we add 1 to compensate for the last decrement
829 of CURSOR, which took it past the valid range. */
830 start_byte += cursor - base + 1;
831 }
832 }
833
834 TEMP_SET_PT_BOTH (limit, limit_byte);
d5d57b92 835 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
836
837 return count * direction;
63fa018d
RS
838}
839
/* Return the position after the CNTth newline from FROM, as found by
   scan_buffer with no explicit end position, no shortage report, and
   quitting not allowed.  */
int
find_next_newline_no_quit (from, cnt)
     register int from, cnt;
{
  int *no_shortage = (int *) 0;
  int pos = scan_buffer ('\n', from, 0, cnt, no_shortage, 0);

  return pos;
}
846
9169c321
JB
/* Like find_next_newline, but return the position before the
   newline, not after, and search only up to TO.  This isn't just
   find_next_newline (...)-1, because the scan might stop at TO
   without finding a newline, in which case the position scan_buffer
   returned is passed back unchanged.  Quitting is allowed during the
   scan.  */

int
find_before_next_newline (from, to, cnt)
     int from, to, cnt;
{
  int shortage;
  int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);

  /* A zero shortage means all CNT newlines were found, so POS is
     just past one; step back over it.  */
  return (shortage == 0) ? pos - 1 : pos;
}
863\f
ca1d1d23
JB
864/* Subroutines of Lisp buffer search functions. */
865
866static Lisp_Object
b819a390 867search_command (string, bound, noerror, count, direction, RE, posix)
ca1d1d23
JB
868 Lisp_Object string, bound, noerror, count;
869 int direction;
870 int RE;
b819a390 871 int posix;
ca1d1d23
JB
872{
873 register int np;
9f43ad85 874 int lim, lim_byte;
ca1d1d23
JB
875 int n = direction;
876
877 if (!NILP (count))
878 {
b7826503 879 CHECK_NUMBER (count);
ca1d1d23
JB
880 n *= XINT (count);
881 }
882
b7826503 883 CHECK_STRING (string);
ca1d1d23 884 if (NILP (bound))
9f43ad85
RS
885 {
886 if (n > 0)
887 lim = ZV, lim_byte = ZV_BYTE;
888 else
889 lim = BEGV, lim_byte = BEGV_BYTE;
890 }
ca1d1d23
JB
891 else
892 {
b7826503 893 CHECK_NUMBER_COERCE_MARKER (bound);
ca1d1d23 894 lim = XINT (bound);
6ec8bbd2 895 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
896 error ("Invalid search bound (wrong side of point)");
897 if (lim > ZV)
9f43ad85 898 lim = ZV, lim_byte = ZV_BYTE;
588d2fd5 899 else if (lim < BEGV)
9f43ad85 900 lim = BEGV, lim_byte = BEGV_BYTE;
588d2fd5
KH
901 else
902 lim_byte = CHAR_TO_BYTE (lim);
ca1d1d23
JB
903 }
904
9f43ad85 905 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
ca1d1d23 906 (!NILP (current_buffer->case_fold_search)
facdc750 907 ? current_buffer->case_canon_table
3135e9fd 908 : Qnil),
ca1d1d23 909 (!NILP (current_buffer->case_fold_search)
facdc750 910 ? current_buffer->case_eqv_table
3135e9fd 911 : Qnil),
b819a390 912 posix);
ca1d1d23
JB
913 if (np <= 0)
914 {
915 if (NILP (noerror))
916 return signal_failure (string);
917 if (!EQ (noerror, Qt))
918 {
919 if (lim < BEGV || lim > ZV)
920 abort ();
9f43ad85 921 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
922 return Qnil;
923#if 0 /* This would be clean, but maybe programs depend on
924 a value of nil here. */
481399bf 925 np = lim;
a5f217b8 926#endif
ca1d1d23 927 }
481399bf
RS
928 else
929 return Qnil;
ca1d1d23
JB
930 }
931
932 if (np < BEGV || np > ZV)
933 abort ();
934
935 SET_PT (np);
936
937 return make_number (np);
938}
939\f
fa8ed3e0
RS
940/* Return 1 if REGEXP it matches just one constant string. */
941
b6d6a51c
KH
942static int
943trivial_regexp_p (regexp)
944 Lisp_Object regexp;
945{
d5db4077
KR
946 int len = SBYTES (regexp);
947 unsigned char *s = SDATA (regexp);
b6d6a51c
KH
948 while (--len >= 0)
949 {
950 switch (*s++)
951 {
952 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
953 return 0;
954 case '\\':
955 if (--len < 0)
956 return 0;
957 switch (*s++)
958 {
959 case '|': case '(': case ')': case '`': case '\'': case 'b':
960 case 'B': case '<': case '>': case 'w': case 'W': case 's':
29f89fe7 961 case 'S': case '=': case '{': case '}': case '_':
5679531d 962 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 963 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
964 case '6': case '7': case '8': case '9':
965 return 0;
966 }
967 }
968 }
969 return 1;
970}
971
ca325161 972/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 973 starting at position POS and stopping at position LIM,
b819a390 974 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
975 a regular expression if RE is true.
976
977 If N is positive, searching is forward and LIM must be greater than POS.
978 If N is negative, searching is backward and LIM must be less than POS.
979
facdc750 980 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 981 or else the position at the beginning of the Nth occurrence
b819a390
RS
982 (if searching backward) or the end (if searching forward).
983
984 POSIX is nonzero if we want full backtracking (POSIX style)
985 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 986
aff2ce94
RS
/* Store into OUT the translation of character D according to the
   table TRT.  If TRT is nil, or the table entry for D is not an
   integer, OUT receives D unchanged.  OUT must be an lvalue; D may be
   evaluated more than once, so it must have no side effects.  */
#define TRANSLATE(out, trt, d)                                  \
do                                                              \
  {                                                             \
    if (NILP (trt))                                             \
      out = d;                                                  \
    else                                                        \
      {                                                         \
        Lisp_Object translate_entry;                            \
        translate_entry = Faref (trt, make_number (d));         \
        if (! INTEGERP (translate_entry))                       \
          out = d;                                              \
        else                                                    \
          out = XINT (translate_entry);                         \
      }                                                         \
  }                                                             \
while (0)
facdc750 1003
b819a390 1004static int
9f43ad85
RS
1005search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1006 RE, trt, inverse_trt, posix)
ca1d1d23
JB
1007 Lisp_Object string;
1008 int pos;
9f43ad85 1009 int pos_byte;
ca1d1d23 1010 int lim;
9f43ad85 1011 int lim_byte;
ca1d1d23
JB
1012 int n;
1013 int RE;
facdc750
RS
1014 Lisp_Object trt;
1015 Lisp_Object inverse_trt;
b819a390 1016 int posix;
ca1d1d23 1017{
d5db4077
KR
1018 int len = SCHARS (string);
1019 int len_byte = SBYTES (string);
facdc750 1020 register int i;
ca1d1d23 1021
7074fde6
FP
1022 if (running_asynch_code)
1023 save_search_regs ();
1024
a7e4cdde 1025 /* Searching 0 times means don't move. */
ca1d1d23 1026 /* Null string is found at starting position. */
a7e4cdde 1027 if (len == 0 || n == 0)
ca325161 1028 {
0353b28f 1029 set_search_regs (pos_byte, 0);
ca325161
RS
1030 return pos;
1031 }
3f57a499 1032
b6d6a51c 1033 if (RE && !trivial_regexp_p (string))
ca1d1d23 1034 {
facdc750
RS
1035 unsigned char *p1, *p2;
1036 int s1, s2;
487282dc
KH
1037 struct re_pattern_buffer *bufp;
1038
0c8533c6
RS
1039 bufp = compile_pattern (string, &search_regs, trt, posix,
1040 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23 1041
ca1d1d23
JB
1042 immediate_quit = 1; /* Quit immediately if user types ^G,
1043 because letting this function finish
1044 can take too long. */
1045 QUIT; /* Do a pending quit right away,
1046 to avoid paradoxical behavior */
1047 /* Get pointers and sizes of the two strings
1048 that make up the visible portion of the buffer. */
1049
1050 p1 = BEGV_ADDR;
fa8ed3e0 1051 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1052 p2 = GAP_END_ADDR;
fa8ed3e0 1053 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1054 if (s1 < 0)
1055 {
1056 p2 = p1;
fa8ed3e0 1057 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1058 s1 = 0;
1059 }
1060 if (s2 < 0)
1061 {
fa8ed3e0 1062 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1063 s2 = 0;
1064 }
8bb43c28 1065 re_match_object = Qnil;
177c0ea7 1066
ca1d1d23
JB
1067 while (n < 0)
1068 {
42db823b 1069 int val;
487282dc 1070 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1071 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1072 &search_regs,
42db823b 1073 /* Don't allow match past current point */
4996330b 1074 pos_byte - BEGV_BYTE);
ca1d1d23 1075 if (val == -2)
b6d6a51c
KH
1076 {
1077 matcher_overflow ();
1078 }
ca1d1d23
JB
1079 if (val >= 0)
1080 {
26aff150 1081 pos_byte = search_regs.start[0] + BEGV_BYTE;
4746118a 1082 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1083 if (search_regs.start[i] >= 0)
1084 {
fa8ed3e0
RS
1085 search_regs.start[i]
1086 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1087 search_regs.end[i]
1088 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1089 }
a3668d92 1090 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1091 /* Set pos to the new position. */
1092 pos = search_regs.start[0];
1093 }
1094 else
1095 {
1096 immediate_quit = 0;
1097 return (n);
1098 }
1099 n++;
1100 }
1101 while (n > 0)
1102 {
42db823b 1103 int val;
487282dc 1104 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1105 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1106 &search_regs,
1107 lim_byte - BEGV_BYTE);
ca1d1d23 1108 if (val == -2)
b6d6a51c
KH
1109 {
1110 matcher_overflow ();
1111 }
ca1d1d23
JB
1112 if (val >= 0)
1113 {
26aff150 1114 pos_byte = search_regs.end[0] + BEGV_BYTE;
4746118a 1115 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1116 if (search_regs.start[i] >= 0)
1117 {
fa8ed3e0
RS
1118 search_regs.start[i]
1119 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1120 search_regs.end[i]
1121 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1122 }
a3668d92 1123 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1124 pos = search_regs.end[0];
1125 }
1126 else
1127 {
1128 immediate_quit = 0;
1129 return (0 - n);
1130 }
1131 n--;
1132 }
1133 immediate_quit = 0;
1134 return (pos);
1135 }
1136 else /* non-RE case */
1137 {
facdc750
RS
1138 unsigned char *raw_pattern, *pat;
1139 int raw_pattern_size;
1140 int raw_pattern_size_byte;
1141 unsigned char *patbuf;
1142 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
d5db4077 1143 unsigned char *base_pat = SDATA (string);
facdc750 1144 int charset_base = -1;
040272ce 1145 int boyer_moore_ok = 1;
facdc750
RS
1146
1147 /* MULTIBYTE says whether the text to be searched is multibyte.
1148 We must convert PATTERN to match that, or we will not really
1149 find things right. */
1150
1151 if (multibyte == STRING_MULTIBYTE (string))
1152 {
d5db4077
KR
1153 raw_pattern = (unsigned char *) SDATA (string);
1154 raw_pattern_size = SCHARS (string);
1155 raw_pattern_size_byte = SBYTES (string);
facdc750
RS
1156 }
1157 else if (multibyte)
1158 {
d5db4077 1159 raw_pattern_size = SCHARS (string);
facdc750 1160 raw_pattern_size_byte
d5db4077 1161 = count_size_as_multibyte (SDATA (string),
facdc750 1162 raw_pattern_size);
7276d3d8 1163 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
d5db4077
KR
1164 copy_text (SDATA (string), raw_pattern,
1165 SCHARS (string), 0, 1);
facdc750
RS
1166 }
1167 else
1168 {
1169 /* Converting multibyte to single-byte.
1170
1171 ??? Perhaps this conversion should be done in a special way
1172 by subtracting nonascii-insert-offset from each non-ASCII char,
1173 so that only the multibyte chars which really correspond to
1174 the chosen single-byte character set can possibly match. */
d5db4077
KR
1175 raw_pattern_size = SCHARS (string);
1176 raw_pattern_size_byte = SCHARS (string);
7276d3d8 1177 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
1178 copy_text (SDATA (string), raw_pattern,
1179 SBYTES (string), 1, 0);
facdc750
RS
1180 }
1181
1182 /* Copy and optionally translate the pattern. */
1183 len = raw_pattern_size;
1184 len_byte = raw_pattern_size_byte;
1185 patbuf = (unsigned char *) alloca (len_byte);
1186 pat = patbuf;
1187 base_pat = raw_pattern;
1188 if (multibyte)
1189 {
1190 while (--len >= 0)
1191 {
daaa6ed8 1192 unsigned char str[MAX_MULTIBYTE_LENGTH];
aff2ce94 1193 int c, translated, inverse;
facdc750
RS
1194 int in_charlen, charlen;
1195
1196 /* If we got here and the RE flag is set, it's because we're
1197 dealing with a regexp known to be trivial, so the backslash
1198 just quotes the next character. */
1199 if (RE && *base_pat == '\\')
1200 {
1201 len--;
1202 len_byte--;
1203 base_pat++;
1204 }
1205
1206 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
040272ce 1207
facdc750 1208 /* Translate the character, if requested. */
aff2ce94 1209 TRANSLATE (translated, trt, c);
facdc750
RS
1210 /* If translation changed the byte-length, go back
1211 to the original character. */
daaa6ed8 1212 charlen = CHAR_STRING (translated, str);
facdc750
RS
1213 if (in_charlen != charlen)
1214 {
1215 translated = c;
daaa6ed8 1216 charlen = CHAR_STRING (c, str);
facdc750
RS
1217 }
1218
5ffaf437
RS
1219 /* If we are searching for something strange,
1220 an invalid multibyte code, don't use boyer-moore. */
1221 if (! ASCII_BYTE_P (translated)
1222 && (charlen == 1 /* 8bit code */
1223 || charlen != in_charlen /* invalid multibyte code */
1224 ))
1225 boyer_moore_ok = 0;
1226
aff2ce94
RS
1227 TRANSLATE (inverse, inverse_trt, c);
1228
facdc750
RS
1229 /* Did this char actually get translated?
1230 Would any other char get translated into it? */
aff2ce94 1231 if (translated != c || inverse != c)
facdc750
RS
1232 {
1233 /* Keep track of which character set row
1234 contains the characters that need translation. */
5ffaf437 1235 int charset_base_code = c & ~CHAR_FIELD3_MASK;
d2ac725b
KH
1236 int inverse_charset_base = inverse & ~CHAR_FIELD3_MASK;
1237
1238 if (charset_base_code != inverse_charset_base)
1239 boyer_moore_ok = 0;
1240 else if (charset_base == -1)
facdc750
RS
1241 charset_base = charset_base_code;
1242 else if (charset_base != charset_base_code)
1243 /* If two different rows appear, needing translation,
1244 then we cannot use boyer_moore search. */
040272ce 1245 boyer_moore_ok = 0;
aff2ce94 1246 }
facdc750
RS
1247
1248 /* Store this character into the translated pattern. */
1249 bcopy (str, pat, charlen);
1250 pat += charlen;
1251 base_pat += in_charlen;
1252 len_byte -= in_charlen;
1253 }
1254 }
1255 else
1256 {
040272ce
KH
1257 /* Unibyte buffer. */
1258 charset_base = 0;
facdc750
RS
1259 while (--len >= 0)
1260 {
040272ce 1261 int c, translated;
facdc750
RS
1262
1263 /* If we got here and the RE flag is set, it's because we're
1264 dealing with a regexp known to be trivial, so the backslash
1265 just quotes the next character. */
1266 if (RE && *base_pat == '\\')
1267 {
1268 len--;
1269 base_pat++;
1270 }
1271 c = *base_pat++;
aff2ce94 1272 TRANSLATE (translated, trt, c);
facdc750
RS
1273 *pat++ = translated;
1274 }
1275 }
1276
1277 len_byte = pat - patbuf;
1278 len = raw_pattern_size;
1279 pat = base_pat = patbuf;
1280
040272ce 1281 if (boyer_moore_ok)
facdc750 1282 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
aff2ce94
RS
1283 pos, pos_byte, lim, lim_byte,
1284 charset_base);
facdc750
RS
1285 else
1286 return simple_search (n, pat, len, len_byte, trt,
1287 pos, pos_byte, lim, lim_byte);
1288 }
1289}
1290\f
1291/* Do a simple string search N times for the string PAT,
1292 whose length is LEN/LEN_BYTE,
1293 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1294 TRT is the translation table.
f8bd51c4 1295
facdc750
RS
1296 Return the character position where the match is found.
1297 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1298
facdc750
RS
1299 This kind of search works regardless of what is in PAT and
1300 regardless of what is in TRT. It is used in cases where
1301 boyer_moore cannot work. */
1302
1303static int
1304simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1305 int n;
1306 unsigned char *pat;
1307 int len, len_byte;
1308 Lisp_Object trt;
1309 int pos, pos_byte;
1310 int lim, lim_byte;
1311{
1312 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
ab228c24 1313 int forward = n > 0;
facdc750
RS
1314
1315 if (lim > pos && multibyte)
1316 while (n > 0)
1317 {
1318 while (1)
f8bd51c4 1319 {
facdc750
RS
1320 /* Try matching at position POS. */
1321 int this_pos = pos;
1322 int this_pos_byte = pos_byte;
1323 int this_len = len;
1324 int this_len_byte = len_byte;
1325 unsigned char *p = pat;
1326 if (pos + len > lim)
1327 goto stop;
1328
1329 while (this_len > 0)
1330 {
1331 int charlen, buf_charlen;
ab228c24 1332 int pat_ch, buf_ch;
facdc750 1333
ab228c24 1334 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1335 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1336 ZV_BYTE - this_pos_byte,
1337 buf_charlen);
aff2ce94 1338 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1339
1340 if (buf_ch != pat_ch)
1341 break;
ab228c24
RS
1342
1343 this_len_byte -= charlen;
1344 this_len--;
1345 p += charlen;
1346
1347 this_pos_byte += buf_charlen;
1348 this_pos++;
facdc750
RS
1349 }
1350
1351 if (this_len == 0)
1352 {
1353 pos += len;
1354 pos_byte += len_byte;
1355 break;
1356 }
1357
1358 INC_BOTH (pos, pos_byte);
f8bd51c4 1359 }
facdc750
RS
1360
1361 n--;
1362 }
1363 else if (lim > pos)
1364 while (n > 0)
1365 {
1366 while (1)
f8bd51c4 1367 {
facdc750
RS
1368 /* Try matching at position POS. */
1369 int this_pos = pos;
1370 int this_len = len;
1371 unsigned char *p = pat;
1372
1373 if (pos + len > lim)
1374 goto stop;
1375
1376 while (this_len > 0)
1377 {
1378 int pat_ch = *p++;
1379 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1380 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1381
1382 if (buf_ch != pat_ch)
1383 break;
ab228c24
RS
1384
1385 this_len--;
1386 this_pos++;
facdc750
RS
1387 }
1388
1389 if (this_len == 0)
1390 {
1391 pos += len;
1392 break;
1393 }
1394
1395 pos++;
f8bd51c4 1396 }
facdc750
RS
1397
1398 n--;
1399 }
1400 /* Backwards search. */
1401 else if (lim < pos && multibyte)
1402 while (n < 0)
1403 {
1404 while (1)
f8bd51c4 1405 {
facdc750
RS
1406 /* Try matching at position POS. */
1407 int this_pos = pos - len;
1408 int this_pos_byte = pos_byte - len_byte;
1409 int this_len = len;
1410 int this_len_byte = len_byte;
1411 unsigned char *p = pat;
1412
1413 if (pos - len < lim)
1414 goto stop;
1415
1416 while (this_len > 0)
1417 {
1418 int charlen, buf_charlen;
ab228c24 1419 int pat_ch, buf_ch;
facdc750 1420
ab228c24 1421 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1422 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1423 ZV_BYTE - this_pos_byte,
1424 buf_charlen);
aff2ce94 1425 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1426
1427 if (buf_ch != pat_ch)
1428 break;
ab228c24
RS
1429
1430 this_len_byte -= charlen;
1431 this_len--;
1432 p += charlen;
1433 this_pos_byte += buf_charlen;
1434 this_pos++;
facdc750
RS
1435 }
1436
1437 if (this_len == 0)
1438 {
1439 pos -= len;
1440 pos_byte -= len_byte;
1441 break;
1442 }
1443
1444 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1445 }
1446
facdc750
RS
1447 n++;
1448 }
1449 else if (lim < pos)
1450 while (n < 0)
1451 {
1452 while (1)
b6d6a51c 1453 {
facdc750
RS
1454 /* Try matching at position POS. */
1455 int this_pos = pos - len;
1456 int this_len = len;
1457 unsigned char *p = pat;
1458
1459 if (pos - len < lim)
1460 goto stop;
1461
1462 while (this_len > 0)
1463 {
1464 int pat_ch = *p++;
1465 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1466 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1467
1468 if (buf_ch != pat_ch)
1469 break;
ab228c24
RS
1470 this_len--;
1471 this_pos++;
facdc750
RS
1472 }
1473
1474 if (this_len == 0)
b6d6a51c 1475 {
facdc750
RS
1476 pos -= len;
1477 break;
b6d6a51c 1478 }
facdc750
RS
1479
1480 pos--;
b6d6a51c 1481 }
facdc750
RS
1482
1483 n++;
b6d6a51c 1484 }
facdc750
RS
1485
1486 stop:
1487 if (n == 0)
aff2ce94 1488 {
ab228c24
RS
1489 if (forward)
1490 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1491 else
1492 set_search_regs (multibyte ? pos_byte : pos, len_byte);
aff2ce94
RS
1493
1494 return pos;
1495 }
facdc750
RS
1496 else if (n > 0)
1497 return -n;
1498 else
1499 return n;
1500}
1501\f
1502/* Do Boyer-Moore search N times for the string PAT,
1503 whose length is LEN/LEN_BYTE,
1504 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1505 DIRECTION says which direction we search in.
1506 TRT and INVERSE_TRT are translation tables.
1507
1508 This kind of search works if all the characters in PAT that have
1509 nontrivial translation are the same aside from the last byte. This
1510 makes it possible to translate just the last byte of a character,
1511 and do so after just a simple test of the context.
1512
1513 If that criterion is not satisfied, do not call this function. */
1514
1515static int
1516boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
aff2ce94 1517 pos, pos_byte, lim, lim_byte, charset_base)
facdc750
RS
1518 int n;
1519 unsigned char *base_pat;
1520 int len, len_byte;
1521 Lisp_Object trt;
1522 Lisp_Object inverse_trt;
1523 int pos, pos_byte;
1524 int lim, lim_byte;
aff2ce94 1525 int charset_base;
facdc750
RS
1526{
1527 int direction = ((n > 0) ? 1 : -1);
1528 register int dirlen;
a968f437 1529 int infinity, limit, stride_for_teases = 0;
facdc750
RS
1530 register int *BM_tab;
1531 int *BM_tab_base;
177c0ea7 1532 register unsigned char *cursor, *p_limit;
facdc750 1533 register int i, j;
cb6792d2 1534 unsigned char *pat, *pat_end;
facdc750
RS
1535 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1536
1537 unsigned char simple_translate[0400];
6bbd7a29
GM
1538 int translate_prev_byte = 0;
1539 int translate_anteprev_byte = 0;
facdc750
RS
1540
1541#ifdef C_ALLOCA
1542 int BM_tab_space[0400];
1543 BM_tab = &BM_tab_space[0];
1544#else
1545 BM_tab = (int *) alloca (0400 * sizeof (int));
1546#endif
1547 /* The general approach is that we are going to maintain that we know */
1548 /* the first (closest to the present position, in whatever direction */
1549 /* we're searching) character that could possibly be the last */
1550 /* (furthest from present position) character of a valid match. We */
1551 /* advance the state of our knowledge by looking at that character */
1552 /* and seeing whether it indeed matches the last character of the */
1553 /* pattern. If it does, we take a closer look. If it does not, we */
1554 /* move our pointer (to putative last characters) as far as is */
1555 /* logically possible. This amount of movement, which I call a */
1556 /* stride, will be the length of the pattern if the actual character */
1557 /* appears nowhere in the pattern, otherwise it will be the distance */
1558 /* from the last occurrence of that character to the end of the */
1559 /* pattern. */
1560 /* As a coding trick, an enormous stride is coded into the table for */
1561 /* characters that match the last character. This allows use of only */
1562 /* a single test, a test for having gone past the end of the */
1563 /* permissible match region, to test for both possible matches (when */
1564 /* the stride goes past the end immediately) and failure to */
177c0ea7 1565 /* match (where you get nudged past the end one stride at a time). */
facdc750
RS
1566
1567 /* Here we make a "mickey mouse" BM table. The stride of the search */
1568 /* is determined only by the last character of the putative match. */
1569 /* If that character does not match, we will stride the proper */
1570 /* distance to propose a match that superimposes it on the last */
1571 /* instance of a character that matches it (per trt), or misses */
177c0ea7 1572 /* it entirely if there is none. */
facdc750
RS
1573
1574 dirlen = len_byte * direction;
1575 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
cb6792d2
RS
1576
1577 /* Record position after the end of the pattern. */
1578 pat_end = base_pat + len_byte;
1579 /* BASE_PAT points to a character that we start scanning from.
1580 It is the first character in a forward search,
1581 the last character in a backward search. */
facdc750 1582 if (direction < 0)
cb6792d2
RS
1583 base_pat = pat_end - 1;
1584
facdc750
RS
1585 BM_tab_base = BM_tab;
1586 BM_tab += 0400;
1587 j = dirlen; /* to get it in a register */
1588 /* A character that does not appear in the pattern induces a */
1589 /* stride equal to the pattern length. */
1590 while (BM_tab_base != BM_tab)
1591 {
1592 *--BM_tab = j;
1593 *--BM_tab = j;
1594 *--BM_tab = j;
1595 *--BM_tab = j;
1596 }
1597
1598 /* We use this for translation, instead of TRT itself.
1599 We fill this in to handle the characters that actually
1600 occur in the pattern. Others don't matter anyway! */
1601 bzero (simple_translate, sizeof simple_translate);
1602 for (i = 0; i < 0400; i++)
1603 simple_translate[i] = i;
1604
1605 i = 0;
1606 while (i != infinity)
1607 {
cb6792d2 1608 unsigned char *ptr = base_pat + i;
facdc750
RS
1609 i += direction;
1610 if (i == dirlen)
1611 i = infinity;
1612 if (! NILP (trt))
ca1d1d23 1613 {
facdc750 1614 int ch;
aff2ce94 1615 int untranslated;
facdc750
RS
1616 int this_translated = 1;
1617
1618 if (multibyte
cb6792d2
RS
1619 /* Is *PTR the last byte of a character? */
1620 && (pat_end - ptr == 1 || CHAR_HEAD_P (ptr[1])))
ca1d1d23 1621 {
facdc750
RS
1622 unsigned char *charstart = ptr;
1623 while (! CHAR_HEAD_P (*charstart))
1624 charstart--;
aff2ce94 1625 untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
6397418a 1626 if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
facdc750 1627 {
ab228c24 1628 TRANSLATE (ch, trt, untranslated);
aff2ce94
RS
1629 if (! CHAR_HEAD_P (*ptr))
1630 {
1631 translate_prev_byte = ptr[-1];
1632 if (! CHAR_HEAD_P (translate_prev_byte))
1633 translate_anteprev_byte = ptr[-2];
1634 }
facdc750 1635 }
aff2ce94 1636 else
ab228c24
RS
1637 {
1638 this_translated = 0;
1639 ch = *ptr;
1640 }
ca1d1d23 1641 }
facdc750 1642 else if (!multibyte)
aff2ce94 1643 TRANSLATE (ch, trt, *ptr);
ca1d1d23
JB
1644 else
1645 {
facdc750
RS
1646 ch = *ptr;
1647 this_translated = 0;
ca1d1d23 1648 }
facdc750 1649
ab228c24
RS
1650 if (ch > 0400)
1651 j = ((unsigned char) ch) | 0200;
1652 else
1653 j = (unsigned char) ch;
1654
facdc750
RS
1655 if (i == infinity)
1656 stride_for_teases = BM_tab[j];
ab228c24 1657
facdc750
RS
1658 BM_tab[j] = dirlen - i;
1659 /* A translation table is accompanied by its inverse -- see */
177c0ea7 1660 /* comment following downcase_table for details */
facdc750 1661 if (this_translated)
ab228c24
RS
1662 {
1663 int starting_ch = ch;
1664 int starting_j = j;
1665 while (1)
1666 {
1667 TRANSLATE (ch, inverse_trt, ch);
1668 if (ch > 0400)
1669 j = ((unsigned char) ch) | 0200;
1670 else
1671 j = (unsigned char) ch;
1672
1673 /* For all the characters that map into CH,
1674 set up simple_translate to map the last byte
1675 into STARTING_J. */
1676 simple_translate[j] = starting_j;
1677 if (ch == starting_ch)
1678 break;
1679 BM_tab[j] = dirlen - i;
1680 }
1681 }
facdc750
RS
1682 }
1683 else
1684 {
1685 j = *ptr;
1686
1687 if (i == infinity)
1688 stride_for_teases = BM_tab[j];
1689 BM_tab[j] = dirlen - i;
ca1d1d23 1690 }
facdc750
RS
1691 /* stride_for_teases tells how much to stride if we get a */
1692 /* match on the far character but are subsequently */
1693 /* disappointed, by recording what the stride would have been */
1694 /* for that character if the last character had been */
1695 /* different. */
1696 }
1697 infinity = dirlen - infinity;
1698 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1699 /* loop invariant - POS_BYTE points at where last char (first
1700 char if reverse) of pattern would align in a possible match. */
1701 while (n != 0)
1702 {
1703 int tail_end;
1704 unsigned char *tail_end_ptr;
1705
1706 /* It's been reported that some (broken) compiler thinks that
1707 Boolean expressions in an arithmetic context are unsigned.
1708 Using an explicit ?1:0 prevents this. */
1709 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1710 < 0)
1711 return (n * (0 - direction));
1712 /* First we do the part we can by pointers (maybe nothing) */
1713 QUIT;
1714 pat = base_pat;
1715 limit = pos_byte - dirlen + direction;
67ce527d
KH
1716 if (direction > 0)
1717 {
1718 limit = BUFFER_CEILING_OF (limit);
1719 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1720 can take on without hitting edge of buffer or the gap. */
1721 limit = min (limit, pos_byte + 20000);
1722 limit = min (limit, lim_byte - 1);
1723 }
1724 else
1725 {
1726 limit = BUFFER_FLOOR_OF (limit);
1727 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1728 can take on without hitting edge of buffer or the gap. */
1729 limit = max (limit, pos_byte - 20000);
1730 limit = max (limit, lim_byte);
1731 }
facdc750
RS
1732 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1733 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1734
1735 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1736 {
facdc750
RS
1737 unsigned char *p2;
1738
1739 p_limit = BYTE_POS_ADDR (limit);
1740 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1741 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1742 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1743 {
facdc750 1744 if (direction > 0) /* worth duplicating */
ca1d1d23 1745 {
facdc750
RS
1746 /* Use signed comparison if appropriate
1747 to make cursor+infinity sure to be > p_limit.
1748 Assuming that the buffer lies in a range of addresses
1749 that are all "positive" (as ints) or all "negative",
1750 either kind of comparison will work as long
1751 as we don't step by infinity. So pick the kind
1752 that works when we do step by infinity. */
1753 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1754 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1755 cursor += BM_tab[*cursor];
ca1d1d23 1756 else
facdc750
RS
1757 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1758 cursor += BM_tab[*cursor];
1759 }
1760 else
1761 {
1762 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1763 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1764 cursor += BM_tab[*cursor];
1765 else
1766 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1767 cursor += BM_tab[*cursor];
1768 }
ca1d1d23 1769/* If you are here, cursor is beyond the end of the searched region. */
facdc750
RS
1770/* This can happen if you match on the far character of the pattern, */
1771/* because the "stride" of that character is infinity, a number able */
1772/* to throw you well beyond the end of the search. It can also */
1773/* happen if you fail to match within the permitted region and would */
1774/* otherwise try a character beyond that region */
1775 if ((cursor - p_limit) * direction <= len_byte)
1776 break; /* a small overrun is genuine */
1777 cursor -= infinity; /* large overrun = hit */
1778 i = dirlen - direction;
1779 if (! NILP (trt))
1780 {
1781 while ((i -= direction) + direction != 0)
ca1d1d23 1782 {
facdc750
RS
1783 int ch;
1784 cursor -= direction;
1785 /* Translate only the last byte of a character. */
1786 if (! multibyte
1787 || ((cursor == tail_end_ptr
1788 || CHAR_HEAD_P (cursor[1]))
1789 && (CHAR_HEAD_P (cursor[0])
1790 || (translate_prev_byte == cursor[-1]
1791 && (CHAR_HEAD_P (translate_prev_byte)
1792 || translate_anteprev_byte == cursor[-2])))))
1793 ch = simple_translate[*cursor];
1794 else
1795 ch = *cursor;
1796 if (pat[i] != ch)
1797 break;
ca1d1d23 1798 }
facdc750
RS
1799 }
1800 else
1801 {
1802 while ((i -= direction) + direction != 0)
ca1d1d23 1803 {
facdc750
RS
1804 cursor -= direction;
1805 if (pat[i] != *cursor)
1806 break;
ca1d1d23 1807 }
facdc750
RS
1808 }
1809 cursor += dirlen - i - direction; /* fix cursor */
1810 if (i + direction == 0)
1811 {
1812 int position;
0c8533c6 1813
facdc750 1814 cursor -= direction;
1113d9db 1815
facdc750
RS
1816 position = pos_byte + cursor - p2 + ((direction > 0)
1817 ? 1 - len_byte : 0);
1818 set_search_regs (position, len_byte);
ca325161 1819
facdc750
RS
1820 if ((n -= direction) != 0)
1821 cursor += dirlen; /* to resume search */
ca1d1d23 1822 else
facdc750
RS
1823 return ((direction > 0)
1824 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1825 }
facdc750
RS
1826 else
1827 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 1828 }
facdc750
RS
1829 pos_byte += cursor - p2;
1830 }
1831 else
1832 /* Now we'll pick up a clump that has to be done the hard */
1833 /* way because it covers a discontinuity */
1834 {
1835 limit = ((direction > 0)
1836 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1837 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1838 limit = ((direction > 0)
1839 ? min (limit + len_byte, lim_byte - 1)
1840 : max (limit - len_byte, lim_byte));
1841 /* LIMIT is now the last value POS_BYTE can have
1842 and still be valid for a possible match. */
1843 while (1)
ca1d1d23 1844 {
facdc750
RS
1845 /* This loop can be coded for space rather than */
1846 /* speed because it will usually run only once. */
1847 /* (the reach is at most len + 21, and typically */
177c0ea7 1848 /* does not exceed len) */
facdc750
RS
1849 while ((limit - pos_byte) * direction >= 0)
1850 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1851 /* now run the same tests to distinguish going off the */
1852 /* end, a match or a phony match. */
1853 if ((pos_byte - limit) * direction <= len_byte)
1854 break; /* ran off the end */
1855 /* Found what might be a match.
1856 Set POS_BYTE back to last (first if reverse) pos. */
1857 pos_byte -= infinity;
1858 i = dirlen - direction;
1859 while ((i -= direction) + direction != 0)
ca1d1d23 1860 {
facdc750
RS
1861 int ch;
1862 unsigned char *ptr;
1863 pos_byte -= direction;
1864 ptr = BYTE_POS_ADDR (pos_byte);
1865 /* Translate only the last byte of a character. */
1866 if (! multibyte
1867 || ((ptr == tail_end_ptr
1868 || CHAR_HEAD_P (ptr[1]))
1869 && (CHAR_HEAD_P (ptr[0])
1870 || (translate_prev_byte == ptr[-1]
1871 && (CHAR_HEAD_P (translate_prev_byte)
1872 || translate_anteprev_byte == ptr[-2])))))
1873 ch = simple_translate[*ptr];
1874 else
1875 ch = *ptr;
1876 if (pat[i] != ch)
1877 break;
1878 }
1879 /* Above loop has moved POS_BYTE part or all the way
1880 back to the first pos (last pos if reverse).
1881 Set it once again at the last (first if reverse) char. */
1882 pos_byte += dirlen - i- direction;
1883 if (i + direction == 0)
1884 {
1885 int position;
1886 pos_byte -= direction;
1113d9db 1887
facdc750 1888 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
0c8533c6 1889
facdc750 1890 set_search_regs (position, len_byte);
ca325161 1891
facdc750
RS
1892 if ((n -= direction) != 0)
1893 pos_byte += dirlen; /* to resume search */
ca1d1d23 1894 else
facdc750
RS
1895 return ((direction > 0)
1896 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1897 }
facdc750
RS
1898 else
1899 pos_byte += stride_for_teases;
1900 }
1901 }
1902 /* We have done one clump. Can we continue? */
1903 if ((lim_byte - pos_byte) * direction < 0)
1904 return ((0 - n) * direction);
ca1d1d23 1905 }
facdc750 1906 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 1907}
ca325161 1908
fa8ed3e0 1909/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
a7e4cdde
RS
1910 for the overall match just found in the current buffer.
1911 Also clear out the match data for registers 1 and up. */
ca325161
RS
1912
1913static void
fa8ed3e0
RS
1914set_search_regs (beg_byte, nbytes)
1915 int beg_byte, nbytes;
ca325161 1916{
a7e4cdde
RS
1917 int i;
1918
ca325161
RS
1919 /* Make sure we have registers in which to store
1920 the match position. */
1921 if (search_regs.num_regs == 0)
1922 {
2d4a771a
RS
1923 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1924 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 1925 search_regs.num_regs = 2;
ca325161
RS
1926 }
1927
a7e4cdde
RS
1928 /* Clear out the other registers. */
1929 for (i = 1; i < search_regs.num_regs; i++)
1930 {
1931 search_regs.start[i] = -1;
1932 search_regs.end[i] = -1;
1933 }
1934
fa8ed3e0
RS
1935 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1936 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 1937 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 1938}
ca1d1d23
JB
1939\f
1940/* Given a string of words separated by word delimiters,
1941 compute a regexp that matches those exact words
1942 separated by arbitrary punctuation. */
1943
1944static Lisp_Object
1945wordify (string)
1946 Lisp_Object string;
1947{
1948 register unsigned char *p, *o;
0c8533c6 1949 register int i, i_byte, len, punct_count = 0, word_count = 0;
ca1d1d23 1950 Lisp_Object val;
0c8533c6
RS
1951 int prev_c = 0;
1952 int adjust;
ca1d1d23 1953
b7826503 1954 CHECK_STRING (string);
d5db4077
KR
1955 p = SDATA (string);
1956 len = SCHARS (string);
ca1d1d23 1957
0c8533c6
RS
1958 for (i = 0, i_byte = 0; i < len; )
1959 {
1960 int c;
177c0ea7 1961
eb99a8dd 1962 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
0c8533c6
RS
1963
1964 if (SYNTAX (c) != Sword)
1965 {
1966 punct_count++;
1967 if (i > 0 && SYNTAX (prev_c) == Sword)
1968 word_count++;
1969 }
ca1d1d23 1970
0c8533c6
RS
1971 prev_c = c;
1972 }
1973
1974 if (SYNTAX (prev_c) == Sword)
1975 word_count++;
1976 if (!word_count)
b07b65aa 1977 return empty_string;
0c8533c6
RS
1978
1979 adjust = - punct_count + 5 * (word_count - 1) + 4;
8a2df937
RS
1980 if (STRING_MULTIBYTE (string))
1981 val = make_uninit_multibyte_string (len + adjust,
d5db4077 1982 SBYTES (string)
8a2df937
RS
1983 + adjust);
1984 else
1985 val = make_uninit_string (len + adjust);
ca1d1d23 1986
d5db4077 1987 o = SDATA (val);
ca1d1d23
JB
1988 *o++ = '\\';
1989 *o++ = 'b';
1e9582d4 1990 prev_c = 0;
ca1d1d23 1991
1e9582d4
RS
1992 for (i = 0, i_byte = 0; i < len; )
1993 {
1994 int c;
1995 int i_byte_orig = i_byte;
177c0ea7 1996
eb99a8dd 1997 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1e9582d4
RS
1998
1999 if (SYNTAX (c) == Sword)
2000 {
5d69fe10 2001 bcopy (SDATA (string) + i_byte_orig, o,
1e9582d4
RS
2002 i_byte - i_byte_orig);
2003 o += i_byte - i_byte_orig;
2004 }
2005 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2006 {
2007 *o++ = '\\';
2008 *o++ = 'W';
2009 *o++ = '\\';
2010 *o++ = 'W';
2011 *o++ = '*';
2012 }
2013
2014 prev_c = c;
2015 }
ca1d1d23
JB
2016
2017 *o++ = '\\';
2018 *o++ = 'b';
2019
2020 return val;
2021}
2022\f
2023DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
8c1a1077
PJ
2024 "MSearch backward: ",
2025 doc: /* Search backward from point for STRING.
2026Set point to the beginning of the occurrence found, and return point.
2027An optional second argument bounds the search; it is a buffer position.
2028The match found must not extend before that position.
2029Optional third argument, if t, means if fail just return nil (no error).
2030 If not nil and not t, position at limit of search and return nil.
2031Optional fourth argument is repeat count--search for successive occurrences.
2032
2033Search case-sensitivity is determined by the value of the variable
2034`case-fold-search', which see.
2035
2036See also the functions `match-beginning', `match-end' and `replace-match'. */)
2037 (string, bound, noerror, count)
ca1d1d23
JB
2038 Lisp_Object string, bound, noerror, count;
2039{
b819a390 2040 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
2041}
2042
6af43974 2043DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
8c1a1077
PJ
2044 doc: /* Search forward from point for STRING.
2045Set point to the end of the occurrence found, and return point.
2046An optional second argument bounds the search; it is a buffer position.
2047The match found must not extend after that position. nil is equivalent
2048 to (point-max).
2049Optional third argument, if t, means if fail just return nil (no error).
2050 If not nil and not t, move to limit of search and return nil.
2051Optional fourth argument is repeat count--search for successive occurrences.
2052
2053Search case-sensitivity is determined by the value of the variable
2054`case-fold-search', which see.
2055
2056See also the functions `match-beginning', `match-end' and `replace-match'. */)
2057 (string, bound, noerror, count)
ca1d1d23
JB
2058 Lisp_Object string, bound, noerror, count;
2059{
b819a390 2060 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
2061}
2062
2063DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
8c1a1077
PJ
2064 "sWord search backward: ",
2065 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2066Set point to the beginning of the occurrence found, and return point.
2067An optional second argument bounds the search; it is a buffer position.
2068The match found must not extend before that position.
2069Optional third argument, if t, means if fail just return nil (no error).
2070 If not nil and not t, move to limit of search and return nil.
2071Optional fourth argument is repeat count--search for successive occurrences. */)
2072 (string, bound, noerror, count)
ca1d1d23
JB
2073 Lisp_Object string, bound, noerror, count;
2074{
b819a390 2075 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2076}
2077
2078DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
8c1a1077
PJ
2079 "sWord search: ",
2080 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2081Set point to the end of the occurrence found, and return point.
2082An optional second argument bounds the search; it is a buffer position.
2083The match found must not extend after that position.
2084Optional third argument, if t, means if fail just return nil (no error).
2085 If not nil and not t, move to limit of search and return nil.
2086Optional fourth argument is repeat count--search for successive occurrences. */)
2087 (string, bound, noerror, count)
ca1d1d23
JB
2088 Lisp_Object string, bound, noerror, count;
2089{
b819a390 2090 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
2091}
2092
2093DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
8c1a1077
PJ
2094 "sRE search backward: ",
2095 doc: /* Search backward from point for match for regular expression REGEXP.
2096Set point to the beginning of the match, and return point.
2097The match found is the one starting last in the buffer
2098and yet ending before the origin of the search.
2099An optional second argument bounds the search; it is a buffer position.
2100The match found must start at or after that position.
2101Optional third argument, if t, means if fail just return nil (no error).
2102 If not nil and not t, move to limit of search and return nil.
2103Optional fourth argument is repeat count--search for successive occurrences.
2104See also the functions `match-beginning', `match-end', `match-string',
2105and `replace-match'. */)
2106 (regexp, bound, noerror, count)
19c0a730 2107 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2108{
b819a390 2109 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2110}
2111
2112DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
8c1a1077
PJ
2113 "sRE search: ",
2114 doc: /* Search forward from point for regular expression REGEXP.
2115Set point to the end of the occurrence found, and return point.
2116An optional second argument bounds the search; it is a buffer position.
2117The match found must not extend after that position.
2118Optional third argument, if t, means if fail just return nil (no error).
2119 If not nil and not t, move to limit of search and return nil.
2120Optional fourth argument is repeat count--search for successive occurrences.
2121See also the functions `match-beginning', `match-end', `match-string',
2122and `replace-match'. */)
2123 (regexp, bound, noerror, count)
19c0a730 2124 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2125{
b819a390
RS
2126 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2127}
2128
2129DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
8c1a1077
PJ
2130 "sPosix search backward: ",
2131 doc: /* Search backward from point for match for regular expression REGEXP.
2132Find the longest match in accord with Posix regular expression rules.
2133Set point to the beginning of the match, and return point.
2134The match found is the one starting last in the buffer
2135and yet ending before the origin of the search.
2136An optional second argument bounds the search; it is a buffer position.
2137The match found must start at or after that position.
2138Optional third argument, if t, means if fail just return nil (no error).
2139 If not nil and not t, move to limit of search and return nil.
2140Optional fourth argument is repeat count--search for successive occurrences.
2141See also the functions `match-beginning', `match-end', `match-string',
2142and `replace-match'. */)
2143 (regexp, bound, noerror, count)
b819a390
RS
2144 Lisp_Object regexp, bound, noerror, count;
2145{
2146 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2147}
2148
2149DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
8c1a1077
PJ
2150 "sPosix search: ",
2151 doc: /* Search forward from point for regular expression REGEXP.
2152Find the longest match in accord with Posix regular expression rules.
2153Set point to the end of the occurrence found, and return point.
2154An optional second argument bounds the search; it is a buffer position.
2155The match found must not extend after that position.
2156Optional third argument, if t, means if fail just return nil (no error).
2157 If not nil and not t, move to limit of search and return nil.
2158Optional fourth argument is repeat count--search for successive occurrences.
2159See also the functions `match-beginning', `match-end', `match-string',
2160and `replace-match'. */)
2161 (regexp, bound, noerror, count)
b819a390
RS
2162 Lisp_Object regexp, bound, noerror, count;
2163{
2164 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2165}
2166\f
d7a5ad5f 2167DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
8c1a1077 2168 doc: /* Replace text matched by last search with NEWTEXT.
4dd0c271
RS
2169Leave point at the end of the replacement text.
2170
8c1a1077
PJ
2171If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2172Otherwise maybe capitalize the whole text, or maybe just word initials,
2173based on the replaced text.
2174If the replaced text has only capital letters
2175and has at least one multiletter word, convert NEWTEXT to all caps.
4dd0c271
RS
2176Otherwise if all words are capitalized in the replaced text,
2177capitalize each word in NEWTEXT.
8c1a1077
PJ
2178
2179If third arg LITERAL is non-nil, insert NEWTEXT literally.
2180Otherwise treat `\\' as special:
2181 `\\&' in NEWTEXT means substitute original matched text.
2182 `\\N' means substitute what matched the Nth `\\(...\\)'.
2183 If Nth parens didn't match, substitute nothing.
2184 `\\\\' means insert one `\\'.
4dd0c271
RS
2185Case conversion does not apply to these substitutions.
2186
8c1a1077 2187FIXEDCASE and LITERAL are optional arguments.
8c1a1077
PJ
2188
2189The optional fourth argument STRING can be a string to modify.
2190This is meaningful when the previous match was done against STRING,
2191using `string-match'. When used this way, `replace-match'
2192creates and returns a new string made by copying STRING and replacing
2193the part of STRING that was matched.
2194
2195The optional fifth argument SUBEXP specifies a subexpression;
2196it says to replace just that subexpression with NEWTEXT,
2197rather than replacing the entire matched text.
2198This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2199`\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2200NEWTEXT in place of subexp N.
2201This is useful only after a regular expression search or match,
2202since only regular expressions have distinguished subexpressions. */)
2203 (newtext, fixedcase, literal, string, subexp)
d7a5ad5f 2204 Lisp_Object newtext, fixedcase, literal, string, subexp;
ca1d1d23
JB
2205{
2206 enum { nochange, all_caps, cap_initial } case_action;
ac3b28b1 2207 register int pos, pos_byte;
ca1d1d23 2208 int some_multiletter_word;
97832bd0 2209 int some_lowercase;
73dc8771 2210 int some_uppercase;
208767c3 2211 int some_nonuppercase_initial;
ca1d1d23 2212 register int c, prevc;
d7a5ad5f 2213 int sub;
3e18eecf 2214 int opoint, newpoint;
ca1d1d23 2215
b7826503 2216 CHECK_STRING (newtext);
ca1d1d23 2217
080c45fd 2218 if (! NILP (string))
b7826503 2219 CHECK_STRING (string);
080c45fd 2220
ca1d1d23
JB
2221 case_action = nochange; /* We tried an initialization */
2222 /* but some C compilers blew it */
4746118a
JB
2223
2224 if (search_regs.num_regs <= 0)
2225 error ("replace-match called before any match found");
2226
d7a5ad5f
RS
2227 if (NILP (subexp))
2228 sub = 0;
2229 else
2230 {
b7826503 2231 CHECK_NUMBER (subexp);
d7a5ad5f
RS
2232 sub = XINT (subexp);
2233 if (sub < 0 || sub >= search_regs.num_regs)
2234 args_out_of_range (subexp, make_number (search_regs.num_regs));
2235 }
2236
080c45fd
RS
2237 if (NILP (string))
2238 {
d7a5ad5f
RS
2239 if (search_regs.start[sub] < BEGV
2240 || search_regs.start[sub] > search_regs.end[sub]
2241 || search_regs.end[sub] > ZV)
2242 args_out_of_range (make_number (search_regs.start[sub]),
2243 make_number (search_regs.end[sub]));
080c45fd
RS
2244 }
2245 else
2246 {
d7a5ad5f
RS
2247 if (search_regs.start[sub] < 0
2248 || search_regs.start[sub] > search_regs.end[sub]
d5db4077 2249 || search_regs.end[sub] > SCHARS (string))
d7a5ad5f
RS
2250 args_out_of_range (make_number (search_regs.start[sub]),
2251 make_number (search_regs.end[sub]));
080c45fd 2252 }
ca1d1d23
JB
2253
2254 if (NILP (fixedcase))
2255 {
2256 /* Decide how to casify by examining the matched text. */
ac3b28b1 2257 int last;
ca1d1d23 2258
ac3b28b1
KH
2259 pos = search_regs.start[sub];
2260 last = search_regs.end[sub];
fa8ed3e0
RS
2261
2262 if (NILP (string))
ac3b28b1 2263 pos_byte = CHAR_TO_BYTE (pos);
fa8ed3e0 2264 else
ac3b28b1 2265 pos_byte = string_char_to_byte (string, pos);
fa8ed3e0 2266
ca1d1d23
JB
2267 prevc = '\n';
2268 case_action = all_caps;
2269
2270 /* some_multiletter_word is set nonzero if any original word
2271 is more than one letter long. */
2272 some_multiletter_word = 0;
97832bd0 2273 some_lowercase = 0;
208767c3 2274 some_nonuppercase_initial = 0;
73dc8771 2275 some_uppercase = 0;
ca1d1d23 2276
ac3b28b1 2277 while (pos < last)
ca1d1d23 2278 {
080c45fd 2279 if (NILP (string))
ac3b28b1
KH
2280 {
2281 c = FETCH_CHAR (pos_byte);
2282 INC_BOTH (pos, pos_byte);
2283 }
080c45fd 2284 else
ac3b28b1 2285 FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
080c45fd 2286
ca1d1d23
JB
2287 if (LOWERCASEP (c))
2288 {
2289 /* Cannot be all caps if any original char is lower case */
2290
97832bd0 2291 some_lowercase = 1;
ca1d1d23 2292 if (SYNTAX (prevc) != Sword)
208767c3 2293 some_nonuppercase_initial = 1;
ca1d1d23
JB
2294 else
2295 some_multiletter_word = 1;
2296 }
2297 else if (!NOCASEP (c))
2298 {
73dc8771 2299 some_uppercase = 1;
97832bd0 2300 if (SYNTAX (prevc) != Sword)
c4d460ce 2301 ;
97832bd0 2302 else
ca1d1d23
JB
2303 some_multiletter_word = 1;
2304 }
208767c3
RS
2305 else
2306 {
2307 /* If the initial is a caseless word constituent,
2308 treat that like a lowercase initial. */
2309 if (SYNTAX (prevc) != Sword)
2310 some_nonuppercase_initial = 1;
2311 }
ca1d1d23
JB
2312
2313 prevc = c;
2314 }
2315
97832bd0
RS
2316 /* Convert to all caps if the old text is all caps
2317 and has at least one multiletter word. */
2318 if (! some_lowercase && some_multiletter_word)
2319 case_action = all_caps;
c4d460ce 2320 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2321 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2322 case_action = cap_initial;
208767c3 2323 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2324 /* Should x -> yz, operating on X, give Yz or YZ?
2325 We'll assume the latter. */
2326 case_action = all_caps;
97832bd0
RS
2327 else
2328 case_action = nochange;
ca1d1d23
JB
2329 }
2330
080c45fd
RS
2331 /* Do replacement in a string. */
2332 if (!NILP (string))
2333 {
2334 Lisp_Object before, after;
2335
2336 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2337 make_number (search_regs.start[sub]));
2338 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2339
636a5e28
RS
2340 /* Substitute parts of the match into NEWTEXT
2341 if desired. */
080c45fd
RS
2342 if (NILP (literal))
2343 {
d131e79c
RS
2344 int lastpos = 0;
2345 int lastpos_byte = 0;
080c45fd
RS
2346 /* We build up the substituted string in ACCUM. */
2347 Lisp_Object accum;
2348 Lisp_Object middle;
d5db4077 2349 int length = SBYTES (newtext);
080c45fd
RS
2350
2351 accum = Qnil;
2352
ac3b28b1 2353 for (pos_byte = 0, pos = 0; pos_byte < length;)
080c45fd
RS
2354 {
2355 int substart = -1;
6bbd7a29 2356 int subend = 0;
1e79ec24 2357 int delbackslash = 0;
080c45fd 2358
0c8533c6
RS
2359 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2360
080c45fd
RS
2361 if (c == '\\')
2362 {
0c8533c6 2363 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
177c0ea7 2364
080c45fd
RS
2365 if (c == '&')
2366 {
d7a5ad5f
RS
2367 substart = search_regs.start[sub];
2368 subend = search_regs.end[sub];
080c45fd 2369 }
5fbbc83b 2370 else if (c >= '1' && c <= '9')
080c45fd 2371 {
5fbbc83b
RS
2372 if (search_regs.start[c - '0'] >= 0
2373 && c <= search_regs.num_regs + '0')
080c45fd
RS
2374 {
2375 substart = search_regs.start[c - '0'];
2376 subend = search_regs.end[c - '0'];
2377 }
5fbbc83b
RS
2378 else
2379 {
2380 /* If that subexp did not match,
2381 replace \\N with nothing. */
2382 substart = 0;
2383 subend = 0;
2384 }
080c45fd 2385 }
1e79ec24
KH
2386 else if (c == '\\')
2387 delbackslash = 1;
636a5e28
RS
2388 else
2389 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2390 }
2391 if (substart >= 0)
2392 {
d131e79c
RS
2393 if (pos - 2 != lastpos)
2394 middle = substring_both (newtext, lastpos,
2395 lastpos_byte,
2396 pos - 2, pos_byte - 2);
080c45fd
RS
2397 else
2398 middle = Qnil;
2399 accum = concat3 (accum, middle,
0c8533c6
RS
2400 Fsubstring (string,
2401 make_number (substart),
080c45fd
RS
2402 make_number (subend)));
2403 lastpos = pos;
0c8533c6 2404 lastpos_byte = pos_byte;
080c45fd 2405 }
1e79ec24
KH
2406 else if (delbackslash)
2407 {
d131e79c
RS
2408 middle = substring_both (newtext, lastpos,
2409 lastpos_byte,
2410 pos - 1, pos_byte - 1);
0c8533c6 2411
1e79ec24
KH
2412 accum = concat2 (accum, middle);
2413 lastpos = pos;
0c8533c6 2414 lastpos_byte = pos_byte;
1e79ec24 2415 }
080c45fd
RS
2416 }
2417
d131e79c
RS
2418 if (pos != lastpos)
2419 middle = substring_both (newtext, lastpos,
2420 lastpos_byte,
0c8533c6 2421 pos, pos_byte);
080c45fd
RS
2422 else
2423 middle = Qnil;
2424
2425 newtext = concat2 (accum, middle);
2426 }
2427
636a5e28 2428 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2429 if (case_action == all_caps)
2430 newtext = Fupcase (newtext);
2431 else if (case_action == cap_initial)
2b2eead9 2432 newtext = Fupcase_initials (newtext);
080c45fd
RS
2433
2434 return concat3 (before, newtext, after);
2435 }
2436
09c4719e 2437 /* Record point, then move (quietly) to the start of the match. */
9160906f 2438 if (PT >= search_regs.end[sub])
b0eba991 2439 opoint = PT - ZV;
9160906f
RS
2440 else if (PT > search_regs.start[sub])
2441 opoint = search_regs.end[sub] - ZV;
b0eba991
RS
2442 else
2443 opoint = PT;
2444
886ed6ec
RS
2445 /* If we want non-literal replacement,
2446 perform substitution on the replacement string. */
2447 if (NILP (literal))
ca1d1d23 2448 {
d5db4077 2449 int length = SBYTES (newtext);
68e69fbd
RS
2450 unsigned char *substed;
2451 int substed_alloc_size, substed_len;
3bc25e52
KH
2452 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2453 int str_multibyte = STRING_MULTIBYTE (newtext);
2454 Lisp_Object rev_tbl;
886ed6ec 2455 int really_changed = 0;
3bc25e52
KH
2456
2457 rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2458 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2459 make_number (0))
2460 : Qnil);
ac3b28b1 2461
68e69fbd
RS
2462 substed_alloc_size = length * 2 + 100;
2463 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2464 substed_len = 0;
2465
3bc25e52
KH
2466 /* Go thru NEWTEXT, producing the actual text to insert in
2467 SUBSTED while adjusting multibyteness to that of the current
2468 buffer. */
ca1d1d23 2469
ac3b28b1 2470 for (pos_byte = 0, pos = 0; pos_byte < length;)
ca1d1d23 2471 {
68e69fbd 2472 unsigned char str[MAX_MULTIBYTE_LENGTH];
f8ce8a0d
GM
2473 unsigned char *add_stuff = NULL;
2474 int add_len = 0;
68e69fbd 2475 int idx = -1;
9a76659d 2476
3bc25e52
KH
2477 if (str_multibyte)
2478 {
eb99a8dd 2479 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
3bc25e52
KH
2480 if (!buf_multibyte)
2481 c = multibyte_char_to_unibyte (c, rev_tbl);
2482 }
2483 else
2484 {
2485 /* Note that we don't have to increment POS. */
5d69fe10 2486 c = SREF (newtext, pos_byte++);
3bc25e52
KH
2487 if (buf_multibyte)
2488 c = unibyte_char_to_multibyte (c);
2489 }
ac3b28b1 2490
68e69fbd
RS
2491 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2492 or set IDX to a match index, which means put that part
2493 of the buffer text into SUBSTED. */
2494
ca1d1d23
JB
2495 if (c == '\\')
2496 {
886ed6ec
RS
2497 really_changed = 1;
2498
3bc25e52
KH
2499 if (str_multibyte)
2500 {
eb99a8dd
KH
2501 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2502 pos, pos_byte);
3bc25e52
KH
2503 if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2504 c = multibyte_char_to_unibyte (c, rev_tbl);
2505 }
2506 else
2507 {
d5db4077 2508 c = SREF (newtext, pos_byte++);
3bc25e52
KH
2509 if (buf_multibyte)
2510 c = unibyte_char_to_multibyte (c);
2511 }
2512
ca1d1d23 2513 if (c == '&')
68e69fbd 2514 idx = sub;
78445046 2515 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
2516 {
2517 if (search_regs.start[c - '0'] >= 1)
68e69fbd 2518 idx = c - '0';
ca1d1d23 2519 }
636a5e28 2520 else if (c == '\\')
68e69fbd 2521 add_len = 1, add_stuff = "\\";
636a5e28 2522 else
3bc25e52
KH
2523 {
2524 xfree (substed);
2525 error ("Invalid use of `\\' in replacement text");
2526 }
ca1d1d23
JB
2527 }
2528 else
68e69fbd
RS
2529 {
2530 add_len = CHAR_STRING (c, str);
2531 add_stuff = str;
2532 }
2533
2534 /* If we want to copy part of a previous match,
2535 set up ADD_STUFF and ADD_LEN to point to it. */
2536 if (idx >= 0)
2537 {
2538 int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2539 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2540 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2541 move_gap (search_regs.start[idx]);
2542 add_stuff = BYTE_POS_ADDR (begbyte);
2543 }
2544
2545 /* Now the stuff we want to add to SUBSTED
2546 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2547
2548 /* Make sure SUBSTED is big enough. */
2549 if (substed_len + add_len >= substed_alloc_size)
2550 {
2551 substed_alloc_size = substed_len + add_len + 500;
2552 substed = (unsigned char *) xrealloc (substed,
2553 substed_alloc_size + 1);
2554 }
2555
2556 /* Now add to the end of SUBSTED. */
f8ce8a0d
GM
2557 if (add_stuff)
2558 {
2559 bcopy (add_stuff, substed + substed_len, add_len);
2560 substed_len += add_len;
2561 }
ca1d1d23 2562 }
68e69fbd 2563
886ed6ec 2564 if (really_changed)
80460525
KH
2565 {
2566 if (buf_multibyte)
2567 {
2568 int nchars = multibyte_chars_in_text (substed, substed_len);
68e69fbd 2569
80460525
KH
2570 newtext = make_multibyte_string (substed, nchars, substed_len);
2571 }
2572 else
2573 newtext = make_unibyte_string (substed, substed_len);
2574 }
68e69fbd 2575 xfree (substed);
ca1d1d23
JB
2576 }
2577
886ed6ec
RS
2578 /* Replace the old text with the new in the cleanest possible way. */
2579 replace_range (search_regs.start[sub], search_regs.end[sub],
2580 newtext, 1, 0, 1);
d5db4077 2581 newpoint = search_regs.start[sub] + SCHARS (newtext);
ca1d1d23
JB
2582
2583 if (case_action == all_caps)
886ed6ec
RS
2584 Fupcase_region (make_number (search_regs.start[sub]),
2585 make_number (newpoint));
ca1d1d23 2586 else if (case_action == cap_initial)
886ed6ec
RS
2587 Fupcase_initials_region (make_number (search_regs.start[sub]),
2588 make_number (newpoint));
3e18eecf 2589
98e942e0
RS
2590 /* Adjust search data for this change. */
2591 {
5b88a2c5 2592 int oldend = search_regs.end[sub];
41c01205 2593 int oldstart = search_regs.start[sub];
98e942e0
RS
2594 int change = newpoint - search_regs.end[sub];
2595 int i;
2596
2597 for (i = 0; i < search_regs.num_regs; i++)
2598 {
41c01205 2599 if (search_regs.start[i] >= oldend)
98e942e0 2600 search_regs.start[i] += change;
41c01205
DK
2601 else if (search_regs.start[i] > oldstart)
2602 search_regs.start[i] = oldstart;
2603 if (search_regs.end[i] >= oldend)
98e942e0 2604 search_regs.end[i] += change;
41c01205
DK
2605 else if (search_regs.end[i] > oldstart)
2606 search_regs.end[i] = oldstart;
98e942e0
RS
2607 }
2608 }
2609
b0eba991 2610 /* Put point back where it was in the text. */
8d808a65 2611 if (opoint <= 0)
fa8ed3e0 2612 TEMP_SET_PT (opoint + ZV);
b0eba991 2613 else
fa8ed3e0 2614 TEMP_SET_PT (opoint);
b0eba991
RS
2615
2616 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2617 move_if_not_intangible (newpoint);
177c0ea7 2618
ca1d1d23
JB
2619 return Qnil;
2620}
2621\f
2622static Lisp_Object
2623match_limit (num, beginningp)
2624 Lisp_Object num;
2625 int beginningp;
2626{
2627 register int n;
2628
b7826503 2629 CHECK_NUMBER (num);
ca1d1d23 2630 n = XINT (num);
f90a5bf5 2631 if (n < 0)
bd2cbd56 2632 args_out_of_range (num, make_number (0));
f90a5bf5
RS
2633 if (search_regs.num_regs <= 0)
2634 error ("No match data, because no search succeeded");
9b9ceb61 2635 if (n >= search_regs.num_regs
4746118a 2636 || search_regs.start[n] < 0)
ca1d1d23
JB
2637 return Qnil;
2638 return (make_number ((beginningp) ? search_regs.start[n]
2639 : search_regs.end[n]));
2640}
2641
2642DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
8c1a1077
PJ
2643 doc: /* Return position of start of text matched by last search.
2644SUBEXP, a number, specifies which parenthesized expression in the last
2645 regexp.
2646Value is nil if SUBEXPth pair didn't match, or there were less than
2647 SUBEXP pairs.
2648Zero means the entire text matched by the whole regexp or whole string. */)
2649 (subexp)
5806161b 2650 Lisp_Object subexp;
ca1d1d23 2651{
5806161b 2652 return match_limit (subexp, 1);
ca1d1d23
JB
2653}
2654
2655DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
8c1a1077
PJ
2656 doc: /* Return position of end of text matched by last search.
2657SUBEXP, a number, specifies which parenthesized expression in the last
2658 regexp.
2659Value is nil if SUBEXPth pair didn't match, or there were less than
2660 SUBEXP pairs.
2661Zero means the entire text matched by the whole regexp or whole string. */)
2662 (subexp)
5806161b 2663 Lisp_Object subexp;
ca1d1d23 2664{
5806161b 2665 return match_limit (subexp, 0);
177c0ea7 2666}
ca1d1d23 2667
56256c2a 2668DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
8c1a1077
PJ
2669 doc: /* Return a list containing all info on what the last search matched.
2670Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2671All the elements are markers or nil (nil if the Nth pair didn't match)
2672if the last match was on a buffer; integers or nil if a string was matched.
2673Use `store-match-data' to reinstate the data in this list.
2674
41c01205
DK
2675If INTEGERS (the optional first argument) is non-nil, always use
2676integers \(rather than markers) to represent buffer positions. In
2677this case, and if the last match was in a buffer, the buffer will get
2678stored as one additional element at the end of the list.
2679
8c1a1077 2680If REUSE is a list, reuse it as part of the value. If REUSE is long enough
140a6b7e
KS
2681to hold all the values, and if INTEGERS is non-nil, no consing is done.
2682
2683Return value is undefined if the last search failed. */)
8c1a1077 2684 (integers, reuse)
56256c2a 2685 Lisp_Object integers, reuse;
ca1d1d23 2686{
56256c2a 2687 Lisp_Object tail, prev;
4746118a 2688 Lisp_Object *data;
ca1d1d23
JB
2689 int i, len;
2690
daa37602 2691 if (NILP (last_thing_searched))
c36bcf1b 2692 return Qnil;
daa37602 2693
6bbd7a29
GM
2694 prev = Qnil;
2695
41c01205 2696 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
4746118a
JB
2697 * sizeof (Lisp_Object));
2698
41c01205 2699 len = 0;
4746118a 2700 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2701 {
2702 int start = search_regs.start[i];
2703 if (start >= 0)
2704 {
56256c2a
RS
2705 if (EQ (last_thing_searched, Qt)
2706 || ! NILP (integers))
ca1d1d23 2707 {
c235cce7
KH
2708 XSETFASTINT (data[2 * i], start);
2709 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2710 }
0ed62dc7 2711 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2712 {
2713 data[2 * i] = Fmake_marker ();
daa37602
JB
2714 Fset_marker (data[2 * i],
2715 make_number (start),
2716 last_thing_searched);
ca1d1d23
JB
2717 data[2 * i + 1] = Fmake_marker ();
2718 Fset_marker (data[2 * i + 1],
177c0ea7 2719 make_number (search_regs.end[i]),
daa37602 2720 last_thing_searched);
ca1d1d23 2721 }
daa37602
JB
2722 else
2723 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2724 abort ();
2725
41c01205 2726 len = 2*(i+1);
ca1d1d23
JB
2727 }
2728 else
2729 data[2 * i] = data [2 * i + 1] = Qnil;
2730 }
56256c2a 2731
bd2cbd56 2732 if (BUFFERP (last_thing_searched) && !NILP (integers))
41c01205 2733 {
bd2cbd56 2734 data[len] = last_thing_searched;
41c01205
DK
2735 len++;
2736 }
2737
56256c2a
RS
2738 /* If REUSE is not usable, cons up the values and return them. */
2739 if (! CONSP (reuse))
41c01205 2740 return Flist (len, data);
56256c2a
RS
2741
2742 /* If REUSE is a list, store as many value elements as will fit
2743 into the elements of REUSE. */
2744 for (i = 0, tail = reuse; CONSP (tail);
c1d497be 2745 i++, tail = XCDR (tail))
56256c2a 2746 {
41c01205 2747 if (i < len)
f3fbd155 2748 XSETCAR (tail, data[i]);
56256c2a 2749 else
f3fbd155 2750 XSETCAR (tail, Qnil);
56256c2a
RS
2751 prev = tail;
2752 }
2753
2754 /* If we couldn't fit all value elements into REUSE,
2755 cons up the rest of them and add them to the end of REUSE. */
41c01205
DK
2756 if (i < len)
2757 XSETCDR (prev, Flist (len - i, data + i));
56256c2a
RS
2758
2759 return reuse;
ca1d1d23
JB
2760}
2761
2762
3f1c005b 2763DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
8c1a1077
PJ
2764 doc: /* Set internal data on last search match from elements of LIST.
2765LIST should have been created by calling `match-data' previously. */)
2766 (list)
ca1d1d23
JB
2767 register Lisp_Object list;
2768{
2769 register int i;
2770 register Lisp_Object marker;
2771
7074fde6
FP
2772 if (running_asynch_code)
2773 save_search_regs ();
2774
ca1d1d23 2775 if (!CONSP (list) && !NILP (list))
b37902c8 2776 list = wrong_type_argument (Qconsp, list);
ca1d1d23 2777
41c01205
DK
2778 /* Unless we find a marker with a buffer or an explicit buffer
2779 in LIST, assume that this match data came from a string. */
daa37602
JB
2780 last_thing_searched = Qt;
2781
4746118a
JB
2782 /* Allocate registers if they don't already exist. */
2783 {
d084e942 2784 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
2785
2786 if (length > search_regs.num_regs)
2787 {
1113d9db
JB
2788 if (search_regs.num_regs == 0)
2789 {
2790 search_regs.start
2791 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2792 search_regs.end
2793 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2794 }
4746118a 2795 else
1113d9db
JB
2796 {
2797 search_regs.start
2798 = (regoff_t *) xrealloc (search_regs.start,
2799 length * sizeof (regoff_t));
2800 search_regs.end
2801 = (regoff_t *) xrealloc (search_regs.end,
2802 length * sizeof (regoff_t));
2803 }
4746118a 2804
e62371e9
KH
2805 for (i = search_regs.num_regs; i < length; i++)
2806 search_regs.start[i] = -1;
2807
487282dc 2808 search_regs.num_regs = length;
4746118a 2809 }
ca1d1d23 2810
c3762cbd 2811 for (i = 0;; i++)
41c01205
DK
2812 {
2813 marker = Fcar (list);
bd2cbd56 2814 if (BUFFERP (marker))
c3762cbd 2815 {
bd2cbd56 2816 last_thing_searched = marker;
c3762cbd
DK
2817 break;
2818 }
2819 if (i >= length)
2820 break;
41c01205
DK
2821 if (NILP (marker))
2822 {
2823 search_regs.start[i] = -1;
2824 list = Fcdr (list);
2825 }
2826 else
2827 {
2828 int from;
2829
2830 if (MARKERP (marker))
2831 {
2832 if (XMARKER (marker)->buffer == 0)
2833 XSETFASTINT (marker, 0);
2834 else
2835 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2836 }
2837
2838 CHECK_NUMBER_COERCE_MARKER (marker);
2839 from = XINT (marker);
2840 list = Fcdr (list);
2841
2842 marker = Fcar (list);
2843 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2844 XSETFASTINT (marker, 0);
2845
2846 CHECK_NUMBER_COERCE_MARKER (marker);
2847 search_regs.start[i] = from;
2848 search_regs.end[i] = XINT (marker);
2849 }
2850 list = Fcdr (list);
2851 }
ca1d1d23 2852
41c01205
DK
2853 for (; i < search_regs.num_regs; i++)
2854 search_regs.start[i] = -1;
2855 }
ca1d1d23 2856
177c0ea7 2857 return Qnil;
ca1d1d23
JB
2858}
2859
7074fde6
FP
2860/* If non-zero the match data have been saved in saved_search_regs
2861 during the execution of a sentinel or filter. */
75ebf74b 2862static int search_regs_saved;
7074fde6 2863static struct re_registers saved_search_regs;
41c01205 2864static Lisp_Object saved_last_thing_searched;
7074fde6
FP
2865
2866/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2867 if asynchronous code (filter or sentinel) is running. */
2868static void
2869save_search_regs ()
2870{
2871 if (!search_regs_saved)
2872 {
2873 saved_search_regs.num_regs = search_regs.num_regs;
2874 saved_search_regs.start = search_regs.start;
2875 saved_search_regs.end = search_regs.end;
41c01205
DK
2876 saved_last_thing_searched = last_thing_searched;
2877 last_thing_searched = Qnil;
7074fde6 2878 search_regs.num_regs = 0;
2d4a771a
RS
2879 search_regs.start = 0;
2880 search_regs.end = 0;
7074fde6
FP
2881
2882 search_regs_saved = 1;
2883 }
2884}
2885
2886/* Called upon exit from filters and sentinels. */
2887void
2888restore_match_data ()
2889{
2890 if (search_regs_saved)
2891 {
2892 if (search_regs.num_regs > 0)
2893 {
2894 xfree (search_regs.start);
2895 xfree (search_regs.end);
2896 }
2897 search_regs.num_regs = saved_search_regs.num_regs;
2898 search_regs.start = saved_search_regs.start;
2899 search_regs.end = saved_search_regs.end;
41c01205
DK
2900 last_thing_searched = saved_last_thing_searched;
2901 saved_last_thing_searched = Qnil;
7074fde6
FP
2902 search_regs_saved = 0;
2903 }
2904}
2905
ca1d1d23
JB
2906/* Quote a string to inactivate reg-expr chars */
2907
2908DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
8c1a1077
PJ
2909 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
2910 (string)
5806161b 2911 Lisp_Object string;
ca1d1d23
JB
2912{
2913 register unsigned char *in, *out, *end;
2914 register unsigned char *temp;
0c8533c6 2915 int backslashes_added = 0;
ca1d1d23 2916
b7826503 2917 CHECK_STRING (string);
ca1d1d23 2918
d5db4077 2919 temp = (unsigned char *) alloca (SBYTES (string) * 2);
ca1d1d23
JB
2920
2921 /* Now copy the data into the new string, inserting escapes. */
2922
d5db4077
KR
2923 in = SDATA (string);
2924 end = in + SBYTES (string);
177c0ea7 2925 out = temp;
ca1d1d23
JB
2926
2927 for (; in != end; in++)
2928 {
2929 if (*in == '[' || *in == ']'
2930 || *in == '*' || *in == '.' || *in == '\\'
2931 || *in == '?' || *in == '+'
2932 || *in == '^' || *in == '$')
0c8533c6 2933 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
2934 *out++ = *in;
2935 }
2936
3f8100f1 2937 return make_specified_string (temp,
d5db4077 2938 SCHARS (string) + backslashes_added,
3f8100f1
RS
2939 out - temp,
2940 STRING_MULTIBYTE (string));
ca1d1d23 2941}
177c0ea7 2942\f
dfcf069d 2943void
ca1d1d23
JB
2944syms_of_search ()
2945{
2946 register int i;
2947
487282dc
KH
2948 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2949 {
2950 searchbufs[i].buf.allocated = 100;
b23c0a83 2951 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
487282dc
KH
2952 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2953 searchbufs[i].regexp = Qnil;
2954 staticpro (&searchbufs[i].regexp);
2955 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2956 }
2957 searchbuf_head = &searchbufs[0];
ca1d1d23
JB
2958
2959 Qsearch_failed = intern ("search-failed");
2960 staticpro (&Qsearch_failed);
2961 Qinvalid_regexp = intern ("invalid-regexp");
2962 staticpro (&Qinvalid_regexp);
2963
2964 Fput (Qsearch_failed, Qerror_conditions,
2965 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2966 Fput (Qsearch_failed, Qerror_message,
2967 build_string ("Search failed"));
2968
2969 Fput (Qinvalid_regexp, Qerror_conditions,
2970 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2971 Fput (Qinvalid_regexp, Qerror_message,
2972 build_string ("Invalid regexp"));
2973
daa37602
JB
2974 last_thing_searched = Qnil;
2975 staticpro (&last_thing_searched);
2976
0f6af254
DK
2977 saved_last_thing_searched = Qnil;
2978 staticpro (&saved_last_thing_searched);
2979
ca1d1d23 2980 defsubr (&Slooking_at);
b819a390
RS
2981 defsubr (&Sposix_looking_at);
2982 defsubr (&Sstring_match);
2983 defsubr (&Sposix_string_match);
ca1d1d23
JB
2984 defsubr (&Ssearch_forward);
2985 defsubr (&Ssearch_backward);
2986 defsubr (&Sword_search_forward);
2987 defsubr (&Sword_search_backward);
2988 defsubr (&Sre_search_forward);
2989 defsubr (&Sre_search_backward);
b819a390
RS
2990 defsubr (&Sposix_search_forward);
2991 defsubr (&Sposix_search_backward);
ca1d1d23
JB
2992 defsubr (&Sreplace_match);
2993 defsubr (&Smatch_beginning);
2994 defsubr (&Smatch_end);
2995 defsubr (&Smatch_data);
3f1c005b 2996 defsubr (&Sset_match_data);
ca1d1d23
JB
2997 defsubr (&Sregexp_quote);
2998}
ab5796a9
MB
2999
3000/* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3001 (do not change this comment) */