(quail-help): The output message is improved.
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
68c45bf0 2 Copyright (C) 1985, 86,87,93,94,97,98, 1999 Free Software Foundation, Inc.
ca1d1d23
JB
3
4This file is part of GNU Emacs.
5
6GNU Emacs is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
7c938215 8the Free Software Foundation; either version 2, or (at your option)
ca1d1d23
JB
9any later version.
10
11GNU Emacs is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU Emacs; see the file COPYING. If not, write to
3b7ad313
EN
18the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19Boston, MA 02111-1307, USA. */
ca1d1d23
JB
20
21
18160b98 22#include <config.h>
ca1d1d23
JB
23#include "lisp.h"
24#include "syntax.h"
5679531d 25#include "category.h"
ca1d1d23 26#include "buffer.h"
5679531d 27#include "charset.h"
9169c321 28#include "region-cache.h"
ca1d1d23 29#include "commands.h"
9ac0d9e0 30#include "blockinput.h"
bf1760bb 31#include "intervals.h"
4746118a 32
ca1d1d23
JB
33#include <sys/types.h>
34#include "regex.h"
35
67ce527d
KH
36#define min(a, b) ((a) < (b) ? (a) : (b))
37#define max(a, b) ((a) > (b) ? (a) : (b))
38
1d288aef 39#define REGEXP_CACHE_SIZE 20
ca1d1d23 40
487282dc
KH
41/* If the regexp is non-nil, then the buffer contains the compiled form
42 of that regexp, suitable for searching. */
1d288aef
RS
43struct regexp_cache
44{
487282dc
KH
45 struct regexp_cache *next;
46 Lisp_Object regexp;
47 struct re_pattern_buffer buf;
48 char fastmap[0400];
b819a390
RS
49 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
50 char posix;
487282dc 51};
ca1d1d23 52
487282dc
KH
53/* The instances of that struct. */
54struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 55
487282dc
KH
56/* The head of the linked list; points to the most recently used buffer. */
57struct regexp_cache *searchbuf_head;
ca1d1d23 58
ca1d1d23 59
4746118a
JB
60/* Every call to re_match, etc., must pass &search_regs as the regs
61 argument unless you can show it is unnecessary (i.e., if re_match
62 is certainly going to be called again before region-around-match
63 can be called).
64
65 Since the registers are now dynamically allocated, we need to make
66 sure not to refer to the Nth register before checking that it has
1113d9db
JB
67 been allocated by checking search_regs.num_regs.
68
69 The regex code keeps track of whether it has allocated the search
487282dc
KH
70 buffer using bits in the re_pattern_buffer. This means that whenever
71 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
72 allocated any registers, and will allocate new registers the next
73 time you call a searching or matching function. Therefore, we need
74 to call re_set_registers after compiling a new pattern or after
75 setting the match registers, so that the regex functions will be
76 able to free or re-allocate it properly. */
ca1d1d23
JB
77static struct re_registers search_regs;
78
daa37602
JB
79/* The buffer in which the last search was performed, or
80 Qt if the last search was done in a string;
81 Qnil if no searching has been done yet. */
82static Lisp_Object last_thing_searched;
ca1d1d23 83
8e6208c5 84/* error condition signaled when regexp compile_pattern fails */
ca1d1d23
JB
85
86Lisp_Object Qinvalid_regexp;
87
ca325161 88static void set_search_regs ();
044f81f1 89static void save_search_regs ();
facdc750
RS
90static int simple_search ();
91static int boyer_moore ();
b819a390
RS
92static int search_buffer ();
93
ca1d1d23
JB
94static void
95matcher_overflow ()
96{
97 error ("Stack overflow in regexp matcher");
98}
99
b819a390
RS
100/* Compile a regexp and signal a Lisp error if anything goes wrong.
101 PATTERN is the pattern to compile.
102 CP is the place to put the result.
facdc750 103 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
104 REGP is the structure that says where to store the "register"
105 values that will result from matching this pattern.
106 If it is 0, we should compile the pattern not to record any
107 subexpression bounds.
108 POSIX is nonzero if we want full backtracking (POSIX style)
5679531d
KH
109 for this pattern. 0 means backtrack only enough to get a valid match.
110 MULTIBYTE is nonzero if we want to handle multibyte characters in
111 PATTERN. 0 means all multibyte characters are recognized just as
112 sequences of binary data. */
ca1d1d23 113
487282dc 114static void
5679531d 115compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
487282dc 116 struct regexp_cache *cp;
ca1d1d23 117 Lisp_Object pattern;
facdc750 118 Lisp_Object translate;
487282dc 119 struct re_registers *regp;
b819a390 120 int posix;
5679531d 121 int multibyte;
ca1d1d23 122{
7276d3d8 123 unsigned char *raw_pattern;
f8bd51c4 124 int raw_pattern_size;
d451e4db 125 char *val;
b819a390 126 reg_syntax_t old;
ca1d1d23 127
f8bd51c4
KH
128 /* MULTIBYTE says whether the text to be searched is multibyte.
129 We must convert PATTERN to match that, or we will not really
130 find things right. */
131
132 if (multibyte == STRING_MULTIBYTE (pattern))
133 {
7276d3d8 134 raw_pattern = (unsigned char *) XSTRING (pattern)->data;
fc932ac6 135 raw_pattern_size = STRING_BYTES (XSTRING (pattern));
f8bd51c4
KH
136 }
137 else if (multibyte)
138 {
139 raw_pattern_size = count_size_as_multibyte (XSTRING (pattern)->data,
140 XSTRING (pattern)->size);
7276d3d8 141 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
f8bd51c4
KH
142 copy_text (XSTRING (pattern)->data, raw_pattern,
143 XSTRING (pattern)->size, 0, 1);
144 }
145 else
146 {
147 /* Converting multibyte to single-byte.
148
149 ??? Perhaps this conversion should be done in a special way
150 by subtracting nonascii-insert-offset from each non-ASCII char,
151 so that only the multibyte chars which really correspond to
152 the chosen single-byte character set can possibly match. */
153 raw_pattern_size = XSTRING (pattern)->size;
7276d3d8 154 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
f8bd51c4 155 copy_text (XSTRING (pattern)->data, raw_pattern,
fc932ac6 156 STRING_BYTES (XSTRING (pattern)), 1, 0);
f8bd51c4
KH
157 }
158
487282dc 159 cp->regexp = Qnil;
59fab369 160 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
b819a390 161 cp->posix = posix;
5679531d 162 cp->buf.multibyte = multibyte;
9ac0d9e0 163 BLOCK_INPUT;
fb4a568d 164 old = re_set_syntax (RE_SYNTAX_EMACS
b819a390 165 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
7276d3d8
RS
166 val = (char *) re_compile_pattern ((char *)raw_pattern,
167 raw_pattern_size, &cp->buf);
b819a390 168 re_set_syntax (old);
9ac0d9e0 169 UNBLOCK_INPUT;
ca1d1d23 170 if (val)
487282dc 171 Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
1113d9db 172
487282dc 173 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
174}
175
6efc7887
RS
176/* Shrink each compiled regexp buffer in the cache
177 to the size actually used right now.
178 This is called from garbage collection. */
179
180void
181shrink_regexp_cache ()
182{
183 struct regexp_cache *cp, **cpp;
184
185 for (cp = searchbuf_head; cp != 0; cp = cp->next)
186 {
187 cp->buf.allocated = cp->buf.used;
188 cp->buf.buffer
189 = (unsigned char *) realloc (cp->buf.buffer, cp->buf.used);
190 }
191}
192
487282dc 193/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
194 the cache.
195 PATTERN is the pattern to compile.
facdc750 196 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
197 REGP is the structure that says where to store the "register"
198 values that will result from matching this pattern.
199 If it is 0, we should compile the pattern not to record any
200 subexpression bounds.
201 POSIX is nonzero if we want full backtracking (POSIX style)
202 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
203
204struct re_pattern_buffer *
0c8533c6 205compile_pattern (pattern, regp, translate, posix, multibyte)
487282dc
KH
206 Lisp_Object pattern;
207 struct re_registers *regp;
facdc750 208 Lisp_Object translate;
0c8533c6 209 int posix, multibyte;
487282dc
KH
210{
211 struct regexp_cache *cp, **cpp;
212
213 for (cpp = &searchbuf_head; ; cpp = &cp->next)
214 {
215 cp = *cpp;
f1b9c7c1
KR
216 /* Entries are initialized to nil, and may be set to nil by
217 compile_pattern_1 if the pattern isn't valid. Don't apply
218 XSTRING in those cases. However, compile_pattern_1 is only
219 applied to the cache entry we pick here to reuse. So nil
220 should never appear before a non-nil entry. */
7c752c80 221 if (NILP (cp->regexp))
f1b9c7c1 222 goto compile_it;
1d288aef 223 if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
cf69b13e 224 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
1d288aef 225 && !NILP (Fstring_equal (cp->regexp, pattern))
59fab369 226 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
5679531d
KH
227 && cp->posix == posix
228 && cp->buf.multibyte == multibyte)
487282dc
KH
229 break;
230
f1b9c7c1
KR
231 /* If we're at the end of the cache, compile into the nil cell
232 we found, or the last (least recently used) cell with a
233 string value. */
487282dc
KH
234 if (cp->next == 0)
235 {
f1b9c7c1 236 compile_it:
5679531d 237 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
487282dc
KH
238 break;
239 }
240 }
241
242 /* When we get here, cp (aka *cpp) contains the compiled pattern,
243 either because we found it in the cache or because we just compiled it.
244 Move it to the front of the queue to mark it as most recently used. */
245 *cpp = cp->next;
246 cp->next = searchbuf_head;
247 searchbuf_head = cp;
1113d9db 248
6639708c
RS
249 /* Advise the searching functions about the space we have allocated
250 for register data. */
251 if (regp)
252 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
253
487282dc 254 return &cp->buf;
ca1d1d23
JB
255}
256
257/* Error condition used for failing searches */
258Lisp_Object Qsearch_failed;
259
260Lisp_Object
261signal_failure (arg)
262 Lisp_Object arg;
263{
264 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
265 return Qnil;
266}
267\f
b819a390
RS
268static Lisp_Object
269looking_at_1 (string, posix)
ca1d1d23 270 Lisp_Object string;
b819a390 271 int posix;
ca1d1d23
JB
272{
273 Lisp_Object val;
274 unsigned char *p1, *p2;
275 int s1, s2;
276 register int i;
487282dc 277 struct re_pattern_buffer *bufp;
ca1d1d23 278
7074fde6
FP
279 if (running_asynch_code)
280 save_search_regs ();
281
ca1d1d23 282 CHECK_STRING (string, 0);
487282dc
KH
283 bufp = compile_pattern (string, &search_regs,
284 (!NILP (current_buffer->case_fold_search)
facdc750 285 ? DOWNCASE_TABLE : Qnil),
0c8533c6
RS
286 posix,
287 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23
JB
288
289 immediate_quit = 1;
290 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
291
292 /* Get pointers and sizes of the two strings
293 that make up the visible portion of the buffer. */
294
295 p1 = BEGV_ADDR;
fa8ed3e0 296 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 297 p2 = GAP_END_ADDR;
fa8ed3e0 298 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
299 if (s1 < 0)
300 {
301 p2 = p1;
fa8ed3e0 302 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
303 s1 = 0;
304 }
305 if (s2 < 0)
306 {
fa8ed3e0 307 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
308 s2 = 0;
309 }
8bb43c28
RS
310
311 re_match_object = Qnil;
ca1d1d23 312
487282dc 313 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
fa8ed3e0
RS
314 PT_BYTE - BEGV_BYTE, &search_regs,
315 ZV_BYTE - BEGV_BYTE);
de182d70
GM
316 immediate_quit = 0;
317
ca1d1d23
JB
318 if (i == -2)
319 matcher_overflow ();
320
321 val = (0 <= i ? Qt : Qnil);
fa8ed3e0
RS
322 if (i >= 0)
323 for (i = 0; i < search_regs.num_regs; i++)
324 if (search_regs.start[i] >= 0)
325 {
326 search_regs.start[i]
327 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
328 search_regs.end[i]
329 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
330 }
a3668d92 331 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
332 return val;
333}
334
b819a390 335DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
94f94972 336 "Return t if text after point matches regular expression REGEXP.\n\
b819a390
RS
337This function modifies the match data that `match-beginning',\n\
338`match-end' and `match-data' access; save and restore the match\n\
339data if you want to preserve them.")
94f94972
RS
340 (regexp)
341 Lisp_Object regexp;
b819a390 342{
94f94972 343 return looking_at_1 (regexp, 0);
b819a390
RS
344}
345
346DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
94f94972 347 "Return t if text after point matches regular expression REGEXP.\n\
b819a390
RS
348Find the longest match, in accord with Posix regular expression rules.\n\
349This function modifies the match data that `match-beginning',\n\
350`match-end' and `match-data' access; save and restore the match\n\
351data if you want to preserve them.")
94f94972
RS
352 (regexp)
353 Lisp_Object regexp;
b819a390 354{
94f94972 355 return looking_at_1 (regexp, 1);
b819a390
RS
356}
357\f
358static Lisp_Object
359string_match_1 (regexp, string, start, posix)
ca1d1d23 360 Lisp_Object regexp, string, start;
b819a390 361 int posix;
ca1d1d23
JB
362{
363 int val;
487282dc 364 struct re_pattern_buffer *bufp;
0c8533c6
RS
365 int pos, pos_byte;
366 int i;
ca1d1d23 367
7074fde6
FP
368 if (running_asynch_code)
369 save_search_regs ();
370
ca1d1d23
JB
371 CHECK_STRING (regexp, 0);
372 CHECK_STRING (string, 1);
373
374 if (NILP (start))
0c8533c6 375 pos = 0, pos_byte = 0;
ca1d1d23
JB
376 else
377 {
378 int len = XSTRING (string)->size;
379
380 CHECK_NUMBER (start, 2);
0c8533c6
RS
381 pos = XINT (start);
382 if (pos < 0 && -pos <= len)
383 pos = len + pos;
384 else if (0 > pos || pos > len)
ca1d1d23 385 args_out_of_range (string, start);
0c8533c6 386 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
387 }
388
487282dc
KH
389 bufp = compile_pattern (regexp, &search_regs,
390 (!NILP (current_buffer->case_fold_search)
facdc750 391 ? DOWNCASE_TABLE : Qnil),
0c8533c6
RS
392 posix,
393 STRING_MULTIBYTE (string));
ca1d1d23 394 immediate_quit = 1;
8bb43c28
RS
395 re_match_object = string;
396
487282dc 397 val = re_search (bufp, (char *) XSTRING (string)->data,
fc932ac6
RS
398 STRING_BYTES (XSTRING (string)), pos_byte,
399 STRING_BYTES (XSTRING (string)) - pos_byte,
ca1d1d23
JB
400 &search_regs);
401 immediate_quit = 0;
daa37602 402 last_thing_searched = Qt;
ca1d1d23
JB
403 if (val == -2)
404 matcher_overflow ();
405 if (val < 0) return Qnil;
0c8533c6
RS
406
407 for (i = 0; i < search_regs.num_regs; i++)
408 if (search_regs.start[i] >= 0)
409 {
410 search_regs.start[i]
411 = string_byte_to_char (string, search_regs.start[i]);
412 search_regs.end[i]
413 = string_byte_to_char (string, search_regs.end[i]);
414 }
415
416 return make_number (string_byte_to_char (string, val));
ca1d1d23 417}
e59a8453 418
b819a390
RS
419DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
420 "Return index of start of first match for REGEXP in STRING, or nil.\n\
21e01890 421Case is ignored if `case-fold-search' is non-nil in the current buffer.\n\
b819a390
RS
422If third arg START is non-nil, start search at that index in STRING.\n\
423For index of first char beyond the match, do (match-end 0).\n\
424`match-end' and `match-beginning' also give indices of substrings\n\
425matched by parenthesis constructs in the pattern.")
426 (regexp, string, start)
427 Lisp_Object regexp, string, start;
428{
429 return string_match_1 (regexp, string, start, 0);
430}
431
432DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
433 "Return index of start of first match for REGEXP in STRING, or nil.\n\
434Find the longest match, in accord with Posix regular expression rules.\n\
21e01890 435Case is ignored if `case-fold-search' is non-nil in the current buffer.\n\
b819a390
RS
436If third arg START is non-nil, start search at that index in STRING.\n\
437For index of first char beyond the match, do (match-end 0).\n\
438`match-end' and `match-beginning' also give indices of substrings\n\
439matched by parenthesis constructs in the pattern.")
440 (regexp, string, start)
441 Lisp_Object regexp, string, start;
442{
443 return string_match_1 (regexp, string, start, 1);
444}
445
e59a8453
RS
446/* Match REGEXP against STRING, searching all of STRING,
447 and return the index of the match, or negative on failure.
448 This does not clobber the match data. */
449
450int
451fast_string_match (regexp, string)
452 Lisp_Object regexp, string;
453{
454 int val;
487282dc 455 struct re_pattern_buffer *bufp;
e59a8453 456
facdc750
RS
457 bufp = compile_pattern (regexp, 0, Qnil,
458 0, STRING_MULTIBYTE (string));
e59a8453 459 immediate_quit = 1;
8bb43c28
RS
460 re_match_object = string;
461
487282dc 462 val = re_search (bufp, (char *) XSTRING (string)->data,
fc932ac6
RS
463 STRING_BYTES (XSTRING (string)), 0,
464 STRING_BYTES (XSTRING (string)), 0);
e59a8453
RS
465 immediate_quit = 0;
466 return val;
467}
5679531d
KH
468
469/* Match REGEXP against STRING, searching all of STRING ignoring case,
470 and return the index of the match, or negative on failure.
0c8533c6
RS
471 This does not clobber the match data.
472 We assume that STRING contains single-byte characters. */
5679531d
KH
473
474extern Lisp_Object Vascii_downcase_table;
475
476int
b4577c63 477fast_c_string_match_ignore_case (regexp, string)
5679531d
KH
478 Lisp_Object regexp;
479 char *string;
480{
481 int val;
482 struct re_pattern_buffer *bufp;
483 int len = strlen (string);
484
0c8533c6 485 regexp = string_make_unibyte (regexp);
b4577c63 486 re_match_object = Qt;
5679531d 487 bufp = compile_pattern (regexp, 0,
facdc750 488 Vascii_downcase_table, 0,
f8bd51c4 489 0);
5679531d
KH
490 immediate_quit = 1;
491 val = re_search (bufp, string, len, 0, len, 0);
492 immediate_quit = 0;
493 return val;
494}
ca1d1d23 495\f
9169c321
JB
496/* The newline cache: remembering which sections of text have no newlines. */
497
498/* If the user has requested newline caching, make sure it's on.
499 Otherwise, make sure it's off.
500 This is our cheezy way of associating an action with the change of
501 state of a buffer-local variable. */
502static void
503newline_cache_on_off (buf)
504 struct buffer *buf;
505{
506 if (NILP (buf->cache_long_line_scans))
507 {
508 /* It should be off. */
509 if (buf->newline_cache)
510 {
511 free_region_cache (buf->newline_cache);
512 buf->newline_cache = 0;
513 }
514 }
515 else
516 {
517 /* It should be on. */
518 if (buf->newline_cache == 0)
519 buf->newline_cache = new_region_cache ();
520 }
521}
522
523\f
524/* Search for COUNT instances of the character TARGET between START and END.
525
526 If COUNT is positive, search forwards; END must be >= START.
527 If COUNT is negative, search backwards for the -COUNTth instance;
528 END must be <= START.
529 If COUNT is zero, do anything you please; run rogue, for all I care.
530
531 If END is zero, use BEGV or ZV instead, as appropriate for the
532 direction indicated by COUNT.
ffd56f97
JB
533
534 If we find COUNT instances, set *SHORTAGE to zero, and return the
5bfe95c9
RS
535 position after the COUNTth match. Note that for reverse motion
536 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 537
9169c321
JB
538 If we don't find COUNT instances before reaching END, set *SHORTAGE
539 to the number of TARGETs left unfound, and return END.
ffd56f97 540
087a5f81
RS
541 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
542 except when inside redisplay. */
543
dfcf069d 544int
9169c321
JB
545scan_buffer (target, start, end, count, shortage, allow_quit)
546 register int target;
547 int start, end;
548 int count;
549 int *shortage;
087a5f81 550 int allow_quit;
ca1d1d23 551{
9169c321
JB
552 struct region_cache *newline_cache;
553 int direction;
ffd56f97 554
9169c321
JB
555 if (count > 0)
556 {
557 direction = 1;
558 if (! end) end = ZV;
559 }
560 else
561 {
562 direction = -1;
563 if (! end) end = BEGV;
564 }
ffd56f97 565
9169c321
JB
566 newline_cache_on_off (current_buffer);
567 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
568
569 if (shortage != 0)
570 *shortage = 0;
571
087a5f81 572 immediate_quit = allow_quit;
ca1d1d23 573
ffd56f97 574 if (count > 0)
9169c321 575 while (start != end)
ca1d1d23 576 {
9169c321
JB
577 /* Our innermost scanning loop is very simple; it doesn't know
578 about gaps, buffer ends, or the newline cache. ceiling is
579 the position of the last character before the next such
580 obstacle --- the last character the dumb search loop should
581 examine. */
fa8ed3e0
RS
582 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
583 int start_byte = CHAR_TO_BYTE (start);
67ce527d 584 int tem;
9169c321
JB
585
586 /* If we're looking for a newline, consult the newline cache
587 to see where we can avoid some scanning. */
588 if (target == '\n' && newline_cache)
589 {
590 int next_change;
591 immediate_quit = 0;
592 while (region_cache_forward
fa8ed3e0
RS
593 (current_buffer, newline_cache, start_byte, &next_change))
594 start_byte = next_change;
cbe0db0d 595 immediate_quit = allow_quit;
9169c321 596
fa8ed3e0
RS
597 /* START should never be after END. */
598 if (start_byte > ceiling_byte)
599 start_byte = ceiling_byte;
9169c321
JB
600
601 /* Now the text after start is an unknown region, and
602 next_change is the position of the next known region. */
fa8ed3e0 603 ceiling_byte = min (next_change - 1, ceiling_byte);
9169c321
JB
604 }
605
606 /* The dumb loop can only scan text stored in contiguous
607 bytes. BUFFER_CEILING_OF returns the last character
608 position that is contiguous, so the ceiling is the
609 position after that. */
67ce527d
KH
610 tem = BUFFER_CEILING_OF (start_byte);
611 ceiling_byte = min (tem, ceiling_byte);
9169c321
JB
612
613 {
614 /* The termination address of the dumb loop. */
fa8ed3e0
RS
615 register unsigned char *ceiling_addr
616 = BYTE_POS_ADDR (ceiling_byte) + 1;
617 register unsigned char *cursor
618 = BYTE_POS_ADDR (start_byte);
9169c321
JB
619 unsigned char *base = cursor;
620
621 while (cursor < ceiling_addr)
622 {
623 unsigned char *scan_start = cursor;
624
625 /* The dumb loop. */
626 while (*cursor != target && ++cursor < ceiling_addr)
627 ;
628
629 /* If we're looking for newlines, cache the fact that
630 the region from start to cursor is free of them. */
631 if (target == '\n' && newline_cache)
632 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
633 start_byte + scan_start - base,
634 start_byte + cursor - base);
9169c321
JB
635
636 /* Did we find the target character? */
637 if (cursor < ceiling_addr)
638 {
639 if (--count == 0)
640 {
641 immediate_quit = 0;
fa8ed3e0 642 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
9169c321
JB
643 }
644 cursor++;
645 }
646 }
647
fa8ed3e0 648 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321 649 }
ca1d1d23
JB
650 }
651 else
9169c321
JB
652 while (start > end)
653 {
654 /* The last character to check before the next obstacle. */
fa8ed3e0
RS
655 int ceiling_byte = CHAR_TO_BYTE (end);
656 int start_byte = CHAR_TO_BYTE (start);
67ce527d 657 int tem;
9169c321
JB
658
659 /* Consult the newline cache, if appropriate. */
660 if (target == '\n' && newline_cache)
661 {
662 int next_change;
663 immediate_quit = 0;
664 while (region_cache_backward
fa8ed3e0
RS
665 (current_buffer, newline_cache, start_byte, &next_change))
666 start_byte = next_change;
cbe0db0d 667 immediate_quit = allow_quit;
9169c321
JB
668
669 /* Start should never be at or before end. */
fa8ed3e0
RS
670 if (start_byte <= ceiling_byte)
671 start_byte = ceiling_byte + 1;
9169c321
JB
672
673 /* Now the text before start is an unknown region, and
674 next_change is the position of the next known region. */
fa8ed3e0 675 ceiling_byte = max (next_change, ceiling_byte);
9169c321
JB
676 }
677
678 /* Stop scanning before the gap. */
67ce527d
KH
679 tem = BUFFER_FLOOR_OF (start_byte - 1);
680 ceiling_byte = max (tem, ceiling_byte);
9169c321
JB
681
682 {
683 /* The termination address of the dumb loop. */
fa8ed3e0
RS
684 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
685 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
686 unsigned char *base = cursor;
687
688 while (cursor >= ceiling_addr)
689 {
690 unsigned char *scan_start = cursor;
691
692 while (*cursor != target && --cursor >= ceiling_addr)
693 ;
694
695 /* If we're looking for newlines, cache the fact that
696 the region from after the cursor to start is free of them. */
697 if (target == '\n' && newline_cache)
698 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
699 start_byte + cursor - base,
700 start_byte + scan_start - base);
9169c321
JB
701
702 /* Did we find the target character? */
703 if (cursor >= ceiling_addr)
704 {
705 if (++count >= 0)
706 {
707 immediate_quit = 0;
fa8ed3e0 708 return BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
709 }
710 cursor--;
711 }
712 }
713
fa8ed3e0 714 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
715 }
716 }
717
ca1d1d23
JB
718 immediate_quit = 0;
719 if (shortage != 0)
ffd56f97 720 *shortage = count * direction;
9169c321 721 return start;
ca1d1d23 722}
fa8ed3e0
RS
723\f
724/* Search for COUNT instances of a line boundary, which means either a
725 newline or (if selective display enabled) a carriage return.
726 Start at START. If COUNT is negative, search backwards.
727
728 We report the resulting position by calling TEMP_SET_PT_BOTH.
729
730 If we find COUNT instances. we position after (always after,
731 even if scanning backwards) the COUNTth match, and return 0.
732
733 If we don't find COUNT instances before reaching the end of the
734 buffer (or the beginning, if scanning backwards), we return
735 the number of line boundaries left unfound, and position at
736 the limit we bumped up against.
737
738 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
d5d57b92 739 except in special cases. */
ca1d1d23 740
63fa018d 741int
fa8ed3e0
RS
742scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
743 int start, start_byte;
744 int limit, limit_byte;
745 register int count;
746 int allow_quit;
63fa018d 747{
fa8ed3e0
RS
748 int direction = ((count > 0) ? 1 : -1);
749
750 register unsigned char *cursor;
751 unsigned char *base;
752
753 register int ceiling;
754 register unsigned char *ceiling_addr;
755
d5d57b92
RS
756 int old_immediate_quit = immediate_quit;
757
fa8ed3e0
RS
758 /* If we are not in selective display mode,
759 check only for newlines. */
760 int selective_display = (!NILP (current_buffer->selective_display)
761 && !INTEGERP (current_buffer->selective_display));
762
763 /* The code that follows is like scan_buffer
764 but checks for either newline or carriage return. */
765
d5d57b92
RS
766 if (allow_quit)
767 immediate_quit++;
fa8ed3e0
RS
768
769 start_byte = CHAR_TO_BYTE (start);
770
771 if (count > 0)
772 {
773 while (start_byte < limit_byte)
774 {
775 ceiling = BUFFER_CEILING_OF (start_byte);
776 ceiling = min (limit_byte - 1, ceiling);
777 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
778 base = (cursor = BYTE_POS_ADDR (start_byte));
779 while (1)
780 {
781 while (*cursor != '\n' && ++cursor != ceiling_addr)
782 ;
783
784 if (cursor != ceiling_addr)
785 {
786 if (--count == 0)
787 {
d5d57b92 788 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
789 start_byte = start_byte + cursor - base + 1;
790 start = BYTE_TO_CHAR (start_byte);
791 TEMP_SET_PT_BOTH (start, start_byte);
792 return 0;
793 }
794 else
795 if (++cursor == ceiling_addr)
796 break;
797 }
798 else
799 break;
800 }
801 start_byte += cursor - base;
802 }
803 }
804 else
805 {
fa8ed3e0
RS
806 while (start_byte > limit_byte)
807 {
808 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
809 ceiling = max (limit_byte, ceiling);
810 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
811 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
812 while (1)
813 {
814 while (--cursor != ceiling_addr && *cursor != '\n')
815 ;
816
817 if (cursor != ceiling_addr)
818 {
819 if (++count == 0)
820 {
d5d57b92 821 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
822 /* Return the position AFTER the match we found. */
823 start_byte = start_byte + cursor - base + 1;
824 start = BYTE_TO_CHAR (start_byte);
825 TEMP_SET_PT_BOTH (start, start_byte);
826 return 0;
827 }
828 }
829 else
830 break;
831 }
832 /* Here we add 1 to compensate for the last decrement
833 of CURSOR, which took it past the valid range. */
834 start_byte += cursor - base + 1;
835 }
836 }
837
838 TEMP_SET_PT_BOTH (limit, limit_byte);
d5d57b92 839 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
840
841 return count * direction;
63fa018d
RS
842}
843
ca1d1d23 844int
fa8ed3e0 845find_next_newline_no_quit (from, cnt)
ca1d1d23
JB
846 register int from, cnt;
847{
fa8ed3e0 848 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
9169c321
JB
849}
850
9169c321
JB
851/* Like find_next_newline, but returns position before the newline,
852 not after, and only search up to TO. This isn't just
853 find_next_newline (...)-1, because you might hit TO. */
fa8ed3e0 854
9169c321
JB
855int
856find_before_next_newline (from, to, cnt)
cbe0db0d 857 int from, to, cnt;
9169c321
JB
858{
859 int shortage;
860 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
861
862 if (shortage == 0)
863 pos--;
864
865 return pos;
ca1d1d23
JB
866}
867\f
ca1d1d23
JB
868/* Subroutines of Lisp buffer search functions. */
869
870static Lisp_Object
b819a390 871search_command (string, bound, noerror, count, direction, RE, posix)
ca1d1d23
JB
872 Lisp_Object string, bound, noerror, count;
873 int direction;
874 int RE;
b819a390 875 int posix;
ca1d1d23
JB
876{
877 register int np;
9f43ad85 878 int lim, lim_byte;
ca1d1d23
JB
879 int n = direction;
880
881 if (!NILP (count))
882 {
883 CHECK_NUMBER (count, 3);
884 n *= XINT (count);
885 }
886
887 CHECK_STRING (string, 0);
888 if (NILP (bound))
9f43ad85
RS
889 {
890 if (n > 0)
891 lim = ZV, lim_byte = ZV_BYTE;
892 else
893 lim = BEGV, lim_byte = BEGV_BYTE;
894 }
ca1d1d23
JB
895 else
896 {
897 CHECK_NUMBER_COERCE_MARKER (bound, 1);
898 lim = XINT (bound);
6ec8bbd2 899 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
900 error ("Invalid search bound (wrong side of point)");
901 if (lim > ZV)
9f43ad85 902 lim = ZV, lim_byte = ZV_BYTE;
588d2fd5 903 else if (lim < BEGV)
9f43ad85 904 lim = BEGV, lim_byte = BEGV_BYTE;
588d2fd5
KH
905 else
906 lim_byte = CHAR_TO_BYTE (lim);
ca1d1d23
JB
907 }
908
9f43ad85 909 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
ca1d1d23 910 (!NILP (current_buffer->case_fold_search)
facdc750 911 ? current_buffer->case_canon_table
3135e9fd 912 : Qnil),
ca1d1d23 913 (!NILP (current_buffer->case_fold_search)
facdc750 914 ? current_buffer->case_eqv_table
3135e9fd 915 : Qnil),
b819a390 916 posix);
ca1d1d23
JB
917 if (np <= 0)
918 {
919 if (NILP (noerror))
920 return signal_failure (string);
921 if (!EQ (noerror, Qt))
922 {
923 if (lim < BEGV || lim > ZV)
924 abort ();
9f43ad85 925 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
926 return Qnil;
927#if 0 /* This would be clean, but maybe programs depend on
928 a value of nil here. */
481399bf 929 np = lim;
a5f217b8 930#endif
ca1d1d23 931 }
481399bf
RS
932 else
933 return Qnil;
ca1d1d23
JB
934 }
935
936 if (np < BEGV || np > ZV)
937 abort ();
938
939 SET_PT (np);
940
941 return make_number (np);
942}
943\f
fa8ed3e0
RS
944/* Return 1 if REGEXP it matches just one constant string. */
945
b6d6a51c
KH
946static int
947trivial_regexp_p (regexp)
948 Lisp_Object regexp;
949{
fc932ac6 950 int len = STRING_BYTES (XSTRING (regexp));
b6d6a51c
KH
951 unsigned char *s = XSTRING (regexp)->data;
952 unsigned char c;
953 while (--len >= 0)
954 {
955 switch (*s++)
956 {
957 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
958 return 0;
959 case '\\':
960 if (--len < 0)
961 return 0;
962 switch (*s++)
963 {
964 case '|': case '(': case ')': case '`': case '\'': case 'b':
965 case 'B': case '<': case '>': case 'w': case 'W': case 's':
866f60fd 966 case 'S': case '=':
5679531d 967 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 968 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
969 case '6': case '7': case '8': case '9':
970 return 0;
971 }
972 }
973 }
974 return 1;
975}
976
ca325161 977/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 978 starting at position POS and stopping at position LIM,
b819a390 979 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
980 a regular expression if RE is true.
981
982 If N is positive, searching is forward and LIM must be greater than POS.
983 If N is negative, searching is backward and LIM must be less than POS.
984
facdc750 985 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 986 or else the position at the beginning of the Nth occurrence
b819a390
RS
987 (if searching backward) or the end (if searching forward).
988
989 POSIX is nonzero if we want full backtracking (POSIX style)
990 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 991
aff2ce94
RS
992#define TRANSLATE(out, trt, d) \
993do \
994 { \
995 if (! NILP (trt)) \
996 { \
997 Lisp_Object temp; \
998 temp = Faref (trt, make_number (d)); \
999 if (INTEGERP (temp)) \
1000 out = XINT (temp); \
1001 else \
1002 out = d; \
1003 } \
1004 else \
1005 out = d; \
1006 } \
1007while (0)
facdc750 1008
b819a390 1009static int
9f43ad85
RS
1010search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1011 RE, trt, inverse_trt, posix)
ca1d1d23
JB
1012 Lisp_Object string;
1013 int pos;
9f43ad85 1014 int pos_byte;
ca1d1d23 1015 int lim;
9f43ad85 1016 int lim_byte;
ca1d1d23
JB
1017 int n;
1018 int RE;
facdc750
RS
1019 Lisp_Object trt;
1020 Lisp_Object inverse_trt;
b819a390 1021 int posix;
ca1d1d23
JB
1022{
1023 int len = XSTRING (string)->size;
fc932ac6 1024 int len_byte = STRING_BYTES (XSTRING (string));
facdc750 1025 register int i;
ca1d1d23 1026
7074fde6
FP
1027 if (running_asynch_code)
1028 save_search_regs ();
1029
a7e4cdde 1030 /* Searching 0 times means don't move. */
ca1d1d23 1031 /* Null string is found at starting position. */
a7e4cdde 1032 if (len == 0 || n == 0)
ca325161
RS
1033 {
1034 set_search_regs (pos, 0);
1035 return pos;
1036 }
3f57a499 1037
b6d6a51c 1038 if (RE && !trivial_regexp_p (string))
ca1d1d23 1039 {
facdc750
RS
1040 unsigned char *p1, *p2;
1041 int s1, s2;
487282dc
KH
1042 struct re_pattern_buffer *bufp;
1043
0c8533c6
RS
1044 bufp = compile_pattern (string, &search_regs, trt, posix,
1045 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23 1046
ca1d1d23
JB
1047 immediate_quit = 1; /* Quit immediately if user types ^G,
1048 because letting this function finish
1049 can take too long. */
1050 QUIT; /* Do a pending quit right away,
1051 to avoid paradoxical behavior */
1052 /* Get pointers and sizes of the two strings
1053 that make up the visible portion of the buffer. */
1054
1055 p1 = BEGV_ADDR;
fa8ed3e0 1056 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1057 p2 = GAP_END_ADDR;
fa8ed3e0 1058 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1059 if (s1 < 0)
1060 {
1061 p2 = p1;
fa8ed3e0 1062 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1063 s1 = 0;
1064 }
1065 if (s2 < 0)
1066 {
fa8ed3e0 1067 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1068 s2 = 0;
1069 }
8bb43c28
RS
1070 re_match_object = Qnil;
1071
ca1d1d23
JB
1072 while (n < 0)
1073 {
42db823b 1074 int val;
487282dc 1075 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1076 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1077 &search_regs,
42db823b 1078 /* Don't allow match past current point */
4996330b 1079 pos_byte - BEGV_BYTE);
ca1d1d23 1080 if (val == -2)
b6d6a51c
KH
1081 {
1082 matcher_overflow ();
1083 }
ca1d1d23
JB
1084 if (val >= 0)
1085 {
26aff150 1086 pos_byte = search_regs.start[0] + BEGV_BYTE;
4746118a 1087 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1088 if (search_regs.start[i] >= 0)
1089 {
fa8ed3e0
RS
1090 search_regs.start[i]
1091 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1092 search_regs.end[i]
1093 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1094 }
a3668d92 1095 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1096 /* Set pos to the new position. */
1097 pos = search_regs.start[0];
1098 }
1099 else
1100 {
1101 immediate_quit = 0;
1102 return (n);
1103 }
1104 n++;
1105 }
1106 while (n > 0)
1107 {
42db823b 1108 int val;
487282dc 1109 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1110 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1111 &search_regs,
1112 lim_byte - BEGV_BYTE);
ca1d1d23 1113 if (val == -2)
b6d6a51c
KH
1114 {
1115 matcher_overflow ();
1116 }
ca1d1d23
JB
1117 if (val >= 0)
1118 {
26aff150 1119 pos_byte = search_regs.end[0] + BEGV_BYTE;
4746118a 1120 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1121 if (search_regs.start[i] >= 0)
1122 {
fa8ed3e0
RS
1123 search_regs.start[i]
1124 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1125 search_regs.end[i]
1126 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1127 }
a3668d92 1128 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1129 pos = search_regs.end[0];
1130 }
1131 else
1132 {
1133 immediate_quit = 0;
1134 return (0 - n);
1135 }
1136 n--;
1137 }
1138 immediate_quit = 0;
1139 return (pos);
1140 }
1141 else /* non-RE case */
1142 {
facdc750
RS
1143 unsigned char *raw_pattern, *pat;
1144 int raw_pattern_size;
1145 int raw_pattern_size_byte;
1146 unsigned char *patbuf;
1147 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1148 unsigned char *base_pat = XSTRING (string)->data;
1149 int charset_base = -1;
040272ce 1150 int boyer_moore_ok = 1;
facdc750
RS
1151
1152 /* MULTIBYTE says whether the text to be searched is multibyte.
1153 We must convert PATTERN to match that, or we will not really
1154 find things right. */
1155
1156 if (multibyte == STRING_MULTIBYTE (string))
1157 {
7276d3d8 1158 raw_pattern = (unsigned char *) XSTRING (string)->data;
facdc750 1159 raw_pattern_size = XSTRING (string)->size;
fc932ac6 1160 raw_pattern_size_byte = STRING_BYTES (XSTRING (string));
facdc750
RS
1161 }
1162 else if (multibyte)
1163 {
1164 raw_pattern_size = XSTRING (string)->size;
1165 raw_pattern_size_byte
1166 = count_size_as_multibyte (XSTRING (string)->data,
1167 raw_pattern_size);
7276d3d8 1168 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
facdc750
RS
1169 copy_text (XSTRING (string)->data, raw_pattern,
1170 XSTRING (string)->size, 0, 1);
1171 }
1172 else
1173 {
1174 /* Converting multibyte to single-byte.
1175
1176 ??? Perhaps this conversion should be done in a special way
1177 by subtracting nonascii-insert-offset from each non-ASCII char,
1178 so that only the multibyte chars which really correspond to
1179 the chosen single-byte character set can possibly match. */
1180 raw_pattern_size = XSTRING (string)->size;
1181 raw_pattern_size_byte = XSTRING (string)->size;
7276d3d8 1182 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
facdc750 1183 copy_text (XSTRING (string)->data, raw_pattern,
fc932ac6 1184 STRING_BYTES (XSTRING (string)), 1, 0);
facdc750
RS
1185 }
1186
1187 /* Copy and optionally translate the pattern. */
1188 len = raw_pattern_size;
1189 len_byte = raw_pattern_size_byte;
1190 patbuf = (unsigned char *) alloca (len_byte);
1191 pat = patbuf;
1192 base_pat = raw_pattern;
1193 if (multibyte)
1194 {
1195 while (--len >= 0)
1196 {
daaa6ed8 1197 unsigned char str[MAX_MULTIBYTE_LENGTH];
aff2ce94 1198 int c, translated, inverse;
facdc750
RS
1199 int in_charlen, charlen;
1200
1201 /* If we got here and the RE flag is set, it's because we're
1202 dealing with a regexp known to be trivial, so the backslash
1203 just quotes the next character. */
1204 if (RE && *base_pat == '\\')
1205 {
1206 len--;
1207 len_byte--;
1208 base_pat++;
1209 }
1210
1211 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
040272ce 1212
facdc750 1213 /* Translate the character, if requested. */
aff2ce94 1214 TRANSLATE (translated, trt, c);
facdc750
RS
1215 /* If translation changed the byte-length, go back
1216 to the original character. */
daaa6ed8 1217 charlen = CHAR_STRING (translated, str);
facdc750
RS
1218 if (in_charlen != charlen)
1219 {
1220 translated = c;
daaa6ed8 1221 charlen = CHAR_STRING (c, str);
facdc750
RS
1222 }
1223
5ffaf437
RS
1224 /* If we are searching for something strange,
1225 an invalid multibyte code, don't use boyer-moore. */
1226 if (! ASCII_BYTE_P (translated)
1227 && (charlen == 1 /* 8bit code */
1228 || charlen != in_charlen /* invalid multibyte code */
1229 ))
1230 boyer_moore_ok = 0;
1231
aff2ce94
RS
1232 TRANSLATE (inverse, inverse_trt, c);
1233
facdc750
RS
1234 /* Did this char actually get translated?
1235 Would any other char get translated into it? */
aff2ce94 1236 if (translated != c || inverse != c)
facdc750
RS
1237 {
1238 /* Keep track of which character set row
1239 contains the characters that need translation. */
5ffaf437 1240 int charset_base_code = c & ~CHAR_FIELD3_MASK;
facdc750
RS
1241 if (charset_base == -1)
1242 charset_base = charset_base_code;
1243 else if (charset_base != charset_base_code)
1244 /* If two different rows appear, needing translation,
1245 then we cannot use boyer_moore search. */
040272ce 1246 boyer_moore_ok = 0;
aff2ce94 1247 }
facdc750
RS
1248
1249 /* Store this character into the translated pattern. */
1250 bcopy (str, pat, charlen);
1251 pat += charlen;
1252 base_pat += in_charlen;
1253 len_byte -= in_charlen;
1254 }
1255 }
1256 else
1257 {
040272ce
KH
1258 /* Unibyte buffer. */
1259 charset_base = 0;
facdc750
RS
1260 while (--len >= 0)
1261 {
040272ce 1262 int c, translated;
facdc750
RS
1263
1264 /* If we got here and the RE flag is set, it's because we're
1265 dealing with a regexp known to be trivial, so the backslash
1266 just quotes the next character. */
1267 if (RE && *base_pat == '\\')
1268 {
1269 len--;
1270 base_pat++;
1271 }
1272 c = *base_pat++;
aff2ce94 1273 TRANSLATE (translated, trt, c);
facdc750
RS
1274 *pat++ = translated;
1275 }
1276 }
1277
1278 len_byte = pat - patbuf;
1279 len = raw_pattern_size;
1280 pat = base_pat = patbuf;
1281
040272ce 1282 if (boyer_moore_ok)
facdc750 1283 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
aff2ce94
RS
1284 pos, pos_byte, lim, lim_byte,
1285 charset_base);
facdc750
RS
1286 else
1287 return simple_search (n, pat, len, len_byte, trt,
1288 pos, pos_byte, lim, lim_byte);
1289 }
1290}
1291\f
1292/* Do a simple string search N times for the string PAT,
1293 whose length is LEN/LEN_BYTE,
1294 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1295 TRT is the translation table.
f8bd51c4 1296
facdc750
RS
1297 Return the character position where the match is found.
1298 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1299
facdc750
RS
1300 This kind of search works regardless of what is in PAT and
1301 regardless of what is in TRT. It is used in cases where
1302 boyer_moore cannot work. */
1303
1304static int
1305simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1306 int n;
1307 unsigned char *pat;
1308 int len, len_byte;
1309 Lisp_Object trt;
1310 int pos, pos_byte;
1311 int lim, lim_byte;
1312{
1313 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
ab228c24 1314 int forward = n > 0;
facdc750
RS
1315
1316 if (lim > pos && multibyte)
1317 while (n > 0)
1318 {
1319 while (1)
f8bd51c4 1320 {
facdc750
RS
1321 /* Try matching at position POS. */
1322 int this_pos = pos;
1323 int this_pos_byte = pos_byte;
1324 int this_len = len;
1325 int this_len_byte = len_byte;
1326 unsigned char *p = pat;
1327 if (pos + len > lim)
1328 goto stop;
1329
1330 while (this_len > 0)
1331 {
1332 int charlen, buf_charlen;
ab228c24 1333 int pat_ch, buf_ch;
facdc750 1334
ab228c24 1335 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1336 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1337 ZV_BYTE - this_pos_byte,
1338 buf_charlen);
aff2ce94 1339 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1340
1341 if (buf_ch != pat_ch)
1342 break;
ab228c24
RS
1343
1344 this_len_byte -= charlen;
1345 this_len--;
1346 p += charlen;
1347
1348 this_pos_byte += buf_charlen;
1349 this_pos++;
facdc750
RS
1350 }
1351
1352 if (this_len == 0)
1353 {
1354 pos += len;
1355 pos_byte += len_byte;
1356 break;
1357 }
1358
1359 INC_BOTH (pos, pos_byte);
f8bd51c4 1360 }
facdc750
RS
1361
1362 n--;
1363 }
1364 else if (lim > pos)
1365 while (n > 0)
1366 {
1367 while (1)
f8bd51c4 1368 {
facdc750
RS
1369 /* Try matching at position POS. */
1370 int this_pos = pos;
1371 int this_len = len;
1372 unsigned char *p = pat;
1373
1374 if (pos + len > lim)
1375 goto stop;
1376
1377 while (this_len > 0)
1378 {
1379 int pat_ch = *p++;
1380 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1381 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1382
1383 if (buf_ch != pat_ch)
1384 break;
ab228c24
RS
1385
1386 this_len--;
1387 this_pos++;
facdc750
RS
1388 }
1389
1390 if (this_len == 0)
1391 {
1392 pos += len;
1393 break;
1394 }
1395
1396 pos++;
f8bd51c4 1397 }
facdc750
RS
1398
1399 n--;
1400 }
1401 /* Backwards search. */
1402 else if (lim < pos && multibyte)
1403 while (n < 0)
1404 {
1405 while (1)
f8bd51c4 1406 {
facdc750
RS
1407 /* Try matching at position POS. */
1408 int this_pos = pos - len;
1409 int this_pos_byte = pos_byte - len_byte;
1410 int this_len = len;
1411 int this_len_byte = len_byte;
1412 unsigned char *p = pat;
1413
1414 if (pos - len < lim)
1415 goto stop;
1416
1417 while (this_len > 0)
1418 {
1419 int charlen, buf_charlen;
ab228c24 1420 int pat_ch, buf_ch;
facdc750 1421
ab228c24 1422 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1423 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1424 ZV_BYTE - this_pos_byte,
1425 buf_charlen);
aff2ce94 1426 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1427
1428 if (buf_ch != pat_ch)
1429 break;
ab228c24
RS
1430
1431 this_len_byte -= charlen;
1432 this_len--;
1433 p += charlen;
1434 this_pos_byte += buf_charlen;
1435 this_pos++;
facdc750
RS
1436 }
1437
1438 if (this_len == 0)
1439 {
1440 pos -= len;
1441 pos_byte -= len_byte;
1442 break;
1443 }
1444
1445 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1446 }
1447
facdc750
RS
1448 n++;
1449 }
1450 else if (lim < pos)
1451 while (n < 0)
1452 {
1453 while (1)
b6d6a51c 1454 {
facdc750
RS
1455 /* Try matching at position POS. */
1456 int this_pos = pos - len;
1457 int this_len = len;
1458 unsigned char *p = pat;
1459
1460 if (pos - len < lim)
1461 goto stop;
1462
1463 while (this_len > 0)
1464 {
1465 int pat_ch = *p++;
1466 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1467 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1468
1469 if (buf_ch != pat_ch)
1470 break;
ab228c24
RS
1471 this_len--;
1472 this_pos++;
facdc750
RS
1473 }
1474
1475 if (this_len == 0)
b6d6a51c 1476 {
facdc750
RS
1477 pos -= len;
1478 break;
b6d6a51c 1479 }
facdc750
RS
1480
1481 pos--;
b6d6a51c 1482 }
facdc750
RS
1483
1484 n++;
b6d6a51c 1485 }
facdc750
RS
1486
1487 stop:
1488 if (n == 0)
aff2ce94 1489 {
ab228c24
RS
1490 if (forward)
1491 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1492 else
1493 set_search_regs (multibyte ? pos_byte : pos, len_byte);
aff2ce94
RS
1494
1495 return pos;
1496 }
facdc750
RS
1497 else if (n > 0)
1498 return -n;
1499 else
1500 return n;
1501}
1502\f
1503/* Do Boyer-Moore search N times for the string PAT,
1504 whose length is LEN/LEN_BYTE,
1505 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1506 DIRECTION says which direction we search in.
1507 TRT and INVERSE_TRT are translation tables.
1508
1509 This kind of search works if all the characters in PAT that have
1510 nontrivial translation are the same aside from the last byte. This
1511 makes it possible to translate just the last byte of a character,
1512 and do so after just a simple test of the context.
1513
1514 If that criterion is not satisfied, do not call this function. */
1515
1516static int
1517boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
aff2ce94 1518 pos, pos_byte, lim, lim_byte, charset_base)
facdc750
RS
1519 int n;
1520 unsigned char *base_pat;
1521 int len, len_byte;
1522 Lisp_Object trt;
1523 Lisp_Object inverse_trt;
1524 int pos, pos_byte;
1525 int lim, lim_byte;
aff2ce94 1526 int charset_base;
facdc750
RS
1527{
1528 int direction = ((n > 0) ? 1 : -1);
1529 register int dirlen;
1530 int infinity, limit, k, stride_for_teases;
1531 register int *BM_tab;
1532 int *BM_tab_base;
1533 register unsigned char *cursor, *p_limit;
1534 register int i, j;
cb6792d2 1535 unsigned char *pat, *pat_end;
facdc750
RS
1536 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1537
1538 unsigned char simple_translate[0400];
1539 int translate_prev_byte;
1540 int translate_anteprev_byte;
1541
1542#ifdef C_ALLOCA
1543 int BM_tab_space[0400];
1544 BM_tab = &BM_tab_space[0];
1545#else
1546 BM_tab = (int *) alloca (0400 * sizeof (int));
1547#endif
1548 /* The general approach is that we are going to maintain that we know */
1549 /* the first (closest to the present position, in whatever direction */
1550 /* we're searching) character that could possibly be the last */
1551 /* (furthest from present position) character of a valid match. We */
1552 /* advance the state of our knowledge by looking at that character */
1553 /* and seeing whether it indeed matches the last character of the */
1554 /* pattern. If it does, we take a closer look. If it does not, we */
1555 /* move our pointer (to putative last characters) as far as is */
1556 /* logically possible. This amount of movement, which I call a */
1557 /* stride, will be the length of the pattern if the actual character */
1558 /* appears nowhere in the pattern, otherwise it will be the distance */
1559 /* from the last occurrence of that character to the end of the */
1560 /* pattern. */
1561 /* As a coding trick, an enormous stride is coded into the table for */
1562 /* characters that match the last character. This allows use of only */
1563 /* a single test, a test for having gone past the end of the */
1564 /* permissible match region, to test for both possible matches (when */
1565 /* the stride goes past the end immediately) and failure to */
1566 /* match (where you get nudged past the end one stride at a time). */
1567
1568 /* Here we make a "mickey mouse" BM table. The stride of the search */
1569 /* is determined only by the last character of the putative match. */
1570 /* If that character does not match, we will stride the proper */
1571 /* distance to propose a match that superimposes it on the last */
1572 /* instance of a character that matches it (per trt), or misses */
1573 /* it entirely if there is none. */
1574
1575 dirlen = len_byte * direction;
1576 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
cb6792d2
RS
1577
1578 /* Record position after the end of the pattern. */
1579 pat_end = base_pat + len_byte;
1580 /* BASE_PAT points to a character that we start scanning from.
1581 It is the first character in a forward search,
1582 the last character in a backward search. */
facdc750 1583 if (direction < 0)
cb6792d2
RS
1584 base_pat = pat_end - 1;
1585
facdc750
RS
1586 BM_tab_base = BM_tab;
1587 BM_tab += 0400;
1588 j = dirlen; /* to get it in a register */
1589 /* A character that does not appear in the pattern induces a */
1590 /* stride equal to the pattern length. */
1591 while (BM_tab_base != BM_tab)
1592 {
1593 *--BM_tab = j;
1594 *--BM_tab = j;
1595 *--BM_tab = j;
1596 *--BM_tab = j;
1597 }
1598
1599 /* We use this for translation, instead of TRT itself.
1600 We fill this in to handle the characters that actually
1601 occur in the pattern. Others don't matter anyway! */
1602 bzero (simple_translate, sizeof simple_translate);
1603 for (i = 0; i < 0400; i++)
1604 simple_translate[i] = i;
1605
1606 i = 0;
1607 while (i != infinity)
1608 {
cb6792d2 1609 unsigned char *ptr = base_pat + i;
facdc750
RS
1610 i += direction;
1611 if (i == dirlen)
1612 i = infinity;
1613 if (! NILP (trt))
ca1d1d23 1614 {
facdc750 1615 int ch;
aff2ce94 1616 int untranslated;
facdc750
RS
1617 int this_translated = 1;
1618
1619 if (multibyte
cb6792d2
RS
1620 /* Is *PTR the last byte of a character? */
1621 && (pat_end - ptr == 1 || CHAR_HEAD_P (ptr[1])))
ca1d1d23 1622 {
facdc750
RS
1623 unsigned char *charstart = ptr;
1624 while (! CHAR_HEAD_P (*charstart))
1625 charstart--;
aff2ce94 1626 untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
6397418a 1627 if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
facdc750 1628 {
ab228c24 1629 TRANSLATE (ch, trt, untranslated);
aff2ce94
RS
1630 if (! CHAR_HEAD_P (*ptr))
1631 {
1632 translate_prev_byte = ptr[-1];
1633 if (! CHAR_HEAD_P (translate_prev_byte))
1634 translate_anteprev_byte = ptr[-2];
1635 }
facdc750 1636 }
aff2ce94 1637 else
ab228c24
RS
1638 {
1639 this_translated = 0;
1640 ch = *ptr;
1641 }
ca1d1d23 1642 }
facdc750 1643 else if (!multibyte)
aff2ce94 1644 TRANSLATE (ch, trt, *ptr);
ca1d1d23
JB
1645 else
1646 {
facdc750
RS
1647 ch = *ptr;
1648 this_translated = 0;
ca1d1d23 1649 }
facdc750 1650
ab228c24
RS
1651 if (ch > 0400)
1652 j = ((unsigned char) ch) | 0200;
1653 else
1654 j = (unsigned char) ch;
1655
facdc750
RS
1656 if (i == infinity)
1657 stride_for_teases = BM_tab[j];
ab228c24 1658
facdc750
RS
1659 BM_tab[j] = dirlen - i;
1660 /* A translation table is accompanied by its inverse -- see */
1661 /* comment following downcase_table for details */
1662 if (this_translated)
ab228c24
RS
1663 {
1664 int starting_ch = ch;
1665 int starting_j = j;
1666 while (1)
1667 {
1668 TRANSLATE (ch, inverse_trt, ch);
1669 if (ch > 0400)
1670 j = ((unsigned char) ch) | 0200;
1671 else
1672 j = (unsigned char) ch;
1673
1674 /* For all the characters that map into CH,
1675 set up simple_translate to map the last byte
1676 into STARTING_J. */
1677 simple_translate[j] = starting_j;
1678 if (ch == starting_ch)
1679 break;
1680 BM_tab[j] = dirlen - i;
1681 }
1682 }
facdc750
RS
1683 }
1684 else
1685 {
1686 j = *ptr;
1687
1688 if (i == infinity)
1689 stride_for_teases = BM_tab[j];
1690 BM_tab[j] = dirlen - i;
ca1d1d23 1691 }
facdc750
RS
1692 /* stride_for_teases tells how much to stride if we get a */
1693 /* match on the far character but are subsequently */
1694 /* disappointed, by recording what the stride would have been */
1695 /* for that character if the last character had been */
1696 /* different. */
1697 }
1698 infinity = dirlen - infinity;
1699 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1700 /* loop invariant - POS_BYTE points at where last char (first
1701 char if reverse) of pattern would align in a possible match. */
1702 while (n != 0)
1703 {
1704 int tail_end;
1705 unsigned char *tail_end_ptr;
1706
1707 /* It's been reported that some (broken) compiler thinks that
1708 Boolean expressions in an arithmetic context are unsigned.
1709 Using an explicit ?1:0 prevents this. */
1710 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1711 < 0)
1712 return (n * (0 - direction));
1713 /* First we do the part we can by pointers (maybe nothing) */
1714 QUIT;
1715 pat = base_pat;
1716 limit = pos_byte - dirlen + direction;
67ce527d
KH
1717 if (direction > 0)
1718 {
1719 limit = BUFFER_CEILING_OF (limit);
1720 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1721 can take on without hitting edge of buffer or the gap. */
1722 limit = min (limit, pos_byte + 20000);
1723 limit = min (limit, lim_byte - 1);
1724 }
1725 else
1726 {
1727 limit = BUFFER_FLOOR_OF (limit);
1728 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1729 can take on without hitting edge of buffer or the gap. */
1730 limit = max (limit, pos_byte - 20000);
1731 limit = max (limit, lim_byte);
1732 }
facdc750
RS
1733 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1734 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1735
1736 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1737 {
facdc750
RS
1738 unsigned char *p2;
1739
1740 p_limit = BYTE_POS_ADDR (limit);
1741 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1742 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1743 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1744 {
facdc750 1745 if (direction > 0) /* worth duplicating */
ca1d1d23 1746 {
facdc750
RS
1747 /* Use signed comparison if appropriate
1748 to make cursor+infinity sure to be > p_limit.
1749 Assuming that the buffer lies in a range of addresses
1750 that are all "positive" (as ints) or all "negative",
1751 either kind of comparison will work as long
1752 as we don't step by infinity. So pick the kind
1753 that works when we do step by infinity. */
1754 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1755 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1756 cursor += BM_tab[*cursor];
ca1d1d23 1757 else
facdc750
RS
1758 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1759 cursor += BM_tab[*cursor];
1760 }
1761 else
1762 {
1763 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1764 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1765 cursor += BM_tab[*cursor];
1766 else
1767 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1768 cursor += BM_tab[*cursor];
1769 }
ca1d1d23 1770/* If you are here, cursor is beyond the end of the searched region. */
facdc750
RS
1771/* This can happen if you match on the far character of the pattern, */
1772/* because the "stride" of that character is infinity, a number able */
1773/* to throw you well beyond the end of the search. It can also */
1774/* happen if you fail to match within the permitted region and would */
1775/* otherwise try a character beyond that region */
1776 if ((cursor - p_limit) * direction <= len_byte)
1777 break; /* a small overrun is genuine */
1778 cursor -= infinity; /* large overrun = hit */
1779 i = dirlen - direction;
1780 if (! NILP (trt))
1781 {
1782 while ((i -= direction) + direction != 0)
ca1d1d23 1783 {
facdc750
RS
1784 int ch;
1785 cursor -= direction;
1786 /* Translate only the last byte of a character. */
1787 if (! multibyte
1788 || ((cursor == tail_end_ptr
1789 || CHAR_HEAD_P (cursor[1]))
1790 && (CHAR_HEAD_P (cursor[0])
1791 || (translate_prev_byte == cursor[-1]
1792 && (CHAR_HEAD_P (translate_prev_byte)
1793 || translate_anteprev_byte == cursor[-2])))))
1794 ch = simple_translate[*cursor];
1795 else
1796 ch = *cursor;
1797 if (pat[i] != ch)
1798 break;
ca1d1d23 1799 }
facdc750
RS
1800 }
1801 else
1802 {
1803 while ((i -= direction) + direction != 0)
ca1d1d23 1804 {
facdc750
RS
1805 cursor -= direction;
1806 if (pat[i] != *cursor)
1807 break;
ca1d1d23 1808 }
facdc750
RS
1809 }
1810 cursor += dirlen - i - direction; /* fix cursor */
1811 if (i + direction == 0)
1812 {
1813 int position;
0c8533c6 1814
facdc750 1815 cursor -= direction;
1113d9db 1816
facdc750
RS
1817 position = pos_byte + cursor - p2 + ((direction > 0)
1818 ? 1 - len_byte : 0);
1819 set_search_regs (position, len_byte);
ca325161 1820
facdc750
RS
1821 if ((n -= direction) != 0)
1822 cursor += dirlen; /* to resume search */
ca1d1d23 1823 else
facdc750
RS
1824 return ((direction > 0)
1825 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1826 }
facdc750
RS
1827 else
1828 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 1829 }
facdc750
RS
1830 pos_byte += cursor - p2;
1831 }
1832 else
1833 /* Now we'll pick up a clump that has to be done the hard */
1834 /* way because it covers a discontinuity */
1835 {
1836 limit = ((direction > 0)
1837 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1838 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1839 limit = ((direction > 0)
1840 ? min (limit + len_byte, lim_byte - 1)
1841 : max (limit - len_byte, lim_byte));
1842 /* LIMIT is now the last value POS_BYTE can have
1843 and still be valid for a possible match. */
1844 while (1)
ca1d1d23 1845 {
facdc750
RS
1846 /* This loop can be coded for space rather than */
1847 /* speed because it will usually run only once. */
1848 /* (the reach is at most len + 21, and typically */
1849 /* does not exceed len) */
1850 while ((limit - pos_byte) * direction >= 0)
1851 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1852 /* now run the same tests to distinguish going off the */
1853 /* end, a match or a phony match. */
1854 if ((pos_byte - limit) * direction <= len_byte)
1855 break; /* ran off the end */
1856 /* Found what might be a match.
1857 Set POS_BYTE back to last (first if reverse) pos. */
1858 pos_byte -= infinity;
1859 i = dirlen - direction;
1860 while ((i -= direction) + direction != 0)
ca1d1d23 1861 {
facdc750
RS
1862 int ch;
1863 unsigned char *ptr;
1864 pos_byte -= direction;
1865 ptr = BYTE_POS_ADDR (pos_byte);
1866 /* Translate only the last byte of a character. */
1867 if (! multibyte
1868 || ((ptr == tail_end_ptr
1869 || CHAR_HEAD_P (ptr[1]))
1870 && (CHAR_HEAD_P (ptr[0])
1871 || (translate_prev_byte == ptr[-1]
1872 && (CHAR_HEAD_P (translate_prev_byte)
1873 || translate_anteprev_byte == ptr[-2])))))
1874 ch = simple_translate[*ptr];
1875 else
1876 ch = *ptr;
1877 if (pat[i] != ch)
1878 break;
1879 }
1880 /* Above loop has moved POS_BYTE part or all the way
1881 back to the first pos (last pos if reverse).
1882 Set it once again at the last (first if reverse) char. */
1883 pos_byte += dirlen - i- direction;
1884 if (i + direction == 0)
1885 {
1886 int position;
1887 pos_byte -= direction;
1113d9db 1888
facdc750 1889 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
0c8533c6 1890
facdc750 1891 set_search_regs (position, len_byte);
ca325161 1892
facdc750
RS
1893 if ((n -= direction) != 0)
1894 pos_byte += dirlen; /* to resume search */
ca1d1d23 1895 else
facdc750
RS
1896 return ((direction > 0)
1897 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1898 }
facdc750
RS
1899 else
1900 pos_byte += stride_for_teases;
1901 }
1902 }
1903 /* We have done one clump. Can we continue? */
1904 if ((lim_byte - pos_byte) * direction < 0)
1905 return ((0 - n) * direction);
ca1d1d23 1906 }
facdc750 1907 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 1908}
ca325161 1909
fa8ed3e0 1910/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
a7e4cdde
RS
1911 for the overall match just found in the current buffer.
1912 Also clear out the match data for registers 1 and up. */
ca325161
RS
1913
1914static void
fa8ed3e0
RS
1915set_search_regs (beg_byte, nbytes)
1916 int beg_byte, nbytes;
ca325161 1917{
a7e4cdde
RS
1918 int i;
1919
ca325161
RS
1920 /* Make sure we have registers in which to store
1921 the match position. */
1922 if (search_regs.num_regs == 0)
1923 {
2d4a771a
RS
1924 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1925 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 1926 search_regs.num_regs = 2;
ca325161
RS
1927 }
1928
a7e4cdde
RS
1929 /* Clear out the other registers. */
1930 for (i = 1; i < search_regs.num_regs; i++)
1931 {
1932 search_regs.start[i] = -1;
1933 search_regs.end[i] = -1;
1934 }
1935
fa8ed3e0
RS
1936 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1937 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 1938 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 1939}
ca1d1d23
JB
1940\f
1941/* Given a string of words separated by word delimiters,
1942 compute a regexp that matches those exact words
1943 separated by arbitrary punctuation. */
1944
1945static Lisp_Object
1946wordify (string)
1947 Lisp_Object string;
1948{
1949 register unsigned char *p, *o;
0c8533c6 1950 register int i, i_byte, len, punct_count = 0, word_count = 0;
ca1d1d23 1951 Lisp_Object val;
0c8533c6
RS
1952 int prev_c = 0;
1953 int adjust;
ca1d1d23
JB
1954
1955 CHECK_STRING (string, 0);
1956 p = XSTRING (string)->data;
1957 len = XSTRING (string)->size;
1958
0c8533c6
RS
1959 for (i = 0, i_byte = 0; i < len; )
1960 {
1961 int c;
1962
eb99a8dd 1963 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
0c8533c6
RS
1964
1965 if (SYNTAX (c) != Sword)
1966 {
1967 punct_count++;
1968 if (i > 0 && SYNTAX (prev_c) == Sword)
1969 word_count++;
1970 }
ca1d1d23 1971
0c8533c6
RS
1972 prev_c = c;
1973 }
1974
1975 if (SYNTAX (prev_c) == Sword)
1976 word_count++;
1977 if (!word_count)
1978 return build_string ("");
1979
1980 adjust = - punct_count + 5 * (word_count - 1) + 4;
8a2df937
RS
1981 if (STRING_MULTIBYTE (string))
1982 val = make_uninit_multibyte_string (len + adjust,
1983 STRING_BYTES (XSTRING (string))
1984 + adjust);
1985 else
1986 val = make_uninit_string (len + adjust);
ca1d1d23
JB
1987
1988 o = XSTRING (val)->data;
1989 *o++ = '\\';
1990 *o++ = 'b';
1e9582d4 1991 prev_c = 0;
ca1d1d23 1992
1e9582d4
RS
1993 for (i = 0, i_byte = 0; i < len; )
1994 {
1995 int c;
1996 int i_byte_orig = i_byte;
1997
eb99a8dd 1998 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1e9582d4
RS
1999
2000 if (SYNTAX (c) == Sword)
2001 {
2002 bcopy (&XSTRING (string)->data[i_byte_orig], o,
2003 i_byte - i_byte_orig);
2004 o += i_byte - i_byte_orig;
2005 }
2006 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2007 {
2008 *o++ = '\\';
2009 *o++ = 'W';
2010 *o++ = '\\';
2011 *o++ = 'W';
2012 *o++ = '*';
2013 }
2014
2015 prev_c = c;
2016 }
ca1d1d23
JB
2017
2018 *o++ = '\\';
2019 *o++ = 'b';
2020
2021 return val;
2022}
2023\f
2024DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
6af43974 2025 "MSearch backward: ",
ca1d1d23
JB
2026 "Search backward from point for STRING.\n\
2027Set point to the beginning of the occurrence found, and return point.\n\
2028An optional second argument bounds the search; it is a buffer position.\n\
2029The match found must not extend before that position.\n\
2030Optional third argument, if t, means if fail just return nil (no error).\n\
2031 If not nil and not t, position at limit of search and return nil.\n\
2032Optional fourth argument is repeat count--search for successive occurrences.\n\
2033See also the functions `match-beginning', `match-end' and `replace-match'.")
2034 (string, bound, noerror, count)
2035 Lisp_Object string, bound, noerror, count;
2036{
b819a390 2037 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
2038}
2039
6af43974 2040DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
ca1d1d23
JB
2041 "Search forward from point for STRING.\n\
2042Set point to the end of the occurrence found, and return point.\n\
2043An optional second argument bounds the search; it is a buffer position.\n\
2044The match found must not extend after that position. nil is equivalent\n\
2045 to (point-max).\n\
2046Optional third argument, if t, means if fail just return nil (no error).\n\
2047 If not nil and not t, move to limit of search and return nil.\n\
2048Optional fourth argument is repeat count--search for successive occurrences.\n\
2049See also the functions `match-beginning', `match-end' and `replace-match'.")
2050 (string, bound, noerror, count)
2051 Lisp_Object string, bound, noerror, count;
2052{
b819a390 2053 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
2054}
2055
2056DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2057 "sWord search backward: ",
2058 "Search backward from point for STRING, ignoring differences in punctuation.\n\
2059Set point to the beginning of the occurrence found, and return point.\n\
2060An optional second argument bounds the search; it is a buffer position.\n\
2061The match found must not extend before that position.\n\
2062Optional third argument, if t, means if fail just return nil (no error).\n\
2063 If not nil and not t, move to limit of search and return nil.\n\
2064Optional fourth argument is repeat count--search for successive occurrences.")
2065 (string, bound, noerror, count)
2066 Lisp_Object string, bound, noerror, count;
2067{
b819a390 2068 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2069}
2070
2071DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2072 "sWord search: ",
2073 "Search forward from point for STRING, ignoring differences in punctuation.\n\
2074Set point to the end of the occurrence found, and return point.\n\
2075An optional second argument bounds the search; it is a buffer position.\n\
2076The match found must not extend after that position.\n\
2077Optional third argument, if t, means if fail just return nil (no error).\n\
2078 If not nil and not t, move to limit of search and return nil.\n\
2079Optional fourth argument is repeat count--search for successive occurrences.")
2080 (string, bound, noerror, count)
2081 Lisp_Object string, bound, noerror, count;
2082{
b819a390 2083 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
2084}
2085
2086DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2087 "sRE search backward: ",
2088 "Search backward from point for match for regular expression REGEXP.\n\
2089Set point to the beginning of the match, and return point.\n\
2090The match found is the one starting last in the buffer\n\
19c0a730 2091and yet ending before the origin of the search.\n\
ca1d1d23
JB
2092An optional second argument bounds the search; it is a buffer position.\n\
2093The match found must start at or after that position.\n\
2094Optional third argument, if t, means if fail just return nil (no error).\n\
2095 If not nil and not t, move to limit of search and return nil.\n\
2096Optional fourth argument is repeat count--search for successive occurrences.\n\
9dddb23f 2097See also the functions `match-beginning', `match-end', `match-string',\n\
cb6560a1 2098and `replace-match'.")
19c0a730
KH
2099 (regexp, bound, noerror, count)
2100 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2101{
b819a390 2102 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2103}
2104
2105DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2106 "sRE search: ",
2107 "Search forward from point for regular expression REGEXP.\n\
2108Set point to the end of the occurrence found, and return point.\n\
2109An optional second argument bounds the search; it is a buffer position.\n\
2110The match found must not extend after that position.\n\
2111Optional third argument, if t, means if fail just return nil (no error).\n\
2112 If not nil and not t, move to limit of search and return nil.\n\
2113Optional fourth argument is repeat count--search for successive occurrences.\n\
9dddb23f 2114See also the functions `match-beginning', `match-end', `match-string',\n\
cb6560a1 2115and `replace-match'.")
19c0a730
KH
2116 (regexp, bound, noerror, count)
2117 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2118{
b819a390
RS
2119 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2120}
2121
2122DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2123 "sPosix search backward: ",
2124 "Search backward from point for match for regular expression REGEXP.\n\
2125Find the longest match in accord with Posix regular expression rules.\n\
2126Set point to the beginning of the match, and return point.\n\
2127The match found is the one starting last in the buffer\n\
2128and yet ending before the origin of the search.\n\
2129An optional second argument bounds the search; it is a buffer position.\n\
2130The match found must start at or after that position.\n\
2131Optional third argument, if t, means if fail just return nil (no error).\n\
2132 If not nil and not t, move to limit of search and return nil.\n\
2133Optional fourth argument is repeat count--search for successive occurrences.\n\
9dddb23f 2134See also the functions `match-beginning', `match-end', `match-string',\n\
cb6560a1 2135and `replace-match'.")
b819a390
RS
2136 (regexp, bound, noerror, count)
2137 Lisp_Object regexp, bound, noerror, count;
2138{
2139 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2140}
2141
2142DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2143 "sPosix search: ",
2144 "Search forward from point for regular expression REGEXP.\n\
2145Find the longest match in accord with Posix regular expression rules.\n\
2146Set point to the end of the occurrence found, and return point.\n\
2147An optional second argument bounds the search; it is a buffer position.\n\
2148The match found must not extend after that position.\n\
2149Optional third argument, if t, means if fail just return nil (no error).\n\
2150 If not nil and not t, move to limit of search and return nil.\n\
2151Optional fourth argument is repeat count--search for successive occurrences.\n\
9dddb23f 2152See also the functions `match-beginning', `match-end', `match-string',\n\
cb6560a1 2153and `replace-match'.")
b819a390
RS
2154 (regexp, bound, noerror, count)
2155 Lisp_Object regexp, bound, noerror, count;
2156{
2157 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2158}
2159\f
d7a5ad5f 2160DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
ca1d1d23
JB
2161 "Replace text matched by last search with NEWTEXT.\n\
2162If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
5b9cf4b2
RS
2163Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
2164based on the replaced text.\n\
2165If the replaced text has only capital letters\n\
2166and has at least one multiletter word, convert NEWTEXT to all caps.\n\
2167If the replaced text has at least one word starting with a capital letter,\n\
2168then capitalize each word in NEWTEXT.\n\n\
ca1d1d23
JB
2169If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
2170Otherwise treat `\\' as special:\n\
2171 `\\&' in NEWTEXT means substitute original matched text.\n\
2172 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
2173 If Nth parens didn't match, substitute nothing.\n\
2174 `\\\\' means insert one `\\'.\n\
1113d9db 2175FIXEDCASE and LITERAL are optional arguments.\n\
080c45fd
RS
2176Leaves point at end of replacement text.\n\
2177\n\
2178The optional fourth argument STRING can be a string to modify.\n\
2179In that case, this function creates and returns a new string\n\
d7a5ad5f
RS
2180which is made by replacing the part of STRING that was matched.\n\
2181\n\
2182The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
2183It says to replace just that subexpression instead of the whole match.\n\
2184This is useful only after a regular expression search or match\n\
2185since only regular expressions have distinguished subexpressions.")
2186 (newtext, fixedcase, literal, string, subexp)
2187 Lisp_Object newtext, fixedcase, literal, string, subexp;
ca1d1d23
JB
2188{
2189 enum { nochange, all_caps, cap_initial } case_action;
ac3b28b1 2190 register int pos, pos_byte;
ca1d1d23 2191 int some_multiletter_word;
97832bd0 2192 int some_lowercase;
73dc8771 2193 int some_uppercase;
208767c3 2194 int some_nonuppercase_initial;
ca1d1d23
JB
2195 register int c, prevc;
2196 int inslen;
d7a5ad5f 2197 int sub;
3e18eecf 2198 int opoint, newpoint;
ca1d1d23 2199
16fdc568 2200 CHECK_STRING (newtext, 0);
ca1d1d23 2201
080c45fd
RS
2202 if (! NILP (string))
2203 CHECK_STRING (string, 4);
2204
ca1d1d23
JB
2205 case_action = nochange; /* We tried an initialization */
2206 /* but some C compilers blew it */
4746118a
JB
2207
2208 if (search_regs.num_regs <= 0)
2209 error ("replace-match called before any match found");
2210
d7a5ad5f
RS
2211 if (NILP (subexp))
2212 sub = 0;
2213 else
2214 {
2215 CHECK_NUMBER (subexp, 3);
2216 sub = XINT (subexp);
2217 if (sub < 0 || sub >= search_regs.num_regs)
2218 args_out_of_range (subexp, make_number (search_regs.num_regs));
2219 }
2220
080c45fd
RS
2221 if (NILP (string))
2222 {
d7a5ad5f
RS
2223 if (search_regs.start[sub] < BEGV
2224 || search_regs.start[sub] > search_regs.end[sub]
2225 || search_regs.end[sub] > ZV)
2226 args_out_of_range (make_number (search_regs.start[sub]),
2227 make_number (search_regs.end[sub]));
080c45fd
RS
2228 }
2229 else
2230 {
d7a5ad5f
RS
2231 if (search_regs.start[sub] < 0
2232 || search_regs.start[sub] > search_regs.end[sub]
2233 || search_regs.end[sub] > XSTRING (string)->size)
2234 args_out_of_range (make_number (search_regs.start[sub]),
2235 make_number (search_regs.end[sub]));
080c45fd 2236 }
ca1d1d23
JB
2237
2238 if (NILP (fixedcase))
2239 {
2240 /* Decide how to casify by examining the matched text. */
ac3b28b1 2241 int last;
ca1d1d23 2242
ac3b28b1
KH
2243 pos = search_regs.start[sub];
2244 last = search_regs.end[sub];
fa8ed3e0
RS
2245
2246 if (NILP (string))
ac3b28b1 2247 pos_byte = CHAR_TO_BYTE (pos);
fa8ed3e0 2248 else
ac3b28b1 2249 pos_byte = string_char_to_byte (string, pos);
fa8ed3e0 2250
ca1d1d23
JB
2251 prevc = '\n';
2252 case_action = all_caps;
2253
2254 /* some_multiletter_word is set nonzero if any original word
2255 is more than one letter long. */
2256 some_multiletter_word = 0;
97832bd0 2257 some_lowercase = 0;
208767c3 2258 some_nonuppercase_initial = 0;
73dc8771 2259 some_uppercase = 0;
ca1d1d23 2260
ac3b28b1 2261 while (pos < last)
ca1d1d23 2262 {
080c45fd 2263 if (NILP (string))
ac3b28b1
KH
2264 {
2265 c = FETCH_CHAR (pos_byte);
2266 INC_BOTH (pos, pos_byte);
2267 }
080c45fd 2268 else
ac3b28b1 2269 FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
080c45fd 2270
ca1d1d23
JB
2271 if (LOWERCASEP (c))
2272 {
2273 /* Cannot be all caps if any original char is lower case */
2274
97832bd0 2275 some_lowercase = 1;
ca1d1d23 2276 if (SYNTAX (prevc) != Sword)
208767c3 2277 some_nonuppercase_initial = 1;
ca1d1d23
JB
2278 else
2279 some_multiletter_word = 1;
2280 }
2281 else if (!NOCASEP (c))
2282 {
73dc8771 2283 some_uppercase = 1;
97832bd0 2284 if (SYNTAX (prevc) != Sword)
c4d460ce 2285 ;
97832bd0 2286 else
ca1d1d23
JB
2287 some_multiletter_word = 1;
2288 }
208767c3
RS
2289 else
2290 {
2291 /* If the initial is a caseless word constituent,
2292 treat that like a lowercase initial. */
2293 if (SYNTAX (prevc) != Sword)
2294 some_nonuppercase_initial = 1;
2295 }
ca1d1d23
JB
2296
2297 prevc = c;
2298 }
2299
97832bd0
RS
2300 /* Convert to all caps if the old text is all caps
2301 and has at least one multiletter word. */
2302 if (! some_lowercase && some_multiletter_word)
2303 case_action = all_caps;
c4d460ce 2304 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2305 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2306 case_action = cap_initial;
208767c3 2307 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2308 /* Should x -> yz, operating on X, give Yz or YZ?
2309 We'll assume the latter. */
2310 case_action = all_caps;
97832bd0
RS
2311 else
2312 case_action = nochange;
ca1d1d23
JB
2313 }
2314
080c45fd
RS
2315 /* Do replacement in a string. */
2316 if (!NILP (string))
2317 {
2318 Lisp_Object before, after;
2319
2320 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2321 make_number (search_regs.start[sub]));
2322 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2323
636a5e28
RS
2324 /* Substitute parts of the match into NEWTEXT
2325 if desired. */
080c45fd
RS
2326 if (NILP (literal))
2327 {
d131e79c
RS
2328 int lastpos = 0;
2329 int lastpos_byte = 0;
080c45fd
RS
2330 /* We build up the substituted string in ACCUM. */
2331 Lisp_Object accum;
2332 Lisp_Object middle;
ac3b28b1 2333 int length = STRING_BYTES (XSTRING (newtext));
080c45fd
RS
2334
2335 accum = Qnil;
2336
ac3b28b1 2337 for (pos_byte = 0, pos = 0; pos_byte < length;)
080c45fd
RS
2338 {
2339 int substart = -1;
2340 int subend;
1e79ec24 2341 int delbackslash = 0;
080c45fd 2342
0c8533c6
RS
2343 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2344
080c45fd
RS
2345 if (c == '\\')
2346 {
0c8533c6 2347 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
eb99a8dd 2348
080c45fd
RS
2349 if (c == '&')
2350 {
d7a5ad5f
RS
2351 substart = search_regs.start[sub];
2352 subend = search_regs.end[sub];
080c45fd
RS
2353 }
2354 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2355 {
ad10348f 2356 if (search_regs.start[c - '0'] >= 0)
080c45fd
RS
2357 {
2358 substart = search_regs.start[c - '0'];
2359 subend = search_regs.end[c - '0'];
2360 }
2361 }
1e79ec24
KH
2362 else if (c == '\\')
2363 delbackslash = 1;
636a5e28
RS
2364 else
2365 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2366 }
2367 if (substart >= 0)
2368 {
d131e79c
RS
2369 if (pos - 2 != lastpos)
2370 middle = substring_both (newtext, lastpos,
2371 lastpos_byte,
2372 pos - 2, pos_byte - 2);
080c45fd
RS
2373 else
2374 middle = Qnil;
2375 accum = concat3 (accum, middle,
0c8533c6
RS
2376 Fsubstring (string,
2377 make_number (substart),
080c45fd
RS
2378 make_number (subend)));
2379 lastpos = pos;
0c8533c6 2380 lastpos_byte = pos_byte;
080c45fd 2381 }
1e79ec24
KH
2382 else if (delbackslash)
2383 {
d131e79c
RS
2384 middle = substring_both (newtext, lastpos,
2385 lastpos_byte,
2386 pos - 1, pos_byte - 1);
0c8533c6 2387
1e79ec24
KH
2388 accum = concat2 (accum, middle);
2389 lastpos = pos;
0c8533c6 2390 lastpos_byte = pos_byte;
1e79ec24 2391 }
080c45fd
RS
2392 }
2393
d131e79c
RS
2394 if (pos != lastpos)
2395 middle = substring_both (newtext, lastpos,
2396 lastpos_byte,
0c8533c6 2397 pos, pos_byte);
080c45fd
RS
2398 else
2399 middle = Qnil;
2400
2401 newtext = concat2 (accum, middle);
2402 }
2403
636a5e28 2404 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2405 if (case_action == all_caps)
2406 newtext = Fupcase (newtext);
2407 else if (case_action == cap_initial)
2b2eead9 2408 newtext = Fupcase_initials (newtext);
080c45fd
RS
2409
2410 return concat3 (before, newtext, after);
2411 }
2412
b0eba991 2413 /* Record point, the move (quietly) to the start of the match. */
9160906f 2414 if (PT >= search_regs.end[sub])
b0eba991 2415 opoint = PT - ZV;
9160906f
RS
2416 else if (PT > search_regs.start[sub])
2417 opoint = search_regs.end[sub] - ZV;
b0eba991
RS
2418 else
2419 opoint = PT;
2420
fa8ed3e0 2421 TEMP_SET_PT (search_regs.start[sub]);
b0eba991 2422
9a76659d
JB
2423 /* We insert the replacement text before the old text, and then
2424 delete the original text. This means that markers at the
2425 beginning or end of the original will float to the corresponding
2426 position in the replacement. */
ca1d1d23 2427 if (!NILP (literal))
16fdc568 2428 Finsert_and_inherit (1, &newtext);
ca1d1d23
JB
2429 else
2430 {
ac3b28b1 2431 int length = STRING_BYTES (XSTRING (newtext));
68e69fbd
RS
2432 unsigned char *substed;
2433 int substed_alloc_size, substed_len;
3bc25e52
KH
2434 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2435 int str_multibyte = STRING_MULTIBYTE (newtext);
2436 Lisp_Object rev_tbl;
2437
2438 rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2439 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2440 make_number (0))
2441 : Qnil);
ac3b28b1 2442
68e69fbd
RS
2443 substed_alloc_size = length * 2 + 100;
2444 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2445 substed_len = 0;
2446
3bc25e52
KH
2447 /* Go thru NEWTEXT, producing the actual text to insert in
2448 SUBSTED while adjusting multibyteness to that of the current
2449 buffer. */
ca1d1d23 2450
ac3b28b1 2451 for (pos_byte = 0, pos = 0; pos_byte < length;)
ca1d1d23 2452 {
68e69fbd 2453 unsigned char str[MAX_MULTIBYTE_LENGTH];
f8ce8a0d
GM
2454 unsigned char *add_stuff = NULL;
2455 int add_len = 0;
68e69fbd 2456 int idx = -1;
9a76659d 2457
3bc25e52
KH
2458 if (str_multibyte)
2459 {
eb99a8dd 2460 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
3bc25e52
KH
2461 if (!buf_multibyte)
2462 c = multibyte_char_to_unibyte (c, rev_tbl);
2463 }
2464 else
2465 {
2466 /* Note that we don't have to increment POS. */
2467 c = XSTRING (newtext)->data[pos_byte++];
2468 if (buf_multibyte)
2469 c = unibyte_char_to_multibyte (c);
2470 }
ac3b28b1 2471
68e69fbd
RS
2472 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2473 or set IDX to a match index, which means put that part
2474 of the buffer text into SUBSTED. */
2475
ca1d1d23
JB
2476 if (c == '\\')
2477 {
3bc25e52
KH
2478 if (str_multibyte)
2479 {
eb99a8dd
KH
2480 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2481 pos, pos_byte);
3bc25e52
KH
2482 if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2483 c = multibyte_char_to_unibyte (c, rev_tbl);
2484 }
2485 else
2486 {
2487 c = XSTRING (newtext)->data[pos_byte++];
2488 if (buf_multibyte)
2489 c = unibyte_char_to_multibyte (c);
2490 }
2491
ca1d1d23 2492 if (c == '&')
68e69fbd 2493 idx = sub;
78445046 2494 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
2495 {
2496 if (search_regs.start[c - '0'] >= 1)
68e69fbd 2497 idx = c - '0';
ca1d1d23 2498 }
636a5e28 2499 else if (c == '\\')
68e69fbd 2500 add_len = 1, add_stuff = "\\";
636a5e28 2501 else
3bc25e52
KH
2502 {
2503 xfree (substed);
2504 error ("Invalid use of `\\' in replacement text");
2505 }
ca1d1d23
JB
2506 }
2507 else
68e69fbd
RS
2508 {
2509 add_len = CHAR_STRING (c, str);
2510 add_stuff = str;
2511 }
2512
2513 /* If we want to copy part of a previous match,
2514 set up ADD_STUFF and ADD_LEN to point to it. */
2515 if (idx >= 0)
2516 {
2517 int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2518 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2519 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2520 move_gap (search_regs.start[idx]);
2521 add_stuff = BYTE_POS_ADDR (begbyte);
2522 }
2523
2524 /* Now the stuff we want to add to SUBSTED
2525 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2526
2527 /* Make sure SUBSTED is big enough. */
2528 if (substed_len + add_len >= substed_alloc_size)
2529 {
2530 substed_alloc_size = substed_len + add_len + 500;
2531 substed = (unsigned char *) xrealloc (substed,
2532 substed_alloc_size + 1);
2533 }
2534
2535 /* Now add to the end of SUBSTED. */
f8ce8a0d
GM
2536 if (add_stuff)
2537 {
2538 bcopy (add_stuff, substed + substed_len, add_len);
2539 substed_len += add_len;
2540 }
ca1d1d23 2541 }
68e69fbd
RS
2542
2543 /* Now insert what we accumulated. */
2544 insert_and_inherit (substed, substed_len);
2545
2546 xfree (substed);
ca1d1d23
JB
2547 }
2548
6ec8bbd2 2549 inslen = PT - (search_regs.start[sub]);
d7a5ad5f 2550 del_range (search_regs.start[sub] + inslen, search_regs.end[sub] + inslen);
ca1d1d23
JB
2551
2552 if (case_action == all_caps)
6ec8bbd2 2553 Fupcase_region (make_number (PT - inslen), make_number (PT));
ca1d1d23 2554 else if (case_action == cap_initial)
6ec8bbd2 2555 Fupcase_initials_region (make_number (PT - inslen), make_number (PT));
b0eba991 2556
3e18eecf
RS
2557 newpoint = PT;
2558
b0eba991 2559 /* Put point back where it was in the text. */
8d808a65 2560 if (opoint <= 0)
fa8ed3e0 2561 TEMP_SET_PT (opoint + ZV);
b0eba991 2562 else
fa8ed3e0 2563 TEMP_SET_PT (opoint);
b0eba991
RS
2564
2565 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2566 move_if_not_intangible (newpoint);
b0eba991 2567
ca1d1d23
JB
2568 return Qnil;
2569}
2570\f
2571static Lisp_Object
2572match_limit (num, beginningp)
2573 Lisp_Object num;
2574 int beginningp;
2575{
2576 register int n;
2577
2578 CHECK_NUMBER (num, 0);
2579 n = XINT (num);
4746118a
JB
2580 if (n < 0 || n >= search_regs.num_regs)
2581 args_out_of_range (num, make_number (search_regs.num_regs));
2582 if (search_regs.num_regs <= 0
2583 || search_regs.start[n] < 0)
ca1d1d23
JB
2584 return Qnil;
2585 return (make_number ((beginningp) ? search_regs.start[n]
2586 : search_regs.end[n]));
2587}
2588
2589DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2590 "Return position of start of text matched by last search.\n\
5806161b
EN
2591SUBEXP, a number, specifies which parenthesized expression in the last\n\
2592 regexp.\n\
2593Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2594 SUBEXP pairs.\n\
ca1d1d23 2595Zero means the entire text matched by the whole regexp or whole string.")
5806161b
EN
2596 (subexp)
2597 Lisp_Object subexp;
ca1d1d23 2598{
5806161b 2599 return match_limit (subexp, 1);
ca1d1d23
JB
2600}
2601
2602DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2603 "Return position of end of text matched by last search.\n\
5806161b
EN
2604SUBEXP, a number, specifies which parenthesized expression in the last\n\
2605 regexp.\n\
2606Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2607 SUBEXP pairs.\n\
ca1d1d23 2608Zero means the entire text matched by the whole regexp or whole string.")
5806161b
EN
2609 (subexp)
2610 Lisp_Object subexp;
ca1d1d23 2611{
5806161b 2612 return match_limit (subexp, 0);
ca1d1d23
JB
2613}
2614
56256c2a 2615DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
ca1d1d23
JB
2616 "Return a list containing all info on what the last search matched.\n\
2617Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
2618All the elements are markers or nil (nil if the Nth pair didn't match)\n\
2619if the last match was on a buffer; integers or nil if a string was matched.\n\
56256c2a
RS
2620Use `store-match-data' to reinstate the data in this list.\n\
2621\n\
2622If INTEGERS (the optional first argument) is non-nil, always use integers\n\
8ca821e9 2623\(rather than markers) to represent buffer positions.\n\
56256c2a
RS
2624If REUSE is a list, reuse it as part of the value. If REUSE is long enough\n\
2625to hold all the values, and if INTEGERS is non-nil, no consing is done.")
2626 (integers, reuse)
2627 Lisp_Object integers, reuse;
ca1d1d23 2628{
56256c2a 2629 Lisp_Object tail, prev;
4746118a 2630 Lisp_Object *data;
ca1d1d23
JB
2631 int i, len;
2632
daa37602 2633 if (NILP (last_thing_searched))
c36bcf1b 2634 return Qnil;
daa37602 2635
4746118a
JB
2636 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2637 * sizeof (Lisp_Object));
2638
ca1d1d23 2639 len = -1;
4746118a 2640 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2641 {
2642 int start = search_regs.start[i];
2643 if (start >= 0)
2644 {
56256c2a
RS
2645 if (EQ (last_thing_searched, Qt)
2646 || ! NILP (integers))
ca1d1d23 2647 {
c235cce7
KH
2648 XSETFASTINT (data[2 * i], start);
2649 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2650 }
0ed62dc7 2651 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2652 {
2653 data[2 * i] = Fmake_marker ();
daa37602
JB
2654 Fset_marker (data[2 * i],
2655 make_number (start),
2656 last_thing_searched);
ca1d1d23
JB
2657 data[2 * i + 1] = Fmake_marker ();
2658 Fset_marker (data[2 * i + 1],
daa37602
JB
2659 make_number (search_regs.end[i]),
2660 last_thing_searched);
ca1d1d23 2661 }
daa37602
JB
2662 else
2663 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2664 abort ();
2665
ca1d1d23
JB
2666 len = i;
2667 }
2668 else
2669 data[2 * i] = data [2 * i + 1] = Qnil;
2670 }
56256c2a
RS
2671
2672 /* If REUSE is not usable, cons up the values and return them. */
2673 if (! CONSP (reuse))
2674 return Flist (2 * len + 2, data);
2675
2676 /* If REUSE is a list, store as many value elements as will fit
2677 into the elements of REUSE. */
2678 for (i = 0, tail = reuse; CONSP (tail);
c1d497be 2679 i++, tail = XCDR (tail))
56256c2a
RS
2680 {
2681 if (i < 2 * len + 2)
c1d497be 2682 XCAR (tail) = data[i];
56256c2a 2683 else
c1d497be 2684 XCAR (tail) = Qnil;
56256c2a
RS
2685 prev = tail;
2686 }
2687
2688 /* If we couldn't fit all value elements into REUSE,
2689 cons up the rest of them and add them to the end of REUSE. */
2690 if (i < 2 * len + 2)
c1d497be 2691 XCDR (prev) = Flist (2 * len + 2 - i, data + i);
56256c2a
RS
2692
2693 return reuse;
ca1d1d23
JB
2694}
2695
2696
3f1c005b 2697DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
ca1d1d23
JB
2698 "Set internal data on last search match from elements of LIST.\n\
2699LIST should have been created by calling `match-data' previously.")
2700 (list)
2701 register Lisp_Object list;
2702{
2703 register int i;
2704 register Lisp_Object marker;
2705
7074fde6
FP
2706 if (running_asynch_code)
2707 save_search_regs ();
2708
ca1d1d23 2709 if (!CONSP (list) && !NILP (list))
b37902c8 2710 list = wrong_type_argument (Qconsp, list);
ca1d1d23 2711
daa37602
JB
2712 /* Unless we find a marker with a buffer in LIST, assume that this
2713 match data came from a string. */
2714 last_thing_searched = Qt;
2715
4746118a
JB
2716 /* Allocate registers if they don't already exist. */
2717 {
d084e942 2718 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
2719
2720 if (length > search_regs.num_regs)
2721 {
1113d9db
JB
2722 if (search_regs.num_regs == 0)
2723 {
2724 search_regs.start
2725 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2726 search_regs.end
2727 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2728 }
4746118a 2729 else
1113d9db
JB
2730 {
2731 search_regs.start
2732 = (regoff_t *) xrealloc (search_regs.start,
2733 length * sizeof (regoff_t));
2734 search_regs.end
2735 = (regoff_t *) xrealloc (search_regs.end,
2736 length * sizeof (regoff_t));
2737 }
4746118a 2738
487282dc 2739 search_regs.num_regs = length;
4746118a
JB
2740 }
2741 }
2742
2743 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2744 {
2745 marker = Fcar (list);
2746 if (NILP (marker))
2747 {
2748 search_regs.start[i] = -1;
2749 list = Fcdr (list);
2750 }
2751 else
2752 {
0ed62dc7 2753 if (MARKERP (marker))
daa37602
JB
2754 {
2755 if (XMARKER (marker)->buffer == 0)
c235cce7 2756 XSETFASTINT (marker, 0);
daa37602 2757 else
a3668d92 2758 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
daa37602 2759 }
ca1d1d23
JB
2760
2761 CHECK_NUMBER_COERCE_MARKER (marker, 0);
2762 search_regs.start[i] = XINT (marker);
2763 list = Fcdr (list);
2764
2765 marker = Fcar (list);
0ed62dc7 2766 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
c235cce7 2767 XSETFASTINT (marker, 0);
ca1d1d23
JB
2768
2769 CHECK_NUMBER_COERCE_MARKER (marker, 0);
2770 search_regs.end[i] = XINT (marker);
2771 }
2772 list = Fcdr (list);
2773 }
2774
2775 return Qnil;
2776}
2777
7074fde6
FP
2778/* If non-zero the match data have been saved in saved_search_regs
2779 during the execution of a sentinel or filter. */
75ebf74b 2780static int search_regs_saved;
7074fde6
FP
2781static struct re_registers saved_search_regs;
2782
2783/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2784 if asynchronous code (filter or sentinel) is running. */
2785static void
2786save_search_regs ()
2787{
2788 if (!search_regs_saved)
2789 {
2790 saved_search_regs.num_regs = search_regs.num_regs;
2791 saved_search_regs.start = search_regs.start;
2792 saved_search_regs.end = search_regs.end;
2793 search_regs.num_regs = 0;
2d4a771a
RS
2794 search_regs.start = 0;
2795 search_regs.end = 0;
7074fde6
FP
2796
2797 search_regs_saved = 1;
2798 }
2799}
2800
2801/* Called upon exit from filters and sentinels. */
2802void
2803restore_match_data ()
2804{
2805 if (search_regs_saved)
2806 {
2807 if (search_regs.num_regs > 0)
2808 {
2809 xfree (search_regs.start);
2810 xfree (search_regs.end);
2811 }
2812 search_regs.num_regs = saved_search_regs.num_regs;
2813 search_regs.start = saved_search_regs.start;
2814 search_regs.end = saved_search_regs.end;
2815
2816 search_regs_saved = 0;
2817 }
2818}
2819
ca1d1d23
JB
2820/* Quote a string to inactivate reg-expr chars */
2821
2822DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2823 "Return a regexp string which matches exactly STRING and nothing else.")
5806161b
EN
2824 (string)
2825 Lisp_Object string;
ca1d1d23
JB
2826{
2827 register unsigned char *in, *out, *end;
2828 register unsigned char *temp;
0c8533c6 2829 int backslashes_added = 0;
ca1d1d23 2830
5806161b 2831 CHECK_STRING (string, 0);
ca1d1d23 2832
fc932ac6 2833 temp = (unsigned char *) alloca (STRING_BYTES (XSTRING (string)) * 2);
ca1d1d23
JB
2834
2835 /* Now copy the data into the new string, inserting escapes. */
2836
5806161b 2837 in = XSTRING (string)->data;
fc932ac6 2838 end = in + STRING_BYTES (XSTRING (string));
ca1d1d23
JB
2839 out = temp;
2840
2841 for (; in != end; in++)
2842 {
2843 if (*in == '[' || *in == ']'
2844 || *in == '*' || *in == '.' || *in == '\\'
2845 || *in == '?' || *in == '+'
2846 || *in == '^' || *in == '$')
0c8533c6 2847 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
2848 *out++ = *in;
2849 }
2850
3f8100f1 2851 return make_specified_string (temp,
0c8533c6 2852 XSTRING (string)->size + backslashes_added,
3f8100f1
RS
2853 out - temp,
2854 STRING_MULTIBYTE (string));
ca1d1d23
JB
2855}
2856\f
dfcf069d 2857void
ca1d1d23
JB
2858syms_of_search ()
2859{
2860 register int i;
2861
487282dc
KH
2862 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2863 {
2864 searchbufs[i].buf.allocated = 100;
2865 searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
2866 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2867 searchbufs[i].regexp = Qnil;
2868 staticpro (&searchbufs[i].regexp);
2869 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2870 }
2871 searchbuf_head = &searchbufs[0];
ca1d1d23
JB
2872
2873 Qsearch_failed = intern ("search-failed");
2874 staticpro (&Qsearch_failed);
2875 Qinvalid_regexp = intern ("invalid-regexp");
2876 staticpro (&Qinvalid_regexp);
2877
2878 Fput (Qsearch_failed, Qerror_conditions,
2879 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2880 Fput (Qsearch_failed, Qerror_message,
2881 build_string ("Search failed"));
2882
2883 Fput (Qinvalid_regexp, Qerror_conditions,
2884 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2885 Fput (Qinvalid_regexp, Qerror_message,
2886 build_string ("Invalid regexp"));
2887
daa37602
JB
2888 last_thing_searched = Qnil;
2889 staticpro (&last_thing_searched);
2890
ca1d1d23 2891 defsubr (&Slooking_at);
b819a390
RS
2892 defsubr (&Sposix_looking_at);
2893 defsubr (&Sstring_match);
2894 defsubr (&Sposix_string_match);
ca1d1d23
JB
2895 defsubr (&Ssearch_forward);
2896 defsubr (&Ssearch_backward);
2897 defsubr (&Sword_search_forward);
2898 defsubr (&Sword_search_backward);
2899 defsubr (&Sre_search_forward);
2900 defsubr (&Sre_search_backward);
b819a390
RS
2901 defsubr (&Sposix_search_forward);
2902 defsubr (&Sposix_search_backward);
ca1d1d23
JB
2903 defsubr (&Sreplace_match);
2904 defsubr (&Smatch_beginning);
2905 defsubr (&Smatch_end);
2906 defsubr (&Smatch_data);
3f1c005b 2907 defsubr (&Sset_match_data);
ca1d1d23
JB
2908 defsubr (&Sregexp_quote);
2909}