Copyright up-date.
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
68c45bf0 2 Copyright (C) 1985, 86,87,93,94,97,98, 1999 Free Software Foundation, Inc.
ca1d1d23
JB
3
4This file is part of GNU Emacs.
5
6GNU Emacs is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
7c938215 8the Free Software Foundation; either version 2, or (at your option)
ca1d1d23
JB
9any later version.
10
11GNU Emacs is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU Emacs; see the file COPYING. If not, write to
3b7ad313
EN
18the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19Boston, MA 02111-1307, USA. */
ca1d1d23
JB
20
21
18160b98 22#include <config.h>
ca1d1d23
JB
23#include "lisp.h"
24#include "syntax.h"
5679531d 25#include "category.h"
ca1d1d23 26#include "buffer.h"
5679531d 27#include "charset.h"
9169c321 28#include "region-cache.h"
ca1d1d23 29#include "commands.h"
9ac0d9e0 30#include "blockinput.h"
bf1760bb 31#include "intervals.h"
4746118a 32
ca1d1d23
JB
33#include <sys/types.h>
34#include "regex.h"
35
67ce527d
KH
36#define min(a, b) ((a) < (b) ? (a) : (b))
37#define max(a, b) ((a) > (b) ? (a) : (b))
38
1d288aef 39#define REGEXP_CACHE_SIZE 20
ca1d1d23 40
487282dc
KH
41/* If the regexp is non-nil, then the buffer contains the compiled form
42 of that regexp, suitable for searching. */
1d288aef
RS
43struct regexp_cache
44{
487282dc
KH
45 struct regexp_cache *next;
46 Lisp_Object regexp;
47 struct re_pattern_buffer buf;
48 char fastmap[0400];
b819a390
RS
49 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
50 char posix;
487282dc 51};
ca1d1d23 52
487282dc
KH
53/* The instances of that struct. */
54struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 55
487282dc
KH
56/* The head of the linked list; points to the most recently used buffer. */
57struct regexp_cache *searchbuf_head;
ca1d1d23 58
ca1d1d23 59
4746118a
JB
60/* Every call to re_match, etc., must pass &search_regs as the regs
61 argument unless you can show it is unnecessary (i.e., if re_match
62 is certainly going to be called again before region-around-match
63 can be called).
64
65 Since the registers are now dynamically allocated, we need to make
66 sure not to refer to the Nth register before checking that it has
1113d9db
JB
67 been allocated by checking search_regs.num_regs.
68
69 The regex code keeps track of whether it has allocated the search
487282dc
KH
70 buffer using bits in the re_pattern_buffer. This means that whenever
71 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
72 allocated any registers, and will allocate new registers the next
73 time you call a searching or matching function. Therefore, we need
74 to call re_set_registers after compiling a new pattern or after
75 setting the match registers, so that the regex functions will be
76 able to free or re-allocate it properly. */
ca1d1d23
JB
77static struct re_registers search_regs;
78
daa37602
JB
79/* The buffer in which the last search was performed, or
80 Qt if the last search was done in a string;
81 Qnil if no searching has been done yet. */
82static Lisp_Object last_thing_searched;
ca1d1d23 83
8e6208c5 84/* error condition signaled when regexp compile_pattern fails */
ca1d1d23
JB
85
86Lisp_Object Qinvalid_regexp;
87
ca325161 88static void set_search_regs ();
044f81f1 89static void save_search_regs ();
facdc750
RS
90static int simple_search ();
91static int boyer_moore ();
b819a390
RS
92static int search_buffer ();
93
ca1d1d23
JB
94static void
95matcher_overflow ()
96{
97 error ("Stack overflow in regexp matcher");
98}
99
b819a390
RS
100/* Compile a regexp and signal a Lisp error if anything goes wrong.
101 PATTERN is the pattern to compile.
102 CP is the place to put the result.
facdc750 103 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
104 REGP is the structure that says where to store the "register"
105 values that will result from matching this pattern.
106 If it is 0, we should compile the pattern not to record any
107 subexpression bounds.
108 POSIX is nonzero if we want full backtracking (POSIX style)
5679531d
KH
109 for this pattern. 0 means backtrack only enough to get a valid match.
110 MULTIBYTE is nonzero if we want to handle multibyte characters in
111 PATTERN. 0 means all multibyte characters are recognized just as
112 sequences of binary data. */
ca1d1d23 113
487282dc 114static void
5679531d 115compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
487282dc 116 struct regexp_cache *cp;
ca1d1d23 117 Lisp_Object pattern;
facdc750 118 Lisp_Object translate;
487282dc 119 struct re_registers *regp;
b819a390 120 int posix;
5679531d 121 int multibyte;
ca1d1d23 122{
7276d3d8 123 unsigned char *raw_pattern;
f8bd51c4 124 int raw_pattern_size;
d451e4db 125 char *val;
b819a390 126 reg_syntax_t old;
ca1d1d23 127
f8bd51c4
KH
128 /* MULTIBYTE says whether the text to be searched is multibyte.
129 We must convert PATTERN to match that, or we will not really
130 find things right. */
131
132 if (multibyte == STRING_MULTIBYTE (pattern))
133 {
7276d3d8 134 raw_pattern = (unsigned char *) XSTRING (pattern)->data;
fc932ac6 135 raw_pattern_size = STRING_BYTES (XSTRING (pattern));
f8bd51c4
KH
136 }
137 else if (multibyte)
138 {
139 raw_pattern_size = count_size_as_multibyte (XSTRING (pattern)->data,
140 XSTRING (pattern)->size);
7276d3d8 141 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
f8bd51c4
KH
142 copy_text (XSTRING (pattern)->data, raw_pattern,
143 XSTRING (pattern)->size, 0, 1);
144 }
145 else
146 {
147 /* Converting multibyte to single-byte.
148
149 ??? Perhaps this conversion should be done in a special way
150 by subtracting nonascii-insert-offset from each non-ASCII char,
151 so that only the multibyte chars which really correspond to
152 the chosen single-byte character set can possibly match. */
153 raw_pattern_size = XSTRING (pattern)->size;
7276d3d8 154 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
f8bd51c4 155 copy_text (XSTRING (pattern)->data, raw_pattern,
fc932ac6 156 STRING_BYTES (XSTRING (pattern)), 1, 0);
f8bd51c4
KH
157 }
158
487282dc 159 cp->regexp = Qnil;
59fab369 160 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
b819a390 161 cp->posix = posix;
5679531d 162 cp->buf.multibyte = multibyte;
9ac0d9e0 163 BLOCK_INPUT;
fb4a568d 164 old = re_set_syntax (RE_SYNTAX_EMACS
b819a390 165 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
7276d3d8
RS
166 val = (char *) re_compile_pattern ((char *)raw_pattern,
167 raw_pattern_size, &cp->buf);
b819a390 168 re_set_syntax (old);
9ac0d9e0 169 UNBLOCK_INPUT;
ca1d1d23 170 if (val)
487282dc 171 Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
1113d9db 172
487282dc 173 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
174}
175
6efc7887
RS
176/* Shrink each compiled regexp buffer in the cache
177 to the size actually used right now.
178 This is called from garbage collection. */
179
180void
181shrink_regexp_cache ()
182{
a968f437 183 struct regexp_cache *cp;
6efc7887
RS
184
185 for (cp = searchbuf_head; cp != 0; cp = cp->next)
186 {
187 cp->buf.allocated = cp->buf.used;
188 cp->buf.buffer
189 = (unsigned char *) realloc (cp->buf.buffer, cp->buf.used);
190 }
191}
192
487282dc 193/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
194 the cache.
195 PATTERN is the pattern to compile.
facdc750 196 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
197 REGP is the structure that says where to store the "register"
198 values that will result from matching this pattern.
199 If it is 0, we should compile the pattern not to record any
200 subexpression bounds.
201 POSIX is nonzero if we want full backtracking (POSIX style)
202 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
203
204struct re_pattern_buffer *
0c8533c6 205compile_pattern (pattern, regp, translate, posix, multibyte)
487282dc
KH
206 Lisp_Object pattern;
207 struct re_registers *regp;
facdc750 208 Lisp_Object translate;
0c8533c6 209 int posix, multibyte;
487282dc
KH
210{
211 struct regexp_cache *cp, **cpp;
212
213 for (cpp = &searchbuf_head; ; cpp = &cp->next)
214 {
215 cp = *cpp;
f1b9c7c1
KR
216 /* Entries are initialized to nil, and may be set to nil by
217 compile_pattern_1 if the pattern isn't valid. Don't apply
218 XSTRING in those cases. However, compile_pattern_1 is only
219 applied to the cache entry we pick here to reuse. So nil
220 should never appear before a non-nil entry. */
7c752c80 221 if (NILP (cp->regexp))
f1b9c7c1 222 goto compile_it;
1d288aef 223 if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
cf69b13e 224 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
1d288aef 225 && !NILP (Fstring_equal (cp->regexp, pattern))
59fab369 226 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
5679531d
KH
227 && cp->posix == posix
228 && cp->buf.multibyte == multibyte)
487282dc
KH
229 break;
230
f1b9c7c1
KR
231 /* If we're at the end of the cache, compile into the nil cell
232 we found, or the last (least recently used) cell with a
233 string value. */
487282dc
KH
234 if (cp->next == 0)
235 {
f1b9c7c1 236 compile_it:
5679531d 237 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
487282dc
KH
238 break;
239 }
240 }
241
242 /* When we get here, cp (aka *cpp) contains the compiled pattern,
243 either because we found it in the cache or because we just compiled it.
244 Move it to the front of the queue to mark it as most recently used. */
245 *cpp = cp->next;
246 cp->next = searchbuf_head;
247 searchbuf_head = cp;
1113d9db 248
6639708c
RS
249 /* Advise the searching functions about the space we have allocated
250 for register data. */
251 if (regp)
252 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
253
487282dc 254 return &cp->buf;
ca1d1d23
JB
255}
256
257/* Error condition used for failing searches */
258Lisp_Object Qsearch_failed;
259
260Lisp_Object
261signal_failure (arg)
262 Lisp_Object arg;
263{
264 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
265 return Qnil;
266}
267\f
b819a390
RS
268static Lisp_Object
269looking_at_1 (string, posix)
ca1d1d23 270 Lisp_Object string;
b819a390 271 int posix;
ca1d1d23
JB
272{
273 Lisp_Object val;
274 unsigned char *p1, *p2;
275 int s1, s2;
276 register int i;
487282dc 277 struct re_pattern_buffer *bufp;
ca1d1d23 278
7074fde6
FP
279 if (running_asynch_code)
280 save_search_regs ();
281
ca1d1d23 282 CHECK_STRING (string, 0);
487282dc
KH
283 bufp = compile_pattern (string, &search_regs,
284 (!NILP (current_buffer->case_fold_search)
facdc750 285 ? DOWNCASE_TABLE : Qnil),
0c8533c6
RS
286 posix,
287 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23
JB
288
289 immediate_quit = 1;
290 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
291
292 /* Get pointers and sizes of the two strings
293 that make up the visible portion of the buffer. */
294
295 p1 = BEGV_ADDR;
fa8ed3e0 296 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 297 p2 = GAP_END_ADDR;
fa8ed3e0 298 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
299 if (s1 < 0)
300 {
301 p2 = p1;
fa8ed3e0 302 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
303 s1 = 0;
304 }
305 if (s2 < 0)
306 {
fa8ed3e0 307 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
308 s2 = 0;
309 }
8bb43c28
RS
310
311 re_match_object = Qnil;
ca1d1d23 312
487282dc 313 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
fa8ed3e0
RS
314 PT_BYTE - BEGV_BYTE, &search_regs,
315 ZV_BYTE - BEGV_BYTE);
de182d70
GM
316 immediate_quit = 0;
317
ca1d1d23
JB
318 if (i == -2)
319 matcher_overflow ();
320
321 val = (0 <= i ? Qt : Qnil);
fa8ed3e0
RS
322 if (i >= 0)
323 for (i = 0; i < search_regs.num_regs; i++)
324 if (search_regs.start[i] >= 0)
325 {
326 search_regs.start[i]
327 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
328 search_regs.end[i]
329 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
330 }
a3668d92 331 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
332 return val;
333}
334
b819a390 335DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
94f94972 336 "Return t if text after point matches regular expression REGEXP.\n\
b819a390
RS
337This function modifies the match data that `match-beginning',\n\
338`match-end' and `match-data' access; save and restore the match\n\
339data if you want to preserve them.")
94f94972
RS
340 (regexp)
341 Lisp_Object regexp;
b819a390 342{
94f94972 343 return looking_at_1 (regexp, 0);
b819a390
RS
344}
345
346DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
94f94972 347 "Return t if text after point matches regular expression REGEXP.\n\
b819a390
RS
348Find the longest match, in accord with Posix regular expression rules.\n\
349This function modifies the match data that `match-beginning',\n\
350`match-end' and `match-data' access; save and restore the match\n\
351data if you want to preserve them.")
94f94972
RS
352 (regexp)
353 Lisp_Object regexp;
b819a390 354{
94f94972 355 return looking_at_1 (regexp, 1);
b819a390
RS
356}
357\f
358static Lisp_Object
359string_match_1 (regexp, string, start, posix)
ca1d1d23 360 Lisp_Object regexp, string, start;
b819a390 361 int posix;
ca1d1d23
JB
362{
363 int val;
487282dc 364 struct re_pattern_buffer *bufp;
0c8533c6
RS
365 int pos, pos_byte;
366 int i;
ca1d1d23 367
7074fde6
FP
368 if (running_asynch_code)
369 save_search_regs ();
370
ca1d1d23
JB
371 CHECK_STRING (regexp, 0);
372 CHECK_STRING (string, 1);
373
374 if (NILP (start))
0c8533c6 375 pos = 0, pos_byte = 0;
ca1d1d23
JB
376 else
377 {
378 int len = XSTRING (string)->size;
379
380 CHECK_NUMBER (start, 2);
0c8533c6
RS
381 pos = XINT (start);
382 if (pos < 0 && -pos <= len)
383 pos = len + pos;
384 else if (0 > pos || pos > len)
ca1d1d23 385 args_out_of_range (string, start);
0c8533c6 386 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
387 }
388
487282dc
KH
389 bufp = compile_pattern (regexp, &search_regs,
390 (!NILP (current_buffer->case_fold_search)
facdc750 391 ? DOWNCASE_TABLE : Qnil),
0c8533c6
RS
392 posix,
393 STRING_MULTIBYTE (string));
ca1d1d23 394 immediate_quit = 1;
8bb43c28
RS
395 re_match_object = string;
396
487282dc 397 val = re_search (bufp, (char *) XSTRING (string)->data,
fc932ac6
RS
398 STRING_BYTES (XSTRING (string)), pos_byte,
399 STRING_BYTES (XSTRING (string)) - pos_byte,
ca1d1d23
JB
400 &search_regs);
401 immediate_quit = 0;
daa37602 402 last_thing_searched = Qt;
ca1d1d23
JB
403 if (val == -2)
404 matcher_overflow ();
405 if (val < 0) return Qnil;
0c8533c6
RS
406
407 for (i = 0; i < search_regs.num_regs; i++)
408 if (search_regs.start[i] >= 0)
409 {
410 search_regs.start[i]
411 = string_byte_to_char (string, search_regs.start[i]);
412 search_regs.end[i]
413 = string_byte_to_char (string, search_regs.end[i]);
414 }
415
416 return make_number (string_byte_to_char (string, val));
ca1d1d23 417}
e59a8453 418
b819a390
RS
419DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
420 "Return index of start of first match for REGEXP in STRING, or nil.\n\
21e01890 421Case is ignored if `case-fold-search' is non-nil in the current buffer.\n\
b819a390
RS
422If third arg START is non-nil, start search at that index in STRING.\n\
423For index of first char beyond the match, do (match-end 0).\n\
424`match-end' and `match-beginning' also give indices of substrings\n\
425matched by parenthesis constructs in the pattern.")
426 (regexp, string, start)
427 Lisp_Object regexp, string, start;
428{
429 return string_match_1 (regexp, string, start, 0);
430}
431
432DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
433 "Return index of start of first match for REGEXP in STRING, or nil.\n\
434Find the longest match, in accord with Posix regular expression rules.\n\
21e01890 435Case is ignored if `case-fold-search' is non-nil in the current buffer.\n\
b819a390
RS
436If third arg START is non-nil, start search at that index in STRING.\n\
437For index of first char beyond the match, do (match-end 0).\n\
438`match-end' and `match-beginning' also give indices of substrings\n\
439matched by parenthesis constructs in the pattern.")
440 (regexp, string, start)
441 Lisp_Object regexp, string, start;
442{
443 return string_match_1 (regexp, string, start, 1);
444}
445
e59a8453
RS
446/* Match REGEXP against STRING, searching all of STRING,
447 and return the index of the match, or negative on failure.
448 This does not clobber the match data. */
449
450int
451fast_string_match (regexp, string)
452 Lisp_Object regexp, string;
453{
454 int val;
487282dc 455 struct re_pattern_buffer *bufp;
e59a8453 456
facdc750
RS
457 bufp = compile_pattern (regexp, 0, Qnil,
458 0, STRING_MULTIBYTE (string));
e59a8453 459 immediate_quit = 1;
8bb43c28
RS
460 re_match_object = string;
461
487282dc 462 val = re_search (bufp, (char *) XSTRING (string)->data,
fc932ac6
RS
463 STRING_BYTES (XSTRING (string)), 0,
464 STRING_BYTES (XSTRING (string)), 0);
e59a8453
RS
465 immediate_quit = 0;
466 return val;
467}
5679531d
KH
468
469/* Match REGEXP against STRING, searching all of STRING ignoring case,
470 and return the index of the match, or negative on failure.
0c8533c6
RS
471 This does not clobber the match data.
472 We assume that STRING contains single-byte characters. */
5679531d
KH
473
474extern Lisp_Object Vascii_downcase_table;
475
476int
b4577c63 477fast_c_string_match_ignore_case (regexp, string)
5679531d
KH
478 Lisp_Object regexp;
479 char *string;
480{
481 int val;
482 struct re_pattern_buffer *bufp;
483 int len = strlen (string);
484
0c8533c6 485 regexp = string_make_unibyte (regexp);
b4577c63 486 re_match_object = Qt;
5679531d 487 bufp = compile_pattern (regexp, 0,
facdc750 488 Vascii_downcase_table, 0,
f8bd51c4 489 0);
5679531d
KH
490 immediate_quit = 1;
491 val = re_search (bufp, string, len, 0, len, 0);
492 immediate_quit = 0;
493 return val;
494}
ca1d1d23 495\f
9169c321
JB
496/* The newline cache: remembering which sections of text have no newlines. */
497
498/* If the user has requested newline caching, make sure it's on.
499 Otherwise, make sure it's off.
500 This is our cheezy way of associating an action with the change of
501 state of a buffer-local variable. */
502static void
503newline_cache_on_off (buf)
504 struct buffer *buf;
505{
506 if (NILP (buf->cache_long_line_scans))
507 {
508 /* It should be off. */
509 if (buf->newline_cache)
510 {
511 free_region_cache (buf->newline_cache);
512 buf->newline_cache = 0;
513 }
514 }
515 else
516 {
517 /* It should be on. */
518 if (buf->newline_cache == 0)
519 buf->newline_cache = new_region_cache ();
520 }
521}
522
523\f
524/* Search for COUNT instances of the character TARGET between START and END.
525
526 If COUNT is positive, search forwards; END must be >= START.
527 If COUNT is negative, search backwards for the -COUNTth instance;
528 END must be <= START.
529 If COUNT is zero, do anything you please; run rogue, for all I care.
530
531 If END is zero, use BEGV or ZV instead, as appropriate for the
532 direction indicated by COUNT.
ffd56f97
JB
533
534 If we find COUNT instances, set *SHORTAGE to zero, and return the
5bfe95c9
RS
535 position after the COUNTth match. Note that for reverse motion
536 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 537
9169c321
JB
538 If we don't find COUNT instances before reaching END, set *SHORTAGE
539 to the number of TARGETs left unfound, and return END.
ffd56f97 540
087a5f81
RS
541 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
542 except when inside redisplay. */
543
dfcf069d 544int
9169c321
JB
545scan_buffer (target, start, end, count, shortage, allow_quit)
546 register int target;
547 int start, end;
548 int count;
549 int *shortage;
087a5f81 550 int allow_quit;
ca1d1d23 551{
9169c321
JB
552 struct region_cache *newline_cache;
553 int direction;
ffd56f97 554
9169c321
JB
555 if (count > 0)
556 {
557 direction = 1;
558 if (! end) end = ZV;
559 }
560 else
561 {
562 direction = -1;
563 if (! end) end = BEGV;
564 }
ffd56f97 565
9169c321
JB
566 newline_cache_on_off (current_buffer);
567 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
568
569 if (shortage != 0)
570 *shortage = 0;
571
087a5f81 572 immediate_quit = allow_quit;
ca1d1d23 573
ffd56f97 574 if (count > 0)
9169c321 575 while (start != end)
ca1d1d23 576 {
9169c321
JB
577 /* Our innermost scanning loop is very simple; it doesn't know
578 about gaps, buffer ends, or the newline cache. ceiling is
579 the position of the last character before the next such
580 obstacle --- the last character the dumb search loop should
581 examine. */
fa8ed3e0
RS
582 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
583 int start_byte = CHAR_TO_BYTE (start);
67ce527d 584 int tem;
9169c321
JB
585
586 /* If we're looking for a newline, consult the newline cache
587 to see where we can avoid some scanning. */
588 if (target == '\n' && newline_cache)
589 {
590 int next_change;
591 immediate_quit = 0;
592 while (region_cache_forward
fa8ed3e0
RS
593 (current_buffer, newline_cache, start_byte, &next_change))
594 start_byte = next_change;
cbe0db0d 595 immediate_quit = allow_quit;
9169c321 596
fa8ed3e0
RS
597 /* START should never be after END. */
598 if (start_byte > ceiling_byte)
599 start_byte = ceiling_byte;
9169c321
JB
600
601 /* Now the text after start is an unknown region, and
602 next_change is the position of the next known region. */
fa8ed3e0 603 ceiling_byte = min (next_change - 1, ceiling_byte);
9169c321
JB
604 }
605
606 /* The dumb loop can only scan text stored in contiguous
607 bytes. BUFFER_CEILING_OF returns the last character
608 position that is contiguous, so the ceiling is the
609 position after that. */
67ce527d
KH
610 tem = BUFFER_CEILING_OF (start_byte);
611 ceiling_byte = min (tem, ceiling_byte);
9169c321
JB
612
613 {
614 /* The termination address of the dumb loop. */
fa8ed3e0
RS
615 register unsigned char *ceiling_addr
616 = BYTE_POS_ADDR (ceiling_byte) + 1;
617 register unsigned char *cursor
618 = BYTE_POS_ADDR (start_byte);
9169c321
JB
619 unsigned char *base = cursor;
620
621 while (cursor < ceiling_addr)
622 {
623 unsigned char *scan_start = cursor;
624
625 /* The dumb loop. */
626 while (*cursor != target && ++cursor < ceiling_addr)
627 ;
628
629 /* If we're looking for newlines, cache the fact that
630 the region from start to cursor is free of them. */
631 if (target == '\n' && newline_cache)
632 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
633 start_byte + scan_start - base,
634 start_byte + cursor - base);
9169c321
JB
635
636 /* Did we find the target character? */
637 if (cursor < ceiling_addr)
638 {
639 if (--count == 0)
640 {
641 immediate_quit = 0;
fa8ed3e0 642 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
9169c321
JB
643 }
644 cursor++;
645 }
646 }
647
fa8ed3e0 648 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321 649 }
ca1d1d23
JB
650 }
651 else
9169c321
JB
652 while (start > end)
653 {
654 /* The last character to check before the next obstacle. */
fa8ed3e0
RS
655 int ceiling_byte = CHAR_TO_BYTE (end);
656 int start_byte = CHAR_TO_BYTE (start);
67ce527d 657 int tem;
9169c321
JB
658
659 /* Consult the newline cache, if appropriate. */
660 if (target == '\n' && newline_cache)
661 {
662 int next_change;
663 immediate_quit = 0;
664 while (region_cache_backward
fa8ed3e0
RS
665 (current_buffer, newline_cache, start_byte, &next_change))
666 start_byte = next_change;
cbe0db0d 667 immediate_quit = allow_quit;
9169c321
JB
668
669 /* Start should never be at or before end. */
fa8ed3e0
RS
670 if (start_byte <= ceiling_byte)
671 start_byte = ceiling_byte + 1;
9169c321
JB
672
673 /* Now the text before start is an unknown region, and
674 next_change is the position of the next known region. */
fa8ed3e0 675 ceiling_byte = max (next_change, ceiling_byte);
9169c321
JB
676 }
677
678 /* Stop scanning before the gap. */
67ce527d
KH
679 tem = BUFFER_FLOOR_OF (start_byte - 1);
680 ceiling_byte = max (tem, ceiling_byte);
9169c321
JB
681
682 {
683 /* The termination address of the dumb loop. */
fa8ed3e0
RS
684 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
685 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
686 unsigned char *base = cursor;
687
688 while (cursor >= ceiling_addr)
689 {
690 unsigned char *scan_start = cursor;
691
692 while (*cursor != target && --cursor >= ceiling_addr)
693 ;
694
695 /* If we're looking for newlines, cache the fact that
696 the region from after the cursor to start is free of them. */
697 if (target == '\n' && newline_cache)
698 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
699 start_byte + cursor - base,
700 start_byte + scan_start - base);
9169c321
JB
701
702 /* Did we find the target character? */
703 if (cursor >= ceiling_addr)
704 {
705 if (++count >= 0)
706 {
707 immediate_quit = 0;
fa8ed3e0 708 return BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
709 }
710 cursor--;
711 }
712 }
713
fa8ed3e0 714 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
715 }
716 }
717
ca1d1d23
JB
718 immediate_quit = 0;
719 if (shortage != 0)
ffd56f97 720 *shortage = count * direction;
9169c321 721 return start;
ca1d1d23 722}
fa8ed3e0
RS
723\f
724/* Search for COUNT instances of a line boundary, which means either a
725 newline or (if selective display enabled) a carriage return.
726 Start at START. If COUNT is negative, search backwards.
727
728 We report the resulting position by calling TEMP_SET_PT_BOTH.
729
730 If we find COUNT instances. we position after (always after,
731 even if scanning backwards) the COUNTth match, and return 0.
732
733 If we don't find COUNT instances before reaching the end of the
734 buffer (or the beginning, if scanning backwards), we return
735 the number of line boundaries left unfound, and position at
736 the limit we bumped up against.
737
738 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
d5d57b92 739 except in special cases. */
ca1d1d23 740
63fa018d 741int
fa8ed3e0
RS
742scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
743 int start, start_byte;
744 int limit, limit_byte;
745 register int count;
746 int allow_quit;
63fa018d 747{
fa8ed3e0
RS
748 int direction = ((count > 0) ? 1 : -1);
749
750 register unsigned char *cursor;
751 unsigned char *base;
752
753 register int ceiling;
754 register unsigned char *ceiling_addr;
755
d5d57b92
RS
756 int old_immediate_quit = immediate_quit;
757
fa8ed3e0
RS
758 /* If we are not in selective display mode,
759 check only for newlines. */
760 int selective_display = (!NILP (current_buffer->selective_display)
761 && !INTEGERP (current_buffer->selective_display));
762
763 /* The code that follows is like scan_buffer
764 but checks for either newline or carriage return. */
765
d5d57b92
RS
766 if (allow_quit)
767 immediate_quit++;
fa8ed3e0
RS
768
769 start_byte = CHAR_TO_BYTE (start);
770
771 if (count > 0)
772 {
773 while (start_byte < limit_byte)
774 {
775 ceiling = BUFFER_CEILING_OF (start_byte);
776 ceiling = min (limit_byte - 1, ceiling);
777 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
778 base = (cursor = BYTE_POS_ADDR (start_byte));
779 while (1)
780 {
781 while (*cursor != '\n' && ++cursor != ceiling_addr)
782 ;
783
784 if (cursor != ceiling_addr)
785 {
786 if (--count == 0)
787 {
d5d57b92 788 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
789 start_byte = start_byte + cursor - base + 1;
790 start = BYTE_TO_CHAR (start_byte);
791 TEMP_SET_PT_BOTH (start, start_byte);
792 return 0;
793 }
794 else
795 if (++cursor == ceiling_addr)
796 break;
797 }
798 else
799 break;
800 }
801 start_byte += cursor - base;
802 }
803 }
804 else
805 {
fa8ed3e0
RS
806 while (start_byte > limit_byte)
807 {
808 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
809 ceiling = max (limit_byte, ceiling);
810 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
811 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
812 while (1)
813 {
814 while (--cursor != ceiling_addr && *cursor != '\n')
815 ;
816
817 if (cursor != ceiling_addr)
818 {
819 if (++count == 0)
820 {
d5d57b92 821 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
822 /* Return the position AFTER the match we found. */
823 start_byte = start_byte + cursor - base + 1;
824 start = BYTE_TO_CHAR (start_byte);
825 TEMP_SET_PT_BOTH (start, start_byte);
826 return 0;
827 }
828 }
829 else
830 break;
831 }
832 /* Here we add 1 to compensate for the last decrement
833 of CURSOR, which took it past the valid range. */
834 start_byte += cursor - base + 1;
835 }
836 }
837
838 TEMP_SET_PT_BOTH (limit, limit_byte);
d5d57b92 839 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
840
841 return count * direction;
63fa018d
RS
842}
843
ca1d1d23 844int
fa8ed3e0 845find_next_newline_no_quit (from, cnt)
ca1d1d23
JB
846 register int from, cnt;
847{
fa8ed3e0 848 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
9169c321
JB
849}
850
9169c321
JB
851/* Like find_next_newline, but returns position before the newline,
852 not after, and only search up to TO. This isn't just
853 find_next_newline (...)-1, because you might hit TO. */
fa8ed3e0 854
9169c321
JB
855int
856find_before_next_newline (from, to, cnt)
cbe0db0d 857 int from, to, cnt;
9169c321
JB
858{
859 int shortage;
860 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
861
862 if (shortage == 0)
863 pos--;
864
865 return pos;
ca1d1d23
JB
866}
867\f
ca1d1d23
JB
868/* Subroutines of Lisp buffer search functions. */
869
870static Lisp_Object
b819a390 871search_command (string, bound, noerror, count, direction, RE, posix)
ca1d1d23
JB
872 Lisp_Object string, bound, noerror, count;
873 int direction;
874 int RE;
b819a390 875 int posix;
ca1d1d23
JB
876{
877 register int np;
9f43ad85 878 int lim, lim_byte;
ca1d1d23
JB
879 int n = direction;
880
881 if (!NILP (count))
882 {
883 CHECK_NUMBER (count, 3);
884 n *= XINT (count);
885 }
886
887 CHECK_STRING (string, 0);
888 if (NILP (bound))
9f43ad85
RS
889 {
890 if (n > 0)
891 lim = ZV, lim_byte = ZV_BYTE;
892 else
893 lim = BEGV, lim_byte = BEGV_BYTE;
894 }
ca1d1d23
JB
895 else
896 {
897 CHECK_NUMBER_COERCE_MARKER (bound, 1);
898 lim = XINT (bound);
6ec8bbd2 899 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
900 error ("Invalid search bound (wrong side of point)");
901 if (lim > ZV)
9f43ad85 902 lim = ZV, lim_byte = ZV_BYTE;
588d2fd5 903 else if (lim < BEGV)
9f43ad85 904 lim = BEGV, lim_byte = BEGV_BYTE;
588d2fd5
KH
905 else
906 lim_byte = CHAR_TO_BYTE (lim);
ca1d1d23
JB
907 }
908
9f43ad85 909 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
ca1d1d23 910 (!NILP (current_buffer->case_fold_search)
facdc750 911 ? current_buffer->case_canon_table
3135e9fd 912 : Qnil),
ca1d1d23 913 (!NILP (current_buffer->case_fold_search)
facdc750 914 ? current_buffer->case_eqv_table
3135e9fd 915 : Qnil),
b819a390 916 posix);
ca1d1d23
JB
917 if (np <= 0)
918 {
919 if (NILP (noerror))
920 return signal_failure (string);
921 if (!EQ (noerror, Qt))
922 {
923 if (lim < BEGV || lim > ZV)
924 abort ();
9f43ad85 925 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
926 return Qnil;
927#if 0 /* This would be clean, but maybe programs depend on
928 a value of nil here. */
481399bf 929 np = lim;
a5f217b8 930#endif
ca1d1d23 931 }
481399bf
RS
932 else
933 return Qnil;
ca1d1d23
JB
934 }
935
936 if (np < BEGV || np > ZV)
937 abort ();
938
939 SET_PT (np);
940
941 return make_number (np);
942}
943\f
fa8ed3e0
RS
944/* Return 1 if REGEXP it matches just one constant string. */
945
b6d6a51c
KH
946static int
947trivial_regexp_p (regexp)
948 Lisp_Object regexp;
949{
fc932ac6 950 int len = STRING_BYTES (XSTRING (regexp));
b6d6a51c 951 unsigned char *s = XSTRING (regexp)->data;
b6d6a51c
KH
952 while (--len >= 0)
953 {
954 switch (*s++)
955 {
956 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
957 return 0;
958 case '\\':
959 if (--len < 0)
960 return 0;
961 switch (*s++)
962 {
963 case '|': case '(': case ')': case '`': case '\'': case 'b':
964 case 'B': case '<': case '>': case 'w': case 'W': case 's':
866f60fd 965 case 'S': case '=':
5679531d 966 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 967 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
968 case '6': case '7': case '8': case '9':
969 return 0;
970 }
971 }
972 }
973 return 1;
974}
975
ca325161 976/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 977 starting at position POS and stopping at position LIM,
b819a390 978 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
979 a regular expression if RE is true.
980
981 If N is positive, searching is forward and LIM must be greater than POS.
982 If N is negative, searching is backward and LIM must be less than POS.
983
facdc750 984 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 985 or else the position at the beginning of the Nth occurrence
b819a390
RS
986 (if searching backward) or the end (if searching forward).
987
988 POSIX is nonzero if we want full backtracking (POSIX style)
989 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 990
aff2ce94
RS
991#define TRANSLATE(out, trt, d) \
992do \
993 { \
994 if (! NILP (trt)) \
995 { \
996 Lisp_Object temp; \
997 temp = Faref (trt, make_number (d)); \
998 if (INTEGERP (temp)) \
999 out = XINT (temp); \
1000 else \
1001 out = d; \
1002 } \
1003 else \
1004 out = d; \
1005 } \
1006while (0)
facdc750 1007
b819a390 1008static int
9f43ad85
RS
1009search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1010 RE, trt, inverse_trt, posix)
ca1d1d23
JB
1011 Lisp_Object string;
1012 int pos;
9f43ad85 1013 int pos_byte;
ca1d1d23 1014 int lim;
9f43ad85 1015 int lim_byte;
ca1d1d23
JB
1016 int n;
1017 int RE;
facdc750
RS
1018 Lisp_Object trt;
1019 Lisp_Object inverse_trt;
b819a390 1020 int posix;
ca1d1d23
JB
1021{
1022 int len = XSTRING (string)->size;
fc932ac6 1023 int len_byte = STRING_BYTES (XSTRING (string));
facdc750 1024 register int i;
ca1d1d23 1025
7074fde6
FP
1026 if (running_asynch_code)
1027 save_search_regs ();
1028
a7e4cdde 1029 /* Searching 0 times means don't move. */
ca1d1d23 1030 /* Null string is found at starting position. */
a7e4cdde 1031 if (len == 0 || n == 0)
ca325161
RS
1032 {
1033 set_search_regs (pos, 0);
1034 return pos;
1035 }
3f57a499 1036
b6d6a51c 1037 if (RE && !trivial_regexp_p (string))
ca1d1d23 1038 {
facdc750
RS
1039 unsigned char *p1, *p2;
1040 int s1, s2;
487282dc
KH
1041 struct re_pattern_buffer *bufp;
1042
0c8533c6
RS
1043 bufp = compile_pattern (string, &search_regs, trt, posix,
1044 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23 1045
ca1d1d23
JB
1046 immediate_quit = 1; /* Quit immediately if user types ^G,
1047 because letting this function finish
1048 can take too long. */
1049 QUIT; /* Do a pending quit right away,
1050 to avoid paradoxical behavior */
1051 /* Get pointers and sizes of the two strings
1052 that make up the visible portion of the buffer. */
1053
1054 p1 = BEGV_ADDR;
fa8ed3e0 1055 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1056 p2 = GAP_END_ADDR;
fa8ed3e0 1057 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1058 if (s1 < 0)
1059 {
1060 p2 = p1;
fa8ed3e0 1061 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1062 s1 = 0;
1063 }
1064 if (s2 < 0)
1065 {
fa8ed3e0 1066 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1067 s2 = 0;
1068 }
8bb43c28
RS
1069 re_match_object = Qnil;
1070
ca1d1d23
JB
1071 while (n < 0)
1072 {
42db823b 1073 int val;
487282dc 1074 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1075 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1076 &search_regs,
42db823b 1077 /* Don't allow match past current point */
4996330b 1078 pos_byte - BEGV_BYTE);
ca1d1d23 1079 if (val == -2)
b6d6a51c
KH
1080 {
1081 matcher_overflow ();
1082 }
ca1d1d23
JB
1083 if (val >= 0)
1084 {
26aff150 1085 pos_byte = search_regs.start[0] + BEGV_BYTE;
4746118a 1086 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1087 if (search_regs.start[i] >= 0)
1088 {
fa8ed3e0
RS
1089 search_regs.start[i]
1090 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1091 search_regs.end[i]
1092 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1093 }
a3668d92 1094 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1095 /* Set pos to the new position. */
1096 pos = search_regs.start[0];
1097 }
1098 else
1099 {
1100 immediate_quit = 0;
1101 return (n);
1102 }
1103 n++;
1104 }
1105 while (n > 0)
1106 {
42db823b 1107 int val;
487282dc 1108 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1109 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1110 &search_regs,
1111 lim_byte - BEGV_BYTE);
ca1d1d23 1112 if (val == -2)
b6d6a51c
KH
1113 {
1114 matcher_overflow ();
1115 }
ca1d1d23
JB
1116 if (val >= 0)
1117 {
26aff150 1118 pos_byte = search_regs.end[0] + BEGV_BYTE;
4746118a 1119 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1120 if (search_regs.start[i] >= 0)
1121 {
fa8ed3e0
RS
1122 search_regs.start[i]
1123 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1124 search_regs.end[i]
1125 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1126 }
a3668d92 1127 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1128 pos = search_regs.end[0];
1129 }
1130 else
1131 {
1132 immediate_quit = 0;
1133 return (0 - n);
1134 }
1135 n--;
1136 }
1137 immediate_quit = 0;
1138 return (pos);
1139 }
1140 else /* non-RE case */
1141 {
facdc750
RS
1142 unsigned char *raw_pattern, *pat;
1143 int raw_pattern_size;
1144 int raw_pattern_size_byte;
1145 unsigned char *patbuf;
1146 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1147 unsigned char *base_pat = XSTRING (string)->data;
1148 int charset_base = -1;
040272ce 1149 int boyer_moore_ok = 1;
facdc750
RS
1150
1151 /* MULTIBYTE says whether the text to be searched is multibyte.
1152 We must convert PATTERN to match that, or we will not really
1153 find things right. */
1154
1155 if (multibyte == STRING_MULTIBYTE (string))
1156 {
7276d3d8 1157 raw_pattern = (unsigned char *) XSTRING (string)->data;
facdc750 1158 raw_pattern_size = XSTRING (string)->size;
fc932ac6 1159 raw_pattern_size_byte = STRING_BYTES (XSTRING (string));
facdc750
RS
1160 }
1161 else if (multibyte)
1162 {
1163 raw_pattern_size = XSTRING (string)->size;
1164 raw_pattern_size_byte
1165 = count_size_as_multibyte (XSTRING (string)->data,
1166 raw_pattern_size);
7276d3d8 1167 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
facdc750
RS
1168 copy_text (XSTRING (string)->data, raw_pattern,
1169 XSTRING (string)->size, 0, 1);
1170 }
1171 else
1172 {
1173 /* Converting multibyte to single-byte.
1174
1175 ??? Perhaps this conversion should be done in a special way
1176 by subtracting nonascii-insert-offset from each non-ASCII char,
1177 so that only the multibyte chars which really correspond to
1178 the chosen single-byte character set can possibly match. */
1179 raw_pattern_size = XSTRING (string)->size;
1180 raw_pattern_size_byte = XSTRING (string)->size;
7276d3d8 1181 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
facdc750 1182 copy_text (XSTRING (string)->data, raw_pattern,
fc932ac6 1183 STRING_BYTES (XSTRING (string)), 1, 0);
facdc750
RS
1184 }
1185
1186 /* Copy and optionally translate the pattern. */
1187 len = raw_pattern_size;
1188 len_byte = raw_pattern_size_byte;
1189 patbuf = (unsigned char *) alloca (len_byte);
1190 pat = patbuf;
1191 base_pat = raw_pattern;
1192 if (multibyte)
1193 {
1194 while (--len >= 0)
1195 {
daaa6ed8 1196 unsigned char str[MAX_MULTIBYTE_LENGTH];
aff2ce94 1197 int c, translated, inverse;
facdc750
RS
1198 int in_charlen, charlen;
1199
1200 /* If we got here and the RE flag is set, it's because we're
1201 dealing with a regexp known to be trivial, so the backslash
1202 just quotes the next character. */
1203 if (RE && *base_pat == '\\')
1204 {
1205 len--;
1206 len_byte--;
1207 base_pat++;
1208 }
1209
1210 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
040272ce 1211
facdc750 1212 /* Translate the character, if requested. */
aff2ce94 1213 TRANSLATE (translated, trt, c);
facdc750
RS
1214 /* If translation changed the byte-length, go back
1215 to the original character. */
daaa6ed8 1216 charlen = CHAR_STRING (translated, str);
facdc750
RS
1217 if (in_charlen != charlen)
1218 {
1219 translated = c;
daaa6ed8 1220 charlen = CHAR_STRING (c, str);
facdc750
RS
1221 }
1222
5ffaf437
RS
1223 /* If we are searching for something strange,
1224 an invalid multibyte code, don't use boyer-moore. */
1225 if (! ASCII_BYTE_P (translated)
1226 && (charlen == 1 /* 8bit code */
1227 || charlen != in_charlen /* invalid multibyte code */
1228 ))
1229 boyer_moore_ok = 0;
1230
aff2ce94
RS
1231 TRANSLATE (inverse, inverse_trt, c);
1232
facdc750
RS
1233 /* Did this char actually get translated?
1234 Would any other char get translated into it? */
aff2ce94 1235 if (translated != c || inverse != c)
facdc750
RS
1236 {
1237 /* Keep track of which character set row
1238 contains the characters that need translation. */
5ffaf437 1239 int charset_base_code = c & ~CHAR_FIELD3_MASK;
facdc750
RS
1240 if (charset_base == -1)
1241 charset_base = charset_base_code;
1242 else if (charset_base != charset_base_code)
1243 /* If two different rows appear, needing translation,
1244 then we cannot use boyer_moore search. */
040272ce 1245 boyer_moore_ok = 0;
aff2ce94 1246 }
facdc750
RS
1247
1248 /* Store this character into the translated pattern. */
1249 bcopy (str, pat, charlen);
1250 pat += charlen;
1251 base_pat += in_charlen;
1252 len_byte -= in_charlen;
1253 }
1254 }
1255 else
1256 {
040272ce
KH
1257 /* Unibyte buffer. */
1258 charset_base = 0;
facdc750
RS
1259 while (--len >= 0)
1260 {
040272ce 1261 int c, translated;
facdc750
RS
1262
1263 /* If we got here and the RE flag is set, it's because we're
1264 dealing with a regexp known to be trivial, so the backslash
1265 just quotes the next character. */
1266 if (RE && *base_pat == '\\')
1267 {
1268 len--;
1269 base_pat++;
1270 }
1271 c = *base_pat++;
aff2ce94 1272 TRANSLATE (translated, trt, c);
facdc750
RS
1273 *pat++ = translated;
1274 }
1275 }
1276
1277 len_byte = pat - patbuf;
1278 len = raw_pattern_size;
1279 pat = base_pat = patbuf;
1280
040272ce 1281 if (boyer_moore_ok)
facdc750 1282 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
aff2ce94
RS
1283 pos, pos_byte, lim, lim_byte,
1284 charset_base);
facdc750
RS
1285 else
1286 return simple_search (n, pat, len, len_byte, trt,
1287 pos, pos_byte, lim, lim_byte);
1288 }
1289}
1290\f
1291/* Do a simple string search N times for the string PAT,
1292 whose length is LEN/LEN_BYTE,
1293 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1294 TRT is the translation table.
f8bd51c4 1295
facdc750
RS
1296 Return the character position where the match is found.
1297 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1298
facdc750
RS
1299 This kind of search works regardless of what is in PAT and
1300 regardless of what is in TRT. It is used in cases where
1301 boyer_moore cannot work. */
1302
1303static int
1304simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1305 int n;
1306 unsigned char *pat;
1307 int len, len_byte;
1308 Lisp_Object trt;
1309 int pos, pos_byte;
1310 int lim, lim_byte;
1311{
1312 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
ab228c24 1313 int forward = n > 0;
facdc750
RS
1314
1315 if (lim > pos && multibyte)
1316 while (n > 0)
1317 {
1318 while (1)
f8bd51c4 1319 {
facdc750
RS
1320 /* Try matching at position POS. */
1321 int this_pos = pos;
1322 int this_pos_byte = pos_byte;
1323 int this_len = len;
1324 int this_len_byte = len_byte;
1325 unsigned char *p = pat;
1326 if (pos + len > lim)
1327 goto stop;
1328
1329 while (this_len > 0)
1330 {
1331 int charlen, buf_charlen;
ab228c24 1332 int pat_ch, buf_ch;
facdc750 1333
ab228c24 1334 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1335 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1336 ZV_BYTE - this_pos_byte,
1337 buf_charlen);
aff2ce94 1338 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1339
1340 if (buf_ch != pat_ch)
1341 break;
ab228c24
RS
1342
1343 this_len_byte -= charlen;
1344 this_len--;
1345 p += charlen;
1346
1347 this_pos_byte += buf_charlen;
1348 this_pos++;
facdc750
RS
1349 }
1350
1351 if (this_len == 0)
1352 {
1353 pos += len;
1354 pos_byte += len_byte;
1355 break;
1356 }
1357
1358 INC_BOTH (pos, pos_byte);
f8bd51c4 1359 }
facdc750
RS
1360
1361 n--;
1362 }
1363 else if (lim > pos)
1364 while (n > 0)
1365 {
1366 while (1)
f8bd51c4 1367 {
facdc750
RS
1368 /* Try matching at position POS. */
1369 int this_pos = pos;
1370 int this_len = len;
1371 unsigned char *p = pat;
1372
1373 if (pos + len > lim)
1374 goto stop;
1375
1376 while (this_len > 0)
1377 {
1378 int pat_ch = *p++;
1379 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1380 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1381
1382 if (buf_ch != pat_ch)
1383 break;
ab228c24
RS
1384
1385 this_len--;
1386 this_pos++;
facdc750
RS
1387 }
1388
1389 if (this_len == 0)
1390 {
1391 pos += len;
1392 break;
1393 }
1394
1395 pos++;
f8bd51c4 1396 }
facdc750
RS
1397
1398 n--;
1399 }
1400 /* Backwards search. */
1401 else if (lim < pos && multibyte)
1402 while (n < 0)
1403 {
1404 while (1)
f8bd51c4 1405 {
facdc750
RS
1406 /* Try matching at position POS. */
1407 int this_pos = pos - len;
1408 int this_pos_byte = pos_byte - len_byte;
1409 int this_len = len;
1410 int this_len_byte = len_byte;
1411 unsigned char *p = pat;
1412
1413 if (pos - len < lim)
1414 goto stop;
1415
1416 while (this_len > 0)
1417 {
1418 int charlen, buf_charlen;
ab228c24 1419 int pat_ch, buf_ch;
facdc750 1420
ab228c24 1421 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1422 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1423 ZV_BYTE - this_pos_byte,
1424 buf_charlen);
aff2ce94 1425 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1426
1427 if (buf_ch != pat_ch)
1428 break;
ab228c24
RS
1429
1430 this_len_byte -= charlen;
1431 this_len--;
1432 p += charlen;
1433 this_pos_byte += buf_charlen;
1434 this_pos++;
facdc750
RS
1435 }
1436
1437 if (this_len == 0)
1438 {
1439 pos -= len;
1440 pos_byte -= len_byte;
1441 break;
1442 }
1443
1444 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1445 }
1446
facdc750
RS
1447 n++;
1448 }
1449 else if (lim < pos)
1450 while (n < 0)
1451 {
1452 while (1)
b6d6a51c 1453 {
facdc750
RS
1454 /* Try matching at position POS. */
1455 int this_pos = pos - len;
1456 int this_len = len;
1457 unsigned char *p = pat;
1458
1459 if (pos - len < lim)
1460 goto stop;
1461
1462 while (this_len > 0)
1463 {
1464 int pat_ch = *p++;
1465 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1466 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1467
1468 if (buf_ch != pat_ch)
1469 break;
ab228c24
RS
1470 this_len--;
1471 this_pos++;
facdc750
RS
1472 }
1473
1474 if (this_len == 0)
b6d6a51c 1475 {
facdc750
RS
1476 pos -= len;
1477 break;
b6d6a51c 1478 }
facdc750
RS
1479
1480 pos--;
b6d6a51c 1481 }
facdc750
RS
1482
1483 n++;
b6d6a51c 1484 }
facdc750
RS
1485
1486 stop:
1487 if (n == 0)
aff2ce94 1488 {
ab228c24
RS
1489 if (forward)
1490 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1491 else
1492 set_search_regs (multibyte ? pos_byte : pos, len_byte);
aff2ce94
RS
1493
1494 return pos;
1495 }
facdc750
RS
1496 else if (n > 0)
1497 return -n;
1498 else
1499 return n;
1500}
1501\f
1502/* Do Boyer-Moore search N times for the string PAT,
1503 whose length is LEN/LEN_BYTE,
1504 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1505 DIRECTION says which direction we search in.
1506 TRT and INVERSE_TRT are translation tables.
1507
1508 This kind of search works if all the characters in PAT that have
1509 nontrivial translation are the same aside from the last byte. This
1510 makes it possible to translate just the last byte of a character,
1511 and do so after just a simple test of the context.
1512
1513 If that criterion is not satisfied, do not call this function. */
1514
1515static int
1516boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
aff2ce94 1517 pos, pos_byte, lim, lim_byte, charset_base)
facdc750
RS
1518 int n;
1519 unsigned char *base_pat;
1520 int len, len_byte;
1521 Lisp_Object trt;
1522 Lisp_Object inverse_trt;
1523 int pos, pos_byte;
1524 int lim, lim_byte;
aff2ce94 1525 int charset_base;
facdc750
RS
1526{
1527 int direction = ((n > 0) ? 1 : -1);
1528 register int dirlen;
a968f437 1529 int infinity, limit, stride_for_teases = 0;
facdc750
RS
1530 register int *BM_tab;
1531 int *BM_tab_base;
1532 register unsigned char *cursor, *p_limit;
1533 register int i, j;
cb6792d2 1534 unsigned char *pat, *pat_end;
facdc750
RS
1535 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1536
1537 unsigned char simple_translate[0400];
6bbd7a29
GM
1538 int translate_prev_byte = 0;
1539 int translate_anteprev_byte = 0;
facdc750
RS
1540
1541#ifdef C_ALLOCA
1542 int BM_tab_space[0400];
1543 BM_tab = &BM_tab_space[0];
1544#else
1545 BM_tab = (int *) alloca (0400 * sizeof (int));
1546#endif
1547 /* The general approach is that we are going to maintain that we know */
1548 /* the first (closest to the present position, in whatever direction */
1549 /* we're searching) character that could possibly be the last */
1550 /* (furthest from present position) character of a valid match. We */
1551 /* advance the state of our knowledge by looking at that character */
1552 /* and seeing whether it indeed matches the last character of the */
1553 /* pattern. If it does, we take a closer look. If it does not, we */
1554 /* move our pointer (to putative last characters) as far as is */
1555 /* logically possible. This amount of movement, which I call a */
1556 /* stride, will be the length of the pattern if the actual character */
1557 /* appears nowhere in the pattern, otherwise it will be the distance */
1558 /* from the last occurrence of that character to the end of the */
1559 /* pattern. */
1560 /* As a coding trick, an enormous stride is coded into the table for */
1561 /* characters that match the last character. This allows use of only */
1562 /* a single test, a test for having gone past the end of the */
1563 /* permissible match region, to test for both possible matches (when */
1564 /* the stride goes past the end immediately) and failure to */
1565 /* match (where you get nudged past the end one stride at a time). */
1566
1567 /* Here we make a "mickey mouse" BM table. The stride of the search */
1568 /* is determined only by the last character of the putative match. */
1569 /* If that character does not match, we will stride the proper */
1570 /* distance to propose a match that superimposes it on the last */
1571 /* instance of a character that matches it (per trt), or misses */
1572 /* it entirely if there is none. */
1573
1574 dirlen = len_byte * direction;
1575 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
cb6792d2
RS
1576
1577 /* Record position after the end of the pattern. */
1578 pat_end = base_pat + len_byte;
1579 /* BASE_PAT points to a character that we start scanning from.
1580 It is the first character in a forward search,
1581 the last character in a backward search. */
facdc750 1582 if (direction < 0)
cb6792d2
RS
1583 base_pat = pat_end - 1;
1584
facdc750
RS
1585 BM_tab_base = BM_tab;
1586 BM_tab += 0400;
1587 j = dirlen; /* to get it in a register */
1588 /* A character that does not appear in the pattern induces a */
1589 /* stride equal to the pattern length. */
1590 while (BM_tab_base != BM_tab)
1591 {
1592 *--BM_tab = j;
1593 *--BM_tab = j;
1594 *--BM_tab = j;
1595 *--BM_tab = j;
1596 }
1597
1598 /* We use this for translation, instead of TRT itself.
1599 We fill this in to handle the characters that actually
1600 occur in the pattern. Others don't matter anyway! */
1601 bzero (simple_translate, sizeof simple_translate);
1602 for (i = 0; i < 0400; i++)
1603 simple_translate[i] = i;
1604
1605 i = 0;
1606 while (i != infinity)
1607 {
cb6792d2 1608 unsigned char *ptr = base_pat + i;
facdc750
RS
1609 i += direction;
1610 if (i == dirlen)
1611 i = infinity;
1612 if (! NILP (trt))
ca1d1d23 1613 {
facdc750 1614 int ch;
aff2ce94 1615 int untranslated;
facdc750
RS
1616 int this_translated = 1;
1617
1618 if (multibyte
cb6792d2
RS
1619 /* Is *PTR the last byte of a character? */
1620 && (pat_end - ptr == 1 || CHAR_HEAD_P (ptr[1])))
ca1d1d23 1621 {
facdc750
RS
1622 unsigned char *charstart = ptr;
1623 while (! CHAR_HEAD_P (*charstart))
1624 charstart--;
aff2ce94 1625 untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
6397418a 1626 if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
facdc750 1627 {
ab228c24 1628 TRANSLATE (ch, trt, untranslated);
aff2ce94
RS
1629 if (! CHAR_HEAD_P (*ptr))
1630 {
1631 translate_prev_byte = ptr[-1];
1632 if (! CHAR_HEAD_P (translate_prev_byte))
1633 translate_anteprev_byte = ptr[-2];
1634 }
facdc750 1635 }
aff2ce94 1636 else
ab228c24
RS
1637 {
1638 this_translated = 0;
1639 ch = *ptr;
1640 }
ca1d1d23 1641 }
facdc750 1642 else if (!multibyte)
aff2ce94 1643 TRANSLATE (ch, trt, *ptr);
ca1d1d23
JB
1644 else
1645 {
facdc750
RS
1646 ch = *ptr;
1647 this_translated = 0;
ca1d1d23 1648 }
facdc750 1649
ab228c24
RS
1650 if (ch > 0400)
1651 j = ((unsigned char) ch) | 0200;
1652 else
1653 j = (unsigned char) ch;
1654
facdc750
RS
1655 if (i == infinity)
1656 stride_for_teases = BM_tab[j];
ab228c24 1657
facdc750
RS
1658 BM_tab[j] = dirlen - i;
1659 /* A translation table is accompanied by its inverse -- see */
1660 /* comment following downcase_table for details */
1661 if (this_translated)
ab228c24
RS
1662 {
1663 int starting_ch = ch;
1664 int starting_j = j;
1665 while (1)
1666 {
1667 TRANSLATE (ch, inverse_trt, ch);
1668 if (ch > 0400)
1669 j = ((unsigned char) ch) | 0200;
1670 else
1671 j = (unsigned char) ch;
1672
1673 /* For all the characters that map into CH,
1674 set up simple_translate to map the last byte
1675 into STARTING_J. */
1676 simple_translate[j] = starting_j;
1677 if (ch == starting_ch)
1678 break;
1679 BM_tab[j] = dirlen - i;
1680 }
1681 }
facdc750
RS
1682 }
1683 else
1684 {
1685 j = *ptr;
1686
1687 if (i == infinity)
1688 stride_for_teases = BM_tab[j];
1689 BM_tab[j] = dirlen - i;
ca1d1d23 1690 }
facdc750
RS
1691 /* stride_for_teases tells how much to stride if we get a */
1692 /* match on the far character but are subsequently */
1693 /* disappointed, by recording what the stride would have been */
1694 /* for that character if the last character had been */
1695 /* different. */
1696 }
1697 infinity = dirlen - infinity;
1698 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1699 /* loop invariant - POS_BYTE points at where last char (first
1700 char if reverse) of pattern would align in a possible match. */
1701 while (n != 0)
1702 {
1703 int tail_end;
1704 unsigned char *tail_end_ptr;
1705
1706 /* It's been reported that some (broken) compiler thinks that
1707 Boolean expressions in an arithmetic context are unsigned.
1708 Using an explicit ?1:0 prevents this. */
1709 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1710 < 0)
1711 return (n * (0 - direction));
1712 /* First we do the part we can by pointers (maybe nothing) */
1713 QUIT;
1714 pat = base_pat;
1715 limit = pos_byte - dirlen + direction;
67ce527d
KH
1716 if (direction > 0)
1717 {
1718 limit = BUFFER_CEILING_OF (limit);
1719 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1720 can take on without hitting edge of buffer or the gap. */
1721 limit = min (limit, pos_byte + 20000);
1722 limit = min (limit, lim_byte - 1);
1723 }
1724 else
1725 {
1726 limit = BUFFER_FLOOR_OF (limit);
1727 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1728 can take on without hitting edge of buffer or the gap. */
1729 limit = max (limit, pos_byte - 20000);
1730 limit = max (limit, lim_byte);
1731 }
facdc750
RS
1732 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1733 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1734
1735 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1736 {
facdc750
RS
1737 unsigned char *p2;
1738
1739 p_limit = BYTE_POS_ADDR (limit);
1740 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1741 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1742 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1743 {
facdc750 1744 if (direction > 0) /* worth duplicating */
ca1d1d23 1745 {
facdc750
RS
1746 /* Use signed comparison if appropriate
1747 to make cursor+infinity sure to be > p_limit.
1748 Assuming that the buffer lies in a range of addresses
1749 that are all "positive" (as ints) or all "negative",
1750 either kind of comparison will work as long
1751 as we don't step by infinity. So pick the kind
1752 that works when we do step by infinity. */
1753 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1754 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1755 cursor += BM_tab[*cursor];
ca1d1d23 1756 else
facdc750
RS
1757 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1758 cursor += BM_tab[*cursor];
1759 }
1760 else
1761 {
1762 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1763 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1764 cursor += BM_tab[*cursor];
1765 else
1766 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1767 cursor += BM_tab[*cursor];
1768 }
ca1d1d23 1769/* If you are here, cursor is beyond the end of the searched region. */
facdc750
RS
1770/* This can happen if you match on the far character of the pattern, */
1771/* because the "stride" of that character is infinity, a number able */
1772/* to throw you well beyond the end of the search. It can also */
1773/* happen if you fail to match within the permitted region and would */
1774/* otherwise try a character beyond that region */
1775 if ((cursor - p_limit) * direction <= len_byte)
1776 break; /* a small overrun is genuine */
1777 cursor -= infinity; /* large overrun = hit */
1778 i = dirlen - direction;
1779 if (! NILP (trt))
1780 {
1781 while ((i -= direction) + direction != 0)
ca1d1d23 1782 {
facdc750
RS
1783 int ch;
1784 cursor -= direction;
1785 /* Translate only the last byte of a character. */
1786 if (! multibyte
1787 || ((cursor == tail_end_ptr
1788 || CHAR_HEAD_P (cursor[1]))
1789 && (CHAR_HEAD_P (cursor[0])
1790 || (translate_prev_byte == cursor[-1]
1791 && (CHAR_HEAD_P (translate_prev_byte)
1792 || translate_anteprev_byte == cursor[-2])))))
1793 ch = simple_translate[*cursor];
1794 else
1795 ch = *cursor;
1796 if (pat[i] != ch)
1797 break;
ca1d1d23 1798 }
facdc750
RS
1799 }
1800 else
1801 {
1802 while ((i -= direction) + direction != 0)
ca1d1d23 1803 {
facdc750
RS
1804 cursor -= direction;
1805 if (pat[i] != *cursor)
1806 break;
ca1d1d23 1807 }
facdc750
RS
1808 }
1809 cursor += dirlen - i - direction; /* fix cursor */
1810 if (i + direction == 0)
1811 {
1812 int position;
0c8533c6 1813
facdc750 1814 cursor -= direction;
1113d9db 1815
facdc750
RS
1816 position = pos_byte + cursor - p2 + ((direction > 0)
1817 ? 1 - len_byte : 0);
1818 set_search_regs (position, len_byte);
ca325161 1819
facdc750
RS
1820 if ((n -= direction) != 0)
1821 cursor += dirlen; /* to resume search */
ca1d1d23 1822 else
facdc750
RS
1823 return ((direction > 0)
1824 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1825 }
facdc750
RS
1826 else
1827 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 1828 }
facdc750
RS
1829 pos_byte += cursor - p2;
1830 }
1831 else
1832 /* Now we'll pick up a clump that has to be done the hard */
1833 /* way because it covers a discontinuity */
1834 {
1835 limit = ((direction > 0)
1836 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1837 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1838 limit = ((direction > 0)
1839 ? min (limit + len_byte, lim_byte - 1)
1840 : max (limit - len_byte, lim_byte));
1841 /* LIMIT is now the last value POS_BYTE can have
1842 and still be valid for a possible match. */
1843 while (1)
ca1d1d23 1844 {
facdc750
RS
1845 /* This loop can be coded for space rather than */
1846 /* speed because it will usually run only once. */
1847 /* (the reach is at most len + 21, and typically */
1848 /* does not exceed len) */
1849 while ((limit - pos_byte) * direction >= 0)
1850 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1851 /* now run the same tests to distinguish going off the */
1852 /* end, a match or a phony match. */
1853 if ((pos_byte - limit) * direction <= len_byte)
1854 break; /* ran off the end */
1855 /* Found what might be a match.
1856 Set POS_BYTE back to last (first if reverse) pos. */
1857 pos_byte -= infinity;
1858 i = dirlen - direction;
1859 while ((i -= direction) + direction != 0)
ca1d1d23 1860 {
facdc750
RS
1861 int ch;
1862 unsigned char *ptr;
1863 pos_byte -= direction;
1864 ptr = BYTE_POS_ADDR (pos_byte);
1865 /* Translate only the last byte of a character. */
1866 if (! multibyte
1867 || ((ptr == tail_end_ptr
1868 || CHAR_HEAD_P (ptr[1]))
1869 && (CHAR_HEAD_P (ptr[0])
1870 || (translate_prev_byte == ptr[-1]
1871 && (CHAR_HEAD_P (translate_prev_byte)
1872 || translate_anteprev_byte == ptr[-2])))))
1873 ch = simple_translate[*ptr];
1874 else
1875 ch = *ptr;
1876 if (pat[i] != ch)
1877 break;
1878 }
1879 /* Above loop has moved POS_BYTE part or all the way
1880 back to the first pos (last pos if reverse).
1881 Set it once again at the last (first if reverse) char. */
1882 pos_byte += dirlen - i- direction;
1883 if (i + direction == 0)
1884 {
1885 int position;
1886 pos_byte -= direction;
1113d9db 1887
facdc750 1888 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
0c8533c6 1889
facdc750 1890 set_search_regs (position, len_byte);
ca325161 1891
facdc750
RS
1892 if ((n -= direction) != 0)
1893 pos_byte += dirlen; /* to resume search */
ca1d1d23 1894 else
facdc750
RS
1895 return ((direction > 0)
1896 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1897 }
facdc750
RS
1898 else
1899 pos_byte += stride_for_teases;
1900 }
1901 }
1902 /* We have done one clump. Can we continue? */
1903 if ((lim_byte - pos_byte) * direction < 0)
1904 return ((0 - n) * direction);
ca1d1d23 1905 }
facdc750 1906 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 1907}
ca325161 1908
fa8ed3e0 1909/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
a7e4cdde
RS
1910 for the overall match just found in the current buffer.
1911 Also clear out the match data for registers 1 and up. */
ca325161
RS
1912
1913static void
fa8ed3e0
RS
1914set_search_regs (beg_byte, nbytes)
1915 int beg_byte, nbytes;
ca325161 1916{
a7e4cdde
RS
1917 int i;
1918
ca325161
RS
1919 /* Make sure we have registers in which to store
1920 the match position. */
1921 if (search_regs.num_regs == 0)
1922 {
2d4a771a
RS
1923 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1924 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 1925 search_regs.num_regs = 2;
ca325161
RS
1926 }
1927
a7e4cdde
RS
1928 /* Clear out the other registers. */
1929 for (i = 1; i < search_regs.num_regs; i++)
1930 {
1931 search_regs.start[i] = -1;
1932 search_regs.end[i] = -1;
1933 }
1934
fa8ed3e0
RS
1935 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1936 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 1937 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 1938}
ca1d1d23
JB
1939\f
1940/* Given a string of words separated by word delimiters,
1941 compute a regexp that matches those exact words
1942 separated by arbitrary punctuation. */
1943
1944static Lisp_Object
1945wordify (string)
1946 Lisp_Object string;
1947{
1948 register unsigned char *p, *o;
0c8533c6 1949 register int i, i_byte, len, punct_count = 0, word_count = 0;
ca1d1d23 1950 Lisp_Object val;
0c8533c6
RS
1951 int prev_c = 0;
1952 int adjust;
ca1d1d23
JB
1953
1954 CHECK_STRING (string, 0);
1955 p = XSTRING (string)->data;
1956 len = XSTRING (string)->size;
1957
0c8533c6
RS
1958 for (i = 0, i_byte = 0; i < len; )
1959 {
1960 int c;
1961
eb99a8dd 1962 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
0c8533c6
RS
1963
1964 if (SYNTAX (c) != Sword)
1965 {
1966 punct_count++;
1967 if (i > 0 && SYNTAX (prev_c) == Sword)
1968 word_count++;
1969 }
ca1d1d23 1970
0c8533c6
RS
1971 prev_c = c;
1972 }
1973
1974 if (SYNTAX (prev_c) == Sword)
1975 word_count++;
1976 if (!word_count)
1977 return build_string ("");
1978
1979 adjust = - punct_count + 5 * (word_count - 1) + 4;
8a2df937
RS
1980 if (STRING_MULTIBYTE (string))
1981 val = make_uninit_multibyte_string (len + adjust,
1982 STRING_BYTES (XSTRING (string))
1983 + adjust);
1984 else
1985 val = make_uninit_string (len + adjust);
ca1d1d23
JB
1986
1987 o = XSTRING (val)->data;
1988 *o++ = '\\';
1989 *o++ = 'b';
1e9582d4 1990 prev_c = 0;
ca1d1d23 1991
1e9582d4
RS
1992 for (i = 0, i_byte = 0; i < len; )
1993 {
1994 int c;
1995 int i_byte_orig = i_byte;
1996
eb99a8dd 1997 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1e9582d4
RS
1998
1999 if (SYNTAX (c) == Sword)
2000 {
2001 bcopy (&XSTRING (string)->data[i_byte_orig], o,
2002 i_byte - i_byte_orig);
2003 o += i_byte - i_byte_orig;
2004 }
2005 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2006 {
2007 *o++ = '\\';
2008 *o++ = 'W';
2009 *o++ = '\\';
2010 *o++ = 'W';
2011 *o++ = '*';
2012 }
2013
2014 prev_c = c;
2015 }
ca1d1d23
JB
2016
2017 *o++ = '\\';
2018 *o++ = 'b';
2019
2020 return val;
2021}
2022\f
2023DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
6af43974 2024 "MSearch backward: ",
ca1d1d23
JB
2025 "Search backward from point for STRING.\n\
2026Set point to the beginning of the occurrence found, and return point.\n\
2027An optional second argument bounds the search; it is a buffer position.\n\
2028The match found must not extend before that position.\n\
2029Optional third argument, if t, means if fail just return nil (no error).\n\
2030 If not nil and not t, position at limit of search and return nil.\n\
2031Optional fourth argument is repeat count--search for successive occurrences.\n\
d594a73b
EZ
2032\n\
2033Search case-sensitivity is determined by the value of the variable\n\
2034`case-fold-search', which see.\n\
2035\n\
ca1d1d23
JB
2036See also the functions `match-beginning', `match-end' and `replace-match'.")
2037 (string, bound, noerror, count)
2038 Lisp_Object string, bound, noerror, count;
2039{
b819a390 2040 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
2041}
2042
6af43974 2043DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
ca1d1d23
JB
2044 "Search forward from point for STRING.\n\
2045Set point to the end of the occurrence found, and return point.\n\
2046An optional second argument bounds the search; it is a buffer position.\n\
2047The match found must not extend after that position. nil is equivalent\n\
2048 to (point-max).\n\
2049Optional third argument, if t, means if fail just return nil (no error).\n\
2050 If not nil and not t, move to limit of search and return nil.\n\
2051Optional fourth argument is repeat count--search for successive occurrences.\n\
d594a73b
EZ
2052\n\
2053Search case-sensitivity is determined by the value of the variable\n\
2054`case-fold-search', which see.\n\
2055\n\
ca1d1d23
JB
2056See also the functions `match-beginning', `match-end' and `replace-match'.")
2057 (string, bound, noerror, count)
2058 Lisp_Object string, bound, noerror, count;
2059{
b819a390 2060 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
2061}
2062
2063DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2064 "sWord search backward: ",
2065 "Search backward from point for STRING, ignoring differences in punctuation.\n\
2066Set point to the beginning of the occurrence found, and return point.\n\
2067An optional second argument bounds the search; it is a buffer position.\n\
2068The match found must not extend before that position.\n\
2069Optional third argument, if t, means if fail just return nil (no error).\n\
2070 If not nil and not t, move to limit of search and return nil.\n\
2071Optional fourth argument is repeat count--search for successive occurrences.")
2072 (string, bound, noerror, count)
2073 Lisp_Object string, bound, noerror, count;
2074{
b819a390 2075 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2076}
2077
2078DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2079 "sWord search: ",
2080 "Search forward from point for STRING, ignoring differences in punctuation.\n\
2081Set point to the end of the occurrence found, and return point.\n\
2082An optional second argument bounds the search; it is a buffer position.\n\
2083The match found must not extend after that position.\n\
2084Optional third argument, if t, means if fail just return nil (no error).\n\
2085 If not nil and not t, move to limit of search and return nil.\n\
2086Optional fourth argument is repeat count--search for successive occurrences.")
2087 (string, bound, noerror, count)
2088 Lisp_Object string, bound, noerror, count;
2089{
b819a390 2090 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
2091}
2092
2093DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2094 "sRE search backward: ",
2095 "Search backward from point for match for regular expression REGEXP.\n\
2096Set point to the beginning of the match, and return point.\n\
2097The match found is the one starting last in the buffer\n\
19c0a730 2098and yet ending before the origin of the search.\n\
ca1d1d23
JB
2099An optional second argument bounds the search; it is a buffer position.\n\
2100The match found must start at or after that position.\n\
2101Optional third argument, if t, means if fail just return nil (no error).\n\
2102 If not nil and not t, move to limit of search and return nil.\n\
2103Optional fourth argument is repeat count--search for successive occurrences.\n\
9dddb23f 2104See also the functions `match-beginning', `match-end', `match-string',\n\
cb6560a1 2105and `replace-match'.")
19c0a730
KH
2106 (regexp, bound, noerror, count)
2107 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2108{
b819a390 2109 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2110}
2111
2112DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2113 "sRE search: ",
2114 "Search forward from point for regular expression REGEXP.\n\
2115Set point to the end of the occurrence found, and return point.\n\
2116An optional second argument bounds the search; it is a buffer position.\n\
2117The match found must not extend after that position.\n\
2118Optional third argument, if t, means if fail just return nil (no error).\n\
2119 If not nil and not t, move to limit of search and return nil.\n\
2120Optional fourth argument is repeat count--search for successive occurrences.\n\
9dddb23f 2121See also the functions `match-beginning', `match-end', `match-string',\n\
cb6560a1 2122and `replace-match'.")
19c0a730
KH
2123 (regexp, bound, noerror, count)
2124 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2125{
b819a390
RS
2126 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2127}
2128
2129DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2130 "sPosix search backward: ",
2131 "Search backward from point for match for regular expression REGEXP.\n\
2132Find the longest match in accord with Posix regular expression rules.\n\
2133Set point to the beginning of the match, and return point.\n\
2134The match found is the one starting last in the buffer\n\
2135and yet ending before the origin of the search.\n\
2136An optional second argument bounds the search; it is a buffer position.\n\
2137The match found must start at or after that position.\n\
2138Optional third argument, if t, means if fail just return nil (no error).\n\
2139 If not nil and not t, move to limit of search and return nil.\n\
2140Optional fourth argument is repeat count--search for successive occurrences.\n\
9dddb23f 2141See also the functions `match-beginning', `match-end', `match-string',\n\
cb6560a1 2142and `replace-match'.")
b819a390
RS
2143 (regexp, bound, noerror, count)
2144 Lisp_Object regexp, bound, noerror, count;
2145{
2146 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2147}
2148
2149DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2150 "sPosix search: ",
2151 "Search forward from point for regular expression REGEXP.\n\
2152Find the longest match in accord with Posix regular expression rules.\n\
2153Set point to the end of the occurrence found, and return point.\n\
2154An optional second argument bounds the search; it is a buffer position.\n\
2155The match found must not extend after that position.\n\
2156Optional third argument, if t, means if fail just return nil (no error).\n\
2157 If not nil and not t, move to limit of search and return nil.\n\
2158Optional fourth argument is repeat count--search for successive occurrences.\n\
9dddb23f 2159See also the functions `match-beginning', `match-end', `match-string',\n\
cb6560a1 2160and `replace-match'.")
b819a390
RS
2161 (regexp, bound, noerror, count)
2162 Lisp_Object regexp, bound, noerror, count;
2163{
2164 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2165}
2166\f
d7a5ad5f 2167DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
ca1d1d23
JB
2168 "Replace text matched by last search with NEWTEXT.\n\
2169If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
5b9cf4b2
RS
2170Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
2171based on the replaced text.\n\
2172If the replaced text has only capital letters\n\
2173and has at least one multiletter word, convert NEWTEXT to all caps.\n\
2174If the replaced text has at least one word starting with a capital letter,\n\
2175then capitalize each word in NEWTEXT.\n\n\
ca1d1d23
JB
2176If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
2177Otherwise treat `\\' as special:\n\
2178 `\\&' in NEWTEXT means substitute original matched text.\n\
2179 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
2180 If Nth parens didn't match, substitute nothing.\n\
2181 `\\\\' means insert one `\\'.\n\
1113d9db 2182FIXEDCASE and LITERAL are optional arguments.\n\
080c45fd
RS
2183Leaves point at end of replacement text.\n\
2184\n\
2185The optional fourth argument STRING can be a string to modify.\n\
2186In that case, this function creates and returns a new string\n\
d7a5ad5f
RS
2187which is made by replacing the part of STRING that was matched.\n\
2188\n\
2189The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
2190It says to replace just that subexpression instead of the whole match.\n\
2191This is useful only after a regular expression search or match\n\
2192since only regular expressions have distinguished subexpressions.")
2193 (newtext, fixedcase, literal, string, subexp)
2194 Lisp_Object newtext, fixedcase, literal, string, subexp;
ca1d1d23
JB
2195{
2196 enum { nochange, all_caps, cap_initial } case_action;
ac3b28b1 2197 register int pos, pos_byte;
ca1d1d23 2198 int some_multiletter_word;
97832bd0 2199 int some_lowercase;
73dc8771 2200 int some_uppercase;
208767c3 2201 int some_nonuppercase_initial;
ca1d1d23
JB
2202 register int c, prevc;
2203 int inslen;
d7a5ad5f 2204 int sub;
3e18eecf 2205 int opoint, newpoint;
ca1d1d23 2206
16fdc568 2207 CHECK_STRING (newtext, 0);
ca1d1d23 2208
080c45fd
RS
2209 if (! NILP (string))
2210 CHECK_STRING (string, 4);
2211
ca1d1d23
JB
2212 case_action = nochange; /* We tried an initialization */
2213 /* but some C compilers blew it */
4746118a
JB
2214
2215 if (search_regs.num_regs <= 0)
2216 error ("replace-match called before any match found");
2217
d7a5ad5f
RS
2218 if (NILP (subexp))
2219 sub = 0;
2220 else
2221 {
2222 CHECK_NUMBER (subexp, 3);
2223 sub = XINT (subexp);
2224 if (sub < 0 || sub >= search_regs.num_regs)
2225 args_out_of_range (subexp, make_number (search_regs.num_regs));
2226 }
2227
080c45fd
RS
2228 if (NILP (string))
2229 {
d7a5ad5f
RS
2230 if (search_regs.start[sub] < BEGV
2231 || search_regs.start[sub] > search_regs.end[sub]
2232 || search_regs.end[sub] > ZV)
2233 args_out_of_range (make_number (search_regs.start[sub]),
2234 make_number (search_regs.end[sub]));
080c45fd
RS
2235 }
2236 else
2237 {
d7a5ad5f
RS
2238 if (search_regs.start[sub] < 0
2239 || search_regs.start[sub] > search_regs.end[sub]
2240 || search_regs.end[sub] > XSTRING (string)->size)
2241 args_out_of_range (make_number (search_regs.start[sub]),
2242 make_number (search_regs.end[sub]));
080c45fd 2243 }
ca1d1d23
JB
2244
2245 if (NILP (fixedcase))
2246 {
2247 /* Decide how to casify by examining the matched text. */
ac3b28b1 2248 int last;
ca1d1d23 2249
ac3b28b1
KH
2250 pos = search_regs.start[sub];
2251 last = search_regs.end[sub];
fa8ed3e0
RS
2252
2253 if (NILP (string))
ac3b28b1 2254 pos_byte = CHAR_TO_BYTE (pos);
fa8ed3e0 2255 else
ac3b28b1 2256 pos_byte = string_char_to_byte (string, pos);
fa8ed3e0 2257
ca1d1d23
JB
2258 prevc = '\n';
2259 case_action = all_caps;
2260
2261 /* some_multiletter_word is set nonzero if any original word
2262 is more than one letter long. */
2263 some_multiletter_word = 0;
97832bd0 2264 some_lowercase = 0;
208767c3 2265 some_nonuppercase_initial = 0;
73dc8771 2266 some_uppercase = 0;
ca1d1d23 2267
ac3b28b1 2268 while (pos < last)
ca1d1d23 2269 {
080c45fd 2270 if (NILP (string))
ac3b28b1
KH
2271 {
2272 c = FETCH_CHAR (pos_byte);
2273 INC_BOTH (pos, pos_byte);
2274 }
080c45fd 2275 else
ac3b28b1 2276 FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
080c45fd 2277
ca1d1d23
JB
2278 if (LOWERCASEP (c))
2279 {
2280 /* Cannot be all caps if any original char is lower case */
2281
97832bd0 2282 some_lowercase = 1;
ca1d1d23 2283 if (SYNTAX (prevc) != Sword)
208767c3 2284 some_nonuppercase_initial = 1;
ca1d1d23
JB
2285 else
2286 some_multiletter_word = 1;
2287 }
2288 else if (!NOCASEP (c))
2289 {
73dc8771 2290 some_uppercase = 1;
97832bd0 2291 if (SYNTAX (prevc) != Sword)
c4d460ce 2292 ;
97832bd0 2293 else
ca1d1d23
JB
2294 some_multiletter_word = 1;
2295 }
208767c3
RS
2296 else
2297 {
2298 /* If the initial is a caseless word constituent,
2299 treat that like a lowercase initial. */
2300 if (SYNTAX (prevc) != Sword)
2301 some_nonuppercase_initial = 1;
2302 }
ca1d1d23
JB
2303
2304 prevc = c;
2305 }
2306
97832bd0
RS
2307 /* Convert to all caps if the old text is all caps
2308 and has at least one multiletter word. */
2309 if (! some_lowercase && some_multiletter_word)
2310 case_action = all_caps;
c4d460ce 2311 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2312 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2313 case_action = cap_initial;
208767c3 2314 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2315 /* Should x -> yz, operating on X, give Yz or YZ?
2316 We'll assume the latter. */
2317 case_action = all_caps;
97832bd0
RS
2318 else
2319 case_action = nochange;
ca1d1d23
JB
2320 }
2321
080c45fd
RS
2322 /* Do replacement in a string. */
2323 if (!NILP (string))
2324 {
2325 Lisp_Object before, after;
2326
2327 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2328 make_number (search_regs.start[sub]));
2329 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2330
636a5e28
RS
2331 /* Substitute parts of the match into NEWTEXT
2332 if desired. */
080c45fd
RS
2333 if (NILP (literal))
2334 {
d131e79c
RS
2335 int lastpos = 0;
2336 int lastpos_byte = 0;
080c45fd
RS
2337 /* We build up the substituted string in ACCUM. */
2338 Lisp_Object accum;
2339 Lisp_Object middle;
ac3b28b1 2340 int length = STRING_BYTES (XSTRING (newtext));
080c45fd
RS
2341
2342 accum = Qnil;
2343
ac3b28b1 2344 for (pos_byte = 0, pos = 0; pos_byte < length;)
080c45fd
RS
2345 {
2346 int substart = -1;
6bbd7a29 2347 int subend = 0;
1e79ec24 2348 int delbackslash = 0;
080c45fd 2349
0c8533c6
RS
2350 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2351
080c45fd
RS
2352 if (c == '\\')
2353 {
0c8533c6 2354 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
eb99a8dd 2355
080c45fd
RS
2356 if (c == '&')
2357 {
d7a5ad5f
RS
2358 substart = search_regs.start[sub];
2359 subend = search_regs.end[sub];
080c45fd
RS
2360 }
2361 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2362 {
ad10348f 2363 if (search_regs.start[c - '0'] >= 0)
080c45fd
RS
2364 {
2365 substart = search_regs.start[c - '0'];
2366 subend = search_regs.end[c - '0'];
2367 }
2368 }
1e79ec24
KH
2369 else if (c == '\\')
2370 delbackslash = 1;
636a5e28
RS
2371 else
2372 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2373 }
2374 if (substart >= 0)
2375 {
d131e79c
RS
2376 if (pos - 2 != lastpos)
2377 middle = substring_both (newtext, lastpos,
2378 lastpos_byte,
2379 pos - 2, pos_byte - 2);
080c45fd
RS
2380 else
2381 middle = Qnil;
2382 accum = concat3 (accum, middle,
0c8533c6
RS
2383 Fsubstring (string,
2384 make_number (substart),
080c45fd
RS
2385 make_number (subend)));
2386 lastpos = pos;
0c8533c6 2387 lastpos_byte = pos_byte;
080c45fd 2388 }
1e79ec24
KH
2389 else if (delbackslash)
2390 {
d131e79c
RS
2391 middle = substring_both (newtext, lastpos,
2392 lastpos_byte,
2393 pos - 1, pos_byte - 1);
0c8533c6 2394
1e79ec24
KH
2395 accum = concat2 (accum, middle);
2396 lastpos = pos;
0c8533c6 2397 lastpos_byte = pos_byte;
1e79ec24 2398 }
080c45fd
RS
2399 }
2400
d131e79c
RS
2401 if (pos != lastpos)
2402 middle = substring_both (newtext, lastpos,
2403 lastpos_byte,
0c8533c6 2404 pos, pos_byte);
080c45fd
RS
2405 else
2406 middle = Qnil;
2407
2408 newtext = concat2 (accum, middle);
2409 }
2410
636a5e28 2411 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2412 if (case_action == all_caps)
2413 newtext = Fupcase (newtext);
2414 else if (case_action == cap_initial)
2b2eead9 2415 newtext = Fupcase_initials (newtext);
080c45fd
RS
2416
2417 return concat3 (before, newtext, after);
2418 }
2419
b0eba991 2420 /* Record point, the move (quietly) to the start of the match. */
9160906f 2421 if (PT >= search_regs.end[sub])
b0eba991 2422 opoint = PT - ZV;
9160906f
RS
2423 else if (PT > search_regs.start[sub])
2424 opoint = search_regs.end[sub] - ZV;
b0eba991
RS
2425 else
2426 opoint = PT;
2427
fa8ed3e0 2428 TEMP_SET_PT (search_regs.start[sub]);
b0eba991 2429
9a76659d
JB
2430 /* We insert the replacement text before the old text, and then
2431 delete the original text. This means that markers at the
2432 beginning or end of the original will float to the corresponding
2433 position in the replacement. */
ca1d1d23 2434 if (!NILP (literal))
16fdc568 2435 Finsert_and_inherit (1, &newtext);
ca1d1d23
JB
2436 else
2437 {
ac3b28b1 2438 int length = STRING_BYTES (XSTRING (newtext));
68e69fbd
RS
2439 unsigned char *substed;
2440 int substed_alloc_size, substed_len;
3bc25e52
KH
2441 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2442 int str_multibyte = STRING_MULTIBYTE (newtext);
2443 Lisp_Object rev_tbl;
2444
2445 rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2446 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2447 make_number (0))
2448 : Qnil);
ac3b28b1 2449
68e69fbd
RS
2450 substed_alloc_size = length * 2 + 100;
2451 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2452 substed_len = 0;
2453
3bc25e52
KH
2454 /* Go thru NEWTEXT, producing the actual text to insert in
2455 SUBSTED while adjusting multibyteness to that of the current
2456 buffer. */
ca1d1d23 2457
ac3b28b1 2458 for (pos_byte = 0, pos = 0; pos_byte < length;)
ca1d1d23 2459 {
68e69fbd 2460 unsigned char str[MAX_MULTIBYTE_LENGTH];
f8ce8a0d
GM
2461 unsigned char *add_stuff = NULL;
2462 int add_len = 0;
68e69fbd 2463 int idx = -1;
9a76659d 2464
3bc25e52
KH
2465 if (str_multibyte)
2466 {
eb99a8dd 2467 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
3bc25e52
KH
2468 if (!buf_multibyte)
2469 c = multibyte_char_to_unibyte (c, rev_tbl);
2470 }
2471 else
2472 {
2473 /* Note that we don't have to increment POS. */
2474 c = XSTRING (newtext)->data[pos_byte++];
2475 if (buf_multibyte)
2476 c = unibyte_char_to_multibyte (c);
2477 }
ac3b28b1 2478
68e69fbd
RS
2479 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2480 or set IDX to a match index, which means put that part
2481 of the buffer text into SUBSTED. */
2482
ca1d1d23
JB
2483 if (c == '\\')
2484 {
3bc25e52
KH
2485 if (str_multibyte)
2486 {
eb99a8dd
KH
2487 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2488 pos, pos_byte);
3bc25e52
KH
2489 if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2490 c = multibyte_char_to_unibyte (c, rev_tbl);
2491 }
2492 else
2493 {
2494 c = XSTRING (newtext)->data[pos_byte++];
2495 if (buf_multibyte)
2496 c = unibyte_char_to_multibyte (c);
2497 }
2498
ca1d1d23 2499 if (c == '&')
68e69fbd 2500 idx = sub;
78445046 2501 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
2502 {
2503 if (search_regs.start[c - '0'] >= 1)
68e69fbd 2504 idx = c - '0';
ca1d1d23 2505 }
636a5e28 2506 else if (c == '\\')
68e69fbd 2507 add_len = 1, add_stuff = "\\";
636a5e28 2508 else
3bc25e52
KH
2509 {
2510 xfree (substed);
2511 error ("Invalid use of `\\' in replacement text");
2512 }
ca1d1d23
JB
2513 }
2514 else
68e69fbd
RS
2515 {
2516 add_len = CHAR_STRING (c, str);
2517 add_stuff = str;
2518 }
2519
2520 /* If we want to copy part of a previous match,
2521 set up ADD_STUFF and ADD_LEN to point to it. */
2522 if (idx >= 0)
2523 {
2524 int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2525 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2526 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2527 move_gap (search_regs.start[idx]);
2528 add_stuff = BYTE_POS_ADDR (begbyte);
2529 }
2530
2531 /* Now the stuff we want to add to SUBSTED
2532 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2533
2534 /* Make sure SUBSTED is big enough. */
2535 if (substed_len + add_len >= substed_alloc_size)
2536 {
2537 substed_alloc_size = substed_len + add_len + 500;
2538 substed = (unsigned char *) xrealloc (substed,
2539 substed_alloc_size + 1);
2540 }
2541
2542 /* Now add to the end of SUBSTED. */
f8ce8a0d
GM
2543 if (add_stuff)
2544 {
2545 bcopy (add_stuff, substed + substed_len, add_len);
2546 substed_len += add_len;
2547 }
ca1d1d23 2548 }
68e69fbd
RS
2549
2550 /* Now insert what we accumulated. */
2551 insert_and_inherit (substed, substed_len);
2552
2553 xfree (substed);
ca1d1d23
JB
2554 }
2555
6ec8bbd2 2556 inslen = PT - (search_regs.start[sub]);
d7a5ad5f 2557 del_range (search_regs.start[sub] + inslen, search_regs.end[sub] + inslen);
ca1d1d23
JB
2558
2559 if (case_action == all_caps)
6ec8bbd2 2560 Fupcase_region (make_number (PT - inslen), make_number (PT));
ca1d1d23 2561 else if (case_action == cap_initial)
6ec8bbd2 2562 Fupcase_initials_region (make_number (PT - inslen), make_number (PT));
b0eba991 2563
3e18eecf
RS
2564 newpoint = PT;
2565
b0eba991 2566 /* Put point back where it was in the text. */
8d808a65 2567 if (opoint <= 0)
fa8ed3e0 2568 TEMP_SET_PT (opoint + ZV);
b0eba991 2569 else
fa8ed3e0 2570 TEMP_SET_PT (opoint);
b0eba991
RS
2571
2572 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2573 move_if_not_intangible (newpoint);
b0eba991 2574
ca1d1d23
JB
2575 return Qnil;
2576}
2577\f
2578static Lisp_Object
2579match_limit (num, beginningp)
2580 Lisp_Object num;
2581 int beginningp;
2582{
2583 register int n;
2584
2585 CHECK_NUMBER (num, 0);
2586 n = XINT (num);
4746118a
JB
2587 if (n < 0 || n >= search_regs.num_regs)
2588 args_out_of_range (num, make_number (search_regs.num_regs));
2589 if (search_regs.num_regs <= 0
2590 || search_regs.start[n] < 0)
ca1d1d23
JB
2591 return Qnil;
2592 return (make_number ((beginningp) ? search_regs.start[n]
2593 : search_regs.end[n]));
2594}
2595
2596DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2597 "Return position of start of text matched by last search.\n\
5806161b
EN
2598SUBEXP, a number, specifies which parenthesized expression in the last\n\
2599 regexp.\n\
2600Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2601 SUBEXP pairs.\n\
ca1d1d23 2602Zero means the entire text matched by the whole regexp or whole string.")
5806161b
EN
2603 (subexp)
2604 Lisp_Object subexp;
ca1d1d23 2605{
5806161b 2606 return match_limit (subexp, 1);
ca1d1d23
JB
2607}
2608
2609DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2610 "Return position of end of text matched by last search.\n\
5806161b
EN
2611SUBEXP, a number, specifies which parenthesized expression in the last\n\
2612 regexp.\n\
2613Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2614 SUBEXP pairs.\n\
ca1d1d23 2615Zero means the entire text matched by the whole regexp or whole string.")
5806161b
EN
2616 (subexp)
2617 Lisp_Object subexp;
ca1d1d23 2618{
5806161b 2619 return match_limit (subexp, 0);
ca1d1d23
JB
2620}
2621
56256c2a 2622DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
ca1d1d23
JB
2623 "Return a list containing all info on what the last search matched.\n\
2624Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
2625All the elements are markers or nil (nil if the Nth pair didn't match)\n\
2626if the last match was on a buffer; integers or nil if a string was matched.\n\
56256c2a
RS
2627Use `store-match-data' to reinstate the data in this list.\n\
2628\n\
2629If INTEGERS (the optional first argument) is non-nil, always use integers\n\
8ca821e9 2630\(rather than markers) to represent buffer positions.\n\
56256c2a
RS
2631If REUSE is a list, reuse it as part of the value. If REUSE is long enough\n\
2632to hold all the values, and if INTEGERS is non-nil, no consing is done.")
2633 (integers, reuse)
2634 Lisp_Object integers, reuse;
ca1d1d23 2635{
56256c2a 2636 Lisp_Object tail, prev;
4746118a 2637 Lisp_Object *data;
ca1d1d23
JB
2638 int i, len;
2639
daa37602 2640 if (NILP (last_thing_searched))
c36bcf1b 2641 return Qnil;
daa37602 2642
6bbd7a29
GM
2643 prev = Qnil;
2644
4746118a
JB
2645 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2646 * sizeof (Lisp_Object));
2647
ca1d1d23 2648 len = -1;
4746118a 2649 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2650 {
2651 int start = search_regs.start[i];
2652 if (start >= 0)
2653 {
56256c2a
RS
2654 if (EQ (last_thing_searched, Qt)
2655 || ! NILP (integers))
ca1d1d23 2656 {
c235cce7
KH
2657 XSETFASTINT (data[2 * i], start);
2658 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2659 }
0ed62dc7 2660 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2661 {
2662 data[2 * i] = Fmake_marker ();
daa37602
JB
2663 Fset_marker (data[2 * i],
2664 make_number (start),
2665 last_thing_searched);
ca1d1d23
JB
2666 data[2 * i + 1] = Fmake_marker ();
2667 Fset_marker (data[2 * i + 1],
daa37602
JB
2668 make_number (search_regs.end[i]),
2669 last_thing_searched);
ca1d1d23 2670 }
daa37602
JB
2671 else
2672 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2673 abort ();
2674
ca1d1d23
JB
2675 len = i;
2676 }
2677 else
2678 data[2 * i] = data [2 * i + 1] = Qnil;
2679 }
56256c2a
RS
2680
2681 /* If REUSE is not usable, cons up the values and return them. */
2682 if (! CONSP (reuse))
2683 return Flist (2 * len + 2, data);
2684
2685 /* If REUSE is a list, store as many value elements as will fit
2686 into the elements of REUSE. */
2687 for (i = 0, tail = reuse; CONSP (tail);
c1d497be 2688 i++, tail = XCDR (tail))
56256c2a
RS
2689 {
2690 if (i < 2 * len + 2)
c1d497be 2691 XCAR (tail) = data[i];
56256c2a 2692 else
c1d497be 2693 XCAR (tail) = Qnil;
56256c2a
RS
2694 prev = tail;
2695 }
2696
2697 /* If we couldn't fit all value elements into REUSE,
2698 cons up the rest of them and add them to the end of REUSE. */
2699 if (i < 2 * len + 2)
c1d497be 2700 XCDR (prev) = Flist (2 * len + 2 - i, data + i);
56256c2a
RS
2701
2702 return reuse;
ca1d1d23
JB
2703}
2704
2705
3f1c005b 2706DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
ca1d1d23
JB
2707 "Set internal data on last search match from elements of LIST.\n\
2708LIST should have been created by calling `match-data' previously.")
2709 (list)
2710 register Lisp_Object list;
2711{
2712 register int i;
2713 register Lisp_Object marker;
2714
7074fde6
FP
2715 if (running_asynch_code)
2716 save_search_regs ();
2717
ca1d1d23 2718 if (!CONSP (list) && !NILP (list))
b37902c8 2719 list = wrong_type_argument (Qconsp, list);
ca1d1d23 2720
daa37602
JB
2721 /* Unless we find a marker with a buffer in LIST, assume that this
2722 match data came from a string. */
2723 last_thing_searched = Qt;
2724
4746118a
JB
2725 /* Allocate registers if they don't already exist. */
2726 {
d084e942 2727 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
2728
2729 if (length > search_regs.num_regs)
2730 {
1113d9db
JB
2731 if (search_regs.num_regs == 0)
2732 {
2733 search_regs.start
2734 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2735 search_regs.end
2736 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2737 }
4746118a 2738 else
1113d9db
JB
2739 {
2740 search_regs.start
2741 = (regoff_t *) xrealloc (search_regs.start,
2742 length * sizeof (regoff_t));
2743 search_regs.end
2744 = (regoff_t *) xrealloc (search_regs.end,
2745 length * sizeof (regoff_t));
2746 }
4746118a 2747
e62371e9
KH
2748 for (i = search_regs.num_regs; i < length; i++)
2749 search_regs.start[i] = -1;
2750
487282dc 2751 search_regs.num_regs = length;
4746118a
JB
2752 }
2753 }
2754
2755 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2756 {
2757 marker = Fcar (list);
2758 if (NILP (marker))
2759 {
2760 search_regs.start[i] = -1;
2761 list = Fcdr (list);
2762 }
2763 else
2764 {
e62371e9
KH
2765 int from;
2766
0ed62dc7 2767 if (MARKERP (marker))
daa37602
JB
2768 {
2769 if (XMARKER (marker)->buffer == 0)
c235cce7 2770 XSETFASTINT (marker, 0);
daa37602 2771 else
a3668d92 2772 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
daa37602 2773 }
ca1d1d23
JB
2774
2775 CHECK_NUMBER_COERCE_MARKER (marker, 0);
e62371e9 2776 from = XINT (marker);
ca1d1d23
JB
2777 list = Fcdr (list);
2778
2779 marker = Fcar (list);
0ed62dc7 2780 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
c235cce7 2781 XSETFASTINT (marker, 0);
ca1d1d23
JB
2782
2783 CHECK_NUMBER_COERCE_MARKER (marker, 0);
e62371e9 2784 search_regs.start[i] = from;
ca1d1d23
JB
2785 search_regs.end[i] = XINT (marker);
2786 }
2787 list = Fcdr (list);
2788 }
2789
2790 return Qnil;
2791}
2792
7074fde6
FP
2793/* If non-zero the match data have been saved in saved_search_regs
2794 during the execution of a sentinel or filter. */
75ebf74b 2795static int search_regs_saved;
7074fde6
FP
2796static struct re_registers saved_search_regs;
2797
2798/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2799 if asynchronous code (filter or sentinel) is running. */
2800static void
2801save_search_regs ()
2802{
2803 if (!search_regs_saved)
2804 {
2805 saved_search_regs.num_regs = search_regs.num_regs;
2806 saved_search_regs.start = search_regs.start;
2807 saved_search_regs.end = search_regs.end;
2808 search_regs.num_regs = 0;
2d4a771a
RS
2809 search_regs.start = 0;
2810 search_regs.end = 0;
7074fde6
FP
2811
2812 search_regs_saved = 1;
2813 }
2814}
2815
2816/* Called upon exit from filters and sentinels. */
2817void
2818restore_match_data ()
2819{
2820 if (search_regs_saved)
2821 {
2822 if (search_regs.num_regs > 0)
2823 {
2824 xfree (search_regs.start);
2825 xfree (search_regs.end);
2826 }
2827 search_regs.num_regs = saved_search_regs.num_regs;
2828 search_regs.start = saved_search_regs.start;
2829 search_regs.end = saved_search_regs.end;
2830
2831 search_regs_saved = 0;
2832 }
2833}
2834
ca1d1d23
JB
2835/* Quote a string to inactivate reg-expr chars */
2836
2837DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2838 "Return a regexp string which matches exactly STRING and nothing else.")
5806161b
EN
2839 (string)
2840 Lisp_Object string;
ca1d1d23
JB
2841{
2842 register unsigned char *in, *out, *end;
2843 register unsigned char *temp;
0c8533c6 2844 int backslashes_added = 0;
ca1d1d23 2845
5806161b 2846 CHECK_STRING (string, 0);
ca1d1d23 2847
fc932ac6 2848 temp = (unsigned char *) alloca (STRING_BYTES (XSTRING (string)) * 2);
ca1d1d23
JB
2849
2850 /* Now copy the data into the new string, inserting escapes. */
2851
5806161b 2852 in = XSTRING (string)->data;
fc932ac6 2853 end = in + STRING_BYTES (XSTRING (string));
ca1d1d23
JB
2854 out = temp;
2855
2856 for (; in != end; in++)
2857 {
2858 if (*in == '[' || *in == ']'
2859 || *in == '*' || *in == '.' || *in == '\\'
2860 || *in == '?' || *in == '+'
2861 || *in == '^' || *in == '$')
0c8533c6 2862 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
2863 *out++ = *in;
2864 }
2865
3f8100f1 2866 return make_specified_string (temp,
0c8533c6 2867 XSTRING (string)->size + backslashes_added,
3f8100f1
RS
2868 out - temp,
2869 STRING_MULTIBYTE (string));
ca1d1d23
JB
2870}
2871\f
dfcf069d 2872void
ca1d1d23
JB
2873syms_of_search ()
2874{
2875 register int i;
2876
487282dc
KH
2877 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2878 {
2879 searchbufs[i].buf.allocated = 100;
2880 searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
2881 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2882 searchbufs[i].regexp = Qnil;
2883 staticpro (&searchbufs[i].regexp);
2884 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2885 }
2886 searchbuf_head = &searchbufs[0];
ca1d1d23
JB
2887
2888 Qsearch_failed = intern ("search-failed");
2889 staticpro (&Qsearch_failed);
2890 Qinvalid_regexp = intern ("invalid-regexp");
2891 staticpro (&Qinvalid_regexp);
2892
2893 Fput (Qsearch_failed, Qerror_conditions,
2894 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2895 Fput (Qsearch_failed, Qerror_message,
2896 build_string ("Search failed"));
2897
2898 Fput (Qinvalid_regexp, Qerror_conditions,
2899 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2900 Fput (Qinvalid_regexp, Qerror_message,
2901 build_string ("Invalid regexp"));
2902
daa37602
JB
2903 last_thing_searched = Qnil;
2904 staticpro (&last_thing_searched);
2905
ca1d1d23 2906 defsubr (&Slooking_at);
b819a390
RS
2907 defsubr (&Sposix_looking_at);
2908 defsubr (&Sstring_match);
2909 defsubr (&Sposix_string_match);
ca1d1d23
JB
2910 defsubr (&Ssearch_forward);
2911 defsubr (&Ssearch_backward);
2912 defsubr (&Sword_search_forward);
2913 defsubr (&Sword_search_backward);
2914 defsubr (&Sre_search_forward);
2915 defsubr (&Sre_search_backward);
b819a390
RS
2916 defsubr (&Sposix_search_forward);
2917 defsubr (&Sposix_search_backward);
ca1d1d23
JB
2918 defsubr (&Sreplace_match);
2919 defsubr (&Smatch_beginning);
2920 defsubr (&Smatch_end);
2921 defsubr (&Smatch_data);
3f1c005b 2922 defsubr (&Sset_match_data);
ca1d1d23
JB
2923 defsubr (&Sregexp_quote);
2924}