*** empty log message ***
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
68c45bf0 2 Copyright (C) 1985, 86,87,93,94,97,98, 1999 Free Software Foundation, Inc.
ca1d1d23
JB
3
4This file is part of GNU Emacs.
5
6GNU Emacs is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
7c938215 8the Free Software Foundation; either version 2, or (at your option)
ca1d1d23
JB
9any later version.
10
11GNU Emacs is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU Emacs; see the file COPYING. If not, write to
3b7ad313
EN
18the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19Boston, MA 02111-1307, USA. */
ca1d1d23
JB
20
21
18160b98 22#include <config.h>
ca1d1d23
JB
23#include "lisp.h"
24#include "syntax.h"
5679531d 25#include "category.h"
ca1d1d23 26#include "buffer.h"
5679531d 27#include "charset.h"
9169c321 28#include "region-cache.h"
ca1d1d23 29#include "commands.h"
9ac0d9e0 30#include "blockinput.h"
bf1760bb 31#include "intervals.h"
4746118a 32
ca1d1d23
JB
33#include <sys/types.h>
34#include "regex.h"
35
67ce527d
KH
36#define min(a, b) ((a) < (b) ? (a) : (b))
37#define max(a, b) ((a) > (b) ? (a) : (b))
38
1d288aef 39#define REGEXP_CACHE_SIZE 20
ca1d1d23 40
487282dc
KH
41/* If the regexp is non-nil, then the buffer contains the compiled form
42 of that regexp, suitable for searching. */
1d288aef
RS
43struct regexp_cache
44{
487282dc
KH
45 struct regexp_cache *next;
46 Lisp_Object regexp;
47 struct re_pattern_buffer buf;
48 char fastmap[0400];
b819a390
RS
49 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
50 char posix;
487282dc 51};
ca1d1d23 52
487282dc
KH
53/* The instances of that struct. */
54struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 55
487282dc
KH
56/* The head of the linked list; points to the most recently used buffer. */
57struct regexp_cache *searchbuf_head;
ca1d1d23 58
ca1d1d23 59
4746118a
JB
60/* Every call to re_match, etc., must pass &search_regs as the regs
61 argument unless you can show it is unnecessary (i.e., if re_match
62 is certainly going to be called again before region-around-match
63 can be called).
64
65 Since the registers are now dynamically allocated, we need to make
66 sure not to refer to the Nth register before checking that it has
1113d9db
JB
67 been allocated by checking search_regs.num_regs.
68
69 The regex code keeps track of whether it has allocated the search
487282dc
KH
70 buffer using bits in the re_pattern_buffer. This means that whenever
71 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
72 allocated any registers, and will allocate new registers the next
73 time you call a searching or matching function. Therefore, we need
74 to call re_set_registers after compiling a new pattern or after
75 setting the match registers, so that the regex functions will be
76 able to free or re-allocate it properly. */
ca1d1d23
JB
77static struct re_registers search_regs;
78
daa37602
JB
79/* The buffer in which the last search was performed, or
80 Qt if the last search was done in a string;
81 Qnil if no searching has been done yet. */
82static Lisp_Object last_thing_searched;
ca1d1d23 83
8e6208c5 84/* error condition signaled when regexp compile_pattern fails */
ca1d1d23
JB
85
86Lisp_Object Qinvalid_regexp;
87
ca325161 88static void set_search_regs ();
044f81f1 89static void save_search_regs ();
facdc750
RS
90static int simple_search ();
91static int boyer_moore ();
b819a390
RS
92static int search_buffer ();
93
ca1d1d23
JB
94static void
95matcher_overflow ()
96{
97 error ("Stack overflow in regexp matcher");
98}
99
b819a390
RS
100/* Compile a regexp and signal a Lisp error if anything goes wrong.
101 PATTERN is the pattern to compile.
102 CP is the place to put the result.
facdc750 103 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
104 REGP is the structure that says where to store the "register"
105 values that will result from matching this pattern.
106 If it is 0, we should compile the pattern not to record any
107 subexpression bounds.
108 POSIX is nonzero if we want full backtracking (POSIX style)
5679531d
KH
109 for this pattern. 0 means backtrack only enough to get a valid match.
110 MULTIBYTE is nonzero if we want to handle multibyte characters in
111 PATTERN. 0 means all multibyte characters are recognized just as
112 sequences of binary data. */
ca1d1d23 113
487282dc 114static void
5679531d 115compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
487282dc 116 struct regexp_cache *cp;
ca1d1d23 117 Lisp_Object pattern;
facdc750 118 Lisp_Object translate;
487282dc 119 struct re_registers *regp;
b819a390 120 int posix;
5679531d 121 int multibyte;
ca1d1d23 122{
7276d3d8 123 unsigned char *raw_pattern;
f8bd51c4 124 int raw_pattern_size;
d451e4db 125 char *val;
b819a390 126 reg_syntax_t old;
ca1d1d23 127
f8bd51c4
KH
128 /* MULTIBYTE says whether the text to be searched is multibyte.
129 We must convert PATTERN to match that, or we will not really
130 find things right. */
131
132 if (multibyte == STRING_MULTIBYTE (pattern))
133 {
7276d3d8 134 raw_pattern = (unsigned char *) XSTRING (pattern)->data;
fc932ac6 135 raw_pattern_size = STRING_BYTES (XSTRING (pattern));
f8bd51c4
KH
136 }
137 else if (multibyte)
138 {
139 raw_pattern_size = count_size_as_multibyte (XSTRING (pattern)->data,
140 XSTRING (pattern)->size);
7276d3d8 141 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
f8bd51c4
KH
142 copy_text (XSTRING (pattern)->data, raw_pattern,
143 XSTRING (pattern)->size, 0, 1);
144 }
145 else
146 {
147 /* Converting multibyte to single-byte.
148
149 ??? Perhaps this conversion should be done in a special way
150 by subtracting nonascii-insert-offset from each non-ASCII char,
151 so that only the multibyte chars which really correspond to
152 the chosen single-byte character set can possibly match. */
153 raw_pattern_size = XSTRING (pattern)->size;
7276d3d8 154 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
f8bd51c4 155 copy_text (XSTRING (pattern)->data, raw_pattern,
fc932ac6 156 STRING_BYTES (XSTRING (pattern)), 1, 0);
f8bd51c4
KH
157 }
158
487282dc 159 cp->regexp = Qnil;
59fab369 160 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
b819a390 161 cp->posix = posix;
5679531d 162 cp->buf.multibyte = multibyte;
9ac0d9e0 163 BLOCK_INPUT;
fb4a568d 164 old = re_set_syntax (RE_SYNTAX_EMACS
b819a390 165 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
7276d3d8
RS
166 val = (char *) re_compile_pattern ((char *)raw_pattern,
167 raw_pattern_size, &cp->buf);
b819a390 168 re_set_syntax (old);
9ac0d9e0 169 UNBLOCK_INPUT;
ca1d1d23 170 if (val)
487282dc 171 Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
1113d9db 172
487282dc 173 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
174}
175
6efc7887
RS
176/* Shrink each compiled regexp buffer in the cache
177 to the size actually used right now.
178 This is called from garbage collection. */
179
180void
181shrink_regexp_cache ()
182{
183 struct regexp_cache *cp, **cpp;
184
185 for (cp = searchbuf_head; cp != 0; cp = cp->next)
186 {
187 cp->buf.allocated = cp->buf.used;
188 cp->buf.buffer
189 = (unsigned char *) realloc (cp->buf.buffer, cp->buf.used);
190 }
191}
192
487282dc 193/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
194 the cache.
195 PATTERN is the pattern to compile.
facdc750 196 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
197 REGP is the structure that says where to store the "register"
198 values that will result from matching this pattern.
199 If it is 0, we should compile the pattern not to record any
200 subexpression bounds.
201 POSIX is nonzero if we want full backtracking (POSIX style)
202 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
203
204struct re_pattern_buffer *
0c8533c6 205compile_pattern (pattern, regp, translate, posix, multibyte)
487282dc
KH
206 Lisp_Object pattern;
207 struct re_registers *regp;
facdc750 208 Lisp_Object translate;
0c8533c6 209 int posix, multibyte;
487282dc
KH
210{
211 struct regexp_cache *cp, **cpp;
212
213 for (cpp = &searchbuf_head; ; cpp = &cp->next)
214 {
215 cp = *cpp;
f1b9c7c1
KR
216 /* Entries are initialized to nil, and may be set to nil by
217 compile_pattern_1 if the pattern isn't valid. Don't apply
218 XSTRING in those cases. However, compile_pattern_1 is only
219 applied to the cache entry we pick here to reuse. So nil
220 should never appear before a non-nil entry. */
7c752c80 221 if (NILP (cp->regexp))
f1b9c7c1 222 goto compile_it;
1d288aef 223 if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
cf69b13e 224 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
1d288aef 225 && !NILP (Fstring_equal (cp->regexp, pattern))
59fab369 226 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
5679531d
KH
227 && cp->posix == posix
228 && cp->buf.multibyte == multibyte)
487282dc
KH
229 break;
230
f1b9c7c1
KR
231 /* If we're at the end of the cache, compile into the nil cell
232 we found, or the last (least recently used) cell with a
233 string value. */
487282dc
KH
234 if (cp->next == 0)
235 {
f1b9c7c1 236 compile_it:
5679531d 237 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
487282dc
KH
238 break;
239 }
240 }
241
242 /* When we get here, cp (aka *cpp) contains the compiled pattern,
243 either because we found it in the cache or because we just compiled it.
244 Move it to the front of the queue to mark it as most recently used. */
245 *cpp = cp->next;
246 cp->next = searchbuf_head;
247 searchbuf_head = cp;
1113d9db 248
6639708c
RS
249 /* Advise the searching functions about the space we have allocated
250 for register data. */
251 if (regp)
252 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
253
487282dc 254 return &cp->buf;
ca1d1d23
JB
255}
256
257/* Error condition used for failing searches */
258Lisp_Object Qsearch_failed;
259
260Lisp_Object
261signal_failure (arg)
262 Lisp_Object arg;
263{
264 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
265 return Qnil;
266}
267\f
b819a390
RS
268static Lisp_Object
269looking_at_1 (string, posix)
ca1d1d23 270 Lisp_Object string;
b819a390 271 int posix;
ca1d1d23
JB
272{
273 Lisp_Object val;
274 unsigned char *p1, *p2;
275 int s1, s2;
276 register int i;
487282dc 277 struct re_pattern_buffer *bufp;
ca1d1d23 278
7074fde6
FP
279 if (running_asynch_code)
280 save_search_regs ();
281
ca1d1d23 282 CHECK_STRING (string, 0);
487282dc
KH
283 bufp = compile_pattern (string, &search_regs,
284 (!NILP (current_buffer->case_fold_search)
facdc750 285 ? DOWNCASE_TABLE : Qnil),
0c8533c6
RS
286 posix,
287 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23
JB
288
289 immediate_quit = 1;
290 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
291
292 /* Get pointers and sizes of the two strings
293 that make up the visible portion of the buffer. */
294
295 p1 = BEGV_ADDR;
fa8ed3e0 296 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 297 p2 = GAP_END_ADDR;
fa8ed3e0 298 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
299 if (s1 < 0)
300 {
301 p2 = p1;
fa8ed3e0 302 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
303 s1 = 0;
304 }
305 if (s2 < 0)
306 {
fa8ed3e0 307 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
308 s2 = 0;
309 }
8bb43c28
RS
310
311 re_match_object = Qnil;
ca1d1d23 312
487282dc 313 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
fa8ed3e0
RS
314 PT_BYTE - BEGV_BYTE, &search_regs,
315 ZV_BYTE - BEGV_BYTE);
de182d70
GM
316 immediate_quit = 0;
317
ca1d1d23
JB
318 if (i == -2)
319 matcher_overflow ();
320
321 val = (0 <= i ? Qt : Qnil);
fa8ed3e0
RS
322 if (i >= 0)
323 for (i = 0; i < search_regs.num_regs; i++)
324 if (search_regs.start[i] >= 0)
325 {
326 search_regs.start[i]
327 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
328 search_regs.end[i]
329 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
330 }
a3668d92 331 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
332 return val;
333}
334
b819a390 335DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
94f94972 336 "Return t if text after point matches regular expression REGEXP.\n\
b819a390
RS
337This function modifies the match data that `match-beginning',\n\
338`match-end' and `match-data' access; save and restore the match\n\
339data if you want to preserve them.")
94f94972
RS
340 (regexp)
341 Lisp_Object regexp;
b819a390 342{
94f94972 343 return looking_at_1 (regexp, 0);
b819a390
RS
344}
345
346DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
94f94972 347 "Return t if text after point matches regular expression REGEXP.\n\
b819a390
RS
348Find the longest match, in accord with Posix regular expression rules.\n\
349This function modifies the match data that `match-beginning',\n\
350`match-end' and `match-data' access; save and restore the match\n\
351data if you want to preserve them.")
94f94972
RS
352 (regexp)
353 Lisp_Object regexp;
b819a390 354{
94f94972 355 return looking_at_1 (regexp, 1);
b819a390
RS
356}
357\f
358static Lisp_Object
359string_match_1 (regexp, string, start, posix)
ca1d1d23 360 Lisp_Object regexp, string, start;
b819a390 361 int posix;
ca1d1d23
JB
362{
363 int val;
487282dc 364 struct re_pattern_buffer *bufp;
0c8533c6
RS
365 int pos, pos_byte;
366 int i;
ca1d1d23 367
7074fde6
FP
368 if (running_asynch_code)
369 save_search_regs ();
370
ca1d1d23
JB
371 CHECK_STRING (regexp, 0);
372 CHECK_STRING (string, 1);
373
374 if (NILP (start))
0c8533c6 375 pos = 0, pos_byte = 0;
ca1d1d23
JB
376 else
377 {
378 int len = XSTRING (string)->size;
379
380 CHECK_NUMBER (start, 2);
0c8533c6
RS
381 pos = XINT (start);
382 if (pos < 0 && -pos <= len)
383 pos = len + pos;
384 else if (0 > pos || pos > len)
ca1d1d23 385 args_out_of_range (string, start);
0c8533c6 386 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
387 }
388
487282dc
KH
389 bufp = compile_pattern (regexp, &search_regs,
390 (!NILP (current_buffer->case_fold_search)
facdc750 391 ? DOWNCASE_TABLE : Qnil),
0c8533c6
RS
392 posix,
393 STRING_MULTIBYTE (string));
ca1d1d23 394 immediate_quit = 1;
8bb43c28
RS
395 re_match_object = string;
396
487282dc 397 val = re_search (bufp, (char *) XSTRING (string)->data,
fc932ac6
RS
398 STRING_BYTES (XSTRING (string)), pos_byte,
399 STRING_BYTES (XSTRING (string)) - pos_byte,
ca1d1d23
JB
400 &search_regs);
401 immediate_quit = 0;
daa37602 402 last_thing_searched = Qt;
ca1d1d23
JB
403 if (val == -2)
404 matcher_overflow ();
405 if (val < 0) return Qnil;
0c8533c6
RS
406
407 for (i = 0; i < search_regs.num_regs; i++)
408 if (search_regs.start[i] >= 0)
409 {
410 search_regs.start[i]
411 = string_byte_to_char (string, search_regs.start[i]);
412 search_regs.end[i]
413 = string_byte_to_char (string, search_regs.end[i]);
414 }
415
416 return make_number (string_byte_to_char (string, val));
ca1d1d23 417}
e59a8453 418
b819a390
RS
419DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
420 "Return index of start of first match for REGEXP in STRING, or nil.\n\
21e01890 421Case is ignored if `case-fold-search' is non-nil in the current buffer.\n\
b819a390
RS
422If third arg START is non-nil, start search at that index in STRING.\n\
423For index of first char beyond the match, do (match-end 0).\n\
424`match-end' and `match-beginning' also give indices of substrings\n\
425matched by parenthesis constructs in the pattern.")
426 (regexp, string, start)
427 Lisp_Object regexp, string, start;
428{
429 return string_match_1 (regexp, string, start, 0);
430}
431
432DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
433 "Return index of start of first match for REGEXP in STRING, or nil.\n\
434Find the longest match, in accord with Posix regular expression rules.\n\
21e01890 435Case is ignored if `case-fold-search' is non-nil in the current buffer.\n\
b819a390
RS
436If third arg START is non-nil, start search at that index in STRING.\n\
437For index of first char beyond the match, do (match-end 0).\n\
438`match-end' and `match-beginning' also give indices of substrings\n\
439matched by parenthesis constructs in the pattern.")
440 (regexp, string, start)
441 Lisp_Object regexp, string, start;
442{
443 return string_match_1 (regexp, string, start, 1);
444}
445
e59a8453
RS
446/* Match REGEXP against STRING, searching all of STRING,
447 and return the index of the match, or negative on failure.
448 This does not clobber the match data. */
449
450int
451fast_string_match (regexp, string)
452 Lisp_Object regexp, string;
453{
454 int val;
487282dc 455 struct re_pattern_buffer *bufp;
e59a8453 456
facdc750
RS
457 bufp = compile_pattern (regexp, 0, Qnil,
458 0, STRING_MULTIBYTE (string));
e59a8453 459 immediate_quit = 1;
8bb43c28
RS
460 re_match_object = string;
461
487282dc 462 val = re_search (bufp, (char *) XSTRING (string)->data,
fc932ac6
RS
463 STRING_BYTES (XSTRING (string)), 0,
464 STRING_BYTES (XSTRING (string)), 0);
e59a8453
RS
465 immediate_quit = 0;
466 return val;
467}
5679531d
KH
468
469/* Match REGEXP against STRING, searching all of STRING ignoring case,
470 and return the index of the match, or negative on failure.
0c8533c6
RS
471 This does not clobber the match data.
472 We assume that STRING contains single-byte characters. */
5679531d
KH
473
474extern Lisp_Object Vascii_downcase_table;
475
476int
b4577c63 477fast_c_string_match_ignore_case (regexp, string)
5679531d
KH
478 Lisp_Object regexp;
479 char *string;
480{
481 int val;
482 struct re_pattern_buffer *bufp;
483 int len = strlen (string);
484
0c8533c6 485 regexp = string_make_unibyte (regexp);
b4577c63 486 re_match_object = Qt;
5679531d 487 bufp = compile_pattern (regexp, 0,
facdc750 488 Vascii_downcase_table, 0,
f8bd51c4 489 0);
5679531d
KH
490 immediate_quit = 1;
491 val = re_search (bufp, string, len, 0, len, 0);
492 immediate_quit = 0;
493 return val;
494}
ca1d1d23 495\f
9169c321
JB
496/* The newline cache: remembering which sections of text have no newlines. */
497
498/* If the user has requested newline caching, make sure it's on.
499 Otherwise, make sure it's off.
500 This is our cheezy way of associating an action with the change of
501 state of a buffer-local variable. */
502static void
503newline_cache_on_off (buf)
504 struct buffer *buf;
505{
506 if (NILP (buf->cache_long_line_scans))
507 {
508 /* It should be off. */
509 if (buf->newline_cache)
510 {
511 free_region_cache (buf->newline_cache);
512 buf->newline_cache = 0;
513 }
514 }
515 else
516 {
517 /* It should be on. */
518 if (buf->newline_cache == 0)
519 buf->newline_cache = new_region_cache ();
520 }
521}
522
523\f
524/* Search for COUNT instances of the character TARGET between START and END.
525
526 If COUNT is positive, search forwards; END must be >= START.
527 If COUNT is negative, search backwards for the -COUNTth instance;
528 END must be <= START.
529 If COUNT is zero, do anything you please; run rogue, for all I care.
530
531 If END is zero, use BEGV or ZV instead, as appropriate for the
532 direction indicated by COUNT.
ffd56f97
JB
533
534 If we find COUNT instances, set *SHORTAGE to zero, and return the
5bfe95c9
RS
535 position after the COUNTth match. Note that for reverse motion
536 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 537
9169c321
JB
538 If we don't find COUNT instances before reaching END, set *SHORTAGE
539 to the number of TARGETs left unfound, and return END.
ffd56f97 540
087a5f81
RS
541 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
542 except when inside redisplay. */
543
dfcf069d 544int
9169c321
JB
545scan_buffer (target, start, end, count, shortage, allow_quit)
546 register int target;
547 int start, end;
548 int count;
549 int *shortage;
087a5f81 550 int allow_quit;
ca1d1d23 551{
9169c321
JB
552 struct region_cache *newline_cache;
553 int direction;
ffd56f97 554
9169c321
JB
555 if (count > 0)
556 {
557 direction = 1;
558 if (! end) end = ZV;
559 }
560 else
561 {
562 direction = -1;
563 if (! end) end = BEGV;
564 }
ffd56f97 565
9169c321
JB
566 newline_cache_on_off (current_buffer);
567 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
568
569 if (shortage != 0)
570 *shortage = 0;
571
087a5f81 572 immediate_quit = allow_quit;
ca1d1d23 573
ffd56f97 574 if (count > 0)
9169c321 575 while (start != end)
ca1d1d23 576 {
9169c321
JB
577 /* Our innermost scanning loop is very simple; it doesn't know
578 about gaps, buffer ends, or the newline cache. ceiling is
579 the position of the last character before the next such
580 obstacle --- the last character the dumb search loop should
581 examine. */
fa8ed3e0
RS
582 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
583 int start_byte = CHAR_TO_BYTE (start);
67ce527d 584 int tem;
9169c321
JB
585
586 /* If we're looking for a newline, consult the newline cache
587 to see where we can avoid some scanning. */
588 if (target == '\n' && newline_cache)
589 {
590 int next_change;
591 immediate_quit = 0;
592 while (region_cache_forward
fa8ed3e0
RS
593 (current_buffer, newline_cache, start_byte, &next_change))
594 start_byte = next_change;
cbe0db0d 595 immediate_quit = allow_quit;
9169c321 596
fa8ed3e0
RS
597 /* START should never be after END. */
598 if (start_byte > ceiling_byte)
599 start_byte = ceiling_byte;
9169c321
JB
600
601 /* Now the text after start is an unknown region, and
602 next_change is the position of the next known region. */
fa8ed3e0 603 ceiling_byte = min (next_change - 1, ceiling_byte);
9169c321
JB
604 }
605
606 /* The dumb loop can only scan text stored in contiguous
607 bytes. BUFFER_CEILING_OF returns the last character
608 position that is contiguous, so the ceiling is the
609 position after that. */
67ce527d
KH
610 tem = BUFFER_CEILING_OF (start_byte);
611 ceiling_byte = min (tem, ceiling_byte);
9169c321
JB
612
613 {
614 /* The termination address of the dumb loop. */
fa8ed3e0
RS
615 register unsigned char *ceiling_addr
616 = BYTE_POS_ADDR (ceiling_byte) + 1;
617 register unsigned char *cursor
618 = BYTE_POS_ADDR (start_byte);
9169c321
JB
619 unsigned char *base = cursor;
620
621 while (cursor < ceiling_addr)
622 {
623 unsigned char *scan_start = cursor;
624
625 /* The dumb loop. */
626 while (*cursor != target && ++cursor < ceiling_addr)
627 ;
628
629 /* If we're looking for newlines, cache the fact that
630 the region from start to cursor is free of them. */
631 if (target == '\n' && newline_cache)
632 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
633 start_byte + scan_start - base,
634 start_byte + cursor - base);
9169c321
JB
635
636 /* Did we find the target character? */
637 if (cursor < ceiling_addr)
638 {
639 if (--count == 0)
640 {
641 immediate_quit = 0;
fa8ed3e0 642 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
9169c321
JB
643 }
644 cursor++;
645 }
646 }
647
fa8ed3e0 648 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321 649 }
ca1d1d23
JB
650 }
651 else
9169c321
JB
652 while (start > end)
653 {
654 /* The last character to check before the next obstacle. */
fa8ed3e0
RS
655 int ceiling_byte = CHAR_TO_BYTE (end);
656 int start_byte = CHAR_TO_BYTE (start);
67ce527d 657 int tem;
9169c321
JB
658
659 /* Consult the newline cache, if appropriate. */
660 if (target == '\n' && newline_cache)
661 {
662 int next_change;
663 immediate_quit = 0;
664 while (region_cache_backward
fa8ed3e0
RS
665 (current_buffer, newline_cache, start_byte, &next_change))
666 start_byte = next_change;
cbe0db0d 667 immediate_quit = allow_quit;
9169c321
JB
668
669 /* Start should never be at or before end. */
fa8ed3e0
RS
670 if (start_byte <= ceiling_byte)
671 start_byte = ceiling_byte + 1;
9169c321
JB
672
673 /* Now the text before start is an unknown region, and
674 next_change is the position of the next known region. */
fa8ed3e0 675 ceiling_byte = max (next_change, ceiling_byte);
9169c321
JB
676 }
677
678 /* Stop scanning before the gap. */
67ce527d
KH
679 tem = BUFFER_FLOOR_OF (start_byte - 1);
680 ceiling_byte = max (tem, ceiling_byte);
9169c321
JB
681
682 {
683 /* The termination address of the dumb loop. */
fa8ed3e0
RS
684 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
685 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
686 unsigned char *base = cursor;
687
688 while (cursor >= ceiling_addr)
689 {
690 unsigned char *scan_start = cursor;
691
692 while (*cursor != target && --cursor >= ceiling_addr)
693 ;
694
695 /* If we're looking for newlines, cache the fact that
696 the region from after the cursor to start is free of them. */
697 if (target == '\n' && newline_cache)
698 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
699 start_byte + cursor - base,
700 start_byte + scan_start - base);
9169c321
JB
701
702 /* Did we find the target character? */
703 if (cursor >= ceiling_addr)
704 {
705 if (++count >= 0)
706 {
707 immediate_quit = 0;
fa8ed3e0 708 return BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
709 }
710 cursor--;
711 }
712 }
713
fa8ed3e0 714 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
715 }
716 }
717
ca1d1d23
JB
718 immediate_quit = 0;
719 if (shortage != 0)
ffd56f97 720 *shortage = count * direction;
9169c321 721 return start;
ca1d1d23 722}
fa8ed3e0
RS
723\f
724/* Search for COUNT instances of a line boundary, which means either a
725 newline or (if selective display enabled) a carriage return.
726 Start at START. If COUNT is negative, search backwards.
727
728 We report the resulting position by calling TEMP_SET_PT_BOTH.
729
730 If we find COUNT instances. we position after (always after,
731 even if scanning backwards) the COUNTth match, and return 0.
732
733 If we don't find COUNT instances before reaching the end of the
734 buffer (or the beginning, if scanning backwards), we return
735 the number of line boundaries left unfound, and position at
736 the limit we bumped up against.
737
738 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
d5d57b92 739 except in special cases. */
ca1d1d23 740
63fa018d 741int
fa8ed3e0
RS
742scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
743 int start, start_byte;
744 int limit, limit_byte;
745 register int count;
746 int allow_quit;
63fa018d 747{
fa8ed3e0
RS
748 int direction = ((count > 0) ? 1 : -1);
749
750 register unsigned char *cursor;
751 unsigned char *base;
752
753 register int ceiling;
754 register unsigned char *ceiling_addr;
755
d5d57b92
RS
756 int old_immediate_quit = immediate_quit;
757
fa8ed3e0
RS
758 /* If we are not in selective display mode,
759 check only for newlines. */
760 int selective_display = (!NILP (current_buffer->selective_display)
761 && !INTEGERP (current_buffer->selective_display));
762
763 /* The code that follows is like scan_buffer
764 but checks for either newline or carriage return. */
765
d5d57b92
RS
766 if (allow_quit)
767 immediate_quit++;
fa8ed3e0
RS
768
769 start_byte = CHAR_TO_BYTE (start);
770
771 if (count > 0)
772 {
773 while (start_byte < limit_byte)
774 {
775 ceiling = BUFFER_CEILING_OF (start_byte);
776 ceiling = min (limit_byte - 1, ceiling);
777 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
778 base = (cursor = BYTE_POS_ADDR (start_byte));
779 while (1)
780 {
781 while (*cursor != '\n' && ++cursor != ceiling_addr)
782 ;
783
784 if (cursor != ceiling_addr)
785 {
786 if (--count == 0)
787 {
d5d57b92 788 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
789 start_byte = start_byte + cursor - base + 1;
790 start = BYTE_TO_CHAR (start_byte);
791 TEMP_SET_PT_BOTH (start, start_byte);
792 return 0;
793 }
794 else
795 if (++cursor == ceiling_addr)
796 break;
797 }
798 else
799 break;
800 }
801 start_byte += cursor - base;
802 }
803 }
804 else
805 {
fa8ed3e0
RS
806 while (start_byte > limit_byte)
807 {
808 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
809 ceiling = max (limit_byte, ceiling);
810 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
811 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
812 while (1)
813 {
814 while (--cursor != ceiling_addr && *cursor != '\n')
815 ;
816
817 if (cursor != ceiling_addr)
818 {
819 if (++count == 0)
820 {
d5d57b92 821 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
822 /* Return the position AFTER the match we found. */
823 start_byte = start_byte + cursor - base + 1;
824 start = BYTE_TO_CHAR (start_byte);
825 TEMP_SET_PT_BOTH (start, start_byte);
826 return 0;
827 }
828 }
829 else
830 break;
831 }
832 /* Here we add 1 to compensate for the last decrement
833 of CURSOR, which took it past the valid range. */
834 start_byte += cursor - base + 1;
835 }
836 }
837
838 TEMP_SET_PT_BOTH (limit, limit_byte);
d5d57b92 839 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
840
841 return count * direction;
63fa018d
RS
842}
843
ca1d1d23 844int
fa8ed3e0 845find_next_newline_no_quit (from, cnt)
ca1d1d23
JB
846 register int from, cnt;
847{
fa8ed3e0 848 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
9169c321
JB
849}
850
9169c321
JB
851/* Like find_next_newline, but returns position before the newline,
852 not after, and only search up to TO. This isn't just
853 find_next_newline (...)-1, because you might hit TO. */
fa8ed3e0 854
9169c321
JB
855int
856find_before_next_newline (from, to, cnt)
cbe0db0d 857 int from, to, cnt;
9169c321
JB
858{
859 int shortage;
860 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
861
862 if (shortage == 0)
863 pos--;
864
865 return pos;
ca1d1d23
JB
866}
867\f
ca1d1d23
JB
868/* Subroutines of Lisp buffer search functions. */
869
870static Lisp_Object
b819a390 871search_command (string, bound, noerror, count, direction, RE, posix)
ca1d1d23
JB
872 Lisp_Object string, bound, noerror, count;
873 int direction;
874 int RE;
b819a390 875 int posix;
ca1d1d23
JB
876{
877 register int np;
9f43ad85 878 int lim, lim_byte;
ca1d1d23
JB
879 int n = direction;
880
881 if (!NILP (count))
882 {
883 CHECK_NUMBER (count, 3);
884 n *= XINT (count);
885 }
886
887 CHECK_STRING (string, 0);
888 if (NILP (bound))
9f43ad85
RS
889 {
890 if (n > 0)
891 lim = ZV, lim_byte = ZV_BYTE;
892 else
893 lim = BEGV, lim_byte = BEGV_BYTE;
894 }
ca1d1d23
JB
895 else
896 {
897 CHECK_NUMBER_COERCE_MARKER (bound, 1);
898 lim = XINT (bound);
6ec8bbd2 899 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
900 error ("Invalid search bound (wrong side of point)");
901 if (lim > ZV)
9f43ad85 902 lim = ZV, lim_byte = ZV_BYTE;
588d2fd5 903 else if (lim < BEGV)
9f43ad85 904 lim = BEGV, lim_byte = BEGV_BYTE;
588d2fd5
KH
905 else
906 lim_byte = CHAR_TO_BYTE (lim);
ca1d1d23
JB
907 }
908
9f43ad85 909 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
ca1d1d23 910 (!NILP (current_buffer->case_fold_search)
facdc750 911 ? current_buffer->case_canon_table
3135e9fd 912 : Qnil),
ca1d1d23 913 (!NILP (current_buffer->case_fold_search)
facdc750 914 ? current_buffer->case_eqv_table
3135e9fd 915 : Qnil),
b819a390 916 posix);
ca1d1d23
JB
917 if (np <= 0)
918 {
919 if (NILP (noerror))
920 return signal_failure (string);
921 if (!EQ (noerror, Qt))
922 {
923 if (lim < BEGV || lim > ZV)
924 abort ();
9f43ad85 925 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
926 return Qnil;
927#if 0 /* This would be clean, but maybe programs depend on
928 a value of nil here. */
481399bf 929 np = lim;
a5f217b8 930#endif
ca1d1d23 931 }
481399bf
RS
932 else
933 return Qnil;
ca1d1d23
JB
934 }
935
936 if (np < BEGV || np > ZV)
937 abort ();
938
939 SET_PT (np);
940
941 return make_number (np);
942}
943\f
fa8ed3e0
RS
944/* Return 1 if REGEXP it matches just one constant string. */
945
b6d6a51c
KH
946static int
947trivial_regexp_p (regexp)
948 Lisp_Object regexp;
949{
fc932ac6 950 int len = STRING_BYTES (XSTRING (regexp));
b6d6a51c
KH
951 unsigned char *s = XSTRING (regexp)->data;
952 unsigned char c;
953 while (--len >= 0)
954 {
955 switch (*s++)
956 {
957 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
958 return 0;
959 case '\\':
960 if (--len < 0)
961 return 0;
962 switch (*s++)
963 {
964 case '|': case '(': case ')': case '`': case '\'': case 'b':
965 case 'B': case '<': case '>': case 'w': case 'W': case 's':
866f60fd 966 case 'S': case '=':
5679531d 967 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 968 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
969 case '6': case '7': case '8': case '9':
970 return 0;
971 }
972 }
973 }
974 return 1;
975}
976
ca325161 977/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 978 starting at position POS and stopping at position LIM,
b819a390 979 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
980 a regular expression if RE is true.
981
982 If N is positive, searching is forward and LIM must be greater than POS.
983 If N is negative, searching is backward and LIM must be less than POS.
984
facdc750 985 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 986 or else the position at the beginning of the Nth occurrence
b819a390
RS
987 (if searching backward) or the end (if searching forward).
988
989 POSIX is nonzero if we want full backtracking (POSIX style)
990 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 991
aff2ce94
RS
992#define TRANSLATE(out, trt, d) \
993do \
994 { \
995 if (! NILP (trt)) \
996 { \
997 Lisp_Object temp; \
998 temp = Faref (trt, make_number (d)); \
999 if (INTEGERP (temp)) \
1000 out = XINT (temp); \
1001 else \
1002 out = d; \
1003 } \
1004 else \
1005 out = d; \
1006 } \
1007while (0)
facdc750 1008
b819a390 1009static int
9f43ad85
RS
1010search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1011 RE, trt, inverse_trt, posix)
ca1d1d23
JB
1012 Lisp_Object string;
1013 int pos;
9f43ad85 1014 int pos_byte;
ca1d1d23 1015 int lim;
9f43ad85 1016 int lim_byte;
ca1d1d23
JB
1017 int n;
1018 int RE;
facdc750
RS
1019 Lisp_Object trt;
1020 Lisp_Object inverse_trt;
b819a390 1021 int posix;
ca1d1d23
JB
1022{
1023 int len = XSTRING (string)->size;
fc932ac6 1024 int len_byte = STRING_BYTES (XSTRING (string));
facdc750 1025 register int i;
ca1d1d23 1026
7074fde6
FP
1027 if (running_asynch_code)
1028 save_search_regs ();
1029
a7e4cdde 1030 /* Searching 0 times means don't move. */
ca1d1d23 1031 /* Null string is found at starting position. */
a7e4cdde 1032 if (len == 0 || n == 0)
ca325161
RS
1033 {
1034 set_search_regs (pos, 0);
1035 return pos;
1036 }
3f57a499 1037
b6d6a51c 1038 if (RE && !trivial_regexp_p (string))
ca1d1d23 1039 {
facdc750
RS
1040 unsigned char *p1, *p2;
1041 int s1, s2;
487282dc
KH
1042 struct re_pattern_buffer *bufp;
1043
0c8533c6
RS
1044 bufp = compile_pattern (string, &search_regs, trt, posix,
1045 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23 1046
ca1d1d23
JB
1047 immediate_quit = 1; /* Quit immediately if user types ^G,
1048 because letting this function finish
1049 can take too long. */
1050 QUIT; /* Do a pending quit right away,
1051 to avoid paradoxical behavior */
1052 /* Get pointers and sizes of the two strings
1053 that make up the visible portion of the buffer. */
1054
1055 p1 = BEGV_ADDR;
fa8ed3e0 1056 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1057 p2 = GAP_END_ADDR;
fa8ed3e0 1058 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1059 if (s1 < 0)
1060 {
1061 p2 = p1;
fa8ed3e0 1062 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1063 s1 = 0;
1064 }
1065 if (s2 < 0)
1066 {
fa8ed3e0 1067 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1068 s2 = 0;
1069 }
8bb43c28
RS
1070 re_match_object = Qnil;
1071
ca1d1d23
JB
1072 while (n < 0)
1073 {
42db823b 1074 int val;
487282dc 1075 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1076 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1077 &search_regs,
42db823b 1078 /* Don't allow match past current point */
4996330b 1079 pos_byte - BEGV_BYTE);
ca1d1d23 1080 if (val == -2)
b6d6a51c
KH
1081 {
1082 matcher_overflow ();
1083 }
ca1d1d23
JB
1084 if (val >= 0)
1085 {
26aff150 1086 pos_byte = search_regs.start[0] + BEGV_BYTE;
4746118a 1087 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1088 if (search_regs.start[i] >= 0)
1089 {
fa8ed3e0
RS
1090 search_regs.start[i]
1091 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1092 search_regs.end[i]
1093 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1094 }
a3668d92 1095 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1096 /* Set pos to the new position. */
1097 pos = search_regs.start[0];
1098 }
1099 else
1100 {
1101 immediate_quit = 0;
1102 return (n);
1103 }
1104 n++;
1105 }
1106 while (n > 0)
1107 {
42db823b 1108 int val;
487282dc 1109 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1110 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1111 &search_regs,
1112 lim_byte - BEGV_BYTE);
ca1d1d23 1113 if (val == -2)
b6d6a51c
KH
1114 {
1115 matcher_overflow ();
1116 }
ca1d1d23
JB
1117 if (val >= 0)
1118 {
26aff150 1119 pos_byte = search_regs.end[0] + BEGV_BYTE;
4746118a 1120 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1121 if (search_regs.start[i] >= 0)
1122 {
fa8ed3e0
RS
1123 search_regs.start[i]
1124 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1125 search_regs.end[i]
1126 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1127 }
a3668d92 1128 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1129 pos = search_regs.end[0];
1130 }
1131 else
1132 {
1133 immediate_quit = 0;
1134 return (0 - n);
1135 }
1136 n--;
1137 }
1138 immediate_quit = 0;
1139 return (pos);
1140 }
1141 else /* non-RE case */
1142 {
facdc750
RS
1143 unsigned char *raw_pattern, *pat;
1144 int raw_pattern_size;
1145 int raw_pattern_size_byte;
1146 unsigned char *patbuf;
1147 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1148 unsigned char *base_pat = XSTRING (string)->data;
1149 int charset_base = -1;
040272ce 1150 int boyer_moore_ok = 1;
facdc750
RS
1151
1152 /* MULTIBYTE says whether the text to be searched is multibyte.
1153 We must convert PATTERN to match that, or we will not really
1154 find things right. */
1155
1156 if (multibyte == STRING_MULTIBYTE (string))
1157 {
7276d3d8 1158 raw_pattern = (unsigned char *) XSTRING (string)->data;
facdc750 1159 raw_pattern_size = XSTRING (string)->size;
fc932ac6 1160 raw_pattern_size_byte = STRING_BYTES (XSTRING (string));
facdc750
RS
1161 }
1162 else if (multibyte)
1163 {
1164 raw_pattern_size = XSTRING (string)->size;
1165 raw_pattern_size_byte
1166 = count_size_as_multibyte (XSTRING (string)->data,
1167 raw_pattern_size);
7276d3d8 1168 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
facdc750
RS
1169 copy_text (XSTRING (string)->data, raw_pattern,
1170 XSTRING (string)->size, 0, 1);
1171 }
1172 else
1173 {
1174 /* Converting multibyte to single-byte.
1175
1176 ??? Perhaps this conversion should be done in a special way
1177 by subtracting nonascii-insert-offset from each non-ASCII char,
1178 so that only the multibyte chars which really correspond to
1179 the chosen single-byte character set can possibly match. */
1180 raw_pattern_size = XSTRING (string)->size;
1181 raw_pattern_size_byte = XSTRING (string)->size;
7276d3d8 1182 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
facdc750 1183 copy_text (XSTRING (string)->data, raw_pattern,
fc932ac6 1184 STRING_BYTES (XSTRING (string)), 1, 0);
facdc750
RS
1185 }
1186
1187 /* Copy and optionally translate the pattern. */
1188 len = raw_pattern_size;
1189 len_byte = raw_pattern_size_byte;
1190 patbuf = (unsigned char *) alloca (len_byte);
1191 pat = patbuf;
1192 base_pat = raw_pattern;
1193 if (multibyte)
1194 {
1195 while (--len >= 0)
1196 {
daaa6ed8 1197 unsigned char str[MAX_MULTIBYTE_LENGTH];
aff2ce94 1198 int c, translated, inverse;
facdc750
RS
1199 int in_charlen, charlen;
1200
1201 /* If we got here and the RE flag is set, it's because we're
1202 dealing with a regexp known to be trivial, so the backslash
1203 just quotes the next character. */
1204 if (RE && *base_pat == '\\')
1205 {
1206 len--;
1207 len_byte--;
1208 base_pat++;
1209 }
1210
1211 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
040272ce 1212
facdc750 1213 /* Translate the character, if requested. */
aff2ce94 1214 TRANSLATE (translated, trt, c);
facdc750
RS
1215 /* If translation changed the byte-length, go back
1216 to the original character. */
daaa6ed8 1217 charlen = CHAR_STRING (translated, str);
facdc750
RS
1218 if (in_charlen != charlen)
1219 {
1220 translated = c;
daaa6ed8 1221 charlen = CHAR_STRING (c, str);
facdc750
RS
1222 }
1223
5ffaf437
RS
1224 /* If we are searching for something strange,
1225 an invalid multibyte code, don't use boyer-moore. */
1226 if (! ASCII_BYTE_P (translated)
1227 && (charlen == 1 /* 8bit code */
1228 || charlen != in_charlen /* invalid multibyte code */
1229 ))
1230 boyer_moore_ok = 0;
1231
aff2ce94
RS
1232 TRANSLATE (inverse, inverse_trt, c);
1233
facdc750
RS
1234 /* Did this char actually get translated?
1235 Would any other char get translated into it? */
aff2ce94 1236 if (translated != c || inverse != c)
facdc750
RS
1237 {
1238 /* Keep track of which character set row
1239 contains the characters that need translation. */
5ffaf437 1240 int charset_base_code = c & ~CHAR_FIELD3_MASK;
facdc750
RS
1241 if (charset_base == -1)
1242 charset_base = charset_base_code;
1243 else if (charset_base != charset_base_code)
1244 /* If two different rows appear, needing translation,
1245 then we cannot use boyer_moore search. */
040272ce 1246 boyer_moore_ok = 0;
aff2ce94 1247 }
facdc750
RS
1248
1249 /* Store this character into the translated pattern. */
1250 bcopy (str, pat, charlen);
1251 pat += charlen;
1252 base_pat += in_charlen;
1253 len_byte -= in_charlen;
1254 }
1255 }
1256 else
1257 {
040272ce
KH
1258 /* Unibyte buffer. */
1259 charset_base = 0;
facdc750
RS
1260 while (--len >= 0)
1261 {
040272ce 1262 int c, translated;
facdc750
RS
1263
1264 /* If we got here and the RE flag is set, it's because we're
1265 dealing with a regexp known to be trivial, so the backslash
1266 just quotes the next character. */
1267 if (RE && *base_pat == '\\')
1268 {
1269 len--;
1270 base_pat++;
1271 }
1272 c = *base_pat++;
aff2ce94 1273 TRANSLATE (translated, trt, c);
facdc750
RS
1274 *pat++ = translated;
1275 }
1276 }
1277
1278 len_byte = pat - patbuf;
1279 len = raw_pattern_size;
1280 pat = base_pat = patbuf;
1281
040272ce 1282 if (boyer_moore_ok)
facdc750 1283 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
aff2ce94
RS
1284 pos, pos_byte, lim, lim_byte,
1285 charset_base);
facdc750
RS
1286 else
1287 return simple_search (n, pat, len, len_byte, trt,
1288 pos, pos_byte, lim, lim_byte);
1289 }
1290}
1291\f
1292/* Do a simple string search N times for the string PAT,
1293 whose length is LEN/LEN_BYTE,
1294 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1295 TRT is the translation table.
f8bd51c4 1296
facdc750
RS
1297 Return the character position where the match is found.
1298 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1299
facdc750
RS
1300 This kind of search works regardless of what is in PAT and
1301 regardless of what is in TRT. It is used in cases where
1302 boyer_moore cannot work. */
1303
1304static int
1305simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1306 int n;
1307 unsigned char *pat;
1308 int len, len_byte;
1309 Lisp_Object trt;
1310 int pos, pos_byte;
1311 int lim, lim_byte;
1312{
1313 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
ab228c24 1314 int forward = n > 0;
facdc750
RS
1315
1316 if (lim > pos && multibyte)
1317 while (n > 0)
1318 {
1319 while (1)
f8bd51c4 1320 {
facdc750
RS
1321 /* Try matching at position POS. */
1322 int this_pos = pos;
1323 int this_pos_byte = pos_byte;
1324 int this_len = len;
1325 int this_len_byte = len_byte;
1326 unsigned char *p = pat;
1327 if (pos + len > lim)
1328 goto stop;
1329
1330 while (this_len > 0)
1331 {
1332 int charlen, buf_charlen;
ab228c24 1333 int pat_ch, buf_ch;
facdc750 1334
ab228c24 1335 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1336 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1337 ZV_BYTE - this_pos_byte,
1338 buf_charlen);
aff2ce94 1339 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1340
1341 if (buf_ch != pat_ch)
1342 break;
ab228c24
RS
1343
1344 this_len_byte -= charlen;
1345 this_len--;
1346 p += charlen;
1347
1348 this_pos_byte += buf_charlen;
1349 this_pos++;
facdc750
RS
1350 }
1351
1352 if (this_len == 0)
1353 {
1354 pos += len;
1355 pos_byte += len_byte;
1356 break;
1357 }
1358
1359 INC_BOTH (pos, pos_byte);
f8bd51c4 1360 }
facdc750
RS
1361
1362 n--;
1363 }
1364 else if (lim > pos)
1365 while (n > 0)
1366 {
1367 while (1)
f8bd51c4 1368 {
facdc750
RS
1369 /* Try matching at position POS. */
1370 int this_pos = pos;
1371 int this_len = len;
1372 unsigned char *p = pat;
1373
1374 if (pos + len > lim)
1375 goto stop;
1376
1377 while (this_len > 0)
1378 {
1379 int pat_ch = *p++;
1380 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1381 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1382
1383 if (buf_ch != pat_ch)
1384 break;
ab228c24
RS
1385
1386 this_len--;
1387 this_pos++;
facdc750
RS
1388 }
1389
1390 if (this_len == 0)
1391 {
1392 pos += len;
1393 break;
1394 }
1395
1396 pos++;
f8bd51c4 1397 }
facdc750
RS
1398
1399 n--;
1400 }
1401 /* Backwards search. */
1402 else if (lim < pos && multibyte)
1403 while (n < 0)
1404 {
1405 while (1)
f8bd51c4 1406 {
facdc750
RS
1407 /* Try matching at position POS. */
1408 int this_pos = pos - len;
1409 int this_pos_byte = pos_byte - len_byte;
1410 int this_len = len;
1411 int this_len_byte = len_byte;
1412 unsigned char *p = pat;
1413
1414 if (pos - len < lim)
1415 goto stop;
1416
1417 while (this_len > 0)
1418 {
1419 int charlen, buf_charlen;
ab228c24 1420 int pat_ch, buf_ch;
facdc750 1421
ab228c24 1422 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1423 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1424 ZV_BYTE - this_pos_byte,
1425 buf_charlen);
aff2ce94 1426 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1427
1428 if (buf_ch != pat_ch)
1429 break;
ab228c24
RS
1430
1431 this_len_byte -= charlen;
1432 this_len--;
1433 p += charlen;
1434 this_pos_byte += buf_charlen;
1435 this_pos++;
facdc750
RS
1436 }
1437
1438 if (this_len == 0)
1439 {
1440 pos -= len;
1441 pos_byte -= len_byte;
1442 break;
1443 }
1444
1445 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1446 }
1447
facdc750
RS
1448 n++;
1449 }
1450 else if (lim < pos)
1451 while (n < 0)
1452 {
1453 while (1)
b6d6a51c 1454 {
facdc750
RS
1455 /* Try matching at position POS. */
1456 int this_pos = pos - len;
1457 int this_len = len;
1458 unsigned char *p = pat;
1459
1460 if (pos - len < lim)
1461 goto stop;
1462
1463 while (this_len > 0)
1464 {
1465 int pat_ch = *p++;
1466 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1467 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1468
1469 if (buf_ch != pat_ch)
1470 break;
ab228c24
RS
1471 this_len--;
1472 this_pos++;
facdc750
RS
1473 }
1474
1475 if (this_len == 0)
b6d6a51c 1476 {
facdc750
RS
1477 pos -= len;
1478 break;
b6d6a51c 1479 }
facdc750
RS
1480
1481 pos--;
b6d6a51c 1482 }
facdc750
RS
1483
1484 n++;
b6d6a51c 1485 }
facdc750
RS
1486
1487 stop:
1488 if (n == 0)
aff2ce94 1489 {
ab228c24
RS
1490 if (forward)
1491 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1492 else
1493 set_search_regs (multibyte ? pos_byte : pos, len_byte);
aff2ce94
RS
1494
1495 return pos;
1496 }
facdc750
RS
1497 else if (n > 0)
1498 return -n;
1499 else
1500 return n;
1501}
1502\f
1503/* Do Boyer-Moore search N times for the string PAT,
1504 whose length is LEN/LEN_BYTE,
1505 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1506 DIRECTION says which direction we search in.
1507 TRT and INVERSE_TRT are translation tables.
1508
1509 This kind of search works if all the characters in PAT that have
1510 nontrivial translation are the same aside from the last byte. This
1511 makes it possible to translate just the last byte of a character,
1512 and do so after just a simple test of the context.
1513
1514 If that criterion is not satisfied, do not call this function. */
1515
1516static int
1517boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
aff2ce94 1518 pos, pos_byte, lim, lim_byte, charset_base)
facdc750
RS
1519 int n;
1520 unsigned char *base_pat;
1521 int len, len_byte;
1522 Lisp_Object trt;
1523 Lisp_Object inverse_trt;
1524 int pos, pos_byte;
1525 int lim, lim_byte;
aff2ce94 1526 int charset_base;
facdc750
RS
1527{
1528 int direction = ((n > 0) ? 1 : -1);
1529 register int dirlen;
6bbd7a29 1530 int infinity, limit, k, stride_for_teases = 0;
facdc750
RS
1531 register int *BM_tab;
1532 int *BM_tab_base;
1533 register unsigned char *cursor, *p_limit;
1534 register int i, j;
cb6792d2 1535 unsigned char *pat, *pat_end;
facdc750
RS
1536 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1537
1538 unsigned char simple_translate[0400];
6bbd7a29
GM
1539 int translate_prev_byte = 0;
1540 int translate_anteprev_byte = 0;
facdc750
RS
1541
1542#ifdef C_ALLOCA
1543 int BM_tab_space[0400];
1544 BM_tab = &BM_tab_space[0];
1545#else
1546 BM_tab = (int *) alloca (0400 * sizeof (int));
1547#endif
1548 /* The general approach is that we are going to maintain that we know */
1549 /* the first (closest to the present position, in whatever direction */
1550 /* we're searching) character that could possibly be the last */
1551 /* (furthest from present position) character of a valid match. We */
1552 /* advance the state of our knowledge by looking at that character */
1553 /* and seeing whether it indeed matches the last character of the */
1554 /* pattern. If it does, we take a closer look. If it does not, we */
1555 /* move our pointer (to putative last characters) as far as is */
1556 /* logically possible. This amount of movement, which I call a */
1557 /* stride, will be the length of the pattern if the actual character */
1558 /* appears nowhere in the pattern, otherwise it will be the distance */
1559 /* from the last occurrence of that character to the end of the */
1560 /* pattern. */
1561 /* As a coding trick, an enormous stride is coded into the table for */
1562 /* characters that match the last character. This allows use of only */
1563 /* a single test, a test for having gone past the end of the */
1564 /* permissible match region, to test for both possible matches (when */
1565 /* the stride goes past the end immediately) and failure to */
1566 /* match (where you get nudged past the end one stride at a time). */
1567
1568 /* Here we make a "mickey mouse" BM table. The stride of the search */
1569 /* is determined only by the last character of the putative match. */
1570 /* If that character does not match, we will stride the proper */
1571 /* distance to propose a match that superimposes it on the last */
1572 /* instance of a character that matches it (per trt), or misses */
1573 /* it entirely if there is none. */
1574
1575 dirlen = len_byte * direction;
1576 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
cb6792d2
RS
1577
1578 /* Record position after the end of the pattern. */
1579 pat_end = base_pat + len_byte;
1580 /* BASE_PAT points to a character that we start scanning from.
1581 It is the first character in a forward search,
1582 the last character in a backward search. */
facdc750 1583 if (direction < 0)
cb6792d2
RS
1584 base_pat = pat_end - 1;
1585
facdc750
RS
1586 BM_tab_base = BM_tab;
1587 BM_tab += 0400;
1588 j = dirlen; /* to get it in a register */
1589 /* A character that does not appear in the pattern induces a */
1590 /* stride equal to the pattern length. */
1591 while (BM_tab_base != BM_tab)
1592 {
1593 *--BM_tab = j;
1594 *--BM_tab = j;
1595 *--BM_tab = j;
1596 *--BM_tab = j;
1597 }
1598
1599 /* We use this for translation, instead of TRT itself.
1600 We fill this in to handle the characters that actually
1601 occur in the pattern. Others don't matter anyway! */
1602 bzero (simple_translate, sizeof simple_translate);
1603 for (i = 0; i < 0400; i++)
1604 simple_translate[i] = i;
1605
1606 i = 0;
1607 while (i != infinity)
1608 {
cb6792d2 1609 unsigned char *ptr = base_pat + i;
facdc750
RS
1610 i += direction;
1611 if (i == dirlen)
1612 i = infinity;
1613 if (! NILP (trt))
ca1d1d23 1614 {
facdc750 1615 int ch;
aff2ce94 1616 int untranslated;
facdc750
RS
1617 int this_translated = 1;
1618
1619 if (multibyte
cb6792d2
RS
1620 /* Is *PTR the last byte of a character? */
1621 && (pat_end - ptr == 1 || CHAR_HEAD_P (ptr[1])))
ca1d1d23 1622 {
facdc750
RS
1623 unsigned char *charstart = ptr;
1624 while (! CHAR_HEAD_P (*charstart))
1625 charstart--;
aff2ce94 1626 untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
6397418a 1627 if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
facdc750 1628 {
ab228c24 1629 TRANSLATE (ch, trt, untranslated);
aff2ce94
RS
1630 if (! CHAR_HEAD_P (*ptr))
1631 {
1632 translate_prev_byte = ptr[-1];
1633 if (! CHAR_HEAD_P (translate_prev_byte))
1634 translate_anteprev_byte = ptr[-2];
1635 }
facdc750 1636 }
aff2ce94 1637 else
ab228c24
RS
1638 {
1639 this_translated = 0;
1640 ch = *ptr;
1641 }
ca1d1d23 1642 }
facdc750 1643 else if (!multibyte)
aff2ce94 1644 TRANSLATE (ch, trt, *ptr);
ca1d1d23
JB
1645 else
1646 {
facdc750
RS
1647 ch = *ptr;
1648 this_translated = 0;
ca1d1d23 1649 }
facdc750 1650
ab228c24
RS
1651 if (ch > 0400)
1652 j = ((unsigned char) ch) | 0200;
1653 else
1654 j = (unsigned char) ch;
1655
facdc750
RS
1656 if (i == infinity)
1657 stride_for_teases = BM_tab[j];
ab228c24 1658
facdc750
RS
1659 BM_tab[j] = dirlen - i;
1660 /* A translation table is accompanied by its inverse -- see */
1661 /* comment following downcase_table for details */
1662 if (this_translated)
ab228c24
RS
1663 {
1664 int starting_ch = ch;
1665 int starting_j = j;
1666 while (1)
1667 {
1668 TRANSLATE (ch, inverse_trt, ch);
1669 if (ch > 0400)
1670 j = ((unsigned char) ch) | 0200;
1671 else
1672 j = (unsigned char) ch;
1673
1674 /* For all the characters that map into CH,
1675 set up simple_translate to map the last byte
1676 into STARTING_J. */
1677 simple_translate[j] = starting_j;
1678 if (ch == starting_ch)
1679 break;
1680 BM_tab[j] = dirlen - i;
1681 }
1682 }
facdc750
RS
1683 }
1684 else
1685 {
1686 j = *ptr;
1687
1688 if (i == infinity)
1689 stride_for_teases = BM_tab[j];
1690 BM_tab[j] = dirlen - i;
ca1d1d23 1691 }
facdc750
RS
1692 /* stride_for_teases tells how much to stride if we get a */
1693 /* match on the far character but are subsequently */
1694 /* disappointed, by recording what the stride would have been */
1695 /* for that character if the last character had been */
1696 /* different. */
1697 }
1698 infinity = dirlen - infinity;
1699 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1700 /* loop invariant - POS_BYTE points at where last char (first
1701 char if reverse) of pattern would align in a possible match. */
1702 while (n != 0)
1703 {
1704 int tail_end;
1705 unsigned char *tail_end_ptr;
1706
1707 /* It's been reported that some (broken) compiler thinks that
1708 Boolean expressions in an arithmetic context are unsigned.
1709 Using an explicit ?1:0 prevents this. */
1710 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1711 < 0)
1712 return (n * (0 - direction));
1713 /* First we do the part we can by pointers (maybe nothing) */
1714 QUIT;
1715 pat = base_pat;
1716 limit = pos_byte - dirlen + direction;
67ce527d
KH
1717 if (direction > 0)
1718 {
1719 limit = BUFFER_CEILING_OF (limit);
1720 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1721 can take on without hitting edge of buffer or the gap. */
1722 limit = min (limit, pos_byte + 20000);
1723 limit = min (limit, lim_byte - 1);
1724 }
1725 else
1726 {
1727 limit = BUFFER_FLOOR_OF (limit);
1728 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1729 can take on without hitting edge of buffer or the gap. */
1730 limit = max (limit, pos_byte - 20000);
1731 limit = max (limit, lim_byte);
1732 }
facdc750
RS
1733 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1734 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1735
1736 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1737 {
facdc750
RS
1738 unsigned char *p2;
1739
1740 p_limit = BYTE_POS_ADDR (limit);
1741 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1742 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1743 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1744 {
facdc750 1745 if (direction > 0) /* worth duplicating */
ca1d1d23 1746 {
facdc750
RS
1747 /* Use signed comparison if appropriate
1748 to make cursor+infinity sure to be > p_limit.
1749 Assuming that the buffer lies in a range of addresses
1750 that are all "positive" (as ints) or all "negative",
1751 either kind of comparison will work as long
1752 as we don't step by infinity. So pick the kind
1753 that works when we do step by infinity. */
1754 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1755 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1756 cursor += BM_tab[*cursor];
ca1d1d23 1757 else
facdc750
RS
1758 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1759 cursor += BM_tab[*cursor];
1760 }
1761 else
1762 {
1763 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1764 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1765 cursor += BM_tab[*cursor];
1766 else
1767 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1768 cursor += BM_tab[*cursor];
1769 }
ca1d1d23 1770/* If you are here, cursor is beyond the end of the searched region. */
facdc750
RS
1771/* This can happen if you match on the far character of the pattern, */
1772/* because the "stride" of that character is infinity, a number able */
1773/* to throw you well beyond the end of the search. It can also */
1774/* happen if you fail to match within the permitted region and would */
1775/* otherwise try a character beyond that region */
1776 if ((cursor - p_limit) * direction <= len_byte)
1777 break; /* a small overrun is genuine */
1778 cursor -= infinity; /* large overrun = hit */
1779 i = dirlen - direction;
1780 if (! NILP (trt))
1781 {
1782 while ((i -= direction) + direction != 0)
ca1d1d23 1783 {
facdc750
RS
1784 int ch;
1785 cursor -= direction;
1786 /* Translate only the last byte of a character. */
1787 if (! multibyte
1788 || ((cursor == tail_end_ptr
1789 || CHAR_HEAD_P (cursor[1]))
1790 && (CHAR_HEAD_P (cursor[0])
1791 || (translate_prev_byte == cursor[-1]
1792 && (CHAR_HEAD_P (translate_prev_byte)
1793 || translate_anteprev_byte == cursor[-2])))))
1794 ch = simple_translate[*cursor];
1795 else
1796 ch = *cursor;
1797 if (pat[i] != ch)
1798 break;
ca1d1d23 1799 }
facdc750
RS
1800 }
1801 else
1802 {
1803 while ((i -= direction) + direction != 0)
ca1d1d23 1804 {
facdc750
RS
1805 cursor -= direction;
1806 if (pat[i] != *cursor)
1807 break;
ca1d1d23 1808 }
facdc750
RS
1809 }
1810 cursor += dirlen - i - direction; /* fix cursor */
1811 if (i + direction == 0)
1812 {
1813 int position;
0c8533c6 1814
facdc750 1815 cursor -= direction;
1113d9db 1816
facdc750
RS
1817 position = pos_byte + cursor - p2 + ((direction > 0)
1818 ? 1 - len_byte : 0);
1819 set_search_regs (position, len_byte);
ca325161 1820
facdc750
RS
1821 if ((n -= direction) != 0)
1822 cursor += dirlen; /* to resume search */
ca1d1d23 1823 else
facdc750
RS
1824 return ((direction > 0)
1825 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1826 }
facdc750
RS
1827 else
1828 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 1829 }
facdc750
RS
1830 pos_byte += cursor - p2;
1831 }
1832 else
1833 /* Now we'll pick up a clump that has to be done the hard */
1834 /* way because it covers a discontinuity */
1835 {
1836 limit = ((direction > 0)
1837 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1838 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1839 limit = ((direction > 0)
1840 ? min (limit + len_byte, lim_byte - 1)
1841 : max (limit - len_byte, lim_byte));
1842 /* LIMIT is now the last value POS_BYTE can have
1843 and still be valid for a possible match. */
1844 while (1)
ca1d1d23 1845 {
facdc750
RS
1846 /* This loop can be coded for space rather than */
1847 /* speed because it will usually run only once. */
1848 /* (the reach is at most len + 21, and typically */
1849 /* does not exceed len) */
1850 while ((limit - pos_byte) * direction >= 0)
1851 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1852 /* now run the same tests to distinguish going off the */
1853 /* end, a match or a phony match. */
1854 if ((pos_byte - limit) * direction <= len_byte)
1855 break; /* ran off the end */
1856 /* Found what might be a match.
1857 Set POS_BYTE back to last (first if reverse) pos. */
1858 pos_byte -= infinity;
1859 i = dirlen - direction;
1860 while ((i -= direction) + direction != 0)
ca1d1d23 1861 {
facdc750
RS
1862 int ch;
1863 unsigned char *ptr;
1864 pos_byte -= direction;
1865 ptr = BYTE_POS_ADDR (pos_byte);
1866 /* Translate only the last byte of a character. */
1867 if (! multibyte
1868 || ((ptr == tail_end_ptr
1869 || CHAR_HEAD_P (ptr[1]))
1870 && (CHAR_HEAD_P (ptr[0])
1871 || (translate_prev_byte == ptr[-1]
1872 && (CHAR_HEAD_P (translate_prev_byte)
1873 || translate_anteprev_byte == ptr[-2])))))
1874 ch = simple_translate[*ptr];
1875 else
1876 ch = *ptr;
1877 if (pat[i] != ch)
1878 break;
1879 }
1880 /* Above loop has moved POS_BYTE part or all the way
1881 back to the first pos (last pos if reverse).
1882 Set it once again at the last (first if reverse) char. */
1883 pos_byte += dirlen - i- direction;
1884 if (i + direction == 0)
1885 {
1886 int position;
1887 pos_byte -= direction;
1113d9db 1888
facdc750 1889 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
0c8533c6 1890
facdc750 1891 set_search_regs (position, len_byte);
ca325161 1892
facdc750
RS
1893 if ((n -= direction) != 0)
1894 pos_byte += dirlen; /* to resume search */
ca1d1d23 1895 else
facdc750
RS
1896 return ((direction > 0)
1897 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1898 }
facdc750
RS
1899 else
1900 pos_byte += stride_for_teases;
1901 }
1902 }
1903 /* We have done one clump. Can we continue? */
1904 if ((lim_byte - pos_byte) * direction < 0)
1905 return ((0 - n) * direction);
ca1d1d23 1906 }
facdc750 1907 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 1908}
ca325161 1909
fa8ed3e0 1910/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
a7e4cdde
RS
1911 for the overall match just found in the current buffer.
1912 Also clear out the match data for registers 1 and up. */
ca325161
RS
1913
1914static void
fa8ed3e0
RS
1915set_search_regs (beg_byte, nbytes)
1916 int beg_byte, nbytes;
ca325161 1917{
a7e4cdde
RS
1918 int i;
1919
ca325161
RS
1920 /* Make sure we have registers in which to store
1921 the match position. */
1922 if (search_regs.num_regs == 0)
1923 {
2d4a771a
RS
1924 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1925 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 1926 search_regs.num_regs = 2;
ca325161
RS
1927 }
1928
a7e4cdde
RS
1929 /* Clear out the other registers. */
1930 for (i = 1; i < search_regs.num_regs; i++)
1931 {
1932 search_regs.start[i] = -1;
1933 search_regs.end[i] = -1;
1934 }
1935
fa8ed3e0
RS
1936 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1937 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 1938 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 1939}
ca1d1d23
JB
1940\f
1941/* Given a string of words separated by word delimiters,
1942 compute a regexp that matches those exact words
1943 separated by arbitrary punctuation. */
1944
1945static Lisp_Object
1946wordify (string)
1947 Lisp_Object string;
1948{
1949 register unsigned char *p, *o;
0c8533c6 1950 register int i, i_byte, len, punct_count = 0, word_count = 0;
ca1d1d23 1951 Lisp_Object val;
0c8533c6
RS
1952 int prev_c = 0;
1953 int adjust;
ca1d1d23
JB
1954
1955 CHECK_STRING (string, 0);
1956 p = XSTRING (string)->data;
1957 len = XSTRING (string)->size;
1958
0c8533c6
RS
1959 for (i = 0, i_byte = 0; i < len; )
1960 {
1961 int c;
1962
eb99a8dd 1963 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
0c8533c6
RS
1964
1965 if (SYNTAX (c) != Sword)
1966 {
1967 punct_count++;
1968 if (i > 0 && SYNTAX (prev_c) == Sword)
1969 word_count++;
1970 }
ca1d1d23 1971
0c8533c6
RS
1972 prev_c = c;
1973 }
1974
1975 if (SYNTAX (prev_c) == Sword)
1976 word_count++;
1977 if (!word_count)
1978 return build_string ("");
1979
1980 adjust = - punct_count + 5 * (word_count - 1) + 4;
8a2df937
RS
1981 if (STRING_MULTIBYTE (string))
1982 val = make_uninit_multibyte_string (len + adjust,
1983 STRING_BYTES (XSTRING (string))
1984 + adjust);
1985 else
1986 val = make_uninit_string (len + adjust);
ca1d1d23
JB
1987
1988 o = XSTRING (val)->data;
1989 *o++ = '\\';
1990 *o++ = 'b';
1e9582d4 1991 prev_c = 0;
ca1d1d23 1992
1e9582d4
RS
1993 for (i = 0, i_byte = 0; i < len; )
1994 {
1995 int c;
1996 int i_byte_orig = i_byte;
1997
eb99a8dd 1998 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1e9582d4
RS
1999
2000 if (SYNTAX (c) == Sword)
2001 {
2002 bcopy (&XSTRING (string)->data[i_byte_orig], o,
2003 i_byte - i_byte_orig);
2004 o += i_byte - i_byte_orig;
2005 }
2006 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2007 {
2008 *o++ = '\\';
2009 *o++ = 'W';
2010 *o++ = '\\';
2011 *o++ = 'W';
2012 *o++ = '*';
2013 }
2014
2015 prev_c = c;
2016 }
ca1d1d23
JB
2017
2018 *o++ = '\\';
2019 *o++ = 'b';
2020
2021 return val;
2022}
2023\f
2024DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
6af43974 2025 "MSearch backward: ",
ca1d1d23
JB
2026 "Search backward from point for STRING.\n\
2027Set point to the beginning of the occurrence found, and return point.\n\
2028An optional second argument bounds the search; it is a buffer position.\n\
2029The match found must not extend before that position.\n\
2030Optional third argument, if t, means if fail just return nil (no error).\n\
2031 If not nil and not t, position at limit of search and return nil.\n\
2032Optional fourth argument is repeat count--search for successive occurrences.\n\
d594a73b
EZ
2033\n\
2034Search case-sensitivity is determined by the value of the variable\n\
2035`case-fold-search', which see.\n\
2036\n\
ca1d1d23
JB
2037See also the functions `match-beginning', `match-end' and `replace-match'.")
2038 (string, bound, noerror, count)
2039 Lisp_Object string, bound, noerror, count;
2040{
b819a390 2041 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
2042}
2043
6af43974 2044DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
ca1d1d23
JB
2045 "Search forward from point for STRING.\n\
2046Set point to the end of the occurrence found, and return point.\n\
2047An optional second argument bounds the search; it is a buffer position.\n\
2048The match found must not extend after that position. nil is equivalent\n\
2049 to (point-max).\n\
2050Optional third argument, if t, means if fail just return nil (no error).\n\
2051 If not nil and not t, move to limit of search and return nil.\n\
2052Optional fourth argument is repeat count--search for successive occurrences.\n\
d594a73b
EZ
2053\n\
2054Search case-sensitivity is determined by the value of the variable\n\
2055`case-fold-search', which see.\n\
2056\n\
ca1d1d23
JB
2057See also the functions `match-beginning', `match-end' and `replace-match'.")
2058 (string, bound, noerror, count)
2059 Lisp_Object string, bound, noerror, count;
2060{
b819a390 2061 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
2062}
2063
2064DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2065 "sWord search backward: ",
2066 "Search backward from point for STRING, ignoring differences in punctuation.\n\
2067Set point to the beginning of the occurrence found, and return point.\n\
2068An optional second argument bounds the search; it is a buffer position.\n\
2069The match found must not extend before that position.\n\
2070Optional third argument, if t, means if fail just return nil (no error).\n\
2071 If not nil and not t, move to limit of search and return nil.\n\
2072Optional fourth argument is repeat count--search for successive occurrences.")
2073 (string, bound, noerror, count)
2074 Lisp_Object string, bound, noerror, count;
2075{
b819a390 2076 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2077}
2078
2079DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2080 "sWord search: ",
2081 "Search forward from point for STRING, ignoring differences in punctuation.\n\
2082Set point to the end of the occurrence found, and return point.\n\
2083An optional second argument bounds the search; it is a buffer position.\n\
2084The match found must not extend after that position.\n\
2085Optional third argument, if t, means if fail just return nil (no error).\n\
2086 If not nil and not t, move to limit of search and return nil.\n\
2087Optional fourth argument is repeat count--search for successive occurrences.")
2088 (string, bound, noerror, count)
2089 Lisp_Object string, bound, noerror, count;
2090{
b819a390 2091 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
2092}
2093
2094DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2095 "sRE search backward: ",
2096 "Search backward from point for match for regular expression REGEXP.\n\
2097Set point to the beginning of the match, and return point.\n\
2098The match found is the one starting last in the buffer\n\
19c0a730 2099and yet ending before the origin of the search.\n\
ca1d1d23
JB
2100An optional second argument bounds the search; it is a buffer position.\n\
2101The match found must start at or after that position.\n\
2102Optional third argument, if t, means if fail just return nil (no error).\n\
2103 If not nil and not t, move to limit of search and return nil.\n\
2104Optional fourth argument is repeat count--search for successive occurrences.\n\
9dddb23f 2105See also the functions `match-beginning', `match-end', `match-string',\n\
cb6560a1 2106and `replace-match'.")
19c0a730
KH
2107 (regexp, bound, noerror, count)
2108 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2109{
b819a390 2110 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2111}
2112
2113DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2114 "sRE search: ",
2115 "Search forward from point for regular expression REGEXP.\n\
2116Set point to the end of the occurrence found, and return point.\n\
2117An optional second argument bounds the search; it is a buffer position.\n\
2118The match found must not extend after that position.\n\
2119Optional third argument, if t, means if fail just return nil (no error).\n\
2120 If not nil and not t, move to limit of search and return nil.\n\
2121Optional fourth argument is repeat count--search for successive occurrences.\n\
9dddb23f 2122See also the functions `match-beginning', `match-end', `match-string',\n\
cb6560a1 2123and `replace-match'.")
19c0a730
KH
2124 (regexp, bound, noerror, count)
2125 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2126{
b819a390
RS
2127 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2128}
2129
2130DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2131 "sPosix search backward: ",
2132 "Search backward from point for match for regular expression REGEXP.\n\
2133Find the longest match in accord with Posix regular expression rules.\n\
2134Set point to the beginning of the match, and return point.\n\
2135The match found is the one starting last in the buffer\n\
2136and yet ending before the origin of the search.\n\
2137An optional second argument bounds the search; it is a buffer position.\n\
2138The match found must start at or after that position.\n\
2139Optional third argument, if t, means if fail just return nil (no error).\n\
2140 If not nil and not t, move to limit of search and return nil.\n\
2141Optional fourth argument is repeat count--search for successive occurrences.\n\
9dddb23f 2142See also the functions `match-beginning', `match-end', `match-string',\n\
cb6560a1 2143and `replace-match'.")
b819a390
RS
2144 (regexp, bound, noerror, count)
2145 Lisp_Object regexp, bound, noerror, count;
2146{
2147 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2148}
2149
2150DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2151 "sPosix search: ",
2152 "Search forward from point for regular expression REGEXP.\n\
2153Find the longest match in accord with Posix regular expression rules.\n\
2154Set point to the end of the occurrence found, and return point.\n\
2155An optional second argument bounds the search; it is a buffer position.\n\
2156The match found must not extend after that position.\n\
2157Optional third argument, if t, means if fail just return nil (no error).\n\
2158 If not nil and not t, move to limit of search and return nil.\n\
2159Optional fourth argument is repeat count--search for successive occurrences.\n\
9dddb23f 2160See also the functions `match-beginning', `match-end', `match-string',\n\
cb6560a1 2161and `replace-match'.")
b819a390
RS
2162 (regexp, bound, noerror, count)
2163 Lisp_Object regexp, bound, noerror, count;
2164{
2165 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2166}
2167\f
d7a5ad5f 2168DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
ca1d1d23
JB
2169 "Replace text matched by last search with NEWTEXT.\n\
2170If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
5b9cf4b2
RS
2171Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
2172based on the replaced text.\n\
2173If the replaced text has only capital letters\n\
2174and has at least one multiletter word, convert NEWTEXT to all caps.\n\
2175If the replaced text has at least one word starting with a capital letter,\n\
2176then capitalize each word in NEWTEXT.\n\n\
ca1d1d23
JB
2177If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
2178Otherwise treat `\\' as special:\n\
2179 `\\&' in NEWTEXT means substitute original matched text.\n\
2180 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
2181 If Nth parens didn't match, substitute nothing.\n\
2182 `\\\\' means insert one `\\'.\n\
1113d9db 2183FIXEDCASE and LITERAL are optional arguments.\n\
080c45fd
RS
2184Leaves point at end of replacement text.\n\
2185\n\
2186The optional fourth argument STRING can be a string to modify.\n\
2187In that case, this function creates and returns a new string\n\
d7a5ad5f
RS
2188which is made by replacing the part of STRING that was matched.\n\
2189\n\
2190The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
2191It says to replace just that subexpression instead of the whole match.\n\
2192This is useful only after a regular expression search or match\n\
2193since only regular expressions have distinguished subexpressions.")
2194 (newtext, fixedcase, literal, string, subexp)
2195 Lisp_Object newtext, fixedcase, literal, string, subexp;
ca1d1d23
JB
2196{
2197 enum { nochange, all_caps, cap_initial } case_action;
ac3b28b1 2198 register int pos, pos_byte;
ca1d1d23 2199 int some_multiletter_word;
97832bd0 2200 int some_lowercase;
73dc8771 2201 int some_uppercase;
208767c3 2202 int some_nonuppercase_initial;
ca1d1d23
JB
2203 register int c, prevc;
2204 int inslen;
d7a5ad5f 2205 int sub;
3e18eecf 2206 int opoint, newpoint;
ca1d1d23 2207
16fdc568 2208 CHECK_STRING (newtext, 0);
ca1d1d23 2209
080c45fd
RS
2210 if (! NILP (string))
2211 CHECK_STRING (string, 4);
2212
ca1d1d23
JB
2213 case_action = nochange; /* We tried an initialization */
2214 /* but some C compilers blew it */
4746118a
JB
2215
2216 if (search_regs.num_regs <= 0)
2217 error ("replace-match called before any match found");
2218
d7a5ad5f
RS
2219 if (NILP (subexp))
2220 sub = 0;
2221 else
2222 {
2223 CHECK_NUMBER (subexp, 3);
2224 sub = XINT (subexp);
2225 if (sub < 0 || sub >= search_regs.num_regs)
2226 args_out_of_range (subexp, make_number (search_regs.num_regs));
2227 }
2228
080c45fd
RS
2229 if (NILP (string))
2230 {
d7a5ad5f
RS
2231 if (search_regs.start[sub] < BEGV
2232 || search_regs.start[sub] > search_regs.end[sub]
2233 || search_regs.end[sub] > ZV)
2234 args_out_of_range (make_number (search_regs.start[sub]),
2235 make_number (search_regs.end[sub]));
080c45fd
RS
2236 }
2237 else
2238 {
d7a5ad5f
RS
2239 if (search_regs.start[sub] < 0
2240 || search_regs.start[sub] > search_regs.end[sub]
2241 || search_regs.end[sub] > XSTRING (string)->size)
2242 args_out_of_range (make_number (search_regs.start[sub]),
2243 make_number (search_regs.end[sub]));
080c45fd 2244 }
ca1d1d23
JB
2245
2246 if (NILP (fixedcase))
2247 {
2248 /* Decide how to casify by examining the matched text. */
ac3b28b1 2249 int last;
ca1d1d23 2250
ac3b28b1
KH
2251 pos = search_regs.start[sub];
2252 last = search_regs.end[sub];
fa8ed3e0
RS
2253
2254 if (NILP (string))
ac3b28b1 2255 pos_byte = CHAR_TO_BYTE (pos);
fa8ed3e0 2256 else
ac3b28b1 2257 pos_byte = string_char_to_byte (string, pos);
fa8ed3e0 2258
ca1d1d23
JB
2259 prevc = '\n';
2260 case_action = all_caps;
2261
2262 /* some_multiletter_word is set nonzero if any original word
2263 is more than one letter long. */
2264 some_multiletter_word = 0;
97832bd0 2265 some_lowercase = 0;
208767c3 2266 some_nonuppercase_initial = 0;
73dc8771 2267 some_uppercase = 0;
ca1d1d23 2268
ac3b28b1 2269 while (pos < last)
ca1d1d23 2270 {
080c45fd 2271 if (NILP (string))
ac3b28b1
KH
2272 {
2273 c = FETCH_CHAR (pos_byte);
2274 INC_BOTH (pos, pos_byte);
2275 }
080c45fd 2276 else
ac3b28b1 2277 FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
080c45fd 2278
ca1d1d23
JB
2279 if (LOWERCASEP (c))
2280 {
2281 /* Cannot be all caps if any original char is lower case */
2282
97832bd0 2283 some_lowercase = 1;
ca1d1d23 2284 if (SYNTAX (prevc) != Sword)
208767c3 2285 some_nonuppercase_initial = 1;
ca1d1d23
JB
2286 else
2287 some_multiletter_word = 1;
2288 }
2289 else if (!NOCASEP (c))
2290 {
73dc8771 2291 some_uppercase = 1;
97832bd0 2292 if (SYNTAX (prevc) != Sword)
c4d460ce 2293 ;
97832bd0 2294 else
ca1d1d23
JB
2295 some_multiletter_word = 1;
2296 }
208767c3
RS
2297 else
2298 {
2299 /* If the initial is a caseless word constituent,
2300 treat that like a lowercase initial. */
2301 if (SYNTAX (prevc) != Sword)
2302 some_nonuppercase_initial = 1;
2303 }
ca1d1d23
JB
2304
2305 prevc = c;
2306 }
2307
97832bd0
RS
2308 /* Convert to all caps if the old text is all caps
2309 and has at least one multiletter word. */
2310 if (! some_lowercase && some_multiletter_word)
2311 case_action = all_caps;
c4d460ce 2312 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2313 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2314 case_action = cap_initial;
208767c3 2315 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2316 /* Should x -> yz, operating on X, give Yz or YZ?
2317 We'll assume the latter. */
2318 case_action = all_caps;
97832bd0
RS
2319 else
2320 case_action = nochange;
ca1d1d23
JB
2321 }
2322
080c45fd
RS
2323 /* Do replacement in a string. */
2324 if (!NILP (string))
2325 {
2326 Lisp_Object before, after;
2327
2328 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2329 make_number (search_regs.start[sub]));
2330 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2331
636a5e28
RS
2332 /* Substitute parts of the match into NEWTEXT
2333 if desired. */
080c45fd
RS
2334 if (NILP (literal))
2335 {
d131e79c
RS
2336 int lastpos = 0;
2337 int lastpos_byte = 0;
080c45fd
RS
2338 /* We build up the substituted string in ACCUM. */
2339 Lisp_Object accum;
2340 Lisp_Object middle;
ac3b28b1 2341 int length = STRING_BYTES (XSTRING (newtext));
080c45fd
RS
2342
2343 accum = Qnil;
2344
ac3b28b1 2345 for (pos_byte = 0, pos = 0; pos_byte < length;)
080c45fd
RS
2346 {
2347 int substart = -1;
6bbd7a29 2348 int subend = 0;
1e79ec24 2349 int delbackslash = 0;
080c45fd 2350
0c8533c6
RS
2351 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2352
080c45fd
RS
2353 if (c == '\\')
2354 {
0c8533c6 2355 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
eb99a8dd 2356
080c45fd
RS
2357 if (c == '&')
2358 {
d7a5ad5f
RS
2359 substart = search_regs.start[sub];
2360 subend = search_regs.end[sub];
080c45fd
RS
2361 }
2362 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2363 {
ad10348f 2364 if (search_regs.start[c - '0'] >= 0)
080c45fd
RS
2365 {
2366 substart = search_regs.start[c - '0'];
2367 subend = search_regs.end[c - '0'];
2368 }
2369 }
1e79ec24
KH
2370 else if (c == '\\')
2371 delbackslash = 1;
636a5e28
RS
2372 else
2373 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2374 }
2375 if (substart >= 0)
2376 {
d131e79c
RS
2377 if (pos - 2 != lastpos)
2378 middle = substring_both (newtext, lastpos,
2379 lastpos_byte,
2380 pos - 2, pos_byte - 2);
080c45fd
RS
2381 else
2382 middle = Qnil;
2383 accum = concat3 (accum, middle,
0c8533c6
RS
2384 Fsubstring (string,
2385 make_number (substart),
080c45fd
RS
2386 make_number (subend)));
2387 lastpos = pos;
0c8533c6 2388 lastpos_byte = pos_byte;
080c45fd 2389 }
1e79ec24
KH
2390 else if (delbackslash)
2391 {
d131e79c
RS
2392 middle = substring_both (newtext, lastpos,
2393 lastpos_byte,
2394 pos - 1, pos_byte - 1);
0c8533c6 2395
1e79ec24
KH
2396 accum = concat2 (accum, middle);
2397 lastpos = pos;
0c8533c6 2398 lastpos_byte = pos_byte;
1e79ec24 2399 }
080c45fd
RS
2400 }
2401
d131e79c
RS
2402 if (pos != lastpos)
2403 middle = substring_both (newtext, lastpos,
2404 lastpos_byte,
0c8533c6 2405 pos, pos_byte);
080c45fd
RS
2406 else
2407 middle = Qnil;
2408
2409 newtext = concat2 (accum, middle);
2410 }
2411
636a5e28 2412 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2413 if (case_action == all_caps)
2414 newtext = Fupcase (newtext);
2415 else if (case_action == cap_initial)
2b2eead9 2416 newtext = Fupcase_initials (newtext);
080c45fd
RS
2417
2418 return concat3 (before, newtext, after);
2419 }
2420
b0eba991 2421 /* Record point, the move (quietly) to the start of the match. */
9160906f 2422 if (PT >= search_regs.end[sub])
b0eba991 2423 opoint = PT - ZV;
9160906f
RS
2424 else if (PT > search_regs.start[sub])
2425 opoint = search_regs.end[sub] - ZV;
b0eba991
RS
2426 else
2427 opoint = PT;
2428
fa8ed3e0 2429 TEMP_SET_PT (search_regs.start[sub]);
b0eba991 2430
9a76659d
JB
2431 /* We insert the replacement text before the old text, and then
2432 delete the original text. This means that markers at the
2433 beginning or end of the original will float to the corresponding
2434 position in the replacement. */
ca1d1d23 2435 if (!NILP (literal))
16fdc568 2436 Finsert_and_inherit (1, &newtext);
ca1d1d23
JB
2437 else
2438 {
ac3b28b1 2439 int length = STRING_BYTES (XSTRING (newtext));
68e69fbd
RS
2440 unsigned char *substed;
2441 int substed_alloc_size, substed_len;
3bc25e52
KH
2442 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2443 int str_multibyte = STRING_MULTIBYTE (newtext);
2444 Lisp_Object rev_tbl;
2445
2446 rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2447 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2448 make_number (0))
2449 : Qnil);
ac3b28b1 2450
68e69fbd
RS
2451 substed_alloc_size = length * 2 + 100;
2452 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2453 substed_len = 0;
2454
3bc25e52
KH
2455 /* Go thru NEWTEXT, producing the actual text to insert in
2456 SUBSTED while adjusting multibyteness to that of the current
2457 buffer. */
ca1d1d23 2458
ac3b28b1 2459 for (pos_byte = 0, pos = 0; pos_byte < length;)
ca1d1d23 2460 {
68e69fbd 2461 unsigned char str[MAX_MULTIBYTE_LENGTH];
f8ce8a0d
GM
2462 unsigned char *add_stuff = NULL;
2463 int add_len = 0;
68e69fbd 2464 int idx = -1;
9a76659d 2465
3bc25e52
KH
2466 if (str_multibyte)
2467 {
eb99a8dd 2468 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
3bc25e52
KH
2469 if (!buf_multibyte)
2470 c = multibyte_char_to_unibyte (c, rev_tbl);
2471 }
2472 else
2473 {
2474 /* Note that we don't have to increment POS. */
2475 c = XSTRING (newtext)->data[pos_byte++];
2476 if (buf_multibyte)
2477 c = unibyte_char_to_multibyte (c);
2478 }
ac3b28b1 2479
68e69fbd
RS
2480 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2481 or set IDX to a match index, which means put that part
2482 of the buffer text into SUBSTED. */
2483
ca1d1d23
JB
2484 if (c == '\\')
2485 {
3bc25e52
KH
2486 if (str_multibyte)
2487 {
eb99a8dd
KH
2488 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2489 pos, pos_byte);
3bc25e52
KH
2490 if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2491 c = multibyte_char_to_unibyte (c, rev_tbl);
2492 }
2493 else
2494 {
2495 c = XSTRING (newtext)->data[pos_byte++];
2496 if (buf_multibyte)
2497 c = unibyte_char_to_multibyte (c);
2498 }
2499
ca1d1d23 2500 if (c == '&')
68e69fbd 2501 idx = sub;
78445046 2502 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
2503 {
2504 if (search_regs.start[c - '0'] >= 1)
68e69fbd 2505 idx = c - '0';
ca1d1d23 2506 }
636a5e28 2507 else if (c == '\\')
68e69fbd 2508 add_len = 1, add_stuff = "\\";
636a5e28 2509 else
3bc25e52
KH
2510 {
2511 xfree (substed);
2512 error ("Invalid use of `\\' in replacement text");
2513 }
ca1d1d23
JB
2514 }
2515 else
68e69fbd
RS
2516 {
2517 add_len = CHAR_STRING (c, str);
2518 add_stuff = str;
2519 }
2520
2521 /* If we want to copy part of a previous match,
2522 set up ADD_STUFF and ADD_LEN to point to it. */
2523 if (idx >= 0)
2524 {
2525 int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2526 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2527 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2528 move_gap (search_regs.start[idx]);
2529 add_stuff = BYTE_POS_ADDR (begbyte);
2530 }
2531
2532 /* Now the stuff we want to add to SUBSTED
2533 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2534
2535 /* Make sure SUBSTED is big enough. */
2536 if (substed_len + add_len >= substed_alloc_size)
2537 {
2538 substed_alloc_size = substed_len + add_len + 500;
2539 substed = (unsigned char *) xrealloc (substed,
2540 substed_alloc_size + 1);
2541 }
2542
2543 /* Now add to the end of SUBSTED. */
f8ce8a0d
GM
2544 if (add_stuff)
2545 {
2546 bcopy (add_stuff, substed + substed_len, add_len);
2547 substed_len += add_len;
2548 }
ca1d1d23 2549 }
68e69fbd
RS
2550
2551 /* Now insert what we accumulated. */
2552 insert_and_inherit (substed, substed_len);
2553
2554 xfree (substed);
ca1d1d23
JB
2555 }
2556
6ec8bbd2 2557 inslen = PT - (search_regs.start[sub]);
d7a5ad5f 2558 del_range (search_regs.start[sub] + inslen, search_regs.end[sub] + inslen);
ca1d1d23
JB
2559
2560 if (case_action == all_caps)
6ec8bbd2 2561 Fupcase_region (make_number (PT - inslen), make_number (PT));
ca1d1d23 2562 else if (case_action == cap_initial)
6ec8bbd2 2563 Fupcase_initials_region (make_number (PT - inslen), make_number (PT));
b0eba991 2564
3e18eecf
RS
2565 newpoint = PT;
2566
b0eba991 2567 /* Put point back where it was in the text. */
8d808a65 2568 if (opoint <= 0)
fa8ed3e0 2569 TEMP_SET_PT (opoint + ZV);
b0eba991 2570 else
fa8ed3e0 2571 TEMP_SET_PT (opoint);
b0eba991
RS
2572
2573 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2574 move_if_not_intangible (newpoint);
b0eba991 2575
ca1d1d23
JB
2576 return Qnil;
2577}
2578\f
2579static Lisp_Object
2580match_limit (num, beginningp)
2581 Lisp_Object num;
2582 int beginningp;
2583{
2584 register int n;
2585
2586 CHECK_NUMBER (num, 0);
2587 n = XINT (num);
4746118a
JB
2588 if (n < 0 || n >= search_regs.num_regs)
2589 args_out_of_range (num, make_number (search_regs.num_regs));
2590 if (search_regs.num_regs <= 0
2591 || search_regs.start[n] < 0)
ca1d1d23
JB
2592 return Qnil;
2593 return (make_number ((beginningp) ? search_regs.start[n]
2594 : search_regs.end[n]));
2595}
2596
2597DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2598 "Return position of start of text matched by last search.\n\
5806161b
EN
2599SUBEXP, a number, specifies which parenthesized expression in the last\n\
2600 regexp.\n\
2601Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2602 SUBEXP pairs.\n\
ca1d1d23 2603Zero means the entire text matched by the whole regexp or whole string.")
5806161b
EN
2604 (subexp)
2605 Lisp_Object subexp;
ca1d1d23 2606{
5806161b 2607 return match_limit (subexp, 1);
ca1d1d23
JB
2608}
2609
2610DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2611 "Return position of end of text matched by last search.\n\
5806161b
EN
2612SUBEXP, a number, specifies which parenthesized expression in the last\n\
2613 regexp.\n\
2614Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2615 SUBEXP pairs.\n\
ca1d1d23 2616Zero means the entire text matched by the whole regexp or whole string.")
5806161b
EN
2617 (subexp)
2618 Lisp_Object subexp;
ca1d1d23 2619{
5806161b 2620 return match_limit (subexp, 0);
ca1d1d23
JB
2621}
2622
56256c2a 2623DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
ca1d1d23
JB
2624 "Return a list containing all info on what the last search matched.\n\
2625Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
2626All the elements are markers or nil (nil if the Nth pair didn't match)\n\
2627if the last match was on a buffer; integers or nil if a string was matched.\n\
56256c2a
RS
2628Use `store-match-data' to reinstate the data in this list.\n\
2629\n\
2630If INTEGERS (the optional first argument) is non-nil, always use integers\n\
8ca821e9 2631\(rather than markers) to represent buffer positions.\n\
56256c2a
RS
2632If REUSE is a list, reuse it as part of the value. If REUSE is long enough\n\
2633to hold all the values, and if INTEGERS is non-nil, no consing is done.")
2634 (integers, reuse)
2635 Lisp_Object integers, reuse;
ca1d1d23 2636{
56256c2a 2637 Lisp_Object tail, prev;
4746118a 2638 Lisp_Object *data;
ca1d1d23
JB
2639 int i, len;
2640
daa37602 2641 if (NILP (last_thing_searched))
c36bcf1b 2642 return Qnil;
daa37602 2643
6bbd7a29
GM
2644 prev = Qnil;
2645
4746118a
JB
2646 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2647 * sizeof (Lisp_Object));
2648
ca1d1d23 2649 len = -1;
4746118a 2650 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2651 {
2652 int start = search_regs.start[i];
2653 if (start >= 0)
2654 {
56256c2a
RS
2655 if (EQ (last_thing_searched, Qt)
2656 || ! NILP (integers))
ca1d1d23 2657 {
c235cce7
KH
2658 XSETFASTINT (data[2 * i], start);
2659 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2660 }
0ed62dc7 2661 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2662 {
2663 data[2 * i] = Fmake_marker ();
daa37602
JB
2664 Fset_marker (data[2 * i],
2665 make_number (start),
2666 last_thing_searched);
ca1d1d23
JB
2667 data[2 * i + 1] = Fmake_marker ();
2668 Fset_marker (data[2 * i + 1],
daa37602
JB
2669 make_number (search_regs.end[i]),
2670 last_thing_searched);
ca1d1d23 2671 }
daa37602
JB
2672 else
2673 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2674 abort ();
2675
ca1d1d23
JB
2676 len = i;
2677 }
2678 else
2679 data[2 * i] = data [2 * i + 1] = Qnil;
2680 }
56256c2a
RS
2681
2682 /* If REUSE is not usable, cons up the values and return them. */
2683 if (! CONSP (reuse))
2684 return Flist (2 * len + 2, data);
2685
2686 /* If REUSE is a list, store as many value elements as will fit
2687 into the elements of REUSE. */
2688 for (i = 0, tail = reuse; CONSP (tail);
c1d497be 2689 i++, tail = XCDR (tail))
56256c2a
RS
2690 {
2691 if (i < 2 * len + 2)
c1d497be 2692 XCAR (tail) = data[i];
56256c2a 2693 else
c1d497be 2694 XCAR (tail) = Qnil;
56256c2a
RS
2695 prev = tail;
2696 }
2697
2698 /* If we couldn't fit all value elements into REUSE,
2699 cons up the rest of them and add them to the end of REUSE. */
2700 if (i < 2 * len + 2)
c1d497be 2701 XCDR (prev) = Flist (2 * len + 2 - i, data + i);
56256c2a
RS
2702
2703 return reuse;
ca1d1d23
JB
2704}
2705
2706
3f1c005b 2707DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
ca1d1d23
JB
2708 "Set internal data on last search match from elements of LIST.\n\
2709LIST should have been created by calling `match-data' previously.")
2710 (list)
2711 register Lisp_Object list;
2712{
2713 register int i;
2714 register Lisp_Object marker;
2715
7074fde6
FP
2716 if (running_asynch_code)
2717 save_search_regs ();
2718
ca1d1d23 2719 if (!CONSP (list) && !NILP (list))
b37902c8 2720 list = wrong_type_argument (Qconsp, list);
ca1d1d23 2721
daa37602
JB
2722 /* Unless we find a marker with a buffer in LIST, assume that this
2723 match data came from a string. */
2724 last_thing_searched = Qt;
2725
4746118a
JB
2726 /* Allocate registers if they don't already exist. */
2727 {
d084e942 2728 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
2729
2730 if (length > search_regs.num_regs)
2731 {
1113d9db
JB
2732 if (search_regs.num_regs == 0)
2733 {
2734 search_regs.start
2735 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2736 search_regs.end
2737 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2738 }
4746118a 2739 else
1113d9db
JB
2740 {
2741 search_regs.start
2742 = (regoff_t *) xrealloc (search_regs.start,
2743 length * sizeof (regoff_t));
2744 search_regs.end
2745 = (regoff_t *) xrealloc (search_regs.end,
2746 length * sizeof (regoff_t));
2747 }
4746118a 2748
487282dc 2749 search_regs.num_regs = length;
4746118a
JB
2750 }
2751 }
2752
2753 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2754 {
2755 marker = Fcar (list);
2756 if (NILP (marker))
2757 {
2758 search_regs.start[i] = -1;
2759 list = Fcdr (list);
2760 }
2761 else
2762 {
0ed62dc7 2763 if (MARKERP (marker))
daa37602
JB
2764 {
2765 if (XMARKER (marker)->buffer == 0)
c235cce7 2766 XSETFASTINT (marker, 0);
daa37602 2767 else
a3668d92 2768 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
daa37602 2769 }
ca1d1d23
JB
2770
2771 CHECK_NUMBER_COERCE_MARKER (marker, 0);
2772 search_regs.start[i] = XINT (marker);
2773 list = Fcdr (list);
2774
2775 marker = Fcar (list);
0ed62dc7 2776 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
c235cce7 2777 XSETFASTINT (marker, 0);
ca1d1d23
JB
2778
2779 CHECK_NUMBER_COERCE_MARKER (marker, 0);
2780 search_regs.end[i] = XINT (marker);
2781 }
2782 list = Fcdr (list);
2783 }
2784
2785 return Qnil;
2786}
2787
7074fde6
FP
2788/* If non-zero the match data have been saved in saved_search_regs
2789 during the execution of a sentinel or filter. */
75ebf74b 2790static int search_regs_saved;
7074fde6
FP
2791static struct re_registers saved_search_regs;
2792
2793/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2794 if asynchronous code (filter or sentinel) is running. */
2795static void
2796save_search_regs ()
2797{
2798 if (!search_regs_saved)
2799 {
2800 saved_search_regs.num_regs = search_regs.num_regs;
2801 saved_search_regs.start = search_regs.start;
2802 saved_search_regs.end = search_regs.end;
2803 search_regs.num_regs = 0;
2d4a771a
RS
2804 search_regs.start = 0;
2805 search_regs.end = 0;
7074fde6
FP
2806
2807 search_regs_saved = 1;
2808 }
2809}
2810
2811/* Called upon exit from filters and sentinels. */
2812void
2813restore_match_data ()
2814{
2815 if (search_regs_saved)
2816 {
2817 if (search_regs.num_regs > 0)
2818 {
2819 xfree (search_regs.start);
2820 xfree (search_regs.end);
2821 }
2822 search_regs.num_regs = saved_search_regs.num_regs;
2823 search_regs.start = saved_search_regs.start;
2824 search_regs.end = saved_search_regs.end;
2825
2826 search_regs_saved = 0;
2827 }
2828}
2829
ca1d1d23
JB
2830/* Quote a string to inactivate reg-expr chars */
2831
2832DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2833 "Return a regexp string which matches exactly STRING and nothing else.")
5806161b
EN
2834 (string)
2835 Lisp_Object string;
ca1d1d23
JB
2836{
2837 register unsigned char *in, *out, *end;
2838 register unsigned char *temp;
0c8533c6 2839 int backslashes_added = 0;
ca1d1d23 2840
5806161b 2841 CHECK_STRING (string, 0);
ca1d1d23 2842
fc932ac6 2843 temp = (unsigned char *) alloca (STRING_BYTES (XSTRING (string)) * 2);
ca1d1d23
JB
2844
2845 /* Now copy the data into the new string, inserting escapes. */
2846
5806161b 2847 in = XSTRING (string)->data;
fc932ac6 2848 end = in + STRING_BYTES (XSTRING (string));
ca1d1d23
JB
2849 out = temp;
2850
2851 for (; in != end; in++)
2852 {
2853 if (*in == '[' || *in == ']'
2854 || *in == '*' || *in == '.' || *in == '\\'
2855 || *in == '?' || *in == '+'
2856 || *in == '^' || *in == '$')
0c8533c6 2857 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
2858 *out++ = *in;
2859 }
2860
3f8100f1 2861 return make_specified_string (temp,
0c8533c6 2862 XSTRING (string)->size + backslashes_added,
3f8100f1
RS
2863 out - temp,
2864 STRING_MULTIBYTE (string));
ca1d1d23
JB
2865}
2866\f
dfcf069d 2867void
ca1d1d23
JB
2868syms_of_search ()
2869{
2870 register int i;
2871
487282dc
KH
2872 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2873 {
2874 searchbufs[i].buf.allocated = 100;
2875 searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
2876 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2877 searchbufs[i].regexp = Qnil;
2878 staticpro (&searchbufs[i].regexp);
2879 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2880 }
2881 searchbuf_head = &searchbufs[0];
ca1d1d23
JB
2882
2883 Qsearch_failed = intern ("search-failed");
2884 staticpro (&Qsearch_failed);
2885 Qinvalid_regexp = intern ("invalid-regexp");
2886 staticpro (&Qinvalid_regexp);
2887
2888 Fput (Qsearch_failed, Qerror_conditions,
2889 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2890 Fput (Qsearch_failed, Qerror_message,
2891 build_string ("Search failed"));
2892
2893 Fput (Qinvalid_regexp, Qerror_conditions,
2894 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2895 Fput (Qinvalid_regexp, Qerror_message,
2896 build_string ("Invalid regexp"));
2897
daa37602
JB
2898 last_thing_searched = Qnil;
2899 staticpro (&last_thing_searched);
2900
ca1d1d23 2901 defsubr (&Slooking_at);
b819a390
RS
2902 defsubr (&Sposix_looking_at);
2903 defsubr (&Sstring_match);
2904 defsubr (&Sposix_string_match);
ca1d1d23
JB
2905 defsubr (&Ssearch_forward);
2906 defsubr (&Ssearch_backward);
2907 defsubr (&Sword_search_forward);
2908 defsubr (&Sword_search_backward);
2909 defsubr (&Sre_search_forward);
2910 defsubr (&Sre_search_backward);
b819a390
RS
2911 defsubr (&Sposix_search_forward);
2912 defsubr (&Sposix_search_backward);
ca1d1d23
JB
2913 defsubr (&Sreplace_match);
2914 defsubr (&Smatch_beginning);
2915 defsubr (&Smatch_end);
2916 defsubr (&Smatch_data);
3f1c005b 2917 defsubr (&Sset_match_data);
ca1d1d23
JB
2918 defsubr (&Sregexp_quote);
2919}