(rmail-edit-current-message, rmail-cease-edit):
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
31c8f881 2 Copyright (C) 1985, 86, 87, 93, 94, 97, 1998 Free Software Foundation, Inc.
ca1d1d23
JB
3
4This file is part of GNU Emacs.
5
6GNU Emacs is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
7c938215 8the Free Software Foundation; either version 2, or (at your option)
ca1d1d23
JB
9any later version.
10
11GNU Emacs is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU Emacs; see the file COPYING. If not, write to
3b7ad313
EN
18the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19Boston, MA 02111-1307, USA. */
ca1d1d23
JB
20
21
18160b98 22#include <config.h>
dfcf069d
AS
23#ifdef STDC_HEADERS
24#include <stdlib.h>
25#endif
ca1d1d23
JB
26#include "lisp.h"
27#include "syntax.h"
5679531d 28#include "category.h"
ca1d1d23 29#include "buffer.h"
5679531d 30#include "charset.h"
9169c321 31#include "region-cache.h"
ca1d1d23 32#include "commands.h"
9ac0d9e0 33#include "blockinput.h"
bf1760bb 34#include "intervals.h"
4746118a 35
ca1d1d23
JB
36#include <sys/types.h>
37#include "regex.h"
38
67ce527d
KH
39#define min(a, b) ((a) < (b) ? (a) : (b))
40#define max(a, b) ((a) > (b) ? (a) : (b))
41
1d288aef 42#define REGEXP_CACHE_SIZE 20
ca1d1d23 43
487282dc
KH
44/* If the regexp is non-nil, then the buffer contains the compiled form
45 of that regexp, suitable for searching. */
1d288aef
RS
46struct regexp_cache
47{
487282dc
KH
48 struct regexp_cache *next;
49 Lisp_Object regexp;
50 struct re_pattern_buffer buf;
51 char fastmap[0400];
b819a390
RS
52 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
53 char posix;
487282dc 54};
ca1d1d23 55
487282dc
KH
56/* The instances of that struct. */
57struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 58
487282dc
KH
59/* The head of the linked list; points to the most recently used buffer. */
60struct regexp_cache *searchbuf_head;
ca1d1d23 61
ca1d1d23 62
4746118a
JB
63/* Every call to re_match, etc., must pass &search_regs as the regs
64 argument unless you can show it is unnecessary (i.e., if re_match
65 is certainly going to be called again before region-around-match
66 can be called).
67
68 Since the registers are now dynamically allocated, we need to make
69 sure not to refer to the Nth register before checking that it has
1113d9db
JB
70 been allocated by checking search_regs.num_regs.
71
72 The regex code keeps track of whether it has allocated the search
487282dc
KH
73 buffer using bits in the re_pattern_buffer. This means that whenever
74 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
75 allocated any registers, and will allocate new registers the next
76 time you call a searching or matching function. Therefore, we need
77 to call re_set_registers after compiling a new pattern or after
78 setting the match registers, so that the regex functions will be
79 able to free or re-allocate it properly. */
ca1d1d23
JB
80static struct re_registers search_regs;
81
daa37602
JB
82/* The buffer in which the last search was performed, or
83 Qt if the last search was done in a string;
84 Qnil if no searching has been done yet. */
85static Lisp_Object last_thing_searched;
ca1d1d23 86
8e6208c5 87/* error condition signaled when regexp compile_pattern fails */
ca1d1d23
JB
88
89Lisp_Object Qinvalid_regexp;
90
ca325161 91static void set_search_regs ();
044f81f1 92static void save_search_regs ();
facdc750
RS
93static int simple_search ();
94static int boyer_moore ();
b819a390
RS
95static int search_buffer ();
96
ca1d1d23
JB
97static void
98matcher_overflow ()
99{
100 error ("Stack overflow in regexp matcher");
101}
102
103#ifdef __STDC__
104#define CONST const
105#else
106#define CONST
107#endif
108
b819a390
RS
109/* Compile a regexp and signal a Lisp error if anything goes wrong.
110 PATTERN is the pattern to compile.
111 CP is the place to put the result.
facdc750 112 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
113 REGP is the structure that says where to store the "register"
114 values that will result from matching this pattern.
115 If it is 0, we should compile the pattern not to record any
116 subexpression bounds.
117 POSIX is nonzero if we want full backtracking (POSIX style)
5679531d
KH
118 for this pattern. 0 means backtrack only enough to get a valid match.
119 MULTIBYTE is nonzero if we want to handle multibyte characters in
120 PATTERN. 0 means all multibyte characters are recognized just as
121 sequences of binary data. */
ca1d1d23 122
487282dc 123static void
5679531d 124compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
487282dc 125 struct regexp_cache *cp;
ca1d1d23 126 Lisp_Object pattern;
facdc750 127 Lisp_Object translate;
487282dc 128 struct re_registers *regp;
b819a390 129 int posix;
5679531d 130 int multibyte;
ca1d1d23 131{
7276d3d8 132 unsigned char *raw_pattern;
f8bd51c4 133 int raw_pattern_size;
d451e4db 134 char *val;
b819a390 135 reg_syntax_t old;
ca1d1d23 136
f8bd51c4
KH
137 /* MULTIBYTE says whether the text to be searched is multibyte.
138 We must convert PATTERN to match that, or we will not really
139 find things right. */
140
141 if (multibyte == STRING_MULTIBYTE (pattern))
142 {
7276d3d8 143 raw_pattern = (unsigned char *) XSTRING (pattern)->data;
fc932ac6 144 raw_pattern_size = STRING_BYTES (XSTRING (pattern));
f8bd51c4
KH
145 }
146 else if (multibyte)
147 {
148 raw_pattern_size = count_size_as_multibyte (XSTRING (pattern)->data,
149 XSTRING (pattern)->size);
7276d3d8 150 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
f8bd51c4
KH
151 copy_text (XSTRING (pattern)->data, raw_pattern,
152 XSTRING (pattern)->size, 0, 1);
153 }
154 else
155 {
156 /* Converting multibyte to single-byte.
157
158 ??? Perhaps this conversion should be done in a special way
159 by subtracting nonascii-insert-offset from each non-ASCII char,
160 so that only the multibyte chars which really correspond to
161 the chosen single-byte character set can possibly match. */
162 raw_pattern_size = XSTRING (pattern)->size;
7276d3d8 163 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
f8bd51c4 164 copy_text (XSTRING (pattern)->data, raw_pattern,
fc932ac6 165 STRING_BYTES (XSTRING (pattern)), 1, 0);
f8bd51c4
KH
166 }
167
487282dc 168 cp->regexp = Qnil;
59fab369 169 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
b819a390 170 cp->posix = posix;
5679531d 171 cp->buf.multibyte = multibyte;
9ac0d9e0 172 BLOCK_INPUT;
b819a390
RS
173 old = re_set_syntax (RE_SYNTAX_EMACS
174 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
7276d3d8
RS
175 val = (char *) re_compile_pattern ((char *)raw_pattern,
176 raw_pattern_size, &cp->buf);
b819a390 177 re_set_syntax (old);
9ac0d9e0 178 UNBLOCK_INPUT;
ca1d1d23 179 if (val)
487282dc 180 Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
1113d9db 181
487282dc 182 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
183}
184
6efc7887
RS
185/* Shrink each compiled regexp buffer in the cache
186 to the size actually used right now.
187 This is called from garbage collection. */
188
189void
190shrink_regexp_cache ()
191{
192 struct regexp_cache *cp, **cpp;
193
194 for (cp = searchbuf_head; cp != 0; cp = cp->next)
195 {
196 cp->buf.allocated = cp->buf.used;
197 cp->buf.buffer
198 = (unsigned char *) realloc (cp->buf.buffer, cp->buf.used);
199 }
200}
201
487282dc 202/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
203 the cache.
204 PATTERN is the pattern to compile.
facdc750 205 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
206 REGP is the structure that says where to store the "register"
207 values that will result from matching this pattern.
208 If it is 0, we should compile the pattern not to record any
209 subexpression bounds.
210 POSIX is nonzero if we want full backtracking (POSIX style)
211 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
212
213struct re_pattern_buffer *
0c8533c6 214compile_pattern (pattern, regp, translate, posix, multibyte)
487282dc
KH
215 Lisp_Object pattern;
216 struct re_registers *regp;
facdc750 217 Lisp_Object translate;
0c8533c6 218 int posix, multibyte;
487282dc
KH
219{
220 struct regexp_cache *cp, **cpp;
221
222 for (cpp = &searchbuf_head; ; cpp = &cp->next)
223 {
224 cp = *cpp;
1d288aef
RS
225 if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
226 && !NILP (Fstring_equal (cp->regexp, pattern))
59fab369 227 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
5679531d
KH
228 && cp->posix == posix
229 && cp->buf.multibyte == multibyte)
487282dc
KH
230 break;
231
232 /* If we're at the end of the cache, compile into the last cell. */
233 if (cp->next == 0)
234 {
5679531d 235 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
487282dc
KH
236 break;
237 }
238 }
239
240 /* When we get here, cp (aka *cpp) contains the compiled pattern,
241 either because we found it in the cache or because we just compiled it.
242 Move it to the front of the queue to mark it as most recently used. */
243 *cpp = cp->next;
244 cp->next = searchbuf_head;
245 searchbuf_head = cp;
1113d9db 246
6639708c
RS
247 /* Advise the searching functions about the space we have allocated
248 for register data. */
249 if (regp)
250 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
251
487282dc 252 return &cp->buf;
ca1d1d23
JB
253}
254
255/* Error condition used for failing searches */
256Lisp_Object Qsearch_failed;
257
258Lisp_Object
259signal_failure (arg)
260 Lisp_Object arg;
261{
262 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
263 return Qnil;
264}
265\f
b819a390
RS
266static Lisp_Object
267looking_at_1 (string, posix)
ca1d1d23 268 Lisp_Object string;
b819a390 269 int posix;
ca1d1d23
JB
270{
271 Lisp_Object val;
272 unsigned char *p1, *p2;
273 int s1, s2;
274 register int i;
487282dc 275 struct re_pattern_buffer *bufp;
ca1d1d23 276
7074fde6
FP
277 if (running_asynch_code)
278 save_search_regs ();
279
ca1d1d23 280 CHECK_STRING (string, 0);
487282dc
KH
281 bufp = compile_pattern (string, &search_regs,
282 (!NILP (current_buffer->case_fold_search)
facdc750 283 ? DOWNCASE_TABLE : Qnil),
0c8533c6
RS
284 posix,
285 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23
JB
286
287 immediate_quit = 1;
288 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
289
290 /* Get pointers and sizes of the two strings
291 that make up the visible portion of the buffer. */
292
293 p1 = BEGV_ADDR;
fa8ed3e0 294 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 295 p2 = GAP_END_ADDR;
fa8ed3e0 296 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
297 if (s1 < 0)
298 {
299 p2 = p1;
fa8ed3e0 300 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
301 s1 = 0;
302 }
303 if (s2 < 0)
304 {
fa8ed3e0 305 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
306 s2 = 0;
307 }
8bb43c28
RS
308
309 re_match_object = Qnil;
ca1d1d23 310
487282dc 311 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
fa8ed3e0
RS
312 PT_BYTE - BEGV_BYTE, &search_regs,
313 ZV_BYTE - BEGV_BYTE);
ca1d1d23
JB
314 if (i == -2)
315 matcher_overflow ();
316
317 val = (0 <= i ? Qt : Qnil);
fa8ed3e0
RS
318 if (i >= 0)
319 for (i = 0; i < search_regs.num_regs; i++)
320 if (search_regs.start[i] >= 0)
321 {
322 search_regs.start[i]
323 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
324 search_regs.end[i]
325 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
326 }
a3668d92 327 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
328 immediate_quit = 0;
329 return val;
330}
331
b819a390 332DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
94f94972 333 "Return t if text after point matches regular expression REGEXP.\n\
b819a390
RS
334This function modifies the match data that `match-beginning',\n\
335`match-end' and `match-data' access; save and restore the match\n\
336data if you want to preserve them.")
94f94972
RS
337 (regexp)
338 Lisp_Object regexp;
b819a390 339{
94f94972 340 return looking_at_1 (regexp, 0);
b819a390
RS
341}
342
343DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
94f94972 344 "Return t if text after point matches regular expression REGEXP.\n\
b819a390
RS
345Find the longest match, in accord with Posix regular expression rules.\n\
346This function modifies the match data that `match-beginning',\n\
347`match-end' and `match-data' access; save and restore the match\n\
348data if you want to preserve them.")
94f94972
RS
349 (regexp)
350 Lisp_Object regexp;
b819a390 351{
94f94972 352 return looking_at_1 (regexp, 1);
b819a390
RS
353}
354\f
355static Lisp_Object
356string_match_1 (regexp, string, start, posix)
ca1d1d23 357 Lisp_Object regexp, string, start;
b819a390 358 int posix;
ca1d1d23
JB
359{
360 int val;
487282dc 361 struct re_pattern_buffer *bufp;
0c8533c6
RS
362 int pos, pos_byte;
363 int i;
ca1d1d23 364
7074fde6
FP
365 if (running_asynch_code)
366 save_search_regs ();
367
ca1d1d23
JB
368 CHECK_STRING (regexp, 0);
369 CHECK_STRING (string, 1);
370
371 if (NILP (start))
0c8533c6 372 pos = 0, pos_byte = 0;
ca1d1d23
JB
373 else
374 {
375 int len = XSTRING (string)->size;
376
377 CHECK_NUMBER (start, 2);
0c8533c6
RS
378 pos = XINT (start);
379 if (pos < 0 && -pos <= len)
380 pos = len + pos;
381 else if (0 > pos || pos > len)
ca1d1d23 382 args_out_of_range (string, start);
0c8533c6 383 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
384 }
385
487282dc
KH
386 bufp = compile_pattern (regexp, &search_regs,
387 (!NILP (current_buffer->case_fold_search)
facdc750 388 ? DOWNCASE_TABLE : Qnil),
0c8533c6
RS
389 posix,
390 STRING_MULTIBYTE (string));
ca1d1d23 391 immediate_quit = 1;
8bb43c28
RS
392 re_match_object = string;
393
487282dc 394 val = re_search (bufp, (char *) XSTRING (string)->data,
fc932ac6
RS
395 STRING_BYTES (XSTRING (string)), pos_byte,
396 STRING_BYTES (XSTRING (string)) - pos_byte,
ca1d1d23
JB
397 &search_regs);
398 immediate_quit = 0;
daa37602 399 last_thing_searched = Qt;
ca1d1d23
JB
400 if (val == -2)
401 matcher_overflow ();
402 if (val < 0) return Qnil;
0c8533c6
RS
403
404 for (i = 0; i < search_regs.num_regs; i++)
405 if (search_regs.start[i] >= 0)
406 {
407 search_regs.start[i]
408 = string_byte_to_char (string, search_regs.start[i]);
409 search_regs.end[i]
410 = string_byte_to_char (string, search_regs.end[i]);
411 }
412
413 return make_number (string_byte_to_char (string, val));
ca1d1d23 414}
e59a8453 415
b819a390
RS
416DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
417 "Return index of start of first match for REGEXP in STRING, or nil.\n\
418If third arg START is non-nil, start search at that index in STRING.\n\
419For index of first char beyond the match, do (match-end 0).\n\
420`match-end' and `match-beginning' also give indices of substrings\n\
421matched by parenthesis constructs in the pattern.")
422 (regexp, string, start)
423 Lisp_Object regexp, string, start;
424{
425 return string_match_1 (regexp, string, start, 0);
426}
427
428DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
429 "Return index of start of first match for REGEXP in STRING, or nil.\n\
430Find the longest match, in accord with Posix regular expression rules.\n\
431If third arg START is non-nil, start search at that index in STRING.\n\
432For index of first char beyond the match, do (match-end 0).\n\
433`match-end' and `match-beginning' also give indices of substrings\n\
434matched by parenthesis constructs in the pattern.")
435 (regexp, string, start)
436 Lisp_Object regexp, string, start;
437{
438 return string_match_1 (regexp, string, start, 1);
439}
440
e59a8453
RS
441/* Match REGEXP against STRING, searching all of STRING,
442 and return the index of the match, or negative on failure.
443 This does not clobber the match data. */
444
445int
446fast_string_match (regexp, string)
447 Lisp_Object regexp, string;
448{
449 int val;
487282dc 450 struct re_pattern_buffer *bufp;
e59a8453 451
facdc750
RS
452 bufp = compile_pattern (regexp, 0, Qnil,
453 0, STRING_MULTIBYTE (string));
e59a8453 454 immediate_quit = 1;
8bb43c28
RS
455 re_match_object = string;
456
487282dc 457 val = re_search (bufp, (char *) XSTRING (string)->data,
fc932ac6
RS
458 STRING_BYTES (XSTRING (string)), 0,
459 STRING_BYTES (XSTRING (string)), 0);
e59a8453
RS
460 immediate_quit = 0;
461 return val;
462}
5679531d
KH
463
464/* Match REGEXP against STRING, searching all of STRING ignoring case,
465 and return the index of the match, or negative on failure.
0c8533c6
RS
466 This does not clobber the match data.
467 We assume that STRING contains single-byte characters. */
5679531d
KH
468
469extern Lisp_Object Vascii_downcase_table;
470
471int
b4577c63 472fast_c_string_match_ignore_case (regexp, string)
5679531d
KH
473 Lisp_Object regexp;
474 char *string;
475{
476 int val;
477 struct re_pattern_buffer *bufp;
478 int len = strlen (string);
479
0c8533c6 480 regexp = string_make_unibyte (regexp);
b4577c63 481 re_match_object = Qt;
5679531d 482 bufp = compile_pattern (regexp, 0,
facdc750 483 Vascii_downcase_table, 0,
f8bd51c4 484 0);
5679531d
KH
485 immediate_quit = 1;
486 val = re_search (bufp, string, len, 0, len, 0);
487 immediate_quit = 0;
488 return val;
489}
ca1d1d23 490\f
9169c321
JB
491/* The newline cache: remembering which sections of text have no newlines. */
492
493/* If the user has requested newline caching, make sure it's on.
494 Otherwise, make sure it's off.
495 This is our cheezy way of associating an action with the change of
496 state of a buffer-local variable. */
497static void
498newline_cache_on_off (buf)
499 struct buffer *buf;
500{
501 if (NILP (buf->cache_long_line_scans))
502 {
503 /* It should be off. */
504 if (buf->newline_cache)
505 {
506 free_region_cache (buf->newline_cache);
507 buf->newline_cache = 0;
508 }
509 }
510 else
511 {
512 /* It should be on. */
513 if (buf->newline_cache == 0)
514 buf->newline_cache = new_region_cache ();
515 }
516}
517
518\f
519/* Search for COUNT instances of the character TARGET between START and END.
520
521 If COUNT is positive, search forwards; END must be >= START.
522 If COUNT is negative, search backwards for the -COUNTth instance;
523 END must be <= START.
524 If COUNT is zero, do anything you please; run rogue, for all I care.
525
526 If END is zero, use BEGV or ZV instead, as appropriate for the
527 direction indicated by COUNT.
ffd56f97
JB
528
529 If we find COUNT instances, set *SHORTAGE to zero, and return the
5bfe95c9
RS
530 position after the COUNTth match. Note that for reverse motion
531 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 532
9169c321
JB
533 If we don't find COUNT instances before reaching END, set *SHORTAGE
534 to the number of TARGETs left unfound, and return END.
ffd56f97 535
087a5f81
RS
536 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
537 except when inside redisplay. */
538
dfcf069d 539int
9169c321
JB
540scan_buffer (target, start, end, count, shortage, allow_quit)
541 register int target;
542 int start, end;
543 int count;
544 int *shortage;
087a5f81 545 int allow_quit;
ca1d1d23 546{
9169c321
JB
547 struct region_cache *newline_cache;
548 int direction;
ffd56f97 549
9169c321
JB
550 if (count > 0)
551 {
552 direction = 1;
553 if (! end) end = ZV;
554 }
555 else
556 {
557 direction = -1;
558 if (! end) end = BEGV;
559 }
ffd56f97 560
9169c321
JB
561 newline_cache_on_off (current_buffer);
562 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
563
564 if (shortage != 0)
565 *shortage = 0;
566
087a5f81 567 immediate_quit = allow_quit;
ca1d1d23 568
ffd56f97 569 if (count > 0)
9169c321 570 while (start != end)
ca1d1d23 571 {
9169c321
JB
572 /* Our innermost scanning loop is very simple; it doesn't know
573 about gaps, buffer ends, or the newline cache. ceiling is
574 the position of the last character before the next such
575 obstacle --- the last character the dumb search loop should
576 examine. */
fa8ed3e0
RS
577 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
578 int start_byte = CHAR_TO_BYTE (start);
67ce527d 579 int tem;
9169c321
JB
580
581 /* If we're looking for a newline, consult the newline cache
582 to see where we can avoid some scanning. */
583 if (target == '\n' && newline_cache)
584 {
585 int next_change;
586 immediate_quit = 0;
587 while (region_cache_forward
fa8ed3e0
RS
588 (current_buffer, newline_cache, start_byte, &next_change))
589 start_byte = next_change;
cbe0db0d 590 immediate_quit = allow_quit;
9169c321 591
fa8ed3e0
RS
592 /* START should never be after END. */
593 if (start_byte > ceiling_byte)
594 start_byte = ceiling_byte;
9169c321
JB
595
596 /* Now the text after start is an unknown region, and
597 next_change is the position of the next known region. */
fa8ed3e0 598 ceiling_byte = min (next_change - 1, ceiling_byte);
9169c321
JB
599 }
600
601 /* The dumb loop can only scan text stored in contiguous
602 bytes. BUFFER_CEILING_OF returns the last character
603 position that is contiguous, so the ceiling is the
604 position after that. */
67ce527d
KH
605 tem = BUFFER_CEILING_OF (start_byte);
606 ceiling_byte = min (tem, ceiling_byte);
9169c321
JB
607
608 {
609 /* The termination address of the dumb loop. */
fa8ed3e0
RS
610 register unsigned char *ceiling_addr
611 = BYTE_POS_ADDR (ceiling_byte) + 1;
612 register unsigned char *cursor
613 = BYTE_POS_ADDR (start_byte);
9169c321
JB
614 unsigned char *base = cursor;
615
616 while (cursor < ceiling_addr)
617 {
618 unsigned char *scan_start = cursor;
619
620 /* The dumb loop. */
621 while (*cursor != target && ++cursor < ceiling_addr)
622 ;
623
624 /* If we're looking for newlines, cache the fact that
625 the region from start to cursor is free of them. */
626 if (target == '\n' && newline_cache)
627 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
628 start_byte + scan_start - base,
629 start_byte + cursor - base);
9169c321
JB
630
631 /* Did we find the target character? */
632 if (cursor < ceiling_addr)
633 {
634 if (--count == 0)
635 {
636 immediate_quit = 0;
fa8ed3e0 637 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
9169c321
JB
638 }
639 cursor++;
640 }
641 }
642
fa8ed3e0 643 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321 644 }
ca1d1d23
JB
645 }
646 else
9169c321
JB
647 while (start > end)
648 {
649 /* The last character to check before the next obstacle. */
fa8ed3e0
RS
650 int ceiling_byte = CHAR_TO_BYTE (end);
651 int start_byte = CHAR_TO_BYTE (start);
67ce527d 652 int tem;
9169c321
JB
653
654 /* Consult the newline cache, if appropriate. */
655 if (target == '\n' && newline_cache)
656 {
657 int next_change;
658 immediate_quit = 0;
659 while (region_cache_backward
fa8ed3e0
RS
660 (current_buffer, newline_cache, start_byte, &next_change))
661 start_byte = next_change;
cbe0db0d 662 immediate_quit = allow_quit;
9169c321
JB
663
664 /* Start should never be at or before end. */
fa8ed3e0
RS
665 if (start_byte <= ceiling_byte)
666 start_byte = ceiling_byte + 1;
9169c321
JB
667
668 /* Now the text before start is an unknown region, and
669 next_change is the position of the next known region. */
fa8ed3e0 670 ceiling_byte = max (next_change, ceiling_byte);
9169c321
JB
671 }
672
673 /* Stop scanning before the gap. */
67ce527d
KH
674 tem = BUFFER_FLOOR_OF (start_byte - 1);
675 ceiling_byte = max (tem, ceiling_byte);
9169c321
JB
676
677 {
678 /* The termination address of the dumb loop. */
fa8ed3e0
RS
679 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
680 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
681 unsigned char *base = cursor;
682
683 while (cursor >= ceiling_addr)
684 {
685 unsigned char *scan_start = cursor;
686
687 while (*cursor != target && --cursor >= ceiling_addr)
688 ;
689
690 /* If we're looking for newlines, cache the fact that
691 the region from after the cursor to start is free of them. */
692 if (target == '\n' && newline_cache)
693 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
694 start_byte + cursor - base,
695 start_byte + scan_start - base);
9169c321
JB
696
697 /* Did we find the target character? */
698 if (cursor >= ceiling_addr)
699 {
700 if (++count >= 0)
701 {
702 immediate_quit = 0;
fa8ed3e0 703 return BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
704 }
705 cursor--;
706 }
707 }
708
fa8ed3e0 709 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
710 }
711 }
712
ca1d1d23
JB
713 immediate_quit = 0;
714 if (shortage != 0)
ffd56f97 715 *shortage = count * direction;
9169c321 716 return start;
ca1d1d23 717}
fa8ed3e0
RS
718\f
719/* Search for COUNT instances of a line boundary, which means either a
720 newline or (if selective display enabled) a carriage return.
721 Start at START. If COUNT is negative, search backwards.
722
723 We report the resulting position by calling TEMP_SET_PT_BOTH.
724
725 If we find COUNT instances. we position after (always after,
726 even if scanning backwards) the COUNTth match, and return 0.
727
728 If we don't find COUNT instances before reaching the end of the
729 buffer (or the beginning, if scanning backwards), we return
730 the number of line boundaries left unfound, and position at
731 the limit we bumped up against.
732
733 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
d5d57b92 734 except in special cases. */
ca1d1d23 735
63fa018d 736int
fa8ed3e0
RS
737scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
738 int start, start_byte;
739 int limit, limit_byte;
740 register int count;
741 int allow_quit;
63fa018d 742{
fa8ed3e0
RS
743 int direction = ((count > 0) ? 1 : -1);
744
745 register unsigned char *cursor;
746 unsigned char *base;
747
748 register int ceiling;
749 register unsigned char *ceiling_addr;
750
d5d57b92
RS
751 int old_immediate_quit = immediate_quit;
752
fa8ed3e0
RS
753 /* If we are not in selective display mode,
754 check only for newlines. */
755 int selective_display = (!NILP (current_buffer->selective_display)
756 && !INTEGERP (current_buffer->selective_display));
757
758 /* The code that follows is like scan_buffer
759 but checks for either newline or carriage return. */
760
d5d57b92
RS
761 if (allow_quit)
762 immediate_quit++;
fa8ed3e0
RS
763
764 start_byte = CHAR_TO_BYTE (start);
765
766 if (count > 0)
767 {
768 while (start_byte < limit_byte)
769 {
770 ceiling = BUFFER_CEILING_OF (start_byte);
771 ceiling = min (limit_byte - 1, ceiling);
772 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
773 base = (cursor = BYTE_POS_ADDR (start_byte));
774 while (1)
775 {
776 while (*cursor != '\n' && ++cursor != ceiling_addr)
777 ;
778
779 if (cursor != ceiling_addr)
780 {
781 if (--count == 0)
782 {
d5d57b92 783 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
784 start_byte = start_byte + cursor - base + 1;
785 start = BYTE_TO_CHAR (start_byte);
786 TEMP_SET_PT_BOTH (start, start_byte);
787 return 0;
788 }
789 else
790 if (++cursor == ceiling_addr)
791 break;
792 }
793 else
794 break;
795 }
796 start_byte += cursor - base;
797 }
798 }
799 else
800 {
fa8ed3e0
RS
801 while (start_byte > limit_byte)
802 {
803 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
804 ceiling = max (limit_byte, ceiling);
805 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
806 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
807 while (1)
808 {
809 while (--cursor != ceiling_addr && *cursor != '\n')
810 ;
811
812 if (cursor != ceiling_addr)
813 {
814 if (++count == 0)
815 {
d5d57b92 816 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
817 /* Return the position AFTER the match we found. */
818 start_byte = start_byte + cursor - base + 1;
819 start = BYTE_TO_CHAR (start_byte);
820 TEMP_SET_PT_BOTH (start, start_byte);
821 return 0;
822 }
823 }
824 else
825 break;
826 }
827 /* Here we add 1 to compensate for the last decrement
828 of CURSOR, which took it past the valid range. */
829 start_byte += cursor - base + 1;
830 }
831 }
832
833 TEMP_SET_PT_BOTH (limit, limit_byte);
d5d57b92 834 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
835
836 return count * direction;
63fa018d
RS
837}
838
ca1d1d23 839int
fa8ed3e0 840find_next_newline_no_quit (from, cnt)
ca1d1d23
JB
841 register int from, cnt;
842{
fa8ed3e0 843 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
9169c321
JB
844}
845
9169c321
JB
846/* Like find_next_newline, but returns position before the newline,
847 not after, and only search up to TO. This isn't just
848 find_next_newline (...)-1, because you might hit TO. */
fa8ed3e0 849
9169c321
JB
850int
851find_before_next_newline (from, to, cnt)
cbe0db0d 852 int from, to, cnt;
9169c321
JB
853{
854 int shortage;
855 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
856
857 if (shortage == 0)
858 pos--;
859
860 return pos;
ca1d1d23
JB
861}
862\f
ca1d1d23
JB
863/* Subroutines of Lisp buffer search functions. */
864
865static Lisp_Object
b819a390 866search_command (string, bound, noerror, count, direction, RE, posix)
ca1d1d23
JB
867 Lisp_Object string, bound, noerror, count;
868 int direction;
869 int RE;
b819a390 870 int posix;
ca1d1d23
JB
871{
872 register int np;
9f43ad85 873 int lim, lim_byte;
ca1d1d23
JB
874 int n = direction;
875
876 if (!NILP (count))
877 {
878 CHECK_NUMBER (count, 3);
879 n *= XINT (count);
880 }
881
882 CHECK_STRING (string, 0);
883 if (NILP (bound))
9f43ad85
RS
884 {
885 if (n > 0)
886 lim = ZV, lim_byte = ZV_BYTE;
887 else
888 lim = BEGV, lim_byte = BEGV_BYTE;
889 }
ca1d1d23
JB
890 else
891 {
892 CHECK_NUMBER_COERCE_MARKER (bound, 1);
893 lim = XINT (bound);
6ec8bbd2 894 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
895 error ("Invalid search bound (wrong side of point)");
896 if (lim > ZV)
9f43ad85 897 lim = ZV, lim_byte = ZV_BYTE;
588d2fd5 898 else if (lim < BEGV)
9f43ad85 899 lim = BEGV, lim_byte = BEGV_BYTE;
588d2fd5
KH
900 else
901 lim_byte = CHAR_TO_BYTE (lim);
ca1d1d23
JB
902 }
903
9f43ad85 904 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
ca1d1d23 905 (!NILP (current_buffer->case_fold_search)
facdc750 906 ? current_buffer->case_canon_table
3135e9fd 907 : Qnil),
ca1d1d23 908 (!NILP (current_buffer->case_fold_search)
facdc750 909 ? current_buffer->case_eqv_table
3135e9fd 910 : Qnil),
b819a390 911 posix);
ca1d1d23
JB
912 if (np <= 0)
913 {
914 if (NILP (noerror))
915 return signal_failure (string);
916 if (!EQ (noerror, Qt))
917 {
918 if (lim < BEGV || lim > ZV)
919 abort ();
9f43ad85 920 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
921 return Qnil;
922#if 0 /* This would be clean, but maybe programs depend on
923 a value of nil here. */
481399bf 924 np = lim;
a5f217b8 925#endif
ca1d1d23 926 }
481399bf
RS
927 else
928 return Qnil;
ca1d1d23
JB
929 }
930
931 if (np < BEGV || np > ZV)
932 abort ();
933
934 SET_PT (np);
935
936 return make_number (np);
937}
938\f
fa8ed3e0
RS
939/* Return 1 if REGEXP it matches just one constant string. */
940
b6d6a51c
KH
941static int
942trivial_regexp_p (regexp)
943 Lisp_Object regexp;
944{
fc932ac6 945 int len = STRING_BYTES (XSTRING (regexp));
b6d6a51c
KH
946 unsigned char *s = XSTRING (regexp)->data;
947 unsigned char c;
948 while (--len >= 0)
949 {
950 switch (*s++)
951 {
952 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
953 return 0;
954 case '\\':
955 if (--len < 0)
956 return 0;
957 switch (*s++)
958 {
959 case '|': case '(': case ')': case '`': case '\'': case 'b':
960 case 'B': case '<': case '>': case 'w': case 'W': case 's':
866f60fd 961 case 'S': case '=':
5679531d 962 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 963 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
964 case '6': case '7': case '8': case '9':
965 return 0;
966 }
967 }
968 }
969 return 1;
970}
971
ca325161 972/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 973 starting at position POS and stopping at position LIM,
b819a390 974 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
975 a regular expression if RE is true.
976
977 If N is positive, searching is forward and LIM must be greater than POS.
978 If N is negative, searching is backward and LIM must be less than POS.
979
facdc750 980 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 981 or else the position at the beginning of the Nth occurrence
b819a390
RS
982 (if searching backward) or the end (if searching forward).
983
984 POSIX is nonzero if we want full backtracking (POSIX style)
985 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 986
aff2ce94
RS
987#define TRANSLATE(out, trt, d) \
988do \
989 { \
990 if (! NILP (trt)) \
991 { \
992 Lisp_Object temp; \
993 temp = Faref (trt, make_number (d)); \
994 if (INTEGERP (temp)) \
995 out = XINT (temp); \
996 else \
997 out = d; \
998 } \
999 else \
1000 out = d; \
1001 } \
1002while (0)
facdc750 1003
b819a390 1004static int
9f43ad85
RS
1005search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1006 RE, trt, inverse_trt, posix)
ca1d1d23
JB
1007 Lisp_Object string;
1008 int pos;
9f43ad85 1009 int pos_byte;
ca1d1d23 1010 int lim;
9f43ad85 1011 int lim_byte;
ca1d1d23
JB
1012 int n;
1013 int RE;
facdc750
RS
1014 Lisp_Object trt;
1015 Lisp_Object inverse_trt;
b819a390 1016 int posix;
ca1d1d23
JB
1017{
1018 int len = XSTRING (string)->size;
fc932ac6 1019 int len_byte = STRING_BYTES (XSTRING (string));
facdc750 1020 register int i;
ca1d1d23 1021
7074fde6
FP
1022 if (running_asynch_code)
1023 save_search_regs ();
1024
a7e4cdde 1025 /* Searching 0 times means don't move. */
ca1d1d23 1026 /* Null string is found at starting position. */
a7e4cdde 1027 if (len == 0 || n == 0)
ca325161
RS
1028 {
1029 set_search_regs (pos, 0);
1030 return pos;
1031 }
3f57a499 1032
b6d6a51c 1033 if (RE && !trivial_regexp_p (string))
ca1d1d23 1034 {
facdc750
RS
1035 unsigned char *p1, *p2;
1036 int s1, s2;
487282dc
KH
1037 struct re_pattern_buffer *bufp;
1038
0c8533c6
RS
1039 bufp = compile_pattern (string, &search_regs, trt, posix,
1040 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23 1041
ca1d1d23
JB
1042 immediate_quit = 1; /* Quit immediately if user types ^G,
1043 because letting this function finish
1044 can take too long. */
1045 QUIT; /* Do a pending quit right away,
1046 to avoid paradoxical behavior */
1047 /* Get pointers and sizes of the two strings
1048 that make up the visible portion of the buffer. */
1049
1050 p1 = BEGV_ADDR;
fa8ed3e0 1051 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1052 p2 = GAP_END_ADDR;
fa8ed3e0 1053 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1054 if (s1 < 0)
1055 {
1056 p2 = p1;
fa8ed3e0 1057 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1058 s1 = 0;
1059 }
1060 if (s2 < 0)
1061 {
fa8ed3e0 1062 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1063 s2 = 0;
1064 }
8bb43c28
RS
1065 re_match_object = Qnil;
1066
ca1d1d23
JB
1067 while (n < 0)
1068 {
42db823b 1069 int val;
487282dc 1070 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1071 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1072 &search_regs,
42db823b 1073 /* Don't allow match past current point */
4996330b 1074 pos_byte - BEGV_BYTE);
ca1d1d23 1075 if (val == -2)
b6d6a51c
KH
1076 {
1077 matcher_overflow ();
1078 }
ca1d1d23
JB
1079 if (val >= 0)
1080 {
26aff150 1081 pos_byte = search_regs.start[0] + BEGV_BYTE;
4746118a 1082 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1083 if (search_regs.start[i] >= 0)
1084 {
fa8ed3e0
RS
1085 search_regs.start[i]
1086 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1087 search_regs.end[i]
1088 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1089 }
a3668d92 1090 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1091 /* Set pos to the new position. */
1092 pos = search_regs.start[0];
1093 }
1094 else
1095 {
1096 immediate_quit = 0;
1097 return (n);
1098 }
1099 n++;
1100 }
1101 while (n > 0)
1102 {
42db823b 1103 int val;
487282dc 1104 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1105 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1106 &search_regs,
1107 lim_byte - BEGV_BYTE);
ca1d1d23 1108 if (val == -2)
b6d6a51c
KH
1109 {
1110 matcher_overflow ();
1111 }
ca1d1d23
JB
1112 if (val >= 0)
1113 {
26aff150 1114 pos_byte = search_regs.end[0] + BEGV_BYTE;
4746118a 1115 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1116 if (search_regs.start[i] >= 0)
1117 {
fa8ed3e0
RS
1118 search_regs.start[i]
1119 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1120 search_regs.end[i]
1121 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1122 }
a3668d92 1123 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1124 pos = search_regs.end[0];
1125 }
1126 else
1127 {
1128 immediate_quit = 0;
1129 return (0 - n);
1130 }
1131 n--;
1132 }
1133 immediate_quit = 0;
1134 return (pos);
1135 }
1136 else /* non-RE case */
1137 {
facdc750
RS
1138 unsigned char *raw_pattern, *pat;
1139 int raw_pattern_size;
1140 int raw_pattern_size_byte;
1141 unsigned char *patbuf;
1142 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1143 unsigned char *base_pat = XSTRING (string)->data;
1144 int charset_base = -1;
1145 int simple = 1;
1146
1147 /* MULTIBYTE says whether the text to be searched is multibyte.
1148 We must convert PATTERN to match that, or we will not really
1149 find things right. */
1150
1151 if (multibyte == STRING_MULTIBYTE (string))
1152 {
7276d3d8 1153 raw_pattern = (unsigned char *) XSTRING (string)->data;
facdc750 1154 raw_pattern_size = XSTRING (string)->size;
fc932ac6 1155 raw_pattern_size_byte = STRING_BYTES (XSTRING (string));
facdc750
RS
1156 }
1157 else if (multibyte)
1158 {
1159 raw_pattern_size = XSTRING (string)->size;
1160 raw_pattern_size_byte
1161 = count_size_as_multibyte (XSTRING (string)->data,
1162 raw_pattern_size);
7276d3d8 1163 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
facdc750
RS
1164 copy_text (XSTRING (string)->data, raw_pattern,
1165 XSTRING (string)->size, 0, 1);
1166 }
1167 else
1168 {
1169 /* Converting multibyte to single-byte.
1170
1171 ??? Perhaps this conversion should be done in a special way
1172 by subtracting nonascii-insert-offset from each non-ASCII char,
1173 so that only the multibyte chars which really correspond to
1174 the chosen single-byte character set can possibly match. */
1175 raw_pattern_size = XSTRING (string)->size;
1176 raw_pattern_size_byte = XSTRING (string)->size;
7276d3d8 1177 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
facdc750 1178 copy_text (XSTRING (string)->data, raw_pattern,
fc932ac6 1179 STRING_BYTES (XSTRING (string)), 1, 0);
facdc750
RS
1180 }
1181
1182 /* Copy and optionally translate the pattern. */
1183 len = raw_pattern_size;
1184 len_byte = raw_pattern_size_byte;
1185 patbuf = (unsigned char *) alloca (len_byte);
1186 pat = patbuf;
1187 base_pat = raw_pattern;
1188 if (multibyte)
1189 {
1190 while (--len >= 0)
1191 {
1192 unsigned char workbuf[4], *str;
aff2ce94 1193 int c, translated, inverse;
facdc750
RS
1194 int in_charlen, charlen;
1195
1196 /* If we got here and the RE flag is set, it's because we're
1197 dealing with a regexp known to be trivial, so the backslash
1198 just quotes the next character. */
1199 if (RE && *base_pat == '\\')
1200 {
1201 len--;
1202 len_byte--;
1203 base_pat++;
1204 }
1205
1206 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1207 /* Translate the character, if requested. */
aff2ce94 1208 TRANSLATE (translated, trt, c);
facdc750
RS
1209 /* If translation changed the byte-length, go back
1210 to the original character. */
1211 charlen = CHAR_STRING (translated, workbuf, str);
1212 if (in_charlen != charlen)
1213 {
1214 translated = c;
1215 charlen = CHAR_STRING (c, workbuf, str);
1216 }
1217
aff2ce94
RS
1218 TRANSLATE (inverse, inverse_trt, c);
1219
facdc750
RS
1220 /* Did this char actually get translated?
1221 Would any other char get translated into it? */
aff2ce94 1222 if (translated != c || inverse != c)
facdc750
RS
1223 {
1224 /* Keep track of which character set row
1225 contains the characters that need translation. */
1226 int charset_base_code = c & ~0xff;
1227 if (charset_base == -1)
1228 charset_base = charset_base_code;
1229 else if (charset_base != charset_base_code)
1230 /* If two different rows appear, needing translation,
1231 then we cannot use boyer_moore search. */
1232 simple = 0;
1233 /* ??? Handa: this must do simple = 0
1234 if c is a composite character. */
aff2ce94 1235 }
facdc750
RS
1236
1237 /* Store this character into the translated pattern. */
1238 bcopy (str, pat, charlen);
1239 pat += charlen;
1240 base_pat += in_charlen;
1241 len_byte -= in_charlen;
1242 }
1243 }
1244 else
1245 {
1246 while (--len >= 0)
1247 {
aff2ce94 1248 int c, translated, inverse;
facdc750
RS
1249
1250 /* If we got here and the RE flag is set, it's because we're
1251 dealing with a regexp known to be trivial, so the backslash
1252 just quotes the next character. */
1253 if (RE && *base_pat == '\\')
1254 {
1255 len--;
1256 base_pat++;
1257 }
1258 c = *base_pat++;
aff2ce94
RS
1259 TRANSLATE (translated, trt, c);
1260 TRANSLATE (inverse, inverse_trt, c);
facdc750
RS
1261
1262 /* Did this char actually get translated?
1263 Would any other char get translated into it? */
aff2ce94 1264 if (translated != c || inverse != c)
facdc750
RS
1265 {
1266 /* Keep track of which character set row
1267 contains the characters that need translation. */
1268 int charset_base_code = c & ~0xff;
1269 if (charset_base == -1)
1270 charset_base = charset_base_code;
1271 else if (charset_base != charset_base_code)
1272 /* If two different rows appear, needing translation,
1273 then we cannot use boyer_moore search. */
1274 simple = 0;
aff2ce94 1275 }
facdc750
RS
1276 *pat++ = translated;
1277 }
1278 }
1279
1280 len_byte = pat - patbuf;
1281 len = raw_pattern_size;
1282 pat = base_pat = patbuf;
1283
1284 if (simple)
1285 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
aff2ce94
RS
1286 pos, pos_byte, lim, lim_byte,
1287 charset_base);
facdc750
RS
1288 else
1289 return simple_search (n, pat, len, len_byte, trt,
1290 pos, pos_byte, lim, lim_byte);
1291 }
1292}
1293\f
1294/* Do a simple string search N times for the string PAT,
1295 whose length is LEN/LEN_BYTE,
1296 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1297 TRT is the translation table.
f8bd51c4 1298
facdc750
RS
1299 Return the character position where the match is found.
1300 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1301
facdc750
RS
1302 This kind of search works regardless of what is in PAT and
1303 regardless of what is in TRT. It is used in cases where
1304 boyer_moore cannot work. */
1305
1306static int
1307simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1308 int n;
1309 unsigned char *pat;
1310 int len, len_byte;
1311 Lisp_Object trt;
1312 int pos, pos_byte;
1313 int lim, lim_byte;
1314{
1315 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
ab228c24 1316 int forward = n > 0;
facdc750
RS
1317
1318 if (lim > pos && multibyte)
1319 while (n > 0)
1320 {
1321 while (1)
f8bd51c4 1322 {
facdc750
RS
1323 /* Try matching at position POS. */
1324 int this_pos = pos;
1325 int this_pos_byte = pos_byte;
1326 int this_len = len;
1327 int this_len_byte = len_byte;
1328 unsigned char *p = pat;
1329 if (pos + len > lim)
1330 goto stop;
1331
1332 while (this_len > 0)
1333 {
1334 int charlen, buf_charlen;
ab228c24 1335 int pat_ch, buf_ch;
facdc750 1336
ab228c24 1337 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1338 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1339 ZV_BYTE - this_pos_byte,
1340 buf_charlen);
aff2ce94 1341 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1342
1343 if (buf_ch != pat_ch)
1344 break;
ab228c24
RS
1345
1346 this_len_byte -= charlen;
1347 this_len--;
1348 p += charlen;
1349
1350 this_pos_byte += buf_charlen;
1351 this_pos++;
facdc750
RS
1352 }
1353
1354 if (this_len == 0)
1355 {
1356 pos += len;
1357 pos_byte += len_byte;
1358 break;
1359 }
1360
1361 INC_BOTH (pos, pos_byte);
f8bd51c4 1362 }
facdc750
RS
1363
1364 n--;
1365 }
1366 else if (lim > pos)
1367 while (n > 0)
1368 {
1369 while (1)
f8bd51c4 1370 {
facdc750
RS
1371 /* Try matching at position POS. */
1372 int this_pos = pos;
1373 int this_len = len;
1374 unsigned char *p = pat;
1375
1376 if (pos + len > lim)
1377 goto stop;
1378
1379 while (this_len > 0)
1380 {
1381 int pat_ch = *p++;
1382 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1383 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1384
1385 if (buf_ch != pat_ch)
1386 break;
ab228c24
RS
1387
1388 this_len--;
1389 this_pos++;
facdc750
RS
1390 }
1391
1392 if (this_len == 0)
1393 {
1394 pos += len;
1395 break;
1396 }
1397
1398 pos++;
f8bd51c4 1399 }
facdc750
RS
1400
1401 n--;
1402 }
1403 /* Backwards search. */
1404 else if (lim < pos && multibyte)
1405 while (n < 0)
1406 {
1407 while (1)
f8bd51c4 1408 {
facdc750
RS
1409 /* Try matching at position POS. */
1410 int this_pos = pos - len;
1411 int this_pos_byte = pos_byte - len_byte;
1412 int this_len = len;
1413 int this_len_byte = len_byte;
1414 unsigned char *p = pat;
1415
1416 if (pos - len < lim)
1417 goto stop;
1418
1419 while (this_len > 0)
1420 {
1421 int charlen, buf_charlen;
ab228c24 1422 int pat_ch, buf_ch;
facdc750 1423
ab228c24 1424 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1425 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1426 ZV_BYTE - this_pos_byte,
1427 buf_charlen);
aff2ce94 1428 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1429
1430 if (buf_ch != pat_ch)
1431 break;
ab228c24
RS
1432
1433 this_len_byte -= charlen;
1434 this_len--;
1435 p += charlen;
1436 this_pos_byte += buf_charlen;
1437 this_pos++;
facdc750
RS
1438 }
1439
1440 if (this_len == 0)
1441 {
1442 pos -= len;
1443 pos_byte -= len_byte;
1444 break;
1445 }
1446
1447 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1448 }
1449
facdc750
RS
1450 n++;
1451 }
1452 else if (lim < pos)
1453 while (n < 0)
1454 {
1455 while (1)
b6d6a51c 1456 {
facdc750
RS
1457 /* Try matching at position POS. */
1458 int this_pos = pos - len;
1459 int this_len = len;
1460 unsigned char *p = pat;
1461
1462 if (pos - len < lim)
1463 goto stop;
1464
1465 while (this_len > 0)
1466 {
1467 int pat_ch = *p++;
1468 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1469 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1470
1471 if (buf_ch != pat_ch)
1472 break;
ab228c24
RS
1473 this_len--;
1474 this_pos++;
facdc750
RS
1475 }
1476
1477 if (this_len == 0)
b6d6a51c 1478 {
facdc750
RS
1479 pos -= len;
1480 break;
b6d6a51c 1481 }
facdc750
RS
1482
1483 pos--;
b6d6a51c 1484 }
facdc750
RS
1485
1486 n++;
b6d6a51c 1487 }
facdc750
RS
1488
1489 stop:
1490 if (n == 0)
aff2ce94 1491 {
ab228c24
RS
1492 if (forward)
1493 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1494 else
1495 set_search_regs (multibyte ? pos_byte : pos, len_byte);
aff2ce94
RS
1496
1497 return pos;
1498 }
facdc750
RS
1499 else if (n > 0)
1500 return -n;
1501 else
1502 return n;
1503}
1504\f
1505/* Do Boyer-Moore search N times for the string PAT,
1506 whose length is LEN/LEN_BYTE,
1507 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1508 DIRECTION says which direction we search in.
1509 TRT and INVERSE_TRT are translation tables.
1510
1511 This kind of search works if all the characters in PAT that have
1512 nontrivial translation are the same aside from the last byte. This
1513 makes it possible to translate just the last byte of a character,
1514 and do so after just a simple test of the context.
1515
1516 If that criterion is not satisfied, do not call this function. */
1517
1518static int
1519boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
aff2ce94 1520 pos, pos_byte, lim, lim_byte, charset_base)
facdc750
RS
1521 int n;
1522 unsigned char *base_pat;
1523 int len, len_byte;
1524 Lisp_Object trt;
1525 Lisp_Object inverse_trt;
1526 int pos, pos_byte;
1527 int lim, lim_byte;
aff2ce94 1528 int charset_base;
facdc750
RS
1529{
1530 int direction = ((n > 0) ? 1 : -1);
1531 register int dirlen;
1532 int infinity, limit, k, stride_for_teases;
1533 register int *BM_tab;
1534 int *BM_tab_base;
1535 register unsigned char *cursor, *p_limit;
1536 register int i, j;
cb6792d2 1537 unsigned char *pat, *pat_end;
facdc750
RS
1538 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1539
1540 unsigned char simple_translate[0400];
1541 int translate_prev_byte;
1542 int translate_anteprev_byte;
1543
1544#ifdef C_ALLOCA
1545 int BM_tab_space[0400];
1546 BM_tab = &BM_tab_space[0];
1547#else
1548 BM_tab = (int *) alloca (0400 * sizeof (int));
1549#endif
1550 /* The general approach is that we are going to maintain that we know */
1551 /* the first (closest to the present position, in whatever direction */
1552 /* we're searching) character that could possibly be the last */
1553 /* (furthest from present position) character of a valid match. We */
1554 /* advance the state of our knowledge by looking at that character */
1555 /* and seeing whether it indeed matches the last character of the */
1556 /* pattern. If it does, we take a closer look. If it does not, we */
1557 /* move our pointer (to putative last characters) as far as is */
1558 /* logically possible. This amount of movement, which I call a */
1559 /* stride, will be the length of the pattern if the actual character */
1560 /* appears nowhere in the pattern, otherwise it will be the distance */
1561 /* from the last occurrence of that character to the end of the */
1562 /* pattern. */
1563 /* As a coding trick, an enormous stride is coded into the table for */
1564 /* characters that match the last character. This allows use of only */
1565 /* a single test, a test for having gone past the end of the */
1566 /* permissible match region, to test for both possible matches (when */
1567 /* the stride goes past the end immediately) and failure to */
1568 /* match (where you get nudged past the end one stride at a time). */
1569
1570 /* Here we make a "mickey mouse" BM table. The stride of the search */
1571 /* is determined only by the last character of the putative match. */
1572 /* If that character does not match, we will stride the proper */
1573 /* distance to propose a match that superimposes it on the last */
1574 /* instance of a character that matches it (per trt), or misses */
1575 /* it entirely if there is none. */
1576
1577 dirlen = len_byte * direction;
1578 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
cb6792d2
RS
1579
1580 /* Record position after the end of the pattern. */
1581 pat_end = base_pat + len_byte;
1582 /* BASE_PAT points to a character that we start scanning from.
1583 It is the first character in a forward search,
1584 the last character in a backward search. */
facdc750 1585 if (direction < 0)
cb6792d2
RS
1586 base_pat = pat_end - 1;
1587
facdc750
RS
1588 BM_tab_base = BM_tab;
1589 BM_tab += 0400;
1590 j = dirlen; /* to get it in a register */
1591 /* A character that does not appear in the pattern induces a */
1592 /* stride equal to the pattern length. */
1593 while (BM_tab_base != BM_tab)
1594 {
1595 *--BM_tab = j;
1596 *--BM_tab = j;
1597 *--BM_tab = j;
1598 *--BM_tab = j;
1599 }
1600
1601 /* We use this for translation, instead of TRT itself.
1602 We fill this in to handle the characters that actually
1603 occur in the pattern. Others don't matter anyway! */
1604 bzero (simple_translate, sizeof simple_translate);
1605 for (i = 0; i < 0400; i++)
1606 simple_translate[i] = i;
1607
1608 i = 0;
1609 while (i != infinity)
1610 {
cb6792d2 1611 unsigned char *ptr = base_pat + i;
facdc750
RS
1612 i += direction;
1613 if (i == dirlen)
1614 i = infinity;
1615 if (! NILP (trt))
ca1d1d23 1616 {
facdc750 1617 int ch;
aff2ce94 1618 int untranslated;
facdc750
RS
1619 int this_translated = 1;
1620
1621 if (multibyte
cb6792d2
RS
1622 /* Is *PTR the last byte of a character? */
1623 && (pat_end - ptr == 1 || CHAR_HEAD_P (ptr[1])))
ca1d1d23 1624 {
facdc750
RS
1625 unsigned char *charstart = ptr;
1626 while (! CHAR_HEAD_P (*charstart))
1627 charstart--;
aff2ce94 1628 untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
ab228c24 1629 if (charset_base == (untranslated & ~0xff))
facdc750 1630 {
ab228c24 1631 TRANSLATE (ch, trt, untranslated);
aff2ce94
RS
1632 if (! CHAR_HEAD_P (*ptr))
1633 {
1634 translate_prev_byte = ptr[-1];
1635 if (! CHAR_HEAD_P (translate_prev_byte))
1636 translate_anteprev_byte = ptr[-2];
1637 }
facdc750 1638 }
aff2ce94 1639 else
ab228c24
RS
1640 {
1641 this_translated = 0;
1642 ch = *ptr;
1643 }
ca1d1d23 1644 }
facdc750 1645 else if (!multibyte)
aff2ce94 1646 TRANSLATE (ch, trt, *ptr);
ca1d1d23
JB
1647 else
1648 {
facdc750
RS
1649 ch = *ptr;
1650 this_translated = 0;
ca1d1d23 1651 }
facdc750 1652
ab228c24
RS
1653 if (ch > 0400)
1654 j = ((unsigned char) ch) | 0200;
1655 else
1656 j = (unsigned char) ch;
1657
facdc750
RS
1658 if (i == infinity)
1659 stride_for_teases = BM_tab[j];
ab228c24 1660
facdc750
RS
1661 BM_tab[j] = dirlen - i;
1662 /* A translation table is accompanied by its inverse -- see */
1663 /* comment following downcase_table for details */
1664 if (this_translated)
ab228c24
RS
1665 {
1666 int starting_ch = ch;
1667 int starting_j = j;
1668 while (1)
1669 {
1670 TRANSLATE (ch, inverse_trt, ch);
1671 if (ch > 0400)
1672 j = ((unsigned char) ch) | 0200;
1673 else
1674 j = (unsigned char) ch;
1675
1676 /* For all the characters that map into CH,
1677 set up simple_translate to map the last byte
1678 into STARTING_J. */
1679 simple_translate[j] = starting_j;
1680 if (ch == starting_ch)
1681 break;
1682 BM_tab[j] = dirlen - i;
1683 }
1684 }
facdc750
RS
1685 }
1686 else
1687 {
1688 j = *ptr;
1689
1690 if (i == infinity)
1691 stride_for_teases = BM_tab[j];
1692 BM_tab[j] = dirlen - i;
ca1d1d23 1693 }
facdc750
RS
1694 /* stride_for_teases tells how much to stride if we get a */
1695 /* match on the far character but are subsequently */
1696 /* disappointed, by recording what the stride would have been */
1697 /* for that character if the last character had been */
1698 /* different. */
1699 }
1700 infinity = dirlen - infinity;
1701 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1702 /* loop invariant - POS_BYTE points at where last char (first
1703 char if reverse) of pattern would align in a possible match. */
1704 while (n != 0)
1705 {
1706 int tail_end;
1707 unsigned char *tail_end_ptr;
1708
1709 /* It's been reported that some (broken) compiler thinks that
1710 Boolean expressions in an arithmetic context are unsigned.
1711 Using an explicit ?1:0 prevents this. */
1712 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1713 < 0)
1714 return (n * (0 - direction));
1715 /* First we do the part we can by pointers (maybe nothing) */
1716 QUIT;
1717 pat = base_pat;
1718 limit = pos_byte - dirlen + direction;
67ce527d
KH
1719 if (direction > 0)
1720 {
1721 limit = BUFFER_CEILING_OF (limit);
1722 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1723 can take on without hitting edge of buffer or the gap. */
1724 limit = min (limit, pos_byte + 20000);
1725 limit = min (limit, lim_byte - 1);
1726 }
1727 else
1728 {
1729 limit = BUFFER_FLOOR_OF (limit);
1730 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1731 can take on without hitting edge of buffer or the gap. */
1732 limit = max (limit, pos_byte - 20000);
1733 limit = max (limit, lim_byte);
1734 }
facdc750
RS
1735 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1736 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1737
1738 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1739 {
facdc750
RS
1740 unsigned char *p2;
1741
1742 p_limit = BYTE_POS_ADDR (limit);
1743 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1744 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1745 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1746 {
facdc750 1747 if (direction > 0) /* worth duplicating */
ca1d1d23 1748 {
facdc750
RS
1749 /* Use signed comparison if appropriate
1750 to make cursor+infinity sure to be > p_limit.
1751 Assuming that the buffer lies in a range of addresses
1752 that are all "positive" (as ints) or all "negative",
1753 either kind of comparison will work as long
1754 as we don't step by infinity. So pick the kind
1755 that works when we do step by infinity. */
1756 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1757 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1758 cursor += BM_tab[*cursor];
ca1d1d23 1759 else
facdc750
RS
1760 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1761 cursor += BM_tab[*cursor];
1762 }
1763 else
1764 {
1765 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1766 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1767 cursor += BM_tab[*cursor];
1768 else
1769 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1770 cursor += BM_tab[*cursor];
1771 }
ca1d1d23 1772/* If you are here, cursor is beyond the end of the searched region. */
facdc750
RS
1773/* This can happen if you match on the far character of the pattern, */
1774/* because the "stride" of that character is infinity, a number able */
1775/* to throw you well beyond the end of the search. It can also */
1776/* happen if you fail to match within the permitted region and would */
1777/* otherwise try a character beyond that region */
1778 if ((cursor - p_limit) * direction <= len_byte)
1779 break; /* a small overrun is genuine */
1780 cursor -= infinity; /* large overrun = hit */
1781 i = dirlen - direction;
1782 if (! NILP (trt))
1783 {
1784 while ((i -= direction) + direction != 0)
ca1d1d23 1785 {
facdc750
RS
1786 int ch;
1787 cursor -= direction;
1788 /* Translate only the last byte of a character. */
1789 if (! multibyte
1790 || ((cursor == tail_end_ptr
1791 || CHAR_HEAD_P (cursor[1]))
1792 && (CHAR_HEAD_P (cursor[0])
1793 || (translate_prev_byte == cursor[-1]
1794 && (CHAR_HEAD_P (translate_prev_byte)
1795 || translate_anteprev_byte == cursor[-2])))))
1796 ch = simple_translate[*cursor];
1797 else
1798 ch = *cursor;
1799 if (pat[i] != ch)
1800 break;
ca1d1d23 1801 }
facdc750
RS
1802 }
1803 else
1804 {
1805 while ((i -= direction) + direction != 0)
ca1d1d23 1806 {
facdc750
RS
1807 cursor -= direction;
1808 if (pat[i] != *cursor)
1809 break;
ca1d1d23 1810 }
facdc750
RS
1811 }
1812 cursor += dirlen - i - direction; /* fix cursor */
1813 if (i + direction == 0)
1814 {
1815 int position;
0c8533c6 1816
facdc750 1817 cursor -= direction;
1113d9db 1818
facdc750
RS
1819 position = pos_byte + cursor - p2 + ((direction > 0)
1820 ? 1 - len_byte : 0);
1821 set_search_regs (position, len_byte);
ca325161 1822
facdc750
RS
1823 if ((n -= direction) != 0)
1824 cursor += dirlen; /* to resume search */
ca1d1d23 1825 else
facdc750
RS
1826 return ((direction > 0)
1827 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1828 }
facdc750
RS
1829 else
1830 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 1831 }
facdc750
RS
1832 pos_byte += cursor - p2;
1833 }
1834 else
1835 /* Now we'll pick up a clump that has to be done the hard */
1836 /* way because it covers a discontinuity */
1837 {
1838 limit = ((direction > 0)
1839 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1840 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1841 limit = ((direction > 0)
1842 ? min (limit + len_byte, lim_byte - 1)
1843 : max (limit - len_byte, lim_byte));
1844 /* LIMIT is now the last value POS_BYTE can have
1845 and still be valid for a possible match. */
1846 while (1)
ca1d1d23 1847 {
facdc750
RS
1848 /* This loop can be coded for space rather than */
1849 /* speed because it will usually run only once. */
1850 /* (the reach is at most len + 21, and typically */
1851 /* does not exceed len) */
1852 while ((limit - pos_byte) * direction >= 0)
1853 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1854 /* now run the same tests to distinguish going off the */
1855 /* end, a match or a phony match. */
1856 if ((pos_byte - limit) * direction <= len_byte)
1857 break; /* ran off the end */
1858 /* Found what might be a match.
1859 Set POS_BYTE back to last (first if reverse) pos. */
1860 pos_byte -= infinity;
1861 i = dirlen - direction;
1862 while ((i -= direction) + direction != 0)
ca1d1d23 1863 {
facdc750
RS
1864 int ch;
1865 unsigned char *ptr;
1866 pos_byte -= direction;
1867 ptr = BYTE_POS_ADDR (pos_byte);
1868 /* Translate only the last byte of a character. */
1869 if (! multibyte
1870 || ((ptr == tail_end_ptr
1871 || CHAR_HEAD_P (ptr[1]))
1872 && (CHAR_HEAD_P (ptr[0])
1873 || (translate_prev_byte == ptr[-1]
1874 && (CHAR_HEAD_P (translate_prev_byte)
1875 || translate_anteprev_byte == ptr[-2])))))
1876 ch = simple_translate[*ptr];
1877 else
1878 ch = *ptr;
1879 if (pat[i] != ch)
1880 break;
1881 }
1882 /* Above loop has moved POS_BYTE part or all the way
1883 back to the first pos (last pos if reverse).
1884 Set it once again at the last (first if reverse) char. */
1885 pos_byte += dirlen - i- direction;
1886 if (i + direction == 0)
1887 {
1888 int position;
1889 pos_byte -= direction;
1113d9db 1890
facdc750 1891 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
0c8533c6 1892
facdc750 1893 set_search_regs (position, len_byte);
ca325161 1894
facdc750
RS
1895 if ((n -= direction) != 0)
1896 pos_byte += dirlen; /* to resume search */
ca1d1d23 1897 else
facdc750
RS
1898 return ((direction > 0)
1899 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1900 }
facdc750
RS
1901 else
1902 pos_byte += stride_for_teases;
1903 }
1904 }
1905 /* We have done one clump. Can we continue? */
1906 if ((lim_byte - pos_byte) * direction < 0)
1907 return ((0 - n) * direction);
ca1d1d23 1908 }
facdc750 1909 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 1910}
ca325161 1911
fa8ed3e0 1912/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
a7e4cdde
RS
1913 for the overall match just found in the current buffer.
1914 Also clear out the match data for registers 1 and up. */
ca325161
RS
1915
1916static void
fa8ed3e0
RS
1917set_search_regs (beg_byte, nbytes)
1918 int beg_byte, nbytes;
ca325161 1919{
a7e4cdde
RS
1920 int i;
1921
ca325161
RS
1922 /* Make sure we have registers in which to store
1923 the match position. */
1924 if (search_regs.num_regs == 0)
1925 {
2d4a771a
RS
1926 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1927 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 1928 search_regs.num_regs = 2;
ca325161
RS
1929 }
1930
a7e4cdde
RS
1931 /* Clear out the other registers. */
1932 for (i = 1; i < search_regs.num_regs; i++)
1933 {
1934 search_regs.start[i] = -1;
1935 search_regs.end[i] = -1;
1936 }
1937
fa8ed3e0
RS
1938 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1939 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 1940 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 1941}
ca1d1d23
JB
1942\f
1943/* Given a string of words separated by word delimiters,
1944 compute a regexp that matches those exact words
1945 separated by arbitrary punctuation. */
1946
1947static Lisp_Object
1948wordify (string)
1949 Lisp_Object string;
1950{
1951 register unsigned char *p, *o;
0c8533c6 1952 register int i, i_byte, len, punct_count = 0, word_count = 0;
ca1d1d23 1953 Lisp_Object val;
0c8533c6
RS
1954 int prev_c = 0;
1955 int adjust;
ca1d1d23
JB
1956
1957 CHECK_STRING (string, 0);
1958 p = XSTRING (string)->data;
1959 len = XSTRING (string)->size;
1960
0c8533c6
RS
1961 for (i = 0, i_byte = 0; i < len; )
1962 {
1963 int c;
1964
1965 if (STRING_MULTIBYTE (string))
1966 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1967 else
1968 c = XSTRING (string)->data[i++];
1969
1970 if (SYNTAX (c) != Sword)
1971 {
1972 punct_count++;
1973 if (i > 0 && SYNTAX (prev_c) == Sword)
1974 word_count++;
1975 }
ca1d1d23 1976
0c8533c6
RS
1977 prev_c = c;
1978 }
1979
1980 if (SYNTAX (prev_c) == Sword)
1981 word_count++;
1982 if (!word_count)
1983 return build_string ("");
1984
1985 adjust = - punct_count + 5 * (word_count - 1) + 4;
8a2df937
RS
1986 if (STRING_MULTIBYTE (string))
1987 val = make_uninit_multibyte_string (len + adjust,
1988 STRING_BYTES (XSTRING (string))
1989 + adjust);
1990 else
1991 val = make_uninit_string (len + adjust);
ca1d1d23
JB
1992
1993 o = XSTRING (val)->data;
1994 *o++ = '\\';
1995 *o++ = 'b';
1e9582d4 1996 prev_c = 0;
ca1d1d23 1997
1e9582d4
RS
1998 for (i = 0, i_byte = 0; i < len; )
1999 {
2000 int c;
2001 int i_byte_orig = i_byte;
2002
2003 if (STRING_MULTIBYTE (string))
2004 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2005 else
8a2df937
RS
2006 {
2007 c = XSTRING (string)->data[i++];
2008 i_byte++;
2009 }
1e9582d4
RS
2010
2011 if (SYNTAX (c) == Sword)
2012 {
2013 bcopy (&XSTRING (string)->data[i_byte_orig], o,
2014 i_byte - i_byte_orig);
2015 o += i_byte - i_byte_orig;
2016 }
2017 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2018 {
2019 *o++ = '\\';
2020 *o++ = 'W';
2021 *o++ = '\\';
2022 *o++ = 'W';
2023 *o++ = '*';
2024 }
2025
2026 prev_c = c;
2027 }
ca1d1d23
JB
2028
2029 *o++ = '\\';
2030 *o++ = 'b';
2031
2032 return val;
2033}
2034\f
2035DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
6af43974 2036 "MSearch backward: ",
ca1d1d23
JB
2037 "Search backward from point for STRING.\n\
2038Set point to the beginning of the occurrence found, and return point.\n\
2039An optional second argument bounds the search; it is a buffer position.\n\
2040The match found must not extend before that position.\n\
2041Optional third argument, if t, means if fail just return nil (no error).\n\
2042 If not nil and not t, position at limit of search and return nil.\n\
2043Optional fourth argument is repeat count--search for successive occurrences.\n\
2044See also the functions `match-beginning', `match-end' and `replace-match'.")
2045 (string, bound, noerror, count)
2046 Lisp_Object string, bound, noerror, count;
2047{
b819a390 2048 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
2049}
2050
6af43974 2051DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
ca1d1d23
JB
2052 "Search forward from point for STRING.\n\
2053Set point to the end of the occurrence found, and return point.\n\
2054An optional second argument bounds the search; it is a buffer position.\n\
2055The match found must not extend after that position. nil is equivalent\n\
2056 to (point-max).\n\
2057Optional third argument, if t, means if fail just return nil (no error).\n\
2058 If not nil and not t, move to limit of search and return nil.\n\
2059Optional fourth argument is repeat count--search for successive occurrences.\n\
2060See also the functions `match-beginning', `match-end' and `replace-match'.")
2061 (string, bound, noerror, count)
2062 Lisp_Object string, bound, noerror, count;
2063{
b819a390 2064 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
2065}
2066
2067DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2068 "sWord search backward: ",
2069 "Search backward from point for STRING, ignoring differences in punctuation.\n\
2070Set point to the beginning of the occurrence found, and return point.\n\
2071An optional second argument bounds the search; it is a buffer position.\n\
2072The match found must not extend before that position.\n\
2073Optional third argument, if t, means if fail just return nil (no error).\n\
2074 If not nil and not t, move to limit of search and return nil.\n\
2075Optional fourth argument is repeat count--search for successive occurrences.")
2076 (string, bound, noerror, count)
2077 Lisp_Object string, bound, noerror, count;
2078{
b819a390 2079 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2080}
2081
2082DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2083 "sWord search: ",
2084 "Search forward from point for STRING, ignoring differences in punctuation.\n\
2085Set point to the end of the occurrence found, and return point.\n\
2086An optional second argument bounds the search; it is a buffer position.\n\
2087The match found must not extend after that position.\n\
2088Optional third argument, if t, means if fail just return nil (no error).\n\
2089 If not nil and not t, move to limit of search and return nil.\n\
2090Optional fourth argument is repeat count--search for successive occurrences.")
2091 (string, bound, noerror, count)
2092 Lisp_Object string, bound, noerror, count;
2093{
b819a390 2094 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
2095}
2096
2097DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2098 "sRE search backward: ",
2099 "Search backward from point for match for regular expression REGEXP.\n\
2100Set point to the beginning of the match, and return point.\n\
2101The match found is the one starting last in the buffer\n\
19c0a730 2102and yet ending before the origin of the search.\n\
ca1d1d23
JB
2103An optional second argument bounds the search; it is a buffer position.\n\
2104The match found must start at or after that position.\n\
2105Optional third argument, if t, means if fail just return nil (no error).\n\
2106 If not nil and not t, move to limit of search and return nil.\n\
2107Optional fourth argument is repeat count--search for successive occurrences.\n\
2108See also the functions `match-beginning', `match-end' and `replace-match'.")
19c0a730
KH
2109 (regexp, bound, noerror, count)
2110 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2111{
b819a390 2112 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2113}
2114
2115DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2116 "sRE search: ",
2117 "Search forward from point for regular expression REGEXP.\n\
2118Set point to the end of the occurrence found, and return point.\n\
2119An optional second argument bounds the search; it is a buffer position.\n\
2120The match found must not extend after that position.\n\
2121Optional third argument, if t, means if fail just return nil (no error).\n\
2122 If not nil and not t, move to limit of search and return nil.\n\
2123Optional fourth argument is repeat count--search for successive occurrences.\n\
2124See also the functions `match-beginning', `match-end' and `replace-match'.")
19c0a730
KH
2125 (regexp, bound, noerror, count)
2126 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2127{
b819a390
RS
2128 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2129}
2130
2131DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2132 "sPosix search backward: ",
2133 "Search backward from point for match for regular expression REGEXP.\n\
2134Find the longest match in accord with Posix regular expression rules.\n\
2135Set point to the beginning of the match, and return point.\n\
2136The match found is the one starting last in the buffer\n\
2137and yet ending before the origin of the search.\n\
2138An optional second argument bounds the search; it is a buffer position.\n\
2139The match found must start at or after that position.\n\
2140Optional third argument, if t, means if fail just return nil (no error).\n\
2141 If not nil and not t, move to limit of search and return nil.\n\
2142Optional fourth argument is repeat count--search for successive occurrences.\n\
2143See also the functions `match-beginning', `match-end' and `replace-match'.")
2144 (regexp, bound, noerror, count)
2145 Lisp_Object regexp, bound, noerror, count;
2146{
2147 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2148}
2149
2150DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2151 "sPosix search: ",
2152 "Search forward from point for regular expression REGEXP.\n\
2153Find the longest match in accord with Posix regular expression rules.\n\
2154Set point to the end of the occurrence found, and return point.\n\
2155An optional second argument bounds the search; it is a buffer position.\n\
2156The match found must not extend after that position.\n\
2157Optional third argument, if t, means if fail just return nil (no error).\n\
2158 If not nil and not t, move to limit of search and return nil.\n\
2159Optional fourth argument is repeat count--search for successive occurrences.\n\
2160See also the functions `match-beginning', `match-end' and `replace-match'.")
2161 (regexp, bound, noerror, count)
2162 Lisp_Object regexp, bound, noerror, count;
2163{
2164 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2165}
2166\f
d7a5ad5f 2167DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
ca1d1d23
JB
2168 "Replace text matched by last search with NEWTEXT.\n\
2169If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
5b9cf4b2
RS
2170Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
2171based on the replaced text.\n\
2172If the replaced text has only capital letters\n\
2173and has at least one multiletter word, convert NEWTEXT to all caps.\n\
2174If the replaced text has at least one word starting with a capital letter,\n\
2175then capitalize each word in NEWTEXT.\n\n\
ca1d1d23
JB
2176If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
2177Otherwise treat `\\' as special:\n\
2178 `\\&' in NEWTEXT means substitute original matched text.\n\
2179 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
2180 If Nth parens didn't match, substitute nothing.\n\
2181 `\\\\' means insert one `\\'.\n\
1113d9db 2182FIXEDCASE and LITERAL are optional arguments.\n\
080c45fd
RS
2183Leaves point at end of replacement text.\n\
2184\n\
2185The optional fourth argument STRING can be a string to modify.\n\
2186In that case, this function creates and returns a new string\n\
d7a5ad5f
RS
2187which is made by replacing the part of STRING that was matched.\n\
2188\n\
2189The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
2190It says to replace just that subexpression instead of the whole match.\n\
2191This is useful only after a regular expression search or match\n\
2192since only regular expressions have distinguished subexpressions.")
2193 (newtext, fixedcase, literal, string, subexp)
2194 Lisp_Object newtext, fixedcase, literal, string, subexp;
ca1d1d23
JB
2195{
2196 enum { nochange, all_caps, cap_initial } case_action;
ac3b28b1 2197 register int pos, pos_byte;
ca1d1d23 2198 int some_multiletter_word;
97832bd0 2199 int some_lowercase;
73dc8771 2200 int some_uppercase;
208767c3 2201 int some_nonuppercase_initial;
ca1d1d23
JB
2202 register int c, prevc;
2203 int inslen;
d7a5ad5f 2204 int sub;
3e18eecf 2205 int opoint, newpoint;
ca1d1d23 2206
16fdc568 2207 CHECK_STRING (newtext, 0);
ca1d1d23 2208
080c45fd
RS
2209 if (! NILP (string))
2210 CHECK_STRING (string, 4);
2211
ca1d1d23
JB
2212 case_action = nochange; /* We tried an initialization */
2213 /* but some C compilers blew it */
4746118a
JB
2214
2215 if (search_regs.num_regs <= 0)
2216 error ("replace-match called before any match found");
2217
d7a5ad5f
RS
2218 if (NILP (subexp))
2219 sub = 0;
2220 else
2221 {
2222 CHECK_NUMBER (subexp, 3);
2223 sub = XINT (subexp);
2224 if (sub < 0 || sub >= search_regs.num_regs)
2225 args_out_of_range (subexp, make_number (search_regs.num_regs));
2226 }
2227
080c45fd
RS
2228 if (NILP (string))
2229 {
d7a5ad5f
RS
2230 if (search_regs.start[sub] < BEGV
2231 || search_regs.start[sub] > search_regs.end[sub]
2232 || search_regs.end[sub] > ZV)
2233 args_out_of_range (make_number (search_regs.start[sub]),
2234 make_number (search_regs.end[sub]));
080c45fd
RS
2235 }
2236 else
2237 {
d7a5ad5f
RS
2238 if (search_regs.start[sub] < 0
2239 || search_regs.start[sub] > search_regs.end[sub]
2240 || search_regs.end[sub] > XSTRING (string)->size)
2241 args_out_of_range (make_number (search_regs.start[sub]),
2242 make_number (search_regs.end[sub]));
080c45fd 2243 }
ca1d1d23
JB
2244
2245 if (NILP (fixedcase))
2246 {
2247 /* Decide how to casify by examining the matched text. */
ac3b28b1 2248 int last;
ca1d1d23 2249
ac3b28b1
KH
2250 pos = search_regs.start[sub];
2251 last = search_regs.end[sub];
fa8ed3e0
RS
2252
2253 if (NILP (string))
ac3b28b1 2254 pos_byte = CHAR_TO_BYTE (pos);
fa8ed3e0 2255 else
ac3b28b1 2256 pos_byte = string_char_to_byte (string, pos);
fa8ed3e0 2257
ca1d1d23
JB
2258 prevc = '\n';
2259 case_action = all_caps;
2260
2261 /* some_multiletter_word is set nonzero if any original word
2262 is more than one letter long. */
2263 some_multiletter_word = 0;
97832bd0 2264 some_lowercase = 0;
208767c3 2265 some_nonuppercase_initial = 0;
73dc8771 2266 some_uppercase = 0;
ca1d1d23 2267
ac3b28b1 2268 while (pos < last)
ca1d1d23 2269 {
080c45fd 2270 if (NILP (string))
ac3b28b1
KH
2271 {
2272 c = FETCH_CHAR (pos_byte);
2273 INC_BOTH (pos, pos_byte);
2274 }
080c45fd 2275 else
ac3b28b1 2276 FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
080c45fd 2277
ca1d1d23
JB
2278 if (LOWERCASEP (c))
2279 {
2280 /* Cannot be all caps if any original char is lower case */
2281
97832bd0 2282 some_lowercase = 1;
ca1d1d23 2283 if (SYNTAX (prevc) != Sword)
208767c3 2284 some_nonuppercase_initial = 1;
ca1d1d23
JB
2285 else
2286 some_multiletter_word = 1;
2287 }
2288 else if (!NOCASEP (c))
2289 {
73dc8771 2290 some_uppercase = 1;
97832bd0 2291 if (SYNTAX (prevc) != Sword)
c4d460ce 2292 ;
97832bd0 2293 else
ca1d1d23
JB
2294 some_multiletter_word = 1;
2295 }
208767c3
RS
2296 else
2297 {
2298 /* If the initial is a caseless word constituent,
2299 treat that like a lowercase initial. */
2300 if (SYNTAX (prevc) != Sword)
2301 some_nonuppercase_initial = 1;
2302 }
ca1d1d23
JB
2303
2304 prevc = c;
2305 }
2306
97832bd0
RS
2307 /* Convert to all caps if the old text is all caps
2308 and has at least one multiletter word. */
2309 if (! some_lowercase && some_multiletter_word)
2310 case_action = all_caps;
c4d460ce 2311 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2312 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2313 case_action = cap_initial;
208767c3 2314 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2315 /* Should x -> yz, operating on X, give Yz or YZ?
2316 We'll assume the latter. */
2317 case_action = all_caps;
97832bd0
RS
2318 else
2319 case_action = nochange;
ca1d1d23
JB
2320 }
2321
080c45fd
RS
2322 /* Do replacement in a string. */
2323 if (!NILP (string))
2324 {
2325 Lisp_Object before, after;
2326
2327 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2328 make_number (search_regs.start[sub]));
2329 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2330
636a5e28
RS
2331 /* Substitute parts of the match into NEWTEXT
2332 if desired. */
080c45fd
RS
2333 if (NILP (literal))
2334 {
d131e79c
RS
2335 int lastpos = 0;
2336 int lastpos_byte = 0;
080c45fd
RS
2337 /* We build up the substituted string in ACCUM. */
2338 Lisp_Object accum;
2339 Lisp_Object middle;
ac3b28b1 2340 int length = STRING_BYTES (XSTRING (newtext));
080c45fd
RS
2341
2342 accum = Qnil;
2343
ac3b28b1 2344 for (pos_byte = 0, pos = 0; pos_byte < length;)
080c45fd
RS
2345 {
2346 int substart = -1;
2347 int subend;
1e79ec24 2348 int delbackslash = 0;
080c45fd 2349
0c8533c6
RS
2350 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2351
080c45fd
RS
2352 if (c == '\\')
2353 {
0c8533c6 2354 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
080c45fd
RS
2355 if (c == '&')
2356 {
d7a5ad5f
RS
2357 substart = search_regs.start[sub];
2358 subend = search_regs.end[sub];
080c45fd
RS
2359 }
2360 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2361 {
ad10348f 2362 if (search_regs.start[c - '0'] >= 0)
080c45fd
RS
2363 {
2364 substart = search_regs.start[c - '0'];
2365 subend = search_regs.end[c - '0'];
2366 }
2367 }
1e79ec24
KH
2368 else if (c == '\\')
2369 delbackslash = 1;
636a5e28
RS
2370 else
2371 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2372 }
2373 if (substart >= 0)
2374 {
d131e79c
RS
2375 if (pos - 2 != lastpos)
2376 middle = substring_both (newtext, lastpos,
2377 lastpos_byte,
2378 pos - 2, pos_byte - 2);
080c45fd
RS
2379 else
2380 middle = Qnil;
2381 accum = concat3 (accum, middle,
0c8533c6
RS
2382 Fsubstring (string,
2383 make_number (substart),
080c45fd
RS
2384 make_number (subend)));
2385 lastpos = pos;
0c8533c6 2386 lastpos_byte = pos_byte;
080c45fd 2387 }
1e79ec24
KH
2388 else if (delbackslash)
2389 {
d131e79c
RS
2390 middle = substring_both (newtext, lastpos,
2391 lastpos_byte,
2392 pos - 1, pos_byte - 1);
0c8533c6 2393
1e79ec24
KH
2394 accum = concat2 (accum, middle);
2395 lastpos = pos;
0c8533c6 2396 lastpos_byte = pos_byte;
1e79ec24 2397 }
080c45fd
RS
2398 }
2399
d131e79c
RS
2400 if (pos != lastpos)
2401 middle = substring_both (newtext, lastpos,
2402 lastpos_byte,
0c8533c6 2403 pos, pos_byte);
080c45fd
RS
2404 else
2405 middle = Qnil;
2406
2407 newtext = concat2 (accum, middle);
2408 }
2409
636a5e28 2410 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2411 if (case_action == all_caps)
2412 newtext = Fupcase (newtext);
2413 else if (case_action == cap_initial)
2b2eead9 2414 newtext = Fupcase_initials (newtext);
080c45fd
RS
2415
2416 return concat3 (before, newtext, after);
2417 }
2418
b0eba991
RS
2419 /* Record point, the move (quietly) to the start of the match. */
2420 if (PT > search_regs.start[sub])
2421 opoint = PT - ZV;
2422 else
2423 opoint = PT;
2424
fa8ed3e0 2425 TEMP_SET_PT (search_regs.start[sub]);
b0eba991 2426
9a76659d
JB
2427 /* We insert the replacement text before the old text, and then
2428 delete the original text. This means that markers at the
2429 beginning or end of the original will float to the corresponding
2430 position in the replacement. */
ca1d1d23 2431 if (!NILP (literal))
16fdc568 2432 Finsert_and_inherit (1, &newtext);
ca1d1d23
JB
2433 else
2434 {
2435 struct gcpro gcpro1;
ac3b28b1
KH
2436 int length = STRING_BYTES (XSTRING (newtext));
2437
16fdc568 2438 GCPRO1 (newtext);
ca1d1d23 2439
ac3b28b1 2440 for (pos_byte = 0, pos = 0; pos_byte < length;)
ca1d1d23 2441 {
6ec8bbd2 2442 int offset = PT - search_regs.start[sub];
9a76659d 2443
ac3b28b1
KH
2444 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2445
ca1d1d23
JB
2446 if (c == '\\')
2447 {
ac3b28b1 2448 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
ca1d1d23 2449 if (c == '&')
9a76659d
JB
2450 Finsert_buffer_substring
2451 (Fcurrent_buffer (),
d7a5ad5f
RS
2452 make_number (search_regs.start[sub] + offset),
2453 make_number (search_regs.end[sub] + offset));
78445046 2454 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
2455 {
2456 if (search_regs.start[c - '0'] >= 1)
9a76659d
JB
2457 Finsert_buffer_substring
2458 (Fcurrent_buffer (),
2459 make_number (search_regs.start[c - '0'] + offset),
2460 make_number (search_regs.end[c - '0'] + offset));
ca1d1d23 2461 }
636a5e28 2462 else if (c == '\\')
ca1d1d23 2463 insert_char (c);
636a5e28
RS
2464 else
2465 error ("Invalid use of `\\' in replacement text");
ca1d1d23
JB
2466 }
2467 else
2468 insert_char (c);
2469 }
2470 UNGCPRO;
2471 }
2472
6ec8bbd2 2473 inslen = PT - (search_regs.start[sub]);
d7a5ad5f 2474 del_range (search_regs.start[sub] + inslen, search_regs.end[sub] + inslen);
ca1d1d23
JB
2475
2476 if (case_action == all_caps)
6ec8bbd2 2477 Fupcase_region (make_number (PT - inslen), make_number (PT));
ca1d1d23 2478 else if (case_action == cap_initial)
6ec8bbd2 2479 Fupcase_initials_region (make_number (PT - inslen), make_number (PT));
b0eba991 2480
3e18eecf
RS
2481 newpoint = PT;
2482
b0eba991 2483 /* Put point back where it was in the text. */
8d808a65 2484 if (opoint <= 0)
fa8ed3e0 2485 TEMP_SET_PT (opoint + ZV);
b0eba991 2486 else
fa8ed3e0 2487 TEMP_SET_PT (opoint);
b0eba991
RS
2488
2489 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2490 move_if_not_intangible (newpoint);
b0eba991 2491
ca1d1d23
JB
2492 return Qnil;
2493}
2494\f
2495static Lisp_Object
2496match_limit (num, beginningp)
2497 Lisp_Object num;
2498 int beginningp;
2499{
2500 register int n;
2501
2502 CHECK_NUMBER (num, 0);
2503 n = XINT (num);
4746118a
JB
2504 if (n < 0 || n >= search_regs.num_regs)
2505 args_out_of_range (num, make_number (search_regs.num_regs));
2506 if (search_regs.num_regs <= 0
2507 || search_regs.start[n] < 0)
ca1d1d23
JB
2508 return Qnil;
2509 return (make_number ((beginningp) ? search_regs.start[n]
2510 : search_regs.end[n]));
2511}
2512
2513DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2514 "Return position of start of text matched by last search.\n\
5806161b
EN
2515SUBEXP, a number, specifies which parenthesized expression in the last\n\
2516 regexp.\n\
2517Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2518 SUBEXP pairs.\n\
ca1d1d23 2519Zero means the entire text matched by the whole regexp or whole string.")
5806161b
EN
2520 (subexp)
2521 Lisp_Object subexp;
ca1d1d23 2522{
5806161b 2523 return match_limit (subexp, 1);
ca1d1d23
JB
2524}
2525
2526DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2527 "Return position of end of text matched by last search.\n\
5806161b
EN
2528SUBEXP, a number, specifies which parenthesized expression in the last\n\
2529 regexp.\n\
2530Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2531 SUBEXP pairs.\n\
ca1d1d23 2532Zero means the entire text matched by the whole regexp or whole string.")
5806161b
EN
2533 (subexp)
2534 Lisp_Object subexp;
ca1d1d23 2535{
5806161b 2536 return match_limit (subexp, 0);
ca1d1d23
JB
2537}
2538
56256c2a 2539DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
ca1d1d23
JB
2540 "Return a list containing all info on what the last search matched.\n\
2541Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
2542All the elements are markers or nil (nil if the Nth pair didn't match)\n\
2543if the last match was on a buffer; integers or nil if a string was matched.\n\
56256c2a
RS
2544Use `store-match-data' to reinstate the data in this list.\n\
2545\n\
2546If INTEGERS (the optional first argument) is non-nil, always use integers\n\
8ca821e9 2547\(rather than markers) to represent buffer positions.\n\
56256c2a
RS
2548If REUSE is a list, reuse it as part of the value. If REUSE is long enough\n\
2549to hold all the values, and if INTEGERS is non-nil, no consing is done.")
2550 (integers, reuse)
2551 Lisp_Object integers, reuse;
ca1d1d23 2552{
56256c2a 2553 Lisp_Object tail, prev;
4746118a 2554 Lisp_Object *data;
ca1d1d23
JB
2555 int i, len;
2556
daa37602 2557 if (NILP (last_thing_searched))
c36bcf1b 2558 return Qnil;
daa37602 2559
4746118a
JB
2560 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2561 * sizeof (Lisp_Object));
2562
ca1d1d23 2563 len = -1;
4746118a 2564 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2565 {
2566 int start = search_regs.start[i];
2567 if (start >= 0)
2568 {
56256c2a
RS
2569 if (EQ (last_thing_searched, Qt)
2570 || ! NILP (integers))
ca1d1d23 2571 {
c235cce7
KH
2572 XSETFASTINT (data[2 * i], start);
2573 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2574 }
0ed62dc7 2575 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2576 {
2577 data[2 * i] = Fmake_marker ();
daa37602
JB
2578 Fset_marker (data[2 * i],
2579 make_number (start),
2580 last_thing_searched);
ca1d1d23
JB
2581 data[2 * i + 1] = Fmake_marker ();
2582 Fset_marker (data[2 * i + 1],
daa37602
JB
2583 make_number (search_regs.end[i]),
2584 last_thing_searched);
ca1d1d23 2585 }
daa37602
JB
2586 else
2587 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2588 abort ();
2589
ca1d1d23
JB
2590 len = i;
2591 }
2592 else
2593 data[2 * i] = data [2 * i + 1] = Qnil;
2594 }
56256c2a
RS
2595
2596 /* If REUSE is not usable, cons up the values and return them. */
2597 if (! CONSP (reuse))
2598 return Flist (2 * len + 2, data);
2599
2600 /* If REUSE is a list, store as many value elements as will fit
2601 into the elements of REUSE. */
2602 for (i = 0, tail = reuse; CONSP (tail);
2603 i++, tail = XCONS (tail)->cdr)
2604 {
2605 if (i < 2 * len + 2)
2606 XCONS (tail)->car = data[i];
2607 else
2608 XCONS (tail)->car = Qnil;
2609 prev = tail;
2610 }
2611
2612 /* If we couldn't fit all value elements into REUSE,
2613 cons up the rest of them and add them to the end of REUSE. */
2614 if (i < 2 * len + 2)
2615 XCONS (prev)->cdr = Flist (2 * len + 2 - i, data + i);
2616
2617 return reuse;
ca1d1d23
JB
2618}
2619
2620
3f1c005b 2621DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
ca1d1d23
JB
2622 "Set internal data on last search match from elements of LIST.\n\
2623LIST should have been created by calling `match-data' previously.")
2624 (list)
2625 register Lisp_Object list;
2626{
2627 register int i;
2628 register Lisp_Object marker;
2629
7074fde6
FP
2630 if (running_asynch_code)
2631 save_search_regs ();
2632
ca1d1d23 2633 if (!CONSP (list) && !NILP (list))
b37902c8 2634 list = wrong_type_argument (Qconsp, list);
ca1d1d23 2635
daa37602
JB
2636 /* Unless we find a marker with a buffer in LIST, assume that this
2637 match data came from a string. */
2638 last_thing_searched = Qt;
2639
4746118a
JB
2640 /* Allocate registers if they don't already exist. */
2641 {
d084e942 2642 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
2643
2644 if (length > search_regs.num_regs)
2645 {
1113d9db
JB
2646 if (search_regs.num_regs == 0)
2647 {
2648 search_regs.start
2649 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2650 search_regs.end
2651 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2652 }
4746118a 2653 else
1113d9db
JB
2654 {
2655 search_regs.start
2656 = (regoff_t *) xrealloc (search_regs.start,
2657 length * sizeof (regoff_t));
2658 search_regs.end
2659 = (regoff_t *) xrealloc (search_regs.end,
2660 length * sizeof (regoff_t));
2661 }
4746118a 2662
487282dc 2663 search_regs.num_regs = length;
4746118a
JB
2664 }
2665 }
2666
2667 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2668 {
2669 marker = Fcar (list);
2670 if (NILP (marker))
2671 {
2672 search_regs.start[i] = -1;
2673 list = Fcdr (list);
2674 }
2675 else
2676 {
0ed62dc7 2677 if (MARKERP (marker))
daa37602
JB
2678 {
2679 if (XMARKER (marker)->buffer == 0)
c235cce7 2680 XSETFASTINT (marker, 0);
daa37602 2681 else
a3668d92 2682 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
daa37602 2683 }
ca1d1d23
JB
2684
2685 CHECK_NUMBER_COERCE_MARKER (marker, 0);
2686 search_regs.start[i] = XINT (marker);
2687 list = Fcdr (list);
2688
2689 marker = Fcar (list);
0ed62dc7 2690 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
c235cce7 2691 XSETFASTINT (marker, 0);
ca1d1d23
JB
2692
2693 CHECK_NUMBER_COERCE_MARKER (marker, 0);
2694 search_regs.end[i] = XINT (marker);
2695 }
2696 list = Fcdr (list);
2697 }
2698
2699 return Qnil;
2700}
2701
7074fde6
FP
2702/* If non-zero the match data have been saved in saved_search_regs
2703 during the execution of a sentinel or filter. */
75ebf74b 2704static int search_regs_saved;
7074fde6
FP
2705static struct re_registers saved_search_regs;
2706
2707/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2708 if asynchronous code (filter or sentinel) is running. */
2709static void
2710save_search_regs ()
2711{
2712 if (!search_regs_saved)
2713 {
2714 saved_search_regs.num_regs = search_regs.num_regs;
2715 saved_search_regs.start = search_regs.start;
2716 saved_search_regs.end = search_regs.end;
2717 search_regs.num_regs = 0;
2d4a771a
RS
2718 search_regs.start = 0;
2719 search_regs.end = 0;
7074fde6
FP
2720
2721 search_regs_saved = 1;
2722 }
2723}
2724
2725/* Called upon exit from filters and sentinels. */
2726void
2727restore_match_data ()
2728{
2729 if (search_regs_saved)
2730 {
2731 if (search_regs.num_regs > 0)
2732 {
2733 xfree (search_regs.start);
2734 xfree (search_regs.end);
2735 }
2736 search_regs.num_regs = saved_search_regs.num_regs;
2737 search_regs.start = saved_search_regs.start;
2738 search_regs.end = saved_search_regs.end;
2739
2740 search_regs_saved = 0;
2741 }
2742}
2743
ca1d1d23
JB
2744/* Quote a string to inactivate reg-expr chars */
2745
2746DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2747 "Return a regexp string which matches exactly STRING and nothing else.")
5806161b
EN
2748 (string)
2749 Lisp_Object string;
ca1d1d23
JB
2750{
2751 register unsigned char *in, *out, *end;
2752 register unsigned char *temp;
0c8533c6 2753 int backslashes_added = 0;
ca1d1d23 2754
5806161b 2755 CHECK_STRING (string, 0);
ca1d1d23 2756
fc932ac6 2757 temp = (unsigned char *) alloca (STRING_BYTES (XSTRING (string)) * 2);
ca1d1d23
JB
2758
2759 /* Now copy the data into the new string, inserting escapes. */
2760
5806161b 2761 in = XSTRING (string)->data;
fc932ac6 2762 end = in + STRING_BYTES (XSTRING (string));
ca1d1d23
JB
2763 out = temp;
2764
2765 for (; in != end; in++)
2766 {
2767 if (*in == '[' || *in == ']'
2768 || *in == '*' || *in == '.' || *in == '\\'
2769 || *in == '?' || *in == '+'
2770 || *in == '^' || *in == '$')
0c8533c6 2771 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
2772 *out++ = *in;
2773 }
2774
3f8100f1 2775 return make_specified_string (temp,
0c8533c6 2776 XSTRING (string)->size + backslashes_added,
3f8100f1
RS
2777 out - temp,
2778 STRING_MULTIBYTE (string));
ca1d1d23
JB
2779}
2780\f
dfcf069d 2781void
ca1d1d23
JB
2782syms_of_search ()
2783{
2784 register int i;
2785
487282dc
KH
2786 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2787 {
2788 searchbufs[i].buf.allocated = 100;
2789 searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
2790 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2791 searchbufs[i].regexp = Qnil;
2792 staticpro (&searchbufs[i].regexp);
2793 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2794 }
2795 searchbuf_head = &searchbufs[0];
ca1d1d23
JB
2796
2797 Qsearch_failed = intern ("search-failed");
2798 staticpro (&Qsearch_failed);
2799 Qinvalid_regexp = intern ("invalid-regexp");
2800 staticpro (&Qinvalid_regexp);
2801
2802 Fput (Qsearch_failed, Qerror_conditions,
2803 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2804 Fput (Qsearch_failed, Qerror_message,
2805 build_string ("Search failed"));
2806
2807 Fput (Qinvalid_regexp, Qerror_conditions,
2808 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2809 Fput (Qinvalid_regexp, Qerror_message,
2810 build_string ("Invalid regexp"));
2811
daa37602
JB
2812 last_thing_searched = Qnil;
2813 staticpro (&last_thing_searched);
2814
ca1d1d23 2815 defsubr (&Slooking_at);
b819a390
RS
2816 defsubr (&Sposix_looking_at);
2817 defsubr (&Sstring_match);
2818 defsubr (&Sposix_string_match);
ca1d1d23
JB
2819 defsubr (&Ssearch_forward);
2820 defsubr (&Ssearch_backward);
2821 defsubr (&Sword_search_forward);
2822 defsubr (&Sword_search_backward);
2823 defsubr (&Sre_search_forward);
2824 defsubr (&Sre_search_backward);
b819a390
RS
2825 defsubr (&Sposix_search_forward);
2826 defsubr (&Sposix_search_backward);
ca1d1d23
JB
2827 defsubr (&Sreplace_match);
2828 defsubr (&Smatch_beginning);
2829 defsubr (&Smatch_end);
2830 defsubr (&Smatch_data);
3f1c005b 2831 defsubr (&Sset_match_data);
ca1d1d23
JB
2832 defsubr (&Sregexp_quote);
2833}