(ediff-autostore-merges): Doc fix.
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
31c8f881 2 Copyright (C) 1985, 86, 87, 93, 94, 97, 1998 Free Software Foundation, Inc.
ca1d1d23
JB
3
4This file is part of GNU Emacs.
5
6GNU Emacs is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
7c938215 8the Free Software Foundation; either version 2, or (at your option)
ca1d1d23
JB
9any later version.
10
11GNU Emacs is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU Emacs; see the file COPYING. If not, write to
3b7ad313
EN
18the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19Boston, MA 02111-1307, USA. */
ca1d1d23
JB
20
21
18160b98 22#include <config.h>
ca1d1d23
JB
23#include "lisp.h"
24#include "syntax.h"
5679531d 25#include "category.h"
ca1d1d23 26#include "buffer.h"
5679531d 27#include "charset.h"
9169c321 28#include "region-cache.h"
ca1d1d23 29#include "commands.h"
9ac0d9e0 30#include "blockinput.h"
bf1760bb 31#include "intervals.h"
4746118a 32
ca1d1d23
JB
33#include <sys/types.h>
34#include "regex.h"
35
67ce527d
KH
36#define min(a, b) ((a) < (b) ? (a) : (b))
37#define max(a, b) ((a) > (b) ? (a) : (b))
38
1d288aef 39#define REGEXP_CACHE_SIZE 20
ca1d1d23 40
487282dc
KH
41/* If the regexp is non-nil, then the buffer contains the compiled form
42 of that regexp, suitable for searching. */
1d288aef
RS
43struct regexp_cache
44{
487282dc
KH
45 struct regexp_cache *next;
46 Lisp_Object regexp;
47 struct re_pattern_buffer buf;
48 char fastmap[0400];
b819a390
RS
49 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
50 char posix;
487282dc 51};
ca1d1d23 52
487282dc
KH
53/* The instances of that struct. */
54struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 55
487282dc
KH
56/* The head of the linked list; points to the most recently used buffer. */
57struct regexp_cache *searchbuf_head;
ca1d1d23 58
ca1d1d23 59
4746118a
JB
60/* Every call to re_match, etc., must pass &search_regs as the regs
61 argument unless you can show it is unnecessary (i.e., if re_match
62 is certainly going to be called again before region-around-match
63 can be called).
64
65 Since the registers are now dynamically allocated, we need to make
66 sure not to refer to the Nth register before checking that it has
1113d9db
JB
67 been allocated by checking search_regs.num_regs.
68
69 The regex code keeps track of whether it has allocated the search
487282dc
KH
70 buffer using bits in the re_pattern_buffer. This means that whenever
71 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
72 allocated any registers, and will allocate new registers the next
73 time you call a searching or matching function. Therefore, we need
74 to call re_set_registers after compiling a new pattern or after
75 setting the match registers, so that the regex functions will be
76 able to free or re-allocate it properly. */
ca1d1d23
JB
77static struct re_registers search_regs;
78
daa37602
JB
79/* The buffer in which the last search was performed, or
80 Qt if the last search was done in a string;
81 Qnil if no searching has been done yet. */
82static Lisp_Object last_thing_searched;
ca1d1d23 83
8e6208c5 84/* error condition signaled when regexp compile_pattern fails */
ca1d1d23
JB
85
86Lisp_Object Qinvalid_regexp;
87
ca325161 88static void set_search_regs ();
044f81f1 89static void save_search_regs ();
facdc750
RS
90static int simple_search ();
91static int boyer_moore ();
b819a390
RS
92static int search_buffer ();
93
ca1d1d23
JB
94static void
95matcher_overflow ()
96{
97 error ("Stack overflow in regexp matcher");
98}
99
100#ifdef __STDC__
101#define CONST const
102#else
103#define CONST
104#endif
105
b819a390
RS
106/* Compile a regexp and signal a Lisp error if anything goes wrong.
107 PATTERN is the pattern to compile.
108 CP is the place to put the result.
facdc750 109 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
110 REGP is the structure that says where to store the "register"
111 values that will result from matching this pattern.
112 If it is 0, we should compile the pattern not to record any
113 subexpression bounds.
114 POSIX is nonzero if we want full backtracking (POSIX style)
5679531d
KH
115 for this pattern. 0 means backtrack only enough to get a valid match.
116 MULTIBYTE is nonzero if we want to handle multibyte characters in
117 PATTERN. 0 means all multibyte characters are recognized just as
118 sequences of binary data. */
ca1d1d23 119
487282dc 120static void
5679531d 121compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
487282dc 122 struct regexp_cache *cp;
ca1d1d23 123 Lisp_Object pattern;
facdc750 124 Lisp_Object translate;
487282dc 125 struct re_registers *regp;
b819a390 126 int posix;
5679531d 127 int multibyte;
ca1d1d23 128{
f8bd51c4
KH
129 char *raw_pattern;
130 int raw_pattern_size;
d451e4db 131 char *val;
b819a390 132 reg_syntax_t old;
ca1d1d23 133
f8bd51c4
KH
134 /* MULTIBYTE says whether the text to be searched is multibyte.
135 We must convert PATTERN to match that, or we will not really
136 find things right. */
137
138 if (multibyte == STRING_MULTIBYTE (pattern))
139 {
140 raw_pattern = (char *) XSTRING (pattern)->data;
fc932ac6 141 raw_pattern_size = STRING_BYTES (XSTRING (pattern));
f8bd51c4
KH
142 }
143 else if (multibyte)
144 {
145 raw_pattern_size = count_size_as_multibyte (XSTRING (pattern)->data,
146 XSTRING (pattern)->size);
147 raw_pattern = (char *) alloca (raw_pattern_size + 1);
148 copy_text (XSTRING (pattern)->data, raw_pattern,
149 XSTRING (pattern)->size, 0, 1);
150 }
151 else
152 {
153 /* Converting multibyte to single-byte.
154
155 ??? Perhaps this conversion should be done in a special way
156 by subtracting nonascii-insert-offset from each non-ASCII char,
157 so that only the multibyte chars which really correspond to
158 the chosen single-byte character set can possibly match. */
159 raw_pattern_size = XSTRING (pattern)->size;
160 raw_pattern = (char *) alloca (raw_pattern_size + 1);
161 copy_text (XSTRING (pattern)->data, raw_pattern,
fc932ac6 162 STRING_BYTES (XSTRING (pattern)), 1, 0);
f8bd51c4
KH
163 }
164
487282dc 165 cp->regexp = Qnil;
facdc750 166 cp->buf.translate = (! NILP (translate) ? translate : 0);
b819a390 167 cp->posix = posix;
5679531d 168 cp->buf.multibyte = multibyte;
9ac0d9e0 169 BLOCK_INPUT;
b819a390
RS
170 old = re_set_syntax (RE_SYNTAX_EMACS
171 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
f8bd51c4 172 val = (char *) re_compile_pattern (raw_pattern, raw_pattern_size, &cp->buf);
b819a390 173 re_set_syntax (old);
9ac0d9e0 174 UNBLOCK_INPUT;
ca1d1d23 175 if (val)
487282dc 176 Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
1113d9db 177
487282dc 178 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
179}
180
181/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
182 the cache.
183 PATTERN is the pattern to compile.
facdc750 184 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
185 REGP is the structure that says where to store the "register"
186 values that will result from matching this pattern.
187 If it is 0, we should compile the pattern not to record any
188 subexpression bounds.
189 POSIX is nonzero if we want full backtracking (POSIX style)
190 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
191
192struct re_pattern_buffer *
0c8533c6 193compile_pattern (pattern, regp, translate, posix, multibyte)
487282dc
KH
194 Lisp_Object pattern;
195 struct re_registers *regp;
facdc750 196 Lisp_Object translate;
0c8533c6 197 int posix, multibyte;
487282dc
KH
198{
199 struct regexp_cache *cp, **cpp;
200
201 for (cpp = &searchbuf_head; ; cpp = &cp->next)
202 {
203 cp = *cpp;
1d288aef
RS
204 if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
205 && !NILP (Fstring_equal (cp->regexp, pattern))
facdc750 206 && cp->buf.translate == (! NILP (translate) ? translate : 0)
5679531d
KH
207 && cp->posix == posix
208 && cp->buf.multibyte == multibyte)
487282dc
KH
209 break;
210
211 /* If we're at the end of the cache, compile into the last cell. */
212 if (cp->next == 0)
213 {
5679531d 214 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
487282dc
KH
215 break;
216 }
217 }
218
219 /* When we get here, cp (aka *cpp) contains the compiled pattern,
220 either because we found it in the cache or because we just compiled it.
221 Move it to the front of the queue to mark it as most recently used. */
222 *cpp = cp->next;
223 cp->next = searchbuf_head;
224 searchbuf_head = cp;
1113d9db 225
6639708c
RS
226 /* Advise the searching functions about the space we have allocated
227 for register data. */
228 if (regp)
229 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
230
487282dc 231 return &cp->buf;
ca1d1d23
JB
232}
233
234/* Error condition used for failing searches */
235Lisp_Object Qsearch_failed;
236
237Lisp_Object
238signal_failure (arg)
239 Lisp_Object arg;
240{
241 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
242 return Qnil;
243}
244\f
b819a390
RS
245static Lisp_Object
246looking_at_1 (string, posix)
ca1d1d23 247 Lisp_Object string;
b819a390 248 int posix;
ca1d1d23
JB
249{
250 Lisp_Object val;
251 unsigned char *p1, *p2;
252 int s1, s2;
253 register int i;
487282dc 254 struct re_pattern_buffer *bufp;
ca1d1d23 255
7074fde6
FP
256 if (running_asynch_code)
257 save_search_regs ();
258
ca1d1d23 259 CHECK_STRING (string, 0);
487282dc
KH
260 bufp = compile_pattern (string, &search_regs,
261 (!NILP (current_buffer->case_fold_search)
facdc750 262 ? DOWNCASE_TABLE : Qnil),
0c8533c6
RS
263 posix,
264 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23
JB
265
266 immediate_quit = 1;
267 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
268
269 /* Get pointers and sizes of the two strings
270 that make up the visible portion of the buffer. */
271
272 p1 = BEGV_ADDR;
fa8ed3e0 273 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 274 p2 = GAP_END_ADDR;
fa8ed3e0 275 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
276 if (s1 < 0)
277 {
278 p2 = p1;
fa8ed3e0 279 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
280 s1 = 0;
281 }
282 if (s2 < 0)
283 {
fa8ed3e0 284 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
285 s2 = 0;
286 }
8bb43c28
RS
287
288 re_match_object = Qnil;
ca1d1d23 289
487282dc 290 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
fa8ed3e0
RS
291 PT_BYTE - BEGV_BYTE, &search_regs,
292 ZV_BYTE - BEGV_BYTE);
ca1d1d23
JB
293 if (i == -2)
294 matcher_overflow ();
295
296 val = (0 <= i ? Qt : Qnil);
fa8ed3e0
RS
297 if (i >= 0)
298 for (i = 0; i < search_regs.num_regs; i++)
299 if (search_regs.start[i] >= 0)
300 {
301 search_regs.start[i]
302 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
303 search_regs.end[i]
304 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
305 }
a3668d92 306 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
307 immediate_quit = 0;
308 return val;
309}
310
b819a390 311DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
94f94972 312 "Return t if text after point matches regular expression REGEXP.\n\
b819a390
RS
313This function modifies the match data that `match-beginning',\n\
314`match-end' and `match-data' access; save and restore the match\n\
315data if you want to preserve them.")
94f94972
RS
316 (regexp)
317 Lisp_Object regexp;
b819a390 318{
94f94972 319 return looking_at_1 (regexp, 0);
b819a390
RS
320}
321
322DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
94f94972 323 "Return t if text after point matches regular expression REGEXP.\n\
b819a390
RS
324Find the longest match, in accord with Posix regular expression rules.\n\
325This function modifies the match data that `match-beginning',\n\
326`match-end' and `match-data' access; save and restore the match\n\
327data if you want to preserve them.")
94f94972
RS
328 (regexp)
329 Lisp_Object regexp;
b819a390 330{
94f94972 331 return looking_at_1 (regexp, 1);
b819a390
RS
332}
333\f
334static Lisp_Object
335string_match_1 (regexp, string, start, posix)
ca1d1d23 336 Lisp_Object regexp, string, start;
b819a390 337 int posix;
ca1d1d23
JB
338{
339 int val;
487282dc 340 struct re_pattern_buffer *bufp;
0c8533c6
RS
341 int pos, pos_byte;
342 int i;
ca1d1d23 343
7074fde6
FP
344 if (running_asynch_code)
345 save_search_regs ();
346
ca1d1d23
JB
347 CHECK_STRING (regexp, 0);
348 CHECK_STRING (string, 1);
349
350 if (NILP (start))
0c8533c6 351 pos = 0, pos_byte = 0;
ca1d1d23
JB
352 else
353 {
354 int len = XSTRING (string)->size;
355
356 CHECK_NUMBER (start, 2);
0c8533c6
RS
357 pos = XINT (start);
358 if (pos < 0 && -pos <= len)
359 pos = len + pos;
360 else if (0 > pos || pos > len)
ca1d1d23 361 args_out_of_range (string, start);
0c8533c6 362 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
363 }
364
487282dc
KH
365 bufp = compile_pattern (regexp, &search_regs,
366 (!NILP (current_buffer->case_fold_search)
facdc750 367 ? DOWNCASE_TABLE : Qnil),
0c8533c6
RS
368 posix,
369 STRING_MULTIBYTE (string));
ca1d1d23 370 immediate_quit = 1;
8bb43c28
RS
371 re_match_object = string;
372
487282dc 373 val = re_search (bufp, (char *) XSTRING (string)->data,
fc932ac6
RS
374 STRING_BYTES (XSTRING (string)), pos_byte,
375 STRING_BYTES (XSTRING (string)) - pos_byte,
ca1d1d23
JB
376 &search_regs);
377 immediate_quit = 0;
daa37602 378 last_thing_searched = Qt;
ca1d1d23
JB
379 if (val == -2)
380 matcher_overflow ();
381 if (val < 0) return Qnil;
0c8533c6
RS
382
383 for (i = 0; i < search_regs.num_regs; i++)
384 if (search_regs.start[i] >= 0)
385 {
386 search_regs.start[i]
387 = string_byte_to_char (string, search_regs.start[i]);
388 search_regs.end[i]
389 = string_byte_to_char (string, search_regs.end[i]);
390 }
391
392 return make_number (string_byte_to_char (string, val));
ca1d1d23 393}
e59a8453 394
b819a390
RS
395DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
396 "Return index of start of first match for REGEXP in STRING, or nil.\n\
397If third arg START is non-nil, start search at that index in STRING.\n\
398For index of first char beyond the match, do (match-end 0).\n\
399`match-end' and `match-beginning' also give indices of substrings\n\
400matched by parenthesis constructs in the pattern.")
401 (regexp, string, start)
402 Lisp_Object regexp, string, start;
403{
404 return string_match_1 (regexp, string, start, 0);
405}
406
407DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
408 "Return index of start of first match for REGEXP in STRING, or nil.\n\
409Find the longest match, in accord with Posix regular expression rules.\n\
410If third arg START is non-nil, start search at that index in STRING.\n\
411For index of first char beyond the match, do (match-end 0).\n\
412`match-end' and `match-beginning' also give indices of substrings\n\
413matched by parenthesis constructs in the pattern.")
414 (regexp, string, start)
415 Lisp_Object regexp, string, start;
416{
417 return string_match_1 (regexp, string, start, 1);
418}
419
e59a8453
RS
420/* Match REGEXP against STRING, searching all of STRING,
421 and return the index of the match, or negative on failure.
422 This does not clobber the match data. */
423
424int
425fast_string_match (regexp, string)
426 Lisp_Object regexp, string;
427{
428 int val;
487282dc 429 struct re_pattern_buffer *bufp;
e59a8453 430
facdc750
RS
431 bufp = compile_pattern (regexp, 0, Qnil,
432 0, STRING_MULTIBYTE (string));
e59a8453 433 immediate_quit = 1;
8bb43c28
RS
434 re_match_object = string;
435
487282dc 436 val = re_search (bufp, (char *) XSTRING (string)->data,
fc932ac6
RS
437 STRING_BYTES (XSTRING (string)), 0,
438 STRING_BYTES (XSTRING (string)), 0);
e59a8453
RS
439 immediate_quit = 0;
440 return val;
441}
5679531d
KH
442
443/* Match REGEXP against STRING, searching all of STRING ignoring case,
444 and return the index of the match, or negative on failure.
0c8533c6
RS
445 This does not clobber the match data.
446 We assume that STRING contains single-byte characters. */
5679531d
KH
447
448extern Lisp_Object Vascii_downcase_table;
449
450int
b4577c63 451fast_c_string_match_ignore_case (regexp, string)
5679531d
KH
452 Lisp_Object regexp;
453 char *string;
454{
455 int val;
456 struct re_pattern_buffer *bufp;
457 int len = strlen (string);
458
0c8533c6 459 regexp = string_make_unibyte (regexp);
b4577c63 460 re_match_object = Qt;
5679531d 461 bufp = compile_pattern (regexp, 0,
facdc750 462 Vascii_downcase_table, 0,
f8bd51c4 463 0);
5679531d
KH
464 immediate_quit = 1;
465 val = re_search (bufp, string, len, 0, len, 0);
466 immediate_quit = 0;
467 return val;
468}
ca1d1d23 469\f
9169c321
JB
470/* The newline cache: remembering which sections of text have no newlines. */
471
472/* If the user has requested newline caching, make sure it's on.
473 Otherwise, make sure it's off.
474 This is our cheezy way of associating an action with the change of
475 state of a buffer-local variable. */
476static void
477newline_cache_on_off (buf)
478 struct buffer *buf;
479{
480 if (NILP (buf->cache_long_line_scans))
481 {
482 /* It should be off. */
483 if (buf->newline_cache)
484 {
485 free_region_cache (buf->newline_cache);
486 buf->newline_cache = 0;
487 }
488 }
489 else
490 {
491 /* It should be on. */
492 if (buf->newline_cache == 0)
493 buf->newline_cache = new_region_cache ();
494 }
495}
496
497\f
498/* Search for COUNT instances of the character TARGET between START and END.
499
500 If COUNT is positive, search forwards; END must be >= START.
501 If COUNT is negative, search backwards for the -COUNTth instance;
502 END must be <= START.
503 If COUNT is zero, do anything you please; run rogue, for all I care.
504
505 If END is zero, use BEGV or ZV instead, as appropriate for the
506 direction indicated by COUNT.
ffd56f97
JB
507
508 If we find COUNT instances, set *SHORTAGE to zero, and return the
5bfe95c9
RS
509 position after the COUNTth match. Note that for reverse motion
510 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 511
9169c321
JB
512 If we don't find COUNT instances before reaching END, set *SHORTAGE
513 to the number of TARGETs left unfound, and return END.
ffd56f97 514
087a5f81
RS
515 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
516 except when inside redisplay. */
517
9169c321
JB
518scan_buffer (target, start, end, count, shortage, allow_quit)
519 register int target;
520 int start, end;
521 int count;
522 int *shortage;
087a5f81 523 int allow_quit;
ca1d1d23 524{
9169c321
JB
525 struct region_cache *newline_cache;
526 int direction;
ffd56f97 527
9169c321
JB
528 if (count > 0)
529 {
530 direction = 1;
531 if (! end) end = ZV;
532 }
533 else
534 {
535 direction = -1;
536 if (! end) end = BEGV;
537 }
ffd56f97 538
9169c321
JB
539 newline_cache_on_off (current_buffer);
540 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
541
542 if (shortage != 0)
543 *shortage = 0;
544
087a5f81 545 immediate_quit = allow_quit;
ca1d1d23 546
ffd56f97 547 if (count > 0)
9169c321 548 while (start != end)
ca1d1d23 549 {
9169c321
JB
550 /* Our innermost scanning loop is very simple; it doesn't know
551 about gaps, buffer ends, or the newline cache. ceiling is
552 the position of the last character before the next such
553 obstacle --- the last character the dumb search loop should
554 examine. */
fa8ed3e0
RS
555 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
556 int start_byte = CHAR_TO_BYTE (start);
67ce527d 557 int tem;
9169c321
JB
558
559 /* If we're looking for a newline, consult the newline cache
560 to see where we can avoid some scanning. */
561 if (target == '\n' && newline_cache)
562 {
563 int next_change;
564 immediate_quit = 0;
565 while (region_cache_forward
fa8ed3e0
RS
566 (current_buffer, newline_cache, start_byte, &next_change))
567 start_byte = next_change;
cbe0db0d 568 immediate_quit = allow_quit;
9169c321 569
fa8ed3e0
RS
570 /* START should never be after END. */
571 if (start_byte > ceiling_byte)
572 start_byte = ceiling_byte;
9169c321
JB
573
574 /* Now the text after start is an unknown region, and
575 next_change is the position of the next known region. */
fa8ed3e0 576 ceiling_byte = min (next_change - 1, ceiling_byte);
9169c321
JB
577 }
578
579 /* The dumb loop can only scan text stored in contiguous
580 bytes. BUFFER_CEILING_OF returns the last character
581 position that is contiguous, so the ceiling is the
582 position after that. */
67ce527d
KH
583 tem = BUFFER_CEILING_OF (start_byte);
584 ceiling_byte = min (tem, ceiling_byte);
9169c321
JB
585
586 {
587 /* The termination address of the dumb loop. */
fa8ed3e0
RS
588 register unsigned char *ceiling_addr
589 = BYTE_POS_ADDR (ceiling_byte) + 1;
590 register unsigned char *cursor
591 = BYTE_POS_ADDR (start_byte);
9169c321
JB
592 unsigned char *base = cursor;
593
594 while (cursor < ceiling_addr)
595 {
596 unsigned char *scan_start = cursor;
597
598 /* The dumb loop. */
599 while (*cursor != target && ++cursor < ceiling_addr)
600 ;
601
602 /* If we're looking for newlines, cache the fact that
603 the region from start to cursor is free of them. */
604 if (target == '\n' && newline_cache)
605 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
606 start_byte + scan_start - base,
607 start_byte + cursor - base);
9169c321
JB
608
609 /* Did we find the target character? */
610 if (cursor < ceiling_addr)
611 {
612 if (--count == 0)
613 {
614 immediate_quit = 0;
fa8ed3e0 615 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
9169c321
JB
616 }
617 cursor++;
618 }
619 }
620
fa8ed3e0 621 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321 622 }
ca1d1d23
JB
623 }
624 else
9169c321
JB
625 while (start > end)
626 {
627 /* The last character to check before the next obstacle. */
fa8ed3e0
RS
628 int ceiling_byte = CHAR_TO_BYTE (end);
629 int start_byte = CHAR_TO_BYTE (start);
67ce527d 630 int tem;
9169c321
JB
631
632 /* Consult the newline cache, if appropriate. */
633 if (target == '\n' && newline_cache)
634 {
635 int next_change;
636 immediate_quit = 0;
637 while (region_cache_backward
fa8ed3e0
RS
638 (current_buffer, newline_cache, start_byte, &next_change))
639 start_byte = next_change;
cbe0db0d 640 immediate_quit = allow_quit;
9169c321
JB
641
642 /* Start should never be at or before end. */
fa8ed3e0
RS
643 if (start_byte <= ceiling_byte)
644 start_byte = ceiling_byte + 1;
9169c321
JB
645
646 /* Now the text before start is an unknown region, and
647 next_change is the position of the next known region. */
fa8ed3e0 648 ceiling_byte = max (next_change, ceiling_byte);
9169c321
JB
649 }
650
651 /* Stop scanning before the gap. */
67ce527d
KH
652 tem = BUFFER_FLOOR_OF (start_byte - 1);
653 ceiling_byte = max (tem, ceiling_byte);
9169c321
JB
654
655 {
656 /* The termination address of the dumb loop. */
fa8ed3e0
RS
657 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
658 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
659 unsigned char *base = cursor;
660
661 while (cursor >= ceiling_addr)
662 {
663 unsigned char *scan_start = cursor;
664
665 while (*cursor != target && --cursor >= ceiling_addr)
666 ;
667
668 /* If we're looking for newlines, cache the fact that
669 the region from after the cursor to start is free of them. */
670 if (target == '\n' && newline_cache)
671 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
672 start_byte + cursor - base,
673 start_byte + scan_start - base);
9169c321
JB
674
675 /* Did we find the target character? */
676 if (cursor >= ceiling_addr)
677 {
678 if (++count >= 0)
679 {
680 immediate_quit = 0;
fa8ed3e0 681 return BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
682 }
683 cursor--;
684 }
685 }
686
fa8ed3e0 687 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
688 }
689 }
690
ca1d1d23
JB
691 immediate_quit = 0;
692 if (shortage != 0)
ffd56f97 693 *shortage = count * direction;
9169c321 694 return start;
ca1d1d23 695}
fa8ed3e0
RS
696\f
697/* Search for COUNT instances of a line boundary, which means either a
698 newline or (if selective display enabled) a carriage return.
699 Start at START. If COUNT is negative, search backwards.
700
701 We report the resulting position by calling TEMP_SET_PT_BOTH.
702
703 If we find COUNT instances. we position after (always after,
704 even if scanning backwards) the COUNTth match, and return 0.
705
706 If we don't find COUNT instances before reaching the end of the
707 buffer (or the beginning, if scanning backwards), we return
708 the number of line boundaries left unfound, and position at
709 the limit we bumped up against.
710
711 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
d5d57b92 712 except in special cases. */
ca1d1d23 713
63fa018d 714int
fa8ed3e0
RS
715scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
716 int start, start_byte;
717 int limit, limit_byte;
718 register int count;
719 int allow_quit;
63fa018d 720{
fa8ed3e0
RS
721 int direction = ((count > 0) ? 1 : -1);
722
723 register unsigned char *cursor;
724 unsigned char *base;
725
726 register int ceiling;
727 register unsigned char *ceiling_addr;
728
d5d57b92
RS
729 int old_immediate_quit = immediate_quit;
730
fa8ed3e0
RS
731 /* If we are not in selective display mode,
732 check only for newlines. */
733 int selective_display = (!NILP (current_buffer->selective_display)
734 && !INTEGERP (current_buffer->selective_display));
735
736 /* The code that follows is like scan_buffer
737 but checks for either newline or carriage return. */
738
d5d57b92
RS
739 if (allow_quit)
740 immediate_quit++;
fa8ed3e0
RS
741
742 start_byte = CHAR_TO_BYTE (start);
743
744 if (count > 0)
745 {
746 while (start_byte < limit_byte)
747 {
748 ceiling = BUFFER_CEILING_OF (start_byte);
749 ceiling = min (limit_byte - 1, ceiling);
750 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
751 base = (cursor = BYTE_POS_ADDR (start_byte));
752 while (1)
753 {
754 while (*cursor != '\n' && ++cursor != ceiling_addr)
755 ;
756
757 if (cursor != ceiling_addr)
758 {
759 if (--count == 0)
760 {
d5d57b92 761 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
762 start_byte = start_byte + cursor - base + 1;
763 start = BYTE_TO_CHAR (start_byte);
764 TEMP_SET_PT_BOTH (start, start_byte);
765 return 0;
766 }
767 else
768 if (++cursor == ceiling_addr)
769 break;
770 }
771 else
772 break;
773 }
774 start_byte += cursor - base;
775 }
776 }
777 else
778 {
fa8ed3e0
RS
779 while (start_byte > limit_byte)
780 {
781 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
782 ceiling = max (limit_byte, ceiling);
783 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
784 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
785 while (1)
786 {
787 while (--cursor != ceiling_addr && *cursor != '\n')
788 ;
789
790 if (cursor != ceiling_addr)
791 {
792 if (++count == 0)
793 {
d5d57b92 794 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
795 /* Return the position AFTER the match we found. */
796 start_byte = start_byte + cursor - base + 1;
797 start = BYTE_TO_CHAR (start_byte);
798 TEMP_SET_PT_BOTH (start, start_byte);
799 return 0;
800 }
801 }
802 else
803 break;
804 }
805 /* Here we add 1 to compensate for the last decrement
806 of CURSOR, which took it past the valid range. */
807 start_byte += cursor - base + 1;
808 }
809 }
810
811 TEMP_SET_PT_BOTH (limit, limit_byte);
d5d57b92 812 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
813
814 return count * direction;
63fa018d
RS
815}
816
ca1d1d23 817int
fa8ed3e0 818find_next_newline_no_quit (from, cnt)
ca1d1d23
JB
819 register int from, cnt;
820{
fa8ed3e0 821 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
9169c321
JB
822}
823
9169c321
JB
824/* Like find_next_newline, but returns position before the newline,
825 not after, and only search up to TO. This isn't just
826 find_next_newline (...)-1, because you might hit TO. */
fa8ed3e0 827
9169c321
JB
828int
829find_before_next_newline (from, to, cnt)
cbe0db0d 830 int from, to, cnt;
9169c321
JB
831{
832 int shortage;
833 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
834
835 if (shortage == 0)
836 pos--;
837
838 return pos;
ca1d1d23
JB
839}
840\f
ca1d1d23
JB
841/* Subroutines of Lisp buffer search functions. */
842
843static Lisp_Object
b819a390 844search_command (string, bound, noerror, count, direction, RE, posix)
ca1d1d23
JB
845 Lisp_Object string, bound, noerror, count;
846 int direction;
847 int RE;
b819a390 848 int posix;
ca1d1d23
JB
849{
850 register int np;
9f43ad85 851 int lim, lim_byte;
ca1d1d23
JB
852 int n = direction;
853
854 if (!NILP (count))
855 {
856 CHECK_NUMBER (count, 3);
857 n *= XINT (count);
858 }
859
860 CHECK_STRING (string, 0);
861 if (NILP (bound))
9f43ad85
RS
862 {
863 if (n > 0)
864 lim = ZV, lim_byte = ZV_BYTE;
865 else
866 lim = BEGV, lim_byte = BEGV_BYTE;
867 }
ca1d1d23
JB
868 else
869 {
870 CHECK_NUMBER_COERCE_MARKER (bound, 1);
871 lim = XINT (bound);
6ec8bbd2 872 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
873 error ("Invalid search bound (wrong side of point)");
874 if (lim > ZV)
9f43ad85 875 lim = ZV, lim_byte = ZV_BYTE;
588d2fd5 876 else if (lim < BEGV)
9f43ad85 877 lim = BEGV, lim_byte = BEGV_BYTE;
588d2fd5
KH
878 else
879 lim_byte = CHAR_TO_BYTE (lim);
ca1d1d23
JB
880 }
881
9f43ad85 882 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
ca1d1d23 883 (!NILP (current_buffer->case_fold_search)
facdc750 884 ? current_buffer->case_canon_table
3135e9fd 885 : Qnil),
ca1d1d23 886 (!NILP (current_buffer->case_fold_search)
facdc750 887 ? current_buffer->case_eqv_table
3135e9fd 888 : Qnil),
b819a390 889 posix);
ca1d1d23
JB
890 if (np <= 0)
891 {
892 if (NILP (noerror))
893 return signal_failure (string);
894 if (!EQ (noerror, Qt))
895 {
896 if (lim < BEGV || lim > ZV)
897 abort ();
9f43ad85 898 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
899 return Qnil;
900#if 0 /* This would be clean, but maybe programs depend on
901 a value of nil here. */
481399bf 902 np = lim;
a5f217b8 903#endif
ca1d1d23 904 }
481399bf
RS
905 else
906 return Qnil;
ca1d1d23
JB
907 }
908
909 if (np < BEGV || np > ZV)
910 abort ();
911
912 SET_PT (np);
913
914 return make_number (np);
915}
916\f
fa8ed3e0
RS
917/* Return 1 if REGEXP it matches just one constant string. */
918
b6d6a51c
KH
919static int
920trivial_regexp_p (regexp)
921 Lisp_Object regexp;
922{
fc932ac6 923 int len = STRING_BYTES (XSTRING (regexp));
b6d6a51c
KH
924 unsigned char *s = XSTRING (regexp)->data;
925 unsigned char c;
926 while (--len >= 0)
927 {
928 switch (*s++)
929 {
930 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
931 return 0;
932 case '\\':
933 if (--len < 0)
934 return 0;
935 switch (*s++)
936 {
937 case '|': case '(': case ')': case '`': case '\'': case 'b':
938 case 'B': case '<': case '>': case 'w': case 'W': case 's':
866f60fd 939 case 'S': case '=':
5679531d 940 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 941 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
942 case '6': case '7': case '8': case '9':
943 return 0;
944 }
945 }
946 }
947 return 1;
948}
949
ca325161 950/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 951 starting at position POS and stopping at position LIM,
b819a390 952 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
953 a regular expression if RE is true.
954
955 If N is positive, searching is forward and LIM must be greater than POS.
956 If N is negative, searching is backward and LIM must be less than POS.
957
facdc750 958 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 959 or else the position at the beginning of the Nth occurrence
b819a390
RS
960 (if searching backward) or the end (if searching forward).
961
962 POSIX is nonzero if we want full backtracking (POSIX style)
963 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 964
aff2ce94
RS
965#define TRANSLATE(out, trt, d) \
966do \
967 { \
968 if (! NILP (trt)) \
969 { \
970 Lisp_Object temp; \
971 temp = Faref (trt, make_number (d)); \
972 if (INTEGERP (temp)) \
973 out = XINT (temp); \
974 else \
975 out = d; \
976 } \
977 else \
978 out = d; \
979 } \
980while (0)
facdc750 981
b819a390 982static int
9f43ad85
RS
983search_buffer (string, pos, pos_byte, lim, lim_byte, n,
984 RE, trt, inverse_trt, posix)
ca1d1d23
JB
985 Lisp_Object string;
986 int pos;
9f43ad85 987 int pos_byte;
ca1d1d23 988 int lim;
9f43ad85 989 int lim_byte;
ca1d1d23
JB
990 int n;
991 int RE;
facdc750
RS
992 Lisp_Object trt;
993 Lisp_Object inverse_trt;
b819a390 994 int posix;
ca1d1d23
JB
995{
996 int len = XSTRING (string)->size;
fc932ac6 997 int len_byte = STRING_BYTES (XSTRING (string));
facdc750 998 register int i;
ca1d1d23 999
7074fde6
FP
1000 if (running_asynch_code)
1001 save_search_regs ();
1002
ca1d1d23 1003 /* Null string is found at starting position. */
3f57a499 1004 if (len == 0)
ca325161
RS
1005 {
1006 set_search_regs (pos, 0);
1007 return pos;
1008 }
3f57a499
RS
1009
1010 /* Searching 0 times means don't move. */
1011 if (n == 0)
ca1d1d23
JB
1012 return pos;
1013
b6d6a51c 1014 if (RE && !trivial_regexp_p (string))
ca1d1d23 1015 {
facdc750
RS
1016 unsigned char *p1, *p2;
1017 int s1, s2;
487282dc
KH
1018 struct re_pattern_buffer *bufp;
1019
0c8533c6
RS
1020 bufp = compile_pattern (string, &search_regs, trt, posix,
1021 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23 1022
ca1d1d23
JB
1023 immediate_quit = 1; /* Quit immediately if user types ^G,
1024 because letting this function finish
1025 can take too long. */
1026 QUIT; /* Do a pending quit right away,
1027 to avoid paradoxical behavior */
1028 /* Get pointers and sizes of the two strings
1029 that make up the visible portion of the buffer. */
1030
1031 p1 = BEGV_ADDR;
fa8ed3e0 1032 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1033 p2 = GAP_END_ADDR;
fa8ed3e0 1034 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1035 if (s1 < 0)
1036 {
1037 p2 = p1;
fa8ed3e0 1038 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1039 s1 = 0;
1040 }
1041 if (s2 < 0)
1042 {
fa8ed3e0 1043 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1044 s2 = 0;
1045 }
8bb43c28
RS
1046 re_match_object = Qnil;
1047
ca1d1d23
JB
1048 while (n < 0)
1049 {
42db823b 1050 int val;
487282dc 1051 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1052 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1053 &search_regs,
42db823b 1054 /* Don't allow match past current point */
4996330b 1055 pos_byte - BEGV_BYTE);
ca1d1d23 1056 if (val == -2)
b6d6a51c
KH
1057 {
1058 matcher_overflow ();
1059 }
ca1d1d23
JB
1060 if (val >= 0)
1061 {
26aff150 1062 pos_byte = search_regs.start[0] + BEGV_BYTE;
4746118a 1063 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1064 if (search_regs.start[i] >= 0)
1065 {
fa8ed3e0
RS
1066 search_regs.start[i]
1067 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1068 search_regs.end[i]
1069 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1070 }
a3668d92 1071 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1072 /* Set pos to the new position. */
1073 pos = search_regs.start[0];
1074 }
1075 else
1076 {
1077 immediate_quit = 0;
1078 return (n);
1079 }
1080 n++;
1081 }
1082 while (n > 0)
1083 {
42db823b 1084 int val;
487282dc 1085 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1086 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1087 &search_regs,
1088 lim_byte - BEGV_BYTE);
ca1d1d23 1089 if (val == -2)
b6d6a51c
KH
1090 {
1091 matcher_overflow ();
1092 }
ca1d1d23
JB
1093 if (val >= 0)
1094 {
26aff150 1095 pos_byte = search_regs.end[0] + BEGV_BYTE;
4746118a 1096 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1097 if (search_regs.start[i] >= 0)
1098 {
fa8ed3e0
RS
1099 search_regs.start[i]
1100 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1101 search_regs.end[i]
1102 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1103 }
a3668d92 1104 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1105 pos = search_regs.end[0];
1106 }
1107 else
1108 {
1109 immediate_quit = 0;
1110 return (0 - n);
1111 }
1112 n--;
1113 }
1114 immediate_quit = 0;
1115 return (pos);
1116 }
1117 else /* non-RE case */
1118 {
facdc750
RS
1119 unsigned char *raw_pattern, *pat;
1120 int raw_pattern_size;
1121 int raw_pattern_size_byte;
1122 unsigned char *patbuf;
1123 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1124 unsigned char *base_pat = XSTRING (string)->data;
1125 int charset_base = -1;
1126 int simple = 1;
1127
1128 /* MULTIBYTE says whether the text to be searched is multibyte.
1129 We must convert PATTERN to match that, or we will not really
1130 find things right. */
1131
1132 if (multibyte == STRING_MULTIBYTE (string))
1133 {
1134 raw_pattern = (char *) XSTRING (string)->data;
1135 raw_pattern_size = XSTRING (string)->size;
fc932ac6 1136 raw_pattern_size_byte = STRING_BYTES (XSTRING (string));
facdc750
RS
1137 }
1138 else if (multibyte)
1139 {
1140 raw_pattern_size = XSTRING (string)->size;
1141 raw_pattern_size_byte
1142 = count_size_as_multibyte (XSTRING (string)->data,
1143 raw_pattern_size);
1144 raw_pattern = (char *) alloca (raw_pattern_size_byte + 1);
1145 copy_text (XSTRING (string)->data, raw_pattern,
1146 XSTRING (string)->size, 0, 1);
1147 }
1148 else
1149 {
1150 /* Converting multibyte to single-byte.
1151
1152 ??? Perhaps this conversion should be done in a special way
1153 by subtracting nonascii-insert-offset from each non-ASCII char,
1154 so that only the multibyte chars which really correspond to
1155 the chosen single-byte character set can possibly match. */
1156 raw_pattern_size = XSTRING (string)->size;
1157 raw_pattern_size_byte = XSTRING (string)->size;
1158 raw_pattern = (char *) alloca (raw_pattern_size + 1);
1159 copy_text (XSTRING (string)->data, raw_pattern,
fc932ac6 1160 STRING_BYTES (XSTRING (string)), 1, 0);
facdc750
RS
1161 }
1162
1163 /* Copy and optionally translate the pattern. */
1164 len = raw_pattern_size;
1165 len_byte = raw_pattern_size_byte;
1166 patbuf = (unsigned char *) alloca (len_byte);
1167 pat = patbuf;
1168 base_pat = raw_pattern;
1169 if (multibyte)
1170 {
1171 while (--len >= 0)
1172 {
1173 unsigned char workbuf[4], *str;
aff2ce94 1174 int c, translated, inverse;
facdc750
RS
1175 int in_charlen, charlen;
1176
1177 /* If we got here and the RE flag is set, it's because we're
1178 dealing with a regexp known to be trivial, so the backslash
1179 just quotes the next character. */
1180 if (RE && *base_pat == '\\')
1181 {
1182 len--;
1183 len_byte--;
1184 base_pat++;
1185 }
1186
1187 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1188 /* Translate the character, if requested. */
aff2ce94 1189 TRANSLATE (translated, trt, c);
facdc750
RS
1190 /* If translation changed the byte-length, go back
1191 to the original character. */
1192 charlen = CHAR_STRING (translated, workbuf, str);
1193 if (in_charlen != charlen)
1194 {
1195 translated = c;
1196 charlen = CHAR_STRING (c, workbuf, str);
1197 }
1198
aff2ce94
RS
1199 TRANSLATE (inverse, inverse_trt, c);
1200
facdc750
RS
1201 /* Did this char actually get translated?
1202 Would any other char get translated into it? */
aff2ce94 1203 if (translated != c || inverse != c)
facdc750
RS
1204 {
1205 /* Keep track of which character set row
1206 contains the characters that need translation. */
1207 int charset_base_code = c & ~0xff;
1208 if (charset_base == -1)
1209 charset_base = charset_base_code;
1210 else if (charset_base != charset_base_code)
1211 /* If two different rows appear, needing translation,
1212 then we cannot use boyer_moore search. */
1213 simple = 0;
1214 /* ??? Handa: this must do simple = 0
1215 if c is a composite character. */
aff2ce94 1216 }
facdc750
RS
1217
1218 /* Store this character into the translated pattern. */
1219 bcopy (str, pat, charlen);
1220 pat += charlen;
1221 base_pat += in_charlen;
1222 len_byte -= in_charlen;
1223 }
1224 }
1225 else
1226 {
1227 while (--len >= 0)
1228 {
aff2ce94 1229 int c, translated, inverse;
facdc750
RS
1230
1231 /* If we got here and the RE flag is set, it's because we're
1232 dealing with a regexp known to be trivial, so the backslash
1233 just quotes the next character. */
1234 if (RE && *base_pat == '\\')
1235 {
1236 len--;
1237 base_pat++;
1238 }
1239 c = *base_pat++;
aff2ce94
RS
1240 TRANSLATE (translated, trt, c);
1241 TRANSLATE (inverse, inverse_trt, c);
facdc750
RS
1242
1243 /* Did this char actually get translated?
1244 Would any other char get translated into it? */
aff2ce94 1245 if (translated != c || inverse != c)
facdc750
RS
1246 {
1247 /* Keep track of which character set row
1248 contains the characters that need translation. */
1249 int charset_base_code = c & ~0xff;
1250 if (charset_base == -1)
1251 charset_base = charset_base_code;
1252 else if (charset_base != charset_base_code)
1253 /* If two different rows appear, needing translation,
1254 then we cannot use boyer_moore search. */
1255 simple = 0;
aff2ce94 1256 }
facdc750
RS
1257 *pat++ = translated;
1258 }
1259 }
1260
1261 len_byte = pat - patbuf;
1262 len = raw_pattern_size;
1263 pat = base_pat = patbuf;
1264
1265 if (simple)
1266 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
aff2ce94
RS
1267 pos, pos_byte, lim, lim_byte,
1268 charset_base);
facdc750
RS
1269 else
1270 return simple_search (n, pat, len, len_byte, trt,
1271 pos, pos_byte, lim, lim_byte);
1272 }
1273}
1274\f
1275/* Do a simple string search N times for the string PAT,
1276 whose length is LEN/LEN_BYTE,
1277 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1278 TRT is the translation table.
f8bd51c4 1279
facdc750
RS
1280 Return the character position where the match is found.
1281 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1282
facdc750
RS
1283 This kind of search works regardless of what is in PAT and
1284 regardless of what is in TRT. It is used in cases where
1285 boyer_moore cannot work. */
1286
1287static int
1288simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1289 int n;
1290 unsigned char *pat;
1291 int len, len_byte;
1292 Lisp_Object trt;
1293 int pos, pos_byte;
1294 int lim, lim_byte;
1295{
1296 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
ab228c24 1297 int forward = n > 0;
facdc750
RS
1298
1299 if (lim > pos && multibyte)
1300 while (n > 0)
1301 {
1302 while (1)
f8bd51c4 1303 {
facdc750
RS
1304 /* Try matching at position POS. */
1305 int this_pos = pos;
1306 int this_pos_byte = pos_byte;
1307 int this_len = len;
1308 int this_len_byte = len_byte;
1309 unsigned char *p = pat;
1310 if (pos + len > lim)
1311 goto stop;
1312
1313 while (this_len > 0)
1314 {
1315 int charlen, buf_charlen;
ab228c24 1316 int pat_ch, buf_ch;
facdc750 1317
ab228c24 1318 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1319 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1320 ZV_BYTE - this_pos_byte,
1321 buf_charlen);
aff2ce94 1322 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1323
1324 if (buf_ch != pat_ch)
1325 break;
ab228c24
RS
1326
1327 this_len_byte -= charlen;
1328 this_len--;
1329 p += charlen;
1330
1331 this_pos_byte += buf_charlen;
1332 this_pos++;
facdc750
RS
1333 }
1334
1335 if (this_len == 0)
1336 {
1337 pos += len;
1338 pos_byte += len_byte;
1339 break;
1340 }
1341
1342 INC_BOTH (pos, pos_byte);
f8bd51c4 1343 }
facdc750
RS
1344
1345 n--;
1346 }
1347 else if (lim > pos)
1348 while (n > 0)
1349 {
1350 while (1)
f8bd51c4 1351 {
facdc750
RS
1352 /* Try matching at position POS. */
1353 int this_pos = pos;
1354 int this_len = len;
1355 unsigned char *p = pat;
1356
1357 if (pos + len > lim)
1358 goto stop;
1359
1360 while (this_len > 0)
1361 {
1362 int pat_ch = *p++;
1363 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1364 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1365
1366 if (buf_ch != pat_ch)
1367 break;
ab228c24
RS
1368
1369 this_len--;
1370 this_pos++;
facdc750
RS
1371 }
1372
1373 if (this_len == 0)
1374 {
1375 pos += len;
1376 break;
1377 }
1378
1379 pos++;
f8bd51c4 1380 }
facdc750
RS
1381
1382 n--;
1383 }
1384 /* Backwards search. */
1385 else if (lim < pos && multibyte)
1386 while (n < 0)
1387 {
1388 while (1)
f8bd51c4 1389 {
facdc750
RS
1390 /* Try matching at position POS. */
1391 int this_pos = pos - len;
1392 int this_pos_byte = pos_byte - len_byte;
1393 int this_len = len;
1394 int this_len_byte = len_byte;
1395 unsigned char *p = pat;
1396
1397 if (pos - len < lim)
1398 goto stop;
1399
1400 while (this_len > 0)
1401 {
1402 int charlen, buf_charlen;
ab228c24 1403 int pat_ch, buf_ch;
facdc750 1404
ab228c24 1405 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1406 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1407 ZV_BYTE - this_pos_byte,
1408 buf_charlen);
aff2ce94 1409 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1410
1411 if (buf_ch != pat_ch)
1412 break;
ab228c24
RS
1413
1414 this_len_byte -= charlen;
1415 this_len--;
1416 p += charlen;
1417 this_pos_byte += buf_charlen;
1418 this_pos++;
facdc750
RS
1419 }
1420
1421 if (this_len == 0)
1422 {
1423 pos -= len;
1424 pos_byte -= len_byte;
1425 break;
1426 }
1427
1428 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1429 }
1430
facdc750
RS
1431 n++;
1432 }
1433 else if (lim < pos)
1434 while (n < 0)
1435 {
1436 while (1)
b6d6a51c 1437 {
facdc750
RS
1438 /* Try matching at position POS. */
1439 int this_pos = pos - len;
1440 int this_len = len;
1441 unsigned char *p = pat;
1442
1443 if (pos - len < lim)
1444 goto stop;
1445
1446 while (this_len > 0)
1447 {
1448 int pat_ch = *p++;
1449 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1450 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1451
1452 if (buf_ch != pat_ch)
1453 break;
ab228c24
RS
1454 this_len--;
1455 this_pos++;
facdc750
RS
1456 }
1457
1458 if (this_len == 0)
b6d6a51c 1459 {
facdc750
RS
1460 pos -= len;
1461 break;
b6d6a51c 1462 }
facdc750
RS
1463
1464 pos--;
b6d6a51c 1465 }
facdc750
RS
1466
1467 n++;
b6d6a51c 1468 }
facdc750
RS
1469
1470 stop:
1471 if (n == 0)
aff2ce94 1472 {
ab228c24
RS
1473 if (forward)
1474 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1475 else
1476 set_search_regs (multibyte ? pos_byte : pos, len_byte);
aff2ce94
RS
1477
1478 return pos;
1479 }
facdc750
RS
1480 else if (n > 0)
1481 return -n;
1482 else
1483 return n;
1484}
1485\f
1486/* Do Boyer-Moore search N times for the string PAT,
1487 whose length is LEN/LEN_BYTE,
1488 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1489 DIRECTION says which direction we search in.
1490 TRT and INVERSE_TRT are translation tables.
1491
1492 This kind of search works if all the characters in PAT that have
1493 nontrivial translation are the same aside from the last byte. This
1494 makes it possible to translate just the last byte of a character,
1495 and do so after just a simple test of the context.
1496
1497 If that criterion is not satisfied, do not call this function. */
1498
1499static int
1500boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
aff2ce94 1501 pos, pos_byte, lim, lim_byte, charset_base)
facdc750
RS
1502 int n;
1503 unsigned char *base_pat;
1504 int len, len_byte;
1505 Lisp_Object trt;
1506 Lisp_Object inverse_trt;
1507 int pos, pos_byte;
1508 int lim, lim_byte;
aff2ce94 1509 int charset_base;
facdc750
RS
1510{
1511 int direction = ((n > 0) ? 1 : -1);
1512 register int dirlen;
1513 int infinity, limit, k, stride_for_teases;
1514 register int *BM_tab;
1515 int *BM_tab_base;
1516 register unsigned char *cursor, *p_limit;
1517 register int i, j;
1518 unsigned char *pat;
1519 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1520
1521 unsigned char simple_translate[0400];
1522 int translate_prev_byte;
1523 int translate_anteprev_byte;
1524
1525#ifdef C_ALLOCA
1526 int BM_tab_space[0400];
1527 BM_tab = &BM_tab_space[0];
1528#else
1529 BM_tab = (int *) alloca (0400 * sizeof (int));
1530#endif
1531 /* The general approach is that we are going to maintain that we know */
1532 /* the first (closest to the present position, in whatever direction */
1533 /* we're searching) character that could possibly be the last */
1534 /* (furthest from present position) character of a valid match. We */
1535 /* advance the state of our knowledge by looking at that character */
1536 /* and seeing whether it indeed matches the last character of the */
1537 /* pattern. If it does, we take a closer look. If it does not, we */
1538 /* move our pointer (to putative last characters) as far as is */
1539 /* logically possible. This amount of movement, which I call a */
1540 /* stride, will be the length of the pattern if the actual character */
1541 /* appears nowhere in the pattern, otherwise it will be the distance */
1542 /* from the last occurrence of that character to the end of the */
1543 /* pattern. */
1544 /* As a coding trick, an enormous stride is coded into the table for */
1545 /* characters that match the last character. This allows use of only */
1546 /* a single test, a test for having gone past the end of the */
1547 /* permissible match region, to test for both possible matches (when */
1548 /* the stride goes past the end immediately) and failure to */
1549 /* match (where you get nudged past the end one stride at a time). */
1550
1551 /* Here we make a "mickey mouse" BM table. The stride of the search */
1552 /* is determined only by the last character of the putative match. */
1553 /* If that character does not match, we will stride the proper */
1554 /* distance to propose a match that superimposes it on the last */
1555 /* instance of a character that matches it (per trt), or misses */
1556 /* it entirely if there is none. */
1557
1558 dirlen = len_byte * direction;
1559 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1560 if (direction < 0)
1561 pat = (base_pat += len_byte - 1);
1562 else
1563 pat = base_pat;
1564 BM_tab_base = BM_tab;
1565 BM_tab += 0400;
1566 j = dirlen; /* to get it in a register */
1567 /* A character that does not appear in the pattern induces a */
1568 /* stride equal to the pattern length. */
1569 while (BM_tab_base != BM_tab)
1570 {
1571 *--BM_tab = j;
1572 *--BM_tab = j;
1573 *--BM_tab = j;
1574 *--BM_tab = j;
1575 }
1576
1577 /* We use this for translation, instead of TRT itself.
1578 We fill this in to handle the characters that actually
1579 occur in the pattern. Others don't matter anyway! */
1580 bzero (simple_translate, sizeof simple_translate);
1581 for (i = 0; i < 0400; i++)
1582 simple_translate[i] = i;
1583
1584 i = 0;
1585 while (i != infinity)
1586 {
1587 unsigned char *ptr = pat + i;
1588 i += direction;
1589 if (i == dirlen)
1590 i = infinity;
1591 if (! NILP (trt))
ca1d1d23 1592 {
facdc750 1593 int ch;
aff2ce94 1594 int untranslated;
facdc750
RS
1595 int this_translated = 1;
1596
1597 if (multibyte
1598 && (ptr + 1 == pat + len_byte || CHAR_HEAD_P (ptr[1])))
ca1d1d23 1599 {
facdc750
RS
1600 unsigned char *charstart = ptr;
1601 while (! CHAR_HEAD_P (*charstart))
1602 charstart--;
aff2ce94 1603 untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
ab228c24 1604 if (charset_base == (untranslated & ~0xff))
facdc750 1605 {
ab228c24 1606 TRANSLATE (ch, trt, untranslated);
aff2ce94
RS
1607 if (! CHAR_HEAD_P (*ptr))
1608 {
1609 translate_prev_byte = ptr[-1];
1610 if (! CHAR_HEAD_P (translate_prev_byte))
1611 translate_anteprev_byte = ptr[-2];
1612 }
facdc750 1613 }
aff2ce94 1614 else
ab228c24
RS
1615 {
1616 this_translated = 0;
1617 ch = *ptr;
1618 }
ca1d1d23 1619 }
facdc750 1620 else if (!multibyte)
aff2ce94 1621 TRANSLATE (ch, trt, *ptr);
ca1d1d23
JB
1622 else
1623 {
facdc750
RS
1624 ch = *ptr;
1625 this_translated = 0;
ca1d1d23 1626 }
facdc750 1627
ab228c24
RS
1628 if (ch > 0400)
1629 j = ((unsigned char) ch) | 0200;
1630 else
1631 j = (unsigned char) ch;
1632
facdc750
RS
1633 if (i == infinity)
1634 stride_for_teases = BM_tab[j];
ab228c24 1635
facdc750
RS
1636 BM_tab[j] = dirlen - i;
1637 /* A translation table is accompanied by its inverse -- see */
1638 /* comment following downcase_table for details */
1639 if (this_translated)
ab228c24
RS
1640 {
1641 int starting_ch = ch;
1642 int starting_j = j;
1643 while (1)
1644 {
1645 TRANSLATE (ch, inverse_trt, ch);
1646 if (ch > 0400)
1647 j = ((unsigned char) ch) | 0200;
1648 else
1649 j = (unsigned char) ch;
1650
1651 /* For all the characters that map into CH,
1652 set up simple_translate to map the last byte
1653 into STARTING_J. */
1654 simple_translate[j] = starting_j;
1655 if (ch == starting_ch)
1656 break;
1657 BM_tab[j] = dirlen - i;
1658 }
1659 }
facdc750
RS
1660 }
1661 else
1662 {
1663 j = *ptr;
1664
1665 if (i == infinity)
1666 stride_for_teases = BM_tab[j];
1667 BM_tab[j] = dirlen - i;
ca1d1d23 1668 }
facdc750
RS
1669 /* stride_for_teases tells how much to stride if we get a */
1670 /* match on the far character but are subsequently */
1671 /* disappointed, by recording what the stride would have been */
1672 /* for that character if the last character had been */
1673 /* different. */
1674 }
1675 infinity = dirlen - infinity;
1676 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1677 /* loop invariant - POS_BYTE points at where last char (first
1678 char if reverse) of pattern would align in a possible match. */
1679 while (n != 0)
1680 {
1681 int tail_end;
1682 unsigned char *tail_end_ptr;
1683
1684 /* It's been reported that some (broken) compiler thinks that
1685 Boolean expressions in an arithmetic context are unsigned.
1686 Using an explicit ?1:0 prevents this. */
1687 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1688 < 0)
1689 return (n * (0 - direction));
1690 /* First we do the part we can by pointers (maybe nothing) */
1691 QUIT;
1692 pat = base_pat;
1693 limit = pos_byte - dirlen + direction;
67ce527d
KH
1694 if (direction > 0)
1695 {
1696 limit = BUFFER_CEILING_OF (limit);
1697 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1698 can take on without hitting edge of buffer or the gap. */
1699 limit = min (limit, pos_byte + 20000);
1700 limit = min (limit, lim_byte - 1);
1701 }
1702 else
1703 {
1704 limit = BUFFER_FLOOR_OF (limit);
1705 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1706 can take on without hitting edge of buffer or the gap. */
1707 limit = max (limit, pos_byte - 20000);
1708 limit = max (limit, lim_byte);
1709 }
facdc750
RS
1710 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1711 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1712
1713 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1714 {
facdc750
RS
1715 unsigned char *p2;
1716
1717 p_limit = BYTE_POS_ADDR (limit);
1718 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1719 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1720 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1721 {
facdc750 1722 if (direction > 0) /* worth duplicating */
ca1d1d23 1723 {
facdc750
RS
1724 /* Use signed comparison if appropriate
1725 to make cursor+infinity sure to be > p_limit.
1726 Assuming that the buffer lies in a range of addresses
1727 that are all "positive" (as ints) or all "negative",
1728 either kind of comparison will work as long
1729 as we don't step by infinity. So pick the kind
1730 that works when we do step by infinity. */
1731 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1732 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1733 cursor += BM_tab[*cursor];
ca1d1d23 1734 else
facdc750
RS
1735 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1736 cursor += BM_tab[*cursor];
1737 }
1738 else
1739 {
1740 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1741 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1742 cursor += BM_tab[*cursor];
1743 else
1744 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1745 cursor += BM_tab[*cursor];
1746 }
ca1d1d23 1747/* If you are here, cursor is beyond the end of the searched region. */
facdc750
RS
1748/* This can happen if you match on the far character of the pattern, */
1749/* because the "stride" of that character is infinity, a number able */
1750/* to throw you well beyond the end of the search. It can also */
1751/* happen if you fail to match within the permitted region and would */
1752/* otherwise try a character beyond that region */
1753 if ((cursor - p_limit) * direction <= len_byte)
1754 break; /* a small overrun is genuine */
1755 cursor -= infinity; /* large overrun = hit */
1756 i = dirlen - direction;
1757 if (! NILP (trt))
1758 {
1759 while ((i -= direction) + direction != 0)
ca1d1d23 1760 {
facdc750
RS
1761 int ch;
1762 cursor -= direction;
1763 /* Translate only the last byte of a character. */
1764 if (! multibyte
1765 || ((cursor == tail_end_ptr
1766 || CHAR_HEAD_P (cursor[1]))
1767 && (CHAR_HEAD_P (cursor[0])
1768 || (translate_prev_byte == cursor[-1]
1769 && (CHAR_HEAD_P (translate_prev_byte)
1770 || translate_anteprev_byte == cursor[-2])))))
1771 ch = simple_translate[*cursor];
1772 else
1773 ch = *cursor;
1774 if (pat[i] != ch)
1775 break;
ca1d1d23 1776 }
facdc750
RS
1777 }
1778 else
1779 {
1780 while ((i -= direction) + direction != 0)
ca1d1d23 1781 {
facdc750
RS
1782 cursor -= direction;
1783 if (pat[i] != *cursor)
1784 break;
ca1d1d23 1785 }
facdc750
RS
1786 }
1787 cursor += dirlen - i - direction; /* fix cursor */
1788 if (i + direction == 0)
1789 {
1790 int position;
0c8533c6 1791
facdc750 1792 cursor -= direction;
1113d9db 1793
facdc750
RS
1794 position = pos_byte + cursor - p2 + ((direction > 0)
1795 ? 1 - len_byte : 0);
1796 set_search_regs (position, len_byte);
ca325161 1797
facdc750
RS
1798 if ((n -= direction) != 0)
1799 cursor += dirlen; /* to resume search */
ca1d1d23 1800 else
facdc750
RS
1801 return ((direction > 0)
1802 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1803 }
facdc750
RS
1804 else
1805 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 1806 }
facdc750
RS
1807 pos_byte += cursor - p2;
1808 }
1809 else
1810 /* Now we'll pick up a clump that has to be done the hard */
1811 /* way because it covers a discontinuity */
1812 {
1813 limit = ((direction > 0)
1814 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1815 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1816 limit = ((direction > 0)
1817 ? min (limit + len_byte, lim_byte - 1)
1818 : max (limit - len_byte, lim_byte));
1819 /* LIMIT is now the last value POS_BYTE can have
1820 and still be valid for a possible match. */
1821 while (1)
ca1d1d23 1822 {
facdc750
RS
1823 /* This loop can be coded for space rather than */
1824 /* speed because it will usually run only once. */
1825 /* (the reach is at most len + 21, and typically */
1826 /* does not exceed len) */
1827 while ((limit - pos_byte) * direction >= 0)
1828 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1829 /* now run the same tests to distinguish going off the */
1830 /* end, a match or a phony match. */
1831 if ((pos_byte - limit) * direction <= len_byte)
1832 break; /* ran off the end */
1833 /* Found what might be a match.
1834 Set POS_BYTE back to last (first if reverse) pos. */
1835 pos_byte -= infinity;
1836 i = dirlen - direction;
1837 while ((i -= direction) + direction != 0)
ca1d1d23 1838 {
facdc750
RS
1839 int ch;
1840 unsigned char *ptr;
1841 pos_byte -= direction;
1842 ptr = BYTE_POS_ADDR (pos_byte);
1843 /* Translate only the last byte of a character. */
1844 if (! multibyte
1845 || ((ptr == tail_end_ptr
1846 || CHAR_HEAD_P (ptr[1]))
1847 && (CHAR_HEAD_P (ptr[0])
1848 || (translate_prev_byte == ptr[-1]
1849 && (CHAR_HEAD_P (translate_prev_byte)
1850 || translate_anteprev_byte == ptr[-2])))))
1851 ch = simple_translate[*ptr];
1852 else
1853 ch = *ptr;
1854 if (pat[i] != ch)
1855 break;
1856 }
1857 /* Above loop has moved POS_BYTE part or all the way
1858 back to the first pos (last pos if reverse).
1859 Set it once again at the last (first if reverse) char. */
1860 pos_byte += dirlen - i- direction;
1861 if (i + direction == 0)
1862 {
1863 int position;
1864 pos_byte -= direction;
1113d9db 1865
facdc750 1866 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
0c8533c6 1867
facdc750 1868 set_search_regs (position, len_byte);
ca325161 1869
facdc750
RS
1870 if ((n -= direction) != 0)
1871 pos_byte += dirlen; /* to resume search */
ca1d1d23 1872 else
facdc750
RS
1873 return ((direction > 0)
1874 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1875 }
facdc750
RS
1876 else
1877 pos_byte += stride_for_teases;
1878 }
1879 }
1880 /* We have done one clump. Can we continue? */
1881 if ((lim_byte - pos_byte) * direction < 0)
1882 return ((0 - n) * direction);
ca1d1d23 1883 }
facdc750 1884 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 1885}
ca325161 1886
fa8ed3e0 1887/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
ca325161
RS
1888 for a match just found in the current buffer. */
1889
1890static void
fa8ed3e0
RS
1891set_search_regs (beg_byte, nbytes)
1892 int beg_byte, nbytes;
ca325161
RS
1893{
1894 /* Make sure we have registers in which to store
1895 the match position. */
1896 if (search_regs.num_regs == 0)
1897 {
2d4a771a
RS
1898 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1899 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 1900 search_regs.num_regs = 2;
ca325161
RS
1901 }
1902
fa8ed3e0
RS
1903 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1904 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 1905 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 1906}
ca1d1d23
JB
1907\f
1908/* Given a string of words separated by word delimiters,
1909 compute a regexp that matches those exact words
1910 separated by arbitrary punctuation. */
1911
1912static Lisp_Object
1913wordify (string)
1914 Lisp_Object string;
1915{
1916 register unsigned char *p, *o;
0c8533c6 1917 register int i, i_byte, len, punct_count = 0, word_count = 0;
ca1d1d23 1918 Lisp_Object val;
0c8533c6
RS
1919 int prev_c = 0;
1920 int adjust;
ca1d1d23
JB
1921
1922 CHECK_STRING (string, 0);
1923 p = XSTRING (string)->data;
1924 len = XSTRING (string)->size;
1925
0c8533c6
RS
1926 for (i = 0, i_byte = 0; i < len; )
1927 {
1928 int c;
1929
1930 if (STRING_MULTIBYTE (string))
1931 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1932 else
1933 c = XSTRING (string)->data[i++];
1934
1935 if (SYNTAX (c) != Sword)
1936 {
1937 punct_count++;
1938 if (i > 0 && SYNTAX (prev_c) == Sword)
1939 word_count++;
1940 }
ca1d1d23 1941
0c8533c6
RS
1942 prev_c = c;
1943 }
1944
1945 if (SYNTAX (prev_c) == Sword)
1946 word_count++;
1947 if (!word_count)
1948 return build_string ("");
1949
1950 adjust = - punct_count + 5 * (word_count - 1) + 4;
1951 val = make_uninit_multibyte_string (len + adjust,
fc932ac6 1952 STRING_BYTES (XSTRING (string)) + adjust);
ca1d1d23
JB
1953
1954 o = XSTRING (val)->data;
1955 *o++ = '\\';
1956 *o++ = 'b';
1957
fc932ac6 1958 for (i = 0; i < STRING_BYTES (XSTRING (val)); i++)
ca1d1d23
JB
1959 if (SYNTAX (p[i]) == Sword)
1960 *o++ = p[i];
1961 else if (i > 0 && SYNTAX (p[i-1]) == Sword && --word_count)
1962 {
1963 *o++ = '\\';
1964 *o++ = 'W';
1965 *o++ = '\\';
1966 *o++ = 'W';
1967 *o++ = '*';
1968 }
1969
1970 *o++ = '\\';
1971 *o++ = 'b';
1972
1973 return val;
1974}
1975\f
1976DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
6af43974 1977 "MSearch backward: ",
ca1d1d23
JB
1978 "Search backward from point for STRING.\n\
1979Set point to the beginning of the occurrence found, and return point.\n\
1980An optional second argument bounds the search; it is a buffer position.\n\
1981The match found must not extend before that position.\n\
1982Optional third argument, if t, means if fail just return nil (no error).\n\
1983 If not nil and not t, position at limit of search and return nil.\n\
1984Optional fourth argument is repeat count--search for successive occurrences.\n\
1985See also the functions `match-beginning', `match-end' and `replace-match'.")
1986 (string, bound, noerror, count)
1987 Lisp_Object string, bound, noerror, count;
1988{
b819a390 1989 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
1990}
1991
6af43974 1992DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
ca1d1d23
JB
1993 "Search forward from point for STRING.\n\
1994Set point to the end of the occurrence found, and return point.\n\
1995An optional second argument bounds the search; it is a buffer position.\n\
1996The match found must not extend after that position. nil is equivalent\n\
1997 to (point-max).\n\
1998Optional third argument, if t, means if fail just return nil (no error).\n\
1999 If not nil and not t, move to limit of search and return nil.\n\
2000Optional fourth argument is repeat count--search for successive occurrences.\n\
2001See also the functions `match-beginning', `match-end' and `replace-match'.")
2002 (string, bound, noerror, count)
2003 Lisp_Object string, bound, noerror, count;
2004{
b819a390 2005 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
2006}
2007
2008DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2009 "sWord search backward: ",
2010 "Search backward from point for STRING, ignoring differences in punctuation.\n\
2011Set point to the beginning of the occurrence found, and return point.\n\
2012An optional second argument bounds the search; it is a buffer position.\n\
2013The match found must not extend before that position.\n\
2014Optional third argument, if t, means if fail just return nil (no error).\n\
2015 If not nil and not t, move to limit of search and return nil.\n\
2016Optional fourth argument is repeat count--search for successive occurrences.")
2017 (string, bound, noerror, count)
2018 Lisp_Object string, bound, noerror, count;
2019{
b819a390 2020 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2021}
2022
2023DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2024 "sWord search: ",
2025 "Search forward from point for STRING, ignoring differences in punctuation.\n\
2026Set point to the end of the occurrence found, and return point.\n\
2027An optional second argument bounds the search; it is a buffer position.\n\
2028The match found must not extend after that position.\n\
2029Optional third argument, if t, means if fail just return nil (no error).\n\
2030 If not nil and not t, move to limit of search and return nil.\n\
2031Optional fourth argument is repeat count--search for successive occurrences.")
2032 (string, bound, noerror, count)
2033 Lisp_Object string, bound, noerror, count;
2034{
b819a390 2035 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
2036}
2037
2038DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2039 "sRE search backward: ",
2040 "Search backward from point for match for regular expression REGEXP.\n\
2041Set point to the beginning of the match, and return point.\n\
2042The match found is the one starting last in the buffer\n\
19c0a730 2043and yet ending before the origin of the search.\n\
ca1d1d23
JB
2044An optional second argument bounds the search; it is a buffer position.\n\
2045The match found must start at or after that position.\n\
2046Optional third argument, if t, means if fail just return nil (no error).\n\
2047 If not nil and not t, move to limit of search and return nil.\n\
2048Optional fourth argument is repeat count--search for successive occurrences.\n\
2049See also the functions `match-beginning', `match-end' and `replace-match'.")
19c0a730
KH
2050 (regexp, bound, noerror, count)
2051 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2052{
b819a390 2053 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2054}
2055
2056DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2057 "sRE search: ",
2058 "Search forward from point for regular expression REGEXP.\n\
2059Set point to the end of the occurrence found, and return point.\n\
2060An optional second argument bounds the search; it is a buffer position.\n\
2061The match found must not extend after that position.\n\
2062Optional third argument, if t, means if fail just return nil (no error).\n\
2063 If not nil and not t, move to limit of search and return nil.\n\
2064Optional fourth argument is repeat count--search for successive occurrences.\n\
2065See also the functions `match-beginning', `match-end' and `replace-match'.")
19c0a730
KH
2066 (regexp, bound, noerror, count)
2067 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2068{
b819a390
RS
2069 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2070}
2071
2072DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2073 "sPosix search backward: ",
2074 "Search backward from point for match for regular expression REGEXP.\n\
2075Find the longest match in accord with Posix regular expression rules.\n\
2076Set point to the beginning of the match, and return point.\n\
2077The match found is the one starting last in the buffer\n\
2078and yet ending before the origin of the search.\n\
2079An optional second argument bounds the search; it is a buffer position.\n\
2080The match found must start at or after that position.\n\
2081Optional third argument, if t, means if fail just return nil (no error).\n\
2082 If not nil and not t, move to limit of search and return nil.\n\
2083Optional fourth argument is repeat count--search for successive occurrences.\n\
2084See also the functions `match-beginning', `match-end' and `replace-match'.")
2085 (regexp, bound, noerror, count)
2086 Lisp_Object regexp, bound, noerror, count;
2087{
2088 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2089}
2090
2091DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2092 "sPosix search: ",
2093 "Search forward from point for regular expression REGEXP.\n\
2094Find the longest match in accord with Posix regular expression rules.\n\
2095Set point to the end of the occurrence found, and return point.\n\
2096An optional second argument bounds the search; it is a buffer position.\n\
2097The match found must not extend after that position.\n\
2098Optional third argument, if t, means if fail just return nil (no error).\n\
2099 If not nil and not t, move to limit of search and return nil.\n\
2100Optional fourth argument is repeat count--search for successive occurrences.\n\
2101See also the functions `match-beginning', `match-end' and `replace-match'.")
2102 (regexp, bound, noerror, count)
2103 Lisp_Object regexp, bound, noerror, count;
2104{
2105 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2106}
2107\f
d7a5ad5f 2108DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
ca1d1d23
JB
2109 "Replace text matched by last search with NEWTEXT.\n\
2110If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
5b9cf4b2
RS
2111Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
2112based on the replaced text.\n\
2113If the replaced text has only capital letters\n\
2114and has at least one multiletter word, convert NEWTEXT to all caps.\n\
2115If the replaced text has at least one word starting with a capital letter,\n\
2116then capitalize each word in NEWTEXT.\n\n\
ca1d1d23
JB
2117If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
2118Otherwise treat `\\' as special:\n\
2119 `\\&' in NEWTEXT means substitute original matched text.\n\
2120 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
2121 If Nth parens didn't match, substitute nothing.\n\
2122 `\\\\' means insert one `\\'.\n\
1113d9db 2123FIXEDCASE and LITERAL are optional arguments.\n\
080c45fd
RS
2124Leaves point at end of replacement text.\n\
2125\n\
2126The optional fourth argument STRING can be a string to modify.\n\
2127In that case, this function creates and returns a new string\n\
d7a5ad5f
RS
2128which is made by replacing the part of STRING that was matched.\n\
2129\n\
2130The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
2131It says to replace just that subexpression instead of the whole match.\n\
2132This is useful only after a regular expression search or match\n\
2133since only regular expressions have distinguished subexpressions.")
2134 (newtext, fixedcase, literal, string, subexp)
2135 Lisp_Object newtext, fixedcase, literal, string, subexp;
ca1d1d23
JB
2136{
2137 enum { nochange, all_caps, cap_initial } case_action;
2138 register int pos, last;
2139 int some_multiletter_word;
97832bd0 2140 int some_lowercase;
73dc8771 2141 int some_uppercase;
208767c3 2142 int some_nonuppercase_initial;
ca1d1d23
JB
2143 register int c, prevc;
2144 int inslen;
d7a5ad5f 2145 int sub;
3e18eecf 2146 int opoint, newpoint;
ca1d1d23 2147
16fdc568 2148 CHECK_STRING (newtext, 0);
ca1d1d23 2149
080c45fd
RS
2150 if (! NILP (string))
2151 CHECK_STRING (string, 4);
2152
ca1d1d23
JB
2153 case_action = nochange; /* We tried an initialization */
2154 /* but some C compilers blew it */
4746118a
JB
2155
2156 if (search_regs.num_regs <= 0)
2157 error ("replace-match called before any match found");
2158
d7a5ad5f
RS
2159 if (NILP (subexp))
2160 sub = 0;
2161 else
2162 {
2163 CHECK_NUMBER (subexp, 3);
2164 sub = XINT (subexp);
2165 if (sub < 0 || sub >= search_regs.num_regs)
2166 args_out_of_range (subexp, make_number (search_regs.num_regs));
2167 }
2168
080c45fd
RS
2169 if (NILP (string))
2170 {
d7a5ad5f
RS
2171 if (search_regs.start[sub] < BEGV
2172 || search_regs.start[sub] > search_regs.end[sub]
2173 || search_regs.end[sub] > ZV)
2174 args_out_of_range (make_number (search_regs.start[sub]),
2175 make_number (search_regs.end[sub]));
080c45fd
RS
2176 }
2177 else
2178 {
d7a5ad5f
RS
2179 if (search_regs.start[sub] < 0
2180 || search_regs.start[sub] > search_regs.end[sub]
2181 || search_regs.end[sub] > XSTRING (string)->size)
2182 args_out_of_range (make_number (search_regs.start[sub]),
2183 make_number (search_regs.end[sub]));
080c45fd 2184 }
ca1d1d23
JB
2185
2186 if (NILP (fixedcase))
2187 {
fa8ed3e0 2188 int beg;
ca1d1d23
JB
2189 /* Decide how to casify by examining the matched text. */
2190
fa8ed3e0
RS
2191 if (NILP (string))
2192 last = CHAR_TO_BYTE (search_regs.end[sub]);
2193 else
2194 last = search_regs.end[sub];
2195
2196 if (NILP (string))
2197 beg = CHAR_TO_BYTE (search_regs.start[sub]);
2198 else
2199 beg = search_regs.start[sub];
2200
ca1d1d23
JB
2201 prevc = '\n';
2202 case_action = all_caps;
2203
2204 /* some_multiletter_word is set nonzero if any original word
2205 is more than one letter long. */
2206 some_multiletter_word = 0;
97832bd0 2207 some_lowercase = 0;
208767c3 2208 some_nonuppercase_initial = 0;
73dc8771 2209 some_uppercase = 0;
ca1d1d23 2210
fa8ed3e0 2211 for (pos = beg; pos < last; pos++)
ca1d1d23 2212 {
080c45fd 2213 if (NILP (string))
5679531d 2214 c = FETCH_BYTE (pos);
080c45fd
RS
2215 else
2216 c = XSTRING (string)->data[pos];
2217
ca1d1d23
JB
2218 if (LOWERCASEP (c))
2219 {
2220 /* Cannot be all caps if any original char is lower case */
2221
97832bd0 2222 some_lowercase = 1;
ca1d1d23 2223 if (SYNTAX (prevc) != Sword)
208767c3 2224 some_nonuppercase_initial = 1;
ca1d1d23
JB
2225 else
2226 some_multiletter_word = 1;
2227 }
2228 else if (!NOCASEP (c))
2229 {
73dc8771 2230 some_uppercase = 1;
97832bd0 2231 if (SYNTAX (prevc) != Sword)
c4d460ce 2232 ;
97832bd0 2233 else
ca1d1d23
JB
2234 some_multiletter_word = 1;
2235 }
208767c3
RS
2236 else
2237 {
2238 /* If the initial is a caseless word constituent,
2239 treat that like a lowercase initial. */
2240 if (SYNTAX (prevc) != Sword)
2241 some_nonuppercase_initial = 1;
2242 }
ca1d1d23
JB
2243
2244 prevc = c;
2245 }
2246
97832bd0
RS
2247 /* Convert to all caps if the old text is all caps
2248 and has at least one multiletter word. */
2249 if (! some_lowercase && some_multiletter_word)
2250 case_action = all_caps;
c4d460ce 2251 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2252 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2253 case_action = cap_initial;
208767c3 2254 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2255 /* Should x -> yz, operating on X, give Yz or YZ?
2256 We'll assume the latter. */
2257 case_action = all_caps;
97832bd0
RS
2258 else
2259 case_action = nochange;
ca1d1d23
JB
2260 }
2261
080c45fd
RS
2262 /* Do replacement in a string. */
2263 if (!NILP (string))
2264 {
2265 Lisp_Object before, after;
2266
2267 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2268 make_number (search_regs.start[sub]));
2269 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2270
636a5e28
RS
2271 /* Substitute parts of the match into NEWTEXT
2272 if desired. */
080c45fd
RS
2273 if (NILP (literal))
2274 {
2275 int lastpos = -1;
0c8533c6 2276 int lastpos_byte = -1;
080c45fd
RS
2277 /* We build up the substituted string in ACCUM. */
2278 Lisp_Object accum;
2279 Lisp_Object middle;
0c8533c6 2280 int pos_byte;
080c45fd
RS
2281
2282 accum = Qnil;
2283
fc932ac6 2284 for (pos_byte = 0, pos = 0; pos_byte < STRING_BYTES (XSTRING (newtext));)
080c45fd
RS
2285 {
2286 int substart = -1;
2287 int subend;
1e79ec24 2288 int delbackslash = 0;
080c45fd 2289
0c8533c6
RS
2290 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2291
080c45fd
RS
2292 if (c == '\\')
2293 {
0c8533c6 2294 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
080c45fd
RS
2295 if (c == '&')
2296 {
d7a5ad5f
RS
2297 substart = search_regs.start[sub];
2298 subend = search_regs.end[sub];
080c45fd
RS
2299 }
2300 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2301 {
ad10348f 2302 if (search_regs.start[c - '0'] >= 0)
080c45fd
RS
2303 {
2304 substart = search_regs.start[c - '0'];
2305 subend = search_regs.end[c - '0'];
2306 }
2307 }
1e79ec24
KH
2308 else if (c == '\\')
2309 delbackslash = 1;
636a5e28
RS
2310 else
2311 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2312 }
2313 if (substart >= 0)
2314 {
2315 if (pos - 1 != lastpos + 1)
0c8533c6
RS
2316 middle = substring_both (newtext, lastpos + 1,
2317 lastpos_byte + 1,
2318 pos - 1, pos_byte - 1);
080c45fd
RS
2319 else
2320 middle = Qnil;
2321 accum = concat3 (accum, middle,
0c8533c6
RS
2322 Fsubstring (string,
2323 make_number (substart),
080c45fd
RS
2324 make_number (subend)));
2325 lastpos = pos;
0c8533c6 2326 lastpos_byte = pos_byte;
080c45fd 2327 }
1e79ec24
KH
2328 else if (delbackslash)
2329 {
0c8533c6
RS
2330 middle = substring_both (newtext, lastpos + 1,
2331 lastpos_byte + 1,
2332 pos, pos_byte);
2333
1e79ec24
KH
2334 accum = concat2 (accum, middle);
2335 lastpos = pos;
0c8533c6 2336 lastpos_byte = pos_byte;
1e79ec24 2337 }
080c45fd
RS
2338 }
2339
2340 if (pos != lastpos + 1)
0c8533c6
RS
2341 middle = substring_both (newtext, lastpos + 1,
2342 lastpos_byte + 1,
2343 pos, pos_byte);
080c45fd
RS
2344 else
2345 middle = Qnil;
2346
2347 newtext = concat2 (accum, middle);
2348 }
2349
636a5e28 2350 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2351 if (case_action == all_caps)
2352 newtext = Fupcase (newtext);
2353 else if (case_action == cap_initial)
2b2eead9 2354 newtext = Fupcase_initials (newtext);
080c45fd
RS
2355
2356 return concat3 (before, newtext, after);
2357 }
2358
b0eba991
RS
2359 /* Record point, the move (quietly) to the start of the match. */
2360 if (PT > search_regs.start[sub])
2361 opoint = PT - ZV;
2362 else
2363 opoint = PT;
2364
fa8ed3e0 2365 TEMP_SET_PT (search_regs.start[sub]);
b0eba991 2366
9a76659d
JB
2367 /* We insert the replacement text before the old text, and then
2368 delete the original text. This means that markers at the
2369 beginning or end of the original will float to the corresponding
2370 position in the replacement. */
ca1d1d23 2371 if (!NILP (literal))
16fdc568 2372 Finsert_and_inherit (1, &newtext);
ca1d1d23
JB
2373 else
2374 {
2375 struct gcpro gcpro1;
16fdc568 2376 GCPRO1 (newtext);
ca1d1d23 2377
16fdc568 2378 for (pos = 0; pos < XSTRING (newtext)->size; pos++)
ca1d1d23 2379 {
6ec8bbd2 2380 int offset = PT - search_regs.start[sub];
9a76659d 2381
16fdc568 2382 c = XSTRING (newtext)->data[pos];
ca1d1d23
JB
2383 if (c == '\\')
2384 {
16fdc568 2385 c = XSTRING (newtext)->data[++pos];
ca1d1d23 2386 if (c == '&')
9a76659d
JB
2387 Finsert_buffer_substring
2388 (Fcurrent_buffer (),
d7a5ad5f
RS
2389 make_number (search_regs.start[sub] + offset),
2390 make_number (search_regs.end[sub] + offset));
78445046 2391 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
2392 {
2393 if (search_regs.start[c - '0'] >= 1)
9a76659d
JB
2394 Finsert_buffer_substring
2395 (Fcurrent_buffer (),
2396 make_number (search_regs.start[c - '0'] + offset),
2397 make_number (search_regs.end[c - '0'] + offset));
ca1d1d23 2398 }
636a5e28 2399 else if (c == '\\')
ca1d1d23 2400 insert_char (c);
636a5e28
RS
2401 else
2402 error ("Invalid use of `\\' in replacement text");
ca1d1d23
JB
2403 }
2404 else
2405 insert_char (c);
2406 }
2407 UNGCPRO;
2408 }
2409
6ec8bbd2 2410 inslen = PT - (search_regs.start[sub]);
d7a5ad5f 2411 del_range (search_regs.start[sub] + inslen, search_regs.end[sub] + inslen);
ca1d1d23
JB
2412
2413 if (case_action == all_caps)
6ec8bbd2 2414 Fupcase_region (make_number (PT - inslen), make_number (PT));
ca1d1d23 2415 else if (case_action == cap_initial)
6ec8bbd2 2416 Fupcase_initials_region (make_number (PT - inslen), make_number (PT));
b0eba991 2417
3e18eecf
RS
2418 newpoint = PT;
2419
b0eba991 2420 /* Put point back where it was in the text. */
8d808a65 2421 if (opoint <= 0)
fa8ed3e0 2422 TEMP_SET_PT (opoint + ZV);
b0eba991 2423 else
fa8ed3e0 2424 TEMP_SET_PT (opoint);
b0eba991
RS
2425
2426 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2427 move_if_not_intangible (newpoint);
b0eba991 2428
ca1d1d23
JB
2429 return Qnil;
2430}
2431\f
2432static Lisp_Object
2433match_limit (num, beginningp)
2434 Lisp_Object num;
2435 int beginningp;
2436{
2437 register int n;
2438
2439 CHECK_NUMBER (num, 0);
2440 n = XINT (num);
4746118a
JB
2441 if (n < 0 || n >= search_regs.num_regs)
2442 args_out_of_range (num, make_number (search_regs.num_regs));
2443 if (search_regs.num_regs <= 0
2444 || search_regs.start[n] < 0)
ca1d1d23
JB
2445 return Qnil;
2446 return (make_number ((beginningp) ? search_regs.start[n]
2447 : search_regs.end[n]));
2448}
2449
2450DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2451 "Return position of start of text matched by last search.\n\
5806161b
EN
2452SUBEXP, a number, specifies which parenthesized expression in the last\n\
2453 regexp.\n\
2454Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2455 SUBEXP pairs.\n\
ca1d1d23 2456Zero means the entire text matched by the whole regexp or whole string.")
5806161b
EN
2457 (subexp)
2458 Lisp_Object subexp;
ca1d1d23 2459{
5806161b 2460 return match_limit (subexp, 1);
ca1d1d23
JB
2461}
2462
2463DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2464 "Return position of end of text matched by last search.\n\
5806161b
EN
2465SUBEXP, a number, specifies which parenthesized expression in the last\n\
2466 regexp.\n\
2467Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2468 SUBEXP pairs.\n\
ca1d1d23 2469Zero means the entire text matched by the whole regexp or whole string.")
5806161b
EN
2470 (subexp)
2471 Lisp_Object subexp;
ca1d1d23 2472{
5806161b 2473 return match_limit (subexp, 0);
ca1d1d23
JB
2474}
2475
56256c2a 2476DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
ca1d1d23
JB
2477 "Return a list containing all info on what the last search matched.\n\
2478Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
2479All the elements are markers or nil (nil if the Nth pair didn't match)\n\
2480if the last match was on a buffer; integers or nil if a string was matched.\n\
56256c2a
RS
2481Use `store-match-data' to reinstate the data in this list.\n\
2482\n\
2483If INTEGERS (the optional first argument) is non-nil, always use integers\n\
8ca821e9 2484\(rather than markers) to represent buffer positions.\n\
56256c2a
RS
2485If REUSE is a list, reuse it as part of the value. If REUSE is long enough\n\
2486to hold all the values, and if INTEGERS is non-nil, no consing is done.")
2487 (integers, reuse)
2488 Lisp_Object integers, reuse;
ca1d1d23 2489{
56256c2a 2490 Lisp_Object tail, prev;
4746118a 2491 Lisp_Object *data;
ca1d1d23
JB
2492 int i, len;
2493
daa37602 2494 if (NILP (last_thing_searched))
c36bcf1b 2495 return Qnil;
daa37602 2496
4746118a
JB
2497 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2498 * sizeof (Lisp_Object));
2499
ca1d1d23 2500 len = -1;
4746118a 2501 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2502 {
2503 int start = search_regs.start[i];
2504 if (start >= 0)
2505 {
56256c2a
RS
2506 if (EQ (last_thing_searched, Qt)
2507 || ! NILP (integers))
ca1d1d23 2508 {
c235cce7
KH
2509 XSETFASTINT (data[2 * i], start);
2510 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2511 }
0ed62dc7 2512 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2513 {
2514 data[2 * i] = Fmake_marker ();
daa37602
JB
2515 Fset_marker (data[2 * i],
2516 make_number (start),
2517 last_thing_searched);
ca1d1d23
JB
2518 data[2 * i + 1] = Fmake_marker ();
2519 Fset_marker (data[2 * i + 1],
daa37602
JB
2520 make_number (search_regs.end[i]),
2521 last_thing_searched);
ca1d1d23 2522 }
daa37602
JB
2523 else
2524 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2525 abort ();
2526
ca1d1d23
JB
2527 len = i;
2528 }
2529 else
2530 data[2 * i] = data [2 * i + 1] = Qnil;
2531 }
56256c2a
RS
2532
2533 /* If REUSE is not usable, cons up the values and return them. */
2534 if (! CONSP (reuse))
2535 return Flist (2 * len + 2, data);
2536
2537 /* If REUSE is a list, store as many value elements as will fit
2538 into the elements of REUSE. */
2539 for (i = 0, tail = reuse; CONSP (tail);
2540 i++, tail = XCONS (tail)->cdr)
2541 {
2542 if (i < 2 * len + 2)
2543 XCONS (tail)->car = data[i];
2544 else
2545 XCONS (tail)->car = Qnil;
2546 prev = tail;
2547 }
2548
2549 /* If we couldn't fit all value elements into REUSE,
2550 cons up the rest of them and add them to the end of REUSE. */
2551 if (i < 2 * len + 2)
2552 XCONS (prev)->cdr = Flist (2 * len + 2 - i, data + i);
2553
2554 return reuse;
ca1d1d23
JB
2555}
2556
2557
3f1c005b 2558DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
ca1d1d23
JB
2559 "Set internal data on last search match from elements of LIST.\n\
2560LIST should have been created by calling `match-data' previously.")
2561 (list)
2562 register Lisp_Object list;
2563{
2564 register int i;
2565 register Lisp_Object marker;
2566
7074fde6
FP
2567 if (running_asynch_code)
2568 save_search_regs ();
2569
ca1d1d23 2570 if (!CONSP (list) && !NILP (list))
b37902c8 2571 list = wrong_type_argument (Qconsp, list);
ca1d1d23 2572
daa37602
JB
2573 /* Unless we find a marker with a buffer in LIST, assume that this
2574 match data came from a string. */
2575 last_thing_searched = Qt;
2576
4746118a
JB
2577 /* Allocate registers if they don't already exist. */
2578 {
d084e942 2579 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
2580
2581 if (length > search_regs.num_regs)
2582 {
1113d9db
JB
2583 if (search_regs.num_regs == 0)
2584 {
2585 search_regs.start
2586 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2587 search_regs.end
2588 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2589 }
4746118a 2590 else
1113d9db
JB
2591 {
2592 search_regs.start
2593 = (regoff_t *) xrealloc (search_regs.start,
2594 length * sizeof (regoff_t));
2595 search_regs.end
2596 = (regoff_t *) xrealloc (search_regs.end,
2597 length * sizeof (regoff_t));
2598 }
4746118a 2599
487282dc 2600 search_regs.num_regs = length;
4746118a
JB
2601 }
2602 }
2603
2604 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2605 {
2606 marker = Fcar (list);
2607 if (NILP (marker))
2608 {
2609 search_regs.start[i] = -1;
2610 list = Fcdr (list);
2611 }
2612 else
2613 {
0ed62dc7 2614 if (MARKERP (marker))
daa37602
JB
2615 {
2616 if (XMARKER (marker)->buffer == 0)
c235cce7 2617 XSETFASTINT (marker, 0);
daa37602 2618 else
a3668d92 2619 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
daa37602 2620 }
ca1d1d23
JB
2621
2622 CHECK_NUMBER_COERCE_MARKER (marker, 0);
2623 search_regs.start[i] = XINT (marker);
2624 list = Fcdr (list);
2625
2626 marker = Fcar (list);
0ed62dc7 2627 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
c235cce7 2628 XSETFASTINT (marker, 0);
ca1d1d23
JB
2629
2630 CHECK_NUMBER_COERCE_MARKER (marker, 0);
2631 search_regs.end[i] = XINT (marker);
2632 }
2633 list = Fcdr (list);
2634 }
2635
2636 return Qnil;
2637}
2638
7074fde6
FP
2639/* If non-zero the match data have been saved in saved_search_regs
2640 during the execution of a sentinel or filter. */
75ebf74b 2641static int search_regs_saved;
7074fde6
FP
2642static struct re_registers saved_search_regs;
2643
2644/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2645 if asynchronous code (filter or sentinel) is running. */
2646static void
2647save_search_regs ()
2648{
2649 if (!search_regs_saved)
2650 {
2651 saved_search_regs.num_regs = search_regs.num_regs;
2652 saved_search_regs.start = search_regs.start;
2653 saved_search_regs.end = search_regs.end;
2654 search_regs.num_regs = 0;
2d4a771a
RS
2655 search_regs.start = 0;
2656 search_regs.end = 0;
7074fde6
FP
2657
2658 search_regs_saved = 1;
2659 }
2660}
2661
2662/* Called upon exit from filters and sentinels. */
2663void
2664restore_match_data ()
2665{
2666 if (search_regs_saved)
2667 {
2668 if (search_regs.num_regs > 0)
2669 {
2670 xfree (search_regs.start);
2671 xfree (search_regs.end);
2672 }
2673 search_regs.num_regs = saved_search_regs.num_regs;
2674 search_regs.start = saved_search_regs.start;
2675 search_regs.end = saved_search_regs.end;
2676
2677 search_regs_saved = 0;
2678 }
2679}
2680
ca1d1d23
JB
2681/* Quote a string to inactivate reg-expr chars */
2682
2683DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2684 "Return a regexp string which matches exactly STRING and nothing else.")
5806161b
EN
2685 (string)
2686 Lisp_Object string;
ca1d1d23
JB
2687{
2688 register unsigned char *in, *out, *end;
2689 register unsigned char *temp;
0c8533c6 2690 int backslashes_added = 0;
ca1d1d23 2691
5806161b 2692 CHECK_STRING (string, 0);
ca1d1d23 2693
fc932ac6 2694 temp = (unsigned char *) alloca (STRING_BYTES (XSTRING (string)) * 2);
ca1d1d23
JB
2695
2696 /* Now copy the data into the new string, inserting escapes. */
2697
5806161b 2698 in = XSTRING (string)->data;
fc932ac6 2699 end = in + STRING_BYTES (XSTRING (string));
ca1d1d23
JB
2700 out = temp;
2701
2702 for (; in != end; in++)
2703 {
2704 if (*in == '[' || *in == ']'
2705 || *in == '*' || *in == '.' || *in == '\\'
2706 || *in == '?' || *in == '+'
2707 || *in == '^' || *in == '$')
0c8533c6 2708 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
2709 *out++ = *in;
2710 }
2711
3f8100f1 2712 return make_specified_string (temp,
0c8533c6 2713 XSTRING (string)->size + backslashes_added,
3f8100f1
RS
2714 out - temp,
2715 STRING_MULTIBYTE (string));
ca1d1d23
JB
2716}
2717\f
2718syms_of_search ()
2719{
2720 register int i;
2721
487282dc
KH
2722 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2723 {
2724 searchbufs[i].buf.allocated = 100;
2725 searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
2726 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2727 searchbufs[i].regexp = Qnil;
2728 staticpro (&searchbufs[i].regexp);
2729 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2730 }
2731 searchbuf_head = &searchbufs[0];
ca1d1d23
JB
2732
2733 Qsearch_failed = intern ("search-failed");
2734 staticpro (&Qsearch_failed);
2735 Qinvalid_regexp = intern ("invalid-regexp");
2736 staticpro (&Qinvalid_regexp);
2737
2738 Fput (Qsearch_failed, Qerror_conditions,
2739 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2740 Fput (Qsearch_failed, Qerror_message,
2741 build_string ("Search failed"));
2742
2743 Fput (Qinvalid_regexp, Qerror_conditions,
2744 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2745 Fput (Qinvalid_regexp, Qerror_message,
2746 build_string ("Invalid regexp"));
2747
daa37602
JB
2748 last_thing_searched = Qnil;
2749 staticpro (&last_thing_searched);
2750
ca1d1d23 2751 defsubr (&Slooking_at);
b819a390
RS
2752 defsubr (&Sposix_looking_at);
2753 defsubr (&Sstring_match);
2754 defsubr (&Sposix_string_match);
ca1d1d23
JB
2755 defsubr (&Ssearch_forward);
2756 defsubr (&Ssearch_backward);
2757 defsubr (&Sword_search_forward);
2758 defsubr (&Sword_search_backward);
2759 defsubr (&Sre_search_forward);
2760 defsubr (&Sre_search_backward);
b819a390
RS
2761 defsubr (&Sposix_search_forward);
2762 defsubr (&Sposix_search_backward);
ca1d1d23
JB
2763 defsubr (&Sreplace_match);
2764 defsubr (&Smatch_beginning);
2765 defsubr (&Smatch_end);
2766 defsubr (&Smatch_data);
3f1c005b 2767 defsubr (&Sset_match_data);
ca1d1d23
JB
2768 defsubr (&Sregexp_quote);
2769}