Fix -Wimplicit warnings.
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
31c8f881 2 Copyright (C) 1985, 86, 87, 93, 94, 97, 1998 Free Software Foundation, Inc.
ca1d1d23
JB
3
4This file is part of GNU Emacs.
5
6GNU Emacs is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
7c938215 8the Free Software Foundation; either version 2, or (at your option)
ca1d1d23
JB
9any later version.
10
11GNU Emacs is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU Emacs; see the file COPYING. If not, write to
3b7ad313
EN
18the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19Boston, MA 02111-1307, USA. */
ca1d1d23
JB
20
21
18160b98 22#include <config.h>
dfcf069d
AS
23#ifdef STDC_HEADERS
24#include <stdlib.h>
25#endif
ca1d1d23
JB
26#include "lisp.h"
27#include "syntax.h"
5679531d 28#include "category.h"
ca1d1d23 29#include "buffer.h"
5679531d 30#include "charset.h"
9169c321 31#include "region-cache.h"
ca1d1d23 32#include "commands.h"
9ac0d9e0 33#include "blockinput.h"
bf1760bb 34#include "intervals.h"
4746118a 35
ca1d1d23
JB
36#include <sys/types.h>
37#include "regex.h"
38
67ce527d
KH
39#define min(a, b) ((a) < (b) ? (a) : (b))
40#define max(a, b) ((a) > (b) ? (a) : (b))
41
1d288aef 42#define REGEXP_CACHE_SIZE 20
ca1d1d23 43
487282dc
KH
44/* If the regexp is non-nil, then the buffer contains the compiled form
45 of that regexp, suitable for searching. */
1d288aef
RS
46struct regexp_cache
47{
487282dc
KH
48 struct regexp_cache *next;
49 Lisp_Object regexp;
50 struct re_pattern_buffer buf;
51 char fastmap[0400];
b819a390
RS
52 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
53 char posix;
487282dc 54};
ca1d1d23 55
487282dc
KH
56/* The instances of that struct. */
57struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 58
487282dc
KH
59/* The head of the linked list; points to the most recently used buffer. */
60struct regexp_cache *searchbuf_head;
ca1d1d23 61
ca1d1d23 62
4746118a
JB
63/* Every call to re_match, etc., must pass &search_regs as the regs
64 argument unless you can show it is unnecessary (i.e., if re_match
65 is certainly going to be called again before region-around-match
66 can be called).
67
68 Since the registers are now dynamically allocated, we need to make
69 sure not to refer to the Nth register before checking that it has
1113d9db
JB
70 been allocated by checking search_regs.num_regs.
71
72 The regex code keeps track of whether it has allocated the search
487282dc
KH
73 buffer using bits in the re_pattern_buffer. This means that whenever
74 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
75 allocated any registers, and will allocate new registers the next
76 time you call a searching or matching function. Therefore, we need
77 to call re_set_registers after compiling a new pattern or after
78 setting the match registers, so that the regex functions will be
79 able to free or re-allocate it properly. */
ca1d1d23
JB
80static struct re_registers search_regs;
81
daa37602
JB
82/* The buffer in which the last search was performed, or
83 Qt if the last search was done in a string;
84 Qnil if no searching has been done yet. */
85static Lisp_Object last_thing_searched;
ca1d1d23 86
8e6208c5 87/* error condition signaled when regexp compile_pattern fails */
ca1d1d23
JB
88
89Lisp_Object Qinvalid_regexp;
90
ca325161 91static void set_search_regs ();
044f81f1 92static void save_search_regs ();
facdc750
RS
93static int simple_search ();
94static int boyer_moore ();
b819a390
RS
95static int search_buffer ();
96
ca1d1d23
JB
97static void
98matcher_overflow ()
99{
100 error ("Stack overflow in regexp matcher");
101}
102
103#ifdef __STDC__
104#define CONST const
105#else
106#define CONST
107#endif
108
b819a390
RS
109/* Compile a regexp and signal a Lisp error if anything goes wrong.
110 PATTERN is the pattern to compile.
111 CP is the place to put the result.
facdc750 112 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
113 REGP is the structure that says where to store the "register"
114 values that will result from matching this pattern.
115 If it is 0, we should compile the pattern not to record any
116 subexpression bounds.
117 POSIX is nonzero if we want full backtracking (POSIX style)
5679531d
KH
118 for this pattern. 0 means backtrack only enough to get a valid match.
119 MULTIBYTE is nonzero if we want to handle multibyte characters in
120 PATTERN. 0 means all multibyte characters are recognized just as
121 sequences of binary data. */
ca1d1d23 122
487282dc 123static void
5679531d 124compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
487282dc 125 struct regexp_cache *cp;
ca1d1d23 126 Lisp_Object pattern;
facdc750 127 Lisp_Object translate;
487282dc 128 struct re_registers *regp;
b819a390 129 int posix;
5679531d 130 int multibyte;
ca1d1d23 131{
f8bd51c4
KH
132 char *raw_pattern;
133 int raw_pattern_size;
d451e4db 134 char *val;
b819a390 135 reg_syntax_t old;
ca1d1d23 136
f8bd51c4
KH
137 /* MULTIBYTE says whether the text to be searched is multibyte.
138 We must convert PATTERN to match that, or we will not really
139 find things right. */
140
141 if (multibyte == STRING_MULTIBYTE (pattern))
142 {
143 raw_pattern = (char *) XSTRING (pattern)->data;
fc932ac6 144 raw_pattern_size = STRING_BYTES (XSTRING (pattern));
f8bd51c4
KH
145 }
146 else if (multibyte)
147 {
148 raw_pattern_size = count_size_as_multibyte (XSTRING (pattern)->data,
149 XSTRING (pattern)->size);
150 raw_pattern = (char *) alloca (raw_pattern_size + 1);
151 copy_text (XSTRING (pattern)->data, raw_pattern,
152 XSTRING (pattern)->size, 0, 1);
153 }
154 else
155 {
156 /* Converting multibyte to single-byte.
157
158 ??? Perhaps this conversion should be done in a special way
159 by subtracting nonascii-insert-offset from each non-ASCII char,
160 so that only the multibyte chars which really correspond to
161 the chosen single-byte character set can possibly match. */
162 raw_pattern_size = XSTRING (pattern)->size;
163 raw_pattern = (char *) alloca (raw_pattern_size + 1);
164 copy_text (XSTRING (pattern)->data, raw_pattern,
fc932ac6 165 STRING_BYTES (XSTRING (pattern)), 1, 0);
f8bd51c4
KH
166 }
167
487282dc 168 cp->regexp = Qnil;
facdc750 169 cp->buf.translate = (! NILP (translate) ? translate : 0);
b819a390 170 cp->posix = posix;
5679531d 171 cp->buf.multibyte = multibyte;
9ac0d9e0 172 BLOCK_INPUT;
b819a390
RS
173 old = re_set_syntax (RE_SYNTAX_EMACS
174 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
f8bd51c4 175 val = (char *) re_compile_pattern (raw_pattern, raw_pattern_size, &cp->buf);
b819a390 176 re_set_syntax (old);
9ac0d9e0 177 UNBLOCK_INPUT;
ca1d1d23 178 if (val)
487282dc 179 Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
1113d9db 180
487282dc 181 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
182}
183
184/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
185 the cache.
186 PATTERN is the pattern to compile.
facdc750 187 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
188 REGP is the structure that says where to store the "register"
189 values that will result from matching this pattern.
190 If it is 0, we should compile the pattern not to record any
191 subexpression bounds.
192 POSIX is nonzero if we want full backtracking (POSIX style)
193 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
194
195struct re_pattern_buffer *
0c8533c6 196compile_pattern (pattern, regp, translate, posix, multibyte)
487282dc
KH
197 Lisp_Object pattern;
198 struct re_registers *regp;
facdc750 199 Lisp_Object translate;
0c8533c6 200 int posix, multibyte;
487282dc
KH
201{
202 struct regexp_cache *cp, **cpp;
203
204 for (cpp = &searchbuf_head; ; cpp = &cp->next)
205 {
206 cp = *cpp;
1d288aef
RS
207 if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
208 && !NILP (Fstring_equal (cp->regexp, pattern))
facdc750 209 && cp->buf.translate == (! NILP (translate) ? translate : 0)
5679531d
KH
210 && cp->posix == posix
211 && cp->buf.multibyte == multibyte)
487282dc
KH
212 break;
213
214 /* If we're at the end of the cache, compile into the last cell. */
215 if (cp->next == 0)
216 {
5679531d 217 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
487282dc
KH
218 break;
219 }
220 }
221
222 /* When we get here, cp (aka *cpp) contains the compiled pattern,
223 either because we found it in the cache or because we just compiled it.
224 Move it to the front of the queue to mark it as most recently used. */
225 *cpp = cp->next;
226 cp->next = searchbuf_head;
227 searchbuf_head = cp;
1113d9db 228
6639708c
RS
229 /* Advise the searching functions about the space we have allocated
230 for register data. */
231 if (regp)
232 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
233
487282dc 234 return &cp->buf;
ca1d1d23
JB
235}
236
237/* Error condition used for failing searches */
238Lisp_Object Qsearch_failed;
239
240Lisp_Object
241signal_failure (arg)
242 Lisp_Object arg;
243{
244 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
245 return Qnil;
246}
247\f
b819a390
RS
248static Lisp_Object
249looking_at_1 (string, posix)
ca1d1d23 250 Lisp_Object string;
b819a390 251 int posix;
ca1d1d23
JB
252{
253 Lisp_Object val;
254 unsigned char *p1, *p2;
255 int s1, s2;
256 register int i;
487282dc 257 struct re_pattern_buffer *bufp;
ca1d1d23 258
7074fde6
FP
259 if (running_asynch_code)
260 save_search_regs ();
261
ca1d1d23 262 CHECK_STRING (string, 0);
487282dc
KH
263 bufp = compile_pattern (string, &search_regs,
264 (!NILP (current_buffer->case_fold_search)
facdc750 265 ? DOWNCASE_TABLE : Qnil),
0c8533c6
RS
266 posix,
267 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23
JB
268
269 immediate_quit = 1;
270 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
271
272 /* Get pointers and sizes of the two strings
273 that make up the visible portion of the buffer. */
274
275 p1 = BEGV_ADDR;
fa8ed3e0 276 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 277 p2 = GAP_END_ADDR;
fa8ed3e0 278 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
279 if (s1 < 0)
280 {
281 p2 = p1;
fa8ed3e0 282 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
283 s1 = 0;
284 }
285 if (s2 < 0)
286 {
fa8ed3e0 287 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
288 s2 = 0;
289 }
8bb43c28
RS
290
291 re_match_object = Qnil;
ca1d1d23 292
487282dc 293 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
fa8ed3e0
RS
294 PT_BYTE - BEGV_BYTE, &search_regs,
295 ZV_BYTE - BEGV_BYTE);
ca1d1d23
JB
296 if (i == -2)
297 matcher_overflow ();
298
299 val = (0 <= i ? Qt : Qnil);
fa8ed3e0
RS
300 if (i >= 0)
301 for (i = 0; i < search_regs.num_regs; i++)
302 if (search_regs.start[i] >= 0)
303 {
304 search_regs.start[i]
305 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
306 search_regs.end[i]
307 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
308 }
a3668d92 309 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
310 immediate_quit = 0;
311 return val;
312}
313
b819a390 314DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
94f94972 315 "Return t if text after point matches regular expression REGEXP.\n\
b819a390
RS
316This function modifies the match data that `match-beginning',\n\
317`match-end' and `match-data' access; save and restore the match\n\
318data if you want to preserve them.")
94f94972
RS
319 (regexp)
320 Lisp_Object regexp;
b819a390 321{
94f94972 322 return looking_at_1 (regexp, 0);
b819a390
RS
323}
324
325DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
94f94972 326 "Return t if text after point matches regular expression REGEXP.\n\
b819a390
RS
327Find the longest match, in accord with Posix regular expression rules.\n\
328This function modifies the match data that `match-beginning',\n\
329`match-end' and `match-data' access; save and restore the match\n\
330data if you want to preserve them.")
94f94972
RS
331 (regexp)
332 Lisp_Object regexp;
b819a390 333{
94f94972 334 return looking_at_1 (regexp, 1);
b819a390
RS
335}
336\f
337static Lisp_Object
338string_match_1 (regexp, string, start, posix)
ca1d1d23 339 Lisp_Object regexp, string, start;
b819a390 340 int posix;
ca1d1d23
JB
341{
342 int val;
487282dc 343 struct re_pattern_buffer *bufp;
0c8533c6
RS
344 int pos, pos_byte;
345 int i;
ca1d1d23 346
7074fde6
FP
347 if (running_asynch_code)
348 save_search_regs ();
349
ca1d1d23
JB
350 CHECK_STRING (regexp, 0);
351 CHECK_STRING (string, 1);
352
353 if (NILP (start))
0c8533c6 354 pos = 0, pos_byte = 0;
ca1d1d23
JB
355 else
356 {
357 int len = XSTRING (string)->size;
358
359 CHECK_NUMBER (start, 2);
0c8533c6
RS
360 pos = XINT (start);
361 if (pos < 0 && -pos <= len)
362 pos = len + pos;
363 else if (0 > pos || pos > len)
ca1d1d23 364 args_out_of_range (string, start);
0c8533c6 365 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
366 }
367
487282dc
KH
368 bufp = compile_pattern (regexp, &search_regs,
369 (!NILP (current_buffer->case_fold_search)
facdc750 370 ? DOWNCASE_TABLE : Qnil),
0c8533c6
RS
371 posix,
372 STRING_MULTIBYTE (string));
ca1d1d23 373 immediate_quit = 1;
8bb43c28
RS
374 re_match_object = string;
375
487282dc 376 val = re_search (bufp, (char *) XSTRING (string)->data,
fc932ac6
RS
377 STRING_BYTES (XSTRING (string)), pos_byte,
378 STRING_BYTES (XSTRING (string)) - pos_byte,
ca1d1d23
JB
379 &search_regs);
380 immediate_quit = 0;
daa37602 381 last_thing_searched = Qt;
ca1d1d23
JB
382 if (val == -2)
383 matcher_overflow ();
384 if (val < 0) return Qnil;
0c8533c6
RS
385
386 for (i = 0; i < search_regs.num_regs; i++)
387 if (search_regs.start[i] >= 0)
388 {
389 search_regs.start[i]
390 = string_byte_to_char (string, search_regs.start[i]);
391 search_regs.end[i]
392 = string_byte_to_char (string, search_regs.end[i]);
393 }
394
395 return make_number (string_byte_to_char (string, val));
ca1d1d23 396}
e59a8453 397
b819a390
RS
398DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
399 "Return index of start of first match for REGEXP in STRING, or nil.\n\
400If third arg START is non-nil, start search at that index in STRING.\n\
401For index of first char beyond the match, do (match-end 0).\n\
402`match-end' and `match-beginning' also give indices of substrings\n\
403matched by parenthesis constructs in the pattern.")
404 (regexp, string, start)
405 Lisp_Object regexp, string, start;
406{
407 return string_match_1 (regexp, string, start, 0);
408}
409
410DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
411 "Return index of start of first match for REGEXP in STRING, or nil.\n\
412Find the longest match, in accord with Posix regular expression rules.\n\
413If third arg START is non-nil, start search at that index in STRING.\n\
414For index of first char beyond the match, do (match-end 0).\n\
415`match-end' and `match-beginning' also give indices of substrings\n\
416matched by parenthesis constructs in the pattern.")
417 (regexp, string, start)
418 Lisp_Object regexp, string, start;
419{
420 return string_match_1 (regexp, string, start, 1);
421}
422
e59a8453
RS
423/* Match REGEXP against STRING, searching all of STRING,
424 and return the index of the match, or negative on failure.
425 This does not clobber the match data. */
426
427int
428fast_string_match (regexp, string)
429 Lisp_Object regexp, string;
430{
431 int val;
487282dc 432 struct re_pattern_buffer *bufp;
e59a8453 433
facdc750
RS
434 bufp = compile_pattern (regexp, 0, Qnil,
435 0, STRING_MULTIBYTE (string));
e59a8453 436 immediate_quit = 1;
8bb43c28
RS
437 re_match_object = string;
438
487282dc 439 val = re_search (bufp, (char *) XSTRING (string)->data,
fc932ac6
RS
440 STRING_BYTES (XSTRING (string)), 0,
441 STRING_BYTES (XSTRING (string)), 0);
e59a8453
RS
442 immediate_quit = 0;
443 return val;
444}
5679531d
KH
445
446/* Match REGEXP against STRING, searching all of STRING ignoring case,
447 and return the index of the match, or negative on failure.
0c8533c6
RS
448 This does not clobber the match data.
449 We assume that STRING contains single-byte characters. */
5679531d
KH
450
451extern Lisp_Object Vascii_downcase_table;
452
453int
b4577c63 454fast_c_string_match_ignore_case (regexp, string)
5679531d
KH
455 Lisp_Object regexp;
456 char *string;
457{
458 int val;
459 struct re_pattern_buffer *bufp;
460 int len = strlen (string);
461
0c8533c6 462 regexp = string_make_unibyte (regexp);
b4577c63 463 re_match_object = Qt;
5679531d 464 bufp = compile_pattern (regexp, 0,
facdc750 465 Vascii_downcase_table, 0,
f8bd51c4 466 0);
5679531d
KH
467 immediate_quit = 1;
468 val = re_search (bufp, string, len, 0, len, 0);
469 immediate_quit = 0;
470 return val;
471}
ca1d1d23 472\f
9169c321
JB
473/* The newline cache: remembering which sections of text have no newlines. */
474
475/* If the user has requested newline caching, make sure it's on.
476 Otherwise, make sure it's off.
477 This is our cheezy way of associating an action with the change of
478 state of a buffer-local variable. */
479static void
480newline_cache_on_off (buf)
481 struct buffer *buf;
482{
483 if (NILP (buf->cache_long_line_scans))
484 {
485 /* It should be off. */
486 if (buf->newline_cache)
487 {
488 free_region_cache (buf->newline_cache);
489 buf->newline_cache = 0;
490 }
491 }
492 else
493 {
494 /* It should be on. */
495 if (buf->newline_cache == 0)
496 buf->newline_cache = new_region_cache ();
497 }
498}
499
500\f
501/* Search for COUNT instances of the character TARGET between START and END.
502
503 If COUNT is positive, search forwards; END must be >= START.
504 If COUNT is negative, search backwards for the -COUNTth instance;
505 END must be <= START.
506 If COUNT is zero, do anything you please; run rogue, for all I care.
507
508 If END is zero, use BEGV or ZV instead, as appropriate for the
509 direction indicated by COUNT.
ffd56f97
JB
510
511 If we find COUNT instances, set *SHORTAGE to zero, and return the
5bfe95c9
RS
512 position after the COUNTth match. Note that for reverse motion
513 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 514
9169c321
JB
515 If we don't find COUNT instances before reaching END, set *SHORTAGE
516 to the number of TARGETs left unfound, and return END.
ffd56f97 517
087a5f81
RS
518 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
519 except when inside redisplay. */
520
dfcf069d 521int
9169c321
JB
522scan_buffer (target, start, end, count, shortage, allow_quit)
523 register int target;
524 int start, end;
525 int count;
526 int *shortage;
087a5f81 527 int allow_quit;
ca1d1d23 528{
9169c321
JB
529 struct region_cache *newline_cache;
530 int direction;
ffd56f97 531
9169c321
JB
532 if (count > 0)
533 {
534 direction = 1;
535 if (! end) end = ZV;
536 }
537 else
538 {
539 direction = -1;
540 if (! end) end = BEGV;
541 }
ffd56f97 542
9169c321
JB
543 newline_cache_on_off (current_buffer);
544 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
545
546 if (shortage != 0)
547 *shortage = 0;
548
087a5f81 549 immediate_quit = allow_quit;
ca1d1d23 550
ffd56f97 551 if (count > 0)
9169c321 552 while (start != end)
ca1d1d23 553 {
9169c321
JB
554 /* Our innermost scanning loop is very simple; it doesn't know
555 about gaps, buffer ends, or the newline cache. ceiling is
556 the position of the last character before the next such
557 obstacle --- the last character the dumb search loop should
558 examine. */
fa8ed3e0
RS
559 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
560 int start_byte = CHAR_TO_BYTE (start);
67ce527d 561 int tem;
9169c321
JB
562
563 /* If we're looking for a newline, consult the newline cache
564 to see where we can avoid some scanning. */
565 if (target == '\n' && newline_cache)
566 {
567 int next_change;
568 immediate_quit = 0;
569 while (region_cache_forward
fa8ed3e0
RS
570 (current_buffer, newline_cache, start_byte, &next_change))
571 start_byte = next_change;
cbe0db0d 572 immediate_quit = allow_quit;
9169c321 573
fa8ed3e0
RS
574 /* START should never be after END. */
575 if (start_byte > ceiling_byte)
576 start_byte = ceiling_byte;
9169c321
JB
577
578 /* Now the text after start is an unknown region, and
579 next_change is the position of the next known region. */
fa8ed3e0 580 ceiling_byte = min (next_change - 1, ceiling_byte);
9169c321
JB
581 }
582
583 /* The dumb loop can only scan text stored in contiguous
584 bytes. BUFFER_CEILING_OF returns the last character
585 position that is contiguous, so the ceiling is the
586 position after that. */
67ce527d
KH
587 tem = BUFFER_CEILING_OF (start_byte);
588 ceiling_byte = min (tem, ceiling_byte);
9169c321
JB
589
590 {
591 /* The termination address of the dumb loop. */
fa8ed3e0
RS
592 register unsigned char *ceiling_addr
593 = BYTE_POS_ADDR (ceiling_byte) + 1;
594 register unsigned char *cursor
595 = BYTE_POS_ADDR (start_byte);
9169c321
JB
596 unsigned char *base = cursor;
597
598 while (cursor < ceiling_addr)
599 {
600 unsigned char *scan_start = cursor;
601
602 /* The dumb loop. */
603 while (*cursor != target && ++cursor < ceiling_addr)
604 ;
605
606 /* If we're looking for newlines, cache the fact that
607 the region from start to cursor is free of them. */
608 if (target == '\n' && newline_cache)
609 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
610 start_byte + scan_start - base,
611 start_byte + cursor - base);
9169c321
JB
612
613 /* Did we find the target character? */
614 if (cursor < ceiling_addr)
615 {
616 if (--count == 0)
617 {
618 immediate_quit = 0;
fa8ed3e0 619 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
9169c321
JB
620 }
621 cursor++;
622 }
623 }
624
fa8ed3e0 625 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321 626 }
ca1d1d23
JB
627 }
628 else
9169c321
JB
629 while (start > end)
630 {
631 /* The last character to check before the next obstacle. */
fa8ed3e0
RS
632 int ceiling_byte = CHAR_TO_BYTE (end);
633 int start_byte = CHAR_TO_BYTE (start);
67ce527d 634 int tem;
9169c321
JB
635
636 /* Consult the newline cache, if appropriate. */
637 if (target == '\n' && newline_cache)
638 {
639 int next_change;
640 immediate_quit = 0;
641 while (region_cache_backward
fa8ed3e0
RS
642 (current_buffer, newline_cache, start_byte, &next_change))
643 start_byte = next_change;
cbe0db0d 644 immediate_quit = allow_quit;
9169c321
JB
645
646 /* Start should never be at or before end. */
fa8ed3e0
RS
647 if (start_byte <= ceiling_byte)
648 start_byte = ceiling_byte + 1;
9169c321
JB
649
650 /* Now the text before start is an unknown region, and
651 next_change is the position of the next known region. */
fa8ed3e0 652 ceiling_byte = max (next_change, ceiling_byte);
9169c321
JB
653 }
654
655 /* Stop scanning before the gap. */
67ce527d
KH
656 tem = BUFFER_FLOOR_OF (start_byte - 1);
657 ceiling_byte = max (tem, ceiling_byte);
9169c321
JB
658
659 {
660 /* The termination address of the dumb loop. */
fa8ed3e0
RS
661 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
662 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
663 unsigned char *base = cursor;
664
665 while (cursor >= ceiling_addr)
666 {
667 unsigned char *scan_start = cursor;
668
669 while (*cursor != target && --cursor >= ceiling_addr)
670 ;
671
672 /* If we're looking for newlines, cache the fact that
673 the region from after the cursor to start is free of them. */
674 if (target == '\n' && newline_cache)
675 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
676 start_byte + cursor - base,
677 start_byte + scan_start - base);
9169c321
JB
678
679 /* Did we find the target character? */
680 if (cursor >= ceiling_addr)
681 {
682 if (++count >= 0)
683 {
684 immediate_quit = 0;
fa8ed3e0 685 return BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
686 }
687 cursor--;
688 }
689 }
690
fa8ed3e0 691 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
692 }
693 }
694
ca1d1d23
JB
695 immediate_quit = 0;
696 if (shortage != 0)
ffd56f97 697 *shortage = count * direction;
9169c321 698 return start;
ca1d1d23 699}
fa8ed3e0
RS
700\f
701/* Search for COUNT instances of a line boundary, which means either a
702 newline or (if selective display enabled) a carriage return.
703 Start at START. If COUNT is negative, search backwards.
704
705 We report the resulting position by calling TEMP_SET_PT_BOTH.
706
707 If we find COUNT instances. we position after (always after,
708 even if scanning backwards) the COUNTth match, and return 0.
709
710 If we don't find COUNT instances before reaching the end of the
711 buffer (or the beginning, if scanning backwards), we return
712 the number of line boundaries left unfound, and position at
713 the limit we bumped up against.
714
715 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
d5d57b92 716 except in special cases. */
ca1d1d23 717
63fa018d 718int
fa8ed3e0
RS
719scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
720 int start, start_byte;
721 int limit, limit_byte;
722 register int count;
723 int allow_quit;
63fa018d 724{
fa8ed3e0
RS
725 int direction = ((count > 0) ? 1 : -1);
726
727 register unsigned char *cursor;
728 unsigned char *base;
729
730 register int ceiling;
731 register unsigned char *ceiling_addr;
732
d5d57b92
RS
733 int old_immediate_quit = immediate_quit;
734
fa8ed3e0
RS
735 /* If we are not in selective display mode,
736 check only for newlines. */
737 int selective_display = (!NILP (current_buffer->selective_display)
738 && !INTEGERP (current_buffer->selective_display));
739
740 /* The code that follows is like scan_buffer
741 but checks for either newline or carriage return. */
742
d5d57b92
RS
743 if (allow_quit)
744 immediate_quit++;
fa8ed3e0
RS
745
746 start_byte = CHAR_TO_BYTE (start);
747
748 if (count > 0)
749 {
750 while (start_byte < limit_byte)
751 {
752 ceiling = BUFFER_CEILING_OF (start_byte);
753 ceiling = min (limit_byte - 1, ceiling);
754 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
755 base = (cursor = BYTE_POS_ADDR (start_byte));
756 while (1)
757 {
758 while (*cursor != '\n' && ++cursor != ceiling_addr)
759 ;
760
761 if (cursor != ceiling_addr)
762 {
763 if (--count == 0)
764 {
d5d57b92 765 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
766 start_byte = start_byte + cursor - base + 1;
767 start = BYTE_TO_CHAR (start_byte);
768 TEMP_SET_PT_BOTH (start, start_byte);
769 return 0;
770 }
771 else
772 if (++cursor == ceiling_addr)
773 break;
774 }
775 else
776 break;
777 }
778 start_byte += cursor - base;
779 }
780 }
781 else
782 {
fa8ed3e0
RS
783 while (start_byte > limit_byte)
784 {
785 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
786 ceiling = max (limit_byte, ceiling);
787 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
788 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
789 while (1)
790 {
791 while (--cursor != ceiling_addr && *cursor != '\n')
792 ;
793
794 if (cursor != ceiling_addr)
795 {
796 if (++count == 0)
797 {
d5d57b92 798 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
799 /* Return the position AFTER the match we found. */
800 start_byte = start_byte + cursor - base + 1;
801 start = BYTE_TO_CHAR (start_byte);
802 TEMP_SET_PT_BOTH (start, start_byte);
803 return 0;
804 }
805 }
806 else
807 break;
808 }
809 /* Here we add 1 to compensate for the last decrement
810 of CURSOR, which took it past the valid range. */
811 start_byte += cursor - base + 1;
812 }
813 }
814
815 TEMP_SET_PT_BOTH (limit, limit_byte);
d5d57b92 816 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
817
818 return count * direction;
63fa018d
RS
819}
820
ca1d1d23 821int
fa8ed3e0 822find_next_newline_no_quit (from, cnt)
ca1d1d23
JB
823 register int from, cnt;
824{
fa8ed3e0 825 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
9169c321
JB
826}
827
9169c321
JB
828/* Like find_next_newline, but returns position before the newline,
829 not after, and only search up to TO. This isn't just
830 find_next_newline (...)-1, because you might hit TO. */
fa8ed3e0 831
9169c321
JB
832int
833find_before_next_newline (from, to, cnt)
cbe0db0d 834 int from, to, cnt;
9169c321
JB
835{
836 int shortage;
837 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
838
839 if (shortage == 0)
840 pos--;
841
842 return pos;
ca1d1d23
JB
843}
844\f
ca1d1d23
JB
845/* Subroutines of Lisp buffer search functions. */
846
847static Lisp_Object
b819a390 848search_command (string, bound, noerror, count, direction, RE, posix)
ca1d1d23
JB
849 Lisp_Object string, bound, noerror, count;
850 int direction;
851 int RE;
b819a390 852 int posix;
ca1d1d23
JB
853{
854 register int np;
9f43ad85 855 int lim, lim_byte;
ca1d1d23
JB
856 int n = direction;
857
858 if (!NILP (count))
859 {
860 CHECK_NUMBER (count, 3);
861 n *= XINT (count);
862 }
863
864 CHECK_STRING (string, 0);
865 if (NILP (bound))
9f43ad85
RS
866 {
867 if (n > 0)
868 lim = ZV, lim_byte = ZV_BYTE;
869 else
870 lim = BEGV, lim_byte = BEGV_BYTE;
871 }
ca1d1d23
JB
872 else
873 {
874 CHECK_NUMBER_COERCE_MARKER (bound, 1);
875 lim = XINT (bound);
6ec8bbd2 876 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
877 error ("Invalid search bound (wrong side of point)");
878 if (lim > ZV)
9f43ad85 879 lim = ZV, lim_byte = ZV_BYTE;
588d2fd5 880 else if (lim < BEGV)
9f43ad85 881 lim = BEGV, lim_byte = BEGV_BYTE;
588d2fd5
KH
882 else
883 lim_byte = CHAR_TO_BYTE (lim);
ca1d1d23
JB
884 }
885
9f43ad85 886 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
ca1d1d23 887 (!NILP (current_buffer->case_fold_search)
facdc750 888 ? current_buffer->case_canon_table
3135e9fd 889 : Qnil),
ca1d1d23 890 (!NILP (current_buffer->case_fold_search)
facdc750 891 ? current_buffer->case_eqv_table
3135e9fd 892 : Qnil),
b819a390 893 posix);
ca1d1d23
JB
894 if (np <= 0)
895 {
896 if (NILP (noerror))
897 return signal_failure (string);
898 if (!EQ (noerror, Qt))
899 {
900 if (lim < BEGV || lim > ZV)
901 abort ();
9f43ad85 902 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
903 return Qnil;
904#if 0 /* This would be clean, but maybe programs depend on
905 a value of nil here. */
481399bf 906 np = lim;
a5f217b8 907#endif
ca1d1d23 908 }
481399bf
RS
909 else
910 return Qnil;
ca1d1d23
JB
911 }
912
913 if (np < BEGV || np > ZV)
914 abort ();
915
916 SET_PT (np);
917
918 return make_number (np);
919}
920\f
fa8ed3e0
RS
921/* Return 1 if REGEXP it matches just one constant string. */
922
b6d6a51c
KH
923static int
924trivial_regexp_p (regexp)
925 Lisp_Object regexp;
926{
fc932ac6 927 int len = STRING_BYTES (XSTRING (regexp));
b6d6a51c
KH
928 unsigned char *s = XSTRING (regexp)->data;
929 unsigned char c;
930 while (--len >= 0)
931 {
932 switch (*s++)
933 {
934 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
935 return 0;
936 case '\\':
937 if (--len < 0)
938 return 0;
939 switch (*s++)
940 {
941 case '|': case '(': case ')': case '`': case '\'': case 'b':
942 case 'B': case '<': case '>': case 'w': case 'W': case 's':
866f60fd 943 case 'S': case '=':
5679531d 944 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 945 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
946 case '6': case '7': case '8': case '9':
947 return 0;
948 }
949 }
950 }
951 return 1;
952}
953
ca325161 954/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 955 starting at position POS and stopping at position LIM,
b819a390 956 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
957 a regular expression if RE is true.
958
959 If N is positive, searching is forward and LIM must be greater than POS.
960 If N is negative, searching is backward and LIM must be less than POS.
961
facdc750 962 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 963 or else the position at the beginning of the Nth occurrence
b819a390
RS
964 (if searching backward) or the end (if searching forward).
965
966 POSIX is nonzero if we want full backtracking (POSIX style)
967 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 968
aff2ce94
RS
969#define TRANSLATE(out, trt, d) \
970do \
971 { \
972 if (! NILP (trt)) \
973 { \
974 Lisp_Object temp; \
975 temp = Faref (trt, make_number (d)); \
976 if (INTEGERP (temp)) \
977 out = XINT (temp); \
978 else \
979 out = d; \
980 } \
981 else \
982 out = d; \
983 } \
984while (0)
facdc750 985
b819a390 986static int
9f43ad85
RS
987search_buffer (string, pos, pos_byte, lim, lim_byte, n,
988 RE, trt, inverse_trt, posix)
ca1d1d23
JB
989 Lisp_Object string;
990 int pos;
9f43ad85 991 int pos_byte;
ca1d1d23 992 int lim;
9f43ad85 993 int lim_byte;
ca1d1d23
JB
994 int n;
995 int RE;
facdc750
RS
996 Lisp_Object trt;
997 Lisp_Object inverse_trt;
b819a390 998 int posix;
ca1d1d23
JB
999{
1000 int len = XSTRING (string)->size;
fc932ac6 1001 int len_byte = STRING_BYTES (XSTRING (string));
facdc750 1002 register int i;
ca1d1d23 1003
7074fde6
FP
1004 if (running_asynch_code)
1005 save_search_regs ();
1006
ca1d1d23 1007 /* Null string is found at starting position. */
3f57a499 1008 if (len == 0)
ca325161
RS
1009 {
1010 set_search_regs (pos, 0);
1011 return pos;
1012 }
3f57a499
RS
1013
1014 /* Searching 0 times means don't move. */
1015 if (n == 0)
ca1d1d23
JB
1016 return pos;
1017
b6d6a51c 1018 if (RE && !trivial_regexp_p (string))
ca1d1d23 1019 {
facdc750
RS
1020 unsigned char *p1, *p2;
1021 int s1, s2;
487282dc
KH
1022 struct re_pattern_buffer *bufp;
1023
0c8533c6
RS
1024 bufp = compile_pattern (string, &search_regs, trt, posix,
1025 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23 1026
ca1d1d23
JB
1027 immediate_quit = 1; /* Quit immediately if user types ^G,
1028 because letting this function finish
1029 can take too long. */
1030 QUIT; /* Do a pending quit right away,
1031 to avoid paradoxical behavior */
1032 /* Get pointers and sizes of the two strings
1033 that make up the visible portion of the buffer. */
1034
1035 p1 = BEGV_ADDR;
fa8ed3e0 1036 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1037 p2 = GAP_END_ADDR;
fa8ed3e0 1038 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1039 if (s1 < 0)
1040 {
1041 p2 = p1;
fa8ed3e0 1042 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1043 s1 = 0;
1044 }
1045 if (s2 < 0)
1046 {
fa8ed3e0 1047 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1048 s2 = 0;
1049 }
8bb43c28
RS
1050 re_match_object = Qnil;
1051
ca1d1d23
JB
1052 while (n < 0)
1053 {
42db823b 1054 int val;
487282dc 1055 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1056 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1057 &search_regs,
42db823b 1058 /* Don't allow match past current point */
4996330b 1059 pos_byte - BEGV_BYTE);
ca1d1d23 1060 if (val == -2)
b6d6a51c
KH
1061 {
1062 matcher_overflow ();
1063 }
ca1d1d23
JB
1064 if (val >= 0)
1065 {
26aff150 1066 pos_byte = search_regs.start[0] + BEGV_BYTE;
4746118a 1067 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1068 if (search_regs.start[i] >= 0)
1069 {
fa8ed3e0
RS
1070 search_regs.start[i]
1071 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1072 search_regs.end[i]
1073 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1074 }
a3668d92 1075 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1076 /* Set pos to the new position. */
1077 pos = search_regs.start[0];
1078 }
1079 else
1080 {
1081 immediate_quit = 0;
1082 return (n);
1083 }
1084 n++;
1085 }
1086 while (n > 0)
1087 {
42db823b 1088 int val;
487282dc 1089 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1090 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1091 &search_regs,
1092 lim_byte - BEGV_BYTE);
ca1d1d23 1093 if (val == -2)
b6d6a51c
KH
1094 {
1095 matcher_overflow ();
1096 }
ca1d1d23
JB
1097 if (val >= 0)
1098 {
26aff150 1099 pos_byte = search_regs.end[0] + BEGV_BYTE;
4746118a 1100 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1101 if (search_regs.start[i] >= 0)
1102 {
fa8ed3e0
RS
1103 search_regs.start[i]
1104 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1105 search_regs.end[i]
1106 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1107 }
a3668d92 1108 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1109 pos = search_regs.end[0];
1110 }
1111 else
1112 {
1113 immediate_quit = 0;
1114 return (0 - n);
1115 }
1116 n--;
1117 }
1118 immediate_quit = 0;
1119 return (pos);
1120 }
1121 else /* non-RE case */
1122 {
facdc750
RS
1123 unsigned char *raw_pattern, *pat;
1124 int raw_pattern_size;
1125 int raw_pattern_size_byte;
1126 unsigned char *patbuf;
1127 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1128 unsigned char *base_pat = XSTRING (string)->data;
1129 int charset_base = -1;
1130 int simple = 1;
1131
1132 /* MULTIBYTE says whether the text to be searched is multibyte.
1133 We must convert PATTERN to match that, or we will not really
1134 find things right. */
1135
1136 if (multibyte == STRING_MULTIBYTE (string))
1137 {
1138 raw_pattern = (char *) XSTRING (string)->data;
1139 raw_pattern_size = XSTRING (string)->size;
fc932ac6 1140 raw_pattern_size_byte = STRING_BYTES (XSTRING (string));
facdc750
RS
1141 }
1142 else if (multibyte)
1143 {
1144 raw_pattern_size = XSTRING (string)->size;
1145 raw_pattern_size_byte
1146 = count_size_as_multibyte (XSTRING (string)->data,
1147 raw_pattern_size);
1148 raw_pattern = (char *) alloca (raw_pattern_size_byte + 1);
1149 copy_text (XSTRING (string)->data, raw_pattern,
1150 XSTRING (string)->size, 0, 1);
1151 }
1152 else
1153 {
1154 /* Converting multibyte to single-byte.
1155
1156 ??? Perhaps this conversion should be done in a special way
1157 by subtracting nonascii-insert-offset from each non-ASCII char,
1158 so that only the multibyte chars which really correspond to
1159 the chosen single-byte character set can possibly match. */
1160 raw_pattern_size = XSTRING (string)->size;
1161 raw_pattern_size_byte = XSTRING (string)->size;
1162 raw_pattern = (char *) alloca (raw_pattern_size + 1);
1163 copy_text (XSTRING (string)->data, raw_pattern,
fc932ac6 1164 STRING_BYTES (XSTRING (string)), 1, 0);
facdc750
RS
1165 }
1166
1167 /* Copy and optionally translate the pattern. */
1168 len = raw_pattern_size;
1169 len_byte = raw_pattern_size_byte;
1170 patbuf = (unsigned char *) alloca (len_byte);
1171 pat = patbuf;
1172 base_pat = raw_pattern;
1173 if (multibyte)
1174 {
1175 while (--len >= 0)
1176 {
1177 unsigned char workbuf[4], *str;
aff2ce94 1178 int c, translated, inverse;
facdc750
RS
1179 int in_charlen, charlen;
1180
1181 /* If we got here and the RE flag is set, it's because we're
1182 dealing with a regexp known to be trivial, so the backslash
1183 just quotes the next character. */
1184 if (RE && *base_pat == '\\')
1185 {
1186 len--;
1187 len_byte--;
1188 base_pat++;
1189 }
1190
1191 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1192 /* Translate the character, if requested. */
aff2ce94 1193 TRANSLATE (translated, trt, c);
facdc750
RS
1194 /* If translation changed the byte-length, go back
1195 to the original character. */
1196 charlen = CHAR_STRING (translated, workbuf, str);
1197 if (in_charlen != charlen)
1198 {
1199 translated = c;
1200 charlen = CHAR_STRING (c, workbuf, str);
1201 }
1202
aff2ce94
RS
1203 TRANSLATE (inverse, inverse_trt, c);
1204
facdc750
RS
1205 /* Did this char actually get translated?
1206 Would any other char get translated into it? */
aff2ce94 1207 if (translated != c || inverse != c)
facdc750
RS
1208 {
1209 /* Keep track of which character set row
1210 contains the characters that need translation. */
1211 int charset_base_code = c & ~0xff;
1212 if (charset_base == -1)
1213 charset_base = charset_base_code;
1214 else if (charset_base != charset_base_code)
1215 /* If two different rows appear, needing translation,
1216 then we cannot use boyer_moore search. */
1217 simple = 0;
1218 /* ??? Handa: this must do simple = 0
1219 if c is a composite character. */
aff2ce94 1220 }
facdc750
RS
1221
1222 /* Store this character into the translated pattern. */
1223 bcopy (str, pat, charlen);
1224 pat += charlen;
1225 base_pat += in_charlen;
1226 len_byte -= in_charlen;
1227 }
1228 }
1229 else
1230 {
1231 while (--len >= 0)
1232 {
aff2ce94 1233 int c, translated, inverse;
facdc750
RS
1234
1235 /* If we got here and the RE flag is set, it's because we're
1236 dealing with a regexp known to be trivial, so the backslash
1237 just quotes the next character. */
1238 if (RE && *base_pat == '\\')
1239 {
1240 len--;
1241 base_pat++;
1242 }
1243 c = *base_pat++;
aff2ce94
RS
1244 TRANSLATE (translated, trt, c);
1245 TRANSLATE (inverse, inverse_trt, c);
facdc750
RS
1246
1247 /* Did this char actually get translated?
1248 Would any other char get translated into it? */
aff2ce94 1249 if (translated != c || inverse != c)
facdc750
RS
1250 {
1251 /* Keep track of which character set row
1252 contains the characters that need translation. */
1253 int charset_base_code = c & ~0xff;
1254 if (charset_base == -1)
1255 charset_base = charset_base_code;
1256 else if (charset_base != charset_base_code)
1257 /* If two different rows appear, needing translation,
1258 then we cannot use boyer_moore search. */
1259 simple = 0;
aff2ce94 1260 }
facdc750
RS
1261 *pat++ = translated;
1262 }
1263 }
1264
1265 len_byte = pat - patbuf;
1266 len = raw_pattern_size;
1267 pat = base_pat = patbuf;
1268
1269 if (simple)
1270 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
aff2ce94
RS
1271 pos, pos_byte, lim, lim_byte,
1272 charset_base);
facdc750
RS
1273 else
1274 return simple_search (n, pat, len, len_byte, trt,
1275 pos, pos_byte, lim, lim_byte);
1276 }
1277}
1278\f
1279/* Do a simple string search N times for the string PAT,
1280 whose length is LEN/LEN_BYTE,
1281 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1282 TRT is the translation table.
f8bd51c4 1283
facdc750
RS
1284 Return the character position where the match is found.
1285 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1286
facdc750
RS
1287 This kind of search works regardless of what is in PAT and
1288 regardless of what is in TRT. It is used in cases where
1289 boyer_moore cannot work. */
1290
1291static int
1292simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1293 int n;
1294 unsigned char *pat;
1295 int len, len_byte;
1296 Lisp_Object trt;
1297 int pos, pos_byte;
1298 int lim, lim_byte;
1299{
1300 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
ab228c24 1301 int forward = n > 0;
facdc750
RS
1302
1303 if (lim > pos && multibyte)
1304 while (n > 0)
1305 {
1306 while (1)
f8bd51c4 1307 {
facdc750
RS
1308 /* Try matching at position POS. */
1309 int this_pos = pos;
1310 int this_pos_byte = pos_byte;
1311 int this_len = len;
1312 int this_len_byte = len_byte;
1313 unsigned char *p = pat;
1314 if (pos + len > lim)
1315 goto stop;
1316
1317 while (this_len > 0)
1318 {
1319 int charlen, buf_charlen;
ab228c24 1320 int pat_ch, buf_ch;
facdc750 1321
ab228c24 1322 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1323 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1324 ZV_BYTE - this_pos_byte,
1325 buf_charlen);
aff2ce94 1326 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1327
1328 if (buf_ch != pat_ch)
1329 break;
ab228c24
RS
1330
1331 this_len_byte -= charlen;
1332 this_len--;
1333 p += charlen;
1334
1335 this_pos_byte += buf_charlen;
1336 this_pos++;
facdc750
RS
1337 }
1338
1339 if (this_len == 0)
1340 {
1341 pos += len;
1342 pos_byte += len_byte;
1343 break;
1344 }
1345
1346 INC_BOTH (pos, pos_byte);
f8bd51c4 1347 }
facdc750
RS
1348
1349 n--;
1350 }
1351 else if (lim > pos)
1352 while (n > 0)
1353 {
1354 while (1)
f8bd51c4 1355 {
facdc750
RS
1356 /* Try matching at position POS. */
1357 int this_pos = pos;
1358 int this_len = len;
1359 unsigned char *p = pat;
1360
1361 if (pos + len > lim)
1362 goto stop;
1363
1364 while (this_len > 0)
1365 {
1366 int pat_ch = *p++;
1367 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1368 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1369
1370 if (buf_ch != pat_ch)
1371 break;
ab228c24
RS
1372
1373 this_len--;
1374 this_pos++;
facdc750
RS
1375 }
1376
1377 if (this_len == 0)
1378 {
1379 pos += len;
1380 break;
1381 }
1382
1383 pos++;
f8bd51c4 1384 }
facdc750
RS
1385
1386 n--;
1387 }
1388 /* Backwards search. */
1389 else if (lim < pos && multibyte)
1390 while (n < 0)
1391 {
1392 while (1)
f8bd51c4 1393 {
facdc750
RS
1394 /* Try matching at position POS. */
1395 int this_pos = pos - len;
1396 int this_pos_byte = pos_byte - len_byte;
1397 int this_len = len;
1398 int this_len_byte = len_byte;
1399 unsigned char *p = pat;
1400
1401 if (pos - len < lim)
1402 goto stop;
1403
1404 while (this_len > 0)
1405 {
1406 int charlen, buf_charlen;
ab228c24 1407 int pat_ch, buf_ch;
facdc750 1408
ab228c24 1409 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1410 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1411 ZV_BYTE - this_pos_byte,
1412 buf_charlen);
aff2ce94 1413 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1414
1415 if (buf_ch != pat_ch)
1416 break;
ab228c24
RS
1417
1418 this_len_byte -= charlen;
1419 this_len--;
1420 p += charlen;
1421 this_pos_byte += buf_charlen;
1422 this_pos++;
facdc750
RS
1423 }
1424
1425 if (this_len == 0)
1426 {
1427 pos -= len;
1428 pos_byte -= len_byte;
1429 break;
1430 }
1431
1432 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1433 }
1434
facdc750
RS
1435 n++;
1436 }
1437 else if (lim < pos)
1438 while (n < 0)
1439 {
1440 while (1)
b6d6a51c 1441 {
facdc750
RS
1442 /* Try matching at position POS. */
1443 int this_pos = pos - len;
1444 int this_len = len;
1445 unsigned char *p = pat;
1446
1447 if (pos - len < lim)
1448 goto stop;
1449
1450 while (this_len > 0)
1451 {
1452 int pat_ch = *p++;
1453 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1454 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1455
1456 if (buf_ch != pat_ch)
1457 break;
ab228c24
RS
1458 this_len--;
1459 this_pos++;
facdc750
RS
1460 }
1461
1462 if (this_len == 0)
b6d6a51c 1463 {
facdc750
RS
1464 pos -= len;
1465 break;
b6d6a51c 1466 }
facdc750
RS
1467
1468 pos--;
b6d6a51c 1469 }
facdc750
RS
1470
1471 n++;
b6d6a51c 1472 }
facdc750
RS
1473
1474 stop:
1475 if (n == 0)
aff2ce94 1476 {
ab228c24
RS
1477 if (forward)
1478 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1479 else
1480 set_search_regs (multibyte ? pos_byte : pos, len_byte);
aff2ce94
RS
1481
1482 return pos;
1483 }
facdc750
RS
1484 else if (n > 0)
1485 return -n;
1486 else
1487 return n;
1488}
1489\f
1490/* Do Boyer-Moore search N times for the string PAT,
1491 whose length is LEN/LEN_BYTE,
1492 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1493 DIRECTION says which direction we search in.
1494 TRT and INVERSE_TRT are translation tables.
1495
1496 This kind of search works if all the characters in PAT that have
1497 nontrivial translation are the same aside from the last byte. This
1498 makes it possible to translate just the last byte of a character,
1499 and do so after just a simple test of the context.
1500
1501 If that criterion is not satisfied, do not call this function. */
1502
1503static int
1504boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
aff2ce94 1505 pos, pos_byte, lim, lim_byte, charset_base)
facdc750
RS
1506 int n;
1507 unsigned char *base_pat;
1508 int len, len_byte;
1509 Lisp_Object trt;
1510 Lisp_Object inverse_trt;
1511 int pos, pos_byte;
1512 int lim, lim_byte;
aff2ce94 1513 int charset_base;
facdc750
RS
1514{
1515 int direction = ((n > 0) ? 1 : -1);
1516 register int dirlen;
1517 int infinity, limit, k, stride_for_teases;
1518 register int *BM_tab;
1519 int *BM_tab_base;
1520 register unsigned char *cursor, *p_limit;
1521 register int i, j;
1522 unsigned char *pat;
1523 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1524
1525 unsigned char simple_translate[0400];
1526 int translate_prev_byte;
1527 int translate_anteprev_byte;
1528
1529#ifdef C_ALLOCA
1530 int BM_tab_space[0400];
1531 BM_tab = &BM_tab_space[0];
1532#else
1533 BM_tab = (int *) alloca (0400 * sizeof (int));
1534#endif
1535 /* The general approach is that we are going to maintain that we know */
1536 /* the first (closest to the present position, in whatever direction */
1537 /* we're searching) character that could possibly be the last */
1538 /* (furthest from present position) character of a valid match. We */
1539 /* advance the state of our knowledge by looking at that character */
1540 /* and seeing whether it indeed matches the last character of the */
1541 /* pattern. If it does, we take a closer look. If it does not, we */
1542 /* move our pointer (to putative last characters) as far as is */
1543 /* logically possible. This amount of movement, which I call a */
1544 /* stride, will be the length of the pattern if the actual character */
1545 /* appears nowhere in the pattern, otherwise it will be the distance */
1546 /* from the last occurrence of that character to the end of the */
1547 /* pattern. */
1548 /* As a coding trick, an enormous stride is coded into the table for */
1549 /* characters that match the last character. This allows use of only */
1550 /* a single test, a test for having gone past the end of the */
1551 /* permissible match region, to test for both possible matches (when */
1552 /* the stride goes past the end immediately) and failure to */
1553 /* match (where you get nudged past the end one stride at a time). */
1554
1555 /* Here we make a "mickey mouse" BM table. The stride of the search */
1556 /* is determined only by the last character of the putative match. */
1557 /* If that character does not match, we will stride the proper */
1558 /* distance to propose a match that superimposes it on the last */
1559 /* instance of a character that matches it (per trt), or misses */
1560 /* it entirely if there is none. */
1561
1562 dirlen = len_byte * direction;
1563 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1564 if (direction < 0)
1565 pat = (base_pat += len_byte - 1);
1566 else
1567 pat = base_pat;
1568 BM_tab_base = BM_tab;
1569 BM_tab += 0400;
1570 j = dirlen; /* to get it in a register */
1571 /* A character that does not appear in the pattern induces a */
1572 /* stride equal to the pattern length. */
1573 while (BM_tab_base != BM_tab)
1574 {
1575 *--BM_tab = j;
1576 *--BM_tab = j;
1577 *--BM_tab = j;
1578 *--BM_tab = j;
1579 }
1580
1581 /* We use this for translation, instead of TRT itself.
1582 We fill this in to handle the characters that actually
1583 occur in the pattern. Others don't matter anyway! */
1584 bzero (simple_translate, sizeof simple_translate);
1585 for (i = 0; i < 0400; i++)
1586 simple_translate[i] = i;
1587
1588 i = 0;
1589 while (i != infinity)
1590 {
1591 unsigned char *ptr = pat + i;
1592 i += direction;
1593 if (i == dirlen)
1594 i = infinity;
1595 if (! NILP (trt))
ca1d1d23 1596 {
facdc750 1597 int ch;
aff2ce94 1598 int untranslated;
facdc750
RS
1599 int this_translated = 1;
1600
1601 if (multibyte
1602 && (ptr + 1 == pat + len_byte || CHAR_HEAD_P (ptr[1])))
ca1d1d23 1603 {
facdc750
RS
1604 unsigned char *charstart = ptr;
1605 while (! CHAR_HEAD_P (*charstart))
1606 charstart--;
aff2ce94 1607 untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
ab228c24 1608 if (charset_base == (untranslated & ~0xff))
facdc750 1609 {
ab228c24 1610 TRANSLATE (ch, trt, untranslated);
aff2ce94
RS
1611 if (! CHAR_HEAD_P (*ptr))
1612 {
1613 translate_prev_byte = ptr[-1];
1614 if (! CHAR_HEAD_P (translate_prev_byte))
1615 translate_anteprev_byte = ptr[-2];
1616 }
facdc750 1617 }
aff2ce94 1618 else
ab228c24
RS
1619 {
1620 this_translated = 0;
1621 ch = *ptr;
1622 }
ca1d1d23 1623 }
facdc750 1624 else if (!multibyte)
aff2ce94 1625 TRANSLATE (ch, trt, *ptr);
ca1d1d23
JB
1626 else
1627 {
facdc750
RS
1628 ch = *ptr;
1629 this_translated = 0;
ca1d1d23 1630 }
facdc750 1631
ab228c24
RS
1632 if (ch > 0400)
1633 j = ((unsigned char) ch) | 0200;
1634 else
1635 j = (unsigned char) ch;
1636
facdc750
RS
1637 if (i == infinity)
1638 stride_for_teases = BM_tab[j];
ab228c24 1639
facdc750
RS
1640 BM_tab[j] = dirlen - i;
1641 /* A translation table is accompanied by its inverse -- see */
1642 /* comment following downcase_table for details */
1643 if (this_translated)
ab228c24
RS
1644 {
1645 int starting_ch = ch;
1646 int starting_j = j;
1647 while (1)
1648 {
1649 TRANSLATE (ch, inverse_trt, ch);
1650 if (ch > 0400)
1651 j = ((unsigned char) ch) | 0200;
1652 else
1653 j = (unsigned char) ch;
1654
1655 /* For all the characters that map into CH,
1656 set up simple_translate to map the last byte
1657 into STARTING_J. */
1658 simple_translate[j] = starting_j;
1659 if (ch == starting_ch)
1660 break;
1661 BM_tab[j] = dirlen - i;
1662 }
1663 }
facdc750
RS
1664 }
1665 else
1666 {
1667 j = *ptr;
1668
1669 if (i == infinity)
1670 stride_for_teases = BM_tab[j];
1671 BM_tab[j] = dirlen - i;
ca1d1d23 1672 }
facdc750
RS
1673 /* stride_for_teases tells how much to stride if we get a */
1674 /* match on the far character but are subsequently */
1675 /* disappointed, by recording what the stride would have been */
1676 /* for that character if the last character had been */
1677 /* different. */
1678 }
1679 infinity = dirlen - infinity;
1680 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1681 /* loop invariant - POS_BYTE points at where last char (first
1682 char if reverse) of pattern would align in a possible match. */
1683 while (n != 0)
1684 {
1685 int tail_end;
1686 unsigned char *tail_end_ptr;
1687
1688 /* It's been reported that some (broken) compiler thinks that
1689 Boolean expressions in an arithmetic context are unsigned.
1690 Using an explicit ?1:0 prevents this. */
1691 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1692 < 0)
1693 return (n * (0 - direction));
1694 /* First we do the part we can by pointers (maybe nothing) */
1695 QUIT;
1696 pat = base_pat;
1697 limit = pos_byte - dirlen + direction;
67ce527d
KH
1698 if (direction > 0)
1699 {
1700 limit = BUFFER_CEILING_OF (limit);
1701 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1702 can take on without hitting edge of buffer or the gap. */
1703 limit = min (limit, pos_byte + 20000);
1704 limit = min (limit, lim_byte - 1);
1705 }
1706 else
1707 {
1708 limit = BUFFER_FLOOR_OF (limit);
1709 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1710 can take on without hitting edge of buffer or the gap. */
1711 limit = max (limit, pos_byte - 20000);
1712 limit = max (limit, lim_byte);
1713 }
facdc750
RS
1714 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1715 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1716
1717 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1718 {
facdc750
RS
1719 unsigned char *p2;
1720
1721 p_limit = BYTE_POS_ADDR (limit);
1722 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1723 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1724 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1725 {
facdc750 1726 if (direction > 0) /* worth duplicating */
ca1d1d23 1727 {
facdc750
RS
1728 /* Use signed comparison if appropriate
1729 to make cursor+infinity sure to be > p_limit.
1730 Assuming that the buffer lies in a range of addresses
1731 that are all "positive" (as ints) or all "negative",
1732 either kind of comparison will work as long
1733 as we don't step by infinity. So pick the kind
1734 that works when we do step by infinity. */
1735 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1736 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1737 cursor += BM_tab[*cursor];
ca1d1d23 1738 else
facdc750
RS
1739 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1740 cursor += BM_tab[*cursor];
1741 }
1742 else
1743 {
1744 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1745 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1746 cursor += BM_tab[*cursor];
1747 else
1748 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1749 cursor += BM_tab[*cursor];
1750 }
ca1d1d23 1751/* If you are here, cursor is beyond the end of the searched region. */
facdc750
RS
1752/* This can happen if you match on the far character of the pattern, */
1753/* because the "stride" of that character is infinity, a number able */
1754/* to throw you well beyond the end of the search. It can also */
1755/* happen if you fail to match within the permitted region and would */
1756/* otherwise try a character beyond that region */
1757 if ((cursor - p_limit) * direction <= len_byte)
1758 break; /* a small overrun is genuine */
1759 cursor -= infinity; /* large overrun = hit */
1760 i = dirlen - direction;
1761 if (! NILP (trt))
1762 {
1763 while ((i -= direction) + direction != 0)
ca1d1d23 1764 {
facdc750
RS
1765 int ch;
1766 cursor -= direction;
1767 /* Translate only the last byte of a character. */
1768 if (! multibyte
1769 || ((cursor == tail_end_ptr
1770 || CHAR_HEAD_P (cursor[1]))
1771 && (CHAR_HEAD_P (cursor[0])
1772 || (translate_prev_byte == cursor[-1]
1773 && (CHAR_HEAD_P (translate_prev_byte)
1774 || translate_anteprev_byte == cursor[-2])))))
1775 ch = simple_translate[*cursor];
1776 else
1777 ch = *cursor;
1778 if (pat[i] != ch)
1779 break;
ca1d1d23 1780 }
facdc750
RS
1781 }
1782 else
1783 {
1784 while ((i -= direction) + direction != 0)
ca1d1d23 1785 {
facdc750
RS
1786 cursor -= direction;
1787 if (pat[i] != *cursor)
1788 break;
ca1d1d23 1789 }
facdc750
RS
1790 }
1791 cursor += dirlen - i - direction; /* fix cursor */
1792 if (i + direction == 0)
1793 {
1794 int position;
0c8533c6 1795
facdc750 1796 cursor -= direction;
1113d9db 1797
facdc750
RS
1798 position = pos_byte + cursor - p2 + ((direction > 0)
1799 ? 1 - len_byte : 0);
1800 set_search_regs (position, len_byte);
ca325161 1801
facdc750
RS
1802 if ((n -= direction) != 0)
1803 cursor += dirlen; /* to resume search */
ca1d1d23 1804 else
facdc750
RS
1805 return ((direction > 0)
1806 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1807 }
facdc750
RS
1808 else
1809 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 1810 }
facdc750
RS
1811 pos_byte += cursor - p2;
1812 }
1813 else
1814 /* Now we'll pick up a clump that has to be done the hard */
1815 /* way because it covers a discontinuity */
1816 {
1817 limit = ((direction > 0)
1818 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1819 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1820 limit = ((direction > 0)
1821 ? min (limit + len_byte, lim_byte - 1)
1822 : max (limit - len_byte, lim_byte));
1823 /* LIMIT is now the last value POS_BYTE can have
1824 and still be valid for a possible match. */
1825 while (1)
ca1d1d23 1826 {
facdc750
RS
1827 /* This loop can be coded for space rather than */
1828 /* speed because it will usually run only once. */
1829 /* (the reach is at most len + 21, and typically */
1830 /* does not exceed len) */
1831 while ((limit - pos_byte) * direction >= 0)
1832 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1833 /* now run the same tests to distinguish going off the */
1834 /* end, a match or a phony match. */
1835 if ((pos_byte - limit) * direction <= len_byte)
1836 break; /* ran off the end */
1837 /* Found what might be a match.
1838 Set POS_BYTE back to last (first if reverse) pos. */
1839 pos_byte -= infinity;
1840 i = dirlen - direction;
1841 while ((i -= direction) + direction != 0)
ca1d1d23 1842 {
facdc750
RS
1843 int ch;
1844 unsigned char *ptr;
1845 pos_byte -= direction;
1846 ptr = BYTE_POS_ADDR (pos_byte);
1847 /* Translate only the last byte of a character. */
1848 if (! multibyte
1849 || ((ptr == tail_end_ptr
1850 || CHAR_HEAD_P (ptr[1]))
1851 && (CHAR_HEAD_P (ptr[0])
1852 || (translate_prev_byte == ptr[-1]
1853 && (CHAR_HEAD_P (translate_prev_byte)
1854 || translate_anteprev_byte == ptr[-2])))))
1855 ch = simple_translate[*ptr];
1856 else
1857 ch = *ptr;
1858 if (pat[i] != ch)
1859 break;
1860 }
1861 /* Above loop has moved POS_BYTE part or all the way
1862 back to the first pos (last pos if reverse).
1863 Set it once again at the last (first if reverse) char. */
1864 pos_byte += dirlen - i- direction;
1865 if (i + direction == 0)
1866 {
1867 int position;
1868 pos_byte -= direction;
1113d9db 1869
facdc750 1870 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
0c8533c6 1871
facdc750 1872 set_search_regs (position, len_byte);
ca325161 1873
facdc750
RS
1874 if ((n -= direction) != 0)
1875 pos_byte += dirlen; /* to resume search */
ca1d1d23 1876 else
facdc750
RS
1877 return ((direction > 0)
1878 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1879 }
facdc750
RS
1880 else
1881 pos_byte += stride_for_teases;
1882 }
1883 }
1884 /* We have done one clump. Can we continue? */
1885 if ((lim_byte - pos_byte) * direction < 0)
1886 return ((0 - n) * direction);
ca1d1d23 1887 }
facdc750 1888 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 1889}
ca325161 1890
fa8ed3e0 1891/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
ca325161
RS
1892 for a match just found in the current buffer. */
1893
1894static void
fa8ed3e0
RS
1895set_search_regs (beg_byte, nbytes)
1896 int beg_byte, nbytes;
ca325161
RS
1897{
1898 /* Make sure we have registers in which to store
1899 the match position. */
1900 if (search_regs.num_regs == 0)
1901 {
2d4a771a
RS
1902 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1903 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 1904 search_regs.num_regs = 2;
ca325161
RS
1905 }
1906
fa8ed3e0
RS
1907 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1908 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 1909 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 1910}
ca1d1d23
JB
1911\f
1912/* Given a string of words separated by word delimiters,
1913 compute a regexp that matches those exact words
1914 separated by arbitrary punctuation. */
1915
1916static Lisp_Object
1917wordify (string)
1918 Lisp_Object string;
1919{
1920 register unsigned char *p, *o;
0c8533c6 1921 register int i, i_byte, len, punct_count = 0, word_count = 0;
ca1d1d23 1922 Lisp_Object val;
0c8533c6
RS
1923 int prev_c = 0;
1924 int adjust;
ca1d1d23
JB
1925
1926 CHECK_STRING (string, 0);
1927 p = XSTRING (string)->data;
1928 len = XSTRING (string)->size;
1929
0c8533c6
RS
1930 for (i = 0, i_byte = 0; i < len; )
1931 {
1932 int c;
1933
1934 if (STRING_MULTIBYTE (string))
1935 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1936 else
1937 c = XSTRING (string)->data[i++];
1938
1939 if (SYNTAX (c) != Sword)
1940 {
1941 punct_count++;
1942 if (i > 0 && SYNTAX (prev_c) == Sword)
1943 word_count++;
1944 }
ca1d1d23 1945
0c8533c6
RS
1946 prev_c = c;
1947 }
1948
1949 if (SYNTAX (prev_c) == Sword)
1950 word_count++;
1951 if (!word_count)
1952 return build_string ("");
1953
1954 adjust = - punct_count + 5 * (word_count - 1) + 4;
1955 val = make_uninit_multibyte_string (len + adjust,
fc932ac6 1956 STRING_BYTES (XSTRING (string)) + adjust);
ca1d1d23
JB
1957
1958 o = XSTRING (val)->data;
1959 *o++ = '\\';
1960 *o++ = 'b';
1961
fc932ac6 1962 for (i = 0; i < STRING_BYTES (XSTRING (val)); i++)
ca1d1d23
JB
1963 if (SYNTAX (p[i]) == Sword)
1964 *o++ = p[i];
1965 else if (i > 0 && SYNTAX (p[i-1]) == Sword && --word_count)
1966 {
1967 *o++ = '\\';
1968 *o++ = 'W';
1969 *o++ = '\\';
1970 *o++ = 'W';
1971 *o++ = '*';
1972 }
1973
1974 *o++ = '\\';
1975 *o++ = 'b';
1976
1977 return val;
1978}
1979\f
1980DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
6af43974 1981 "MSearch backward: ",
ca1d1d23
JB
1982 "Search backward from point for STRING.\n\
1983Set point to the beginning of the occurrence found, and return point.\n\
1984An optional second argument bounds the search; it is a buffer position.\n\
1985The match found must not extend before that position.\n\
1986Optional third argument, if t, means if fail just return nil (no error).\n\
1987 If not nil and not t, position at limit of search and return nil.\n\
1988Optional fourth argument is repeat count--search for successive occurrences.\n\
1989See also the functions `match-beginning', `match-end' and `replace-match'.")
1990 (string, bound, noerror, count)
1991 Lisp_Object string, bound, noerror, count;
1992{
b819a390 1993 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
1994}
1995
6af43974 1996DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
ca1d1d23
JB
1997 "Search forward from point for STRING.\n\
1998Set point to the end of the occurrence found, and return point.\n\
1999An optional second argument bounds the search; it is a buffer position.\n\
2000The match found must not extend after that position. nil is equivalent\n\
2001 to (point-max).\n\
2002Optional third argument, if t, means if fail just return nil (no error).\n\
2003 If not nil and not t, move to limit of search and return nil.\n\
2004Optional fourth argument is repeat count--search for successive occurrences.\n\
2005See also the functions `match-beginning', `match-end' and `replace-match'.")
2006 (string, bound, noerror, count)
2007 Lisp_Object string, bound, noerror, count;
2008{
b819a390 2009 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
2010}
2011
2012DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2013 "sWord search backward: ",
2014 "Search backward from point for STRING, ignoring differences in punctuation.\n\
2015Set point to the beginning of the occurrence found, and return point.\n\
2016An optional second argument bounds the search; it is a buffer position.\n\
2017The match found must not extend before that position.\n\
2018Optional third argument, if t, means if fail just return nil (no error).\n\
2019 If not nil and not t, move to limit of search and return nil.\n\
2020Optional fourth argument is repeat count--search for successive occurrences.")
2021 (string, bound, noerror, count)
2022 Lisp_Object string, bound, noerror, count;
2023{
b819a390 2024 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2025}
2026
2027DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2028 "sWord search: ",
2029 "Search forward from point for STRING, ignoring differences in punctuation.\n\
2030Set point to the end of the occurrence found, and return point.\n\
2031An optional second argument bounds the search; it is a buffer position.\n\
2032The match found must not extend after that position.\n\
2033Optional third argument, if t, means if fail just return nil (no error).\n\
2034 If not nil and not t, move to limit of search and return nil.\n\
2035Optional fourth argument is repeat count--search for successive occurrences.")
2036 (string, bound, noerror, count)
2037 Lisp_Object string, bound, noerror, count;
2038{
b819a390 2039 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
2040}
2041
2042DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2043 "sRE search backward: ",
2044 "Search backward from point for match for regular expression REGEXP.\n\
2045Set point to the beginning of the match, and return point.\n\
2046The match found is the one starting last in the buffer\n\
19c0a730 2047and yet ending before the origin of the search.\n\
ca1d1d23
JB
2048An optional second argument bounds the search; it is a buffer position.\n\
2049The match found must start at or after that position.\n\
2050Optional third argument, if t, means if fail just return nil (no error).\n\
2051 If not nil and not t, move to limit of search and return nil.\n\
2052Optional fourth argument is repeat count--search for successive occurrences.\n\
2053See also the functions `match-beginning', `match-end' and `replace-match'.")
19c0a730
KH
2054 (regexp, bound, noerror, count)
2055 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2056{
b819a390 2057 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2058}
2059
2060DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2061 "sRE search: ",
2062 "Search forward from point for regular expression REGEXP.\n\
2063Set point to the end of the occurrence found, and return point.\n\
2064An optional second argument bounds the search; it is a buffer position.\n\
2065The match found must not extend after that position.\n\
2066Optional third argument, if t, means if fail just return nil (no error).\n\
2067 If not nil and not t, move to limit of search and return nil.\n\
2068Optional fourth argument is repeat count--search for successive occurrences.\n\
2069See also the functions `match-beginning', `match-end' and `replace-match'.")
19c0a730
KH
2070 (regexp, bound, noerror, count)
2071 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2072{
b819a390
RS
2073 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2074}
2075
2076DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2077 "sPosix search backward: ",
2078 "Search backward from point for match for regular expression REGEXP.\n\
2079Find the longest match in accord with Posix regular expression rules.\n\
2080Set point to the beginning of the match, and return point.\n\
2081The match found is the one starting last in the buffer\n\
2082and yet ending before the origin of the search.\n\
2083An optional second argument bounds the search; it is a buffer position.\n\
2084The match found must start at or after that position.\n\
2085Optional third argument, if t, means if fail just return nil (no error).\n\
2086 If not nil and not t, move to limit of search and return nil.\n\
2087Optional fourth argument is repeat count--search for successive occurrences.\n\
2088See also the functions `match-beginning', `match-end' and `replace-match'.")
2089 (regexp, bound, noerror, count)
2090 Lisp_Object regexp, bound, noerror, count;
2091{
2092 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2093}
2094
2095DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2096 "sPosix search: ",
2097 "Search forward from point for regular expression REGEXP.\n\
2098Find the longest match in accord with Posix regular expression rules.\n\
2099Set point to the end of the occurrence found, and return point.\n\
2100An optional second argument bounds the search; it is a buffer position.\n\
2101The match found must not extend after that position.\n\
2102Optional third argument, if t, means if fail just return nil (no error).\n\
2103 If not nil and not t, move to limit of search and return nil.\n\
2104Optional fourth argument is repeat count--search for successive occurrences.\n\
2105See also the functions `match-beginning', `match-end' and `replace-match'.")
2106 (regexp, bound, noerror, count)
2107 Lisp_Object regexp, bound, noerror, count;
2108{
2109 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2110}
2111\f
d7a5ad5f 2112DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
ca1d1d23
JB
2113 "Replace text matched by last search with NEWTEXT.\n\
2114If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
5b9cf4b2
RS
2115Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
2116based on the replaced text.\n\
2117If the replaced text has only capital letters\n\
2118and has at least one multiletter word, convert NEWTEXT to all caps.\n\
2119If the replaced text has at least one word starting with a capital letter,\n\
2120then capitalize each word in NEWTEXT.\n\n\
ca1d1d23
JB
2121If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
2122Otherwise treat `\\' as special:\n\
2123 `\\&' in NEWTEXT means substitute original matched text.\n\
2124 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
2125 If Nth parens didn't match, substitute nothing.\n\
2126 `\\\\' means insert one `\\'.\n\
1113d9db 2127FIXEDCASE and LITERAL are optional arguments.\n\
080c45fd
RS
2128Leaves point at end of replacement text.\n\
2129\n\
2130The optional fourth argument STRING can be a string to modify.\n\
2131In that case, this function creates and returns a new string\n\
d7a5ad5f
RS
2132which is made by replacing the part of STRING that was matched.\n\
2133\n\
2134The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
2135It says to replace just that subexpression instead of the whole match.\n\
2136This is useful only after a regular expression search or match\n\
2137since only regular expressions have distinguished subexpressions.")
2138 (newtext, fixedcase, literal, string, subexp)
2139 Lisp_Object newtext, fixedcase, literal, string, subexp;
ca1d1d23
JB
2140{
2141 enum { nochange, all_caps, cap_initial } case_action;
2142 register int pos, last;
2143 int some_multiletter_word;
97832bd0 2144 int some_lowercase;
73dc8771 2145 int some_uppercase;
208767c3 2146 int some_nonuppercase_initial;
ca1d1d23
JB
2147 register int c, prevc;
2148 int inslen;
d7a5ad5f 2149 int sub;
3e18eecf 2150 int opoint, newpoint;
ca1d1d23 2151
16fdc568 2152 CHECK_STRING (newtext, 0);
ca1d1d23 2153
080c45fd
RS
2154 if (! NILP (string))
2155 CHECK_STRING (string, 4);
2156
ca1d1d23
JB
2157 case_action = nochange; /* We tried an initialization */
2158 /* but some C compilers blew it */
4746118a
JB
2159
2160 if (search_regs.num_regs <= 0)
2161 error ("replace-match called before any match found");
2162
d7a5ad5f
RS
2163 if (NILP (subexp))
2164 sub = 0;
2165 else
2166 {
2167 CHECK_NUMBER (subexp, 3);
2168 sub = XINT (subexp);
2169 if (sub < 0 || sub >= search_regs.num_regs)
2170 args_out_of_range (subexp, make_number (search_regs.num_regs));
2171 }
2172
080c45fd
RS
2173 if (NILP (string))
2174 {
d7a5ad5f
RS
2175 if (search_regs.start[sub] < BEGV
2176 || search_regs.start[sub] > search_regs.end[sub]
2177 || search_regs.end[sub] > ZV)
2178 args_out_of_range (make_number (search_regs.start[sub]),
2179 make_number (search_regs.end[sub]));
080c45fd
RS
2180 }
2181 else
2182 {
d7a5ad5f
RS
2183 if (search_regs.start[sub] < 0
2184 || search_regs.start[sub] > search_regs.end[sub]
2185 || search_regs.end[sub] > XSTRING (string)->size)
2186 args_out_of_range (make_number (search_regs.start[sub]),
2187 make_number (search_regs.end[sub]));
080c45fd 2188 }
ca1d1d23
JB
2189
2190 if (NILP (fixedcase))
2191 {
fa8ed3e0 2192 int beg;
ca1d1d23
JB
2193 /* Decide how to casify by examining the matched text. */
2194
fa8ed3e0
RS
2195 if (NILP (string))
2196 last = CHAR_TO_BYTE (search_regs.end[sub]);
2197 else
2198 last = search_regs.end[sub];
2199
2200 if (NILP (string))
2201 beg = CHAR_TO_BYTE (search_regs.start[sub]);
2202 else
2203 beg = search_regs.start[sub];
2204
ca1d1d23
JB
2205 prevc = '\n';
2206 case_action = all_caps;
2207
2208 /* some_multiletter_word is set nonzero if any original word
2209 is more than one letter long. */
2210 some_multiletter_word = 0;
97832bd0 2211 some_lowercase = 0;
208767c3 2212 some_nonuppercase_initial = 0;
73dc8771 2213 some_uppercase = 0;
ca1d1d23 2214
fa8ed3e0 2215 for (pos = beg; pos < last; pos++)
ca1d1d23 2216 {
080c45fd 2217 if (NILP (string))
5679531d 2218 c = FETCH_BYTE (pos);
080c45fd
RS
2219 else
2220 c = XSTRING (string)->data[pos];
2221
ca1d1d23
JB
2222 if (LOWERCASEP (c))
2223 {
2224 /* Cannot be all caps if any original char is lower case */
2225
97832bd0 2226 some_lowercase = 1;
ca1d1d23 2227 if (SYNTAX (prevc) != Sword)
208767c3 2228 some_nonuppercase_initial = 1;
ca1d1d23
JB
2229 else
2230 some_multiletter_word = 1;
2231 }
2232 else if (!NOCASEP (c))
2233 {
73dc8771 2234 some_uppercase = 1;
97832bd0 2235 if (SYNTAX (prevc) != Sword)
c4d460ce 2236 ;
97832bd0 2237 else
ca1d1d23
JB
2238 some_multiletter_word = 1;
2239 }
208767c3
RS
2240 else
2241 {
2242 /* If the initial is a caseless word constituent,
2243 treat that like a lowercase initial. */
2244 if (SYNTAX (prevc) != Sword)
2245 some_nonuppercase_initial = 1;
2246 }
ca1d1d23
JB
2247
2248 prevc = c;
2249 }
2250
97832bd0
RS
2251 /* Convert to all caps if the old text is all caps
2252 and has at least one multiletter word. */
2253 if (! some_lowercase && some_multiletter_word)
2254 case_action = all_caps;
c4d460ce 2255 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2256 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2257 case_action = cap_initial;
208767c3 2258 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2259 /* Should x -> yz, operating on X, give Yz or YZ?
2260 We'll assume the latter. */
2261 case_action = all_caps;
97832bd0
RS
2262 else
2263 case_action = nochange;
ca1d1d23
JB
2264 }
2265
080c45fd
RS
2266 /* Do replacement in a string. */
2267 if (!NILP (string))
2268 {
2269 Lisp_Object before, after;
2270
2271 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2272 make_number (search_regs.start[sub]));
2273 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2274
636a5e28
RS
2275 /* Substitute parts of the match into NEWTEXT
2276 if desired. */
080c45fd
RS
2277 if (NILP (literal))
2278 {
2279 int lastpos = -1;
0c8533c6 2280 int lastpos_byte = -1;
080c45fd
RS
2281 /* We build up the substituted string in ACCUM. */
2282 Lisp_Object accum;
2283 Lisp_Object middle;
0c8533c6 2284 int pos_byte;
080c45fd
RS
2285
2286 accum = Qnil;
2287
fc932ac6 2288 for (pos_byte = 0, pos = 0; pos_byte < STRING_BYTES (XSTRING (newtext));)
080c45fd
RS
2289 {
2290 int substart = -1;
2291 int subend;
1e79ec24 2292 int delbackslash = 0;
080c45fd 2293
0c8533c6
RS
2294 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2295
080c45fd
RS
2296 if (c == '\\')
2297 {
0c8533c6 2298 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
080c45fd
RS
2299 if (c == '&')
2300 {
d7a5ad5f
RS
2301 substart = search_regs.start[sub];
2302 subend = search_regs.end[sub];
080c45fd
RS
2303 }
2304 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2305 {
ad10348f 2306 if (search_regs.start[c - '0'] >= 0)
080c45fd
RS
2307 {
2308 substart = search_regs.start[c - '0'];
2309 subend = search_regs.end[c - '0'];
2310 }
2311 }
1e79ec24
KH
2312 else if (c == '\\')
2313 delbackslash = 1;
636a5e28
RS
2314 else
2315 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2316 }
2317 if (substart >= 0)
2318 {
2319 if (pos - 1 != lastpos + 1)
0c8533c6
RS
2320 middle = substring_both (newtext, lastpos + 1,
2321 lastpos_byte + 1,
2322 pos - 1, pos_byte - 1);
080c45fd
RS
2323 else
2324 middle = Qnil;
2325 accum = concat3 (accum, middle,
0c8533c6
RS
2326 Fsubstring (string,
2327 make_number (substart),
080c45fd
RS
2328 make_number (subend)));
2329 lastpos = pos;
0c8533c6 2330 lastpos_byte = pos_byte;
080c45fd 2331 }
1e79ec24
KH
2332 else if (delbackslash)
2333 {
0c8533c6
RS
2334 middle = substring_both (newtext, lastpos + 1,
2335 lastpos_byte + 1,
2336 pos, pos_byte);
2337
1e79ec24
KH
2338 accum = concat2 (accum, middle);
2339 lastpos = pos;
0c8533c6 2340 lastpos_byte = pos_byte;
1e79ec24 2341 }
080c45fd
RS
2342 }
2343
2344 if (pos != lastpos + 1)
0c8533c6
RS
2345 middle = substring_both (newtext, lastpos + 1,
2346 lastpos_byte + 1,
2347 pos, pos_byte);
080c45fd
RS
2348 else
2349 middle = Qnil;
2350
2351 newtext = concat2 (accum, middle);
2352 }
2353
636a5e28 2354 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2355 if (case_action == all_caps)
2356 newtext = Fupcase (newtext);
2357 else if (case_action == cap_initial)
2b2eead9 2358 newtext = Fupcase_initials (newtext);
080c45fd
RS
2359
2360 return concat3 (before, newtext, after);
2361 }
2362
b0eba991
RS
2363 /* Record point, the move (quietly) to the start of the match. */
2364 if (PT > search_regs.start[sub])
2365 opoint = PT - ZV;
2366 else
2367 opoint = PT;
2368
fa8ed3e0 2369 TEMP_SET_PT (search_regs.start[sub]);
b0eba991 2370
9a76659d
JB
2371 /* We insert the replacement text before the old text, and then
2372 delete the original text. This means that markers at the
2373 beginning or end of the original will float to the corresponding
2374 position in the replacement. */
ca1d1d23 2375 if (!NILP (literal))
16fdc568 2376 Finsert_and_inherit (1, &newtext);
ca1d1d23
JB
2377 else
2378 {
2379 struct gcpro gcpro1;
16fdc568 2380 GCPRO1 (newtext);
ca1d1d23 2381
16fdc568 2382 for (pos = 0; pos < XSTRING (newtext)->size; pos++)
ca1d1d23 2383 {
6ec8bbd2 2384 int offset = PT - search_regs.start[sub];
9a76659d 2385
16fdc568 2386 c = XSTRING (newtext)->data[pos];
ca1d1d23
JB
2387 if (c == '\\')
2388 {
16fdc568 2389 c = XSTRING (newtext)->data[++pos];
ca1d1d23 2390 if (c == '&')
9a76659d
JB
2391 Finsert_buffer_substring
2392 (Fcurrent_buffer (),
d7a5ad5f
RS
2393 make_number (search_regs.start[sub] + offset),
2394 make_number (search_regs.end[sub] + offset));
78445046 2395 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
2396 {
2397 if (search_regs.start[c - '0'] >= 1)
9a76659d
JB
2398 Finsert_buffer_substring
2399 (Fcurrent_buffer (),
2400 make_number (search_regs.start[c - '0'] + offset),
2401 make_number (search_regs.end[c - '0'] + offset));
ca1d1d23 2402 }
636a5e28 2403 else if (c == '\\')
ca1d1d23 2404 insert_char (c);
636a5e28
RS
2405 else
2406 error ("Invalid use of `\\' in replacement text");
ca1d1d23
JB
2407 }
2408 else
2409 insert_char (c);
2410 }
2411 UNGCPRO;
2412 }
2413
6ec8bbd2 2414 inslen = PT - (search_regs.start[sub]);
d7a5ad5f 2415 del_range (search_regs.start[sub] + inslen, search_regs.end[sub] + inslen);
ca1d1d23
JB
2416
2417 if (case_action == all_caps)
6ec8bbd2 2418 Fupcase_region (make_number (PT - inslen), make_number (PT));
ca1d1d23 2419 else if (case_action == cap_initial)
6ec8bbd2 2420 Fupcase_initials_region (make_number (PT - inslen), make_number (PT));
b0eba991 2421
3e18eecf
RS
2422 newpoint = PT;
2423
b0eba991 2424 /* Put point back where it was in the text. */
8d808a65 2425 if (opoint <= 0)
fa8ed3e0 2426 TEMP_SET_PT (opoint + ZV);
b0eba991 2427 else
fa8ed3e0 2428 TEMP_SET_PT (opoint);
b0eba991
RS
2429
2430 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2431 move_if_not_intangible (newpoint);
b0eba991 2432
ca1d1d23
JB
2433 return Qnil;
2434}
2435\f
2436static Lisp_Object
2437match_limit (num, beginningp)
2438 Lisp_Object num;
2439 int beginningp;
2440{
2441 register int n;
2442
2443 CHECK_NUMBER (num, 0);
2444 n = XINT (num);
4746118a
JB
2445 if (n < 0 || n >= search_regs.num_regs)
2446 args_out_of_range (num, make_number (search_regs.num_regs));
2447 if (search_regs.num_regs <= 0
2448 || search_regs.start[n] < 0)
ca1d1d23
JB
2449 return Qnil;
2450 return (make_number ((beginningp) ? search_regs.start[n]
2451 : search_regs.end[n]));
2452}
2453
2454DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2455 "Return position of start of text matched by last search.\n\
5806161b
EN
2456SUBEXP, a number, specifies which parenthesized expression in the last\n\
2457 regexp.\n\
2458Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2459 SUBEXP pairs.\n\
ca1d1d23 2460Zero means the entire text matched by the whole regexp or whole string.")
5806161b
EN
2461 (subexp)
2462 Lisp_Object subexp;
ca1d1d23 2463{
5806161b 2464 return match_limit (subexp, 1);
ca1d1d23
JB
2465}
2466
2467DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2468 "Return position of end of text matched by last search.\n\
5806161b
EN
2469SUBEXP, a number, specifies which parenthesized expression in the last\n\
2470 regexp.\n\
2471Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2472 SUBEXP pairs.\n\
ca1d1d23 2473Zero means the entire text matched by the whole regexp or whole string.")
5806161b
EN
2474 (subexp)
2475 Lisp_Object subexp;
ca1d1d23 2476{
5806161b 2477 return match_limit (subexp, 0);
ca1d1d23
JB
2478}
2479
56256c2a 2480DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
ca1d1d23
JB
2481 "Return a list containing all info on what the last search matched.\n\
2482Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
2483All the elements are markers or nil (nil if the Nth pair didn't match)\n\
2484if the last match was on a buffer; integers or nil if a string was matched.\n\
56256c2a
RS
2485Use `store-match-data' to reinstate the data in this list.\n\
2486\n\
2487If INTEGERS (the optional first argument) is non-nil, always use integers\n\
8ca821e9 2488\(rather than markers) to represent buffer positions.\n\
56256c2a
RS
2489If REUSE is a list, reuse it as part of the value. If REUSE is long enough\n\
2490to hold all the values, and if INTEGERS is non-nil, no consing is done.")
2491 (integers, reuse)
2492 Lisp_Object integers, reuse;
ca1d1d23 2493{
56256c2a 2494 Lisp_Object tail, prev;
4746118a 2495 Lisp_Object *data;
ca1d1d23
JB
2496 int i, len;
2497
daa37602 2498 if (NILP (last_thing_searched))
c36bcf1b 2499 return Qnil;
daa37602 2500
4746118a
JB
2501 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2502 * sizeof (Lisp_Object));
2503
ca1d1d23 2504 len = -1;
4746118a 2505 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2506 {
2507 int start = search_regs.start[i];
2508 if (start >= 0)
2509 {
56256c2a
RS
2510 if (EQ (last_thing_searched, Qt)
2511 || ! NILP (integers))
ca1d1d23 2512 {
c235cce7
KH
2513 XSETFASTINT (data[2 * i], start);
2514 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2515 }
0ed62dc7 2516 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2517 {
2518 data[2 * i] = Fmake_marker ();
daa37602
JB
2519 Fset_marker (data[2 * i],
2520 make_number (start),
2521 last_thing_searched);
ca1d1d23
JB
2522 data[2 * i + 1] = Fmake_marker ();
2523 Fset_marker (data[2 * i + 1],
daa37602
JB
2524 make_number (search_regs.end[i]),
2525 last_thing_searched);
ca1d1d23 2526 }
daa37602
JB
2527 else
2528 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2529 abort ();
2530
ca1d1d23
JB
2531 len = i;
2532 }
2533 else
2534 data[2 * i] = data [2 * i + 1] = Qnil;
2535 }
56256c2a
RS
2536
2537 /* If REUSE is not usable, cons up the values and return them. */
2538 if (! CONSP (reuse))
2539 return Flist (2 * len + 2, data);
2540
2541 /* If REUSE is a list, store as many value elements as will fit
2542 into the elements of REUSE. */
2543 for (i = 0, tail = reuse; CONSP (tail);
2544 i++, tail = XCONS (tail)->cdr)
2545 {
2546 if (i < 2 * len + 2)
2547 XCONS (tail)->car = data[i];
2548 else
2549 XCONS (tail)->car = Qnil;
2550 prev = tail;
2551 }
2552
2553 /* If we couldn't fit all value elements into REUSE,
2554 cons up the rest of them and add them to the end of REUSE. */
2555 if (i < 2 * len + 2)
2556 XCONS (prev)->cdr = Flist (2 * len + 2 - i, data + i);
2557
2558 return reuse;
ca1d1d23
JB
2559}
2560
2561
3f1c005b 2562DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
ca1d1d23
JB
2563 "Set internal data on last search match from elements of LIST.\n\
2564LIST should have been created by calling `match-data' previously.")
2565 (list)
2566 register Lisp_Object list;
2567{
2568 register int i;
2569 register Lisp_Object marker;
2570
7074fde6
FP
2571 if (running_asynch_code)
2572 save_search_regs ();
2573
ca1d1d23 2574 if (!CONSP (list) && !NILP (list))
b37902c8 2575 list = wrong_type_argument (Qconsp, list);
ca1d1d23 2576
daa37602
JB
2577 /* Unless we find a marker with a buffer in LIST, assume that this
2578 match data came from a string. */
2579 last_thing_searched = Qt;
2580
4746118a
JB
2581 /* Allocate registers if they don't already exist. */
2582 {
d084e942 2583 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
2584
2585 if (length > search_regs.num_regs)
2586 {
1113d9db
JB
2587 if (search_regs.num_regs == 0)
2588 {
2589 search_regs.start
2590 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2591 search_regs.end
2592 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2593 }
4746118a 2594 else
1113d9db
JB
2595 {
2596 search_regs.start
2597 = (regoff_t *) xrealloc (search_regs.start,
2598 length * sizeof (regoff_t));
2599 search_regs.end
2600 = (regoff_t *) xrealloc (search_regs.end,
2601 length * sizeof (regoff_t));
2602 }
4746118a 2603
487282dc 2604 search_regs.num_regs = length;
4746118a
JB
2605 }
2606 }
2607
2608 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2609 {
2610 marker = Fcar (list);
2611 if (NILP (marker))
2612 {
2613 search_regs.start[i] = -1;
2614 list = Fcdr (list);
2615 }
2616 else
2617 {
0ed62dc7 2618 if (MARKERP (marker))
daa37602
JB
2619 {
2620 if (XMARKER (marker)->buffer == 0)
c235cce7 2621 XSETFASTINT (marker, 0);
daa37602 2622 else
a3668d92 2623 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
daa37602 2624 }
ca1d1d23
JB
2625
2626 CHECK_NUMBER_COERCE_MARKER (marker, 0);
2627 search_regs.start[i] = XINT (marker);
2628 list = Fcdr (list);
2629
2630 marker = Fcar (list);
0ed62dc7 2631 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
c235cce7 2632 XSETFASTINT (marker, 0);
ca1d1d23
JB
2633
2634 CHECK_NUMBER_COERCE_MARKER (marker, 0);
2635 search_regs.end[i] = XINT (marker);
2636 }
2637 list = Fcdr (list);
2638 }
2639
2640 return Qnil;
2641}
2642
7074fde6
FP
2643/* If non-zero the match data have been saved in saved_search_regs
2644 during the execution of a sentinel or filter. */
75ebf74b 2645static int search_regs_saved;
7074fde6
FP
2646static struct re_registers saved_search_regs;
2647
2648/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2649 if asynchronous code (filter or sentinel) is running. */
2650static void
2651save_search_regs ()
2652{
2653 if (!search_regs_saved)
2654 {
2655 saved_search_regs.num_regs = search_regs.num_regs;
2656 saved_search_regs.start = search_regs.start;
2657 saved_search_regs.end = search_regs.end;
2658 search_regs.num_regs = 0;
2d4a771a
RS
2659 search_regs.start = 0;
2660 search_regs.end = 0;
7074fde6
FP
2661
2662 search_regs_saved = 1;
2663 }
2664}
2665
2666/* Called upon exit from filters and sentinels. */
2667void
2668restore_match_data ()
2669{
2670 if (search_regs_saved)
2671 {
2672 if (search_regs.num_regs > 0)
2673 {
2674 xfree (search_regs.start);
2675 xfree (search_regs.end);
2676 }
2677 search_regs.num_regs = saved_search_regs.num_regs;
2678 search_regs.start = saved_search_regs.start;
2679 search_regs.end = saved_search_regs.end;
2680
2681 search_regs_saved = 0;
2682 }
2683}
2684
ca1d1d23
JB
2685/* Quote a string to inactivate reg-expr chars */
2686
2687DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2688 "Return a regexp string which matches exactly STRING and nothing else.")
5806161b
EN
2689 (string)
2690 Lisp_Object string;
ca1d1d23
JB
2691{
2692 register unsigned char *in, *out, *end;
2693 register unsigned char *temp;
0c8533c6 2694 int backslashes_added = 0;
ca1d1d23 2695
5806161b 2696 CHECK_STRING (string, 0);
ca1d1d23 2697
fc932ac6 2698 temp = (unsigned char *) alloca (STRING_BYTES (XSTRING (string)) * 2);
ca1d1d23
JB
2699
2700 /* Now copy the data into the new string, inserting escapes. */
2701
5806161b 2702 in = XSTRING (string)->data;
fc932ac6 2703 end = in + STRING_BYTES (XSTRING (string));
ca1d1d23
JB
2704 out = temp;
2705
2706 for (; in != end; in++)
2707 {
2708 if (*in == '[' || *in == ']'
2709 || *in == '*' || *in == '.' || *in == '\\'
2710 || *in == '?' || *in == '+'
2711 || *in == '^' || *in == '$')
0c8533c6 2712 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
2713 *out++ = *in;
2714 }
2715
3f8100f1 2716 return make_specified_string (temp,
0c8533c6 2717 XSTRING (string)->size + backslashes_added,
3f8100f1
RS
2718 out - temp,
2719 STRING_MULTIBYTE (string));
ca1d1d23
JB
2720}
2721\f
dfcf069d 2722void
ca1d1d23
JB
2723syms_of_search ()
2724{
2725 register int i;
2726
487282dc
KH
2727 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2728 {
2729 searchbufs[i].buf.allocated = 100;
2730 searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
2731 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2732 searchbufs[i].regexp = Qnil;
2733 staticpro (&searchbufs[i].regexp);
2734 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2735 }
2736 searchbuf_head = &searchbufs[0];
ca1d1d23
JB
2737
2738 Qsearch_failed = intern ("search-failed");
2739 staticpro (&Qsearch_failed);
2740 Qinvalid_regexp = intern ("invalid-regexp");
2741 staticpro (&Qinvalid_regexp);
2742
2743 Fput (Qsearch_failed, Qerror_conditions,
2744 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2745 Fput (Qsearch_failed, Qerror_message,
2746 build_string ("Search failed"));
2747
2748 Fput (Qinvalid_regexp, Qerror_conditions,
2749 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2750 Fput (Qinvalid_regexp, Qerror_message,
2751 build_string ("Invalid regexp"));
2752
daa37602
JB
2753 last_thing_searched = Qnil;
2754 staticpro (&last_thing_searched);
2755
ca1d1d23 2756 defsubr (&Slooking_at);
b819a390
RS
2757 defsubr (&Sposix_looking_at);
2758 defsubr (&Sstring_match);
2759 defsubr (&Sposix_string_match);
ca1d1d23
JB
2760 defsubr (&Ssearch_forward);
2761 defsubr (&Ssearch_backward);
2762 defsubr (&Sword_search_forward);
2763 defsubr (&Sword_search_backward);
2764 defsubr (&Sre_search_forward);
2765 defsubr (&Sre_search_backward);
b819a390
RS
2766 defsubr (&Sposix_search_forward);
2767 defsubr (&Sposix_search_backward);
ca1d1d23
JB
2768 defsubr (&Sreplace_match);
2769 defsubr (&Smatch_beginning);
2770 defsubr (&Smatch_end);
2771 defsubr (&Smatch_data);
3f1c005b 2772 defsubr (&Sset_match_data);
ca1d1d23
JB
2773 defsubr (&Sregexp_quote);
2774}