(add-log-current-defun) <lisp>:
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
31c8f881 2 Copyright (C) 1985, 86, 87, 93, 94, 97, 1998 Free Software Foundation, Inc.
ca1d1d23
JB
3
4This file is part of GNU Emacs.
5
6GNU Emacs is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
7c938215 8the Free Software Foundation; either version 2, or (at your option)
ca1d1d23
JB
9any later version.
10
11GNU Emacs is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU Emacs; see the file COPYING. If not, write to
3b7ad313
EN
18the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19Boston, MA 02111-1307, USA. */
ca1d1d23
JB
20
21
18160b98 22#include <config.h>
dfcf069d
AS
23#ifdef STDC_HEADERS
24#include <stdlib.h>
25#endif
ca1d1d23
JB
26#include "lisp.h"
27#include "syntax.h"
5679531d 28#include "category.h"
ca1d1d23 29#include "buffer.h"
5679531d 30#include "charset.h"
9169c321 31#include "region-cache.h"
ca1d1d23 32#include "commands.h"
9ac0d9e0 33#include "blockinput.h"
bf1760bb 34#include "intervals.h"
4746118a 35
ca1d1d23
JB
36#include <sys/types.h>
37#include "regex.h"
38
67ce527d
KH
39#define min(a, b) ((a) < (b) ? (a) : (b))
40#define max(a, b) ((a) > (b) ? (a) : (b))
41
1d288aef 42#define REGEXP_CACHE_SIZE 20
ca1d1d23 43
487282dc
KH
44/* If the regexp is non-nil, then the buffer contains the compiled form
45 of that regexp, suitable for searching. */
1d288aef
RS
46struct regexp_cache
47{
487282dc
KH
48 struct regexp_cache *next;
49 Lisp_Object regexp;
50 struct re_pattern_buffer buf;
51 char fastmap[0400];
b819a390
RS
52 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
53 char posix;
487282dc 54};
ca1d1d23 55
487282dc
KH
56/* The instances of that struct. */
57struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 58
487282dc
KH
59/* The head of the linked list; points to the most recently used buffer. */
60struct regexp_cache *searchbuf_head;
ca1d1d23 61
ca1d1d23 62
4746118a
JB
63/* Every call to re_match, etc., must pass &search_regs as the regs
64 argument unless you can show it is unnecessary (i.e., if re_match
65 is certainly going to be called again before region-around-match
66 can be called).
67
68 Since the registers are now dynamically allocated, we need to make
69 sure not to refer to the Nth register before checking that it has
1113d9db
JB
70 been allocated by checking search_regs.num_regs.
71
72 The regex code keeps track of whether it has allocated the search
487282dc
KH
73 buffer using bits in the re_pattern_buffer. This means that whenever
74 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
75 allocated any registers, and will allocate new registers the next
76 time you call a searching or matching function. Therefore, we need
77 to call re_set_registers after compiling a new pattern or after
78 setting the match registers, so that the regex functions will be
79 able to free or re-allocate it properly. */
ca1d1d23
JB
80static struct re_registers search_regs;
81
daa37602
JB
82/* The buffer in which the last search was performed, or
83 Qt if the last search was done in a string;
84 Qnil if no searching has been done yet. */
85static Lisp_Object last_thing_searched;
ca1d1d23 86
8e6208c5 87/* error condition signaled when regexp compile_pattern fails */
ca1d1d23
JB
88
89Lisp_Object Qinvalid_regexp;
90
ca325161 91static void set_search_regs ();
044f81f1 92static void save_search_regs ();
facdc750
RS
93static int simple_search ();
94static int boyer_moore ();
b819a390
RS
95static int search_buffer ();
96
ca1d1d23
JB
97static void
98matcher_overflow ()
99{
100 error ("Stack overflow in regexp matcher");
101}
102
103#ifdef __STDC__
104#define CONST const
105#else
106#define CONST
107#endif
108
b819a390
RS
109/* Compile a regexp and signal a Lisp error if anything goes wrong.
110 PATTERN is the pattern to compile.
111 CP is the place to put the result.
facdc750 112 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
113 REGP is the structure that says where to store the "register"
114 values that will result from matching this pattern.
115 If it is 0, we should compile the pattern not to record any
116 subexpression bounds.
117 POSIX is nonzero if we want full backtracking (POSIX style)
5679531d
KH
118 for this pattern. 0 means backtrack only enough to get a valid match.
119 MULTIBYTE is nonzero if we want to handle multibyte characters in
120 PATTERN. 0 means all multibyte characters are recognized just as
121 sequences of binary data. */
ca1d1d23 122
487282dc 123static void
5679531d 124compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
487282dc 125 struct regexp_cache *cp;
ca1d1d23 126 Lisp_Object pattern;
facdc750 127 Lisp_Object translate;
487282dc 128 struct re_registers *regp;
b819a390 129 int posix;
5679531d 130 int multibyte;
ca1d1d23 131{
7276d3d8 132 unsigned char *raw_pattern;
f8bd51c4 133 int raw_pattern_size;
d451e4db 134 char *val;
b819a390 135 reg_syntax_t old;
ca1d1d23 136
f8bd51c4
KH
137 /* MULTIBYTE says whether the text to be searched is multibyte.
138 We must convert PATTERN to match that, or we will not really
139 find things right. */
140
141 if (multibyte == STRING_MULTIBYTE (pattern))
142 {
7276d3d8 143 raw_pattern = (unsigned char *) XSTRING (pattern)->data;
fc932ac6 144 raw_pattern_size = STRING_BYTES (XSTRING (pattern));
f8bd51c4
KH
145 }
146 else if (multibyte)
147 {
148 raw_pattern_size = count_size_as_multibyte (XSTRING (pattern)->data,
149 XSTRING (pattern)->size);
7276d3d8 150 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
f8bd51c4
KH
151 copy_text (XSTRING (pattern)->data, raw_pattern,
152 XSTRING (pattern)->size, 0, 1);
153 }
154 else
155 {
156 /* Converting multibyte to single-byte.
157
158 ??? Perhaps this conversion should be done in a special way
159 by subtracting nonascii-insert-offset from each non-ASCII char,
160 so that only the multibyte chars which really correspond to
161 the chosen single-byte character set can possibly match. */
162 raw_pattern_size = XSTRING (pattern)->size;
7276d3d8 163 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
f8bd51c4 164 copy_text (XSTRING (pattern)->data, raw_pattern,
fc932ac6 165 STRING_BYTES (XSTRING (pattern)), 1, 0);
f8bd51c4
KH
166 }
167
487282dc 168 cp->regexp = Qnil;
59fab369 169 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
b819a390 170 cp->posix = posix;
5679531d 171 cp->buf.multibyte = multibyte;
9ac0d9e0 172 BLOCK_INPUT;
b819a390
RS
173 old = re_set_syntax (RE_SYNTAX_EMACS
174 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
7276d3d8
RS
175 val = (char *) re_compile_pattern ((char *)raw_pattern,
176 raw_pattern_size, &cp->buf);
b819a390 177 re_set_syntax (old);
9ac0d9e0 178 UNBLOCK_INPUT;
ca1d1d23 179 if (val)
487282dc 180 Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
1113d9db 181
487282dc 182 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
183}
184
185/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
186 the cache.
187 PATTERN is the pattern to compile.
facdc750 188 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
189 REGP is the structure that says where to store the "register"
190 values that will result from matching this pattern.
191 If it is 0, we should compile the pattern not to record any
192 subexpression bounds.
193 POSIX is nonzero if we want full backtracking (POSIX style)
194 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
195
196struct re_pattern_buffer *
0c8533c6 197compile_pattern (pattern, regp, translate, posix, multibyte)
487282dc
KH
198 Lisp_Object pattern;
199 struct re_registers *regp;
facdc750 200 Lisp_Object translate;
0c8533c6 201 int posix, multibyte;
487282dc
KH
202{
203 struct regexp_cache *cp, **cpp;
204
205 for (cpp = &searchbuf_head; ; cpp = &cp->next)
206 {
207 cp = *cpp;
1d288aef
RS
208 if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
209 && !NILP (Fstring_equal (cp->regexp, pattern))
59fab369 210 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
5679531d
KH
211 && cp->posix == posix
212 && cp->buf.multibyte == multibyte)
487282dc
KH
213 break;
214
215 /* If we're at the end of the cache, compile into the last cell. */
216 if (cp->next == 0)
217 {
5679531d 218 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
487282dc
KH
219 break;
220 }
221 }
222
223 /* When we get here, cp (aka *cpp) contains the compiled pattern,
224 either because we found it in the cache or because we just compiled it.
225 Move it to the front of the queue to mark it as most recently used. */
226 *cpp = cp->next;
227 cp->next = searchbuf_head;
228 searchbuf_head = cp;
1113d9db 229
6639708c
RS
230 /* Advise the searching functions about the space we have allocated
231 for register data. */
232 if (regp)
233 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
234
487282dc 235 return &cp->buf;
ca1d1d23
JB
236}
237
238/* Error condition used for failing searches */
239Lisp_Object Qsearch_failed;
240
241Lisp_Object
242signal_failure (arg)
243 Lisp_Object arg;
244{
245 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
246 return Qnil;
247}
248\f
b819a390
RS
249static Lisp_Object
250looking_at_1 (string, posix)
ca1d1d23 251 Lisp_Object string;
b819a390 252 int posix;
ca1d1d23
JB
253{
254 Lisp_Object val;
255 unsigned char *p1, *p2;
256 int s1, s2;
257 register int i;
487282dc 258 struct re_pattern_buffer *bufp;
ca1d1d23 259
7074fde6
FP
260 if (running_asynch_code)
261 save_search_regs ();
262
ca1d1d23 263 CHECK_STRING (string, 0);
487282dc
KH
264 bufp = compile_pattern (string, &search_regs,
265 (!NILP (current_buffer->case_fold_search)
facdc750 266 ? DOWNCASE_TABLE : Qnil),
0c8533c6
RS
267 posix,
268 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23
JB
269
270 immediate_quit = 1;
271 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
272
273 /* Get pointers and sizes of the two strings
274 that make up the visible portion of the buffer. */
275
276 p1 = BEGV_ADDR;
fa8ed3e0 277 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 278 p2 = GAP_END_ADDR;
fa8ed3e0 279 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
280 if (s1 < 0)
281 {
282 p2 = p1;
fa8ed3e0 283 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
284 s1 = 0;
285 }
286 if (s2 < 0)
287 {
fa8ed3e0 288 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
289 s2 = 0;
290 }
8bb43c28
RS
291
292 re_match_object = Qnil;
ca1d1d23 293
487282dc 294 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
fa8ed3e0
RS
295 PT_BYTE - BEGV_BYTE, &search_regs,
296 ZV_BYTE - BEGV_BYTE);
ca1d1d23
JB
297 if (i == -2)
298 matcher_overflow ();
299
300 val = (0 <= i ? Qt : Qnil);
fa8ed3e0
RS
301 if (i >= 0)
302 for (i = 0; i < search_regs.num_regs; i++)
303 if (search_regs.start[i] >= 0)
304 {
305 search_regs.start[i]
306 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
307 search_regs.end[i]
308 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
309 }
a3668d92 310 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
311 immediate_quit = 0;
312 return val;
313}
314
b819a390 315DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
94f94972 316 "Return t if text after point matches regular expression REGEXP.\n\
b819a390
RS
317This function modifies the match data that `match-beginning',\n\
318`match-end' and `match-data' access; save and restore the match\n\
319data if you want to preserve them.")
94f94972
RS
320 (regexp)
321 Lisp_Object regexp;
b819a390 322{
94f94972 323 return looking_at_1 (regexp, 0);
b819a390
RS
324}
325
326DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
94f94972 327 "Return t if text after point matches regular expression REGEXP.\n\
b819a390
RS
328Find the longest match, in accord with Posix regular expression rules.\n\
329This function modifies the match data that `match-beginning',\n\
330`match-end' and `match-data' access; save and restore the match\n\
331data if you want to preserve them.")
94f94972
RS
332 (regexp)
333 Lisp_Object regexp;
b819a390 334{
94f94972 335 return looking_at_1 (regexp, 1);
b819a390
RS
336}
337\f
338static Lisp_Object
339string_match_1 (regexp, string, start, posix)
ca1d1d23 340 Lisp_Object regexp, string, start;
b819a390 341 int posix;
ca1d1d23
JB
342{
343 int val;
487282dc 344 struct re_pattern_buffer *bufp;
0c8533c6
RS
345 int pos, pos_byte;
346 int i;
ca1d1d23 347
7074fde6
FP
348 if (running_asynch_code)
349 save_search_regs ();
350
ca1d1d23
JB
351 CHECK_STRING (regexp, 0);
352 CHECK_STRING (string, 1);
353
354 if (NILP (start))
0c8533c6 355 pos = 0, pos_byte = 0;
ca1d1d23
JB
356 else
357 {
358 int len = XSTRING (string)->size;
359
360 CHECK_NUMBER (start, 2);
0c8533c6
RS
361 pos = XINT (start);
362 if (pos < 0 && -pos <= len)
363 pos = len + pos;
364 else if (0 > pos || pos > len)
ca1d1d23 365 args_out_of_range (string, start);
0c8533c6 366 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
367 }
368
487282dc
KH
369 bufp = compile_pattern (regexp, &search_regs,
370 (!NILP (current_buffer->case_fold_search)
facdc750 371 ? DOWNCASE_TABLE : Qnil),
0c8533c6
RS
372 posix,
373 STRING_MULTIBYTE (string));
ca1d1d23 374 immediate_quit = 1;
8bb43c28
RS
375 re_match_object = string;
376
487282dc 377 val = re_search (bufp, (char *) XSTRING (string)->data,
fc932ac6
RS
378 STRING_BYTES (XSTRING (string)), pos_byte,
379 STRING_BYTES (XSTRING (string)) - pos_byte,
ca1d1d23
JB
380 &search_regs);
381 immediate_quit = 0;
daa37602 382 last_thing_searched = Qt;
ca1d1d23
JB
383 if (val == -2)
384 matcher_overflow ();
385 if (val < 0) return Qnil;
0c8533c6
RS
386
387 for (i = 0; i < search_regs.num_regs; i++)
388 if (search_regs.start[i] >= 0)
389 {
390 search_regs.start[i]
391 = string_byte_to_char (string, search_regs.start[i]);
392 search_regs.end[i]
393 = string_byte_to_char (string, search_regs.end[i]);
394 }
395
396 return make_number (string_byte_to_char (string, val));
ca1d1d23 397}
e59a8453 398
b819a390
RS
399DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
400 "Return index of start of first match for REGEXP in STRING, or nil.\n\
401If third arg START is non-nil, start search at that index in STRING.\n\
402For index of first char beyond the match, do (match-end 0).\n\
403`match-end' and `match-beginning' also give indices of substrings\n\
404matched by parenthesis constructs in the pattern.")
405 (regexp, string, start)
406 Lisp_Object regexp, string, start;
407{
408 return string_match_1 (regexp, string, start, 0);
409}
410
411DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
412 "Return index of start of first match for REGEXP in STRING, or nil.\n\
413Find the longest match, in accord with Posix regular expression rules.\n\
414If third arg START is non-nil, start search at that index in STRING.\n\
415For index of first char beyond the match, do (match-end 0).\n\
416`match-end' and `match-beginning' also give indices of substrings\n\
417matched by parenthesis constructs in the pattern.")
418 (regexp, string, start)
419 Lisp_Object regexp, string, start;
420{
421 return string_match_1 (regexp, string, start, 1);
422}
423
e59a8453
RS
424/* Match REGEXP against STRING, searching all of STRING,
425 and return the index of the match, or negative on failure.
426 This does not clobber the match data. */
427
428int
429fast_string_match (regexp, string)
430 Lisp_Object regexp, string;
431{
432 int val;
487282dc 433 struct re_pattern_buffer *bufp;
e59a8453 434
facdc750
RS
435 bufp = compile_pattern (regexp, 0, Qnil,
436 0, STRING_MULTIBYTE (string));
e59a8453 437 immediate_quit = 1;
8bb43c28
RS
438 re_match_object = string;
439
487282dc 440 val = re_search (bufp, (char *) XSTRING (string)->data,
fc932ac6
RS
441 STRING_BYTES (XSTRING (string)), 0,
442 STRING_BYTES (XSTRING (string)), 0);
e59a8453
RS
443 immediate_quit = 0;
444 return val;
445}
5679531d
KH
446
447/* Match REGEXP against STRING, searching all of STRING ignoring case,
448 and return the index of the match, or negative on failure.
0c8533c6
RS
449 This does not clobber the match data.
450 We assume that STRING contains single-byte characters. */
5679531d
KH
451
452extern Lisp_Object Vascii_downcase_table;
453
454int
b4577c63 455fast_c_string_match_ignore_case (regexp, string)
5679531d
KH
456 Lisp_Object regexp;
457 char *string;
458{
459 int val;
460 struct re_pattern_buffer *bufp;
461 int len = strlen (string);
462
0c8533c6 463 regexp = string_make_unibyte (regexp);
b4577c63 464 re_match_object = Qt;
5679531d 465 bufp = compile_pattern (regexp, 0,
facdc750 466 Vascii_downcase_table, 0,
f8bd51c4 467 0);
5679531d
KH
468 immediate_quit = 1;
469 val = re_search (bufp, string, len, 0, len, 0);
470 immediate_quit = 0;
471 return val;
472}
ca1d1d23 473\f
9169c321
JB
474/* The newline cache: remembering which sections of text have no newlines. */
475
476/* If the user has requested newline caching, make sure it's on.
477 Otherwise, make sure it's off.
478 This is our cheezy way of associating an action with the change of
479 state of a buffer-local variable. */
480static void
481newline_cache_on_off (buf)
482 struct buffer *buf;
483{
484 if (NILP (buf->cache_long_line_scans))
485 {
486 /* It should be off. */
487 if (buf->newline_cache)
488 {
489 free_region_cache (buf->newline_cache);
490 buf->newline_cache = 0;
491 }
492 }
493 else
494 {
495 /* It should be on. */
496 if (buf->newline_cache == 0)
497 buf->newline_cache = new_region_cache ();
498 }
499}
500
501\f
502/* Search for COUNT instances of the character TARGET between START and END.
503
504 If COUNT is positive, search forwards; END must be >= START.
505 If COUNT is negative, search backwards for the -COUNTth instance;
506 END must be <= START.
507 If COUNT is zero, do anything you please; run rogue, for all I care.
508
509 If END is zero, use BEGV or ZV instead, as appropriate for the
510 direction indicated by COUNT.
ffd56f97
JB
511
512 If we find COUNT instances, set *SHORTAGE to zero, and return the
5bfe95c9
RS
513 position after the COUNTth match. Note that for reverse motion
514 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 515
9169c321
JB
516 If we don't find COUNT instances before reaching END, set *SHORTAGE
517 to the number of TARGETs left unfound, and return END.
ffd56f97 518
087a5f81
RS
519 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
520 except when inside redisplay. */
521
dfcf069d 522int
9169c321
JB
523scan_buffer (target, start, end, count, shortage, allow_quit)
524 register int target;
525 int start, end;
526 int count;
527 int *shortage;
087a5f81 528 int allow_quit;
ca1d1d23 529{
9169c321
JB
530 struct region_cache *newline_cache;
531 int direction;
ffd56f97 532
9169c321
JB
533 if (count > 0)
534 {
535 direction = 1;
536 if (! end) end = ZV;
537 }
538 else
539 {
540 direction = -1;
541 if (! end) end = BEGV;
542 }
ffd56f97 543
9169c321
JB
544 newline_cache_on_off (current_buffer);
545 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
546
547 if (shortage != 0)
548 *shortage = 0;
549
087a5f81 550 immediate_quit = allow_quit;
ca1d1d23 551
ffd56f97 552 if (count > 0)
9169c321 553 while (start != end)
ca1d1d23 554 {
9169c321
JB
555 /* Our innermost scanning loop is very simple; it doesn't know
556 about gaps, buffer ends, or the newline cache. ceiling is
557 the position of the last character before the next such
558 obstacle --- the last character the dumb search loop should
559 examine. */
fa8ed3e0
RS
560 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
561 int start_byte = CHAR_TO_BYTE (start);
67ce527d 562 int tem;
9169c321
JB
563
564 /* If we're looking for a newline, consult the newline cache
565 to see where we can avoid some scanning. */
566 if (target == '\n' && newline_cache)
567 {
568 int next_change;
569 immediate_quit = 0;
570 while (region_cache_forward
fa8ed3e0
RS
571 (current_buffer, newline_cache, start_byte, &next_change))
572 start_byte = next_change;
cbe0db0d 573 immediate_quit = allow_quit;
9169c321 574
fa8ed3e0
RS
575 /* START should never be after END. */
576 if (start_byte > ceiling_byte)
577 start_byte = ceiling_byte;
9169c321
JB
578
579 /* Now the text after start is an unknown region, and
580 next_change is the position of the next known region. */
fa8ed3e0 581 ceiling_byte = min (next_change - 1, ceiling_byte);
9169c321
JB
582 }
583
584 /* The dumb loop can only scan text stored in contiguous
585 bytes. BUFFER_CEILING_OF returns the last character
586 position that is contiguous, so the ceiling is the
587 position after that. */
67ce527d
KH
588 tem = BUFFER_CEILING_OF (start_byte);
589 ceiling_byte = min (tem, ceiling_byte);
9169c321
JB
590
591 {
592 /* The termination address of the dumb loop. */
fa8ed3e0
RS
593 register unsigned char *ceiling_addr
594 = BYTE_POS_ADDR (ceiling_byte) + 1;
595 register unsigned char *cursor
596 = BYTE_POS_ADDR (start_byte);
9169c321
JB
597 unsigned char *base = cursor;
598
599 while (cursor < ceiling_addr)
600 {
601 unsigned char *scan_start = cursor;
602
603 /* The dumb loop. */
604 while (*cursor != target && ++cursor < ceiling_addr)
605 ;
606
607 /* If we're looking for newlines, cache the fact that
608 the region from start to cursor is free of them. */
609 if (target == '\n' && newline_cache)
610 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
611 start_byte + scan_start - base,
612 start_byte + cursor - base);
9169c321
JB
613
614 /* Did we find the target character? */
615 if (cursor < ceiling_addr)
616 {
617 if (--count == 0)
618 {
619 immediate_quit = 0;
fa8ed3e0 620 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
9169c321
JB
621 }
622 cursor++;
623 }
624 }
625
fa8ed3e0 626 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321 627 }
ca1d1d23
JB
628 }
629 else
9169c321
JB
630 while (start > end)
631 {
632 /* The last character to check before the next obstacle. */
fa8ed3e0
RS
633 int ceiling_byte = CHAR_TO_BYTE (end);
634 int start_byte = CHAR_TO_BYTE (start);
67ce527d 635 int tem;
9169c321
JB
636
637 /* Consult the newline cache, if appropriate. */
638 if (target == '\n' && newline_cache)
639 {
640 int next_change;
641 immediate_quit = 0;
642 while (region_cache_backward
fa8ed3e0
RS
643 (current_buffer, newline_cache, start_byte, &next_change))
644 start_byte = next_change;
cbe0db0d 645 immediate_quit = allow_quit;
9169c321
JB
646
647 /* Start should never be at or before end. */
fa8ed3e0
RS
648 if (start_byte <= ceiling_byte)
649 start_byte = ceiling_byte + 1;
9169c321
JB
650
651 /* Now the text before start is an unknown region, and
652 next_change is the position of the next known region. */
fa8ed3e0 653 ceiling_byte = max (next_change, ceiling_byte);
9169c321
JB
654 }
655
656 /* Stop scanning before the gap. */
67ce527d
KH
657 tem = BUFFER_FLOOR_OF (start_byte - 1);
658 ceiling_byte = max (tem, ceiling_byte);
9169c321
JB
659
660 {
661 /* The termination address of the dumb loop. */
fa8ed3e0
RS
662 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
663 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
664 unsigned char *base = cursor;
665
666 while (cursor >= ceiling_addr)
667 {
668 unsigned char *scan_start = cursor;
669
670 while (*cursor != target && --cursor >= ceiling_addr)
671 ;
672
673 /* If we're looking for newlines, cache the fact that
674 the region from after the cursor to start is free of them. */
675 if (target == '\n' && newline_cache)
676 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
677 start_byte + cursor - base,
678 start_byte + scan_start - base);
9169c321
JB
679
680 /* Did we find the target character? */
681 if (cursor >= ceiling_addr)
682 {
683 if (++count >= 0)
684 {
685 immediate_quit = 0;
fa8ed3e0 686 return BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
687 }
688 cursor--;
689 }
690 }
691
fa8ed3e0 692 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
693 }
694 }
695
ca1d1d23
JB
696 immediate_quit = 0;
697 if (shortage != 0)
ffd56f97 698 *shortage = count * direction;
9169c321 699 return start;
ca1d1d23 700}
fa8ed3e0
RS
701\f
702/* Search for COUNT instances of a line boundary, which means either a
703 newline or (if selective display enabled) a carriage return.
704 Start at START. If COUNT is negative, search backwards.
705
706 We report the resulting position by calling TEMP_SET_PT_BOTH.
707
708 If we find COUNT instances. we position after (always after,
709 even if scanning backwards) the COUNTth match, and return 0.
710
711 If we don't find COUNT instances before reaching the end of the
712 buffer (or the beginning, if scanning backwards), we return
713 the number of line boundaries left unfound, and position at
714 the limit we bumped up against.
715
716 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
d5d57b92 717 except in special cases. */
ca1d1d23 718
63fa018d 719int
fa8ed3e0
RS
720scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
721 int start, start_byte;
722 int limit, limit_byte;
723 register int count;
724 int allow_quit;
63fa018d 725{
fa8ed3e0
RS
726 int direction = ((count > 0) ? 1 : -1);
727
728 register unsigned char *cursor;
729 unsigned char *base;
730
731 register int ceiling;
732 register unsigned char *ceiling_addr;
733
d5d57b92
RS
734 int old_immediate_quit = immediate_quit;
735
fa8ed3e0
RS
736 /* If we are not in selective display mode,
737 check only for newlines. */
738 int selective_display = (!NILP (current_buffer->selective_display)
739 && !INTEGERP (current_buffer->selective_display));
740
741 /* The code that follows is like scan_buffer
742 but checks for either newline or carriage return. */
743
d5d57b92
RS
744 if (allow_quit)
745 immediate_quit++;
fa8ed3e0
RS
746
747 start_byte = CHAR_TO_BYTE (start);
748
749 if (count > 0)
750 {
751 while (start_byte < limit_byte)
752 {
753 ceiling = BUFFER_CEILING_OF (start_byte);
754 ceiling = min (limit_byte - 1, ceiling);
755 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
756 base = (cursor = BYTE_POS_ADDR (start_byte));
757 while (1)
758 {
759 while (*cursor != '\n' && ++cursor != ceiling_addr)
760 ;
761
762 if (cursor != ceiling_addr)
763 {
764 if (--count == 0)
765 {
d5d57b92 766 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
767 start_byte = start_byte + cursor - base + 1;
768 start = BYTE_TO_CHAR (start_byte);
769 TEMP_SET_PT_BOTH (start, start_byte);
770 return 0;
771 }
772 else
773 if (++cursor == ceiling_addr)
774 break;
775 }
776 else
777 break;
778 }
779 start_byte += cursor - base;
780 }
781 }
782 else
783 {
fa8ed3e0
RS
784 while (start_byte > limit_byte)
785 {
786 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
787 ceiling = max (limit_byte, ceiling);
788 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
789 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
790 while (1)
791 {
792 while (--cursor != ceiling_addr && *cursor != '\n')
793 ;
794
795 if (cursor != ceiling_addr)
796 {
797 if (++count == 0)
798 {
d5d57b92 799 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
800 /* Return the position AFTER the match we found. */
801 start_byte = start_byte + cursor - base + 1;
802 start = BYTE_TO_CHAR (start_byte);
803 TEMP_SET_PT_BOTH (start, start_byte);
804 return 0;
805 }
806 }
807 else
808 break;
809 }
810 /* Here we add 1 to compensate for the last decrement
811 of CURSOR, which took it past the valid range. */
812 start_byte += cursor - base + 1;
813 }
814 }
815
816 TEMP_SET_PT_BOTH (limit, limit_byte);
d5d57b92 817 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
818
819 return count * direction;
63fa018d
RS
820}
821
ca1d1d23 822int
fa8ed3e0 823find_next_newline_no_quit (from, cnt)
ca1d1d23
JB
824 register int from, cnt;
825{
fa8ed3e0 826 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
9169c321
JB
827}
828
9169c321
JB
829/* Like find_next_newline, but returns position before the newline,
830 not after, and only search up to TO. This isn't just
831 find_next_newline (...)-1, because you might hit TO. */
fa8ed3e0 832
9169c321
JB
833int
834find_before_next_newline (from, to, cnt)
cbe0db0d 835 int from, to, cnt;
9169c321
JB
836{
837 int shortage;
838 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
839
840 if (shortage == 0)
841 pos--;
842
843 return pos;
ca1d1d23
JB
844}
845\f
ca1d1d23
JB
846/* Subroutines of Lisp buffer search functions. */
847
848static Lisp_Object
b819a390 849search_command (string, bound, noerror, count, direction, RE, posix)
ca1d1d23
JB
850 Lisp_Object string, bound, noerror, count;
851 int direction;
852 int RE;
b819a390 853 int posix;
ca1d1d23
JB
854{
855 register int np;
9f43ad85 856 int lim, lim_byte;
ca1d1d23
JB
857 int n = direction;
858
859 if (!NILP (count))
860 {
861 CHECK_NUMBER (count, 3);
862 n *= XINT (count);
863 }
864
865 CHECK_STRING (string, 0);
866 if (NILP (bound))
9f43ad85
RS
867 {
868 if (n > 0)
869 lim = ZV, lim_byte = ZV_BYTE;
870 else
871 lim = BEGV, lim_byte = BEGV_BYTE;
872 }
ca1d1d23
JB
873 else
874 {
875 CHECK_NUMBER_COERCE_MARKER (bound, 1);
876 lim = XINT (bound);
6ec8bbd2 877 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
878 error ("Invalid search bound (wrong side of point)");
879 if (lim > ZV)
9f43ad85 880 lim = ZV, lim_byte = ZV_BYTE;
588d2fd5 881 else if (lim < BEGV)
9f43ad85 882 lim = BEGV, lim_byte = BEGV_BYTE;
588d2fd5
KH
883 else
884 lim_byte = CHAR_TO_BYTE (lim);
ca1d1d23
JB
885 }
886
9f43ad85 887 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
ca1d1d23 888 (!NILP (current_buffer->case_fold_search)
facdc750 889 ? current_buffer->case_canon_table
3135e9fd 890 : Qnil),
ca1d1d23 891 (!NILP (current_buffer->case_fold_search)
facdc750 892 ? current_buffer->case_eqv_table
3135e9fd 893 : Qnil),
b819a390 894 posix);
ca1d1d23
JB
895 if (np <= 0)
896 {
897 if (NILP (noerror))
898 return signal_failure (string);
899 if (!EQ (noerror, Qt))
900 {
901 if (lim < BEGV || lim > ZV)
902 abort ();
9f43ad85 903 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
904 return Qnil;
905#if 0 /* This would be clean, but maybe programs depend on
906 a value of nil here. */
481399bf 907 np = lim;
a5f217b8 908#endif
ca1d1d23 909 }
481399bf
RS
910 else
911 return Qnil;
ca1d1d23
JB
912 }
913
914 if (np < BEGV || np > ZV)
915 abort ();
916
917 SET_PT (np);
918
919 return make_number (np);
920}
921\f
fa8ed3e0
RS
922/* Return 1 if REGEXP it matches just one constant string. */
923
b6d6a51c
KH
924static int
925trivial_regexp_p (regexp)
926 Lisp_Object regexp;
927{
fc932ac6 928 int len = STRING_BYTES (XSTRING (regexp));
b6d6a51c
KH
929 unsigned char *s = XSTRING (regexp)->data;
930 unsigned char c;
931 while (--len >= 0)
932 {
933 switch (*s++)
934 {
935 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
936 return 0;
937 case '\\':
938 if (--len < 0)
939 return 0;
940 switch (*s++)
941 {
942 case '|': case '(': case ')': case '`': case '\'': case 'b':
943 case 'B': case '<': case '>': case 'w': case 'W': case 's':
866f60fd 944 case 'S': case '=':
5679531d 945 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 946 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
947 case '6': case '7': case '8': case '9':
948 return 0;
949 }
950 }
951 }
952 return 1;
953}
954
ca325161 955/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 956 starting at position POS and stopping at position LIM,
b819a390 957 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
958 a regular expression if RE is true.
959
960 If N is positive, searching is forward and LIM must be greater than POS.
961 If N is negative, searching is backward and LIM must be less than POS.
962
facdc750 963 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 964 or else the position at the beginning of the Nth occurrence
b819a390
RS
965 (if searching backward) or the end (if searching forward).
966
967 POSIX is nonzero if we want full backtracking (POSIX style)
968 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 969
aff2ce94
RS
970#define TRANSLATE(out, trt, d) \
971do \
972 { \
973 if (! NILP (trt)) \
974 { \
975 Lisp_Object temp; \
976 temp = Faref (trt, make_number (d)); \
977 if (INTEGERP (temp)) \
978 out = XINT (temp); \
979 else \
980 out = d; \
981 } \
982 else \
983 out = d; \
984 } \
985while (0)
facdc750 986
b819a390 987static int
9f43ad85
RS
988search_buffer (string, pos, pos_byte, lim, lim_byte, n,
989 RE, trt, inverse_trt, posix)
ca1d1d23
JB
990 Lisp_Object string;
991 int pos;
9f43ad85 992 int pos_byte;
ca1d1d23 993 int lim;
9f43ad85 994 int lim_byte;
ca1d1d23
JB
995 int n;
996 int RE;
facdc750
RS
997 Lisp_Object trt;
998 Lisp_Object inverse_trt;
b819a390 999 int posix;
ca1d1d23
JB
1000{
1001 int len = XSTRING (string)->size;
fc932ac6 1002 int len_byte = STRING_BYTES (XSTRING (string));
facdc750 1003 register int i;
ca1d1d23 1004
7074fde6
FP
1005 if (running_asynch_code)
1006 save_search_regs ();
1007
ca1d1d23 1008 /* Null string is found at starting position. */
3f57a499 1009 if (len == 0)
ca325161
RS
1010 {
1011 set_search_regs (pos, 0);
1012 return pos;
1013 }
3f57a499
RS
1014
1015 /* Searching 0 times means don't move. */
1016 if (n == 0)
ca1d1d23
JB
1017 return pos;
1018
b6d6a51c 1019 if (RE && !trivial_regexp_p (string))
ca1d1d23 1020 {
facdc750
RS
1021 unsigned char *p1, *p2;
1022 int s1, s2;
487282dc
KH
1023 struct re_pattern_buffer *bufp;
1024
0c8533c6
RS
1025 bufp = compile_pattern (string, &search_regs, trt, posix,
1026 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23 1027
ca1d1d23
JB
1028 immediate_quit = 1; /* Quit immediately if user types ^G,
1029 because letting this function finish
1030 can take too long. */
1031 QUIT; /* Do a pending quit right away,
1032 to avoid paradoxical behavior */
1033 /* Get pointers and sizes of the two strings
1034 that make up the visible portion of the buffer. */
1035
1036 p1 = BEGV_ADDR;
fa8ed3e0 1037 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1038 p2 = GAP_END_ADDR;
fa8ed3e0 1039 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1040 if (s1 < 0)
1041 {
1042 p2 = p1;
fa8ed3e0 1043 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1044 s1 = 0;
1045 }
1046 if (s2 < 0)
1047 {
fa8ed3e0 1048 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1049 s2 = 0;
1050 }
8bb43c28
RS
1051 re_match_object = Qnil;
1052
ca1d1d23
JB
1053 while (n < 0)
1054 {
42db823b 1055 int val;
487282dc 1056 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1057 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1058 &search_regs,
42db823b 1059 /* Don't allow match past current point */
4996330b 1060 pos_byte - BEGV_BYTE);
ca1d1d23 1061 if (val == -2)
b6d6a51c
KH
1062 {
1063 matcher_overflow ();
1064 }
ca1d1d23
JB
1065 if (val >= 0)
1066 {
26aff150 1067 pos_byte = search_regs.start[0] + BEGV_BYTE;
4746118a 1068 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1069 if (search_regs.start[i] >= 0)
1070 {
fa8ed3e0
RS
1071 search_regs.start[i]
1072 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1073 search_regs.end[i]
1074 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1075 }
a3668d92 1076 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1077 /* Set pos to the new position. */
1078 pos = search_regs.start[0];
1079 }
1080 else
1081 {
1082 immediate_quit = 0;
1083 return (n);
1084 }
1085 n++;
1086 }
1087 while (n > 0)
1088 {
42db823b 1089 int val;
487282dc 1090 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1091 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1092 &search_regs,
1093 lim_byte - BEGV_BYTE);
ca1d1d23 1094 if (val == -2)
b6d6a51c
KH
1095 {
1096 matcher_overflow ();
1097 }
ca1d1d23
JB
1098 if (val >= 0)
1099 {
26aff150 1100 pos_byte = search_regs.end[0] + BEGV_BYTE;
4746118a 1101 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1102 if (search_regs.start[i] >= 0)
1103 {
fa8ed3e0
RS
1104 search_regs.start[i]
1105 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1106 search_regs.end[i]
1107 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1108 }
a3668d92 1109 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1110 pos = search_regs.end[0];
1111 }
1112 else
1113 {
1114 immediate_quit = 0;
1115 return (0 - n);
1116 }
1117 n--;
1118 }
1119 immediate_quit = 0;
1120 return (pos);
1121 }
1122 else /* non-RE case */
1123 {
facdc750
RS
1124 unsigned char *raw_pattern, *pat;
1125 int raw_pattern_size;
1126 int raw_pattern_size_byte;
1127 unsigned char *patbuf;
1128 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1129 unsigned char *base_pat = XSTRING (string)->data;
1130 int charset_base = -1;
1131 int simple = 1;
1132
1133 /* MULTIBYTE says whether the text to be searched is multibyte.
1134 We must convert PATTERN to match that, or we will not really
1135 find things right. */
1136
1137 if (multibyte == STRING_MULTIBYTE (string))
1138 {
7276d3d8 1139 raw_pattern = (unsigned char *) XSTRING (string)->data;
facdc750 1140 raw_pattern_size = XSTRING (string)->size;
fc932ac6 1141 raw_pattern_size_byte = STRING_BYTES (XSTRING (string));
facdc750
RS
1142 }
1143 else if (multibyte)
1144 {
1145 raw_pattern_size = XSTRING (string)->size;
1146 raw_pattern_size_byte
1147 = count_size_as_multibyte (XSTRING (string)->data,
1148 raw_pattern_size);
7276d3d8 1149 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
facdc750
RS
1150 copy_text (XSTRING (string)->data, raw_pattern,
1151 XSTRING (string)->size, 0, 1);
1152 }
1153 else
1154 {
1155 /* Converting multibyte to single-byte.
1156
1157 ??? Perhaps this conversion should be done in a special way
1158 by subtracting nonascii-insert-offset from each non-ASCII char,
1159 so that only the multibyte chars which really correspond to
1160 the chosen single-byte character set can possibly match. */
1161 raw_pattern_size = XSTRING (string)->size;
1162 raw_pattern_size_byte = XSTRING (string)->size;
7276d3d8 1163 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
facdc750 1164 copy_text (XSTRING (string)->data, raw_pattern,
fc932ac6 1165 STRING_BYTES (XSTRING (string)), 1, 0);
facdc750
RS
1166 }
1167
1168 /* Copy and optionally translate the pattern. */
1169 len = raw_pattern_size;
1170 len_byte = raw_pattern_size_byte;
1171 patbuf = (unsigned char *) alloca (len_byte);
1172 pat = patbuf;
1173 base_pat = raw_pattern;
1174 if (multibyte)
1175 {
1176 while (--len >= 0)
1177 {
1178 unsigned char workbuf[4], *str;
aff2ce94 1179 int c, translated, inverse;
facdc750
RS
1180 int in_charlen, charlen;
1181
1182 /* If we got here and the RE flag is set, it's because we're
1183 dealing with a regexp known to be trivial, so the backslash
1184 just quotes the next character. */
1185 if (RE && *base_pat == '\\')
1186 {
1187 len--;
1188 len_byte--;
1189 base_pat++;
1190 }
1191
1192 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1193 /* Translate the character, if requested. */
aff2ce94 1194 TRANSLATE (translated, trt, c);
facdc750
RS
1195 /* If translation changed the byte-length, go back
1196 to the original character. */
1197 charlen = CHAR_STRING (translated, workbuf, str);
1198 if (in_charlen != charlen)
1199 {
1200 translated = c;
1201 charlen = CHAR_STRING (c, workbuf, str);
1202 }
1203
aff2ce94
RS
1204 TRANSLATE (inverse, inverse_trt, c);
1205
facdc750
RS
1206 /* Did this char actually get translated?
1207 Would any other char get translated into it? */
aff2ce94 1208 if (translated != c || inverse != c)
facdc750
RS
1209 {
1210 /* Keep track of which character set row
1211 contains the characters that need translation. */
1212 int charset_base_code = c & ~0xff;
1213 if (charset_base == -1)
1214 charset_base = charset_base_code;
1215 else if (charset_base != charset_base_code)
1216 /* If two different rows appear, needing translation,
1217 then we cannot use boyer_moore search. */
1218 simple = 0;
1219 /* ??? Handa: this must do simple = 0
1220 if c is a composite character. */
aff2ce94 1221 }
facdc750
RS
1222
1223 /* Store this character into the translated pattern. */
1224 bcopy (str, pat, charlen);
1225 pat += charlen;
1226 base_pat += in_charlen;
1227 len_byte -= in_charlen;
1228 }
1229 }
1230 else
1231 {
1232 while (--len >= 0)
1233 {
aff2ce94 1234 int c, translated, inverse;
facdc750
RS
1235
1236 /* If we got here and the RE flag is set, it's because we're
1237 dealing with a regexp known to be trivial, so the backslash
1238 just quotes the next character. */
1239 if (RE && *base_pat == '\\')
1240 {
1241 len--;
1242 base_pat++;
1243 }
1244 c = *base_pat++;
aff2ce94
RS
1245 TRANSLATE (translated, trt, c);
1246 TRANSLATE (inverse, inverse_trt, c);
facdc750
RS
1247
1248 /* Did this char actually get translated?
1249 Would any other char get translated into it? */
aff2ce94 1250 if (translated != c || inverse != c)
facdc750
RS
1251 {
1252 /* Keep track of which character set row
1253 contains the characters that need translation. */
1254 int charset_base_code = c & ~0xff;
1255 if (charset_base == -1)
1256 charset_base = charset_base_code;
1257 else if (charset_base != charset_base_code)
1258 /* If two different rows appear, needing translation,
1259 then we cannot use boyer_moore search. */
1260 simple = 0;
aff2ce94 1261 }
facdc750
RS
1262 *pat++ = translated;
1263 }
1264 }
1265
1266 len_byte = pat - patbuf;
1267 len = raw_pattern_size;
1268 pat = base_pat = patbuf;
1269
1270 if (simple)
1271 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
aff2ce94
RS
1272 pos, pos_byte, lim, lim_byte,
1273 charset_base);
facdc750
RS
1274 else
1275 return simple_search (n, pat, len, len_byte, trt,
1276 pos, pos_byte, lim, lim_byte);
1277 }
1278}
1279\f
1280/* Do a simple string search N times for the string PAT,
1281 whose length is LEN/LEN_BYTE,
1282 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1283 TRT is the translation table.
f8bd51c4 1284
facdc750
RS
1285 Return the character position where the match is found.
1286 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1287
facdc750
RS
1288 This kind of search works regardless of what is in PAT and
1289 regardless of what is in TRT. It is used in cases where
1290 boyer_moore cannot work. */
1291
1292static int
1293simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1294 int n;
1295 unsigned char *pat;
1296 int len, len_byte;
1297 Lisp_Object trt;
1298 int pos, pos_byte;
1299 int lim, lim_byte;
1300{
1301 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
ab228c24 1302 int forward = n > 0;
facdc750
RS
1303
1304 if (lim > pos && multibyte)
1305 while (n > 0)
1306 {
1307 while (1)
f8bd51c4 1308 {
facdc750
RS
1309 /* Try matching at position POS. */
1310 int this_pos = pos;
1311 int this_pos_byte = pos_byte;
1312 int this_len = len;
1313 int this_len_byte = len_byte;
1314 unsigned char *p = pat;
1315 if (pos + len > lim)
1316 goto stop;
1317
1318 while (this_len > 0)
1319 {
1320 int charlen, buf_charlen;
ab228c24 1321 int pat_ch, buf_ch;
facdc750 1322
ab228c24 1323 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1324 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1325 ZV_BYTE - this_pos_byte,
1326 buf_charlen);
aff2ce94 1327 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1328
1329 if (buf_ch != pat_ch)
1330 break;
ab228c24
RS
1331
1332 this_len_byte -= charlen;
1333 this_len--;
1334 p += charlen;
1335
1336 this_pos_byte += buf_charlen;
1337 this_pos++;
facdc750
RS
1338 }
1339
1340 if (this_len == 0)
1341 {
1342 pos += len;
1343 pos_byte += len_byte;
1344 break;
1345 }
1346
1347 INC_BOTH (pos, pos_byte);
f8bd51c4 1348 }
facdc750
RS
1349
1350 n--;
1351 }
1352 else if (lim > pos)
1353 while (n > 0)
1354 {
1355 while (1)
f8bd51c4 1356 {
facdc750
RS
1357 /* Try matching at position POS. */
1358 int this_pos = pos;
1359 int this_len = len;
1360 unsigned char *p = pat;
1361
1362 if (pos + len > lim)
1363 goto stop;
1364
1365 while (this_len > 0)
1366 {
1367 int pat_ch = *p++;
1368 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1369 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1370
1371 if (buf_ch != pat_ch)
1372 break;
ab228c24
RS
1373
1374 this_len--;
1375 this_pos++;
facdc750
RS
1376 }
1377
1378 if (this_len == 0)
1379 {
1380 pos += len;
1381 break;
1382 }
1383
1384 pos++;
f8bd51c4 1385 }
facdc750
RS
1386
1387 n--;
1388 }
1389 /* Backwards search. */
1390 else if (lim < pos && multibyte)
1391 while (n < 0)
1392 {
1393 while (1)
f8bd51c4 1394 {
facdc750
RS
1395 /* Try matching at position POS. */
1396 int this_pos = pos - len;
1397 int this_pos_byte = pos_byte - len_byte;
1398 int this_len = len;
1399 int this_len_byte = len_byte;
1400 unsigned char *p = pat;
1401
1402 if (pos - len < lim)
1403 goto stop;
1404
1405 while (this_len > 0)
1406 {
1407 int charlen, buf_charlen;
ab228c24 1408 int pat_ch, buf_ch;
facdc750 1409
ab228c24 1410 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1411 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1412 ZV_BYTE - this_pos_byte,
1413 buf_charlen);
aff2ce94 1414 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1415
1416 if (buf_ch != pat_ch)
1417 break;
ab228c24
RS
1418
1419 this_len_byte -= charlen;
1420 this_len--;
1421 p += charlen;
1422 this_pos_byte += buf_charlen;
1423 this_pos++;
facdc750
RS
1424 }
1425
1426 if (this_len == 0)
1427 {
1428 pos -= len;
1429 pos_byte -= len_byte;
1430 break;
1431 }
1432
1433 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1434 }
1435
facdc750
RS
1436 n++;
1437 }
1438 else if (lim < pos)
1439 while (n < 0)
1440 {
1441 while (1)
b6d6a51c 1442 {
facdc750
RS
1443 /* Try matching at position POS. */
1444 int this_pos = pos - len;
1445 int this_len = len;
1446 unsigned char *p = pat;
1447
1448 if (pos - len < lim)
1449 goto stop;
1450
1451 while (this_len > 0)
1452 {
1453 int pat_ch = *p++;
1454 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1455 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1456
1457 if (buf_ch != pat_ch)
1458 break;
ab228c24
RS
1459 this_len--;
1460 this_pos++;
facdc750
RS
1461 }
1462
1463 if (this_len == 0)
b6d6a51c 1464 {
facdc750
RS
1465 pos -= len;
1466 break;
b6d6a51c 1467 }
facdc750
RS
1468
1469 pos--;
b6d6a51c 1470 }
facdc750
RS
1471
1472 n++;
b6d6a51c 1473 }
facdc750
RS
1474
1475 stop:
1476 if (n == 0)
aff2ce94 1477 {
ab228c24
RS
1478 if (forward)
1479 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1480 else
1481 set_search_regs (multibyte ? pos_byte : pos, len_byte);
aff2ce94
RS
1482
1483 return pos;
1484 }
facdc750
RS
1485 else if (n > 0)
1486 return -n;
1487 else
1488 return n;
1489}
1490\f
1491/* Do Boyer-Moore search N times for the string PAT,
1492 whose length is LEN/LEN_BYTE,
1493 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1494 DIRECTION says which direction we search in.
1495 TRT and INVERSE_TRT are translation tables.
1496
1497 This kind of search works if all the characters in PAT that have
1498 nontrivial translation are the same aside from the last byte. This
1499 makes it possible to translate just the last byte of a character,
1500 and do so after just a simple test of the context.
1501
1502 If that criterion is not satisfied, do not call this function. */
1503
1504static int
1505boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
aff2ce94 1506 pos, pos_byte, lim, lim_byte, charset_base)
facdc750
RS
1507 int n;
1508 unsigned char *base_pat;
1509 int len, len_byte;
1510 Lisp_Object trt;
1511 Lisp_Object inverse_trt;
1512 int pos, pos_byte;
1513 int lim, lim_byte;
aff2ce94 1514 int charset_base;
facdc750
RS
1515{
1516 int direction = ((n > 0) ? 1 : -1);
1517 register int dirlen;
1518 int infinity, limit, k, stride_for_teases;
1519 register int *BM_tab;
1520 int *BM_tab_base;
1521 register unsigned char *cursor, *p_limit;
1522 register int i, j;
1523 unsigned char *pat;
1524 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1525
1526 unsigned char simple_translate[0400];
1527 int translate_prev_byte;
1528 int translate_anteprev_byte;
1529
1530#ifdef C_ALLOCA
1531 int BM_tab_space[0400];
1532 BM_tab = &BM_tab_space[0];
1533#else
1534 BM_tab = (int *) alloca (0400 * sizeof (int));
1535#endif
1536 /* The general approach is that we are going to maintain that we know */
1537 /* the first (closest to the present position, in whatever direction */
1538 /* we're searching) character that could possibly be the last */
1539 /* (furthest from present position) character of a valid match. We */
1540 /* advance the state of our knowledge by looking at that character */
1541 /* and seeing whether it indeed matches the last character of the */
1542 /* pattern. If it does, we take a closer look. If it does not, we */
1543 /* move our pointer (to putative last characters) as far as is */
1544 /* logically possible. This amount of movement, which I call a */
1545 /* stride, will be the length of the pattern if the actual character */
1546 /* appears nowhere in the pattern, otherwise it will be the distance */
1547 /* from the last occurrence of that character to the end of the */
1548 /* pattern. */
1549 /* As a coding trick, an enormous stride is coded into the table for */
1550 /* characters that match the last character. This allows use of only */
1551 /* a single test, a test for having gone past the end of the */
1552 /* permissible match region, to test for both possible matches (when */
1553 /* the stride goes past the end immediately) and failure to */
1554 /* match (where you get nudged past the end one stride at a time). */
1555
1556 /* Here we make a "mickey mouse" BM table. The stride of the search */
1557 /* is determined only by the last character of the putative match. */
1558 /* If that character does not match, we will stride the proper */
1559 /* distance to propose a match that superimposes it on the last */
1560 /* instance of a character that matches it (per trt), or misses */
1561 /* it entirely if there is none. */
1562
1563 dirlen = len_byte * direction;
1564 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1565 if (direction < 0)
1566 pat = (base_pat += len_byte - 1);
1567 else
1568 pat = base_pat;
1569 BM_tab_base = BM_tab;
1570 BM_tab += 0400;
1571 j = dirlen; /* to get it in a register */
1572 /* A character that does not appear in the pattern induces a */
1573 /* stride equal to the pattern length. */
1574 while (BM_tab_base != BM_tab)
1575 {
1576 *--BM_tab = j;
1577 *--BM_tab = j;
1578 *--BM_tab = j;
1579 *--BM_tab = j;
1580 }
1581
1582 /* We use this for translation, instead of TRT itself.
1583 We fill this in to handle the characters that actually
1584 occur in the pattern. Others don't matter anyway! */
1585 bzero (simple_translate, sizeof simple_translate);
1586 for (i = 0; i < 0400; i++)
1587 simple_translate[i] = i;
1588
1589 i = 0;
1590 while (i != infinity)
1591 {
1592 unsigned char *ptr = pat + i;
1593 i += direction;
1594 if (i == dirlen)
1595 i = infinity;
1596 if (! NILP (trt))
ca1d1d23 1597 {
facdc750 1598 int ch;
aff2ce94 1599 int untranslated;
facdc750
RS
1600 int this_translated = 1;
1601
1602 if (multibyte
1603 && (ptr + 1 == pat + len_byte || CHAR_HEAD_P (ptr[1])))
ca1d1d23 1604 {
facdc750
RS
1605 unsigned char *charstart = ptr;
1606 while (! CHAR_HEAD_P (*charstart))
1607 charstart--;
aff2ce94 1608 untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
ab228c24 1609 if (charset_base == (untranslated & ~0xff))
facdc750 1610 {
ab228c24 1611 TRANSLATE (ch, trt, untranslated);
aff2ce94
RS
1612 if (! CHAR_HEAD_P (*ptr))
1613 {
1614 translate_prev_byte = ptr[-1];
1615 if (! CHAR_HEAD_P (translate_prev_byte))
1616 translate_anteprev_byte = ptr[-2];
1617 }
facdc750 1618 }
aff2ce94 1619 else
ab228c24
RS
1620 {
1621 this_translated = 0;
1622 ch = *ptr;
1623 }
ca1d1d23 1624 }
facdc750 1625 else if (!multibyte)
aff2ce94 1626 TRANSLATE (ch, trt, *ptr);
ca1d1d23
JB
1627 else
1628 {
facdc750
RS
1629 ch = *ptr;
1630 this_translated = 0;
ca1d1d23 1631 }
facdc750 1632
ab228c24
RS
1633 if (ch > 0400)
1634 j = ((unsigned char) ch) | 0200;
1635 else
1636 j = (unsigned char) ch;
1637
facdc750
RS
1638 if (i == infinity)
1639 stride_for_teases = BM_tab[j];
ab228c24 1640
facdc750
RS
1641 BM_tab[j] = dirlen - i;
1642 /* A translation table is accompanied by its inverse -- see */
1643 /* comment following downcase_table for details */
1644 if (this_translated)
ab228c24
RS
1645 {
1646 int starting_ch = ch;
1647 int starting_j = j;
1648 while (1)
1649 {
1650 TRANSLATE (ch, inverse_trt, ch);
1651 if (ch > 0400)
1652 j = ((unsigned char) ch) | 0200;
1653 else
1654 j = (unsigned char) ch;
1655
1656 /* For all the characters that map into CH,
1657 set up simple_translate to map the last byte
1658 into STARTING_J. */
1659 simple_translate[j] = starting_j;
1660 if (ch == starting_ch)
1661 break;
1662 BM_tab[j] = dirlen - i;
1663 }
1664 }
facdc750
RS
1665 }
1666 else
1667 {
1668 j = *ptr;
1669
1670 if (i == infinity)
1671 stride_for_teases = BM_tab[j];
1672 BM_tab[j] = dirlen - i;
ca1d1d23 1673 }
facdc750
RS
1674 /* stride_for_teases tells how much to stride if we get a */
1675 /* match on the far character but are subsequently */
1676 /* disappointed, by recording what the stride would have been */
1677 /* for that character if the last character had been */
1678 /* different. */
1679 }
1680 infinity = dirlen - infinity;
1681 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1682 /* loop invariant - POS_BYTE points at where last char (first
1683 char if reverse) of pattern would align in a possible match. */
1684 while (n != 0)
1685 {
1686 int tail_end;
1687 unsigned char *tail_end_ptr;
1688
1689 /* It's been reported that some (broken) compiler thinks that
1690 Boolean expressions in an arithmetic context are unsigned.
1691 Using an explicit ?1:0 prevents this. */
1692 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1693 < 0)
1694 return (n * (0 - direction));
1695 /* First we do the part we can by pointers (maybe nothing) */
1696 QUIT;
1697 pat = base_pat;
1698 limit = pos_byte - dirlen + direction;
67ce527d
KH
1699 if (direction > 0)
1700 {
1701 limit = BUFFER_CEILING_OF (limit);
1702 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1703 can take on without hitting edge of buffer or the gap. */
1704 limit = min (limit, pos_byte + 20000);
1705 limit = min (limit, lim_byte - 1);
1706 }
1707 else
1708 {
1709 limit = BUFFER_FLOOR_OF (limit);
1710 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1711 can take on without hitting edge of buffer or the gap. */
1712 limit = max (limit, pos_byte - 20000);
1713 limit = max (limit, lim_byte);
1714 }
facdc750
RS
1715 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1716 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1717
1718 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1719 {
facdc750
RS
1720 unsigned char *p2;
1721
1722 p_limit = BYTE_POS_ADDR (limit);
1723 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1724 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1725 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1726 {
facdc750 1727 if (direction > 0) /* worth duplicating */
ca1d1d23 1728 {
facdc750
RS
1729 /* Use signed comparison if appropriate
1730 to make cursor+infinity sure to be > p_limit.
1731 Assuming that the buffer lies in a range of addresses
1732 that are all "positive" (as ints) or all "negative",
1733 either kind of comparison will work as long
1734 as we don't step by infinity. So pick the kind
1735 that works when we do step by infinity. */
1736 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1737 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1738 cursor += BM_tab[*cursor];
ca1d1d23 1739 else
facdc750
RS
1740 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1741 cursor += BM_tab[*cursor];
1742 }
1743 else
1744 {
1745 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1746 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1747 cursor += BM_tab[*cursor];
1748 else
1749 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1750 cursor += BM_tab[*cursor];
1751 }
ca1d1d23 1752/* If you are here, cursor is beyond the end of the searched region. */
facdc750
RS
1753/* This can happen if you match on the far character of the pattern, */
1754/* because the "stride" of that character is infinity, a number able */
1755/* to throw you well beyond the end of the search. It can also */
1756/* happen if you fail to match within the permitted region and would */
1757/* otherwise try a character beyond that region */
1758 if ((cursor - p_limit) * direction <= len_byte)
1759 break; /* a small overrun is genuine */
1760 cursor -= infinity; /* large overrun = hit */
1761 i = dirlen - direction;
1762 if (! NILP (trt))
1763 {
1764 while ((i -= direction) + direction != 0)
ca1d1d23 1765 {
facdc750
RS
1766 int ch;
1767 cursor -= direction;
1768 /* Translate only the last byte of a character. */
1769 if (! multibyte
1770 || ((cursor == tail_end_ptr
1771 || CHAR_HEAD_P (cursor[1]))
1772 && (CHAR_HEAD_P (cursor[0])
1773 || (translate_prev_byte == cursor[-1]
1774 && (CHAR_HEAD_P (translate_prev_byte)
1775 || translate_anteprev_byte == cursor[-2])))))
1776 ch = simple_translate[*cursor];
1777 else
1778 ch = *cursor;
1779 if (pat[i] != ch)
1780 break;
ca1d1d23 1781 }
facdc750
RS
1782 }
1783 else
1784 {
1785 while ((i -= direction) + direction != 0)
ca1d1d23 1786 {
facdc750
RS
1787 cursor -= direction;
1788 if (pat[i] != *cursor)
1789 break;
ca1d1d23 1790 }
facdc750
RS
1791 }
1792 cursor += dirlen - i - direction; /* fix cursor */
1793 if (i + direction == 0)
1794 {
1795 int position;
0c8533c6 1796
facdc750 1797 cursor -= direction;
1113d9db 1798
facdc750
RS
1799 position = pos_byte + cursor - p2 + ((direction > 0)
1800 ? 1 - len_byte : 0);
1801 set_search_regs (position, len_byte);
ca325161 1802
facdc750
RS
1803 if ((n -= direction) != 0)
1804 cursor += dirlen; /* to resume search */
ca1d1d23 1805 else
facdc750
RS
1806 return ((direction > 0)
1807 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1808 }
facdc750
RS
1809 else
1810 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 1811 }
facdc750
RS
1812 pos_byte += cursor - p2;
1813 }
1814 else
1815 /* Now we'll pick up a clump that has to be done the hard */
1816 /* way because it covers a discontinuity */
1817 {
1818 limit = ((direction > 0)
1819 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1820 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1821 limit = ((direction > 0)
1822 ? min (limit + len_byte, lim_byte - 1)
1823 : max (limit - len_byte, lim_byte));
1824 /* LIMIT is now the last value POS_BYTE can have
1825 and still be valid for a possible match. */
1826 while (1)
ca1d1d23 1827 {
facdc750
RS
1828 /* This loop can be coded for space rather than */
1829 /* speed because it will usually run only once. */
1830 /* (the reach is at most len + 21, and typically */
1831 /* does not exceed len) */
1832 while ((limit - pos_byte) * direction >= 0)
1833 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1834 /* now run the same tests to distinguish going off the */
1835 /* end, a match or a phony match. */
1836 if ((pos_byte - limit) * direction <= len_byte)
1837 break; /* ran off the end */
1838 /* Found what might be a match.
1839 Set POS_BYTE back to last (first if reverse) pos. */
1840 pos_byte -= infinity;
1841 i = dirlen - direction;
1842 while ((i -= direction) + direction != 0)
ca1d1d23 1843 {
facdc750
RS
1844 int ch;
1845 unsigned char *ptr;
1846 pos_byte -= direction;
1847 ptr = BYTE_POS_ADDR (pos_byte);
1848 /* Translate only the last byte of a character. */
1849 if (! multibyte
1850 || ((ptr == tail_end_ptr
1851 || CHAR_HEAD_P (ptr[1]))
1852 && (CHAR_HEAD_P (ptr[0])
1853 || (translate_prev_byte == ptr[-1]
1854 && (CHAR_HEAD_P (translate_prev_byte)
1855 || translate_anteprev_byte == ptr[-2])))))
1856 ch = simple_translate[*ptr];
1857 else
1858 ch = *ptr;
1859 if (pat[i] != ch)
1860 break;
1861 }
1862 /* Above loop has moved POS_BYTE part or all the way
1863 back to the first pos (last pos if reverse).
1864 Set it once again at the last (first if reverse) char. */
1865 pos_byte += dirlen - i- direction;
1866 if (i + direction == 0)
1867 {
1868 int position;
1869 pos_byte -= direction;
1113d9db 1870
facdc750 1871 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
0c8533c6 1872
facdc750 1873 set_search_regs (position, len_byte);
ca325161 1874
facdc750
RS
1875 if ((n -= direction) != 0)
1876 pos_byte += dirlen; /* to resume search */
ca1d1d23 1877 else
facdc750
RS
1878 return ((direction > 0)
1879 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1880 }
facdc750
RS
1881 else
1882 pos_byte += stride_for_teases;
1883 }
1884 }
1885 /* We have done one clump. Can we continue? */
1886 if ((lim_byte - pos_byte) * direction < 0)
1887 return ((0 - n) * direction);
ca1d1d23 1888 }
facdc750 1889 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 1890}
ca325161 1891
fa8ed3e0 1892/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
ca325161
RS
1893 for a match just found in the current buffer. */
1894
1895static void
fa8ed3e0
RS
1896set_search_regs (beg_byte, nbytes)
1897 int beg_byte, nbytes;
ca325161
RS
1898{
1899 /* Make sure we have registers in which to store
1900 the match position. */
1901 if (search_regs.num_regs == 0)
1902 {
2d4a771a
RS
1903 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1904 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 1905 search_regs.num_regs = 2;
ca325161
RS
1906 }
1907
fa8ed3e0
RS
1908 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1909 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 1910 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 1911}
ca1d1d23
JB
1912\f
1913/* Given a string of words separated by word delimiters,
1914 compute a regexp that matches those exact words
1915 separated by arbitrary punctuation. */
1916
1917static Lisp_Object
1918wordify (string)
1919 Lisp_Object string;
1920{
1921 register unsigned char *p, *o;
0c8533c6 1922 register int i, i_byte, len, punct_count = 0, word_count = 0;
ca1d1d23 1923 Lisp_Object val;
0c8533c6
RS
1924 int prev_c = 0;
1925 int adjust;
ca1d1d23
JB
1926
1927 CHECK_STRING (string, 0);
1928 p = XSTRING (string)->data;
1929 len = XSTRING (string)->size;
1930
0c8533c6
RS
1931 for (i = 0, i_byte = 0; i < len; )
1932 {
1933 int c;
1934
1935 if (STRING_MULTIBYTE (string))
1936 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1937 else
1938 c = XSTRING (string)->data[i++];
1939
1940 if (SYNTAX (c) != Sword)
1941 {
1942 punct_count++;
1943 if (i > 0 && SYNTAX (prev_c) == Sword)
1944 word_count++;
1945 }
ca1d1d23 1946
0c8533c6
RS
1947 prev_c = c;
1948 }
1949
1950 if (SYNTAX (prev_c) == Sword)
1951 word_count++;
1952 if (!word_count)
1953 return build_string ("");
1954
1955 adjust = - punct_count + 5 * (word_count - 1) + 4;
1956 val = make_uninit_multibyte_string (len + adjust,
fc932ac6 1957 STRING_BYTES (XSTRING (string)) + adjust);
ca1d1d23
JB
1958
1959 o = XSTRING (val)->data;
1960 *o++ = '\\';
1961 *o++ = 'b';
1e9582d4 1962 prev_c = 0;
ca1d1d23 1963
1e9582d4
RS
1964 for (i = 0, i_byte = 0; i < len; )
1965 {
1966 int c;
1967 int i_byte_orig = i_byte;
1968
1969 if (STRING_MULTIBYTE (string))
1970 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1971 else
1972 c = XSTRING (string)->data[i++];
1973
1974 if (SYNTAX (c) == Sword)
1975 {
1976 bcopy (&XSTRING (string)->data[i_byte_orig], o,
1977 i_byte - i_byte_orig);
1978 o += i_byte - i_byte_orig;
1979 }
1980 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
1981 {
1982 *o++ = '\\';
1983 *o++ = 'W';
1984 *o++ = '\\';
1985 *o++ = 'W';
1986 *o++ = '*';
1987 }
1988
1989 prev_c = c;
1990 }
ca1d1d23
JB
1991
1992 *o++ = '\\';
1993 *o++ = 'b';
1994
1995 return val;
1996}
1997\f
1998DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
6af43974 1999 "MSearch backward: ",
ca1d1d23
JB
2000 "Search backward from point for STRING.\n\
2001Set point to the beginning of the occurrence found, and return point.\n\
2002An optional second argument bounds the search; it is a buffer position.\n\
2003The match found must not extend before that position.\n\
2004Optional third argument, if t, means if fail just return nil (no error).\n\
2005 If not nil and not t, position at limit of search and return nil.\n\
2006Optional fourth argument is repeat count--search for successive occurrences.\n\
2007See also the functions `match-beginning', `match-end' and `replace-match'.")
2008 (string, bound, noerror, count)
2009 Lisp_Object string, bound, noerror, count;
2010{
b819a390 2011 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
2012}
2013
6af43974 2014DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
ca1d1d23
JB
2015 "Search forward from point for STRING.\n\
2016Set point to the end of the occurrence found, and return point.\n\
2017An optional second argument bounds the search; it is a buffer position.\n\
2018The match found must not extend after that position. nil is equivalent\n\
2019 to (point-max).\n\
2020Optional third argument, if t, means if fail just return nil (no error).\n\
2021 If not nil and not t, move to limit of search and return nil.\n\
2022Optional fourth argument is repeat count--search for successive occurrences.\n\
2023See also the functions `match-beginning', `match-end' and `replace-match'.")
2024 (string, bound, noerror, count)
2025 Lisp_Object string, bound, noerror, count;
2026{
b819a390 2027 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
2028}
2029
2030DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2031 "sWord search backward: ",
2032 "Search backward from point for STRING, ignoring differences in punctuation.\n\
2033Set point to the beginning of the occurrence found, and return point.\n\
2034An optional second argument bounds the search; it is a buffer position.\n\
2035The match found must not extend before that position.\n\
2036Optional third argument, if t, means if fail just return nil (no error).\n\
2037 If not nil and not t, move to limit of search and return nil.\n\
2038Optional fourth argument is repeat count--search for successive occurrences.")
2039 (string, bound, noerror, count)
2040 Lisp_Object string, bound, noerror, count;
2041{
b819a390 2042 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2043}
2044
2045DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2046 "sWord search: ",
2047 "Search forward from point for STRING, ignoring differences in punctuation.\n\
2048Set point to the end of the occurrence found, and return point.\n\
2049An optional second argument bounds the search; it is a buffer position.\n\
2050The match found must not extend after that position.\n\
2051Optional third argument, if t, means if fail just return nil (no error).\n\
2052 If not nil and not t, move to limit of search and return nil.\n\
2053Optional fourth argument is repeat count--search for successive occurrences.")
2054 (string, bound, noerror, count)
2055 Lisp_Object string, bound, noerror, count;
2056{
b819a390 2057 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
2058}
2059
2060DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2061 "sRE search backward: ",
2062 "Search backward from point for match for regular expression REGEXP.\n\
2063Set point to the beginning of the match, and return point.\n\
2064The match found is the one starting last in the buffer\n\
19c0a730 2065and yet ending before the origin of the search.\n\
ca1d1d23
JB
2066An optional second argument bounds the search; it is a buffer position.\n\
2067The match found must start at or after that position.\n\
2068Optional third argument, if t, means if fail just return nil (no error).\n\
2069 If not nil and not t, move to limit of search and return nil.\n\
2070Optional fourth argument is repeat count--search for successive occurrences.\n\
2071See also the functions `match-beginning', `match-end' and `replace-match'.")
19c0a730
KH
2072 (regexp, bound, noerror, count)
2073 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2074{
b819a390 2075 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2076}
2077
2078DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2079 "sRE search: ",
2080 "Search forward from point for regular expression REGEXP.\n\
2081Set point to the end of the occurrence found, and return point.\n\
2082An optional second argument bounds the search; it is a buffer position.\n\
2083The match found must not extend after that position.\n\
2084Optional third argument, if t, means if fail just return nil (no error).\n\
2085 If not nil and not t, move to limit of search and return nil.\n\
2086Optional fourth argument is repeat count--search for successive occurrences.\n\
2087See also the functions `match-beginning', `match-end' and `replace-match'.")
19c0a730
KH
2088 (regexp, bound, noerror, count)
2089 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2090{
b819a390
RS
2091 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2092}
2093
2094DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2095 "sPosix search backward: ",
2096 "Search backward from point for match for regular expression REGEXP.\n\
2097Find the longest match in accord with Posix regular expression rules.\n\
2098Set point to the beginning of the match, and return point.\n\
2099The match found is the one starting last in the buffer\n\
2100and yet ending before the origin of the search.\n\
2101An optional second argument bounds the search; it is a buffer position.\n\
2102The match found must start at or after that position.\n\
2103Optional third argument, if t, means if fail just return nil (no error).\n\
2104 If not nil and not t, move to limit of search and return nil.\n\
2105Optional fourth argument is repeat count--search for successive occurrences.\n\
2106See also the functions `match-beginning', `match-end' and `replace-match'.")
2107 (regexp, bound, noerror, count)
2108 Lisp_Object regexp, bound, noerror, count;
2109{
2110 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2111}
2112
2113DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2114 "sPosix search: ",
2115 "Search forward from point for regular expression REGEXP.\n\
2116Find the longest match in accord with Posix regular expression rules.\n\
2117Set point to the end of the occurrence found, and return point.\n\
2118An optional second argument bounds the search; it is a buffer position.\n\
2119The match found must not extend after that position.\n\
2120Optional third argument, if t, means if fail just return nil (no error).\n\
2121 If not nil and not t, move to limit of search and return nil.\n\
2122Optional fourth argument is repeat count--search for successive occurrences.\n\
2123See also the functions `match-beginning', `match-end' and `replace-match'.")
2124 (regexp, bound, noerror, count)
2125 Lisp_Object regexp, bound, noerror, count;
2126{
2127 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2128}
2129\f
d7a5ad5f 2130DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
ca1d1d23
JB
2131 "Replace text matched by last search with NEWTEXT.\n\
2132If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
5b9cf4b2
RS
2133Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
2134based on the replaced text.\n\
2135If the replaced text has only capital letters\n\
2136and has at least one multiletter word, convert NEWTEXT to all caps.\n\
2137If the replaced text has at least one word starting with a capital letter,\n\
2138then capitalize each word in NEWTEXT.\n\n\
ca1d1d23
JB
2139If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
2140Otherwise treat `\\' as special:\n\
2141 `\\&' in NEWTEXT means substitute original matched text.\n\
2142 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
2143 If Nth parens didn't match, substitute nothing.\n\
2144 `\\\\' means insert one `\\'.\n\
1113d9db 2145FIXEDCASE and LITERAL are optional arguments.\n\
080c45fd
RS
2146Leaves point at end of replacement text.\n\
2147\n\
2148The optional fourth argument STRING can be a string to modify.\n\
2149In that case, this function creates and returns a new string\n\
d7a5ad5f
RS
2150which is made by replacing the part of STRING that was matched.\n\
2151\n\
2152The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
2153It says to replace just that subexpression instead of the whole match.\n\
2154This is useful only after a regular expression search or match\n\
2155since only regular expressions have distinguished subexpressions.")
2156 (newtext, fixedcase, literal, string, subexp)
2157 Lisp_Object newtext, fixedcase, literal, string, subexp;
ca1d1d23
JB
2158{
2159 enum { nochange, all_caps, cap_initial } case_action;
2160 register int pos, last;
2161 int some_multiletter_word;
97832bd0 2162 int some_lowercase;
73dc8771 2163 int some_uppercase;
208767c3 2164 int some_nonuppercase_initial;
ca1d1d23
JB
2165 register int c, prevc;
2166 int inslen;
d7a5ad5f 2167 int sub;
3e18eecf 2168 int opoint, newpoint;
ca1d1d23 2169
16fdc568 2170 CHECK_STRING (newtext, 0);
ca1d1d23 2171
080c45fd
RS
2172 if (! NILP (string))
2173 CHECK_STRING (string, 4);
2174
ca1d1d23
JB
2175 case_action = nochange; /* We tried an initialization */
2176 /* but some C compilers blew it */
4746118a
JB
2177
2178 if (search_regs.num_regs <= 0)
2179 error ("replace-match called before any match found");
2180
d7a5ad5f
RS
2181 if (NILP (subexp))
2182 sub = 0;
2183 else
2184 {
2185 CHECK_NUMBER (subexp, 3);
2186 sub = XINT (subexp);
2187 if (sub < 0 || sub >= search_regs.num_regs)
2188 args_out_of_range (subexp, make_number (search_regs.num_regs));
2189 }
2190
080c45fd
RS
2191 if (NILP (string))
2192 {
d7a5ad5f
RS
2193 if (search_regs.start[sub] < BEGV
2194 || search_regs.start[sub] > search_regs.end[sub]
2195 || search_regs.end[sub] > ZV)
2196 args_out_of_range (make_number (search_regs.start[sub]),
2197 make_number (search_regs.end[sub]));
080c45fd
RS
2198 }
2199 else
2200 {
d7a5ad5f
RS
2201 if (search_regs.start[sub] < 0
2202 || search_regs.start[sub] > search_regs.end[sub]
2203 || search_regs.end[sub] > XSTRING (string)->size)
2204 args_out_of_range (make_number (search_regs.start[sub]),
2205 make_number (search_regs.end[sub]));
080c45fd 2206 }
ca1d1d23
JB
2207
2208 if (NILP (fixedcase))
2209 {
fa8ed3e0 2210 int beg;
ca1d1d23
JB
2211 /* Decide how to casify by examining the matched text. */
2212
fa8ed3e0
RS
2213 if (NILP (string))
2214 last = CHAR_TO_BYTE (search_regs.end[sub]);
2215 else
2216 last = search_regs.end[sub];
2217
2218 if (NILP (string))
2219 beg = CHAR_TO_BYTE (search_regs.start[sub]);
2220 else
2221 beg = search_regs.start[sub];
2222
ca1d1d23
JB
2223 prevc = '\n';
2224 case_action = all_caps;
2225
2226 /* some_multiletter_word is set nonzero if any original word
2227 is more than one letter long. */
2228 some_multiletter_word = 0;
97832bd0 2229 some_lowercase = 0;
208767c3 2230 some_nonuppercase_initial = 0;
73dc8771 2231 some_uppercase = 0;
ca1d1d23 2232
fa8ed3e0 2233 for (pos = beg; pos < last; pos++)
ca1d1d23 2234 {
080c45fd 2235 if (NILP (string))
5679531d 2236 c = FETCH_BYTE (pos);
080c45fd
RS
2237 else
2238 c = XSTRING (string)->data[pos];
2239
ca1d1d23
JB
2240 if (LOWERCASEP (c))
2241 {
2242 /* Cannot be all caps if any original char is lower case */
2243
97832bd0 2244 some_lowercase = 1;
ca1d1d23 2245 if (SYNTAX (prevc) != Sword)
208767c3 2246 some_nonuppercase_initial = 1;
ca1d1d23
JB
2247 else
2248 some_multiletter_word = 1;
2249 }
2250 else if (!NOCASEP (c))
2251 {
73dc8771 2252 some_uppercase = 1;
97832bd0 2253 if (SYNTAX (prevc) != Sword)
c4d460ce 2254 ;
97832bd0 2255 else
ca1d1d23
JB
2256 some_multiletter_word = 1;
2257 }
208767c3
RS
2258 else
2259 {
2260 /* If the initial is a caseless word constituent,
2261 treat that like a lowercase initial. */
2262 if (SYNTAX (prevc) != Sword)
2263 some_nonuppercase_initial = 1;
2264 }
ca1d1d23
JB
2265
2266 prevc = c;
2267 }
2268
97832bd0
RS
2269 /* Convert to all caps if the old text is all caps
2270 and has at least one multiletter word. */
2271 if (! some_lowercase && some_multiletter_word)
2272 case_action = all_caps;
c4d460ce 2273 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2274 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2275 case_action = cap_initial;
208767c3 2276 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2277 /* Should x -> yz, operating on X, give Yz or YZ?
2278 We'll assume the latter. */
2279 case_action = all_caps;
97832bd0
RS
2280 else
2281 case_action = nochange;
ca1d1d23
JB
2282 }
2283
080c45fd
RS
2284 /* Do replacement in a string. */
2285 if (!NILP (string))
2286 {
2287 Lisp_Object before, after;
2288
2289 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2290 make_number (search_regs.start[sub]));
2291 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2292
636a5e28
RS
2293 /* Substitute parts of the match into NEWTEXT
2294 if desired. */
080c45fd
RS
2295 if (NILP (literal))
2296 {
2297 int lastpos = -1;
0c8533c6 2298 int lastpos_byte = -1;
080c45fd
RS
2299 /* We build up the substituted string in ACCUM. */
2300 Lisp_Object accum;
2301 Lisp_Object middle;
0c8533c6 2302 int pos_byte;
080c45fd
RS
2303
2304 accum = Qnil;
2305
fc932ac6 2306 for (pos_byte = 0, pos = 0; pos_byte < STRING_BYTES (XSTRING (newtext));)
080c45fd
RS
2307 {
2308 int substart = -1;
2309 int subend;
1e79ec24 2310 int delbackslash = 0;
080c45fd 2311
0c8533c6
RS
2312 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2313
080c45fd
RS
2314 if (c == '\\')
2315 {
0c8533c6 2316 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
080c45fd
RS
2317 if (c == '&')
2318 {
d7a5ad5f
RS
2319 substart = search_regs.start[sub];
2320 subend = search_regs.end[sub];
080c45fd
RS
2321 }
2322 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2323 {
ad10348f 2324 if (search_regs.start[c - '0'] >= 0)
080c45fd
RS
2325 {
2326 substart = search_regs.start[c - '0'];
2327 subend = search_regs.end[c - '0'];
2328 }
2329 }
1e79ec24
KH
2330 else if (c == '\\')
2331 delbackslash = 1;
636a5e28
RS
2332 else
2333 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2334 }
2335 if (substart >= 0)
2336 {
2337 if (pos - 1 != lastpos + 1)
0c8533c6
RS
2338 middle = substring_both (newtext, lastpos + 1,
2339 lastpos_byte + 1,
2340 pos - 1, pos_byte - 1);
080c45fd
RS
2341 else
2342 middle = Qnil;
2343 accum = concat3 (accum, middle,
0c8533c6
RS
2344 Fsubstring (string,
2345 make_number (substart),
080c45fd
RS
2346 make_number (subend)));
2347 lastpos = pos;
0c8533c6 2348 lastpos_byte = pos_byte;
080c45fd 2349 }
1e79ec24
KH
2350 else if (delbackslash)
2351 {
0c8533c6
RS
2352 middle = substring_both (newtext, lastpos + 1,
2353 lastpos_byte + 1,
2354 pos, pos_byte);
2355
1e79ec24
KH
2356 accum = concat2 (accum, middle);
2357 lastpos = pos;
0c8533c6 2358 lastpos_byte = pos_byte;
1e79ec24 2359 }
080c45fd
RS
2360 }
2361
2362 if (pos != lastpos + 1)
0c8533c6
RS
2363 middle = substring_both (newtext, lastpos + 1,
2364 lastpos_byte + 1,
2365 pos, pos_byte);
080c45fd
RS
2366 else
2367 middle = Qnil;
2368
2369 newtext = concat2 (accum, middle);
2370 }
2371
636a5e28 2372 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2373 if (case_action == all_caps)
2374 newtext = Fupcase (newtext);
2375 else if (case_action == cap_initial)
2b2eead9 2376 newtext = Fupcase_initials (newtext);
080c45fd
RS
2377
2378 return concat3 (before, newtext, after);
2379 }
2380
b0eba991
RS
2381 /* Record point, the move (quietly) to the start of the match. */
2382 if (PT > search_regs.start[sub])
2383 opoint = PT - ZV;
2384 else
2385 opoint = PT;
2386
fa8ed3e0 2387 TEMP_SET_PT (search_regs.start[sub]);
b0eba991 2388
9a76659d
JB
2389 /* We insert the replacement text before the old text, and then
2390 delete the original text. This means that markers at the
2391 beginning or end of the original will float to the corresponding
2392 position in the replacement. */
ca1d1d23 2393 if (!NILP (literal))
16fdc568 2394 Finsert_and_inherit (1, &newtext);
ca1d1d23
JB
2395 else
2396 {
2397 struct gcpro gcpro1;
16fdc568 2398 GCPRO1 (newtext);
ca1d1d23 2399
16fdc568 2400 for (pos = 0; pos < XSTRING (newtext)->size; pos++)
ca1d1d23 2401 {
6ec8bbd2 2402 int offset = PT - search_regs.start[sub];
9a76659d 2403
16fdc568 2404 c = XSTRING (newtext)->data[pos];
ca1d1d23
JB
2405 if (c == '\\')
2406 {
16fdc568 2407 c = XSTRING (newtext)->data[++pos];
ca1d1d23 2408 if (c == '&')
9a76659d
JB
2409 Finsert_buffer_substring
2410 (Fcurrent_buffer (),
d7a5ad5f
RS
2411 make_number (search_regs.start[sub] + offset),
2412 make_number (search_regs.end[sub] + offset));
78445046 2413 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
2414 {
2415 if (search_regs.start[c - '0'] >= 1)
9a76659d
JB
2416 Finsert_buffer_substring
2417 (Fcurrent_buffer (),
2418 make_number (search_regs.start[c - '0'] + offset),
2419 make_number (search_regs.end[c - '0'] + offset));
ca1d1d23 2420 }
636a5e28 2421 else if (c == '\\')
ca1d1d23 2422 insert_char (c);
636a5e28
RS
2423 else
2424 error ("Invalid use of `\\' in replacement text");
ca1d1d23
JB
2425 }
2426 else
2427 insert_char (c);
2428 }
2429 UNGCPRO;
2430 }
2431
6ec8bbd2 2432 inslen = PT - (search_regs.start[sub]);
d7a5ad5f 2433 del_range (search_regs.start[sub] + inslen, search_regs.end[sub] + inslen);
ca1d1d23
JB
2434
2435 if (case_action == all_caps)
6ec8bbd2 2436 Fupcase_region (make_number (PT - inslen), make_number (PT));
ca1d1d23 2437 else if (case_action == cap_initial)
6ec8bbd2 2438 Fupcase_initials_region (make_number (PT - inslen), make_number (PT));
b0eba991 2439
3e18eecf
RS
2440 newpoint = PT;
2441
b0eba991 2442 /* Put point back where it was in the text. */
8d808a65 2443 if (opoint <= 0)
fa8ed3e0 2444 TEMP_SET_PT (opoint + ZV);
b0eba991 2445 else
fa8ed3e0 2446 TEMP_SET_PT (opoint);
b0eba991
RS
2447
2448 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2449 move_if_not_intangible (newpoint);
b0eba991 2450
ca1d1d23
JB
2451 return Qnil;
2452}
2453\f
2454static Lisp_Object
2455match_limit (num, beginningp)
2456 Lisp_Object num;
2457 int beginningp;
2458{
2459 register int n;
2460
2461 CHECK_NUMBER (num, 0);
2462 n = XINT (num);
4746118a
JB
2463 if (n < 0 || n >= search_regs.num_regs)
2464 args_out_of_range (num, make_number (search_regs.num_regs));
2465 if (search_regs.num_regs <= 0
2466 || search_regs.start[n] < 0)
ca1d1d23
JB
2467 return Qnil;
2468 return (make_number ((beginningp) ? search_regs.start[n]
2469 : search_regs.end[n]));
2470}
2471
2472DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2473 "Return position of start of text matched by last search.\n\
5806161b
EN
2474SUBEXP, a number, specifies which parenthesized expression in the last\n\
2475 regexp.\n\
2476Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2477 SUBEXP pairs.\n\
ca1d1d23 2478Zero means the entire text matched by the whole regexp or whole string.")
5806161b
EN
2479 (subexp)
2480 Lisp_Object subexp;
ca1d1d23 2481{
5806161b 2482 return match_limit (subexp, 1);
ca1d1d23
JB
2483}
2484
2485DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2486 "Return position of end of text matched by last search.\n\
5806161b
EN
2487SUBEXP, a number, specifies which parenthesized expression in the last\n\
2488 regexp.\n\
2489Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2490 SUBEXP pairs.\n\
ca1d1d23 2491Zero means the entire text matched by the whole regexp or whole string.")
5806161b
EN
2492 (subexp)
2493 Lisp_Object subexp;
ca1d1d23 2494{
5806161b 2495 return match_limit (subexp, 0);
ca1d1d23
JB
2496}
2497
56256c2a 2498DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
ca1d1d23
JB
2499 "Return a list containing all info on what the last search matched.\n\
2500Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
2501All the elements are markers or nil (nil if the Nth pair didn't match)\n\
2502if the last match was on a buffer; integers or nil if a string was matched.\n\
56256c2a
RS
2503Use `store-match-data' to reinstate the data in this list.\n\
2504\n\
2505If INTEGERS (the optional first argument) is non-nil, always use integers\n\
8ca821e9 2506\(rather than markers) to represent buffer positions.\n\
56256c2a
RS
2507If REUSE is a list, reuse it as part of the value. If REUSE is long enough\n\
2508to hold all the values, and if INTEGERS is non-nil, no consing is done.")
2509 (integers, reuse)
2510 Lisp_Object integers, reuse;
ca1d1d23 2511{
56256c2a 2512 Lisp_Object tail, prev;
4746118a 2513 Lisp_Object *data;
ca1d1d23
JB
2514 int i, len;
2515
daa37602 2516 if (NILP (last_thing_searched))
c36bcf1b 2517 return Qnil;
daa37602 2518
4746118a
JB
2519 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2520 * sizeof (Lisp_Object));
2521
ca1d1d23 2522 len = -1;
4746118a 2523 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2524 {
2525 int start = search_regs.start[i];
2526 if (start >= 0)
2527 {
56256c2a
RS
2528 if (EQ (last_thing_searched, Qt)
2529 || ! NILP (integers))
ca1d1d23 2530 {
c235cce7
KH
2531 XSETFASTINT (data[2 * i], start);
2532 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2533 }
0ed62dc7 2534 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2535 {
2536 data[2 * i] = Fmake_marker ();
daa37602
JB
2537 Fset_marker (data[2 * i],
2538 make_number (start),
2539 last_thing_searched);
ca1d1d23
JB
2540 data[2 * i + 1] = Fmake_marker ();
2541 Fset_marker (data[2 * i + 1],
daa37602
JB
2542 make_number (search_regs.end[i]),
2543 last_thing_searched);
ca1d1d23 2544 }
daa37602
JB
2545 else
2546 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2547 abort ();
2548
ca1d1d23
JB
2549 len = i;
2550 }
2551 else
2552 data[2 * i] = data [2 * i + 1] = Qnil;
2553 }
56256c2a
RS
2554
2555 /* If REUSE is not usable, cons up the values and return them. */
2556 if (! CONSP (reuse))
2557 return Flist (2 * len + 2, data);
2558
2559 /* If REUSE is a list, store as many value elements as will fit
2560 into the elements of REUSE. */
2561 for (i = 0, tail = reuse; CONSP (tail);
2562 i++, tail = XCONS (tail)->cdr)
2563 {
2564 if (i < 2 * len + 2)
2565 XCONS (tail)->car = data[i];
2566 else
2567 XCONS (tail)->car = Qnil;
2568 prev = tail;
2569 }
2570
2571 /* If we couldn't fit all value elements into REUSE,
2572 cons up the rest of them and add them to the end of REUSE. */
2573 if (i < 2 * len + 2)
2574 XCONS (prev)->cdr = Flist (2 * len + 2 - i, data + i);
2575
2576 return reuse;
ca1d1d23
JB
2577}
2578
2579
3f1c005b 2580DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
ca1d1d23
JB
2581 "Set internal data on last search match from elements of LIST.\n\
2582LIST should have been created by calling `match-data' previously.")
2583 (list)
2584 register Lisp_Object list;
2585{
2586 register int i;
2587 register Lisp_Object marker;
2588
7074fde6
FP
2589 if (running_asynch_code)
2590 save_search_regs ();
2591
ca1d1d23 2592 if (!CONSP (list) && !NILP (list))
b37902c8 2593 list = wrong_type_argument (Qconsp, list);
ca1d1d23 2594
daa37602
JB
2595 /* Unless we find a marker with a buffer in LIST, assume that this
2596 match data came from a string. */
2597 last_thing_searched = Qt;
2598
4746118a
JB
2599 /* Allocate registers if they don't already exist. */
2600 {
d084e942 2601 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
2602
2603 if (length > search_regs.num_regs)
2604 {
1113d9db
JB
2605 if (search_regs.num_regs == 0)
2606 {
2607 search_regs.start
2608 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2609 search_regs.end
2610 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2611 }
4746118a 2612 else
1113d9db
JB
2613 {
2614 search_regs.start
2615 = (regoff_t *) xrealloc (search_regs.start,
2616 length * sizeof (regoff_t));
2617 search_regs.end
2618 = (regoff_t *) xrealloc (search_regs.end,
2619 length * sizeof (regoff_t));
2620 }
4746118a 2621
487282dc 2622 search_regs.num_regs = length;
4746118a
JB
2623 }
2624 }
2625
2626 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2627 {
2628 marker = Fcar (list);
2629 if (NILP (marker))
2630 {
2631 search_regs.start[i] = -1;
2632 list = Fcdr (list);
2633 }
2634 else
2635 {
0ed62dc7 2636 if (MARKERP (marker))
daa37602
JB
2637 {
2638 if (XMARKER (marker)->buffer == 0)
c235cce7 2639 XSETFASTINT (marker, 0);
daa37602 2640 else
a3668d92 2641 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
daa37602 2642 }
ca1d1d23
JB
2643
2644 CHECK_NUMBER_COERCE_MARKER (marker, 0);
2645 search_regs.start[i] = XINT (marker);
2646 list = Fcdr (list);
2647
2648 marker = Fcar (list);
0ed62dc7 2649 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
c235cce7 2650 XSETFASTINT (marker, 0);
ca1d1d23
JB
2651
2652 CHECK_NUMBER_COERCE_MARKER (marker, 0);
2653 search_regs.end[i] = XINT (marker);
2654 }
2655 list = Fcdr (list);
2656 }
2657
2658 return Qnil;
2659}
2660
7074fde6
FP
2661/* If non-zero the match data have been saved in saved_search_regs
2662 during the execution of a sentinel or filter. */
75ebf74b 2663static int search_regs_saved;
7074fde6
FP
2664static struct re_registers saved_search_regs;
2665
2666/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2667 if asynchronous code (filter or sentinel) is running. */
2668static void
2669save_search_regs ()
2670{
2671 if (!search_regs_saved)
2672 {
2673 saved_search_regs.num_regs = search_regs.num_regs;
2674 saved_search_regs.start = search_regs.start;
2675 saved_search_regs.end = search_regs.end;
2676 search_regs.num_regs = 0;
2d4a771a
RS
2677 search_regs.start = 0;
2678 search_regs.end = 0;
7074fde6
FP
2679
2680 search_regs_saved = 1;
2681 }
2682}
2683
2684/* Called upon exit from filters and sentinels. */
2685void
2686restore_match_data ()
2687{
2688 if (search_regs_saved)
2689 {
2690 if (search_regs.num_regs > 0)
2691 {
2692 xfree (search_regs.start);
2693 xfree (search_regs.end);
2694 }
2695 search_regs.num_regs = saved_search_regs.num_regs;
2696 search_regs.start = saved_search_regs.start;
2697 search_regs.end = saved_search_regs.end;
2698
2699 search_regs_saved = 0;
2700 }
2701}
2702
ca1d1d23
JB
2703/* Quote a string to inactivate reg-expr chars */
2704
2705DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2706 "Return a regexp string which matches exactly STRING and nothing else.")
5806161b
EN
2707 (string)
2708 Lisp_Object string;
ca1d1d23
JB
2709{
2710 register unsigned char *in, *out, *end;
2711 register unsigned char *temp;
0c8533c6 2712 int backslashes_added = 0;
ca1d1d23 2713
5806161b 2714 CHECK_STRING (string, 0);
ca1d1d23 2715
fc932ac6 2716 temp = (unsigned char *) alloca (STRING_BYTES (XSTRING (string)) * 2);
ca1d1d23
JB
2717
2718 /* Now copy the data into the new string, inserting escapes. */
2719
5806161b 2720 in = XSTRING (string)->data;
fc932ac6 2721 end = in + STRING_BYTES (XSTRING (string));
ca1d1d23
JB
2722 out = temp;
2723
2724 for (; in != end; in++)
2725 {
2726 if (*in == '[' || *in == ']'
2727 || *in == '*' || *in == '.' || *in == '\\'
2728 || *in == '?' || *in == '+'
2729 || *in == '^' || *in == '$')
0c8533c6 2730 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
2731 *out++ = *in;
2732 }
2733
3f8100f1 2734 return make_specified_string (temp,
0c8533c6 2735 XSTRING (string)->size + backslashes_added,
3f8100f1
RS
2736 out - temp,
2737 STRING_MULTIBYTE (string));
ca1d1d23
JB
2738}
2739\f
dfcf069d 2740void
ca1d1d23
JB
2741syms_of_search ()
2742{
2743 register int i;
2744
487282dc
KH
2745 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2746 {
2747 searchbufs[i].buf.allocated = 100;
2748 searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
2749 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2750 searchbufs[i].regexp = Qnil;
2751 staticpro (&searchbufs[i].regexp);
2752 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2753 }
2754 searchbuf_head = &searchbufs[0];
ca1d1d23
JB
2755
2756 Qsearch_failed = intern ("search-failed");
2757 staticpro (&Qsearch_failed);
2758 Qinvalid_regexp = intern ("invalid-regexp");
2759 staticpro (&Qinvalid_regexp);
2760
2761 Fput (Qsearch_failed, Qerror_conditions,
2762 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2763 Fput (Qsearch_failed, Qerror_message,
2764 build_string ("Search failed"));
2765
2766 Fput (Qinvalid_regexp, Qerror_conditions,
2767 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2768 Fput (Qinvalid_regexp, Qerror_message,
2769 build_string ("Invalid regexp"));
2770
daa37602
JB
2771 last_thing_searched = Qnil;
2772 staticpro (&last_thing_searched);
2773
ca1d1d23 2774 defsubr (&Slooking_at);
b819a390
RS
2775 defsubr (&Sposix_looking_at);
2776 defsubr (&Sstring_match);
2777 defsubr (&Sposix_string_match);
ca1d1d23
JB
2778 defsubr (&Ssearch_forward);
2779 defsubr (&Ssearch_backward);
2780 defsubr (&Sword_search_forward);
2781 defsubr (&Sword_search_backward);
2782 defsubr (&Sre_search_forward);
2783 defsubr (&Sre_search_backward);
b819a390
RS
2784 defsubr (&Sposix_search_forward);
2785 defsubr (&Sposix_search_backward);
ca1d1d23
JB
2786 defsubr (&Sreplace_match);
2787 defsubr (&Smatch_beginning);
2788 defsubr (&Smatch_end);
2789 defsubr (&Smatch_data);
3f1c005b 2790 defsubr (&Sset_match_data);
ca1d1d23
JB
2791 defsubr (&Sregexp_quote);
2792}