(comint-preoutput-filter-functions): Doc fix.
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
31c8f881 2 Copyright (C) 1985, 86, 87, 93, 94, 97, 1998 Free Software Foundation, Inc.
ca1d1d23
JB
3
4This file is part of GNU Emacs.
5
6GNU Emacs is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
7c938215 8the Free Software Foundation; either version 2, or (at your option)
ca1d1d23
JB
9any later version.
10
11GNU Emacs is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU Emacs; see the file COPYING. If not, write to
3b7ad313
EN
18the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19Boston, MA 02111-1307, USA. */
ca1d1d23
JB
20
21
18160b98 22#include <config.h>
ca1d1d23
JB
23#include "lisp.h"
24#include "syntax.h"
5679531d 25#include "category.h"
ca1d1d23 26#include "buffer.h"
5679531d 27#include "charset.h"
9169c321 28#include "region-cache.h"
ca1d1d23 29#include "commands.h"
9ac0d9e0 30#include "blockinput.h"
bf1760bb 31#include "intervals.h"
4746118a 32
ca1d1d23
JB
33#include <sys/types.h>
34#include "regex.h"
35
1d288aef 36#define REGEXP_CACHE_SIZE 20
ca1d1d23 37
487282dc
KH
38/* If the regexp is non-nil, then the buffer contains the compiled form
39 of that regexp, suitable for searching. */
1d288aef
RS
40struct regexp_cache
41{
487282dc
KH
42 struct regexp_cache *next;
43 Lisp_Object regexp;
44 struct re_pattern_buffer buf;
45 char fastmap[0400];
b819a390
RS
46 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
47 char posix;
487282dc 48};
ca1d1d23 49
487282dc
KH
50/* The instances of that struct. */
51struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 52
487282dc
KH
53/* The head of the linked list; points to the most recently used buffer. */
54struct regexp_cache *searchbuf_head;
ca1d1d23 55
ca1d1d23 56
4746118a
JB
57/* Every call to re_match, etc., must pass &search_regs as the regs
58 argument unless you can show it is unnecessary (i.e., if re_match
59 is certainly going to be called again before region-around-match
60 can be called).
61
62 Since the registers are now dynamically allocated, we need to make
63 sure not to refer to the Nth register before checking that it has
1113d9db
JB
64 been allocated by checking search_regs.num_regs.
65
66 The regex code keeps track of whether it has allocated the search
487282dc
KH
67 buffer using bits in the re_pattern_buffer. This means that whenever
68 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
69 allocated any registers, and will allocate new registers the next
70 time you call a searching or matching function. Therefore, we need
71 to call re_set_registers after compiling a new pattern or after
72 setting the match registers, so that the regex functions will be
73 able to free or re-allocate it properly. */
ca1d1d23
JB
74static struct re_registers search_regs;
75
daa37602
JB
76/* The buffer in which the last search was performed, or
77 Qt if the last search was done in a string;
78 Qnil if no searching has been done yet. */
79static Lisp_Object last_thing_searched;
ca1d1d23 80
8e6208c5 81/* error condition signaled when regexp compile_pattern fails */
ca1d1d23
JB
82
83Lisp_Object Qinvalid_regexp;
84
ca325161 85static void set_search_regs ();
044f81f1 86static void save_search_regs ();
facdc750
RS
87static int simple_search ();
88static int boyer_moore ();
b819a390
RS
89static int search_buffer ();
90
ca1d1d23
JB
91static void
92matcher_overflow ()
93{
94 error ("Stack overflow in regexp matcher");
95}
96
97#ifdef __STDC__
98#define CONST const
99#else
100#define CONST
101#endif
102
b819a390
RS
103/* Compile a regexp and signal a Lisp error if anything goes wrong.
104 PATTERN is the pattern to compile.
105 CP is the place to put the result.
facdc750 106 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
107 REGP is the structure that says where to store the "register"
108 values that will result from matching this pattern.
109 If it is 0, we should compile the pattern not to record any
110 subexpression bounds.
111 POSIX is nonzero if we want full backtracking (POSIX style)
5679531d
KH
112 for this pattern. 0 means backtrack only enough to get a valid match.
113 MULTIBYTE is nonzero if we want to handle multibyte characters in
114 PATTERN. 0 means all multibyte characters are recognized just as
115 sequences of binary data. */
ca1d1d23 116
487282dc 117static void
5679531d 118compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
487282dc 119 struct regexp_cache *cp;
ca1d1d23 120 Lisp_Object pattern;
facdc750 121 Lisp_Object translate;
487282dc 122 struct re_registers *regp;
b819a390 123 int posix;
5679531d 124 int multibyte;
ca1d1d23 125{
f8bd51c4
KH
126 char *raw_pattern;
127 int raw_pattern_size;
d451e4db 128 char *val;
b819a390 129 reg_syntax_t old;
ca1d1d23 130
f8bd51c4
KH
131 /* MULTIBYTE says whether the text to be searched is multibyte.
132 We must convert PATTERN to match that, or we will not really
133 find things right. */
134
135 if (multibyte == STRING_MULTIBYTE (pattern))
136 {
137 raw_pattern = (char *) XSTRING (pattern)->data;
138 raw_pattern_size = XSTRING (pattern)->size_byte;
139 }
140 else if (multibyte)
141 {
142 raw_pattern_size = count_size_as_multibyte (XSTRING (pattern)->data,
143 XSTRING (pattern)->size);
144 raw_pattern = (char *) alloca (raw_pattern_size + 1);
145 copy_text (XSTRING (pattern)->data, raw_pattern,
146 XSTRING (pattern)->size, 0, 1);
147 }
148 else
149 {
150 /* Converting multibyte to single-byte.
151
152 ??? Perhaps this conversion should be done in a special way
153 by subtracting nonascii-insert-offset from each non-ASCII char,
154 so that only the multibyte chars which really correspond to
155 the chosen single-byte character set can possibly match. */
156 raw_pattern_size = XSTRING (pattern)->size;
157 raw_pattern = (char *) alloca (raw_pattern_size + 1);
158 copy_text (XSTRING (pattern)->data, raw_pattern,
facdc750 159 XSTRING (pattern)->size_byte, 1, 0);
f8bd51c4
KH
160 }
161
487282dc 162 cp->regexp = Qnil;
facdc750 163 cp->buf.translate = (! NILP (translate) ? translate : 0);
b819a390 164 cp->posix = posix;
5679531d 165 cp->buf.multibyte = multibyte;
9ac0d9e0 166 BLOCK_INPUT;
b819a390
RS
167 old = re_set_syntax (RE_SYNTAX_EMACS
168 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
f8bd51c4 169 val = (char *) re_compile_pattern (raw_pattern, raw_pattern_size, &cp->buf);
b819a390 170 re_set_syntax (old);
9ac0d9e0 171 UNBLOCK_INPUT;
ca1d1d23 172 if (val)
487282dc 173 Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
1113d9db 174
487282dc 175 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
176}
177
178/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
179 the cache.
180 PATTERN is the pattern to compile.
facdc750 181 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
182 REGP is the structure that says where to store the "register"
183 values that will result from matching this pattern.
184 If it is 0, we should compile the pattern not to record any
185 subexpression bounds.
186 POSIX is nonzero if we want full backtracking (POSIX style)
187 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
188
189struct re_pattern_buffer *
0c8533c6 190compile_pattern (pattern, regp, translate, posix, multibyte)
487282dc
KH
191 Lisp_Object pattern;
192 struct re_registers *regp;
facdc750 193 Lisp_Object translate;
0c8533c6 194 int posix, multibyte;
487282dc
KH
195{
196 struct regexp_cache *cp, **cpp;
197
198 for (cpp = &searchbuf_head; ; cpp = &cp->next)
199 {
200 cp = *cpp;
1d288aef
RS
201 if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
202 && !NILP (Fstring_equal (cp->regexp, pattern))
facdc750 203 && cp->buf.translate == (! NILP (translate) ? translate : 0)
5679531d
KH
204 && cp->posix == posix
205 && cp->buf.multibyte == multibyte)
487282dc
KH
206 break;
207
208 /* If we're at the end of the cache, compile into the last cell. */
209 if (cp->next == 0)
210 {
5679531d 211 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
487282dc
KH
212 break;
213 }
214 }
215
216 /* When we get here, cp (aka *cpp) contains the compiled pattern,
217 either because we found it in the cache or because we just compiled it.
218 Move it to the front of the queue to mark it as most recently used. */
219 *cpp = cp->next;
220 cp->next = searchbuf_head;
221 searchbuf_head = cp;
1113d9db 222
6639708c
RS
223 /* Advise the searching functions about the space we have allocated
224 for register data. */
225 if (regp)
226 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
227
487282dc 228 return &cp->buf;
ca1d1d23
JB
229}
230
231/* Error condition used for failing searches */
232Lisp_Object Qsearch_failed;
233
234Lisp_Object
235signal_failure (arg)
236 Lisp_Object arg;
237{
238 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
239 return Qnil;
240}
241\f
b819a390
RS
242static Lisp_Object
243looking_at_1 (string, posix)
ca1d1d23 244 Lisp_Object string;
b819a390 245 int posix;
ca1d1d23
JB
246{
247 Lisp_Object val;
248 unsigned char *p1, *p2;
249 int s1, s2;
250 register int i;
487282dc 251 struct re_pattern_buffer *bufp;
ca1d1d23 252
7074fde6
FP
253 if (running_asynch_code)
254 save_search_regs ();
255
ca1d1d23 256 CHECK_STRING (string, 0);
487282dc
KH
257 bufp = compile_pattern (string, &search_regs,
258 (!NILP (current_buffer->case_fold_search)
facdc750 259 ? DOWNCASE_TABLE : Qnil),
0c8533c6
RS
260 posix,
261 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23
JB
262
263 immediate_quit = 1;
264 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
265
266 /* Get pointers and sizes of the two strings
267 that make up the visible portion of the buffer. */
268
269 p1 = BEGV_ADDR;
fa8ed3e0 270 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 271 p2 = GAP_END_ADDR;
fa8ed3e0 272 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
273 if (s1 < 0)
274 {
275 p2 = p1;
fa8ed3e0 276 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
277 s1 = 0;
278 }
279 if (s2 < 0)
280 {
fa8ed3e0 281 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
282 s2 = 0;
283 }
8bb43c28
RS
284
285 re_match_object = Qnil;
ca1d1d23 286
487282dc 287 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
fa8ed3e0
RS
288 PT_BYTE - BEGV_BYTE, &search_regs,
289 ZV_BYTE - BEGV_BYTE);
ca1d1d23
JB
290 if (i == -2)
291 matcher_overflow ();
292
293 val = (0 <= i ? Qt : Qnil);
fa8ed3e0
RS
294 if (i >= 0)
295 for (i = 0; i < search_regs.num_regs; i++)
296 if (search_regs.start[i] >= 0)
297 {
298 search_regs.start[i]
299 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
300 search_regs.end[i]
301 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
302 }
a3668d92 303 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
304 immediate_quit = 0;
305 return val;
306}
307
b819a390 308DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
94f94972 309 "Return t if text after point matches regular expression REGEXP.\n\
b819a390
RS
310This function modifies the match data that `match-beginning',\n\
311`match-end' and `match-data' access; save and restore the match\n\
312data if you want to preserve them.")
94f94972
RS
313 (regexp)
314 Lisp_Object regexp;
b819a390 315{
94f94972 316 return looking_at_1 (regexp, 0);
b819a390
RS
317}
318
319DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
94f94972 320 "Return t if text after point matches regular expression REGEXP.\n\
b819a390
RS
321Find the longest match, in accord with Posix regular expression rules.\n\
322This function modifies the match data that `match-beginning',\n\
323`match-end' and `match-data' access; save and restore the match\n\
324data if you want to preserve them.")
94f94972
RS
325 (regexp)
326 Lisp_Object regexp;
b819a390 327{
94f94972 328 return looking_at_1 (regexp, 1);
b819a390
RS
329}
330\f
331static Lisp_Object
332string_match_1 (regexp, string, start, posix)
ca1d1d23 333 Lisp_Object regexp, string, start;
b819a390 334 int posix;
ca1d1d23
JB
335{
336 int val;
487282dc 337 struct re_pattern_buffer *bufp;
0c8533c6
RS
338 int pos, pos_byte;
339 int i;
ca1d1d23 340
7074fde6
FP
341 if (running_asynch_code)
342 save_search_regs ();
343
ca1d1d23
JB
344 CHECK_STRING (regexp, 0);
345 CHECK_STRING (string, 1);
346
347 if (NILP (start))
0c8533c6 348 pos = 0, pos_byte = 0;
ca1d1d23
JB
349 else
350 {
351 int len = XSTRING (string)->size;
352
353 CHECK_NUMBER (start, 2);
0c8533c6
RS
354 pos = XINT (start);
355 if (pos < 0 && -pos <= len)
356 pos = len + pos;
357 else if (0 > pos || pos > len)
ca1d1d23 358 args_out_of_range (string, start);
0c8533c6 359 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
360 }
361
487282dc
KH
362 bufp = compile_pattern (regexp, &search_regs,
363 (!NILP (current_buffer->case_fold_search)
facdc750 364 ? DOWNCASE_TABLE : Qnil),
0c8533c6
RS
365 posix,
366 STRING_MULTIBYTE (string));
ca1d1d23 367 immediate_quit = 1;
8bb43c28
RS
368 re_match_object = string;
369
487282dc 370 val = re_search (bufp, (char *) XSTRING (string)->data,
0c8533c6
RS
371 XSTRING (string)->size_byte, pos_byte,
372 XSTRING (string)->size_byte - pos_byte,
ca1d1d23
JB
373 &search_regs);
374 immediate_quit = 0;
daa37602 375 last_thing_searched = Qt;
ca1d1d23
JB
376 if (val == -2)
377 matcher_overflow ();
378 if (val < 0) return Qnil;
0c8533c6
RS
379
380 for (i = 0; i < search_regs.num_regs; i++)
381 if (search_regs.start[i] >= 0)
382 {
383 search_regs.start[i]
384 = string_byte_to_char (string, search_regs.start[i]);
385 search_regs.end[i]
386 = string_byte_to_char (string, search_regs.end[i]);
387 }
388
389 return make_number (string_byte_to_char (string, val));
ca1d1d23 390}
e59a8453 391
b819a390
RS
392DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
393 "Return index of start of first match for REGEXP in STRING, or nil.\n\
394If third arg START is non-nil, start search at that index in STRING.\n\
395For index of first char beyond the match, do (match-end 0).\n\
396`match-end' and `match-beginning' also give indices of substrings\n\
397matched by parenthesis constructs in the pattern.")
398 (regexp, string, start)
399 Lisp_Object regexp, string, start;
400{
401 return string_match_1 (regexp, string, start, 0);
402}
403
404DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
405 "Return index of start of first match for REGEXP in STRING, or nil.\n\
406Find the longest match, in accord with Posix regular expression rules.\n\
407If third arg START is non-nil, start search at that index in STRING.\n\
408For index of first char beyond the match, do (match-end 0).\n\
409`match-end' and `match-beginning' also give indices of substrings\n\
410matched by parenthesis constructs in the pattern.")
411 (regexp, string, start)
412 Lisp_Object regexp, string, start;
413{
414 return string_match_1 (regexp, string, start, 1);
415}
416
e59a8453
RS
417/* Match REGEXP against STRING, searching all of STRING,
418 and return the index of the match, or negative on failure.
419 This does not clobber the match data. */
420
421int
422fast_string_match (regexp, string)
423 Lisp_Object regexp, string;
424{
425 int val;
487282dc 426 struct re_pattern_buffer *bufp;
e59a8453 427
facdc750
RS
428 bufp = compile_pattern (regexp, 0, Qnil,
429 0, STRING_MULTIBYTE (string));
e59a8453 430 immediate_quit = 1;
8bb43c28
RS
431 re_match_object = string;
432
487282dc 433 val = re_search (bufp, (char *) XSTRING (string)->data,
4996330b 434 XSTRING (string)->size_byte, 0, XSTRING (string)->size_byte,
e59a8453
RS
435 0);
436 immediate_quit = 0;
437 return val;
438}
5679531d
KH
439
440/* Match REGEXP against STRING, searching all of STRING ignoring case,
441 and return the index of the match, or negative on failure.
0c8533c6
RS
442 This does not clobber the match data.
443 We assume that STRING contains single-byte characters. */
5679531d
KH
444
445extern Lisp_Object Vascii_downcase_table;
446
447int
b4577c63 448fast_c_string_match_ignore_case (regexp, string)
5679531d
KH
449 Lisp_Object regexp;
450 char *string;
451{
452 int val;
453 struct re_pattern_buffer *bufp;
454 int len = strlen (string);
455
0c8533c6 456 regexp = string_make_unibyte (regexp);
b4577c63 457 re_match_object = Qt;
5679531d 458 bufp = compile_pattern (regexp, 0,
facdc750 459 Vascii_downcase_table, 0,
f8bd51c4 460 0);
5679531d
KH
461 immediate_quit = 1;
462 val = re_search (bufp, string, len, 0, len, 0);
463 immediate_quit = 0;
464 return val;
465}
ca1d1d23 466\f
9169c321
JB
467/* max and min. */
468
469static int
470max (a, b)
471 int a, b;
472{
473 return ((a > b) ? a : b);
474}
475
476static int
477min (a, b)
478 int a, b;
479{
480 return ((a < b) ? a : b);
481}
482
483\f
484/* The newline cache: remembering which sections of text have no newlines. */
485
486/* If the user has requested newline caching, make sure it's on.
487 Otherwise, make sure it's off.
488 This is our cheezy way of associating an action with the change of
489 state of a buffer-local variable. */
490static void
491newline_cache_on_off (buf)
492 struct buffer *buf;
493{
494 if (NILP (buf->cache_long_line_scans))
495 {
496 /* It should be off. */
497 if (buf->newline_cache)
498 {
499 free_region_cache (buf->newline_cache);
500 buf->newline_cache = 0;
501 }
502 }
503 else
504 {
505 /* It should be on. */
506 if (buf->newline_cache == 0)
507 buf->newline_cache = new_region_cache ();
508 }
509}
510
511\f
512/* Search for COUNT instances of the character TARGET between START and END.
513
514 If COUNT is positive, search forwards; END must be >= START.
515 If COUNT is negative, search backwards for the -COUNTth instance;
516 END must be <= START.
517 If COUNT is zero, do anything you please; run rogue, for all I care.
518
519 If END is zero, use BEGV or ZV instead, as appropriate for the
520 direction indicated by COUNT.
ffd56f97
JB
521
522 If we find COUNT instances, set *SHORTAGE to zero, and return the
5bfe95c9
RS
523 position after the COUNTth match. Note that for reverse motion
524 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 525
9169c321
JB
526 If we don't find COUNT instances before reaching END, set *SHORTAGE
527 to the number of TARGETs left unfound, and return END.
ffd56f97 528
087a5f81
RS
529 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
530 except when inside redisplay. */
531
9169c321
JB
532scan_buffer (target, start, end, count, shortage, allow_quit)
533 register int target;
534 int start, end;
535 int count;
536 int *shortage;
087a5f81 537 int allow_quit;
ca1d1d23 538{
9169c321
JB
539 struct region_cache *newline_cache;
540 int direction;
ffd56f97 541
9169c321
JB
542 if (count > 0)
543 {
544 direction = 1;
545 if (! end) end = ZV;
546 }
547 else
548 {
549 direction = -1;
550 if (! end) end = BEGV;
551 }
ffd56f97 552
9169c321
JB
553 newline_cache_on_off (current_buffer);
554 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
555
556 if (shortage != 0)
557 *shortage = 0;
558
087a5f81 559 immediate_quit = allow_quit;
ca1d1d23 560
ffd56f97 561 if (count > 0)
9169c321 562 while (start != end)
ca1d1d23 563 {
9169c321
JB
564 /* Our innermost scanning loop is very simple; it doesn't know
565 about gaps, buffer ends, or the newline cache. ceiling is
566 the position of the last character before the next such
567 obstacle --- the last character the dumb search loop should
568 examine. */
fa8ed3e0
RS
569 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
570 int start_byte = CHAR_TO_BYTE (start);
9169c321
JB
571
572 /* If we're looking for a newline, consult the newline cache
573 to see where we can avoid some scanning. */
574 if (target == '\n' && newline_cache)
575 {
576 int next_change;
577 immediate_quit = 0;
578 while (region_cache_forward
fa8ed3e0
RS
579 (current_buffer, newline_cache, start_byte, &next_change))
580 start_byte = next_change;
cbe0db0d 581 immediate_quit = allow_quit;
9169c321 582
fa8ed3e0
RS
583 /* START should never be after END. */
584 if (start_byte > ceiling_byte)
585 start_byte = ceiling_byte;
9169c321
JB
586
587 /* Now the text after start is an unknown region, and
588 next_change is the position of the next known region. */
fa8ed3e0 589 ceiling_byte = min (next_change - 1, ceiling_byte);
9169c321
JB
590 }
591
592 /* The dumb loop can only scan text stored in contiguous
593 bytes. BUFFER_CEILING_OF returns the last character
594 position that is contiguous, so the ceiling is the
595 position after that. */
fa8ed3e0 596 ceiling_byte = min (BUFFER_CEILING_OF (start_byte), ceiling_byte);
9169c321
JB
597
598 {
599 /* The termination address of the dumb loop. */
fa8ed3e0
RS
600 register unsigned char *ceiling_addr
601 = BYTE_POS_ADDR (ceiling_byte) + 1;
602 register unsigned char *cursor
603 = BYTE_POS_ADDR (start_byte);
9169c321
JB
604 unsigned char *base = cursor;
605
606 while (cursor < ceiling_addr)
607 {
608 unsigned char *scan_start = cursor;
609
610 /* The dumb loop. */
611 while (*cursor != target && ++cursor < ceiling_addr)
612 ;
613
614 /* If we're looking for newlines, cache the fact that
615 the region from start to cursor is free of them. */
616 if (target == '\n' && newline_cache)
617 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
618 start_byte + scan_start - base,
619 start_byte + cursor - base);
9169c321
JB
620
621 /* Did we find the target character? */
622 if (cursor < ceiling_addr)
623 {
624 if (--count == 0)
625 {
626 immediate_quit = 0;
fa8ed3e0 627 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
9169c321
JB
628 }
629 cursor++;
630 }
631 }
632
fa8ed3e0 633 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321 634 }
ca1d1d23
JB
635 }
636 else
9169c321
JB
637 while (start > end)
638 {
639 /* The last character to check before the next obstacle. */
fa8ed3e0
RS
640 int ceiling_byte = CHAR_TO_BYTE (end);
641 int start_byte = CHAR_TO_BYTE (start);
9169c321
JB
642
643 /* Consult the newline cache, if appropriate. */
644 if (target == '\n' && newline_cache)
645 {
646 int next_change;
647 immediate_quit = 0;
648 while (region_cache_backward
fa8ed3e0
RS
649 (current_buffer, newline_cache, start_byte, &next_change))
650 start_byte = next_change;
cbe0db0d 651 immediate_quit = allow_quit;
9169c321
JB
652
653 /* Start should never be at or before end. */
fa8ed3e0
RS
654 if (start_byte <= ceiling_byte)
655 start_byte = ceiling_byte + 1;
9169c321
JB
656
657 /* Now the text before start is an unknown region, and
658 next_change is the position of the next known region. */
fa8ed3e0 659 ceiling_byte = max (next_change, ceiling_byte);
9169c321
JB
660 }
661
662 /* Stop scanning before the gap. */
fa8ed3e0 663 ceiling_byte = max (BUFFER_FLOOR_OF (start_byte - 1), ceiling_byte);
9169c321
JB
664
665 {
666 /* The termination address of the dumb loop. */
fa8ed3e0
RS
667 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
668 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
669 unsigned char *base = cursor;
670
671 while (cursor >= ceiling_addr)
672 {
673 unsigned char *scan_start = cursor;
674
675 while (*cursor != target && --cursor >= ceiling_addr)
676 ;
677
678 /* If we're looking for newlines, cache the fact that
679 the region from after the cursor to start is free of them. */
680 if (target == '\n' && newline_cache)
681 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
682 start_byte + cursor - base,
683 start_byte + scan_start - base);
9169c321
JB
684
685 /* Did we find the target character? */
686 if (cursor >= ceiling_addr)
687 {
688 if (++count >= 0)
689 {
690 immediate_quit = 0;
fa8ed3e0 691 return BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
692 }
693 cursor--;
694 }
695 }
696
fa8ed3e0 697 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
698 }
699 }
700
ca1d1d23
JB
701 immediate_quit = 0;
702 if (shortage != 0)
ffd56f97 703 *shortage = count * direction;
9169c321 704 return start;
ca1d1d23 705}
fa8ed3e0
RS
706\f
707/* Search for COUNT instances of a line boundary, which means either a
708 newline or (if selective display enabled) a carriage return.
709 Start at START. If COUNT is negative, search backwards.
710
711 We report the resulting position by calling TEMP_SET_PT_BOTH.
712
713 If we find COUNT instances. we position after (always after,
714 even if scanning backwards) the COUNTth match, and return 0.
715
716 If we don't find COUNT instances before reaching the end of the
717 buffer (or the beginning, if scanning backwards), we return
718 the number of line boundaries left unfound, and position at
719 the limit we bumped up against.
720
721 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
d5d57b92 722 except in special cases. */
ca1d1d23 723
63fa018d 724int
fa8ed3e0
RS
725scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
726 int start, start_byte;
727 int limit, limit_byte;
728 register int count;
729 int allow_quit;
63fa018d 730{
fa8ed3e0
RS
731 int direction = ((count > 0) ? 1 : -1);
732
733 register unsigned char *cursor;
734 unsigned char *base;
735
736 register int ceiling;
737 register unsigned char *ceiling_addr;
738
d5d57b92
RS
739 int old_immediate_quit = immediate_quit;
740
fa8ed3e0
RS
741 /* If we are not in selective display mode,
742 check only for newlines. */
743 int selective_display = (!NILP (current_buffer->selective_display)
744 && !INTEGERP (current_buffer->selective_display));
745
746 /* The code that follows is like scan_buffer
747 but checks for either newline or carriage return. */
748
d5d57b92
RS
749 if (allow_quit)
750 immediate_quit++;
fa8ed3e0
RS
751
752 start_byte = CHAR_TO_BYTE (start);
753
754 if (count > 0)
755 {
756 while (start_byte < limit_byte)
757 {
758 ceiling = BUFFER_CEILING_OF (start_byte);
759 ceiling = min (limit_byte - 1, ceiling);
760 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
761 base = (cursor = BYTE_POS_ADDR (start_byte));
762 while (1)
763 {
764 while (*cursor != '\n' && ++cursor != ceiling_addr)
765 ;
766
767 if (cursor != ceiling_addr)
768 {
769 if (--count == 0)
770 {
d5d57b92 771 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
772 start_byte = start_byte + cursor - base + 1;
773 start = BYTE_TO_CHAR (start_byte);
774 TEMP_SET_PT_BOTH (start, start_byte);
775 return 0;
776 }
777 else
778 if (++cursor == ceiling_addr)
779 break;
780 }
781 else
782 break;
783 }
784 start_byte += cursor - base;
785 }
786 }
787 else
788 {
fa8ed3e0
RS
789 while (start_byte > limit_byte)
790 {
791 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
792 ceiling = max (limit_byte, ceiling);
793 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
794 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
795 while (1)
796 {
797 while (--cursor != ceiling_addr && *cursor != '\n')
798 ;
799
800 if (cursor != ceiling_addr)
801 {
802 if (++count == 0)
803 {
d5d57b92 804 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
805 /* Return the position AFTER the match we found. */
806 start_byte = start_byte + cursor - base + 1;
807 start = BYTE_TO_CHAR (start_byte);
808 TEMP_SET_PT_BOTH (start, start_byte);
809 return 0;
810 }
811 }
812 else
813 break;
814 }
815 /* Here we add 1 to compensate for the last decrement
816 of CURSOR, which took it past the valid range. */
817 start_byte += cursor - base + 1;
818 }
819 }
820
821 TEMP_SET_PT_BOTH (limit, limit_byte);
d5d57b92 822 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
823
824 return count * direction;
63fa018d
RS
825}
826
ca1d1d23 827int
fa8ed3e0 828find_next_newline_no_quit (from, cnt)
ca1d1d23
JB
829 register int from, cnt;
830{
fa8ed3e0 831 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
9169c321
JB
832}
833
9169c321
JB
834/* Like find_next_newline, but returns position before the newline,
835 not after, and only search up to TO. This isn't just
836 find_next_newline (...)-1, because you might hit TO. */
fa8ed3e0 837
9169c321
JB
838int
839find_before_next_newline (from, to, cnt)
cbe0db0d 840 int from, to, cnt;
9169c321
JB
841{
842 int shortage;
843 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
844
845 if (shortage == 0)
846 pos--;
847
848 return pos;
ca1d1d23
JB
849}
850\f
ca1d1d23
JB
851/* Subroutines of Lisp buffer search functions. */
852
853static Lisp_Object
b819a390 854search_command (string, bound, noerror, count, direction, RE, posix)
ca1d1d23
JB
855 Lisp_Object string, bound, noerror, count;
856 int direction;
857 int RE;
b819a390 858 int posix;
ca1d1d23
JB
859{
860 register int np;
9f43ad85 861 int lim, lim_byte;
ca1d1d23
JB
862 int n = direction;
863
864 if (!NILP (count))
865 {
866 CHECK_NUMBER (count, 3);
867 n *= XINT (count);
868 }
869
870 CHECK_STRING (string, 0);
871 if (NILP (bound))
9f43ad85
RS
872 {
873 if (n > 0)
874 lim = ZV, lim_byte = ZV_BYTE;
875 else
876 lim = BEGV, lim_byte = BEGV_BYTE;
877 }
ca1d1d23
JB
878 else
879 {
880 CHECK_NUMBER_COERCE_MARKER (bound, 1);
881 lim = XINT (bound);
6ec8bbd2 882 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
883 error ("Invalid search bound (wrong side of point)");
884 if (lim > ZV)
9f43ad85 885 lim = ZV, lim_byte = ZV_BYTE;
588d2fd5 886 else if (lim < BEGV)
9f43ad85 887 lim = BEGV, lim_byte = BEGV_BYTE;
588d2fd5
KH
888 else
889 lim_byte = CHAR_TO_BYTE (lim);
ca1d1d23
JB
890 }
891
9f43ad85 892 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
ca1d1d23 893 (!NILP (current_buffer->case_fold_search)
facdc750 894 ? current_buffer->case_canon_table
3135e9fd 895 : Qnil),
ca1d1d23 896 (!NILP (current_buffer->case_fold_search)
facdc750 897 ? current_buffer->case_eqv_table
3135e9fd 898 : Qnil),
b819a390 899 posix);
ca1d1d23
JB
900 if (np <= 0)
901 {
902 if (NILP (noerror))
903 return signal_failure (string);
904 if (!EQ (noerror, Qt))
905 {
906 if (lim < BEGV || lim > ZV)
907 abort ();
9f43ad85 908 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
909 return Qnil;
910#if 0 /* This would be clean, but maybe programs depend on
911 a value of nil here. */
481399bf 912 np = lim;
a5f217b8 913#endif
ca1d1d23 914 }
481399bf
RS
915 else
916 return Qnil;
ca1d1d23
JB
917 }
918
919 if (np < BEGV || np > ZV)
920 abort ();
921
922 SET_PT (np);
923
924 return make_number (np);
925}
926\f
fa8ed3e0
RS
927/* Return 1 if REGEXP it matches just one constant string. */
928
b6d6a51c
KH
929static int
930trivial_regexp_p (regexp)
931 Lisp_Object regexp;
932{
4996330b 933 int len = XSTRING (regexp)->size_byte;
b6d6a51c
KH
934 unsigned char *s = XSTRING (regexp)->data;
935 unsigned char c;
936 while (--len >= 0)
937 {
938 switch (*s++)
939 {
940 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
941 return 0;
942 case '\\':
943 if (--len < 0)
944 return 0;
945 switch (*s++)
946 {
947 case '|': case '(': case ')': case '`': case '\'': case 'b':
948 case 'B': case '<': case '>': case 'w': case 'W': case 's':
866f60fd 949 case 'S': case '=':
5679531d 950 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 951 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
952 case '6': case '7': case '8': case '9':
953 return 0;
954 }
955 }
956 }
957 return 1;
958}
959
ca325161 960/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 961 starting at position POS and stopping at position LIM,
b819a390 962 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
963 a regular expression if RE is true.
964
965 If N is positive, searching is forward and LIM must be greater than POS.
966 If N is negative, searching is backward and LIM must be less than POS.
967
facdc750 968 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 969 or else the position at the beginning of the Nth occurrence
b819a390
RS
970 (if searching backward) or the end (if searching forward).
971
972 POSIX is nonzero if we want full backtracking (POSIX style)
973 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 974
aff2ce94
RS
975#define TRANSLATE(out, trt, d) \
976do \
977 { \
978 if (! NILP (trt)) \
979 { \
980 Lisp_Object temp; \
981 temp = Faref (trt, make_number (d)); \
982 if (INTEGERP (temp)) \
983 out = XINT (temp); \
984 else \
985 out = d; \
986 } \
987 else \
988 out = d; \
989 } \
990while (0)
facdc750 991
b819a390 992static int
9f43ad85
RS
993search_buffer (string, pos, pos_byte, lim, lim_byte, n,
994 RE, trt, inverse_trt, posix)
ca1d1d23
JB
995 Lisp_Object string;
996 int pos;
9f43ad85 997 int pos_byte;
ca1d1d23 998 int lim;
9f43ad85 999 int lim_byte;
ca1d1d23
JB
1000 int n;
1001 int RE;
facdc750
RS
1002 Lisp_Object trt;
1003 Lisp_Object inverse_trt;
b819a390 1004 int posix;
ca1d1d23
JB
1005{
1006 int len = XSTRING (string)->size;
0c8533c6 1007 int len_byte = XSTRING (string)->size_byte;
facdc750 1008 register int i;
ca1d1d23 1009
7074fde6
FP
1010 if (running_asynch_code)
1011 save_search_regs ();
1012
ca1d1d23 1013 /* Null string is found at starting position. */
3f57a499 1014 if (len == 0)
ca325161
RS
1015 {
1016 set_search_regs (pos, 0);
1017 return pos;
1018 }
3f57a499
RS
1019
1020 /* Searching 0 times means don't move. */
1021 if (n == 0)
ca1d1d23
JB
1022 return pos;
1023
b6d6a51c 1024 if (RE && !trivial_regexp_p (string))
ca1d1d23 1025 {
facdc750
RS
1026 unsigned char *p1, *p2;
1027 int s1, s2;
487282dc
KH
1028 struct re_pattern_buffer *bufp;
1029
0c8533c6
RS
1030 bufp = compile_pattern (string, &search_regs, trt, posix,
1031 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23 1032
ca1d1d23
JB
1033 immediate_quit = 1; /* Quit immediately if user types ^G,
1034 because letting this function finish
1035 can take too long. */
1036 QUIT; /* Do a pending quit right away,
1037 to avoid paradoxical behavior */
1038 /* Get pointers and sizes of the two strings
1039 that make up the visible portion of the buffer. */
1040
1041 p1 = BEGV_ADDR;
fa8ed3e0 1042 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1043 p2 = GAP_END_ADDR;
fa8ed3e0 1044 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1045 if (s1 < 0)
1046 {
1047 p2 = p1;
fa8ed3e0 1048 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1049 s1 = 0;
1050 }
1051 if (s2 < 0)
1052 {
fa8ed3e0 1053 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1054 s2 = 0;
1055 }
8bb43c28
RS
1056 re_match_object = Qnil;
1057
ca1d1d23
JB
1058 while (n < 0)
1059 {
42db823b 1060 int val;
487282dc 1061 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1062 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1063 &search_regs,
42db823b 1064 /* Don't allow match past current point */
4996330b 1065 pos_byte - BEGV_BYTE);
ca1d1d23 1066 if (val == -2)
b6d6a51c
KH
1067 {
1068 matcher_overflow ();
1069 }
ca1d1d23
JB
1070 if (val >= 0)
1071 {
26aff150 1072 pos_byte = search_regs.start[0] + BEGV_BYTE;
4746118a 1073 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1074 if (search_regs.start[i] >= 0)
1075 {
fa8ed3e0
RS
1076 search_regs.start[i]
1077 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1078 search_regs.end[i]
1079 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1080 }
a3668d92 1081 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1082 /* Set pos to the new position. */
1083 pos = search_regs.start[0];
1084 }
1085 else
1086 {
1087 immediate_quit = 0;
1088 return (n);
1089 }
1090 n++;
1091 }
1092 while (n > 0)
1093 {
42db823b 1094 int val;
487282dc 1095 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1096 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1097 &search_regs,
1098 lim_byte - BEGV_BYTE);
ca1d1d23 1099 if (val == -2)
b6d6a51c
KH
1100 {
1101 matcher_overflow ();
1102 }
ca1d1d23
JB
1103 if (val >= 0)
1104 {
26aff150 1105 pos_byte = search_regs.end[0] + BEGV_BYTE;
4746118a 1106 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1107 if (search_regs.start[i] >= 0)
1108 {
fa8ed3e0
RS
1109 search_regs.start[i]
1110 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1111 search_regs.end[i]
1112 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1113 }
a3668d92 1114 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1115 pos = search_regs.end[0];
1116 }
1117 else
1118 {
1119 immediate_quit = 0;
1120 return (0 - n);
1121 }
1122 n--;
1123 }
1124 immediate_quit = 0;
1125 return (pos);
1126 }
1127 else /* non-RE case */
1128 {
facdc750
RS
1129 unsigned char *raw_pattern, *pat;
1130 int raw_pattern_size;
1131 int raw_pattern_size_byte;
1132 unsigned char *patbuf;
1133 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1134 unsigned char *base_pat = XSTRING (string)->data;
1135 int charset_base = -1;
1136 int simple = 1;
1137
1138 /* MULTIBYTE says whether the text to be searched is multibyte.
1139 We must convert PATTERN to match that, or we will not really
1140 find things right. */
1141
1142 if (multibyte == STRING_MULTIBYTE (string))
1143 {
1144 raw_pattern = (char *) XSTRING (string)->data;
1145 raw_pattern_size = XSTRING (string)->size;
1146 raw_pattern_size_byte = XSTRING (string)->size_byte;
1147 }
1148 else if (multibyte)
1149 {
1150 raw_pattern_size = XSTRING (string)->size;
1151 raw_pattern_size_byte
1152 = count_size_as_multibyte (XSTRING (string)->data,
1153 raw_pattern_size);
1154 raw_pattern = (char *) alloca (raw_pattern_size_byte + 1);
1155 copy_text (XSTRING (string)->data, raw_pattern,
1156 XSTRING (string)->size, 0, 1);
1157 }
1158 else
1159 {
1160 /* Converting multibyte to single-byte.
1161
1162 ??? Perhaps this conversion should be done in a special way
1163 by subtracting nonascii-insert-offset from each non-ASCII char,
1164 so that only the multibyte chars which really correspond to
1165 the chosen single-byte character set can possibly match. */
1166 raw_pattern_size = XSTRING (string)->size;
1167 raw_pattern_size_byte = XSTRING (string)->size;
1168 raw_pattern = (char *) alloca (raw_pattern_size + 1);
1169 copy_text (XSTRING (string)->data, raw_pattern,
1170 XSTRING (string)->size_byte, 1, 0);
1171 }
1172
1173 /* Copy and optionally translate the pattern. */
1174 len = raw_pattern_size;
1175 len_byte = raw_pattern_size_byte;
1176 patbuf = (unsigned char *) alloca (len_byte);
1177 pat = patbuf;
1178 base_pat = raw_pattern;
1179 if (multibyte)
1180 {
1181 while (--len >= 0)
1182 {
1183 unsigned char workbuf[4], *str;
aff2ce94 1184 int c, translated, inverse;
facdc750
RS
1185 int in_charlen, charlen;
1186
1187 /* If we got here and the RE flag is set, it's because we're
1188 dealing with a regexp known to be trivial, so the backslash
1189 just quotes the next character. */
1190 if (RE && *base_pat == '\\')
1191 {
1192 len--;
1193 len_byte--;
1194 base_pat++;
1195 }
1196
1197 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1198 /* Translate the character, if requested. */
aff2ce94 1199 TRANSLATE (translated, trt, c);
facdc750
RS
1200 /* If translation changed the byte-length, go back
1201 to the original character. */
1202 charlen = CHAR_STRING (translated, workbuf, str);
1203 if (in_charlen != charlen)
1204 {
1205 translated = c;
1206 charlen = CHAR_STRING (c, workbuf, str);
1207 }
1208
aff2ce94
RS
1209 TRANSLATE (inverse, inverse_trt, c);
1210
facdc750
RS
1211 /* Did this char actually get translated?
1212 Would any other char get translated into it? */
aff2ce94 1213 if (translated != c || inverse != c)
facdc750
RS
1214 {
1215 /* Keep track of which character set row
1216 contains the characters that need translation. */
1217 int charset_base_code = c & ~0xff;
1218 if (charset_base == -1)
1219 charset_base = charset_base_code;
1220 else if (charset_base != charset_base_code)
1221 /* If two different rows appear, needing translation,
1222 then we cannot use boyer_moore search. */
1223 simple = 0;
1224 /* ??? Handa: this must do simple = 0
1225 if c is a composite character. */
aff2ce94 1226 }
facdc750
RS
1227
1228 /* Store this character into the translated pattern. */
1229 bcopy (str, pat, charlen);
1230 pat += charlen;
1231 base_pat += in_charlen;
1232 len_byte -= in_charlen;
1233 }
1234 }
1235 else
1236 {
1237 while (--len >= 0)
1238 {
aff2ce94 1239 int c, translated, inverse;
facdc750
RS
1240
1241 /* If we got here and the RE flag is set, it's because we're
1242 dealing with a regexp known to be trivial, so the backslash
1243 just quotes the next character. */
1244 if (RE && *base_pat == '\\')
1245 {
1246 len--;
1247 base_pat++;
1248 }
1249 c = *base_pat++;
aff2ce94
RS
1250 TRANSLATE (translated, trt, c);
1251 TRANSLATE (inverse, inverse_trt, c);
facdc750
RS
1252
1253 /* Did this char actually get translated?
1254 Would any other char get translated into it? */
aff2ce94 1255 if (translated != c || inverse != c)
facdc750
RS
1256 {
1257 /* Keep track of which character set row
1258 contains the characters that need translation. */
1259 int charset_base_code = c & ~0xff;
1260 if (charset_base == -1)
1261 charset_base = charset_base_code;
1262 else if (charset_base != charset_base_code)
1263 /* If two different rows appear, needing translation,
1264 then we cannot use boyer_moore search. */
1265 simple = 0;
aff2ce94 1266 }
facdc750
RS
1267 *pat++ = translated;
1268 }
1269 }
1270
1271 len_byte = pat - patbuf;
1272 len = raw_pattern_size;
1273 pat = base_pat = patbuf;
1274
1275 if (simple)
1276 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
aff2ce94
RS
1277 pos, pos_byte, lim, lim_byte,
1278 charset_base);
facdc750
RS
1279 else
1280 return simple_search (n, pat, len, len_byte, trt,
1281 pos, pos_byte, lim, lim_byte);
1282 }
1283}
1284\f
1285/* Do a simple string search N times for the string PAT,
1286 whose length is LEN/LEN_BYTE,
1287 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1288 TRT is the translation table.
f8bd51c4 1289
facdc750
RS
1290 Return the character position where the match is found.
1291 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1292
facdc750
RS
1293 This kind of search works regardless of what is in PAT and
1294 regardless of what is in TRT. It is used in cases where
1295 boyer_moore cannot work. */
1296
1297static int
1298simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1299 int n;
1300 unsigned char *pat;
1301 int len, len_byte;
1302 Lisp_Object trt;
1303 int pos, pos_byte;
1304 int lim, lim_byte;
1305{
1306 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
ab228c24 1307 int forward = n > 0;
facdc750
RS
1308
1309 if (lim > pos && multibyte)
1310 while (n > 0)
1311 {
1312 while (1)
f8bd51c4 1313 {
facdc750
RS
1314 /* Try matching at position POS. */
1315 int this_pos = pos;
1316 int this_pos_byte = pos_byte;
1317 int this_len = len;
1318 int this_len_byte = len_byte;
1319 unsigned char *p = pat;
1320 if (pos + len > lim)
1321 goto stop;
1322
1323 while (this_len > 0)
1324 {
1325 int charlen, buf_charlen;
ab228c24 1326 int pat_ch, buf_ch;
facdc750 1327
ab228c24 1328 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1329 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1330 ZV_BYTE - this_pos_byte,
1331 buf_charlen);
aff2ce94 1332 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1333
1334 if (buf_ch != pat_ch)
1335 break;
ab228c24
RS
1336
1337 this_len_byte -= charlen;
1338 this_len--;
1339 p += charlen;
1340
1341 this_pos_byte += buf_charlen;
1342 this_pos++;
facdc750
RS
1343 }
1344
1345 if (this_len == 0)
1346 {
1347 pos += len;
1348 pos_byte += len_byte;
1349 break;
1350 }
1351
1352 INC_BOTH (pos, pos_byte);
f8bd51c4 1353 }
facdc750
RS
1354
1355 n--;
1356 }
1357 else if (lim > pos)
1358 while (n > 0)
1359 {
1360 while (1)
f8bd51c4 1361 {
facdc750
RS
1362 /* Try matching at position POS. */
1363 int this_pos = pos;
1364 int this_len = len;
1365 unsigned char *p = pat;
1366
1367 if (pos + len > lim)
1368 goto stop;
1369
1370 while (this_len > 0)
1371 {
1372 int pat_ch = *p++;
1373 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1374 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1375
1376 if (buf_ch != pat_ch)
1377 break;
ab228c24
RS
1378
1379 this_len--;
1380 this_pos++;
facdc750
RS
1381 }
1382
1383 if (this_len == 0)
1384 {
1385 pos += len;
1386 break;
1387 }
1388
1389 pos++;
f8bd51c4 1390 }
facdc750
RS
1391
1392 n--;
1393 }
1394 /* Backwards search. */
1395 else if (lim < pos && multibyte)
1396 while (n < 0)
1397 {
1398 while (1)
f8bd51c4 1399 {
facdc750
RS
1400 /* Try matching at position POS. */
1401 int this_pos = pos - len;
1402 int this_pos_byte = pos_byte - len_byte;
1403 int this_len = len;
1404 int this_len_byte = len_byte;
1405 unsigned char *p = pat;
1406
1407 if (pos - len < lim)
1408 goto stop;
1409
1410 while (this_len > 0)
1411 {
1412 int charlen, buf_charlen;
ab228c24 1413 int pat_ch, buf_ch;
facdc750 1414
ab228c24 1415 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1416 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1417 ZV_BYTE - this_pos_byte,
1418 buf_charlen);
aff2ce94 1419 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1420
1421 if (buf_ch != pat_ch)
1422 break;
ab228c24
RS
1423
1424 this_len_byte -= charlen;
1425 this_len--;
1426 p += charlen;
1427 this_pos_byte += buf_charlen;
1428 this_pos++;
facdc750
RS
1429 }
1430
1431 if (this_len == 0)
1432 {
1433 pos -= len;
1434 pos_byte -= len_byte;
1435 break;
1436 }
1437
1438 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1439 }
1440
facdc750
RS
1441 n++;
1442 }
1443 else if (lim < pos)
1444 while (n < 0)
1445 {
1446 while (1)
b6d6a51c 1447 {
facdc750
RS
1448 /* Try matching at position POS. */
1449 int this_pos = pos - len;
1450 int this_len = len;
1451 unsigned char *p = pat;
1452
1453 if (pos - len < lim)
1454 goto stop;
1455
1456 while (this_len > 0)
1457 {
1458 int pat_ch = *p++;
1459 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1460 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1461
1462 if (buf_ch != pat_ch)
1463 break;
ab228c24
RS
1464 this_len--;
1465 this_pos++;
facdc750
RS
1466 }
1467
1468 if (this_len == 0)
b6d6a51c 1469 {
facdc750
RS
1470 pos -= len;
1471 break;
b6d6a51c 1472 }
facdc750
RS
1473
1474 pos--;
b6d6a51c 1475 }
facdc750
RS
1476
1477 n++;
b6d6a51c 1478 }
facdc750
RS
1479
1480 stop:
1481 if (n == 0)
aff2ce94 1482 {
ab228c24
RS
1483 if (forward)
1484 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1485 else
1486 set_search_regs (multibyte ? pos_byte : pos, len_byte);
aff2ce94
RS
1487
1488 return pos;
1489 }
facdc750
RS
1490 else if (n > 0)
1491 return -n;
1492 else
1493 return n;
1494}
1495\f
1496/* Do Boyer-Moore search N times for the string PAT,
1497 whose length is LEN/LEN_BYTE,
1498 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1499 DIRECTION says which direction we search in.
1500 TRT and INVERSE_TRT are translation tables.
1501
1502 This kind of search works if all the characters in PAT that have
1503 nontrivial translation are the same aside from the last byte. This
1504 makes it possible to translate just the last byte of a character,
1505 and do so after just a simple test of the context.
1506
1507 If that criterion is not satisfied, do not call this function. */
1508
1509static int
1510boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
aff2ce94 1511 pos, pos_byte, lim, lim_byte, charset_base)
facdc750
RS
1512 int n;
1513 unsigned char *base_pat;
1514 int len, len_byte;
1515 Lisp_Object trt;
1516 Lisp_Object inverse_trt;
1517 int pos, pos_byte;
1518 int lim, lim_byte;
aff2ce94 1519 int charset_base;
facdc750
RS
1520{
1521 int direction = ((n > 0) ? 1 : -1);
1522 register int dirlen;
1523 int infinity, limit, k, stride_for_teases;
1524 register int *BM_tab;
1525 int *BM_tab_base;
1526 register unsigned char *cursor, *p_limit;
1527 register int i, j;
1528 unsigned char *pat;
1529 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1530
1531 unsigned char simple_translate[0400];
1532 int translate_prev_byte;
1533 int translate_anteprev_byte;
1534
1535#ifdef C_ALLOCA
1536 int BM_tab_space[0400];
1537 BM_tab = &BM_tab_space[0];
1538#else
1539 BM_tab = (int *) alloca (0400 * sizeof (int));
1540#endif
1541 /* The general approach is that we are going to maintain that we know */
1542 /* the first (closest to the present position, in whatever direction */
1543 /* we're searching) character that could possibly be the last */
1544 /* (furthest from present position) character of a valid match. We */
1545 /* advance the state of our knowledge by looking at that character */
1546 /* and seeing whether it indeed matches the last character of the */
1547 /* pattern. If it does, we take a closer look. If it does not, we */
1548 /* move our pointer (to putative last characters) as far as is */
1549 /* logically possible. This amount of movement, which I call a */
1550 /* stride, will be the length of the pattern if the actual character */
1551 /* appears nowhere in the pattern, otherwise it will be the distance */
1552 /* from the last occurrence of that character to the end of the */
1553 /* pattern. */
1554 /* As a coding trick, an enormous stride is coded into the table for */
1555 /* characters that match the last character. This allows use of only */
1556 /* a single test, a test for having gone past the end of the */
1557 /* permissible match region, to test for both possible matches (when */
1558 /* the stride goes past the end immediately) and failure to */
1559 /* match (where you get nudged past the end one stride at a time). */
1560
1561 /* Here we make a "mickey mouse" BM table. The stride of the search */
1562 /* is determined only by the last character of the putative match. */
1563 /* If that character does not match, we will stride the proper */
1564 /* distance to propose a match that superimposes it on the last */
1565 /* instance of a character that matches it (per trt), or misses */
1566 /* it entirely if there is none. */
1567
1568 dirlen = len_byte * direction;
1569 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1570 if (direction < 0)
1571 pat = (base_pat += len_byte - 1);
1572 else
1573 pat = base_pat;
1574 BM_tab_base = BM_tab;
1575 BM_tab += 0400;
1576 j = dirlen; /* to get it in a register */
1577 /* A character that does not appear in the pattern induces a */
1578 /* stride equal to the pattern length. */
1579 while (BM_tab_base != BM_tab)
1580 {
1581 *--BM_tab = j;
1582 *--BM_tab = j;
1583 *--BM_tab = j;
1584 *--BM_tab = j;
1585 }
1586
1587 /* We use this for translation, instead of TRT itself.
1588 We fill this in to handle the characters that actually
1589 occur in the pattern. Others don't matter anyway! */
1590 bzero (simple_translate, sizeof simple_translate);
1591 for (i = 0; i < 0400; i++)
1592 simple_translate[i] = i;
1593
1594 i = 0;
1595 while (i != infinity)
1596 {
1597 unsigned char *ptr = pat + i;
1598 i += direction;
1599 if (i == dirlen)
1600 i = infinity;
1601 if (! NILP (trt))
ca1d1d23 1602 {
facdc750 1603 int ch;
aff2ce94 1604 int untranslated;
facdc750
RS
1605 int this_translated = 1;
1606
1607 if (multibyte
1608 && (ptr + 1 == pat + len_byte || CHAR_HEAD_P (ptr[1])))
ca1d1d23 1609 {
facdc750
RS
1610 unsigned char *charstart = ptr;
1611 while (! CHAR_HEAD_P (*charstart))
1612 charstart--;
aff2ce94 1613 untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
ab228c24 1614 if (charset_base == (untranslated & ~0xff))
facdc750 1615 {
ab228c24 1616 TRANSLATE (ch, trt, untranslated);
aff2ce94
RS
1617 if (! CHAR_HEAD_P (*ptr))
1618 {
1619 translate_prev_byte = ptr[-1];
1620 if (! CHAR_HEAD_P (translate_prev_byte))
1621 translate_anteprev_byte = ptr[-2];
1622 }
facdc750 1623 }
aff2ce94 1624 else
ab228c24
RS
1625 {
1626 this_translated = 0;
1627 ch = *ptr;
1628 }
ca1d1d23 1629 }
facdc750 1630 else if (!multibyte)
aff2ce94 1631 TRANSLATE (ch, trt, *ptr);
ca1d1d23
JB
1632 else
1633 {
facdc750
RS
1634 ch = *ptr;
1635 this_translated = 0;
ca1d1d23 1636 }
facdc750 1637
ab228c24
RS
1638 if (ch > 0400)
1639 j = ((unsigned char) ch) | 0200;
1640 else
1641 j = (unsigned char) ch;
1642
facdc750
RS
1643 if (i == infinity)
1644 stride_for_teases = BM_tab[j];
ab228c24 1645
facdc750
RS
1646 BM_tab[j] = dirlen - i;
1647 /* A translation table is accompanied by its inverse -- see */
1648 /* comment following downcase_table for details */
1649 if (this_translated)
ab228c24
RS
1650 {
1651 int starting_ch = ch;
1652 int starting_j = j;
1653 while (1)
1654 {
1655 TRANSLATE (ch, inverse_trt, ch);
1656 if (ch > 0400)
1657 j = ((unsigned char) ch) | 0200;
1658 else
1659 j = (unsigned char) ch;
1660
1661 /* For all the characters that map into CH,
1662 set up simple_translate to map the last byte
1663 into STARTING_J. */
1664 simple_translate[j] = starting_j;
1665 if (ch == starting_ch)
1666 break;
1667 BM_tab[j] = dirlen - i;
1668 }
1669 }
facdc750
RS
1670 }
1671 else
1672 {
1673 j = *ptr;
1674
1675 if (i == infinity)
1676 stride_for_teases = BM_tab[j];
1677 BM_tab[j] = dirlen - i;
ca1d1d23 1678 }
facdc750
RS
1679 /* stride_for_teases tells how much to stride if we get a */
1680 /* match on the far character but are subsequently */
1681 /* disappointed, by recording what the stride would have been */
1682 /* for that character if the last character had been */
1683 /* different. */
1684 }
1685 infinity = dirlen - infinity;
1686 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1687 /* loop invariant - POS_BYTE points at where last char (first
1688 char if reverse) of pattern would align in a possible match. */
1689 while (n != 0)
1690 {
1691 int tail_end;
1692 unsigned char *tail_end_ptr;
1693
1694 /* It's been reported that some (broken) compiler thinks that
1695 Boolean expressions in an arithmetic context are unsigned.
1696 Using an explicit ?1:0 prevents this. */
1697 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1698 < 0)
1699 return (n * (0 - direction));
1700 /* First we do the part we can by pointers (maybe nothing) */
1701 QUIT;
1702 pat = base_pat;
1703 limit = pos_byte - dirlen + direction;
1704 limit = ((direction > 0)
1705 ? BUFFER_CEILING_OF (limit)
1706 : BUFFER_FLOOR_OF (limit));
1707 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1708 can take on without hitting edge of buffer or the gap. */
1709 limit = ((direction > 0)
1710 ? min (lim_byte - 1, min (limit, pos_byte + 20000))
1711 : max (lim_byte, max (limit, pos_byte - 20000)));
1712 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1713 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1714
1715 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1716 {
facdc750
RS
1717 unsigned char *p2;
1718
1719 p_limit = BYTE_POS_ADDR (limit);
1720 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1721 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1722 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1723 {
facdc750 1724 if (direction > 0) /* worth duplicating */
ca1d1d23 1725 {
facdc750
RS
1726 /* Use signed comparison if appropriate
1727 to make cursor+infinity sure to be > p_limit.
1728 Assuming that the buffer lies in a range of addresses
1729 that are all "positive" (as ints) or all "negative",
1730 either kind of comparison will work as long
1731 as we don't step by infinity. So pick the kind
1732 that works when we do step by infinity. */
1733 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1734 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1735 cursor += BM_tab[*cursor];
ca1d1d23 1736 else
facdc750
RS
1737 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1738 cursor += BM_tab[*cursor];
1739 }
1740 else
1741 {
1742 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1743 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1744 cursor += BM_tab[*cursor];
1745 else
1746 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1747 cursor += BM_tab[*cursor];
1748 }
ca1d1d23 1749/* If you are here, cursor is beyond the end of the searched region. */
facdc750
RS
1750/* This can happen if you match on the far character of the pattern, */
1751/* because the "stride" of that character is infinity, a number able */
1752/* to throw you well beyond the end of the search. It can also */
1753/* happen if you fail to match within the permitted region and would */
1754/* otherwise try a character beyond that region */
1755 if ((cursor - p_limit) * direction <= len_byte)
1756 break; /* a small overrun is genuine */
1757 cursor -= infinity; /* large overrun = hit */
1758 i = dirlen - direction;
1759 if (! NILP (trt))
1760 {
1761 while ((i -= direction) + direction != 0)
ca1d1d23 1762 {
facdc750
RS
1763 int ch;
1764 cursor -= direction;
1765 /* Translate only the last byte of a character. */
1766 if (! multibyte
1767 || ((cursor == tail_end_ptr
1768 || CHAR_HEAD_P (cursor[1]))
1769 && (CHAR_HEAD_P (cursor[0])
1770 || (translate_prev_byte == cursor[-1]
1771 && (CHAR_HEAD_P (translate_prev_byte)
1772 || translate_anteprev_byte == cursor[-2])))))
1773 ch = simple_translate[*cursor];
1774 else
1775 ch = *cursor;
1776 if (pat[i] != ch)
1777 break;
ca1d1d23 1778 }
facdc750
RS
1779 }
1780 else
1781 {
1782 while ((i -= direction) + direction != 0)
ca1d1d23 1783 {
facdc750
RS
1784 cursor -= direction;
1785 if (pat[i] != *cursor)
1786 break;
ca1d1d23 1787 }
facdc750
RS
1788 }
1789 cursor += dirlen - i - direction; /* fix cursor */
1790 if (i + direction == 0)
1791 {
1792 int position;
0c8533c6 1793
facdc750 1794 cursor -= direction;
1113d9db 1795
facdc750
RS
1796 position = pos_byte + cursor - p2 + ((direction > 0)
1797 ? 1 - len_byte : 0);
1798 set_search_regs (position, len_byte);
ca325161 1799
facdc750
RS
1800 if ((n -= direction) != 0)
1801 cursor += dirlen; /* to resume search */
ca1d1d23 1802 else
facdc750
RS
1803 return ((direction > 0)
1804 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1805 }
facdc750
RS
1806 else
1807 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 1808 }
facdc750
RS
1809 pos_byte += cursor - p2;
1810 }
1811 else
1812 /* Now we'll pick up a clump that has to be done the hard */
1813 /* way because it covers a discontinuity */
1814 {
1815 limit = ((direction > 0)
1816 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1817 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1818 limit = ((direction > 0)
1819 ? min (limit + len_byte, lim_byte - 1)
1820 : max (limit - len_byte, lim_byte));
1821 /* LIMIT is now the last value POS_BYTE can have
1822 and still be valid for a possible match. */
1823 while (1)
ca1d1d23 1824 {
facdc750
RS
1825 /* This loop can be coded for space rather than */
1826 /* speed because it will usually run only once. */
1827 /* (the reach is at most len + 21, and typically */
1828 /* does not exceed len) */
1829 while ((limit - pos_byte) * direction >= 0)
1830 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1831 /* now run the same tests to distinguish going off the */
1832 /* end, a match or a phony match. */
1833 if ((pos_byte - limit) * direction <= len_byte)
1834 break; /* ran off the end */
1835 /* Found what might be a match.
1836 Set POS_BYTE back to last (first if reverse) pos. */
1837 pos_byte -= infinity;
1838 i = dirlen - direction;
1839 while ((i -= direction) + direction != 0)
ca1d1d23 1840 {
facdc750
RS
1841 int ch;
1842 unsigned char *ptr;
1843 pos_byte -= direction;
1844 ptr = BYTE_POS_ADDR (pos_byte);
1845 /* Translate only the last byte of a character. */
1846 if (! multibyte
1847 || ((ptr == tail_end_ptr
1848 || CHAR_HEAD_P (ptr[1]))
1849 && (CHAR_HEAD_P (ptr[0])
1850 || (translate_prev_byte == ptr[-1]
1851 && (CHAR_HEAD_P (translate_prev_byte)
1852 || translate_anteprev_byte == ptr[-2])))))
1853 ch = simple_translate[*ptr];
1854 else
1855 ch = *ptr;
1856 if (pat[i] != ch)
1857 break;
1858 }
1859 /* Above loop has moved POS_BYTE part or all the way
1860 back to the first pos (last pos if reverse).
1861 Set it once again at the last (first if reverse) char. */
1862 pos_byte += dirlen - i- direction;
1863 if (i + direction == 0)
1864 {
1865 int position;
1866 pos_byte -= direction;
1113d9db 1867
facdc750 1868 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
0c8533c6 1869
facdc750 1870 set_search_regs (position, len_byte);
ca325161 1871
facdc750
RS
1872 if ((n -= direction) != 0)
1873 pos_byte += dirlen; /* to resume search */
ca1d1d23 1874 else
facdc750
RS
1875 return ((direction > 0)
1876 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1877 }
facdc750
RS
1878 else
1879 pos_byte += stride_for_teases;
1880 }
1881 }
1882 /* We have done one clump. Can we continue? */
1883 if ((lim_byte - pos_byte) * direction < 0)
1884 return ((0 - n) * direction);
ca1d1d23 1885 }
facdc750 1886 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 1887}
ca325161 1888
fa8ed3e0 1889/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
ca325161
RS
1890 for a match just found in the current buffer. */
1891
1892static void
fa8ed3e0
RS
1893set_search_regs (beg_byte, nbytes)
1894 int beg_byte, nbytes;
ca325161
RS
1895{
1896 /* Make sure we have registers in which to store
1897 the match position. */
1898 if (search_regs.num_regs == 0)
1899 {
2d4a771a
RS
1900 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1901 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 1902 search_regs.num_regs = 2;
ca325161
RS
1903 }
1904
fa8ed3e0
RS
1905 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1906 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 1907 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 1908}
ca1d1d23
JB
1909\f
1910/* Given a string of words separated by word delimiters,
1911 compute a regexp that matches those exact words
1912 separated by arbitrary punctuation. */
1913
1914static Lisp_Object
1915wordify (string)
1916 Lisp_Object string;
1917{
1918 register unsigned char *p, *o;
0c8533c6 1919 register int i, i_byte, len, punct_count = 0, word_count = 0;
ca1d1d23 1920 Lisp_Object val;
0c8533c6
RS
1921 int prev_c = 0;
1922 int adjust;
ca1d1d23
JB
1923
1924 CHECK_STRING (string, 0);
1925 p = XSTRING (string)->data;
1926 len = XSTRING (string)->size;
1927
0c8533c6
RS
1928 for (i = 0, i_byte = 0; i < len; )
1929 {
1930 int c;
1931
1932 if (STRING_MULTIBYTE (string))
1933 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1934 else
1935 c = XSTRING (string)->data[i++];
1936
1937 if (SYNTAX (c) != Sword)
1938 {
1939 punct_count++;
1940 if (i > 0 && SYNTAX (prev_c) == Sword)
1941 word_count++;
1942 }
ca1d1d23 1943
0c8533c6
RS
1944 prev_c = c;
1945 }
1946
1947 if (SYNTAX (prev_c) == Sword)
1948 word_count++;
1949 if (!word_count)
1950 return build_string ("");
1951
1952 adjust = - punct_count + 5 * (word_count - 1) + 4;
1953 val = make_uninit_multibyte_string (len + adjust,
1954 XSTRING (string)->size_byte + adjust);
ca1d1d23
JB
1955
1956 o = XSTRING (val)->data;
1957 *o++ = '\\';
1958 *o++ = 'b';
1959
0c8533c6 1960 for (i = 0; i < XSTRING (val)->size_byte; i++)
ca1d1d23
JB
1961 if (SYNTAX (p[i]) == Sword)
1962 *o++ = p[i];
1963 else if (i > 0 && SYNTAX (p[i-1]) == Sword && --word_count)
1964 {
1965 *o++ = '\\';
1966 *o++ = 'W';
1967 *o++ = '\\';
1968 *o++ = 'W';
1969 *o++ = '*';
1970 }
1971
1972 *o++ = '\\';
1973 *o++ = 'b';
1974
1975 return val;
1976}
1977\f
1978DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
6af43974 1979 "MSearch backward: ",
ca1d1d23
JB
1980 "Search backward from point for STRING.\n\
1981Set point to the beginning of the occurrence found, and return point.\n\
1982An optional second argument bounds the search; it is a buffer position.\n\
1983The match found must not extend before that position.\n\
1984Optional third argument, if t, means if fail just return nil (no error).\n\
1985 If not nil and not t, position at limit of search and return nil.\n\
1986Optional fourth argument is repeat count--search for successive occurrences.\n\
1987See also the functions `match-beginning', `match-end' and `replace-match'.")
1988 (string, bound, noerror, count)
1989 Lisp_Object string, bound, noerror, count;
1990{
b819a390 1991 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
1992}
1993
6af43974 1994DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
ca1d1d23
JB
1995 "Search forward from point for STRING.\n\
1996Set point to the end of the occurrence found, and return point.\n\
1997An optional second argument bounds the search; it is a buffer position.\n\
1998The match found must not extend after that position. nil is equivalent\n\
1999 to (point-max).\n\
2000Optional third argument, if t, means if fail just return nil (no error).\n\
2001 If not nil and not t, move to limit of search and return nil.\n\
2002Optional fourth argument is repeat count--search for successive occurrences.\n\
2003See also the functions `match-beginning', `match-end' and `replace-match'.")
2004 (string, bound, noerror, count)
2005 Lisp_Object string, bound, noerror, count;
2006{
b819a390 2007 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
2008}
2009
2010DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2011 "sWord search backward: ",
2012 "Search backward from point for STRING, ignoring differences in punctuation.\n\
2013Set point to the beginning of the occurrence found, and return point.\n\
2014An optional second argument bounds the search; it is a buffer position.\n\
2015The match found must not extend before that position.\n\
2016Optional third argument, if t, means if fail just return nil (no error).\n\
2017 If not nil and not t, move to limit of search and return nil.\n\
2018Optional fourth argument is repeat count--search for successive occurrences.")
2019 (string, bound, noerror, count)
2020 Lisp_Object string, bound, noerror, count;
2021{
b819a390 2022 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2023}
2024
2025DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2026 "sWord search: ",
2027 "Search forward from point for STRING, ignoring differences in punctuation.\n\
2028Set point to the end of the occurrence found, and return point.\n\
2029An optional second argument bounds the search; it is a buffer position.\n\
2030The match found must not extend after that position.\n\
2031Optional third argument, if t, means if fail just return nil (no error).\n\
2032 If not nil and not t, move to limit of search and return nil.\n\
2033Optional fourth argument is repeat count--search for successive occurrences.")
2034 (string, bound, noerror, count)
2035 Lisp_Object string, bound, noerror, count;
2036{
b819a390 2037 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
2038}
2039
2040DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2041 "sRE search backward: ",
2042 "Search backward from point for match for regular expression REGEXP.\n\
2043Set point to the beginning of the match, and return point.\n\
2044The match found is the one starting last in the buffer\n\
19c0a730 2045and yet ending before the origin of the search.\n\
ca1d1d23
JB
2046An optional second argument bounds the search; it is a buffer position.\n\
2047The match found must start at or after that position.\n\
2048Optional third argument, if t, means if fail just return nil (no error).\n\
2049 If not nil and not t, move to limit of search and return nil.\n\
2050Optional fourth argument is repeat count--search for successive occurrences.\n\
2051See also the functions `match-beginning', `match-end' and `replace-match'.")
19c0a730
KH
2052 (regexp, bound, noerror, count)
2053 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2054{
b819a390 2055 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2056}
2057
2058DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2059 "sRE search: ",
2060 "Search forward from point for regular expression REGEXP.\n\
2061Set point to the end of the occurrence found, and return point.\n\
2062An optional second argument bounds the search; it is a buffer position.\n\
2063The match found must not extend after that position.\n\
2064Optional third argument, if t, means if fail just return nil (no error).\n\
2065 If not nil and not t, move to limit of search and return nil.\n\
2066Optional fourth argument is repeat count--search for successive occurrences.\n\
2067See also the functions `match-beginning', `match-end' and `replace-match'.")
19c0a730
KH
2068 (regexp, bound, noerror, count)
2069 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2070{
b819a390
RS
2071 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2072}
2073
2074DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2075 "sPosix search backward: ",
2076 "Search backward from point for match for regular expression REGEXP.\n\
2077Find the longest match in accord with Posix regular expression rules.\n\
2078Set point to the beginning of the match, and return point.\n\
2079The match found is the one starting last in the buffer\n\
2080and yet ending before the origin of the search.\n\
2081An optional second argument bounds the search; it is a buffer position.\n\
2082The match found must start at or after that position.\n\
2083Optional third argument, if t, means if fail just return nil (no error).\n\
2084 If not nil and not t, move to limit of search and return nil.\n\
2085Optional fourth argument is repeat count--search for successive occurrences.\n\
2086See also the functions `match-beginning', `match-end' and `replace-match'.")
2087 (regexp, bound, noerror, count)
2088 Lisp_Object regexp, bound, noerror, count;
2089{
2090 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2091}
2092
2093DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2094 "sPosix search: ",
2095 "Search forward from point for regular expression REGEXP.\n\
2096Find the longest match in accord with Posix regular expression rules.\n\
2097Set point to the end of the occurrence found, and return point.\n\
2098An optional second argument bounds the search; it is a buffer position.\n\
2099The match found must not extend after that position.\n\
2100Optional third argument, if t, means if fail just return nil (no error).\n\
2101 If not nil and not t, move to limit of search and return nil.\n\
2102Optional fourth argument is repeat count--search for successive occurrences.\n\
2103See also the functions `match-beginning', `match-end' and `replace-match'.")
2104 (regexp, bound, noerror, count)
2105 Lisp_Object regexp, bound, noerror, count;
2106{
2107 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2108}
2109\f
d7a5ad5f 2110DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
ca1d1d23
JB
2111 "Replace text matched by last search with NEWTEXT.\n\
2112If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
5b9cf4b2
RS
2113Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
2114based on the replaced text.\n\
2115If the replaced text has only capital letters\n\
2116and has at least one multiletter word, convert NEWTEXT to all caps.\n\
2117If the replaced text has at least one word starting with a capital letter,\n\
2118then capitalize each word in NEWTEXT.\n\n\
ca1d1d23
JB
2119If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
2120Otherwise treat `\\' as special:\n\
2121 `\\&' in NEWTEXT means substitute original matched text.\n\
2122 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
2123 If Nth parens didn't match, substitute nothing.\n\
2124 `\\\\' means insert one `\\'.\n\
1113d9db 2125FIXEDCASE and LITERAL are optional arguments.\n\
080c45fd
RS
2126Leaves point at end of replacement text.\n\
2127\n\
2128The optional fourth argument STRING can be a string to modify.\n\
2129In that case, this function creates and returns a new string\n\
d7a5ad5f
RS
2130which is made by replacing the part of STRING that was matched.\n\
2131\n\
2132The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
2133It says to replace just that subexpression instead of the whole match.\n\
2134This is useful only after a regular expression search or match\n\
2135since only regular expressions have distinguished subexpressions.")
2136 (newtext, fixedcase, literal, string, subexp)
2137 Lisp_Object newtext, fixedcase, literal, string, subexp;
ca1d1d23
JB
2138{
2139 enum { nochange, all_caps, cap_initial } case_action;
2140 register int pos, last;
2141 int some_multiletter_word;
97832bd0 2142 int some_lowercase;
73dc8771 2143 int some_uppercase;
208767c3 2144 int some_nonuppercase_initial;
ca1d1d23
JB
2145 register int c, prevc;
2146 int inslen;
d7a5ad5f 2147 int sub;
3e18eecf 2148 int opoint, newpoint;
ca1d1d23 2149
16fdc568 2150 CHECK_STRING (newtext, 0);
ca1d1d23 2151
080c45fd
RS
2152 if (! NILP (string))
2153 CHECK_STRING (string, 4);
2154
ca1d1d23
JB
2155 case_action = nochange; /* We tried an initialization */
2156 /* but some C compilers blew it */
4746118a
JB
2157
2158 if (search_regs.num_regs <= 0)
2159 error ("replace-match called before any match found");
2160
d7a5ad5f
RS
2161 if (NILP (subexp))
2162 sub = 0;
2163 else
2164 {
2165 CHECK_NUMBER (subexp, 3);
2166 sub = XINT (subexp);
2167 if (sub < 0 || sub >= search_regs.num_regs)
2168 args_out_of_range (subexp, make_number (search_regs.num_regs));
2169 }
2170
080c45fd
RS
2171 if (NILP (string))
2172 {
d7a5ad5f
RS
2173 if (search_regs.start[sub] < BEGV
2174 || search_regs.start[sub] > search_regs.end[sub]
2175 || search_regs.end[sub] > ZV)
2176 args_out_of_range (make_number (search_regs.start[sub]),
2177 make_number (search_regs.end[sub]));
080c45fd
RS
2178 }
2179 else
2180 {
d7a5ad5f
RS
2181 if (search_regs.start[sub] < 0
2182 || search_regs.start[sub] > search_regs.end[sub]
2183 || search_regs.end[sub] > XSTRING (string)->size)
2184 args_out_of_range (make_number (search_regs.start[sub]),
2185 make_number (search_regs.end[sub]));
080c45fd 2186 }
ca1d1d23
JB
2187
2188 if (NILP (fixedcase))
2189 {
fa8ed3e0 2190 int beg;
ca1d1d23
JB
2191 /* Decide how to casify by examining the matched text. */
2192
fa8ed3e0
RS
2193 if (NILP (string))
2194 last = CHAR_TO_BYTE (search_regs.end[sub]);
2195 else
2196 last = search_regs.end[sub];
2197
2198 if (NILP (string))
2199 beg = CHAR_TO_BYTE (search_regs.start[sub]);
2200 else
2201 beg = search_regs.start[sub];
2202
ca1d1d23
JB
2203 prevc = '\n';
2204 case_action = all_caps;
2205
2206 /* some_multiletter_word is set nonzero if any original word
2207 is more than one letter long. */
2208 some_multiletter_word = 0;
97832bd0 2209 some_lowercase = 0;
208767c3 2210 some_nonuppercase_initial = 0;
73dc8771 2211 some_uppercase = 0;
ca1d1d23 2212
fa8ed3e0 2213 for (pos = beg; pos < last; pos++)
ca1d1d23 2214 {
080c45fd 2215 if (NILP (string))
5679531d 2216 c = FETCH_BYTE (pos);
080c45fd
RS
2217 else
2218 c = XSTRING (string)->data[pos];
2219
ca1d1d23
JB
2220 if (LOWERCASEP (c))
2221 {
2222 /* Cannot be all caps if any original char is lower case */
2223
97832bd0 2224 some_lowercase = 1;
ca1d1d23 2225 if (SYNTAX (prevc) != Sword)
208767c3 2226 some_nonuppercase_initial = 1;
ca1d1d23
JB
2227 else
2228 some_multiletter_word = 1;
2229 }
2230 else if (!NOCASEP (c))
2231 {
73dc8771 2232 some_uppercase = 1;
97832bd0 2233 if (SYNTAX (prevc) != Sword)
c4d460ce 2234 ;
97832bd0 2235 else
ca1d1d23
JB
2236 some_multiletter_word = 1;
2237 }
208767c3
RS
2238 else
2239 {
2240 /* If the initial is a caseless word constituent,
2241 treat that like a lowercase initial. */
2242 if (SYNTAX (prevc) != Sword)
2243 some_nonuppercase_initial = 1;
2244 }
ca1d1d23
JB
2245
2246 prevc = c;
2247 }
2248
97832bd0
RS
2249 /* Convert to all caps if the old text is all caps
2250 and has at least one multiletter word. */
2251 if (! some_lowercase && some_multiletter_word)
2252 case_action = all_caps;
c4d460ce 2253 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2254 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2255 case_action = cap_initial;
208767c3 2256 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2257 /* Should x -> yz, operating on X, give Yz or YZ?
2258 We'll assume the latter. */
2259 case_action = all_caps;
97832bd0
RS
2260 else
2261 case_action = nochange;
ca1d1d23
JB
2262 }
2263
080c45fd
RS
2264 /* Do replacement in a string. */
2265 if (!NILP (string))
2266 {
2267 Lisp_Object before, after;
2268
2269 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2270 make_number (search_regs.start[sub]));
2271 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2272
636a5e28
RS
2273 /* Substitute parts of the match into NEWTEXT
2274 if desired. */
080c45fd
RS
2275 if (NILP (literal))
2276 {
2277 int lastpos = -1;
0c8533c6 2278 int lastpos_byte = -1;
080c45fd
RS
2279 /* We build up the substituted string in ACCUM. */
2280 Lisp_Object accum;
2281 Lisp_Object middle;
0c8533c6 2282 int pos_byte;
080c45fd
RS
2283
2284 accum = Qnil;
2285
0c8533c6 2286 for (pos_byte = 0, pos = 0; pos_byte < XSTRING (newtext)->size_byte;)
080c45fd
RS
2287 {
2288 int substart = -1;
2289 int subend;
1e79ec24 2290 int delbackslash = 0;
080c45fd 2291
0c8533c6
RS
2292 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2293
080c45fd
RS
2294 if (c == '\\')
2295 {
0c8533c6 2296 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
080c45fd
RS
2297 if (c == '&')
2298 {
d7a5ad5f
RS
2299 substart = search_regs.start[sub];
2300 subend = search_regs.end[sub];
080c45fd
RS
2301 }
2302 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2303 {
ad10348f 2304 if (search_regs.start[c - '0'] >= 0)
080c45fd
RS
2305 {
2306 substart = search_regs.start[c - '0'];
2307 subend = search_regs.end[c - '0'];
2308 }
2309 }
1e79ec24
KH
2310 else if (c == '\\')
2311 delbackslash = 1;
636a5e28
RS
2312 else
2313 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2314 }
2315 if (substart >= 0)
2316 {
2317 if (pos - 1 != lastpos + 1)
0c8533c6
RS
2318 middle = substring_both (newtext, lastpos + 1,
2319 lastpos_byte + 1,
2320 pos - 1, pos_byte - 1);
080c45fd
RS
2321 else
2322 middle = Qnil;
2323 accum = concat3 (accum, middle,
0c8533c6
RS
2324 Fsubstring (string,
2325 make_number (substart),
080c45fd
RS
2326 make_number (subend)));
2327 lastpos = pos;
0c8533c6 2328 lastpos_byte = pos_byte;
080c45fd 2329 }
1e79ec24
KH
2330 else if (delbackslash)
2331 {
0c8533c6
RS
2332 middle = substring_both (newtext, lastpos + 1,
2333 lastpos_byte + 1,
2334 pos, pos_byte);
2335
1e79ec24
KH
2336 accum = concat2 (accum, middle);
2337 lastpos = pos;
0c8533c6 2338 lastpos_byte = pos_byte;
1e79ec24 2339 }
080c45fd
RS
2340 }
2341
2342 if (pos != lastpos + 1)
0c8533c6
RS
2343 middle = substring_both (newtext, lastpos + 1,
2344 lastpos_byte + 1,
2345 pos, pos_byte);
080c45fd
RS
2346 else
2347 middle = Qnil;
2348
2349 newtext = concat2 (accum, middle);
2350 }
2351
636a5e28 2352 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2353 if (case_action == all_caps)
2354 newtext = Fupcase (newtext);
2355 else if (case_action == cap_initial)
2b2eead9 2356 newtext = Fupcase_initials (newtext);
080c45fd
RS
2357
2358 return concat3 (before, newtext, after);
2359 }
2360
b0eba991
RS
2361 /* Record point, the move (quietly) to the start of the match. */
2362 if (PT > search_regs.start[sub])
2363 opoint = PT - ZV;
2364 else
2365 opoint = PT;
2366
fa8ed3e0 2367 TEMP_SET_PT (search_regs.start[sub]);
b0eba991 2368
9a76659d
JB
2369 /* We insert the replacement text before the old text, and then
2370 delete the original text. This means that markers at the
2371 beginning or end of the original will float to the corresponding
2372 position in the replacement. */
ca1d1d23 2373 if (!NILP (literal))
16fdc568 2374 Finsert_and_inherit (1, &newtext);
ca1d1d23
JB
2375 else
2376 {
2377 struct gcpro gcpro1;
16fdc568 2378 GCPRO1 (newtext);
ca1d1d23 2379
16fdc568 2380 for (pos = 0; pos < XSTRING (newtext)->size; pos++)
ca1d1d23 2381 {
6ec8bbd2 2382 int offset = PT - search_regs.start[sub];
9a76659d 2383
16fdc568 2384 c = XSTRING (newtext)->data[pos];
ca1d1d23
JB
2385 if (c == '\\')
2386 {
16fdc568 2387 c = XSTRING (newtext)->data[++pos];
ca1d1d23 2388 if (c == '&')
9a76659d
JB
2389 Finsert_buffer_substring
2390 (Fcurrent_buffer (),
d7a5ad5f
RS
2391 make_number (search_regs.start[sub] + offset),
2392 make_number (search_regs.end[sub] + offset));
78445046 2393 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
2394 {
2395 if (search_regs.start[c - '0'] >= 1)
9a76659d
JB
2396 Finsert_buffer_substring
2397 (Fcurrent_buffer (),
2398 make_number (search_regs.start[c - '0'] + offset),
2399 make_number (search_regs.end[c - '0'] + offset));
ca1d1d23 2400 }
636a5e28 2401 else if (c == '\\')
ca1d1d23 2402 insert_char (c);
636a5e28
RS
2403 else
2404 error ("Invalid use of `\\' in replacement text");
ca1d1d23
JB
2405 }
2406 else
2407 insert_char (c);
2408 }
2409 UNGCPRO;
2410 }
2411
6ec8bbd2 2412 inslen = PT - (search_regs.start[sub]);
d7a5ad5f 2413 del_range (search_regs.start[sub] + inslen, search_regs.end[sub] + inslen);
ca1d1d23
JB
2414
2415 if (case_action == all_caps)
6ec8bbd2 2416 Fupcase_region (make_number (PT - inslen), make_number (PT));
ca1d1d23 2417 else if (case_action == cap_initial)
6ec8bbd2 2418 Fupcase_initials_region (make_number (PT - inslen), make_number (PT));
b0eba991 2419
3e18eecf
RS
2420 newpoint = PT;
2421
b0eba991 2422 /* Put point back where it was in the text. */
8d808a65 2423 if (opoint <= 0)
fa8ed3e0 2424 TEMP_SET_PT (opoint + ZV);
b0eba991 2425 else
fa8ed3e0 2426 TEMP_SET_PT (opoint);
b0eba991
RS
2427
2428 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2429 move_if_not_intangible (newpoint);
b0eba991 2430
ca1d1d23
JB
2431 return Qnil;
2432}
2433\f
2434static Lisp_Object
2435match_limit (num, beginningp)
2436 Lisp_Object num;
2437 int beginningp;
2438{
2439 register int n;
2440
2441 CHECK_NUMBER (num, 0);
2442 n = XINT (num);
4746118a
JB
2443 if (n < 0 || n >= search_regs.num_regs)
2444 args_out_of_range (num, make_number (search_regs.num_regs));
2445 if (search_regs.num_regs <= 0
2446 || search_regs.start[n] < 0)
ca1d1d23
JB
2447 return Qnil;
2448 return (make_number ((beginningp) ? search_regs.start[n]
2449 : search_regs.end[n]));
2450}
2451
2452DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2453 "Return position of start of text matched by last search.\n\
5806161b
EN
2454SUBEXP, a number, specifies which parenthesized expression in the last\n\
2455 regexp.\n\
2456Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2457 SUBEXP pairs.\n\
ca1d1d23 2458Zero means the entire text matched by the whole regexp or whole string.")
5806161b
EN
2459 (subexp)
2460 Lisp_Object subexp;
ca1d1d23 2461{
5806161b 2462 return match_limit (subexp, 1);
ca1d1d23
JB
2463}
2464
2465DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2466 "Return position of end of text matched by last search.\n\
5806161b
EN
2467SUBEXP, a number, specifies which parenthesized expression in the last\n\
2468 regexp.\n\
2469Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2470 SUBEXP pairs.\n\
ca1d1d23 2471Zero means the entire text matched by the whole regexp or whole string.")
5806161b
EN
2472 (subexp)
2473 Lisp_Object subexp;
ca1d1d23 2474{
5806161b 2475 return match_limit (subexp, 0);
ca1d1d23
JB
2476}
2477
56256c2a 2478DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
ca1d1d23
JB
2479 "Return a list containing all info on what the last search matched.\n\
2480Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
2481All the elements are markers or nil (nil if the Nth pair didn't match)\n\
2482if the last match was on a buffer; integers or nil if a string was matched.\n\
56256c2a
RS
2483Use `store-match-data' to reinstate the data in this list.\n\
2484\n\
2485If INTEGERS (the optional first argument) is non-nil, always use integers\n\
8ca821e9 2486\(rather than markers) to represent buffer positions.\n\
56256c2a
RS
2487If REUSE is a list, reuse it as part of the value. If REUSE is long enough\n\
2488to hold all the values, and if INTEGERS is non-nil, no consing is done.")
2489 (integers, reuse)
2490 Lisp_Object integers, reuse;
ca1d1d23 2491{
56256c2a 2492 Lisp_Object tail, prev;
4746118a 2493 Lisp_Object *data;
ca1d1d23
JB
2494 int i, len;
2495
daa37602 2496 if (NILP (last_thing_searched))
c36bcf1b 2497 return Qnil;
daa37602 2498
4746118a
JB
2499 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2500 * sizeof (Lisp_Object));
2501
ca1d1d23 2502 len = -1;
4746118a 2503 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2504 {
2505 int start = search_regs.start[i];
2506 if (start >= 0)
2507 {
56256c2a
RS
2508 if (EQ (last_thing_searched, Qt)
2509 || ! NILP (integers))
ca1d1d23 2510 {
c235cce7
KH
2511 XSETFASTINT (data[2 * i], start);
2512 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2513 }
0ed62dc7 2514 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2515 {
2516 data[2 * i] = Fmake_marker ();
daa37602
JB
2517 Fset_marker (data[2 * i],
2518 make_number (start),
2519 last_thing_searched);
ca1d1d23
JB
2520 data[2 * i + 1] = Fmake_marker ();
2521 Fset_marker (data[2 * i + 1],
daa37602
JB
2522 make_number (search_regs.end[i]),
2523 last_thing_searched);
ca1d1d23 2524 }
daa37602
JB
2525 else
2526 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2527 abort ();
2528
ca1d1d23
JB
2529 len = i;
2530 }
2531 else
2532 data[2 * i] = data [2 * i + 1] = Qnil;
2533 }
56256c2a
RS
2534
2535 /* If REUSE is not usable, cons up the values and return them. */
2536 if (! CONSP (reuse))
2537 return Flist (2 * len + 2, data);
2538
2539 /* If REUSE is a list, store as many value elements as will fit
2540 into the elements of REUSE. */
2541 for (i = 0, tail = reuse; CONSP (tail);
2542 i++, tail = XCONS (tail)->cdr)
2543 {
2544 if (i < 2 * len + 2)
2545 XCONS (tail)->car = data[i];
2546 else
2547 XCONS (tail)->car = Qnil;
2548 prev = tail;
2549 }
2550
2551 /* If we couldn't fit all value elements into REUSE,
2552 cons up the rest of them and add them to the end of REUSE. */
2553 if (i < 2 * len + 2)
2554 XCONS (prev)->cdr = Flist (2 * len + 2 - i, data + i);
2555
2556 return reuse;
ca1d1d23
JB
2557}
2558
2559
3f1c005b 2560DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
ca1d1d23
JB
2561 "Set internal data on last search match from elements of LIST.\n\
2562LIST should have been created by calling `match-data' previously.")
2563 (list)
2564 register Lisp_Object list;
2565{
2566 register int i;
2567 register Lisp_Object marker;
2568
7074fde6
FP
2569 if (running_asynch_code)
2570 save_search_regs ();
2571
ca1d1d23 2572 if (!CONSP (list) && !NILP (list))
b37902c8 2573 list = wrong_type_argument (Qconsp, list);
ca1d1d23 2574
daa37602
JB
2575 /* Unless we find a marker with a buffer in LIST, assume that this
2576 match data came from a string. */
2577 last_thing_searched = Qt;
2578
4746118a
JB
2579 /* Allocate registers if they don't already exist. */
2580 {
d084e942 2581 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
2582
2583 if (length > search_regs.num_regs)
2584 {
1113d9db
JB
2585 if (search_regs.num_regs == 0)
2586 {
2587 search_regs.start
2588 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2589 search_regs.end
2590 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2591 }
4746118a 2592 else
1113d9db
JB
2593 {
2594 search_regs.start
2595 = (regoff_t *) xrealloc (search_regs.start,
2596 length * sizeof (regoff_t));
2597 search_regs.end
2598 = (regoff_t *) xrealloc (search_regs.end,
2599 length * sizeof (regoff_t));
2600 }
4746118a 2601
487282dc 2602 search_regs.num_regs = length;
4746118a
JB
2603 }
2604 }
2605
2606 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2607 {
2608 marker = Fcar (list);
2609 if (NILP (marker))
2610 {
2611 search_regs.start[i] = -1;
2612 list = Fcdr (list);
2613 }
2614 else
2615 {
0ed62dc7 2616 if (MARKERP (marker))
daa37602
JB
2617 {
2618 if (XMARKER (marker)->buffer == 0)
c235cce7 2619 XSETFASTINT (marker, 0);
daa37602 2620 else
a3668d92 2621 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
daa37602 2622 }
ca1d1d23
JB
2623
2624 CHECK_NUMBER_COERCE_MARKER (marker, 0);
2625 search_regs.start[i] = XINT (marker);
2626 list = Fcdr (list);
2627
2628 marker = Fcar (list);
0ed62dc7 2629 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
c235cce7 2630 XSETFASTINT (marker, 0);
ca1d1d23
JB
2631
2632 CHECK_NUMBER_COERCE_MARKER (marker, 0);
2633 search_regs.end[i] = XINT (marker);
2634 }
2635 list = Fcdr (list);
2636 }
2637
2638 return Qnil;
2639}
2640
7074fde6
FP
2641/* If non-zero the match data have been saved in saved_search_regs
2642 during the execution of a sentinel or filter. */
75ebf74b 2643static int search_regs_saved;
7074fde6
FP
2644static struct re_registers saved_search_regs;
2645
2646/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2647 if asynchronous code (filter or sentinel) is running. */
2648static void
2649save_search_regs ()
2650{
2651 if (!search_regs_saved)
2652 {
2653 saved_search_regs.num_regs = search_regs.num_regs;
2654 saved_search_regs.start = search_regs.start;
2655 saved_search_regs.end = search_regs.end;
2656 search_regs.num_regs = 0;
2d4a771a
RS
2657 search_regs.start = 0;
2658 search_regs.end = 0;
7074fde6
FP
2659
2660 search_regs_saved = 1;
2661 }
2662}
2663
2664/* Called upon exit from filters and sentinels. */
2665void
2666restore_match_data ()
2667{
2668 if (search_regs_saved)
2669 {
2670 if (search_regs.num_regs > 0)
2671 {
2672 xfree (search_regs.start);
2673 xfree (search_regs.end);
2674 }
2675 search_regs.num_regs = saved_search_regs.num_regs;
2676 search_regs.start = saved_search_regs.start;
2677 search_regs.end = saved_search_regs.end;
2678
2679 search_regs_saved = 0;
2680 }
2681}
2682
ca1d1d23
JB
2683/* Quote a string to inactivate reg-expr chars */
2684
2685DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2686 "Return a regexp string which matches exactly STRING and nothing else.")
5806161b
EN
2687 (string)
2688 Lisp_Object string;
ca1d1d23
JB
2689{
2690 register unsigned char *in, *out, *end;
2691 register unsigned char *temp;
0c8533c6 2692 int backslashes_added = 0;
ca1d1d23 2693
5806161b 2694 CHECK_STRING (string, 0);
ca1d1d23 2695
0c8533c6 2696 temp = (unsigned char *) alloca (XSTRING (string)->size_byte * 2);
ca1d1d23
JB
2697
2698 /* Now copy the data into the new string, inserting escapes. */
2699
5806161b 2700 in = XSTRING (string)->data;
0c8533c6 2701 end = in + XSTRING (string)->size_byte;
ca1d1d23
JB
2702 out = temp;
2703
2704 for (; in != end; in++)
2705 {
2706 if (*in == '[' || *in == ']'
2707 || *in == '*' || *in == '.' || *in == '\\'
2708 || *in == '?' || *in == '+'
2709 || *in == '^' || *in == '$')
0c8533c6 2710 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
2711 *out++ = *in;
2712 }
2713
0c8533c6
RS
2714 return make_multibyte_string (temp,
2715 XSTRING (string)->size + backslashes_added,
2716 out - temp);
ca1d1d23
JB
2717}
2718\f
2719syms_of_search ()
2720{
2721 register int i;
2722
487282dc
KH
2723 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2724 {
2725 searchbufs[i].buf.allocated = 100;
2726 searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
2727 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2728 searchbufs[i].regexp = Qnil;
2729 staticpro (&searchbufs[i].regexp);
2730 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2731 }
2732 searchbuf_head = &searchbufs[0];
ca1d1d23
JB
2733
2734 Qsearch_failed = intern ("search-failed");
2735 staticpro (&Qsearch_failed);
2736 Qinvalid_regexp = intern ("invalid-regexp");
2737 staticpro (&Qinvalid_regexp);
2738
2739 Fput (Qsearch_failed, Qerror_conditions,
2740 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2741 Fput (Qsearch_failed, Qerror_message,
2742 build_string ("Search failed"));
2743
2744 Fput (Qinvalid_regexp, Qerror_conditions,
2745 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2746 Fput (Qinvalid_regexp, Qerror_message,
2747 build_string ("Invalid regexp"));
2748
daa37602
JB
2749 last_thing_searched = Qnil;
2750 staticpro (&last_thing_searched);
2751
ca1d1d23 2752 defsubr (&Slooking_at);
b819a390
RS
2753 defsubr (&Sposix_looking_at);
2754 defsubr (&Sstring_match);
2755 defsubr (&Sposix_string_match);
ca1d1d23
JB
2756 defsubr (&Ssearch_forward);
2757 defsubr (&Ssearch_backward);
2758 defsubr (&Sword_search_forward);
2759 defsubr (&Sword_search_backward);
2760 defsubr (&Sre_search_forward);
2761 defsubr (&Sre_search_backward);
b819a390
RS
2762 defsubr (&Sposix_search_forward);
2763 defsubr (&Sposix_search_backward);
ca1d1d23
JB
2764 defsubr (&Sreplace_match);
2765 defsubr (&Smatch_beginning);
2766 defsubr (&Smatch_end);
2767 defsubr (&Smatch_data);
3f1c005b 2768 defsubr (&Sset_match_data);
ca1d1d23
JB
2769 defsubr (&Sregexp_quote);
2770}