(Fread_file_name): Alter the history
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
31c8f881 2 Copyright (C) 1985, 86, 87, 93, 94, 97, 1998 Free Software Foundation, Inc.
ca1d1d23
JB
3
4This file is part of GNU Emacs.
5
6GNU Emacs is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
7c938215 8the Free Software Foundation; either version 2, or (at your option)
ca1d1d23
JB
9any later version.
10
11GNU Emacs is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU Emacs; see the file COPYING. If not, write to
3b7ad313
EN
18the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19Boston, MA 02111-1307, USA. */
ca1d1d23
JB
20
21
18160b98 22#include <config.h>
ca1d1d23
JB
23#include "lisp.h"
24#include "syntax.h"
5679531d 25#include "category.h"
ca1d1d23 26#include "buffer.h"
5679531d 27#include "charset.h"
9169c321 28#include "region-cache.h"
ca1d1d23 29#include "commands.h"
9ac0d9e0 30#include "blockinput.h"
bf1760bb 31#include "intervals.h"
4746118a 32
ca1d1d23
JB
33#include <sys/types.h>
34#include "regex.h"
35
1d288aef 36#define REGEXP_CACHE_SIZE 20
ca1d1d23 37
487282dc
KH
38/* If the regexp is non-nil, then the buffer contains the compiled form
39 of that regexp, suitable for searching. */
1d288aef
RS
40struct regexp_cache
41{
487282dc
KH
42 struct regexp_cache *next;
43 Lisp_Object regexp;
44 struct re_pattern_buffer buf;
45 char fastmap[0400];
b819a390
RS
46 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
47 char posix;
487282dc 48};
ca1d1d23 49
487282dc
KH
50/* The instances of that struct. */
51struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 52
487282dc
KH
53/* The head of the linked list; points to the most recently used buffer. */
54struct regexp_cache *searchbuf_head;
ca1d1d23 55
ca1d1d23 56
4746118a
JB
57/* Every call to re_match, etc., must pass &search_regs as the regs
58 argument unless you can show it is unnecessary (i.e., if re_match
59 is certainly going to be called again before region-around-match
60 can be called).
61
62 Since the registers are now dynamically allocated, we need to make
63 sure not to refer to the Nth register before checking that it has
1113d9db
JB
64 been allocated by checking search_regs.num_regs.
65
66 The regex code keeps track of whether it has allocated the search
487282dc
KH
67 buffer using bits in the re_pattern_buffer. This means that whenever
68 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
69 allocated any registers, and will allocate new registers the next
70 time you call a searching or matching function. Therefore, we need
71 to call re_set_registers after compiling a new pattern or after
72 setting the match registers, so that the regex functions will be
73 able to free or re-allocate it properly. */
ca1d1d23
JB
74static struct re_registers search_regs;
75
daa37602
JB
76/* The buffer in which the last search was performed, or
77 Qt if the last search was done in a string;
78 Qnil if no searching has been done yet. */
79static Lisp_Object last_thing_searched;
ca1d1d23 80
8e6208c5 81/* error condition signaled when regexp compile_pattern fails */
ca1d1d23
JB
82
83Lisp_Object Qinvalid_regexp;
84
ca325161 85static void set_search_regs ();
044f81f1 86static void save_search_regs ();
facdc750
RS
87static int simple_search ();
88static int boyer_moore ();
b819a390
RS
89static int search_buffer ();
90
ca1d1d23
JB
91static void
92matcher_overflow ()
93{
94 error ("Stack overflow in regexp matcher");
95}
96
97#ifdef __STDC__
98#define CONST const
99#else
100#define CONST
101#endif
102
b819a390
RS
103/* Compile a regexp and signal a Lisp error if anything goes wrong.
104 PATTERN is the pattern to compile.
105 CP is the place to put the result.
facdc750 106 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
107 REGP is the structure that says where to store the "register"
108 values that will result from matching this pattern.
109 If it is 0, we should compile the pattern not to record any
110 subexpression bounds.
111 POSIX is nonzero if we want full backtracking (POSIX style)
5679531d
KH
112 for this pattern. 0 means backtrack only enough to get a valid match.
113 MULTIBYTE is nonzero if we want to handle multibyte characters in
114 PATTERN. 0 means all multibyte characters are recognized just as
115 sequences of binary data. */
ca1d1d23 116
487282dc 117static void
5679531d 118compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
487282dc 119 struct regexp_cache *cp;
ca1d1d23 120 Lisp_Object pattern;
facdc750 121 Lisp_Object translate;
487282dc 122 struct re_registers *regp;
b819a390 123 int posix;
5679531d 124 int multibyte;
ca1d1d23 125{
f8bd51c4
KH
126 char *raw_pattern;
127 int raw_pattern_size;
d451e4db 128 char *val;
b819a390 129 reg_syntax_t old;
ca1d1d23 130
f8bd51c4
KH
131 /* MULTIBYTE says whether the text to be searched is multibyte.
132 We must convert PATTERN to match that, or we will not really
133 find things right. */
134
135 if (multibyte == STRING_MULTIBYTE (pattern))
136 {
137 raw_pattern = (char *) XSTRING (pattern)->data;
138 raw_pattern_size = XSTRING (pattern)->size_byte;
139 }
140 else if (multibyte)
141 {
142 raw_pattern_size = count_size_as_multibyte (XSTRING (pattern)->data,
143 XSTRING (pattern)->size);
144 raw_pattern = (char *) alloca (raw_pattern_size + 1);
145 copy_text (XSTRING (pattern)->data, raw_pattern,
146 XSTRING (pattern)->size, 0, 1);
147 }
148 else
149 {
150 /* Converting multibyte to single-byte.
151
152 ??? Perhaps this conversion should be done in a special way
153 by subtracting nonascii-insert-offset from each non-ASCII char,
154 so that only the multibyte chars which really correspond to
155 the chosen single-byte character set can possibly match. */
156 raw_pattern_size = XSTRING (pattern)->size;
157 raw_pattern = (char *) alloca (raw_pattern_size + 1);
158 copy_text (XSTRING (pattern)->data, raw_pattern,
facdc750 159 XSTRING (pattern)->size_byte, 1, 0);
f8bd51c4
KH
160 }
161
487282dc 162 cp->regexp = Qnil;
facdc750 163 cp->buf.translate = (! NILP (translate) ? translate : 0);
b819a390 164 cp->posix = posix;
5679531d 165 cp->buf.multibyte = multibyte;
9ac0d9e0 166 BLOCK_INPUT;
b819a390
RS
167 old = re_set_syntax (RE_SYNTAX_EMACS
168 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
f8bd51c4 169 val = (char *) re_compile_pattern (raw_pattern, raw_pattern_size, &cp->buf);
b819a390 170 re_set_syntax (old);
9ac0d9e0 171 UNBLOCK_INPUT;
ca1d1d23 172 if (val)
487282dc 173 Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
1113d9db 174
487282dc 175 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
176}
177
178/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
179 the cache.
180 PATTERN is the pattern to compile.
facdc750 181 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
182 REGP is the structure that says where to store the "register"
183 values that will result from matching this pattern.
184 If it is 0, we should compile the pattern not to record any
185 subexpression bounds.
186 POSIX is nonzero if we want full backtracking (POSIX style)
187 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
188
189struct re_pattern_buffer *
0c8533c6 190compile_pattern (pattern, regp, translate, posix, multibyte)
487282dc
KH
191 Lisp_Object pattern;
192 struct re_registers *regp;
facdc750 193 Lisp_Object translate;
0c8533c6 194 int posix, multibyte;
487282dc
KH
195{
196 struct regexp_cache *cp, **cpp;
197
198 for (cpp = &searchbuf_head; ; cpp = &cp->next)
199 {
200 cp = *cpp;
1d288aef
RS
201 if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
202 && !NILP (Fstring_equal (cp->regexp, pattern))
facdc750 203 && cp->buf.translate == (! NILP (translate) ? translate : 0)
5679531d
KH
204 && cp->posix == posix
205 && cp->buf.multibyte == multibyte)
487282dc
KH
206 break;
207
208 /* If we're at the end of the cache, compile into the last cell. */
209 if (cp->next == 0)
210 {
5679531d 211 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
487282dc
KH
212 break;
213 }
214 }
215
216 /* When we get here, cp (aka *cpp) contains the compiled pattern,
217 either because we found it in the cache or because we just compiled it.
218 Move it to the front of the queue to mark it as most recently used. */
219 *cpp = cp->next;
220 cp->next = searchbuf_head;
221 searchbuf_head = cp;
1113d9db 222
6639708c
RS
223 /* Advise the searching functions about the space we have allocated
224 for register data. */
225 if (regp)
226 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
227
487282dc 228 return &cp->buf;
ca1d1d23
JB
229}
230
231/* Error condition used for failing searches */
232Lisp_Object Qsearch_failed;
233
234Lisp_Object
235signal_failure (arg)
236 Lisp_Object arg;
237{
238 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
239 return Qnil;
240}
241\f
b819a390
RS
242static Lisp_Object
243looking_at_1 (string, posix)
ca1d1d23 244 Lisp_Object string;
b819a390 245 int posix;
ca1d1d23
JB
246{
247 Lisp_Object val;
248 unsigned char *p1, *p2;
249 int s1, s2;
250 register int i;
487282dc 251 struct re_pattern_buffer *bufp;
ca1d1d23 252
7074fde6
FP
253 if (running_asynch_code)
254 save_search_regs ();
255
ca1d1d23 256 CHECK_STRING (string, 0);
487282dc
KH
257 bufp = compile_pattern (string, &search_regs,
258 (!NILP (current_buffer->case_fold_search)
facdc750 259 ? DOWNCASE_TABLE : Qnil),
0c8533c6
RS
260 posix,
261 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23
JB
262
263 immediate_quit = 1;
264 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
265
266 /* Get pointers and sizes of the two strings
267 that make up the visible portion of the buffer. */
268
269 p1 = BEGV_ADDR;
fa8ed3e0 270 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 271 p2 = GAP_END_ADDR;
fa8ed3e0 272 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
273 if (s1 < 0)
274 {
275 p2 = p1;
fa8ed3e0 276 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
277 s1 = 0;
278 }
279 if (s2 < 0)
280 {
fa8ed3e0 281 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
282 s2 = 0;
283 }
8bb43c28
RS
284
285 re_match_object = Qnil;
ca1d1d23 286
487282dc 287 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
fa8ed3e0
RS
288 PT_BYTE - BEGV_BYTE, &search_regs,
289 ZV_BYTE - BEGV_BYTE);
ca1d1d23
JB
290 if (i == -2)
291 matcher_overflow ();
292
293 val = (0 <= i ? Qt : Qnil);
fa8ed3e0
RS
294 if (i >= 0)
295 for (i = 0; i < search_regs.num_regs; i++)
296 if (search_regs.start[i] >= 0)
297 {
298 search_regs.start[i]
299 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
300 search_regs.end[i]
301 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
302 }
a3668d92 303 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
304 immediate_quit = 0;
305 return val;
306}
307
b819a390 308DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
94f94972 309 "Return t if text after point matches regular expression REGEXP.\n\
b819a390
RS
310This function modifies the match data that `match-beginning',\n\
311`match-end' and `match-data' access; save and restore the match\n\
312data if you want to preserve them.")
94f94972
RS
313 (regexp)
314 Lisp_Object regexp;
b819a390 315{
94f94972 316 return looking_at_1 (regexp, 0);
b819a390
RS
317}
318
319DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
94f94972 320 "Return t if text after point matches regular expression REGEXP.\n\
b819a390
RS
321Find the longest match, in accord with Posix regular expression rules.\n\
322This function modifies the match data that `match-beginning',\n\
323`match-end' and `match-data' access; save and restore the match\n\
324data if you want to preserve them.")
94f94972
RS
325 (regexp)
326 Lisp_Object regexp;
b819a390 327{
94f94972 328 return looking_at_1 (regexp, 1);
b819a390
RS
329}
330\f
331static Lisp_Object
332string_match_1 (regexp, string, start, posix)
ca1d1d23 333 Lisp_Object regexp, string, start;
b819a390 334 int posix;
ca1d1d23
JB
335{
336 int val;
487282dc 337 struct re_pattern_buffer *bufp;
0c8533c6
RS
338 int pos, pos_byte;
339 int i;
ca1d1d23 340
7074fde6
FP
341 if (running_asynch_code)
342 save_search_regs ();
343
ca1d1d23
JB
344 CHECK_STRING (regexp, 0);
345 CHECK_STRING (string, 1);
346
347 if (NILP (start))
0c8533c6 348 pos = 0, pos_byte = 0;
ca1d1d23
JB
349 else
350 {
351 int len = XSTRING (string)->size;
352
353 CHECK_NUMBER (start, 2);
0c8533c6
RS
354 pos = XINT (start);
355 if (pos < 0 && -pos <= len)
356 pos = len + pos;
357 else if (0 > pos || pos > len)
ca1d1d23 358 args_out_of_range (string, start);
0c8533c6 359 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
360 }
361
487282dc
KH
362 bufp = compile_pattern (regexp, &search_regs,
363 (!NILP (current_buffer->case_fold_search)
facdc750 364 ? DOWNCASE_TABLE : Qnil),
0c8533c6
RS
365 posix,
366 STRING_MULTIBYTE (string));
ca1d1d23 367 immediate_quit = 1;
8bb43c28
RS
368 re_match_object = string;
369
487282dc 370 val = re_search (bufp, (char *) XSTRING (string)->data,
0c8533c6
RS
371 XSTRING (string)->size_byte, pos_byte,
372 XSTRING (string)->size_byte - pos_byte,
ca1d1d23
JB
373 &search_regs);
374 immediate_quit = 0;
daa37602 375 last_thing_searched = Qt;
ca1d1d23
JB
376 if (val == -2)
377 matcher_overflow ();
378 if (val < 0) return Qnil;
0c8533c6
RS
379
380 for (i = 0; i < search_regs.num_regs; i++)
381 if (search_regs.start[i] >= 0)
382 {
383 search_regs.start[i]
384 = string_byte_to_char (string, search_regs.start[i]);
385 search_regs.end[i]
386 = string_byte_to_char (string, search_regs.end[i]);
387 }
388
389 return make_number (string_byte_to_char (string, val));
ca1d1d23 390}
e59a8453 391
b819a390
RS
392DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
393 "Return index of start of first match for REGEXP in STRING, or nil.\n\
394If third arg START is non-nil, start search at that index in STRING.\n\
395For index of first char beyond the match, do (match-end 0).\n\
396`match-end' and `match-beginning' also give indices of substrings\n\
397matched by parenthesis constructs in the pattern.")
398 (regexp, string, start)
399 Lisp_Object regexp, string, start;
400{
401 return string_match_1 (regexp, string, start, 0);
402}
403
404DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
405 "Return index of start of first match for REGEXP in STRING, or nil.\n\
406Find the longest match, in accord with Posix regular expression rules.\n\
407If third arg START is non-nil, start search at that index in STRING.\n\
408For index of first char beyond the match, do (match-end 0).\n\
409`match-end' and `match-beginning' also give indices of substrings\n\
410matched by parenthesis constructs in the pattern.")
411 (regexp, string, start)
412 Lisp_Object regexp, string, start;
413{
414 return string_match_1 (regexp, string, start, 1);
415}
416
e59a8453
RS
417/* Match REGEXP against STRING, searching all of STRING,
418 and return the index of the match, or negative on failure.
419 This does not clobber the match data. */
420
421int
422fast_string_match (regexp, string)
423 Lisp_Object regexp, string;
424{
425 int val;
487282dc 426 struct re_pattern_buffer *bufp;
e59a8453 427
facdc750
RS
428 bufp = compile_pattern (regexp, 0, Qnil,
429 0, STRING_MULTIBYTE (string));
e59a8453 430 immediate_quit = 1;
8bb43c28
RS
431 re_match_object = string;
432
487282dc 433 val = re_search (bufp, (char *) XSTRING (string)->data,
4996330b 434 XSTRING (string)->size_byte, 0, XSTRING (string)->size_byte,
e59a8453
RS
435 0);
436 immediate_quit = 0;
437 return val;
438}
5679531d
KH
439
440/* Match REGEXP against STRING, searching all of STRING ignoring case,
441 and return the index of the match, or negative on failure.
0c8533c6
RS
442 This does not clobber the match data.
443 We assume that STRING contains single-byte characters. */
5679531d
KH
444
445extern Lisp_Object Vascii_downcase_table;
446
447int
b4577c63 448fast_c_string_match_ignore_case (regexp, string)
5679531d
KH
449 Lisp_Object regexp;
450 char *string;
451{
452 int val;
453 struct re_pattern_buffer *bufp;
454 int len = strlen (string);
455
0c8533c6 456 regexp = string_make_unibyte (regexp);
b4577c63 457 re_match_object = Qt;
5679531d 458 bufp = compile_pattern (regexp, 0,
facdc750 459 Vascii_downcase_table, 0,
f8bd51c4 460 0);
5679531d
KH
461 immediate_quit = 1;
462 val = re_search (bufp, string, len, 0, len, 0);
463 immediate_quit = 0;
464 return val;
465}
ca1d1d23 466\f
9169c321
JB
467/* max and min. */
468
469static int
470max (a, b)
471 int a, b;
472{
473 return ((a > b) ? a : b);
474}
475
476static int
477min (a, b)
478 int a, b;
479{
480 return ((a < b) ? a : b);
481}
482
483\f
484/* The newline cache: remembering which sections of text have no newlines. */
485
486/* If the user has requested newline caching, make sure it's on.
487 Otherwise, make sure it's off.
488 This is our cheezy way of associating an action with the change of
489 state of a buffer-local variable. */
490static void
491newline_cache_on_off (buf)
492 struct buffer *buf;
493{
494 if (NILP (buf->cache_long_line_scans))
495 {
496 /* It should be off. */
497 if (buf->newline_cache)
498 {
499 free_region_cache (buf->newline_cache);
500 buf->newline_cache = 0;
501 }
502 }
503 else
504 {
505 /* It should be on. */
506 if (buf->newline_cache == 0)
507 buf->newline_cache = new_region_cache ();
508 }
509}
510
511\f
512/* Search for COUNT instances of the character TARGET between START and END.
513
514 If COUNT is positive, search forwards; END must be >= START.
515 If COUNT is negative, search backwards for the -COUNTth instance;
516 END must be <= START.
517 If COUNT is zero, do anything you please; run rogue, for all I care.
518
519 If END is zero, use BEGV or ZV instead, as appropriate for the
520 direction indicated by COUNT.
ffd56f97
JB
521
522 If we find COUNT instances, set *SHORTAGE to zero, and return the
5bfe95c9
RS
523 position after the COUNTth match. Note that for reverse motion
524 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 525
9169c321
JB
526 If we don't find COUNT instances before reaching END, set *SHORTAGE
527 to the number of TARGETs left unfound, and return END.
ffd56f97 528
087a5f81
RS
529 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
530 except when inside redisplay. */
531
9169c321
JB
532scan_buffer (target, start, end, count, shortage, allow_quit)
533 register int target;
534 int start, end;
535 int count;
536 int *shortage;
087a5f81 537 int allow_quit;
ca1d1d23 538{
9169c321
JB
539 struct region_cache *newline_cache;
540 int direction;
ffd56f97 541
9169c321
JB
542 if (count > 0)
543 {
544 direction = 1;
545 if (! end) end = ZV;
546 }
547 else
548 {
549 direction = -1;
550 if (! end) end = BEGV;
551 }
ffd56f97 552
9169c321
JB
553 newline_cache_on_off (current_buffer);
554 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
555
556 if (shortage != 0)
557 *shortage = 0;
558
087a5f81 559 immediate_quit = allow_quit;
ca1d1d23 560
ffd56f97 561 if (count > 0)
9169c321 562 while (start != end)
ca1d1d23 563 {
9169c321
JB
564 /* Our innermost scanning loop is very simple; it doesn't know
565 about gaps, buffer ends, or the newline cache. ceiling is
566 the position of the last character before the next such
567 obstacle --- the last character the dumb search loop should
568 examine. */
fa8ed3e0
RS
569 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
570 int start_byte = CHAR_TO_BYTE (start);
9169c321
JB
571
572 /* If we're looking for a newline, consult the newline cache
573 to see where we can avoid some scanning. */
574 if (target == '\n' && newline_cache)
575 {
576 int next_change;
577 immediate_quit = 0;
578 while (region_cache_forward
fa8ed3e0
RS
579 (current_buffer, newline_cache, start_byte, &next_change))
580 start_byte = next_change;
cbe0db0d 581 immediate_quit = allow_quit;
9169c321 582
fa8ed3e0
RS
583 /* START should never be after END. */
584 if (start_byte > ceiling_byte)
585 start_byte = ceiling_byte;
9169c321
JB
586
587 /* Now the text after start is an unknown region, and
588 next_change is the position of the next known region. */
fa8ed3e0 589 ceiling_byte = min (next_change - 1, ceiling_byte);
9169c321
JB
590 }
591
592 /* The dumb loop can only scan text stored in contiguous
593 bytes. BUFFER_CEILING_OF returns the last character
594 position that is contiguous, so the ceiling is the
595 position after that. */
fa8ed3e0 596 ceiling_byte = min (BUFFER_CEILING_OF (start_byte), ceiling_byte);
9169c321
JB
597
598 {
599 /* The termination address of the dumb loop. */
fa8ed3e0
RS
600 register unsigned char *ceiling_addr
601 = BYTE_POS_ADDR (ceiling_byte) + 1;
602 register unsigned char *cursor
603 = BYTE_POS_ADDR (start_byte);
9169c321
JB
604 unsigned char *base = cursor;
605
606 while (cursor < ceiling_addr)
607 {
608 unsigned char *scan_start = cursor;
609
610 /* The dumb loop. */
611 while (*cursor != target && ++cursor < ceiling_addr)
612 ;
613
614 /* If we're looking for newlines, cache the fact that
615 the region from start to cursor is free of them. */
616 if (target == '\n' && newline_cache)
617 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
618 start_byte + scan_start - base,
619 start_byte + cursor - base);
9169c321
JB
620
621 /* Did we find the target character? */
622 if (cursor < ceiling_addr)
623 {
624 if (--count == 0)
625 {
626 immediate_quit = 0;
fa8ed3e0 627 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
9169c321
JB
628 }
629 cursor++;
630 }
631 }
632
fa8ed3e0 633 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321 634 }
ca1d1d23
JB
635 }
636 else
9169c321
JB
637 while (start > end)
638 {
639 /* The last character to check before the next obstacle. */
fa8ed3e0
RS
640 int ceiling_byte = CHAR_TO_BYTE (end);
641 int start_byte = CHAR_TO_BYTE (start);
9169c321
JB
642
643 /* Consult the newline cache, if appropriate. */
644 if (target == '\n' && newline_cache)
645 {
646 int next_change;
647 immediate_quit = 0;
648 while (region_cache_backward
fa8ed3e0
RS
649 (current_buffer, newline_cache, start_byte, &next_change))
650 start_byte = next_change;
cbe0db0d 651 immediate_quit = allow_quit;
9169c321
JB
652
653 /* Start should never be at or before end. */
fa8ed3e0
RS
654 if (start_byte <= ceiling_byte)
655 start_byte = ceiling_byte + 1;
9169c321
JB
656
657 /* Now the text before start is an unknown region, and
658 next_change is the position of the next known region. */
fa8ed3e0 659 ceiling_byte = max (next_change, ceiling_byte);
9169c321
JB
660 }
661
662 /* Stop scanning before the gap. */
fa8ed3e0 663 ceiling_byte = max (BUFFER_FLOOR_OF (start_byte - 1), ceiling_byte);
9169c321
JB
664
665 {
666 /* The termination address of the dumb loop. */
fa8ed3e0
RS
667 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
668 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
669 unsigned char *base = cursor;
670
671 while (cursor >= ceiling_addr)
672 {
673 unsigned char *scan_start = cursor;
674
675 while (*cursor != target && --cursor >= ceiling_addr)
676 ;
677
678 /* If we're looking for newlines, cache the fact that
679 the region from after the cursor to start is free of them. */
680 if (target == '\n' && newline_cache)
681 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
682 start_byte + cursor - base,
683 start_byte + scan_start - base);
9169c321
JB
684
685 /* Did we find the target character? */
686 if (cursor >= ceiling_addr)
687 {
688 if (++count >= 0)
689 {
690 immediate_quit = 0;
fa8ed3e0 691 return BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
692 }
693 cursor--;
694 }
695 }
696
fa8ed3e0 697 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
698 }
699 }
700
ca1d1d23
JB
701 immediate_quit = 0;
702 if (shortage != 0)
ffd56f97 703 *shortage = count * direction;
9169c321 704 return start;
ca1d1d23 705}
fa8ed3e0
RS
706\f
707/* Search for COUNT instances of a line boundary, which means either a
708 newline or (if selective display enabled) a carriage return.
709 Start at START. If COUNT is negative, search backwards.
710
711 We report the resulting position by calling TEMP_SET_PT_BOTH.
712
713 If we find COUNT instances. we position after (always after,
714 even if scanning backwards) the COUNTth match, and return 0.
715
716 If we don't find COUNT instances before reaching the end of the
717 buffer (or the beginning, if scanning backwards), we return
718 the number of line boundaries left unfound, and position at
719 the limit we bumped up against.
720
721 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
d5d57b92 722 except in special cases. */
ca1d1d23 723
63fa018d 724int
fa8ed3e0
RS
725scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
726 int start, start_byte;
727 int limit, limit_byte;
728 register int count;
729 int allow_quit;
63fa018d 730{
fa8ed3e0
RS
731 int direction = ((count > 0) ? 1 : -1);
732
733 register unsigned char *cursor;
734 unsigned char *base;
735
736 register int ceiling;
737 register unsigned char *ceiling_addr;
738
d5d57b92
RS
739 int old_immediate_quit = immediate_quit;
740
fa8ed3e0
RS
741 /* If we are not in selective display mode,
742 check only for newlines. */
743 int selective_display = (!NILP (current_buffer->selective_display)
744 && !INTEGERP (current_buffer->selective_display));
745
746 /* The code that follows is like scan_buffer
747 but checks for either newline or carriage return. */
748
d5d57b92
RS
749 if (allow_quit)
750 immediate_quit++;
fa8ed3e0
RS
751
752 start_byte = CHAR_TO_BYTE (start);
753
754 if (count > 0)
755 {
756 while (start_byte < limit_byte)
757 {
758 ceiling = BUFFER_CEILING_OF (start_byte);
759 ceiling = min (limit_byte - 1, ceiling);
760 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
761 base = (cursor = BYTE_POS_ADDR (start_byte));
762 while (1)
763 {
764 while (*cursor != '\n' && ++cursor != ceiling_addr)
765 ;
766
767 if (cursor != ceiling_addr)
768 {
769 if (--count == 0)
770 {
d5d57b92 771 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
772 start_byte = start_byte + cursor - base + 1;
773 start = BYTE_TO_CHAR (start_byte);
774 TEMP_SET_PT_BOTH (start, start_byte);
775 return 0;
776 }
777 else
778 if (++cursor == ceiling_addr)
779 break;
780 }
781 else
782 break;
783 }
784 start_byte += cursor - base;
785 }
786 }
787 else
788 {
789 int start_byte = CHAR_TO_BYTE (start);
790 while (start_byte > limit_byte)
791 {
792 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
793 ceiling = max (limit_byte, ceiling);
794 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
795 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
796 while (1)
797 {
798 while (--cursor != ceiling_addr && *cursor != '\n')
799 ;
800
801 if (cursor != ceiling_addr)
802 {
803 if (++count == 0)
804 {
d5d57b92 805 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
806 /* Return the position AFTER the match we found. */
807 start_byte = start_byte + cursor - base + 1;
808 start = BYTE_TO_CHAR (start_byte);
809 TEMP_SET_PT_BOTH (start, start_byte);
810 return 0;
811 }
812 }
813 else
814 break;
815 }
816 /* Here we add 1 to compensate for the last decrement
817 of CURSOR, which took it past the valid range. */
818 start_byte += cursor - base + 1;
819 }
820 }
821
822 TEMP_SET_PT_BOTH (limit, limit_byte);
d5d57b92 823 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
824
825 return count * direction;
63fa018d
RS
826}
827
ca1d1d23 828int
fa8ed3e0 829find_next_newline_no_quit (from, cnt)
ca1d1d23
JB
830 register int from, cnt;
831{
fa8ed3e0 832 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
9169c321
JB
833}
834
9169c321
JB
835/* Like find_next_newline, but returns position before the newline,
836 not after, and only search up to TO. This isn't just
837 find_next_newline (...)-1, because you might hit TO. */
fa8ed3e0 838
9169c321
JB
839int
840find_before_next_newline (from, to, cnt)
cbe0db0d 841 int from, to, cnt;
9169c321
JB
842{
843 int shortage;
844 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
845
846 if (shortage == 0)
847 pos--;
848
849 return pos;
ca1d1d23
JB
850}
851\f
ca1d1d23
JB
852/* Subroutines of Lisp buffer search functions. */
853
854static Lisp_Object
b819a390 855search_command (string, bound, noerror, count, direction, RE, posix)
ca1d1d23
JB
856 Lisp_Object string, bound, noerror, count;
857 int direction;
858 int RE;
b819a390 859 int posix;
ca1d1d23
JB
860{
861 register int np;
9f43ad85 862 int lim, lim_byte;
ca1d1d23
JB
863 int n = direction;
864
865 if (!NILP (count))
866 {
867 CHECK_NUMBER (count, 3);
868 n *= XINT (count);
869 }
870
871 CHECK_STRING (string, 0);
872 if (NILP (bound))
9f43ad85
RS
873 {
874 if (n > 0)
875 lim = ZV, lim_byte = ZV_BYTE;
876 else
877 lim = BEGV, lim_byte = BEGV_BYTE;
878 }
ca1d1d23
JB
879 else
880 {
881 CHECK_NUMBER_COERCE_MARKER (bound, 1);
882 lim = XINT (bound);
9f43ad85 883 lim_byte = CHAR_TO_BYTE (lim);
6ec8bbd2 884 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
885 error ("Invalid search bound (wrong side of point)");
886 if (lim > ZV)
9f43ad85 887 lim = ZV, lim_byte = ZV_BYTE;
ca1d1d23 888 if (lim < BEGV)
9f43ad85 889 lim = BEGV, lim_byte = BEGV_BYTE;
ca1d1d23
JB
890 }
891
9f43ad85 892 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
ca1d1d23 893 (!NILP (current_buffer->case_fold_search)
facdc750 894 ? current_buffer->case_canon_table
3135e9fd 895 : Qnil),
ca1d1d23 896 (!NILP (current_buffer->case_fold_search)
facdc750 897 ? current_buffer->case_eqv_table
3135e9fd 898 : Qnil),
b819a390 899 posix);
ca1d1d23
JB
900 if (np <= 0)
901 {
902 if (NILP (noerror))
903 return signal_failure (string);
904 if (!EQ (noerror, Qt))
905 {
906 if (lim < BEGV || lim > ZV)
907 abort ();
9f43ad85 908 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
909 return Qnil;
910#if 0 /* This would be clean, but maybe programs depend on
911 a value of nil here. */
481399bf 912 np = lim;
a5f217b8 913#endif
ca1d1d23 914 }
481399bf
RS
915 else
916 return Qnil;
ca1d1d23
JB
917 }
918
919 if (np < BEGV || np > ZV)
920 abort ();
921
922 SET_PT (np);
923
924 return make_number (np);
925}
926\f
fa8ed3e0
RS
927/* Return 1 if REGEXP it matches just one constant string. */
928
b6d6a51c
KH
929static int
930trivial_regexp_p (regexp)
931 Lisp_Object regexp;
932{
4996330b 933 int len = XSTRING (regexp)->size_byte;
b6d6a51c
KH
934 unsigned char *s = XSTRING (regexp)->data;
935 unsigned char c;
936 while (--len >= 0)
937 {
938 switch (*s++)
939 {
940 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
941 return 0;
942 case '\\':
943 if (--len < 0)
944 return 0;
945 switch (*s++)
946 {
947 case '|': case '(': case ')': case '`': case '\'': case 'b':
948 case 'B': case '<': case '>': case 'w': case 'W': case 's':
866f60fd 949 case 'S': case '=':
5679531d 950 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 951 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
952 case '6': case '7': case '8': case '9':
953 return 0;
954 }
955 }
956 }
957 return 1;
958}
959
ca325161 960/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 961 starting at position POS and stopping at position LIM,
b819a390 962 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
963 a regular expression if RE is true.
964
965 If N is positive, searching is forward and LIM must be greater than POS.
966 If N is negative, searching is backward and LIM must be less than POS.
967
facdc750 968 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 969 or else the position at the beginning of the Nth occurrence
b819a390
RS
970 (if searching backward) or the end (if searching forward).
971
972 POSIX is nonzero if we want full backtracking (POSIX style)
973 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 974
facdc750
RS
975#define TRANSLATE(trt, d) \
976 (! NILP (trt) ? XINT (Faref (trt, make_number (d))) : (d))
977
b819a390 978static int
9f43ad85
RS
979search_buffer (string, pos, pos_byte, lim, lim_byte, n,
980 RE, trt, inverse_trt, posix)
ca1d1d23
JB
981 Lisp_Object string;
982 int pos;
9f43ad85 983 int pos_byte;
ca1d1d23 984 int lim;
9f43ad85 985 int lim_byte;
ca1d1d23
JB
986 int n;
987 int RE;
facdc750
RS
988 Lisp_Object trt;
989 Lisp_Object inverse_trt;
b819a390 990 int posix;
ca1d1d23
JB
991{
992 int len = XSTRING (string)->size;
0c8533c6 993 int len_byte = XSTRING (string)->size_byte;
facdc750 994 register int i;
ca1d1d23 995
7074fde6
FP
996 if (running_asynch_code)
997 save_search_regs ();
998
ca1d1d23 999 /* Null string is found at starting position. */
3f57a499 1000 if (len == 0)
ca325161
RS
1001 {
1002 set_search_regs (pos, 0);
1003 return pos;
1004 }
3f57a499
RS
1005
1006 /* Searching 0 times means don't move. */
1007 if (n == 0)
ca1d1d23
JB
1008 return pos;
1009
b6d6a51c 1010 if (RE && !trivial_regexp_p (string))
ca1d1d23 1011 {
facdc750
RS
1012 unsigned char *p1, *p2;
1013 int s1, s2;
487282dc
KH
1014 struct re_pattern_buffer *bufp;
1015
0c8533c6
RS
1016 bufp = compile_pattern (string, &search_regs, trt, posix,
1017 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23 1018
ca1d1d23
JB
1019 immediate_quit = 1; /* Quit immediately if user types ^G,
1020 because letting this function finish
1021 can take too long. */
1022 QUIT; /* Do a pending quit right away,
1023 to avoid paradoxical behavior */
1024 /* Get pointers and sizes of the two strings
1025 that make up the visible portion of the buffer. */
1026
1027 p1 = BEGV_ADDR;
fa8ed3e0 1028 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1029 p2 = GAP_END_ADDR;
fa8ed3e0 1030 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1031 if (s1 < 0)
1032 {
1033 p2 = p1;
fa8ed3e0 1034 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1035 s1 = 0;
1036 }
1037 if (s2 < 0)
1038 {
fa8ed3e0 1039 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1040 s2 = 0;
1041 }
8bb43c28
RS
1042 re_match_object = Qnil;
1043
ca1d1d23
JB
1044 while (n < 0)
1045 {
42db823b 1046 int val;
487282dc 1047 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1048 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1049 &search_regs,
42db823b 1050 /* Don't allow match past current point */
4996330b 1051 pos_byte - BEGV_BYTE);
ca1d1d23 1052 if (val == -2)
b6d6a51c
KH
1053 {
1054 matcher_overflow ();
1055 }
ca1d1d23
JB
1056 if (val >= 0)
1057 {
4746118a 1058 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1059 if (search_regs.start[i] >= 0)
1060 {
fa8ed3e0
RS
1061 search_regs.start[i]
1062 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1063 search_regs.end[i]
1064 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1065 }
a3668d92 1066 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1067 /* Set pos to the new position. */
1068 pos = search_regs.start[0];
1069 }
1070 else
1071 {
1072 immediate_quit = 0;
1073 return (n);
1074 }
1075 n++;
1076 }
1077 while (n > 0)
1078 {
42db823b 1079 int val;
487282dc 1080 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1081 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1082 &search_regs,
1083 lim_byte - BEGV_BYTE);
ca1d1d23 1084 if (val == -2)
b6d6a51c
KH
1085 {
1086 matcher_overflow ();
1087 }
ca1d1d23
JB
1088 if (val >= 0)
1089 {
4746118a 1090 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1091 if (search_regs.start[i] >= 0)
1092 {
fa8ed3e0
RS
1093 search_regs.start[i]
1094 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1095 search_regs.end[i]
1096 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1097 }
a3668d92 1098 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1099 pos = search_regs.end[0];
1100 }
1101 else
1102 {
1103 immediate_quit = 0;
1104 return (0 - n);
1105 }
1106 n--;
1107 }
1108 immediate_quit = 0;
1109 return (pos);
1110 }
1111 else /* non-RE case */
1112 {
facdc750
RS
1113 unsigned char *raw_pattern, *pat;
1114 int raw_pattern_size;
1115 int raw_pattern_size_byte;
1116 unsigned char *patbuf;
1117 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1118 unsigned char *base_pat = XSTRING (string)->data;
1119 int charset_base = -1;
1120 int simple = 1;
1121
1122 /* MULTIBYTE says whether the text to be searched is multibyte.
1123 We must convert PATTERN to match that, or we will not really
1124 find things right. */
1125
1126 if (multibyte == STRING_MULTIBYTE (string))
1127 {
1128 raw_pattern = (char *) XSTRING (string)->data;
1129 raw_pattern_size = XSTRING (string)->size;
1130 raw_pattern_size_byte = XSTRING (string)->size_byte;
1131 }
1132 else if (multibyte)
1133 {
1134 raw_pattern_size = XSTRING (string)->size;
1135 raw_pattern_size_byte
1136 = count_size_as_multibyte (XSTRING (string)->data,
1137 raw_pattern_size);
1138 raw_pattern = (char *) alloca (raw_pattern_size_byte + 1);
1139 copy_text (XSTRING (string)->data, raw_pattern,
1140 XSTRING (string)->size, 0, 1);
1141 }
1142 else
1143 {
1144 /* Converting multibyte to single-byte.
1145
1146 ??? Perhaps this conversion should be done in a special way
1147 by subtracting nonascii-insert-offset from each non-ASCII char,
1148 so that only the multibyte chars which really correspond to
1149 the chosen single-byte character set can possibly match. */
1150 raw_pattern_size = XSTRING (string)->size;
1151 raw_pattern_size_byte = XSTRING (string)->size;
1152 raw_pattern = (char *) alloca (raw_pattern_size + 1);
1153 copy_text (XSTRING (string)->data, raw_pattern,
1154 XSTRING (string)->size_byte, 1, 0);
1155 }
1156
1157 /* Copy and optionally translate the pattern. */
1158 len = raw_pattern_size;
1159 len_byte = raw_pattern_size_byte;
1160 patbuf = (unsigned char *) alloca (len_byte);
1161 pat = patbuf;
1162 base_pat = raw_pattern;
1163 if (multibyte)
1164 {
1165 while (--len >= 0)
1166 {
1167 unsigned char workbuf[4], *str;
1168 int c, translated;
1169 int in_charlen, charlen;
1170
1171 /* If we got here and the RE flag is set, it's because we're
1172 dealing with a regexp known to be trivial, so the backslash
1173 just quotes the next character. */
1174 if (RE && *base_pat == '\\')
1175 {
1176 len--;
1177 len_byte--;
1178 base_pat++;
1179 }
1180
1181 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1182 /* Translate the character, if requested. */
1183 translated = TRANSLATE (trt, c);
1184 /* If translation changed the byte-length, go back
1185 to the original character. */
1186 charlen = CHAR_STRING (translated, workbuf, str);
1187 if (in_charlen != charlen)
1188 {
1189 translated = c;
1190 charlen = CHAR_STRING (c, workbuf, str);
1191 }
1192
1193 /* Did this char actually get translated?
1194 Would any other char get translated into it? */
1195 if (translated != c
1196 || TRANSLATE (inverse_trt, c) != c)
1197 {
1198 /* Keep track of which character set row
1199 contains the characters that need translation. */
1200 int charset_base_code = c & ~0xff;
1201 if (charset_base == -1)
1202 charset_base = charset_base_code;
1203 else if (charset_base != charset_base_code)
1204 /* If two different rows appear, needing translation,
1205 then we cannot use boyer_moore search. */
1206 simple = 0;
1207 /* ??? Handa: this must do simple = 0
1208 if c is a composite character. */
1209 }
1210
1211 /* Store this character into the translated pattern. */
1212 bcopy (str, pat, charlen);
1213 pat += charlen;
1214 base_pat += in_charlen;
1215 len_byte -= in_charlen;
1216 }
1217 }
1218 else
1219 {
1220 while (--len >= 0)
1221 {
1222 int c, translated;
1223
1224 /* If we got here and the RE flag is set, it's because we're
1225 dealing with a regexp known to be trivial, so the backslash
1226 just quotes the next character. */
1227 if (RE && *base_pat == '\\')
1228 {
1229 len--;
1230 base_pat++;
1231 }
1232 c = *base_pat++;
1233 translated = TRANSLATE (trt, c);
1234
1235 /* Did this char actually get translated?
1236 Would any other char get translated into it? */
1237 if (translated != c
1238 || TRANSLATE (inverse_trt, c) != c)
1239 {
1240 /* Keep track of which character set row
1241 contains the characters that need translation. */
1242 int charset_base_code = c & ~0xff;
1243 if (charset_base == -1)
1244 charset_base = charset_base_code;
1245 else if (charset_base != charset_base_code)
1246 /* If two different rows appear, needing translation,
1247 then we cannot use boyer_moore search. */
1248 simple = 0;
1249 }
1250 *pat++ = translated;
1251 }
1252 }
1253
1254 len_byte = pat - patbuf;
1255 len = raw_pattern_size;
1256 pat = base_pat = patbuf;
1257
1258 if (simple)
1259 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1260 pos, pos_byte, lim, lim_byte);
1261 else
1262 return simple_search (n, pat, len, len_byte, trt,
1263 pos, pos_byte, lim, lim_byte);
1264 }
1265}
1266\f
1267/* Do a simple string search N times for the string PAT,
1268 whose length is LEN/LEN_BYTE,
1269 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1270 TRT is the translation table.
f8bd51c4 1271
facdc750
RS
1272 Return the character position where the match is found.
1273 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1274
facdc750
RS
1275 This kind of search works regardless of what is in PAT and
1276 regardless of what is in TRT. It is used in cases where
1277 boyer_moore cannot work. */
1278
1279static int
1280simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1281 int n;
1282 unsigned char *pat;
1283 int len, len_byte;
1284 Lisp_Object trt;
1285 int pos, pos_byte;
1286 int lim, lim_byte;
1287{
1288 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1289
1290 if (lim > pos && multibyte)
1291 while (n > 0)
1292 {
1293 while (1)
f8bd51c4 1294 {
facdc750
RS
1295 /* Try matching at position POS. */
1296 int this_pos = pos;
1297 int this_pos_byte = pos_byte;
1298 int this_len = len;
1299 int this_len_byte = len_byte;
1300 unsigned char *p = pat;
1301 if (pos + len > lim)
1302 goto stop;
1303
1304 while (this_len > 0)
1305 {
1306 int charlen, buf_charlen;
1307 int pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1308 int buf_ch;
1309
1310 this_len_byte -= charlen;
1311 this_len--;
1312 p += charlen;
1313
1314 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1315 ZV_BYTE - this_pos_byte,
1316 buf_charlen);
1317 this_pos_byte += buf_charlen;
1318 this_pos++;
1319 buf_ch = TRANSLATE (trt, buf_ch);
1320
1321 if (buf_ch != pat_ch)
1322 break;
1323 }
1324
1325 if (this_len == 0)
1326 {
1327 pos += len;
1328 pos_byte += len_byte;
1329 break;
1330 }
1331
1332 INC_BOTH (pos, pos_byte);
f8bd51c4 1333 }
facdc750
RS
1334
1335 n--;
1336 }
1337 else if (lim > pos)
1338 while (n > 0)
1339 {
1340 while (1)
f8bd51c4 1341 {
facdc750
RS
1342 /* Try matching at position POS. */
1343 int this_pos = pos;
1344 int this_len = len;
1345 unsigned char *p = pat;
1346
1347 if (pos + len > lim)
1348 goto stop;
1349
1350 while (this_len > 0)
1351 {
1352 int pat_ch = *p++;
1353 int buf_ch = FETCH_BYTE (this_pos);
1354 this_len--;
1355 this_pos++;
1356 buf_ch = TRANSLATE (trt, buf_ch);
1357
1358 if (buf_ch != pat_ch)
1359 break;
1360 }
1361
1362 if (this_len == 0)
1363 {
1364 pos += len;
1365 break;
1366 }
1367
1368 pos++;
f8bd51c4 1369 }
facdc750
RS
1370
1371 n--;
1372 }
1373 /* Backwards search. */
1374 else if (lim < pos && multibyte)
1375 while (n < 0)
1376 {
1377 while (1)
f8bd51c4 1378 {
facdc750
RS
1379 /* Try matching at position POS. */
1380 int this_pos = pos - len;
1381 int this_pos_byte = pos_byte - len_byte;
1382 int this_len = len;
1383 int this_len_byte = len_byte;
1384 unsigned char *p = pat;
1385
1386 if (pos - len < lim)
1387 goto stop;
1388
1389 while (this_len > 0)
1390 {
1391 int charlen, buf_charlen;
1392 int pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1393 int buf_ch;
1394
1395 this_len_byte -= charlen;
1396 this_len--;
1397 p += charlen;
1398
1399 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1400 ZV_BYTE - this_pos_byte,
1401 buf_charlen);
1402 this_pos_byte += buf_charlen;
1403 this_pos++;
1404 buf_ch = TRANSLATE (trt, buf_ch);
1405
1406 if (buf_ch != pat_ch)
1407 break;
1408 }
1409
1410 if (this_len == 0)
1411 {
1412 pos -= len;
1413 pos_byte -= len_byte;
1414 break;
1415 }
1416
1417 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1418 }
1419
facdc750
RS
1420 n++;
1421 }
1422 else if (lim < pos)
1423 while (n < 0)
1424 {
1425 while (1)
b6d6a51c 1426 {
facdc750
RS
1427 /* Try matching at position POS. */
1428 int this_pos = pos - len;
1429 int this_len = len;
1430 unsigned char *p = pat;
1431
1432 if (pos - len < lim)
1433 goto stop;
1434
1435 while (this_len > 0)
1436 {
1437 int pat_ch = *p++;
1438 int buf_ch = FETCH_BYTE (this_pos);
1439 this_len--;
1440 this_pos++;
1441 buf_ch = TRANSLATE (trt, buf_ch);
1442
1443 if (buf_ch != pat_ch)
1444 break;
1445 }
1446
1447 if (this_len == 0)
b6d6a51c 1448 {
facdc750
RS
1449 pos -= len;
1450 break;
b6d6a51c 1451 }
facdc750
RS
1452
1453 pos--;
b6d6a51c 1454 }
facdc750
RS
1455
1456 n++;
b6d6a51c 1457 }
facdc750
RS
1458
1459 stop:
1460 if (n == 0)
1461 return pos;
1462 else if (n > 0)
1463 return -n;
1464 else
1465 return n;
1466}
1467\f
1468/* Do Boyer-Moore search N times for the string PAT,
1469 whose length is LEN/LEN_BYTE,
1470 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1471 DIRECTION says which direction we search in.
1472 TRT and INVERSE_TRT are translation tables.
1473
1474 This kind of search works if all the characters in PAT that have
1475 nontrivial translation are the same aside from the last byte. This
1476 makes it possible to translate just the last byte of a character,
1477 and do so after just a simple test of the context.
1478
1479 If that criterion is not satisfied, do not call this function. */
1480
1481static int
1482boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1483 pos, pos_byte, lim, lim_byte)
1484 int n;
1485 unsigned char *base_pat;
1486 int len, len_byte;
1487 Lisp_Object trt;
1488 Lisp_Object inverse_trt;
1489 int pos, pos_byte;
1490 int lim, lim_byte;
1491{
1492 int direction = ((n > 0) ? 1 : -1);
1493 register int dirlen;
1494 int infinity, limit, k, stride_for_teases;
1495 register int *BM_tab;
1496 int *BM_tab_base;
1497 register unsigned char *cursor, *p_limit;
1498 register int i, j;
1499 unsigned char *pat;
1500 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1501
1502 unsigned char simple_translate[0400];
1503 int translate_prev_byte;
1504 int translate_anteprev_byte;
1505
1506#ifdef C_ALLOCA
1507 int BM_tab_space[0400];
1508 BM_tab = &BM_tab_space[0];
1509#else
1510 BM_tab = (int *) alloca (0400 * sizeof (int));
1511#endif
1512 /* The general approach is that we are going to maintain that we know */
1513 /* the first (closest to the present position, in whatever direction */
1514 /* we're searching) character that could possibly be the last */
1515 /* (furthest from present position) character of a valid match. We */
1516 /* advance the state of our knowledge by looking at that character */
1517 /* and seeing whether it indeed matches the last character of the */
1518 /* pattern. If it does, we take a closer look. If it does not, we */
1519 /* move our pointer (to putative last characters) as far as is */
1520 /* logically possible. This amount of movement, which I call a */
1521 /* stride, will be the length of the pattern if the actual character */
1522 /* appears nowhere in the pattern, otherwise it will be the distance */
1523 /* from the last occurrence of that character to the end of the */
1524 /* pattern. */
1525 /* As a coding trick, an enormous stride is coded into the table for */
1526 /* characters that match the last character. This allows use of only */
1527 /* a single test, a test for having gone past the end of the */
1528 /* permissible match region, to test for both possible matches (when */
1529 /* the stride goes past the end immediately) and failure to */
1530 /* match (where you get nudged past the end one stride at a time). */
1531
1532 /* Here we make a "mickey mouse" BM table. The stride of the search */
1533 /* is determined only by the last character of the putative match. */
1534 /* If that character does not match, we will stride the proper */
1535 /* distance to propose a match that superimposes it on the last */
1536 /* instance of a character that matches it (per trt), or misses */
1537 /* it entirely if there is none. */
1538
1539 dirlen = len_byte * direction;
1540 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1541 if (direction < 0)
1542 pat = (base_pat += len_byte - 1);
1543 else
1544 pat = base_pat;
1545 BM_tab_base = BM_tab;
1546 BM_tab += 0400;
1547 j = dirlen; /* to get it in a register */
1548 /* A character that does not appear in the pattern induces a */
1549 /* stride equal to the pattern length. */
1550 while (BM_tab_base != BM_tab)
1551 {
1552 *--BM_tab = j;
1553 *--BM_tab = j;
1554 *--BM_tab = j;
1555 *--BM_tab = j;
1556 }
1557
1558 /* We use this for translation, instead of TRT itself.
1559 We fill this in to handle the characters that actually
1560 occur in the pattern. Others don't matter anyway! */
1561 bzero (simple_translate, sizeof simple_translate);
1562 for (i = 0; i < 0400; i++)
1563 simple_translate[i] = i;
1564
1565 i = 0;
1566 while (i != infinity)
1567 {
1568 unsigned char *ptr = pat + i;
1569 i += direction;
1570 if (i == dirlen)
1571 i = infinity;
1572 if (! NILP (trt))
ca1d1d23 1573 {
facdc750
RS
1574 int ch;
1575 int this_translated = 1;
1576
1577 if (multibyte
1578 && (ptr + 1 == pat + len_byte || CHAR_HEAD_P (ptr[1])))
ca1d1d23 1579 {
facdc750
RS
1580 unsigned char *charstart = ptr;
1581 while (! CHAR_HEAD_P (*charstart))
1582 charstart--;
1583 if (! CHAR_HEAD_P (*ptr))
1584 {
1585 translate_prev_byte = ptr[-1];
1586 if (! CHAR_HEAD_P (translate_prev_byte))
1587 translate_anteprev_byte = ptr[-2];
1588 }
1589 ch = STRING_CHAR (charstart, ptr - charstart + 1);
1590 ch = TRANSLATE (trt, ch);
ca1d1d23 1591 }
facdc750
RS
1592 else if (!multibyte)
1593 ch = TRANSLATE (trt, *ptr);
ca1d1d23
JB
1594 else
1595 {
facdc750
RS
1596 ch = *ptr;
1597 this_translated = 0;
ca1d1d23 1598 }
facdc750
RS
1599
1600 k = j = (unsigned char) ch;
1601 if (i == infinity)
1602 stride_for_teases = BM_tab[j];
1603 BM_tab[j] = dirlen - i;
1604 /* A translation table is accompanied by its inverse -- see */
1605 /* comment following downcase_table for details */
1606 if (this_translated)
1607 while (1)
1608 {
1609 ch = TRANSLATE (inverse_trt, ch);
1610 /* For all the characters that map into K,
1611 set up simple_translate to map them into K. */
1612 simple_translate[(unsigned char) ch] = k;
1613 if ((unsigned char) ch == k)
1614 break;
1615 BM_tab[(unsigned char) ch] = dirlen - i;
1616 }
1617 }
1618 else
1619 {
1620 j = *ptr;
1621
1622 if (i == infinity)
1623 stride_for_teases = BM_tab[j];
1624 BM_tab[j] = dirlen - i;
ca1d1d23 1625 }
facdc750
RS
1626 /* stride_for_teases tells how much to stride if we get a */
1627 /* match on the far character but are subsequently */
1628 /* disappointed, by recording what the stride would have been */
1629 /* for that character if the last character had been */
1630 /* different. */
1631 }
1632 infinity = dirlen - infinity;
1633 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1634 /* loop invariant - POS_BYTE points at where last char (first
1635 char if reverse) of pattern would align in a possible match. */
1636 while (n != 0)
1637 {
1638 int tail_end;
1639 unsigned char *tail_end_ptr;
1640
1641 /* It's been reported that some (broken) compiler thinks that
1642 Boolean expressions in an arithmetic context are unsigned.
1643 Using an explicit ?1:0 prevents this. */
1644 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1645 < 0)
1646 return (n * (0 - direction));
1647 /* First we do the part we can by pointers (maybe nothing) */
1648 QUIT;
1649 pat = base_pat;
1650 limit = pos_byte - dirlen + direction;
1651 limit = ((direction > 0)
1652 ? BUFFER_CEILING_OF (limit)
1653 : BUFFER_FLOOR_OF (limit));
1654 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1655 can take on without hitting edge of buffer or the gap. */
1656 limit = ((direction > 0)
1657 ? min (lim_byte - 1, min (limit, pos_byte + 20000))
1658 : max (lim_byte, max (limit, pos_byte - 20000)));
1659 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1660 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1661
1662 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1663 {
facdc750
RS
1664 unsigned char *p2;
1665
1666 p_limit = BYTE_POS_ADDR (limit);
1667 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1668 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1669 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1670 {
facdc750 1671 if (direction > 0) /* worth duplicating */
ca1d1d23 1672 {
facdc750
RS
1673 /* Use signed comparison if appropriate
1674 to make cursor+infinity sure to be > p_limit.
1675 Assuming that the buffer lies in a range of addresses
1676 that are all "positive" (as ints) or all "negative",
1677 either kind of comparison will work as long
1678 as we don't step by infinity. So pick the kind
1679 that works when we do step by infinity. */
1680 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1681 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1682 cursor += BM_tab[*cursor];
ca1d1d23 1683 else
facdc750
RS
1684 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1685 cursor += BM_tab[*cursor];
1686 }
1687 else
1688 {
1689 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1690 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1691 cursor += BM_tab[*cursor];
1692 else
1693 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1694 cursor += BM_tab[*cursor];
1695 }
ca1d1d23 1696/* If you are here, cursor is beyond the end of the searched region. */
facdc750
RS
1697/* This can happen if you match on the far character of the pattern, */
1698/* because the "stride" of that character is infinity, a number able */
1699/* to throw you well beyond the end of the search. It can also */
1700/* happen if you fail to match within the permitted region and would */
1701/* otherwise try a character beyond that region */
1702 if ((cursor - p_limit) * direction <= len_byte)
1703 break; /* a small overrun is genuine */
1704 cursor -= infinity; /* large overrun = hit */
1705 i = dirlen - direction;
1706 if (! NILP (trt))
1707 {
1708 while ((i -= direction) + direction != 0)
ca1d1d23 1709 {
facdc750
RS
1710 int ch;
1711 cursor -= direction;
1712 /* Translate only the last byte of a character. */
1713 if (! multibyte
1714 || ((cursor == tail_end_ptr
1715 || CHAR_HEAD_P (cursor[1]))
1716 && (CHAR_HEAD_P (cursor[0])
1717 || (translate_prev_byte == cursor[-1]
1718 && (CHAR_HEAD_P (translate_prev_byte)
1719 || translate_anteprev_byte == cursor[-2])))))
1720 ch = simple_translate[*cursor];
1721 else
1722 ch = *cursor;
1723 if (pat[i] != ch)
1724 break;
ca1d1d23 1725 }
facdc750
RS
1726 }
1727 else
1728 {
1729 while ((i -= direction) + direction != 0)
ca1d1d23 1730 {
facdc750
RS
1731 cursor -= direction;
1732 if (pat[i] != *cursor)
1733 break;
ca1d1d23 1734 }
facdc750
RS
1735 }
1736 cursor += dirlen - i - direction; /* fix cursor */
1737 if (i + direction == 0)
1738 {
1739 int position;
0c8533c6 1740
facdc750 1741 cursor -= direction;
1113d9db 1742
facdc750
RS
1743 position = pos_byte + cursor - p2 + ((direction > 0)
1744 ? 1 - len_byte : 0);
1745 set_search_regs (position, len_byte);
ca325161 1746
facdc750
RS
1747 if ((n -= direction) != 0)
1748 cursor += dirlen; /* to resume search */
ca1d1d23 1749 else
facdc750
RS
1750 return ((direction > 0)
1751 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1752 }
facdc750
RS
1753 else
1754 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 1755 }
facdc750
RS
1756 pos_byte += cursor - p2;
1757 }
1758 else
1759 /* Now we'll pick up a clump that has to be done the hard */
1760 /* way because it covers a discontinuity */
1761 {
1762 limit = ((direction > 0)
1763 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1764 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1765 limit = ((direction > 0)
1766 ? min (limit + len_byte, lim_byte - 1)
1767 : max (limit - len_byte, lim_byte));
1768 /* LIMIT is now the last value POS_BYTE can have
1769 and still be valid for a possible match. */
1770 while (1)
ca1d1d23 1771 {
facdc750
RS
1772 /* This loop can be coded for space rather than */
1773 /* speed because it will usually run only once. */
1774 /* (the reach is at most len + 21, and typically */
1775 /* does not exceed len) */
1776 while ((limit - pos_byte) * direction >= 0)
1777 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1778 /* now run the same tests to distinguish going off the */
1779 /* end, a match or a phony match. */
1780 if ((pos_byte - limit) * direction <= len_byte)
1781 break; /* ran off the end */
1782 /* Found what might be a match.
1783 Set POS_BYTE back to last (first if reverse) pos. */
1784 pos_byte -= infinity;
1785 i = dirlen - direction;
1786 while ((i -= direction) + direction != 0)
ca1d1d23 1787 {
facdc750
RS
1788 int ch;
1789 unsigned char *ptr;
1790 pos_byte -= direction;
1791 ptr = BYTE_POS_ADDR (pos_byte);
1792 /* Translate only the last byte of a character. */
1793 if (! multibyte
1794 || ((ptr == tail_end_ptr
1795 || CHAR_HEAD_P (ptr[1]))
1796 && (CHAR_HEAD_P (ptr[0])
1797 || (translate_prev_byte == ptr[-1]
1798 && (CHAR_HEAD_P (translate_prev_byte)
1799 || translate_anteprev_byte == ptr[-2])))))
1800 ch = simple_translate[*ptr];
1801 else
1802 ch = *ptr;
1803 if (pat[i] != ch)
1804 break;
1805 }
1806 /* Above loop has moved POS_BYTE part or all the way
1807 back to the first pos (last pos if reverse).
1808 Set it once again at the last (first if reverse) char. */
1809 pos_byte += dirlen - i- direction;
1810 if (i + direction == 0)
1811 {
1812 int position;
1813 pos_byte -= direction;
1113d9db 1814
facdc750 1815 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
0c8533c6 1816
facdc750 1817 set_search_regs (position, len_byte);
ca325161 1818
facdc750
RS
1819 if ((n -= direction) != 0)
1820 pos_byte += dirlen; /* to resume search */
ca1d1d23 1821 else
facdc750
RS
1822 return ((direction > 0)
1823 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1824 }
facdc750
RS
1825 else
1826 pos_byte += stride_for_teases;
1827 }
1828 }
1829 /* We have done one clump. Can we continue? */
1830 if ((lim_byte - pos_byte) * direction < 0)
1831 return ((0 - n) * direction);
ca1d1d23 1832 }
facdc750 1833 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 1834}
ca325161 1835
fa8ed3e0 1836/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
ca325161
RS
1837 for a match just found in the current buffer. */
1838
1839static void
fa8ed3e0
RS
1840set_search_regs (beg_byte, nbytes)
1841 int beg_byte, nbytes;
ca325161
RS
1842{
1843 /* Make sure we have registers in which to store
1844 the match position. */
1845 if (search_regs.num_regs == 0)
1846 {
2d4a771a
RS
1847 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1848 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 1849 search_regs.num_regs = 2;
ca325161
RS
1850 }
1851
fa8ed3e0
RS
1852 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1853 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 1854 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 1855}
ca1d1d23
JB
1856\f
1857/* Given a string of words separated by word delimiters,
1858 compute a regexp that matches those exact words
1859 separated by arbitrary punctuation. */
1860
1861static Lisp_Object
1862wordify (string)
1863 Lisp_Object string;
1864{
1865 register unsigned char *p, *o;
0c8533c6 1866 register int i, i_byte, len, punct_count = 0, word_count = 0;
ca1d1d23 1867 Lisp_Object val;
0c8533c6
RS
1868 int prev_c = 0;
1869 int adjust;
ca1d1d23
JB
1870
1871 CHECK_STRING (string, 0);
1872 p = XSTRING (string)->data;
1873 len = XSTRING (string)->size;
1874
0c8533c6
RS
1875 for (i = 0, i_byte = 0; i < len; )
1876 {
1877 int c;
1878
1879 if (STRING_MULTIBYTE (string))
1880 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1881 else
1882 c = XSTRING (string)->data[i++];
1883
1884 if (SYNTAX (c) != Sword)
1885 {
1886 punct_count++;
1887 if (i > 0 && SYNTAX (prev_c) == Sword)
1888 word_count++;
1889 }
ca1d1d23 1890
0c8533c6
RS
1891 prev_c = c;
1892 }
1893
1894 if (SYNTAX (prev_c) == Sword)
1895 word_count++;
1896 if (!word_count)
1897 return build_string ("");
1898
1899 adjust = - punct_count + 5 * (word_count - 1) + 4;
1900 val = make_uninit_multibyte_string (len + adjust,
1901 XSTRING (string)->size_byte + adjust);
ca1d1d23
JB
1902
1903 o = XSTRING (val)->data;
1904 *o++ = '\\';
1905 *o++ = 'b';
1906
0c8533c6 1907 for (i = 0; i < XSTRING (val)->size_byte; i++)
ca1d1d23
JB
1908 if (SYNTAX (p[i]) == Sword)
1909 *o++ = p[i];
1910 else if (i > 0 && SYNTAX (p[i-1]) == Sword && --word_count)
1911 {
1912 *o++ = '\\';
1913 *o++ = 'W';
1914 *o++ = '\\';
1915 *o++ = 'W';
1916 *o++ = '*';
1917 }
1918
1919 *o++ = '\\';
1920 *o++ = 'b';
1921
1922 return val;
1923}
1924\f
1925DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
6af43974 1926 "MSearch backward: ",
ca1d1d23
JB
1927 "Search backward from point for STRING.\n\
1928Set point to the beginning of the occurrence found, and return point.\n\
1929An optional second argument bounds the search; it is a buffer position.\n\
1930The match found must not extend before that position.\n\
1931Optional third argument, if t, means if fail just return nil (no error).\n\
1932 If not nil and not t, position at limit of search and return nil.\n\
1933Optional fourth argument is repeat count--search for successive occurrences.\n\
1934See also the functions `match-beginning', `match-end' and `replace-match'.")
1935 (string, bound, noerror, count)
1936 Lisp_Object string, bound, noerror, count;
1937{
b819a390 1938 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
1939}
1940
6af43974 1941DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
ca1d1d23
JB
1942 "Search forward from point for STRING.\n\
1943Set point to the end of the occurrence found, and return point.\n\
1944An optional second argument bounds the search; it is a buffer position.\n\
1945The match found must not extend after that position. nil is equivalent\n\
1946 to (point-max).\n\
1947Optional third argument, if t, means if fail just return nil (no error).\n\
1948 If not nil and not t, move to limit of search and return nil.\n\
1949Optional fourth argument is repeat count--search for successive occurrences.\n\
1950See also the functions `match-beginning', `match-end' and `replace-match'.")
1951 (string, bound, noerror, count)
1952 Lisp_Object string, bound, noerror, count;
1953{
b819a390 1954 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
1955}
1956
1957DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
1958 "sWord search backward: ",
1959 "Search backward from point for STRING, ignoring differences in punctuation.\n\
1960Set point to the beginning of the occurrence found, and return point.\n\
1961An optional second argument bounds the search; it is a buffer position.\n\
1962The match found must not extend before that position.\n\
1963Optional third argument, if t, means if fail just return nil (no error).\n\
1964 If not nil and not t, move to limit of search and return nil.\n\
1965Optional fourth argument is repeat count--search for successive occurrences.")
1966 (string, bound, noerror, count)
1967 Lisp_Object string, bound, noerror, count;
1968{
b819a390 1969 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
1970}
1971
1972DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
1973 "sWord search: ",
1974 "Search forward from point for STRING, ignoring differences in punctuation.\n\
1975Set point to the end of the occurrence found, and return point.\n\
1976An optional second argument bounds the search; it is a buffer position.\n\
1977The match found must not extend after that position.\n\
1978Optional third argument, if t, means if fail just return nil (no error).\n\
1979 If not nil and not t, move to limit of search and return nil.\n\
1980Optional fourth argument is repeat count--search for successive occurrences.")
1981 (string, bound, noerror, count)
1982 Lisp_Object string, bound, noerror, count;
1983{
b819a390 1984 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
1985}
1986
1987DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
1988 "sRE search backward: ",
1989 "Search backward from point for match for regular expression REGEXP.\n\
1990Set point to the beginning of the match, and return point.\n\
1991The match found is the one starting last in the buffer\n\
19c0a730 1992and yet ending before the origin of the search.\n\
ca1d1d23
JB
1993An optional second argument bounds the search; it is a buffer position.\n\
1994The match found must start at or after that position.\n\
1995Optional third argument, if t, means if fail just return nil (no error).\n\
1996 If not nil and not t, move to limit of search and return nil.\n\
1997Optional fourth argument is repeat count--search for successive occurrences.\n\
1998See also the functions `match-beginning', `match-end' and `replace-match'.")
19c0a730
KH
1999 (regexp, bound, noerror, count)
2000 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2001{
b819a390 2002 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2003}
2004
2005DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2006 "sRE search: ",
2007 "Search forward from point for regular expression REGEXP.\n\
2008Set point to the end of the occurrence found, and return point.\n\
2009An optional second argument bounds the search; it is a buffer position.\n\
2010The match found must not extend after that position.\n\
2011Optional third argument, if t, means if fail just return nil (no error).\n\
2012 If not nil and not t, move to limit of search and return nil.\n\
2013Optional fourth argument is repeat count--search for successive occurrences.\n\
2014See also the functions `match-beginning', `match-end' and `replace-match'.")
19c0a730
KH
2015 (regexp, bound, noerror, count)
2016 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2017{
b819a390
RS
2018 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2019}
2020
2021DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2022 "sPosix search backward: ",
2023 "Search backward from point for match for regular expression REGEXP.\n\
2024Find the longest match in accord with Posix regular expression rules.\n\
2025Set point to the beginning of the match, and return point.\n\
2026The match found is the one starting last in the buffer\n\
2027and yet ending before the origin of the search.\n\
2028An optional second argument bounds the search; it is a buffer position.\n\
2029The match found must start at or after that position.\n\
2030Optional third argument, if t, means if fail just return nil (no error).\n\
2031 If not nil and not t, move to limit of search and return nil.\n\
2032Optional fourth argument is repeat count--search for successive occurrences.\n\
2033See also the functions `match-beginning', `match-end' and `replace-match'.")
2034 (regexp, bound, noerror, count)
2035 Lisp_Object regexp, bound, noerror, count;
2036{
2037 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2038}
2039
2040DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2041 "sPosix search: ",
2042 "Search forward from point for regular expression REGEXP.\n\
2043Find the longest match in accord with Posix regular expression rules.\n\
2044Set point to the end of the occurrence found, and return point.\n\
2045An optional second argument bounds the search; it is a buffer position.\n\
2046The match found must not extend after that position.\n\
2047Optional third argument, if t, means if fail just return nil (no error).\n\
2048 If not nil and not t, move to limit of search and return nil.\n\
2049Optional fourth argument is repeat count--search for successive occurrences.\n\
2050See also the functions `match-beginning', `match-end' and `replace-match'.")
2051 (regexp, bound, noerror, count)
2052 Lisp_Object regexp, bound, noerror, count;
2053{
2054 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2055}
2056\f
d7a5ad5f 2057DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
ca1d1d23
JB
2058 "Replace text matched by last search with NEWTEXT.\n\
2059If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
5b9cf4b2
RS
2060Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
2061based on the replaced text.\n\
2062If the replaced text has only capital letters\n\
2063and has at least one multiletter word, convert NEWTEXT to all caps.\n\
2064If the replaced text has at least one word starting with a capital letter,\n\
2065then capitalize each word in NEWTEXT.\n\n\
ca1d1d23
JB
2066If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
2067Otherwise treat `\\' as special:\n\
2068 `\\&' in NEWTEXT means substitute original matched text.\n\
2069 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
2070 If Nth parens didn't match, substitute nothing.\n\
2071 `\\\\' means insert one `\\'.\n\
1113d9db 2072FIXEDCASE and LITERAL are optional arguments.\n\
080c45fd
RS
2073Leaves point at end of replacement text.\n\
2074\n\
2075The optional fourth argument STRING can be a string to modify.\n\
2076In that case, this function creates and returns a new string\n\
d7a5ad5f
RS
2077which is made by replacing the part of STRING that was matched.\n\
2078\n\
2079The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
2080It says to replace just that subexpression instead of the whole match.\n\
2081This is useful only after a regular expression search or match\n\
2082since only regular expressions have distinguished subexpressions.")
2083 (newtext, fixedcase, literal, string, subexp)
2084 Lisp_Object newtext, fixedcase, literal, string, subexp;
ca1d1d23
JB
2085{
2086 enum { nochange, all_caps, cap_initial } case_action;
2087 register int pos, last;
2088 int some_multiletter_word;
97832bd0 2089 int some_lowercase;
73dc8771 2090 int some_uppercase;
208767c3 2091 int some_nonuppercase_initial;
ca1d1d23
JB
2092 register int c, prevc;
2093 int inslen;
d7a5ad5f 2094 int sub;
3e18eecf 2095 int opoint, newpoint;
ca1d1d23 2096
16fdc568 2097 CHECK_STRING (newtext, 0);
ca1d1d23 2098
080c45fd
RS
2099 if (! NILP (string))
2100 CHECK_STRING (string, 4);
2101
ca1d1d23
JB
2102 case_action = nochange; /* We tried an initialization */
2103 /* but some C compilers blew it */
4746118a
JB
2104
2105 if (search_regs.num_regs <= 0)
2106 error ("replace-match called before any match found");
2107
d7a5ad5f
RS
2108 if (NILP (subexp))
2109 sub = 0;
2110 else
2111 {
2112 CHECK_NUMBER (subexp, 3);
2113 sub = XINT (subexp);
2114 if (sub < 0 || sub >= search_regs.num_regs)
2115 args_out_of_range (subexp, make_number (search_regs.num_regs));
2116 }
2117
080c45fd
RS
2118 if (NILP (string))
2119 {
d7a5ad5f
RS
2120 if (search_regs.start[sub] < BEGV
2121 || search_regs.start[sub] > search_regs.end[sub]
2122 || search_regs.end[sub] > ZV)
2123 args_out_of_range (make_number (search_regs.start[sub]),
2124 make_number (search_regs.end[sub]));
080c45fd
RS
2125 }
2126 else
2127 {
d7a5ad5f
RS
2128 if (search_regs.start[sub] < 0
2129 || search_regs.start[sub] > search_regs.end[sub]
2130 || search_regs.end[sub] > XSTRING (string)->size)
2131 args_out_of_range (make_number (search_regs.start[sub]),
2132 make_number (search_regs.end[sub]));
080c45fd 2133 }
ca1d1d23
JB
2134
2135 if (NILP (fixedcase))
2136 {
fa8ed3e0 2137 int beg;
ca1d1d23
JB
2138 /* Decide how to casify by examining the matched text. */
2139
fa8ed3e0
RS
2140 if (NILP (string))
2141 last = CHAR_TO_BYTE (search_regs.end[sub]);
2142 else
2143 last = search_regs.end[sub];
2144
2145 if (NILP (string))
2146 beg = CHAR_TO_BYTE (search_regs.start[sub]);
2147 else
2148 beg = search_regs.start[sub];
2149
ca1d1d23
JB
2150 prevc = '\n';
2151 case_action = all_caps;
2152
2153 /* some_multiletter_word is set nonzero if any original word
2154 is more than one letter long. */
2155 some_multiletter_word = 0;
97832bd0 2156 some_lowercase = 0;
208767c3 2157 some_nonuppercase_initial = 0;
73dc8771 2158 some_uppercase = 0;
ca1d1d23 2159
fa8ed3e0 2160 for (pos = beg; pos < last; pos++)
ca1d1d23 2161 {
080c45fd 2162 if (NILP (string))
5679531d 2163 c = FETCH_BYTE (pos);
080c45fd
RS
2164 else
2165 c = XSTRING (string)->data[pos];
2166
ca1d1d23
JB
2167 if (LOWERCASEP (c))
2168 {
2169 /* Cannot be all caps if any original char is lower case */
2170
97832bd0 2171 some_lowercase = 1;
ca1d1d23 2172 if (SYNTAX (prevc) != Sword)
208767c3 2173 some_nonuppercase_initial = 1;
ca1d1d23
JB
2174 else
2175 some_multiletter_word = 1;
2176 }
2177 else if (!NOCASEP (c))
2178 {
73dc8771 2179 some_uppercase = 1;
97832bd0 2180 if (SYNTAX (prevc) != Sword)
c4d460ce 2181 ;
97832bd0 2182 else
ca1d1d23
JB
2183 some_multiletter_word = 1;
2184 }
208767c3
RS
2185 else
2186 {
2187 /* If the initial is a caseless word constituent,
2188 treat that like a lowercase initial. */
2189 if (SYNTAX (prevc) != Sword)
2190 some_nonuppercase_initial = 1;
2191 }
ca1d1d23
JB
2192
2193 prevc = c;
2194 }
2195
97832bd0
RS
2196 /* Convert to all caps if the old text is all caps
2197 and has at least one multiletter word. */
2198 if (! some_lowercase && some_multiletter_word)
2199 case_action = all_caps;
c4d460ce 2200 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2201 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2202 case_action = cap_initial;
208767c3 2203 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2204 /* Should x -> yz, operating on X, give Yz or YZ?
2205 We'll assume the latter. */
2206 case_action = all_caps;
97832bd0
RS
2207 else
2208 case_action = nochange;
ca1d1d23
JB
2209 }
2210
080c45fd
RS
2211 /* Do replacement in a string. */
2212 if (!NILP (string))
2213 {
2214 Lisp_Object before, after;
2215
2216 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2217 make_number (search_regs.start[sub]));
2218 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2219
636a5e28
RS
2220 /* Substitute parts of the match into NEWTEXT
2221 if desired. */
080c45fd
RS
2222 if (NILP (literal))
2223 {
2224 int lastpos = -1;
0c8533c6 2225 int lastpos_byte = -1;
080c45fd
RS
2226 /* We build up the substituted string in ACCUM. */
2227 Lisp_Object accum;
2228 Lisp_Object middle;
0c8533c6 2229 int pos_byte;
080c45fd
RS
2230
2231 accum = Qnil;
2232
0c8533c6 2233 for (pos_byte = 0, pos = 0; pos_byte < XSTRING (newtext)->size_byte;)
080c45fd
RS
2234 {
2235 int substart = -1;
2236 int subend;
1e79ec24 2237 int delbackslash = 0;
080c45fd 2238
0c8533c6
RS
2239 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2240
080c45fd
RS
2241 if (c == '\\')
2242 {
0c8533c6 2243 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
080c45fd
RS
2244 if (c == '&')
2245 {
d7a5ad5f
RS
2246 substart = search_regs.start[sub];
2247 subend = search_regs.end[sub];
080c45fd
RS
2248 }
2249 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2250 {
ad10348f 2251 if (search_regs.start[c - '0'] >= 0)
080c45fd
RS
2252 {
2253 substart = search_regs.start[c - '0'];
2254 subend = search_regs.end[c - '0'];
2255 }
2256 }
1e79ec24
KH
2257 else if (c == '\\')
2258 delbackslash = 1;
636a5e28
RS
2259 else
2260 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2261 }
2262 if (substart >= 0)
2263 {
2264 if (pos - 1 != lastpos + 1)
0c8533c6
RS
2265 middle = substring_both (newtext, lastpos + 1,
2266 lastpos_byte + 1,
2267 pos - 1, pos_byte - 1);
080c45fd
RS
2268 else
2269 middle = Qnil;
2270 accum = concat3 (accum, middle,
0c8533c6
RS
2271 Fsubstring (string,
2272 make_number (substart),
080c45fd
RS
2273 make_number (subend)));
2274 lastpos = pos;
0c8533c6 2275 lastpos_byte = pos_byte;
080c45fd 2276 }
1e79ec24
KH
2277 else if (delbackslash)
2278 {
0c8533c6
RS
2279 middle = substring_both (newtext, lastpos + 1,
2280 lastpos_byte + 1,
2281 pos, pos_byte);
2282
1e79ec24
KH
2283 accum = concat2 (accum, middle);
2284 lastpos = pos;
0c8533c6 2285 lastpos_byte = pos_byte;
1e79ec24 2286 }
080c45fd
RS
2287 }
2288
2289 if (pos != lastpos + 1)
0c8533c6
RS
2290 middle = substring_both (newtext, lastpos + 1,
2291 lastpos_byte + 1,
2292 pos, pos_byte);
080c45fd
RS
2293 else
2294 middle = Qnil;
2295
2296 newtext = concat2 (accum, middle);
2297 }
2298
636a5e28 2299 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2300 if (case_action == all_caps)
2301 newtext = Fupcase (newtext);
2302 else if (case_action == cap_initial)
2b2eead9 2303 newtext = Fupcase_initials (newtext);
080c45fd
RS
2304
2305 return concat3 (before, newtext, after);
2306 }
2307
b0eba991
RS
2308 /* Record point, the move (quietly) to the start of the match. */
2309 if (PT > search_regs.start[sub])
2310 opoint = PT - ZV;
2311 else
2312 opoint = PT;
2313
fa8ed3e0 2314 TEMP_SET_PT (search_regs.start[sub]);
b0eba991 2315
9a76659d
JB
2316 /* We insert the replacement text before the old text, and then
2317 delete the original text. This means that markers at the
2318 beginning or end of the original will float to the corresponding
2319 position in the replacement. */
ca1d1d23 2320 if (!NILP (literal))
16fdc568 2321 Finsert_and_inherit (1, &newtext);
ca1d1d23
JB
2322 else
2323 {
2324 struct gcpro gcpro1;
16fdc568 2325 GCPRO1 (newtext);
ca1d1d23 2326
16fdc568 2327 for (pos = 0; pos < XSTRING (newtext)->size; pos++)
ca1d1d23 2328 {
6ec8bbd2 2329 int offset = PT - search_regs.start[sub];
9a76659d 2330
16fdc568 2331 c = XSTRING (newtext)->data[pos];
ca1d1d23
JB
2332 if (c == '\\')
2333 {
16fdc568 2334 c = XSTRING (newtext)->data[++pos];
ca1d1d23 2335 if (c == '&')
9a76659d
JB
2336 Finsert_buffer_substring
2337 (Fcurrent_buffer (),
d7a5ad5f
RS
2338 make_number (search_regs.start[sub] + offset),
2339 make_number (search_regs.end[sub] + offset));
78445046 2340 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
2341 {
2342 if (search_regs.start[c - '0'] >= 1)
9a76659d
JB
2343 Finsert_buffer_substring
2344 (Fcurrent_buffer (),
2345 make_number (search_regs.start[c - '0'] + offset),
2346 make_number (search_regs.end[c - '0'] + offset));
ca1d1d23 2347 }
636a5e28 2348 else if (c == '\\')
ca1d1d23 2349 insert_char (c);
636a5e28
RS
2350 else
2351 error ("Invalid use of `\\' in replacement text");
ca1d1d23
JB
2352 }
2353 else
2354 insert_char (c);
2355 }
2356 UNGCPRO;
2357 }
2358
6ec8bbd2 2359 inslen = PT - (search_regs.start[sub]);
d7a5ad5f 2360 del_range (search_regs.start[sub] + inslen, search_regs.end[sub] + inslen);
ca1d1d23
JB
2361
2362 if (case_action == all_caps)
6ec8bbd2 2363 Fupcase_region (make_number (PT - inslen), make_number (PT));
ca1d1d23 2364 else if (case_action == cap_initial)
6ec8bbd2 2365 Fupcase_initials_region (make_number (PT - inslen), make_number (PT));
b0eba991 2366
3e18eecf
RS
2367 newpoint = PT;
2368
b0eba991 2369 /* Put point back where it was in the text. */
8d808a65 2370 if (opoint <= 0)
fa8ed3e0 2371 TEMP_SET_PT (opoint + ZV);
b0eba991 2372 else
fa8ed3e0 2373 TEMP_SET_PT (opoint);
b0eba991
RS
2374
2375 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2376 move_if_not_intangible (newpoint);
b0eba991 2377
ca1d1d23
JB
2378 return Qnil;
2379}
2380\f
2381static Lisp_Object
2382match_limit (num, beginningp)
2383 Lisp_Object num;
2384 int beginningp;
2385{
2386 register int n;
2387
2388 CHECK_NUMBER (num, 0);
2389 n = XINT (num);
4746118a
JB
2390 if (n < 0 || n >= search_regs.num_regs)
2391 args_out_of_range (num, make_number (search_regs.num_regs));
2392 if (search_regs.num_regs <= 0
2393 || search_regs.start[n] < 0)
ca1d1d23
JB
2394 return Qnil;
2395 return (make_number ((beginningp) ? search_regs.start[n]
2396 : search_regs.end[n]));
2397}
2398
2399DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2400 "Return position of start of text matched by last search.\n\
5806161b
EN
2401SUBEXP, a number, specifies which parenthesized expression in the last\n\
2402 regexp.\n\
2403Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2404 SUBEXP pairs.\n\
ca1d1d23 2405Zero means the entire text matched by the whole regexp or whole string.")
5806161b
EN
2406 (subexp)
2407 Lisp_Object subexp;
ca1d1d23 2408{
5806161b 2409 return match_limit (subexp, 1);
ca1d1d23
JB
2410}
2411
2412DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2413 "Return position of end of text matched by last search.\n\
5806161b
EN
2414SUBEXP, a number, specifies which parenthesized expression in the last\n\
2415 regexp.\n\
2416Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2417 SUBEXP pairs.\n\
ca1d1d23 2418Zero means the entire text matched by the whole regexp or whole string.")
5806161b
EN
2419 (subexp)
2420 Lisp_Object subexp;
ca1d1d23 2421{
5806161b 2422 return match_limit (subexp, 0);
ca1d1d23
JB
2423}
2424
56256c2a 2425DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
ca1d1d23
JB
2426 "Return a list containing all info on what the last search matched.\n\
2427Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
2428All the elements are markers or nil (nil if the Nth pair didn't match)\n\
2429if the last match was on a buffer; integers or nil if a string was matched.\n\
56256c2a
RS
2430Use `store-match-data' to reinstate the data in this list.\n\
2431\n\
2432If INTEGERS (the optional first argument) is non-nil, always use integers\n\
8ca821e9 2433\(rather than markers) to represent buffer positions.\n\
56256c2a
RS
2434If REUSE is a list, reuse it as part of the value. If REUSE is long enough\n\
2435to hold all the values, and if INTEGERS is non-nil, no consing is done.")
2436 (integers, reuse)
2437 Lisp_Object integers, reuse;
ca1d1d23 2438{
56256c2a 2439 Lisp_Object tail, prev;
4746118a 2440 Lisp_Object *data;
ca1d1d23
JB
2441 int i, len;
2442
daa37602 2443 if (NILP (last_thing_searched))
c36bcf1b 2444 return Qnil;
daa37602 2445
4746118a
JB
2446 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2447 * sizeof (Lisp_Object));
2448
ca1d1d23 2449 len = -1;
4746118a 2450 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2451 {
2452 int start = search_regs.start[i];
2453 if (start >= 0)
2454 {
56256c2a
RS
2455 if (EQ (last_thing_searched, Qt)
2456 || ! NILP (integers))
ca1d1d23 2457 {
c235cce7
KH
2458 XSETFASTINT (data[2 * i], start);
2459 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2460 }
0ed62dc7 2461 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2462 {
2463 data[2 * i] = Fmake_marker ();
daa37602
JB
2464 Fset_marker (data[2 * i],
2465 make_number (start),
2466 last_thing_searched);
ca1d1d23
JB
2467 data[2 * i + 1] = Fmake_marker ();
2468 Fset_marker (data[2 * i + 1],
daa37602
JB
2469 make_number (search_regs.end[i]),
2470 last_thing_searched);
ca1d1d23 2471 }
daa37602
JB
2472 else
2473 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2474 abort ();
2475
ca1d1d23
JB
2476 len = i;
2477 }
2478 else
2479 data[2 * i] = data [2 * i + 1] = Qnil;
2480 }
56256c2a
RS
2481
2482 /* If REUSE is not usable, cons up the values and return them. */
2483 if (! CONSP (reuse))
2484 return Flist (2 * len + 2, data);
2485
2486 /* If REUSE is a list, store as many value elements as will fit
2487 into the elements of REUSE. */
2488 for (i = 0, tail = reuse; CONSP (tail);
2489 i++, tail = XCONS (tail)->cdr)
2490 {
2491 if (i < 2 * len + 2)
2492 XCONS (tail)->car = data[i];
2493 else
2494 XCONS (tail)->car = Qnil;
2495 prev = tail;
2496 }
2497
2498 /* If we couldn't fit all value elements into REUSE,
2499 cons up the rest of them and add them to the end of REUSE. */
2500 if (i < 2 * len + 2)
2501 XCONS (prev)->cdr = Flist (2 * len + 2 - i, data + i);
2502
2503 return reuse;
ca1d1d23
JB
2504}
2505
2506
2507DEFUN ("store-match-data", Fstore_match_data, Sstore_match_data, 1, 1, 0,
2508 "Set internal data on last search match from elements of LIST.\n\
2509LIST should have been created by calling `match-data' previously.")
2510 (list)
2511 register Lisp_Object list;
2512{
2513 register int i;
2514 register Lisp_Object marker;
2515
7074fde6
FP
2516 if (running_asynch_code)
2517 save_search_regs ();
2518
ca1d1d23 2519 if (!CONSP (list) && !NILP (list))
b37902c8 2520 list = wrong_type_argument (Qconsp, list);
ca1d1d23 2521
daa37602
JB
2522 /* Unless we find a marker with a buffer in LIST, assume that this
2523 match data came from a string. */
2524 last_thing_searched = Qt;
2525
4746118a
JB
2526 /* Allocate registers if they don't already exist. */
2527 {
d084e942 2528 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
2529
2530 if (length > search_regs.num_regs)
2531 {
1113d9db
JB
2532 if (search_regs.num_regs == 0)
2533 {
2534 search_regs.start
2535 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2536 search_regs.end
2537 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2538 }
4746118a 2539 else
1113d9db
JB
2540 {
2541 search_regs.start
2542 = (regoff_t *) xrealloc (search_regs.start,
2543 length * sizeof (regoff_t));
2544 search_regs.end
2545 = (regoff_t *) xrealloc (search_regs.end,
2546 length * sizeof (regoff_t));
2547 }
4746118a 2548
487282dc 2549 search_regs.num_regs = length;
4746118a
JB
2550 }
2551 }
2552
2553 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2554 {
2555 marker = Fcar (list);
2556 if (NILP (marker))
2557 {
2558 search_regs.start[i] = -1;
2559 list = Fcdr (list);
2560 }
2561 else
2562 {
0ed62dc7 2563 if (MARKERP (marker))
daa37602
JB
2564 {
2565 if (XMARKER (marker)->buffer == 0)
c235cce7 2566 XSETFASTINT (marker, 0);
daa37602 2567 else
a3668d92 2568 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
daa37602 2569 }
ca1d1d23
JB
2570
2571 CHECK_NUMBER_COERCE_MARKER (marker, 0);
2572 search_regs.start[i] = XINT (marker);
2573 list = Fcdr (list);
2574
2575 marker = Fcar (list);
0ed62dc7 2576 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
c235cce7 2577 XSETFASTINT (marker, 0);
ca1d1d23
JB
2578
2579 CHECK_NUMBER_COERCE_MARKER (marker, 0);
2580 search_regs.end[i] = XINT (marker);
2581 }
2582 list = Fcdr (list);
2583 }
2584
2585 return Qnil;
2586}
2587
7074fde6
FP
2588/* If non-zero the match data have been saved in saved_search_regs
2589 during the execution of a sentinel or filter. */
75ebf74b 2590static int search_regs_saved;
7074fde6
FP
2591static struct re_registers saved_search_regs;
2592
2593/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2594 if asynchronous code (filter or sentinel) is running. */
2595static void
2596save_search_regs ()
2597{
2598 if (!search_regs_saved)
2599 {
2600 saved_search_regs.num_regs = search_regs.num_regs;
2601 saved_search_regs.start = search_regs.start;
2602 saved_search_regs.end = search_regs.end;
2603 search_regs.num_regs = 0;
2d4a771a
RS
2604 search_regs.start = 0;
2605 search_regs.end = 0;
7074fde6
FP
2606
2607 search_regs_saved = 1;
2608 }
2609}
2610
2611/* Called upon exit from filters and sentinels. */
2612void
2613restore_match_data ()
2614{
2615 if (search_regs_saved)
2616 {
2617 if (search_regs.num_regs > 0)
2618 {
2619 xfree (search_regs.start);
2620 xfree (search_regs.end);
2621 }
2622 search_regs.num_regs = saved_search_regs.num_regs;
2623 search_regs.start = saved_search_regs.start;
2624 search_regs.end = saved_search_regs.end;
2625
2626 search_regs_saved = 0;
2627 }
2628}
2629
ca1d1d23
JB
2630/* Quote a string to inactivate reg-expr chars */
2631
2632DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2633 "Return a regexp string which matches exactly STRING and nothing else.")
5806161b
EN
2634 (string)
2635 Lisp_Object string;
ca1d1d23
JB
2636{
2637 register unsigned char *in, *out, *end;
2638 register unsigned char *temp;
0c8533c6 2639 int backslashes_added = 0;
ca1d1d23 2640
5806161b 2641 CHECK_STRING (string, 0);
ca1d1d23 2642
0c8533c6 2643 temp = (unsigned char *) alloca (XSTRING (string)->size_byte * 2);
ca1d1d23
JB
2644
2645 /* Now copy the data into the new string, inserting escapes. */
2646
5806161b 2647 in = XSTRING (string)->data;
0c8533c6 2648 end = in + XSTRING (string)->size_byte;
ca1d1d23
JB
2649 out = temp;
2650
2651 for (; in != end; in++)
2652 {
2653 if (*in == '[' || *in == ']'
2654 || *in == '*' || *in == '.' || *in == '\\'
2655 || *in == '?' || *in == '+'
2656 || *in == '^' || *in == '$')
0c8533c6 2657 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
2658 *out++ = *in;
2659 }
2660
0c8533c6
RS
2661 return make_multibyte_string (temp,
2662 XSTRING (string)->size + backslashes_added,
2663 out - temp);
ca1d1d23
JB
2664}
2665\f
2666syms_of_search ()
2667{
2668 register int i;
2669
487282dc
KH
2670 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2671 {
2672 searchbufs[i].buf.allocated = 100;
2673 searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
2674 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2675 searchbufs[i].regexp = Qnil;
2676 staticpro (&searchbufs[i].regexp);
2677 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2678 }
2679 searchbuf_head = &searchbufs[0];
ca1d1d23
JB
2680
2681 Qsearch_failed = intern ("search-failed");
2682 staticpro (&Qsearch_failed);
2683 Qinvalid_regexp = intern ("invalid-regexp");
2684 staticpro (&Qinvalid_regexp);
2685
2686 Fput (Qsearch_failed, Qerror_conditions,
2687 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2688 Fput (Qsearch_failed, Qerror_message,
2689 build_string ("Search failed"));
2690
2691 Fput (Qinvalid_regexp, Qerror_conditions,
2692 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2693 Fput (Qinvalid_regexp, Qerror_message,
2694 build_string ("Invalid regexp"));
2695
daa37602
JB
2696 last_thing_searched = Qnil;
2697 staticpro (&last_thing_searched);
2698
ca1d1d23 2699 defsubr (&Slooking_at);
b819a390
RS
2700 defsubr (&Sposix_looking_at);
2701 defsubr (&Sstring_match);
2702 defsubr (&Sposix_string_match);
ca1d1d23
JB
2703 defsubr (&Ssearch_forward);
2704 defsubr (&Ssearch_backward);
2705 defsubr (&Sword_search_forward);
2706 defsubr (&Sword_search_backward);
2707 defsubr (&Sre_search_forward);
2708 defsubr (&Sre_search_backward);
b819a390
RS
2709 defsubr (&Sposix_search_forward);
2710 defsubr (&Sposix_search_backward);
ca1d1d23
JB
2711 defsubr (&Sreplace_match);
2712 defsubr (&Smatch_beginning);
2713 defsubr (&Smatch_end);
2714 defsubr (&Smatch_data);
2715 defsubr (&Sstore_match_data);
2716 defsubr (&Sregexp_quote);
2717}