Add 2008 to copyright years.
[bpt/emacs.git] / src / search.c
1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008
4 Free Software Foundation, Inc.
5
6 This file is part of GNU Emacs.
7
8 GNU Emacs is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 Boston, MA 02110-1301, USA. */
22
23
24 #include <config.h>
25 #include "lisp.h"
26 #include "syntax.h"
27 #include "category.h"
28 #include "buffer.h"
29 #include "charset.h"
30 #include "region-cache.h"
31 #include "commands.h"
32 #include "blockinput.h"
33 #include "intervals.h"
34
35 #include <sys/types.h>
36 #include "regex.h"
37
38 #define REGEXP_CACHE_SIZE 20
39
40 /* If the regexp is non-nil, then the buffer contains the compiled form
41 of that regexp, suitable for searching. */
42 struct regexp_cache
43 {
44 struct regexp_cache *next;
45 Lisp_Object regexp, whitespace_regexp;
46 /* Syntax table for which the regexp applies. We need this because
47 of character classes. If this is t, then the compiled pattern is valid
48 for any syntax-table. */
49 Lisp_Object syntax_table;
50 struct re_pattern_buffer buf;
51 char fastmap[0400];
52 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
53 char posix;
54 };
55
56 /* The instances of that struct. */
57 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
58
59 /* The head of the linked list; points to the most recently used buffer. */
60 struct regexp_cache *searchbuf_head;
61
62
63 /* Every call to re_match, etc., must pass &search_regs as the regs
64 argument unless you can show it is unnecessary (i.e., if re_match
65 is certainly going to be called again before region-around-match
66 can be called).
67
68 Since the registers are now dynamically allocated, we need to make
69 sure not to refer to the Nth register before checking that it has
70 been allocated by checking search_regs.num_regs.
71
72 The regex code keeps track of whether it has allocated the search
73 buffer using bits in the re_pattern_buffer. This means that whenever
74 you compile a new pattern, it completely forgets whether it has
75 allocated any registers, and will allocate new registers the next
76 time you call a searching or matching function. Therefore, we need
77 to call re_set_registers after compiling a new pattern or after
78 setting the match registers, so that the regex functions will be
79 able to free or re-allocate it properly. */
80 static struct re_registers search_regs;
81
82 /* The buffer in which the last search was performed, or
83 Qt if the last search was done in a string;
84 Qnil if no searching has been done yet. */
85 static Lisp_Object last_thing_searched;
86
87 /* error condition signaled when regexp compile_pattern fails */
88
89 Lisp_Object Qinvalid_regexp;
90
91 /* Error condition used for failing searches */
92 Lisp_Object Qsearch_failed;
93
94 Lisp_Object Vsearch_spaces_regexp;
95
96 static void set_search_regs ();
97 static void save_search_regs ();
98 static int simple_search ();
99 static int boyer_moore ();
100 static int search_buffer ();
101 static void matcher_overflow () NO_RETURN;
102
103 static void
104 matcher_overflow ()
105 {
106 error ("Stack overflow in regexp matcher");
107 }
108
109 /* Compile a regexp and signal a Lisp error if anything goes wrong.
110 PATTERN is the pattern to compile.
111 CP is the place to put the result.
112 TRANSLATE is a translation table for ignoring case, or nil for none.
113 REGP is the structure that says where to store the "register"
114 values that will result from matching this pattern.
115 If it is 0, we should compile the pattern not to record any
116 subexpression bounds.
117 POSIX is nonzero if we want full backtracking (POSIX style)
118 for this pattern. 0 means backtrack only enough to get a valid match.
119 MULTIBYTE is nonzero if we want to handle multibyte characters in
120 PATTERN. 0 means all multibyte characters are recognized just as
121 sequences of binary data.
122
123 The behavior also depends on Vsearch_spaces_regexp. */
124
125 static void
126 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
127 struct regexp_cache *cp;
128 Lisp_Object pattern;
129 Lisp_Object translate;
130 struct re_registers *regp;
131 int posix;
132 int multibyte;
133 {
134 unsigned char *raw_pattern;
135 int raw_pattern_size;
136 char *val;
137 reg_syntax_t old;
138
139 /* MULTIBYTE says whether the text to be searched is multibyte.
140 We must convert PATTERN to match that, or we will not really
141 find things right. */
142
143 if (multibyte == STRING_MULTIBYTE (pattern))
144 {
145 raw_pattern = (unsigned char *) SDATA (pattern);
146 raw_pattern_size = SBYTES (pattern);
147 }
148 else if (multibyte)
149 {
150 raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
151 SCHARS (pattern));
152 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
153 copy_text (SDATA (pattern), raw_pattern,
154 SCHARS (pattern), 0, 1);
155 }
156 else
157 {
158 /* Converting multibyte to single-byte.
159
160 ??? Perhaps this conversion should be done in a special way
161 by subtracting nonascii-insert-offset from each non-ASCII char,
162 so that only the multibyte chars which really correspond to
163 the chosen single-byte character set can possibly match. */
164 raw_pattern_size = SCHARS (pattern);
165 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
166 copy_text (SDATA (pattern), raw_pattern,
167 SBYTES (pattern), 1, 0);
168 }
169
170 cp->regexp = Qnil;
171 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
172 cp->posix = posix;
173 cp->buf.multibyte = multibyte;
174 cp->whitespace_regexp = Vsearch_spaces_regexp;
175 /* rms: I think BLOCK_INPUT is not needed here any more,
176 because regex.c defines malloc to call xmalloc.
177 Using BLOCK_INPUT here means the debugger won't run if an error occurs.
178 So let's turn it off. */
179 /* BLOCK_INPUT; */
180 old = re_set_syntax (RE_SYNTAX_EMACS
181 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
182
183 re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
184 : SDATA (Vsearch_spaces_regexp));
185
186 val = (char *) re_compile_pattern ((char *)raw_pattern,
187 raw_pattern_size, &cp->buf);
188
189 /* If the compiled pattern hard codes some of the contents of the
190 syntax-table, it can only be reused with *this* syntax table. */
191 cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
192
193 re_set_whitespace_regexp (NULL);
194
195 re_set_syntax (old);
196 /* UNBLOCK_INPUT; */
197 if (val)
198 xsignal1 (Qinvalid_regexp, build_string (val));
199
200 cp->regexp = Fcopy_sequence (pattern);
201 }
202
203 /* Shrink each compiled regexp buffer in the cache
204 to the size actually used right now.
205 This is called from garbage collection. */
206
207 void
208 shrink_regexp_cache ()
209 {
210 struct regexp_cache *cp;
211
212 for (cp = searchbuf_head; cp != 0; cp = cp->next)
213 {
214 cp->buf.allocated = cp->buf.used;
215 cp->buf.buffer
216 = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
217 }
218 }
219
220 /* Clear the regexp cache w.r.t. a particular syntax table,
221 because it was changed.
222 There is no danger of memory leak here because re_compile_pattern
223 automagically manages the memory in each re_pattern_buffer struct,
224 based on its `allocated' and `buffer' values. */
225 void
226 clear_regexp_cache ()
227 {
228 int i;
229
230 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
231 /* It's tempting to compare with the syntax-table we've actually changd,
232 but it's not sufficient because char-table inheritance mewans that
233 modifying one syntax-table can change others at the same time. */
234 if (!EQ (searchbufs[i].syntax_table, Qt))
235 searchbufs[i].regexp = Qnil;
236 }
237
238 /* Compile a regexp if necessary, but first check to see if there's one in
239 the cache.
240 PATTERN is the pattern to compile.
241 TRANSLATE is a translation table for ignoring case, or nil for none.
242 REGP is the structure that says where to store the "register"
243 values that will result from matching this pattern.
244 If it is 0, we should compile the pattern not to record any
245 subexpression bounds.
246 POSIX is nonzero if we want full backtracking (POSIX style)
247 for this pattern. 0 means backtrack only enough to get a valid match. */
248
249 struct re_pattern_buffer *
250 compile_pattern (pattern, regp, translate, posix, multibyte)
251 Lisp_Object pattern;
252 struct re_registers *regp;
253 Lisp_Object translate;
254 int posix, multibyte;
255 {
256 struct regexp_cache *cp, **cpp;
257
258 for (cpp = &searchbuf_head; ; cpp = &cp->next)
259 {
260 cp = *cpp;
261 /* Entries are initialized to nil, and may be set to nil by
262 compile_pattern_1 if the pattern isn't valid. Don't apply
263 string accessors in those cases. However, compile_pattern_1
264 is only applied to the cache entry we pick here to reuse. So
265 nil should never appear before a non-nil entry. */
266 if (NILP (cp->regexp))
267 goto compile_it;
268 if (SCHARS (cp->regexp) == SCHARS (pattern)
269 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
270 && !NILP (Fstring_equal (cp->regexp, pattern))
271 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
272 && cp->posix == posix
273 && cp->buf.multibyte == multibyte
274 && (EQ (cp->syntax_table, Qt)
275 || EQ (cp->syntax_table, current_buffer->syntax_table))
276 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp)))
277 break;
278
279 /* If we're at the end of the cache, compile into the nil cell
280 we found, or the last (least recently used) cell with a
281 string value. */
282 if (cp->next == 0)
283 {
284 compile_it:
285 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
286 break;
287 }
288 }
289
290 /* When we get here, cp (aka *cpp) contains the compiled pattern,
291 either because we found it in the cache or because we just compiled it.
292 Move it to the front of the queue to mark it as most recently used. */
293 *cpp = cp->next;
294 cp->next = searchbuf_head;
295 searchbuf_head = cp;
296
297 /* Advise the searching functions about the space we have allocated
298 for register data. */
299 if (regp)
300 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
301
302 return &cp->buf;
303 }
304
305 \f
306 static Lisp_Object
307 looking_at_1 (string, posix)
308 Lisp_Object string;
309 int posix;
310 {
311 Lisp_Object val;
312 unsigned char *p1, *p2;
313 int s1, s2;
314 register int i;
315 struct re_pattern_buffer *bufp;
316
317 if (running_asynch_code)
318 save_search_regs ();
319
320 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
321 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
322 = current_buffer->case_eqv_table;
323
324 CHECK_STRING (string);
325 bufp = compile_pattern (string, &search_regs,
326 (!NILP (current_buffer->case_fold_search)
327 ? current_buffer->case_canon_table : Qnil),
328 posix,
329 !NILP (current_buffer->enable_multibyte_characters));
330
331 immediate_quit = 1;
332 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
333
334 /* Get pointers and sizes of the two strings
335 that make up the visible portion of the buffer. */
336
337 p1 = BEGV_ADDR;
338 s1 = GPT_BYTE - BEGV_BYTE;
339 p2 = GAP_END_ADDR;
340 s2 = ZV_BYTE - GPT_BYTE;
341 if (s1 < 0)
342 {
343 p2 = p1;
344 s2 = ZV_BYTE - BEGV_BYTE;
345 s1 = 0;
346 }
347 if (s2 < 0)
348 {
349 s1 = ZV_BYTE - BEGV_BYTE;
350 s2 = 0;
351 }
352
353 re_match_object = Qnil;
354
355 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
356 PT_BYTE - BEGV_BYTE, &search_regs,
357 ZV_BYTE - BEGV_BYTE);
358 immediate_quit = 0;
359
360 if (i == -2)
361 matcher_overflow ();
362
363 val = (0 <= i ? Qt : Qnil);
364 if (i >= 0)
365 for (i = 0; i < search_regs.num_regs; i++)
366 if (search_regs.start[i] >= 0)
367 {
368 search_regs.start[i]
369 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
370 search_regs.end[i]
371 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
372 }
373 XSETBUFFER (last_thing_searched, current_buffer);
374 return val;
375 }
376
377 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
378 doc: /* Return t if text after point matches regular expression REGEXP.
379 This function modifies the match data that `match-beginning',
380 `match-end' and `match-data' access; save and restore the match
381 data if you want to preserve them. */)
382 (regexp)
383 Lisp_Object regexp;
384 {
385 return looking_at_1 (regexp, 0);
386 }
387
388 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
389 doc: /* Return t if text after point matches regular expression REGEXP.
390 Find the longest match, in accord with Posix regular expression rules.
391 This function modifies the match data that `match-beginning',
392 `match-end' and `match-data' access; save and restore the match
393 data if you want to preserve them. */)
394 (regexp)
395 Lisp_Object regexp;
396 {
397 return looking_at_1 (regexp, 1);
398 }
399 \f
400 static Lisp_Object
401 string_match_1 (regexp, string, start, posix)
402 Lisp_Object regexp, string, start;
403 int posix;
404 {
405 int val;
406 struct re_pattern_buffer *bufp;
407 int pos, pos_byte;
408 int i;
409
410 if (running_asynch_code)
411 save_search_regs ();
412
413 CHECK_STRING (regexp);
414 CHECK_STRING (string);
415
416 if (NILP (start))
417 pos = 0, pos_byte = 0;
418 else
419 {
420 int len = SCHARS (string);
421
422 CHECK_NUMBER (start);
423 pos = XINT (start);
424 if (pos < 0 && -pos <= len)
425 pos = len + pos;
426 else if (0 > pos || pos > len)
427 args_out_of_range (string, start);
428 pos_byte = string_char_to_byte (string, pos);
429 }
430
431 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
432 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
433 = current_buffer->case_eqv_table;
434
435 bufp = compile_pattern (regexp, &search_regs,
436 (!NILP (current_buffer->case_fold_search)
437 ? current_buffer->case_canon_table : Qnil),
438 posix,
439 STRING_MULTIBYTE (string));
440 immediate_quit = 1;
441 re_match_object = string;
442
443 val = re_search (bufp, (char *) SDATA (string),
444 SBYTES (string), pos_byte,
445 SBYTES (string) - pos_byte,
446 &search_regs);
447 immediate_quit = 0;
448 last_thing_searched = Qt;
449 if (val == -2)
450 matcher_overflow ();
451 if (val < 0) return Qnil;
452
453 for (i = 0; i < search_regs.num_regs; i++)
454 if (search_regs.start[i] >= 0)
455 {
456 search_regs.start[i]
457 = string_byte_to_char (string, search_regs.start[i]);
458 search_regs.end[i]
459 = string_byte_to_char (string, search_regs.end[i]);
460 }
461
462 return make_number (string_byte_to_char (string, val));
463 }
464
465 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
466 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
467 Matching ignores case if `case-fold-search' is non-nil.
468 If third arg START is non-nil, start search at that index in STRING.
469 For index of first char beyond the match, do (match-end 0).
470 `match-end' and `match-beginning' also give indices of substrings
471 matched by parenthesis constructs in the pattern.
472
473 You can use the function `match-string' to extract the substrings
474 matched by the parenthesis constructions in REGEXP. */)
475 (regexp, string, start)
476 Lisp_Object regexp, string, start;
477 {
478 return string_match_1 (regexp, string, start, 0);
479 }
480
481 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
482 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
483 Find the longest match, in accord with Posix regular expression rules.
484 Case is ignored if `case-fold-search' is non-nil in the current buffer.
485 If third arg START is non-nil, start search at that index in STRING.
486 For index of first char beyond the match, do (match-end 0).
487 `match-end' and `match-beginning' also give indices of substrings
488 matched by parenthesis constructs in the pattern. */)
489 (regexp, string, start)
490 Lisp_Object regexp, string, start;
491 {
492 return string_match_1 (regexp, string, start, 1);
493 }
494
495 /* Match REGEXP against STRING, searching all of STRING,
496 and return the index of the match, or negative on failure.
497 This does not clobber the match data. */
498
499 int
500 fast_string_match (regexp, string)
501 Lisp_Object regexp, string;
502 {
503 int val;
504 struct re_pattern_buffer *bufp;
505
506 bufp = compile_pattern (regexp, 0, Qnil,
507 0, STRING_MULTIBYTE (string));
508 immediate_quit = 1;
509 re_match_object = string;
510
511 val = re_search (bufp, (char *) SDATA (string),
512 SBYTES (string), 0,
513 SBYTES (string), 0);
514 immediate_quit = 0;
515 return val;
516 }
517
518 /* Match REGEXP against STRING, searching all of STRING ignoring case,
519 and return the index of the match, or negative on failure.
520 This does not clobber the match data.
521 We assume that STRING contains single-byte characters. */
522
523 extern Lisp_Object Vascii_downcase_table;
524
525 int
526 fast_c_string_match_ignore_case (regexp, string)
527 Lisp_Object regexp;
528 const char *string;
529 {
530 int val;
531 struct re_pattern_buffer *bufp;
532 int len = strlen (string);
533
534 regexp = string_make_unibyte (regexp);
535 re_match_object = Qt;
536 bufp = compile_pattern (regexp, 0,
537 Vascii_canon_table, 0,
538 0);
539 immediate_quit = 1;
540 val = re_search (bufp, string, len, 0, len, 0);
541 immediate_quit = 0;
542 return val;
543 }
544
545 /* Like fast_string_match but ignore case. */
546
547 int
548 fast_string_match_ignore_case (regexp, string)
549 Lisp_Object regexp, string;
550 {
551 int val;
552 struct re_pattern_buffer *bufp;
553
554 bufp = compile_pattern (regexp, 0, Vascii_canon_table,
555 0, STRING_MULTIBYTE (string));
556 immediate_quit = 1;
557 re_match_object = string;
558
559 val = re_search (bufp, (char *) SDATA (string),
560 SBYTES (string), 0,
561 SBYTES (string), 0);
562 immediate_quit = 0;
563 return val;
564 }
565 \f
566 /* The newline cache: remembering which sections of text have no newlines. */
567
568 /* If the user has requested newline caching, make sure it's on.
569 Otherwise, make sure it's off.
570 This is our cheezy way of associating an action with the change of
571 state of a buffer-local variable. */
572 static void
573 newline_cache_on_off (buf)
574 struct buffer *buf;
575 {
576 if (NILP (buf->cache_long_line_scans))
577 {
578 /* It should be off. */
579 if (buf->newline_cache)
580 {
581 free_region_cache (buf->newline_cache);
582 buf->newline_cache = 0;
583 }
584 }
585 else
586 {
587 /* It should be on. */
588 if (buf->newline_cache == 0)
589 buf->newline_cache = new_region_cache ();
590 }
591 }
592
593 \f
594 /* Search for COUNT instances of the character TARGET between START and END.
595
596 If COUNT is positive, search forwards; END must be >= START.
597 If COUNT is negative, search backwards for the -COUNTth instance;
598 END must be <= START.
599 If COUNT is zero, do anything you please; run rogue, for all I care.
600
601 If END is zero, use BEGV or ZV instead, as appropriate for the
602 direction indicated by COUNT.
603
604 If we find COUNT instances, set *SHORTAGE to zero, and return the
605 position past the COUNTth match. Note that for reverse motion
606 this is not the same as the usual convention for Emacs motion commands.
607
608 If we don't find COUNT instances before reaching END, set *SHORTAGE
609 to the number of TARGETs left unfound, and return END.
610
611 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
612 except when inside redisplay. */
613
614 int
615 scan_buffer (target, start, end, count, shortage, allow_quit)
616 register int target;
617 int start, end;
618 int count;
619 int *shortage;
620 int allow_quit;
621 {
622 struct region_cache *newline_cache;
623 int direction;
624
625 if (count > 0)
626 {
627 direction = 1;
628 if (! end) end = ZV;
629 }
630 else
631 {
632 direction = -1;
633 if (! end) end = BEGV;
634 }
635
636 newline_cache_on_off (current_buffer);
637 newline_cache = current_buffer->newline_cache;
638
639 if (shortage != 0)
640 *shortage = 0;
641
642 immediate_quit = allow_quit;
643
644 if (count > 0)
645 while (start != end)
646 {
647 /* Our innermost scanning loop is very simple; it doesn't know
648 about gaps, buffer ends, or the newline cache. ceiling is
649 the position of the last character before the next such
650 obstacle --- the last character the dumb search loop should
651 examine. */
652 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
653 int start_byte = CHAR_TO_BYTE (start);
654 int tem;
655
656 /* If we're looking for a newline, consult the newline cache
657 to see where we can avoid some scanning. */
658 if (target == '\n' && newline_cache)
659 {
660 int next_change;
661 immediate_quit = 0;
662 while (region_cache_forward
663 (current_buffer, newline_cache, start_byte, &next_change))
664 start_byte = next_change;
665 immediate_quit = allow_quit;
666
667 /* START should never be after END. */
668 if (start_byte > ceiling_byte)
669 start_byte = ceiling_byte;
670
671 /* Now the text after start is an unknown region, and
672 next_change is the position of the next known region. */
673 ceiling_byte = min (next_change - 1, ceiling_byte);
674 }
675
676 /* The dumb loop can only scan text stored in contiguous
677 bytes. BUFFER_CEILING_OF returns the last character
678 position that is contiguous, so the ceiling is the
679 position after that. */
680 tem = BUFFER_CEILING_OF (start_byte);
681 ceiling_byte = min (tem, ceiling_byte);
682
683 {
684 /* The termination address of the dumb loop. */
685 register unsigned char *ceiling_addr
686 = BYTE_POS_ADDR (ceiling_byte) + 1;
687 register unsigned char *cursor
688 = BYTE_POS_ADDR (start_byte);
689 unsigned char *base = cursor;
690
691 while (cursor < ceiling_addr)
692 {
693 unsigned char *scan_start = cursor;
694
695 /* The dumb loop. */
696 while (*cursor != target && ++cursor < ceiling_addr)
697 ;
698
699 /* If we're looking for newlines, cache the fact that
700 the region from start to cursor is free of them. */
701 if (target == '\n' && newline_cache)
702 know_region_cache (current_buffer, newline_cache,
703 start_byte + scan_start - base,
704 start_byte + cursor - base);
705
706 /* Did we find the target character? */
707 if (cursor < ceiling_addr)
708 {
709 if (--count == 0)
710 {
711 immediate_quit = 0;
712 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
713 }
714 cursor++;
715 }
716 }
717
718 start = BYTE_TO_CHAR (start_byte + cursor - base);
719 }
720 }
721 else
722 while (start > end)
723 {
724 /* The last character to check before the next obstacle. */
725 int ceiling_byte = CHAR_TO_BYTE (end);
726 int start_byte = CHAR_TO_BYTE (start);
727 int tem;
728
729 /* Consult the newline cache, if appropriate. */
730 if (target == '\n' && newline_cache)
731 {
732 int next_change;
733 immediate_quit = 0;
734 while (region_cache_backward
735 (current_buffer, newline_cache, start_byte, &next_change))
736 start_byte = next_change;
737 immediate_quit = allow_quit;
738
739 /* Start should never be at or before end. */
740 if (start_byte <= ceiling_byte)
741 start_byte = ceiling_byte + 1;
742
743 /* Now the text before start is an unknown region, and
744 next_change is the position of the next known region. */
745 ceiling_byte = max (next_change, ceiling_byte);
746 }
747
748 /* Stop scanning before the gap. */
749 tem = BUFFER_FLOOR_OF (start_byte - 1);
750 ceiling_byte = max (tem, ceiling_byte);
751
752 {
753 /* The termination address of the dumb loop. */
754 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
755 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
756 unsigned char *base = cursor;
757
758 while (cursor >= ceiling_addr)
759 {
760 unsigned char *scan_start = cursor;
761
762 while (*cursor != target && --cursor >= ceiling_addr)
763 ;
764
765 /* If we're looking for newlines, cache the fact that
766 the region from after the cursor to start is free of them. */
767 if (target == '\n' && newline_cache)
768 know_region_cache (current_buffer, newline_cache,
769 start_byte + cursor - base,
770 start_byte + scan_start - base);
771
772 /* Did we find the target character? */
773 if (cursor >= ceiling_addr)
774 {
775 if (++count >= 0)
776 {
777 immediate_quit = 0;
778 return BYTE_TO_CHAR (start_byte + cursor - base);
779 }
780 cursor--;
781 }
782 }
783
784 start = BYTE_TO_CHAR (start_byte + cursor - base);
785 }
786 }
787
788 immediate_quit = 0;
789 if (shortage != 0)
790 *shortage = count * direction;
791 return start;
792 }
793 \f
794 /* Search for COUNT instances of a line boundary, which means either a
795 newline or (if selective display enabled) a carriage return.
796 Start at START. If COUNT is negative, search backwards.
797
798 We report the resulting position by calling TEMP_SET_PT_BOTH.
799
800 If we find COUNT instances. we position after (always after,
801 even if scanning backwards) the COUNTth match, and return 0.
802
803 If we don't find COUNT instances before reaching the end of the
804 buffer (or the beginning, if scanning backwards), we return
805 the number of line boundaries left unfound, and position at
806 the limit we bumped up against.
807
808 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
809 except in special cases. */
810
811 int
812 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
813 int start, start_byte;
814 int limit, limit_byte;
815 register int count;
816 int allow_quit;
817 {
818 int direction = ((count > 0) ? 1 : -1);
819
820 register unsigned char *cursor;
821 unsigned char *base;
822
823 register int ceiling;
824 register unsigned char *ceiling_addr;
825
826 int old_immediate_quit = immediate_quit;
827
828 /* The code that follows is like scan_buffer
829 but checks for either newline or carriage return. */
830
831 if (allow_quit)
832 immediate_quit++;
833
834 start_byte = CHAR_TO_BYTE (start);
835
836 if (count > 0)
837 {
838 while (start_byte < limit_byte)
839 {
840 ceiling = BUFFER_CEILING_OF (start_byte);
841 ceiling = min (limit_byte - 1, ceiling);
842 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
843 base = (cursor = BYTE_POS_ADDR (start_byte));
844 while (1)
845 {
846 while (*cursor != '\n' && ++cursor != ceiling_addr)
847 ;
848
849 if (cursor != ceiling_addr)
850 {
851 if (--count == 0)
852 {
853 immediate_quit = old_immediate_quit;
854 start_byte = start_byte + cursor - base + 1;
855 start = BYTE_TO_CHAR (start_byte);
856 TEMP_SET_PT_BOTH (start, start_byte);
857 return 0;
858 }
859 else
860 if (++cursor == ceiling_addr)
861 break;
862 }
863 else
864 break;
865 }
866 start_byte += cursor - base;
867 }
868 }
869 else
870 {
871 while (start_byte > limit_byte)
872 {
873 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
874 ceiling = max (limit_byte, ceiling);
875 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
876 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
877 while (1)
878 {
879 while (--cursor != ceiling_addr && *cursor != '\n')
880 ;
881
882 if (cursor != ceiling_addr)
883 {
884 if (++count == 0)
885 {
886 immediate_quit = old_immediate_quit;
887 /* Return the position AFTER the match we found. */
888 start_byte = start_byte + cursor - base + 1;
889 start = BYTE_TO_CHAR (start_byte);
890 TEMP_SET_PT_BOTH (start, start_byte);
891 return 0;
892 }
893 }
894 else
895 break;
896 }
897 /* Here we add 1 to compensate for the last decrement
898 of CURSOR, which took it past the valid range. */
899 start_byte += cursor - base + 1;
900 }
901 }
902
903 TEMP_SET_PT_BOTH (limit, limit_byte);
904 immediate_quit = old_immediate_quit;
905
906 return count * direction;
907 }
908
909 int
910 find_next_newline_no_quit (from, cnt)
911 register int from, cnt;
912 {
913 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
914 }
915
916 /* Like find_next_newline, but returns position before the newline,
917 not after, and only search up to TO. This isn't just
918 find_next_newline (...)-1, because you might hit TO. */
919
920 int
921 find_before_next_newline (from, to, cnt)
922 int from, to, cnt;
923 {
924 int shortage;
925 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
926
927 if (shortage == 0)
928 pos--;
929
930 return pos;
931 }
932 \f
933 /* Subroutines of Lisp buffer search functions. */
934
935 static Lisp_Object
936 search_command (string, bound, noerror, count, direction, RE, posix)
937 Lisp_Object string, bound, noerror, count;
938 int direction;
939 int RE;
940 int posix;
941 {
942 register int np;
943 int lim, lim_byte;
944 int n = direction;
945
946 if (!NILP (count))
947 {
948 CHECK_NUMBER (count);
949 n *= XINT (count);
950 }
951
952 CHECK_STRING (string);
953 if (NILP (bound))
954 {
955 if (n > 0)
956 lim = ZV, lim_byte = ZV_BYTE;
957 else
958 lim = BEGV, lim_byte = BEGV_BYTE;
959 }
960 else
961 {
962 CHECK_NUMBER_COERCE_MARKER (bound);
963 lim = XINT (bound);
964 if (n > 0 ? lim < PT : lim > PT)
965 error ("Invalid search bound (wrong side of point)");
966 if (lim > ZV)
967 lim = ZV, lim_byte = ZV_BYTE;
968 else if (lim < BEGV)
969 lim = BEGV, lim_byte = BEGV_BYTE;
970 else
971 lim_byte = CHAR_TO_BYTE (lim);
972 }
973
974 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
975 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
976 = current_buffer->case_eqv_table;
977
978 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
979 (!NILP (current_buffer->case_fold_search)
980 ? current_buffer->case_canon_table
981 : Qnil),
982 (!NILP (current_buffer->case_fold_search)
983 ? current_buffer->case_eqv_table
984 : Qnil),
985 posix);
986 if (np <= 0)
987 {
988 if (NILP (noerror))
989 xsignal1 (Qsearch_failed, string);
990
991 if (!EQ (noerror, Qt))
992 {
993 if (lim < BEGV || lim > ZV)
994 abort ();
995 SET_PT_BOTH (lim, lim_byte);
996 return Qnil;
997 #if 0 /* This would be clean, but maybe programs depend on
998 a value of nil here. */
999 np = lim;
1000 #endif
1001 }
1002 else
1003 return Qnil;
1004 }
1005
1006 if (np < BEGV || np > ZV)
1007 abort ();
1008
1009 SET_PT (np);
1010
1011 return make_number (np);
1012 }
1013 \f
1014 /* Return 1 if REGEXP it matches just one constant string. */
1015
1016 static int
1017 trivial_regexp_p (regexp)
1018 Lisp_Object regexp;
1019 {
1020 int len = SBYTES (regexp);
1021 unsigned char *s = SDATA (regexp);
1022 while (--len >= 0)
1023 {
1024 switch (*s++)
1025 {
1026 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1027 return 0;
1028 case '\\':
1029 if (--len < 0)
1030 return 0;
1031 switch (*s++)
1032 {
1033 case '|': case '(': case ')': case '`': case '\'': case 'b':
1034 case 'B': case '<': case '>': case 'w': case 'W': case 's':
1035 case 'S': case '=': case '{': case '}': case '_':
1036 case 'c': case 'C': /* for categoryspec and notcategoryspec */
1037 case '1': case '2': case '3': case '4': case '5':
1038 case '6': case '7': case '8': case '9':
1039 return 0;
1040 }
1041 }
1042 }
1043 return 1;
1044 }
1045
1046 /* Search for the n'th occurrence of STRING in the current buffer,
1047 starting at position POS and stopping at position LIM,
1048 treating STRING as a literal string if RE is false or as
1049 a regular expression if RE is true.
1050
1051 If N is positive, searching is forward and LIM must be greater than POS.
1052 If N is negative, searching is backward and LIM must be less than POS.
1053
1054 Returns -x if x occurrences remain to be found (x > 0),
1055 or else the position at the beginning of the Nth occurrence
1056 (if searching backward) or the end (if searching forward).
1057
1058 POSIX is nonzero if we want full backtracking (POSIX style)
1059 for this pattern. 0 means backtrack only enough to get a valid match. */
1060
1061 #define TRANSLATE(out, trt, d) \
1062 do \
1063 { \
1064 if (! NILP (trt)) \
1065 { \
1066 Lisp_Object temp; \
1067 temp = Faref (trt, make_number (d)); \
1068 if (INTEGERP (temp)) \
1069 out = XINT (temp); \
1070 else \
1071 out = d; \
1072 } \
1073 else \
1074 out = d; \
1075 } \
1076 while (0)
1077
1078 static int
1079 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1080 RE, trt, inverse_trt, posix)
1081 Lisp_Object string;
1082 int pos;
1083 int pos_byte;
1084 int lim;
1085 int lim_byte;
1086 int n;
1087 int RE;
1088 Lisp_Object trt;
1089 Lisp_Object inverse_trt;
1090 int posix;
1091 {
1092 int len = SCHARS (string);
1093 int len_byte = SBYTES (string);
1094 register int i;
1095
1096 if (running_asynch_code)
1097 save_search_regs ();
1098
1099 /* Searching 0 times means don't move. */
1100 /* Null string is found at starting position. */
1101 if (len == 0 || n == 0)
1102 {
1103 set_search_regs (pos_byte, 0);
1104 return pos;
1105 }
1106
1107 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1108 {
1109 unsigned char *p1, *p2;
1110 int s1, s2;
1111 struct re_pattern_buffer *bufp;
1112
1113 bufp = compile_pattern (string, &search_regs, trt, posix,
1114 !NILP (current_buffer->enable_multibyte_characters));
1115
1116 immediate_quit = 1; /* Quit immediately if user types ^G,
1117 because letting this function finish
1118 can take too long. */
1119 QUIT; /* Do a pending quit right away,
1120 to avoid paradoxical behavior */
1121 /* Get pointers and sizes of the two strings
1122 that make up the visible portion of the buffer. */
1123
1124 p1 = BEGV_ADDR;
1125 s1 = GPT_BYTE - BEGV_BYTE;
1126 p2 = GAP_END_ADDR;
1127 s2 = ZV_BYTE - GPT_BYTE;
1128 if (s1 < 0)
1129 {
1130 p2 = p1;
1131 s2 = ZV_BYTE - BEGV_BYTE;
1132 s1 = 0;
1133 }
1134 if (s2 < 0)
1135 {
1136 s1 = ZV_BYTE - BEGV_BYTE;
1137 s2 = 0;
1138 }
1139 re_match_object = Qnil;
1140
1141 while (n < 0)
1142 {
1143 int val;
1144 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1145 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1146 &search_regs,
1147 /* Don't allow match past current point */
1148 pos_byte - BEGV_BYTE);
1149 if (val == -2)
1150 {
1151 matcher_overflow ();
1152 }
1153 if (val >= 0)
1154 {
1155 pos_byte = search_regs.start[0] + BEGV_BYTE;
1156 for (i = 0; i < search_regs.num_regs; i++)
1157 if (search_regs.start[i] >= 0)
1158 {
1159 search_regs.start[i]
1160 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1161 search_regs.end[i]
1162 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1163 }
1164 XSETBUFFER (last_thing_searched, current_buffer);
1165 /* Set pos to the new position. */
1166 pos = search_regs.start[0];
1167 }
1168 else
1169 {
1170 immediate_quit = 0;
1171 return (n);
1172 }
1173 n++;
1174 }
1175 while (n > 0)
1176 {
1177 int val;
1178 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1179 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1180 &search_regs,
1181 lim_byte - BEGV_BYTE);
1182 if (val == -2)
1183 {
1184 matcher_overflow ();
1185 }
1186 if (val >= 0)
1187 {
1188 pos_byte = search_regs.end[0] + BEGV_BYTE;
1189 for (i = 0; i < search_regs.num_regs; i++)
1190 if (search_regs.start[i] >= 0)
1191 {
1192 search_regs.start[i]
1193 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1194 search_regs.end[i]
1195 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1196 }
1197 XSETBUFFER (last_thing_searched, current_buffer);
1198 pos = search_regs.end[0];
1199 }
1200 else
1201 {
1202 immediate_quit = 0;
1203 return (0 - n);
1204 }
1205 n--;
1206 }
1207 immediate_quit = 0;
1208 return (pos);
1209 }
1210 else /* non-RE case */
1211 {
1212 unsigned char *raw_pattern, *pat;
1213 int raw_pattern_size;
1214 int raw_pattern_size_byte;
1215 unsigned char *patbuf;
1216 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1217 unsigned char *base_pat;
1218 /* Set to positive if we find a non-ASCII char that need
1219 translation. Otherwise set to zero later. */
1220 int charset_base = -1;
1221 int boyer_moore_ok = 1;
1222
1223 /* MULTIBYTE says whether the text to be searched is multibyte.
1224 We must convert PATTERN to match that, or we will not really
1225 find things right. */
1226
1227 if (multibyte == STRING_MULTIBYTE (string))
1228 {
1229 raw_pattern = (unsigned char *) SDATA (string);
1230 raw_pattern_size = SCHARS (string);
1231 raw_pattern_size_byte = SBYTES (string);
1232 }
1233 else if (multibyte)
1234 {
1235 raw_pattern_size = SCHARS (string);
1236 raw_pattern_size_byte
1237 = count_size_as_multibyte (SDATA (string),
1238 raw_pattern_size);
1239 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1240 copy_text (SDATA (string), raw_pattern,
1241 SCHARS (string), 0, 1);
1242 }
1243 else
1244 {
1245 /* Converting multibyte to single-byte.
1246
1247 ??? Perhaps this conversion should be done in a special way
1248 by subtracting nonascii-insert-offset from each non-ASCII char,
1249 so that only the multibyte chars which really correspond to
1250 the chosen single-byte character set can possibly match. */
1251 raw_pattern_size = SCHARS (string);
1252 raw_pattern_size_byte = SCHARS (string);
1253 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1254 copy_text (SDATA (string), raw_pattern,
1255 SBYTES (string), 1, 0);
1256 }
1257
1258 /* Copy and optionally translate the pattern. */
1259 len = raw_pattern_size;
1260 len_byte = raw_pattern_size_byte;
1261 patbuf = (unsigned char *) alloca (len_byte);
1262 pat = patbuf;
1263 base_pat = raw_pattern;
1264 if (multibyte)
1265 {
1266 /* Fill patbuf by translated characters in STRING while
1267 checking if we can use boyer-moore search. If TRT is
1268 non-nil, we can use boyer-moore search only if TRT can be
1269 represented by the byte array of 256 elements. For that,
1270 all non-ASCII case-equivalents of all case-senstive
1271 characters in STRING must belong to the same charset and
1272 row. */
1273
1274 while (--len >= 0)
1275 {
1276 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1277 int c, translated, inverse;
1278 int in_charlen, charlen;
1279
1280 /* If we got here and the RE flag is set, it's because we're
1281 dealing with a regexp known to be trivial, so the backslash
1282 just quotes the next character. */
1283 if (RE && *base_pat == '\\')
1284 {
1285 len--;
1286 raw_pattern_size--;
1287 len_byte--;
1288 base_pat++;
1289 }
1290
1291 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1292
1293 if (NILP (trt))
1294 {
1295 str = base_pat;
1296 charlen = in_charlen;
1297 }
1298 else
1299 {
1300 /* Translate the character. */
1301 TRANSLATE (translated, trt, c);
1302 charlen = CHAR_STRING (translated, str_base);
1303 str = str_base;
1304
1305 /* Check if C has any other case-equivalents. */
1306 TRANSLATE (inverse, inverse_trt, c);
1307 /* If so, check if we can use boyer-moore. */
1308 if (c != inverse && boyer_moore_ok)
1309 {
1310 /* Check if all equivalents belong to the same
1311 charset & row. Note that the check of C
1312 itself is done by the last iteration. Note
1313 also that we don't have to check ASCII
1314 characters because boyer-moore search can
1315 always handle their translation. */
1316 while (1)
1317 {
1318 if (ASCII_BYTE_P (inverse))
1319 {
1320 if (charset_base > 0)
1321 {
1322 boyer_moore_ok = 0;
1323 break;
1324 }
1325 charset_base = 0;
1326 }
1327 else if (SINGLE_BYTE_CHAR_P (inverse))
1328 {
1329 /* Boyer-moore search can't handle a
1330 translation of an eight-bit
1331 character. */
1332 boyer_moore_ok = 0;
1333 break;
1334 }
1335 else if (charset_base < 0)
1336 charset_base = inverse & ~CHAR_FIELD3_MASK;
1337 else if ((inverse & ~CHAR_FIELD3_MASK)
1338 != charset_base)
1339 {
1340 boyer_moore_ok = 0;
1341 break;
1342 }
1343 if (c == inverse)
1344 break;
1345 TRANSLATE (inverse, inverse_trt, inverse);
1346 }
1347 }
1348 }
1349 if (charset_base < 0)
1350 charset_base = 0;
1351
1352 /* Store this character into the translated pattern. */
1353 bcopy (str, pat, charlen);
1354 pat += charlen;
1355 base_pat += in_charlen;
1356 len_byte -= in_charlen;
1357 }
1358 }
1359 else
1360 {
1361 /* Unibyte buffer. */
1362 charset_base = 0;
1363 while (--len >= 0)
1364 {
1365 int c, translated;
1366
1367 /* If we got here and the RE flag is set, it's because we're
1368 dealing with a regexp known to be trivial, so the backslash
1369 just quotes the next character. */
1370 if (RE && *base_pat == '\\')
1371 {
1372 len--;
1373 raw_pattern_size--;
1374 base_pat++;
1375 }
1376 c = *base_pat++;
1377 TRANSLATE (translated, trt, c);
1378 *pat++ = translated;
1379 }
1380 }
1381
1382 len_byte = pat - patbuf;
1383 len = raw_pattern_size;
1384 pat = base_pat = patbuf;
1385
1386 if (boyer_moore_ok)
1387 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1388 pos, pos_byte, lim, lim_byte,
1389 charset_base);
1390 else
1391 return simple_search (n, pat, len, len_byte, trt,
1392 pos, pos_byte, lim, lim_byte);
1393 }
1394 }
1395 \f
1396 /* Do a simple string search N times for the string PAT,
1397 whose length is LEN/LEN_BYTE,
1398 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1399 TRT is the translation table.
1400
1401 Return the character position where the match is found.
1402 Otherwise, if M matches remained to be found, return -M.
1403
1404 This kind of search works regardless of what is in PAT and
1405 regardless of what is in TRT. It is used in cases where
1406 boyer_moore cannot work. */
1407
1408 static int
1409 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1410 int n;
1411 unsigned char *pat;
1412 int len, len_byte;
1413 Lisp_Object trt;
1414 int pos, pos_byte;
1415 int lim, lim_byte;
1416 {
1417 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1418 int forward = n > 0;
1419
1420 if (lim > pos && multibyte)
1421 while (n > 0)
1422 {
1423 while (1)
1424 {
1425 /* Try matching at position POS. */
1426 int this_pos = pos;
1427 int this_pos_byte = pos_byte;
1428 int this_len = len;
1429 int this_len_byte = len_byte;
1430 unsigned char *p = pat;
1431 if (pos + len > lim)
1432 goto stop;
1433
1434 while (this_len > 0)
1435 {
1436 int charlen, buf_charlen;
1437 int pat_ch, buf_ch;
1438
1439 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1440 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1441 ZV_BYTE - this_pos_byte,
1442 buf_charlen);
1443 TRANSLATE (buf_ch, trt, buf_ch);
1444
1445 if (buf_ch != pat_ch)
1446 break;
1447
1448 this_len_byte -= charlen;
1449 this_len--;
1450 p += charlen;
1451
1452 this_pos_byte += buf_charlen;
1453 this_pos++;
1454 }
1455
1456 if (this_len == 0)
1457 {
1458 pos += len;
1459 pos_byte += len_byte;
1460 break;
1461 }
1462
1463 INC_BOTH (pos, pos_byte);
1464 }
1465
1466 n--;
1467 }
1468 else if (lim > pos)
1469 while (n > 0)
1470 {
1471 while (1)
1472 {
1473 /* Try matching at position POS. */
1474 int this_pos = pos;
1475 int this_len = len;
1476 unsigned char *p = pat;
1477
1478 if (pos + len > lim)
1479 goto stop;
1480
1481 while (this_len > 0)
1482 {
1483 int pat_ch = *p++;
1484 int buf_ch = FETCH_BYTE (this_pos);
1485 TRANSLATE (buf_ch, trt, buf_ch);
1486
1487 if (buf_ch != pat_ch)
1488 break;
1489
1490 this_len--;
1491 this_pos++;
1492 }
1493
1494 if (this_len == 0)
1495 {
1496 pos += len;
1497 break;
1498 }
1499
1500 pos++;
1501 }
1502
1503 n--;
1504 }
1505 /* Backwards search. */
1506 else if (lim < pos && multibyte)
1507 while (n < 0)
1508 {
1509 while (1)
1510 {
1511 /* Try matching at position POS. */
1512 int this_pos = pos - len;
1513 int this_pos_byte = pos_byte - len_byte;
1514 int this_len = len;
1515 int this_len_byte = len_byte;
1516 unsigned char *p = pat;
1517
1518 if (this_pos < lim || this_pos_byte < lim_byte)
1519 goto stop;
1520
1521 while (this_len > 0)
1522 {
1523 int charlen, buf_charlen;
1524 int pat_ch, buf_ch;
1525
1526 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1527 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1528 ZV_BYTE - this_pos_byte,
1529 buf_charlen);
1530 TRANSLATE (buf_ch, trt, buf_ch);
1531
1532 if (buf_ch != pat_ch)
1533 break;
1534
1535 this_len_byte -= charlen;
1536 this_len--;
1537 p += charlen;
1538 this_pos_byte += buf_charlen;
1539 this_pos++;
1540 }
1541
1542 if (this_len == 0)
1543 {
1544 pos -= len;
1545 pos_byte -= len_byte;
1546 break;
1547 }
1548
1549 DEC_BOTH (pos, pos_byte);
1550 }
1551
1552 n++;
1553 }
1554 else if (lim < pos)
1555 while (n < 0)
1556 {
1557 while (1)
1558 {
1559 /* Try matching at position POS. */
1560 int this_pos = pos - len;
1561 int this_len = len;
1562 unsigned char *p = pat;
1563
1564 if (pos - len < lim)
1565 goto stop;
1566
1567 while (this_len > 0)
1568 {
1569 int pat_ch = *p++;
1570 int buf_ch = FETCH_BYTE (this_pos);
1571 TRANSLATE (buf_ch, trt, buf_ch);
1572
1573 if (buf_ch != pat_ch)
1574 break;
1575 this_len--;
1576 this_pos++;
1577 }
1578
1579 if (this_len == 0)
1580 {
1581 pos -= len;
1582 break;
1583 }
1584
1585 pos--;
1586 }
1587
1588 n++;
1589 }
1590
1591 stop:
1592 if (n == 0)
1593 {
1594 if (forward)
1595 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1596 else
1597 set_search_regs (multibyte ? pos_byte : pos, len_byte);
1598
1599 return pos;
1600 }
1601 else if (n > 0)
1602 return -n;
1603 else
1604 return n;
1605 }
1606 \f
1607 /* Do Boyer-Moore search N times for the string BASE_PAT,
1608 whose length is LEN/LEN_BYTE,
1609 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1610 DIRECTION says which direction we search in.
1611 TRT and INVERSE_TRT are translation tables.
1612 Characters in PAT are already translated by TRT.
1613
1614 This kind of search works if all the characters in BASE_PAT that
1615 have nontrivial translation are the same aside from the last byte.
1616 This makes it possible to translate just the last byte of a
1617 character, and do so after just a simple test of the context.
1618 CHARSET_BASE is nonzero if there is such a non-ASCII character.
1619
1620 If that criterion is not satisfied, do not call this function. */
1621
1622 static int
1623 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1624 pos, pos_byte, lim, lim_byte, charset_base)
1625 int n;
1626 unsigned char *base_pat;
1627 int len, len_byte;
1628 Lisp_Object trt;
1629 Lisp_Object inverse_trt;
1630 int pos, pos_byte;
1631 int lim, lim_byte;
1632 int charset_base;
1633 {
1634 int direction = ((n > 0) ? 1 : -1);
1635 register int dirlen;
1636 int infinity, limit, stride_for_teases = 0;
1637 register int *BM_tab;
1638 int *BM_tab_base;
1639 register unsigned char *cursor, *p_limit;
1640 register int i, j;
1641 unsigned char *pat, *pat_end;
1642 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1643
1644 unsigned char simple_translate[0400];
1645 /* These are set to the preceding bytes of a byte to be translated
1646 if charset_base is nonzero. As the maximum byte length of a
1647 multibyte character is 4, we have to check at most three previous
1648 bytes. */
1649 int translate_prev_byte1 = 0;
1650 int translate_prev_byte2 = 0;
1651 int translate_prev_byte3 = 0;
1652
1653 #ifdef C_ALLOCA
1654 int BM_tab_space[0400];
1655 BM_tab = &BM_tab_space[0];
1656 #else
1657 BM_tab = (int *) alloca (0400 * sizeof (int));
1658 #endif
1659 /* The general approach is that we are going to maintain that we know */
1660 /* the first (closest to the present position, in whatever direction */
1661 /* we're searching) character that could possibly be the last */
1662 /* (furthest from present position) character of a valid match. We */
1663 /* advance the state of our knowledge by looking at that character */
1664 /* and seeing whether it indeed matches the last character of the */
1665 /* pattern. If it does, we take a closer look. If it does not, we */
1666 /* move our pointer (to putative last characters) as far as is */
1667 /* logically possible. This amount of movement, which I call a */
1668 /* stride, will be the length of the pattern if the actual character */
1669 /* appears nowhere in the pattern, otherwise it will be the distance */
1670 /* from the last occurrence of that character to the end of the */
1671 /* pattern. */
1672 /* As a coding trick, an enormous stride is coded into the table for */
1673 /* characters that match the last character. This allows use of only */
1674 /* a single test, a test for having gone past the end of the */
1675 /* permissible match region, to test for both possible matches (when */
1676 /* the stride goes past the end immediately) and failure to */
1677 /* match (where you get nudged past the end one stride at a time). */
1678
1679 /* Here we make a "mickey mouse" BM table. The stride of the search */
1680 /* is determined only by the last character of the putative match. */
1681 /* If that character does not match, we will stride the proper */
1682 /* distance to propose a match that superimposes it on the last */
1683 /* instance of a character that matches it (per trt), or misses */
1684 /* it entirely if there is none. */
1685
1686 dirlen = len_byte * direction;
1687 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1688
1689 /* Record position after the end of the pattern. */
1690 pat_end = base_pat + len_byte;
1691 /* BASE_PAT points to a character that we start scanning from.
1692 It is the first character in a forward search,
1693 the last character in a backward search. */
1694 if (direction < 0)
1695 base_pat = pat_end - 1;
1696
1697 BM_tab_base = BM_tab;
1698 BM_tab += 0400;
1699 j = dirlen; /* to get it in a register */
1700 /* A character that does not appear in the pattern induces a */
1701 /* stride equal to the pattern length. */
1702 while (BM_tab_base != BM_tab)
1703 {
1704 *--BM_tab = j;
1705 *--BM_tab = j;
1706 *--BM_tab = j;
1707 *--BM_tab = j;
1708 }
1709
1710 /* We use this for translation, instead of TRT itself.
1711 We fill this in to handle the characters that actually
1712 occur in the pattern. Others don't matter anyway! */
1713 bzero (simple_translate, sizeof simple_translate);
1714 for (i = 0; i < 0400; i++)
1715 simple_translate[i] = i;
1716
1717 if (charset_base)
1718 {
1719 /* Setup translate_prev_byte1/2/3 from CHARSET_BASE. Only a
1720 byte following them are the target of translation. */
1721 int sample_char = charset_base | 0x20;
1722 unsigned char str[MAX_MULTIBYTE_LENGTH];
1723 int len = CHAR_STRING (sample_char, str);
1724
1725 translate_prev_byte1 = str[len - 2];
1726 if (len > 2)
1727 {
1728 translate_prev_byte2 = str[len - 3];
1729 if (len > 3)
1730 translate_prev_byte3 = str[len - 4];
1731 }
1732 }
1733
1734 i = 0;
1735 while (i != infinity)
1736 {
1737 unsigned char *ptr = base_pat + i;
1738 i += direction;
1739 if (i == dirlen)
1740 i = infinity;
1741 if (! NILP (trt))
1742 {
1743 /* If the byte currently looking at is the last of a
1744 character to check case-equivalents, set CH to that
1745 character. An ASCII character and a non-ASCII character
1746 matching with CHARSET_BASE are to be checked. */
1747 int ch = -1;
1748
1749 if (ASCII_BYTE_P (*ptr) || ! multibyte)
1750 ch = *ptr;
1751 else if (charset_base
1752 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1753 {
1754 unsigned char *charstart = ptr - 1;
1755
1756 while (! (CHAR_HEAD_P (*charstart)))
1757 charstart--;
1758 ch = STRING_CHAR (charstart, ptr - charstart + 1);
1759 if (charset_base != (ch & ~CHAR_FIELD3_MASK))
1760 ch = -1;
1761 }
1762
1763 if (ch >= 0400)
1764 j = ((unsigned char) ch) | 0200;
1765 else
1766 j = *ptr;
1767
1768 if (i == infinity)
1769 stride_for_teases = BM_tab[j];
1770
1771 BM_tab[j] = dirlen - i;
1772 /* A translation table is accompanied by its inverse -- see */
1773 /* comment following downcase_table for details */
1774 if (ch >= 0)
1775 {
1776 int starting_ch = ch;
1777 int starting_j = j;
1778
1779 while (1)
1780 {
1781 TRANSLATE (ch, inverse_trt, ch);
1782 if (ch >= 0400)
1783 j = ((unsigned char) ch) | 0200;
1784 else
1785 j = (unsigned char) ch;
1786
1787 /* For all the characters that map into CH,
1788 set up simple_translate to map the last byte
1789 into STARTING_J. */
1790 simple_translate[j] = starting_j;
1791 if (ch == starting_ch)
1792 break;
1793 BM_tab[j] = dirlen - i;
1794 }
1795 }
1796 }
1797 else
1798 {
1799 j = *ptr;
1800
1801 if (i == infinity)
1802 stride_for_teases = BM_tab[j];
1803 BM_tab[j] = dirlen - i;
1804 }
1805 /* stride_for_teases tells how much to stride if we get a */
1806 /* match on the far character but are subsequently */
1807 /* disappointed, by recording what the stride would have been */
1808 /* for that character if the last character had been */
1809 /* different. */
1810 }
1811 infinity = dirlen - infinity;
1812 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1813 /* loop invariant - POS_BYTE points at where last char (first
1814 char if reverse) of pattern would align in a possible match. */
1815 while (n != 0)
1816 {
1817 int tail_end;
1818 unsigned char *tail_end_ptr;
1819
1820 /* It's been reported that some (broken) compiler thinks that
1821 Boolean expressions in an arithmetic context are unsigned.
1822 Using an explicit ?1:0 prevents this. */
1823 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1824 < 0)
1825 return (n * (0 - direction));
1826 /* First we do the part we can by pointers (maybe nothing) */
1827 QUIT;
1828 pat = base_pat;
1829 limit = pos_byte - dirlen + direction;
1830 if (direction > 0)
1831 {
1832 limit = BUFFER_CEILING_OF (limit);
1833 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1834 can take on without hitting edge of buffer or the gap. */
1835 limit = min (limit, pos_byte + 20000);
1836 limit = min (limit, lim_byte - 1);
1837 }
1838 else
1839 {
1840 limit = BUFFER_FLOOR_OF (limit);
1841 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1842 can take on without hitting edge of buffer or the gap. */
1843 limit = max (limit, pos_byte - 20000);
1844 limit = max (limit, lim_byte);
1845 }
1846 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1847 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1848
1849 if ((limit - pos_byte) * direction > 20)
1850 {
1851 unsigned char *p2;
1852
1853 p_limit = BYTE_POS_ADDR (limit);
1854 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1855 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1856 while (1) /* use one cursor setting as long as i can */
1857 {
1858 if (direction > 0) /* worth duplicating */
1859 {
1860 /* Use signed comparison if appropriate
1861 to make cursor+infinity sure to be > p_limit.
1862 Assuming that the buffer lies in a range of addresses
1863 that are all "positive" (as ints) or all "negative",
1864 either kind of comparison will work as long
1865 as we don't step by infinity. So pick the kind
1866 that works when we do step by infinity. */
1867 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1868 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1869 cursor += BM_tab[*cursor];
1870 else
1871 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1872 cursor += BM_tab[*cursor];
1873 }
1874 else
1875 {
1876 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1877 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1878 cursor += BM_tab[*cursor];
1879 else
1880 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1881 cursor += BM_tab[*cursor];
1882 }
1883 /* If you are here, cursor is beyond the end of the searched region. */
1884 /* This can happen if you match on the far character of the pattern, */
1885 /* because the "stride" of that character is infinity, a number able */
1886 /* to throw you well beyond the end of the search. It can also */
1887 /* happen if you fail to match within the permitted region and would */
1888 /* otherwise try a character beyond that region */
1889 if ((cursor - p_limit) * direction <= len_byte)
1890 break; /* a small overrun is genuine */
1891 cursor -= infinity; /* large overrun = hit */
1892 i = dirlen - direction;
1893 if (! NILP (trt))
1894 {
1895 while ((i -= direction) + direction != 0)
1896 {
1897 int ch;
1898 cursor -= direction;
1899 /* Translate only the last byte of a character. */
1900 if (! multibyte
1901 || ((cursor == tail_end_ptr
1902 || CHAR_HEAD_P (cursor[1]))
1903 && (CHAR_HEAD_P (cursor[0])
1904 /* Check if this is the last byte of
1905 a translable character. */
1906 || (translate_prev_byte1 == cursor[-1]
1907 && (CHAR_HEAD_P (translate_prev_byte1)
1908 || (translate_prev_byte2 == cursor[-2]
1909 && (CHAR_HEAD_P (translate_prev_byte2)
1910 || (translate_prev_byte3 == cursor[-3]))))))))
1911 ch = simple_translate[*cursor];
1912 else
1913 ch = *cursor;
1914 if (pat[i] != ch)
1915 break;
1916 }
1917 }
1918 else
1919 {
1920 while ((i -= direction) + direction != 0)
1921 {
1922 cursor -= direction;
1923 if (pat[i] != *cursor)
1924 break;
1925 }
1926 }
1927 cursor += dirlen - i - direction; /* fix cursor */
1928 if (i + direction == 0)
1929 {
1930 int position;
1931
1932 cursor -= direction;
1933
1934 position = pos_byte + cursor - p2 + ((direction > 0)
1935 ? 1 - len_byte : 0);
1936 set_search_regs (position, len_byte);
1937
1938 if ((n -= direction) != 0)
1939 cursor += dirlen; /* to resume search */
1940 else
1941 return ((direction > 0)
1942 ? search_regs.end[0] : search_regs.start[0]);
1943 }
1944 else
1945 cursor += stride_for_teases; /* <sigh> we lose - */
1946 }
1947 pos_byte += cursor - p2;
1948 }
1949 else
1950 /* Now we'll pick up a clump that has to be done the hard */
1951 /* way because it covers a discontinuity */
1952 {
1953 limit = ((direction > 0)
1954 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1955 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1956 limit = ((direction > 0)
1957 ? min (limit + len_byte, lim_byte - 1)
1958 : max (limit - len_byte, lim_byte));
1959 /* LIMIT is now the last value POS_BYTE can have
1960 and still be valid for a possible match. */
1961 while (1)
1962 {
1963 /* This loop can be coded for space rather than */
1964 /* speed because it will usually run only once. */
1965 /* (the reach is at most len + 21, and typically */
1966 /* does not exceed len) */
1967 while ((limit - pos_byte) * direction >= 0)
1968 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1969 /* now run the same tests to distinguish going off the */
1970 /* end, a match or a phony match. */
1971 if ((pos_byte - limit) * direction <= len_byte)
1972 break; /* ran off the end */
1973 /* Found what might be a match.
1974 Set POS_BYTE back to last (first if reverse) pos. */
1975 pos_byte -= infinity;
1976 i = dirlen - direction;
1977 while ((i -= direction) + direction != 0)
1978 {
1979 int ch;
1980 unsigned char *ptr;
1981 pos_byte -= direction;
1982 ptr = BYTE_POS_ADDR (pos_byte);
1983 /* Translate only the last byte of a character. */
1984 if (! multibyte
1985 || ((ptr == tail_end_ptr
1986 || CHAR_HEAD_P (ptr[1]))
1987 && (CHAR_HEAD_P (ptr[0])
1988 /* Check if this is the last byte of a
1989 translable character. */
1990 || (translate_prev_byte1 == ptr[-1]
1991 && (CHAR_HEAD_P (translate_prev_byte1)
1992 || (translate_prev_byte2 == ptr[-2]
1993 && (CHAR_HEAD_P (translate_prev_byte2)
1994 || translate_prev_byte3 == ptr[-3])))))))
1995 ch = simple_translate[*ptr];
1996 else
1997 ch = *ptr;
1998 if (pat[i] != ch)
1999 break;
2000 }
2001 /* Above loop has moved POS_BYTE part or all the way
2002 back to the first pos (last pos if reverse).
2003 Set it once again at the last (first if reverse) char. */
2004 pos_byte += dirlen - i- direction;
2005 if (i + direction == 0)
2006 {
2007 int position;
2008 pos_byte -= direction;
2009
2010 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2011
2012 set_search_regs (position, len_byte);
2013
2014 if ((n -= direction) != 0)
2015 pos_byte += dirlen; /* to resume search */
2016 else
2017 return ((direction > 0)
2018 ? search_regs.end[0] : search_regs.start[0]);
2019 }
2020 else
2021 pos_byte += stride_for_teases;
2022 }
2023 }
2024 /* We have done one clump. Can we continue? */
2025 if ((lim_byte - pos_byte) * direction < 0)
2026 return ((0 - n) * direction);
2027 }
2028 return BYTE_TO_CHAR (pos_byte);
2029 }
2030
2031 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2032 for the overall match just found in the current buffer.
2033 Also clear out the match data for registers 1 and up. */
2034
2035 static void
2036 set_search_regs (beg_byte, nbytes)
2037 int beg_byte, nbytes;
2038 {
2039 int i;
2040
2041 /* Make sure we have registers in which to store
2042 the match position. */
2043 if (search_regs.num_regs == 0)
2044 {
2045 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2046 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2047 search_regs.num_regs = 2;
2048 }
2049
2050 /* Clear out the other registers. */
2051 for (i = 1; i < search_regs.num_regs; i++)
2052 {
2053 search_regs.start[i] = -1;
2054 search_regs.end[i] = -1;
2055 }
2056
2057 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2058 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2059 XSETBUFFER (last_thing_searched, current_buffer);
2060 }
2061 \f
2062 /* Given a string of words separated by word delimiters,
2063 compute a regexp that matches those exact words
2064 separated by arbitrary punctuation. */
2065
2066 static Lisp_Object
2067 wordify (string)
2068 Lisp_Object string;
2069 {
2070 register unsigned char *p, *o;
2071 register int i, i_byte, len, punct_count = 0, word_count = 0;
2072 Lisp_Object val;
2073 int prev_c = 0;
2074 int adjust;
2075
2076 CHECK_STRING (string);
2077 p = SDATA (string);
2078 len = SCHARS (string);
2079
2080 for (i = 0, i_byte = 0; i < len; )
2081 {
2082 int c;
2083
2084 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2085
2086 if (SYNTAX (c) != Sword)
2087 {
2088 punct_count++;
2089 if (i > 0 && SYNTAX (prev_c) == Sword)
2090 word_count++;
2091 }
2092
2093 prev_c = c;
2094 }
2095
2096 if (SYNTAX (prev_c) == Sword)
2097 word_count++;
2098 if (!word_count)
2099 return empty_string;
2100
2101 adjust = - punct_count + 5 * (word_count - 1) + 4;
2102 if (STRING_MULTIBYTE (string))
2103 val = make_uninit_multibyte_string (len + adjust,
2104 SBYTES (string)
2105 + adjust);
2106 else
2107 val = make_uninit_string (len + adjust);
2108
2109 o = SDATA (val);
2110 *o++ = '\\';
2111 *o++ = 'b';
2112 prev_c = 0;
2113
2114 for (i = 0, i_byte = 0; i < len; )
2115 {
2116 int c;
2117 int i_byte_orig = i_byte;
2118
2119 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2120
2121 if (SYNTAX (c) == Sword)
2122 {
2123 bcopy (SDATA (string) + i_byte_orig, o,
2124 i_byte - i_byte_orig);
2125 o += i_byte - i_byte_orig;
2126 }
2127 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2128 {
2129 *o++ = '\\';
2130 *o++ = 'W';
2131 *o++ = '\\';
2132 *o++ = 'W';
2133 *o++ = '*';
2134 }
2135
2136 prev_c = c;
2137 }
2138
2139 *o++ = '\\';
2140 *o++ = 'b';
2141
2142 return val;
2143 }
2144 \f
2145 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2146 "MSearch backward: ",
2147 doc: /* Search backward from point for STRING.
2148 Set point to the beginning of the occurrence found, and return point.
2149 An optional second argument bounds the search; it is a buffer position.
2150 The match found must not extend before that position.
2151 Optional third argument, if t, means if fail just return nil (no error).
2152 If not nil and not t, position at limit of search and return nil.
2153 Optional fourth argument is repeat count--search for successive occurrences.
2154
2155 Search case-sensitivity is determined by the value of the variable
2156 `case-fold-search', which see.
2157
2158 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2159 (string, bound, noerror, count)
2160 Lisp_Object string, bound, noerror, count;
2161 {
2162 return search_command (string, bound, noerror, count, -1, 0, 0);
2163 }
2164
2165 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2166 doc: /* Search forward from point for STRING.
2167 Set point to the end of the occurrence found, and return point.
2168 An optional second argument bounds the search; it is a buffer position.
2169 The match found must not extend after that position. A value of nil is
2170 equivalent to (point-max).
2171 Optional third argument, if t, means if fail just return nil (no error).
2172 If not nil and not t, move to limit of search and return nil.
2173 Optional fourth argument is repeat count--search for successive occurrences.
2174
2175 Search case-sensitivity is determined by the value of the variable
2176 `case-fold-search', which see.
2177
2178 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2179 (string, bound, noerror, count)
2180 Lisp_Object string, bound, noerror, count;
2181 {
2182 return search_command (string, bound, noerror, count, 1, 0, 0);
2183 }
2184
2185 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2186 "sWord search backward: ",
2187 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2188 Set point to the beginning of the occurrence found, and return point.
2189 An optional second argument bounds the search; it is a buffer position.
2190 The match found must not extend before that position.
2191 Optional third argument, if t, means if fail just return nil (no error).
2192 If not nil and not t, move to limit of search and return nil.
2193 Optional fourth argument is repeat count--search for successive occurrences. */)
2194 (string, bound, noerror, count)
2195 Lisp_Object string, bound, noerror, count;
2196 {
2197 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2198 }
2199
2200 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2201 "sWord search: ",
2202 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2203 Set point to the end of the occurrence found, and return point.
2204 An optional second argument bounds the search; it is a buffer position.
2205 The match found must not extend after that position.
2206 Optional third argument, if t, means if fail just return nil (no error).
2207 If not nil and not t, move to limit of search and return nil.
2208 Optional fourth argument is repeat count--search for successive occurrences. */)
2209 (string, bound, noerror, count)
2210 Lisp_Object string, bound, noerror, count;
2211 {
2212 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2213 }
2214
2215 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2216 "sRE search backward: ",
2217 doc: /* Search backward from point for match for regular expression REGEXP.
2218 Set point to the beginning of the match, and return point.
2219 The match found is the one starting last in the buffer
2220 and yet ending before the origin of the search.
2221 An optional second argument bounds the search; it is a buffer position.
2222 The match found must start at or after that position.
2223 Optional third argument, if t, means if fail just return nil (no error).
2224 If not nil and not t, move to limit of search and return nil.
2225 Optional fourth argument is repeat count--search for successive occurrences.
2226 See also the functions `match-beginning', `match-end', `match-string',
2227 and `replace-match'. */)
2228 (regexp, bound, noerror, count)
2229 Lisp_Object regexp, bound, noerror, count;
2230 {
2231 return search_command (regexp, bound, noerror, count, -1, 1, 0);
2232 }
2233
2234 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2235 "sRE search: ",
2236 doc: /* Search forward from point for regular expression REGEXP.
2237 Set point to the end of the occurrence found, and return point.
2238 An optional second argument bounds the search; it is a buffer position.
2239 The match found must not extend after that position.
2240 Optional third argument, if t, means if fail just return nil (no error).
2241 If not nil and not t, move to limit of search and return nil.
2242 Optional fourth argument is repeat count--search for successive occurrences.
2243 See also the functions `match-beginning', `match-end', `match-string',
2244 and `replace-match'. */)
2245 (regexp, bound, noerror, count)
2246 Lisp_Object regexp, bound, noerror, count;
2247 {
2248 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2249 }
2250
2251 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2252 "sPosix search backward: ",
2253 doc: /* Search backward from point for match for regular expression REGEXP.
2254 Find the longest match in accord with Posix regular expression rules.
2255 Set point to the beginning of the match, and return point.
2256 The match found is the one starting last in the buffer
2257 and yet ending before the origin of the search.
2258 An optional second argument bounds the search; it is a buffer position.
2259 The match found must start at or after that position.
2260 Optional third argument, if t, means if fail just return nil (no error).
2261 If not nil and not t, move to limit of search and return nil.
2262 Optional fourth argument is repeat count--search for successive occurrences.
2263 See also the functions `match-beginning', `match-end', `match-string',
2264 and `replace-match'. */)
2265 (regexp, bound, noerror, count)
2266 Lisp_Object regexp, bound, noerror, count;
2267 {
2268 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2269 }
2270
2271 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2272 "sPosix search: ",
2273 doc: /* Search forward from point for regular expression REGEXP.
2274 Find the longest match in accord with Posix regular expression rules.
2275 Set point to the end of the occurrence found, and return point.
2276 An optional second argument bounds the search; it is a buffer position.
2277 The match found must not extend after that position.
2278 Optional third argument, if t, means if fail just return nil (no error).
2279 If not nil and not t, move to limit of search and return nil.
2280 Optional fourth argument is repeat count--search for successive occurrences.
2281 See also the functions `match-beginning', `match-end', `match-string',
2282 and `replace-match'. */)
2283 (regexp, bound, noerror, count)
2284 Lisp_Object regexp, bound, noerror, count;
2285 {
2286 return search_command (regexp, bound, noerror, count, 1, 1, 1);
2287 }
2288 \f
2289 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2290 doc: /* Replace text matched by last search with NEWTEXT.
2291 Leave point at the end of the replacement text.
2292
2293 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2294 Otherwise maybe capitalize the whole text, or maybe just word initials,
2295 based on the replaced text.
2296 If the replaced text has only capital letters
2297 and has at least one multiletter word, convert NEWTEXT to all caps.
2298 Otherwise if all words are capitalized in the replaced text,
2299 capitalize each word in NEWTEXT.
2300
2301 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2302 Otherwise treat `\\' as special:
2303 `\\&' in NEWTEXT means substitute original matched text.
2304 `\\N' means substitute what matched the Nth `\\(...\\)'.
2305 If Nth parens didn't match, substitute nothing.
2306 `\\\\' means insert one `\\'.
2307 Case conversion does not apply to these substitutions.
2308
2309 FIXEDCASE and LITERAL are optional arguments.
2310
2311 The optional fourth argument STRING can be a string to modify.
2312 This is meaningful when the previous match was done against STRING,
2313 using `string-match'. When used this way, `replace-match'
2314 creates and returns a new string made by copying STRING and replacing
2315 the part of STRING that was matched.
2316
2317 The optional fifth argument SUBEXP specifies a subexpression;
2318 it says to replace just that subexpression with NEWTEXT,
2319 rather than replacing the entire matched text.
2320 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2321 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2322 NEWTEXT in place of subexp N.
2323 This is useful only after a regular expression search or match,
2324 since only regular expressions have distinguished subexpressions. */)
2325 (newtext, fixedcase, literal, string, subexp)
2326 Lisp_Object newtext, fixedcase, literal, string, subexp;
2327 {
2328 enum { nochange, all_caps, cap_initial } case_action;
2329 register int pos, pos_byte;
2330 int some_multiletter_word;
2331 int some_lowercase;
2332 int some_uppercase;
2333 int some_nonuppercase_initial;
2334 register int c, prevc;
2335 int sub;
2336 int opoint, newpoint;
2337
2338 CHECK_STRING (newtext);
2339
2340 if (! NILP (string))
2341 CHECK_STRING (string);
2342
2343 case_action = nochange; /* We tried an initialization */
2344 /* but some C compilers blew it */
2345
2346 if (search_regs.num_regs <= 0)
2347 error ("`replace-match' called before any match found");
2348
2349 if (NILP (subexp))
2350 sub = 0;
2351 else
2352 {
2353 CHECK_NUMBER (subexp);
2354 sub = XINT (subexp);
2355 if (sub < 0 || sub >= search_regs.num_regs)
2356 args_out_of_range (subexp, make_number (search_regs.num_regs));
2357 }
2358
2359 if (NILP (string))
2360 {
2361 if (search_regs.start[sub] < BEGV
2362 || search_regs.start[sub] > search_regs.end[sub]
2363 || search_regs.end[sub] > ZV)
2364 args_out_of_range (make_number (search_regs.start[sub]),
2365 make_number (search_regs.end[sub]));
2366 }
2367 else
2368 {
2369 if (search_regs.start[sub] < 0
2370 || search_regs.start[sub] > search_regs.end[sub]
2371 || search_regs.end[sub] > SCHARS (string))
2372 args_out_of_range (make_number (search_regs.start[sub]),
2373 make_number (search_regs.end[sub]));
2374 }
2375
2376 if (NILP (fixedcase))
2377 {
2378 /* Decide how to casify by examining the matched text. */
2379 int last;
2380
2381 pos = search_regs.start[sub];
2382 last = search_regs.end[sub];
2383
2384 if (NILP (string))
2385 pos_byte = CHAR_TO_BYTE (pos);
2386 else
2387 pos_byte = string_char_to_byte (string, pos);
2388
2389 prevc = '\n';
2390 case_action = all_caps;
2391
2392 /* some_multiletter_word is set nonzero if any original word
2393 is more than one letter long. */
2394 some_multiletter_word = 0;
2395 some_lowercase = 0;
2396 some_nonuppercase_initial = 0;
2397 some_uppercase = 0;
2398
2399 while (pos < last)
2400 {
2401 if (NILP (string))
2402 {
2403 c = FETCH_CHAR (pos_byte);
2404 INC_BOTH (pos, pos_byte);
2405 }
2406 else
2407 FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
2408
2409 if (LOWERCASEP (c))
2410 {
2411 /* Cannot be all caps if any original char is lower case */
2412
2413 some_lowercase = 1;
2414 if (SYNTAX (prevc) != Sword)
2415 some_nonuppercase_initial = 1;
2416 else
2417 some_multiletter_word = 1;
2418 }
2419 else if (UPPERCASEP (c))
2420 {
2421 some_uppercase = 1;
2422 if (SYNTAX (prevc) != Sword)
2423 ;
2424 else
2425 some_multiletter_word = 1;
2426 }
2427 else
2428 {
2429 /* If the initial is a caseless word constituent,
2430 treat that like a lowercase initial. */
2431 if (SYNTAX (prevc) != Sword)
2432 some_nonuppercase_initial = 1;
2433 }
2434
2435 prevc = c;
2436 }
2437
2438 /* Convert to all caps if the old text is all caps
2439 and has at least one multiletter word. */
2440 if (! some_lowercase && some_multiletter_word)
2441 case_action = all_caps;
2442 /* Capitalize each word, if the old text has all capitalized words. */
2443 else if (!some_nonuppercase_initial && some_multiletter_word)
2444 case_action = cap_initial;
2445 else if (!some_nonuppercase_initial && some_uppercase)
2446 /* Should x -> yz, operating on X, give Yz or YZ?
2447 We'll assume the latter. */
2448 case_action = all_caps;
2449 else
2450 case_action = nochange;
2451 }
2452
2453 /* Do replacement in a string. */
2454 if (!NILP (string))
2455 {
2456 Lisp_Object before, after;
2457
2458 before = Fsubstring (string, make_number (0),
2459 make_number (search_regs.start[sub]));
2460 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2461
2462 /* Substitute parts of the match into NEWTEXT
2463 if desired. */
2464 if (NILP (literal))
2465 {
2466 int lastpos = 0;
2467 int lastpos_byte = 0;
2468 /* We build up the substituted string in ACCUM. */
2469 Lisp_Object accum;
2470 Lisp_Object middle;
2471 int length = SBYTES (newtext);
2472
2473 accum = Qnil;
2474
2475 for (pos_byte = 0, pos = 0; pos_byte < length;)
2476 {
2477 int substart = -1;
2478 int subend = 0;
2479 int delbackslash = 0;
2480
2481 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2482
2483 if (c == '\\')
2484 {
2485 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2486
2487 if (c == '&')
2488 {
2489 substart = search_regs.start[sub];
2490 subend = search_regs.end[sub];
2491 }
2492 else if (c >= '1' && c <= '9')
2493 {
2494 if (search_regs.start[c - '0'] >= 0
2495 && c <= search_regs.num_regs + '0')
2496 {
2497 substart = search_regs.start[c - '0'];
2498 subend = search_regs.end[c - '0'];
2499 }
2500 else
2501 {
2502 /* If that subexp did not match,
2503 replace \\N with nothing. */
2504 substart = 0;
2505 subend = 0;
2506 }
2507 }
2508 else if (c == '\\')
2509 delbackslash = 1;
2510 else
2511 error ("Invalid use of `\\' in replacement text");
2512 }
2513 if (substart >= 0)
2514 {
2515 if (pos - 2 != lastpos)
2516 middle = substring_both (newtext, lastpos,
2517 lastpos_byte,
2518 pos - 2, pos_byte - 2);
2519 else
2520 middle = Qnil;
2521 accum = concat3 (accum, middle,
2522 Fsubstring (string,
2523 make_number (substart),
2524 make_number (subend)));
2525 lastpos = pos;
2526 lastpos_byte = pos_byte;
2527 }
2528 else if (delbackslash)
2529 {
2530 middle = substring_both (newtext, lastpos,
2531 lastpos_byte,
2532 pos - 1, pos_byte - 1);
2533
2534 accum = concat2 (accum, middle);
2535 lastpos = pos;
2536 lastpos_byte = pos_byte;
2537 }
2538 }
2539
2540 if (pos != lastpos)
2541 middle = substring_both (newtext, lastpos,
2542 lastpos_byte,
2543 pos, pos_byte);
2544 else
2545 middle = Qnil;
2546
2547 newtext = concat2 (accum, middle);
2548 }
2549
2550 /* Do case substitution in NEWTEXT if desired. */
2551 if (case_action == all_caps)
2552 newtext = Fupcase (newtext);
2553 else if (case_action == cap_initial)
2554 newtext = Fupcase_initials (newtext);
2555
2556 return concat3 (before, newtext, after);
2557 }
2558
2559 /* Record point, then move (quietly) to the start of the match. */
2560 if (PT >= search_regs.end[sub])
2561 opoint = PT - ZV;
2562 else if (PT > search_regs.start[sub])
2563 opoint = search_regs.end[sub] - ZV;
2564 else
2565 opoint = PT;
2566
2567 /* If we want non-literal replacement,
2568 perform substitution on the replacement string. */
2569 if (NILP (literal))
2570 {
2571 int length = SBYTES (newtext);
2572 unsigned char *substed;
2573 int substed_alloc_size, substed_len;
2574 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2575 int str_multibyte = STRING_MULTIBYTE (newtext);
2576 Lisp_Object rev_tbl;
2577 int really_changed = 0;
2578
2579 rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2580 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2581 make_number (0))
2582 : Qnil);
2583
2584 substed_alloc_size = length * 2 + 100;
2585 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2586 substed_len = 0;
2587
2588 /* Go thru NEWTEXT, producing the actual text to insert in
2589 SUBSTED while adjusting multibyteness to that of the current
2590 buffer. */
2591
2592 for (pos_byte = 0, pos = 0; pos_byte < length;)
2593 {
2594 unsigned char str[MAX_MULTIBYTE_LENGTH];
2595 unsigned char *add_stuff = NULL;
2596 int add_len = 0;
2597 int idx = -1;
2598
2599 if (str_multibyte)
2600 {
2601 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2602 if (!buf_multibyte)
2603 c = multibyte_char_to_unibyte (c, rev_tbl);
2604 }
2605 else
2606 {
2607 /* Note that we don't have to increment POS. */
2608 c = SREF (newtext, pos_byte++);
2609 if (buf_multibyte)
2610 c = unibyte_char_to_multibyte (c);
2611 }
2612
2613 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2614 or set IDX to a match index, which means put that part
2615 of the buffer text into SUBSTED. */
2616
2617 if (c == '\\')
2618 {
2619 really_changed = 1;
2620
2621 if (str_multibyte)
2622 {
2623 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2624 pos, pos_byte);
2625 if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2626 c = multibyte_char_to_unibyte (c, rev_tbl);
2627 }
2628 else
2629 {
2630 c = SREF (newtext, pos_byte++);
2631 if (buf_multibyte)
2632 c = unibyte_char_to_multibyte (c);
2633 }
2634
2635 if (c == '&')
2636 idx = sub;
2637 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2638 {
2639 if (search_regs.start[c - '0'] >= 1)
2640 idx = c - '0';
2641 }
2642 else if (c == '\\')
2643 add_len = 1, add_stuff = "\\";
2644 else
2645 {
2646 xfree (substed);
2647 error ("Invalid use of `\\' in replacement text");
2648 }
2649 }
2650 else
2651 {
2652 add_len = CHAR_STRING (c, str);
2653 add_stuff = str;
2654 }
2655
2656 /* If we want to copy part of a previous match,
2657 set up ADD_STUFF and ADD_LEN to point to it. */
2658 if (idx >= 0)
2659 {
2660 int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2661 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2662 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2663 move_gap (search_regs.start[idx]);
2664 add_stuff = BYTE_POS_ADDR (begbyte);
2665 }
2666
2667 /* Now the stuff we want to add to SUBSTED
2668 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2669
2670 /* Make sure SUBSTED is big enough. */
2671 if (substed_len + add_len >= substed_alloc_size)
2672 {
2673 substed_alloc_size = substed_len + add_len + 500;
2674 substed = (unsigned char *) xrealloc (substed,
2675 substed_alloc_size + 1);
2676 }
2677
2678 /* Now add to the end of SUBSTED. */
2679 if (add_stuff)
2680 {
2681 bcopy (add_stuff, substed + substed_len, add_len);
2682 substed_len += add_len;
2683 }
2684 }
2685
2686 if (really_changed)
2687 {
2688 if (buf_multibyte)
2689 {
2690 int nchars = multibyte_chars_in_text (substed, substed_len);
2691
2692 newtext = make_multibyte_string (substed, nchars, substed_len);
2693 }
2694 else
2695 newtext = make_unibyte_string (substed, substed_len);
2696 }
2697 xfree (substed);
2698 }
2699
2700 /* Replace the old text with the new in the cleanest possible way. */
2701 replace_range (search_regs.start[sub], search_regs.end[sub],
2702 newtext, 1, 0, 1);
2703 newpoint = search_regs.start[sub] + SCHARS (newtext);
2704
2705 if (case_action == all_caps)
2706 Fupcase_region (make_number (search_regs.start[sub]),
2707 make_number (newpoint));
2708 else if (case_action == cap_initial)
2709 Fupcase_initials_region (make_number (search_regs.start[sub]),
2710 make_number (newpoint));
2711
2712 /* Adjust search data for this change. */
2713 {
2714 int oldend = search_regs.end[sub];
2715 int oldstart = search_regs.start[sub];
2716 int change = newpoint - search_regs.end[sub];
2717 int i;
2718
2719 for (i = 0; i < search_regs.num_regs; i++)
2720 {
2721 if (search_regs.start[i] >= oldend)
2722 search_regs.start[i] += change;
2723 else if (search_regs.start[i] > oldstart)
2724 search_regs.start[i] = oldstart;
2725 if (search_regs.end[i] >= oldend)
2726 search_regs.end[i] += change;
2727 else if (search_regs.end[i] > oldstart)
2728 search_regs.end[i] = oldstart;
2729 }
2730 }
2731
2732 /* Put point back where it was in the text. */
2733 if (opoint <= 0)
2734 TEMP_SET_PT (opoint + ZV);
2735 else
2736 TEMP_SET_PT (opoint);
2737
2738 /* Now move point "officially" to the start of the inserted replacement. */
2739 move_if_not_intangible (newpoint);
2740
2741 return Qnil;
2742 }
2743 \f
2744 static Lisp_Object
2745 match_limit (num, beginningp)
2746 Lisp_Object num;
2747 int beginningp;
2748 {
2749 register int n;
2750
2751 CHECK_NUMBER (num);
2752 n = XINT (num);
2753 if (n < 0)
2754 args_out_of_range (num, make_number (0));
2755 if (search_regs.num_regs <= 0)
2756 error ("No match data, because no search succeeded");
2757 if (n >= search_regs.num_regs
2758 || search_regs.start[n] < 0)
2759 return Qnil;
2760 return (make_number ((beginningp) ? search_regs.start[n]
2761 : search_regs.end[n]));
2762 }
2763
2764 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2765 doc: /* Return position of start of text matched by last search.
2766 SUBEXP, a number, specifies which parenthesized expression in the last
2767 regexp.
2768 Value is nil if SUBEXPth pair didn't match, or there were less than
2769 SUBEXP pairs.
2770 Zero means the entire text matched by the whole regexp or whole string. */)
2771 (subexp)
2772 Lisp_Object subexp;
2773 {
2774 return match_limit (subexp, 1);
2775 }
2776
2777 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2778 doc: /* Return position of end of text matched by last search.
2779 SUBEXP, a number, specifies which parenthesized expression in the last
2780 regexp.
2781 Value is nil if SUBEXPth pair didn't match, or there were less than
2782 SUBEXP pairs.
2783 Zero means the entire text matched by the whole regexp or whole string. */)
2784 (subexp)
2785 Lisp_Object subexp;
2786 {
2787 return match_limit (subexp, 0);
2788 }
2789
2790 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2791 doc: /* Return a list containing all info on what the last search matched.
2792 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2793 All the elements are markers or nil (nil if the Nth pair didn't match)
2794 if the last match was on a buffer; integers or nil if a string was matched.
2795 Use `store-match-data' to reinstate the data in this list.
2796
2797 If INTEGERS (the optional first argument) is non-nil, always use
2798 integers \(rather than markers) to represent buffer positions. In
2799 this case, and if the last match was in a buffer, the buffer will get
2800 stored as one additional element at the end of the list.
2801
2802 If REUSE is a list, reuse it as part of the value. If REUSE is long
2803 enough to hold all the values, and if INTEGERS is non-nil, no consing
2804 is done.
2805
2806 If optional third arg RESEAT is non-nil, any previous markers on the
2807 REUSE list will be modified to point to nowhere.
2808
2809 Return value is undefined if the last search failed. */)
2810 (integers, reuse, reseat)
2811 Lisp_Object integers, reuse, reseat;
2812 {
2813 Lisp_Object tail, prev;
2814 Lisp_Object *data;
2815 int i, len;
2816
2817 if (!NILP (reseat))
2818 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2819 if (MARKERP (XCAR (tail)))
2820 {
2821 unchain_marker (XMARKER (XCAR (tail)));
2822 XSETCAR (tail, Qnil);
2823 }
2824
2825 if (NILP (last_thing_searched))
2826 return Qnil;
2827
2828 prev = Qnil;
2829
2830 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2831 * sizeof (Lisp_Object));
2832
2833 len = 0;
2834 for (i = 0; i < search_regs.num_regs; i++)
2835 {
2836 int start = search_regs.start[i];
2837 if (start >= 0)
2838 {
2839 if (EQ (last_thing_searched, Qt)
2840 || ! NILP (integers))
2841 {
2842 XSETFASTINT (data[2 * i], start);
2843 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2844 }
2845 else if (BUFFERP (last_thing_searched))
2846 {
2847 data[2 * i] = Fmake_marker ();
2848 Fset_marker (data[2 * i],
2849 make_number (start),
2850 last_thing_searched);
2851 data[2 * i + 1] = Fmake_marker ();
2852 Fset_marker (data[2 * i + 1],
2853 make_number (search_regs.end[i]),
2854 last_thing_searched);
2855 }
2856 else
2857 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2858 abort ();
2859
2860 len = 2 * i + 2;
2861 }
2862 else
2863 data[2 * i] = data[2 * i + 1] = Qnil;
2864 }
2865
2866 if (BUFFERP (last_thing_searched) && !NILP (integers))
2867 {
2868 data[len] = last_thing_searched;
2869 len++;
2870 }
2871
2872 /* If REUSE is not usable, cons up the values and return them. */
2873 if (! CONSP (reuse))
2874 return Flist (len, data);
2875
2876 /* If REUSE is a list, store as many value elements as will fit
2877 into the elements of REUSE. */
2878 for (i = 0, tail = reuse; CONSP (tail);
2879 i++, tail = XCDR (tail))
2880 {
2881 if (i < len)
2882 XSETCAR (tail, data[i]);
2883 else
2884 XSETCAR (tail, Qnil);
2885 prev = tail;
2886 }
2887
2888 /* If we couldn't fit all value elements into REUSE,
2889 cons up the rest of them and add them to the end of REUSE. */
2890 if (i < len)
2891 XSETCDR (prev, Flist (len - i, data + i));
2892
2893 return reuse;
2894 }
2895
2896 /* We used to have an internal use variant of `reseat' described as:
2897
2898 If RESEAT is `evaporate', put the markers back on the free list
2899 immediately. No other references to the markers must exist in this
2900 case, so it is used only internally on the unwind stack and
2901 save-match-data from Lisp.
2902
2903 But it was ill-conceived: those supposedly-internal markers get exposed via
2904 the undo-list, so freeing them here is unsafe. */
2905
2906 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2907 doc: /* Set internal data on last search match from elements of LIST.
2908 LIST should have been created by calling `match-data' previously.
2909
2910 If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
2911 (list, reseat)
2912 register Lisp_Object list, reseat;
2913 {
2914 register int i;
2915 register Lisp_Object marker;
2916
2917 if (running_asynch_code)
2918 save_search_regs ();
2919
2920 CHECK_LIST (list);
2921
2922 /* Unless we find a marker with a buffer or an explicit buffer
2923 in LIST, assume that this match data came from a string. */
2924 last_thing_searched = Qt;
2925
2926 /* Allocate registers if they don't already exist. */
2927 {
2928 int length = XFASTINT (Flength (list)) / 2;
2929
2930 if (length > search_regs.num_regs)
2931 {
2932 if (search_regs.num_regs == 0)
2933 {
2934 search_regs.start
2935 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2936 search_regs.end
2937 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2938 }
2939 else
2940 {
2941 search_regs.start
2942 = (regoff_t *) xrealloc (search_regs.start,
2943 length * sizeof (regoff_t));
2944 search_regs.end
2945 = (regoff_t *) xrealloc (search_regs.end,
2946 length * sizeof (regoff_t));
2947 }
2948
2949 for (i = search_regs.num_regs; i < length; i++)
2950 search_regs.start[i] = -1;
2951
2952 search_regs.num_regs = length;
2953 }
2954
2955 for (i = 0; CONSP (list); i++)
2956 {
2957 marker = XCAR (list);
2958 if (BUFFERP (marker))
2959 {
2960 last_thing_searched = marker;
2961 break;
2962 }
2963 if (i >= length)
2964 break;
2965 if (NILP (marker))
2966 {
2967 search_regs.start[i] = -1;
2968 list = XCDR (list);
2969 }
2970 else
2971 {
2972 int from;
2973 Lisp_Object m;
2974
2975 m = marker;
2976 if (MARKERP (marker))
2977 {
2978 if (XMARKER (marker)->buffer == 0)
2979 XSETFASTINT (marker, 0);
2980 else
2981 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2982 }
2983
2984 CHECK_NUMBER_COERCE_MARKER (marker);
2985 from = XINT (marker);
2986
2987 if (!NILP (reseat) && MARKERP (m))
2988 {
2989 unchain_marker (XMARKER (m));
2990 XSETCAR (list, Qnil);
2991 }
2992
2993 if ((list = XCDR (list), !CONSP (list)))
2994 break;
2995
2996 m = marker = XCAR (list);
2997
2998 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2999 XSETFASTINT (marker, 0);
3000
3001 CHECK_NUMBER_COERCE_MARKER (marker);
3002 search_regs.start[i] = from;
3003 search_regs.end[i] = XINT (marker);
3004
3005 if (!NILP (reseat) && MARKERP (m))
3006 {
3007 unchain_marker (XMARKER (m));
3008 XSETCAR (list, Qnil);
3009 }
3010 }
3011 list = XCDR (list);
3012 }
3013
3014 for (; i < search_regs.num_regs; i++)
3015 search_regs.start[i] = -1;
3016 }
3017
3018 return Qnil;
3019 }
3020
3021 /* If non-zero the match data have been saved in saved_search_regs
3022 during the execution of a sentinel or filter. */
3023 static int search_regs_saved;
3024 static struct re_registers saved_search_regs;
3025 static Lisp_Object saved_last_thing_searched;
3026
3027 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3028 if asynchronous code (filter or sentinel) is running. */
3029 static void
3030 save_search_regs ()
3031 {
3032 if (!search_regs_saved)
3033 {
3034 saved_search_regs.num_regs = search_regs.num_regs;
3035 saved_search_regs.start = search_regs.start;
3036 saved_search_regs.end = search_regs.end;
3037 saved_last_thing_searched = last_thing_searched;
3038 last_thing_searched = Qnil;
3039 search_regs.num_regs = 0;
3040 search_regs.start = 0;
3041 search_regs.end = 0;
3042
3043 search_regs_saved = 1;
3044 }
3045 }
3046
3047 /* Called upon exit from filters and sentinels. */
3048 void
3049 restore_search_regs ()
3050 {
3051 if (search_regs_saved)
3052 {
3053 if (search_regs.num_regs > 0)
3054 {
3055 xfree (search_regs.start);
3056 xfree (search_regs.end);
3057 }
3058 search_regs.num_regs = saved_search_regs.num_regs;
3059 search_regs.start = saved_search_regs.start;
3060 search_regs.end = saved_search_regs.end;
3061 last_thing_searched = saved_last_thing_searched;
3062 saved_last_thing_searched = Qnil;
3063 search_regs_saved = 0;
3064 }
3065 }
3066
3067 static Lisp_Object
3068 unwind_set_match_data (list)
3069 Lisp_Object list;
3070 {
3071 /* It is NOT ALWAYS safe to free (evaporate) the markers immediately. */
3072 return Fset_match_data (list, Qt);
3073 }
3074
3075 /* Called to unwind protect the match data. */
3076 void
3077 record_unwind_save_match_data ()
3078 {
3079 record_unwind_protect (unwind_set_match_data,
3080 Fmatch_data (Qnil, Qnil, Qnil));
3081 }
3082
3083 /* Quote a string to inactivate reg-expr chars */
3084
3085 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3086 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
3087 (string)
3088 Lisp_Object string;
3089 {
3090 register unsigned char *in, *out, *end;
3091 register unsigned char *temp;
3092 int backslashes_added = 0;
3093
3094 CHECK_STRING (string);
3095
3096 temp = (unsigned char *) alloca (SBYTES (string) * 2);
3097
3098 /* Now copy the data into the new string, inserting escapes. */
3099
3100 in = SDATA (string);
3101 end = in + SBYTES (string);
3102 out = temp;
3103
3104 for (; in != end; in++)
3105 {
3106 if (*in == '['
3107 || *in == '*' || *in == '.' || *in == '\\'
3108 || *in == '?' || *in == '+'
3109 || *in == '^' || *in == '$')
3110 *out++ = '\\', backslashes_added++;
3111 *out++ = *in;
3112 }
3113
3114 return make_specified_string (temp,
3115 SCHARS (string) + backslashes_added,
3116 out - temp,
3117 STRING_MULTIBYTE (string));
3118 }
3119 \f
3120 void
3121 syms_of_search ()
3122 {
3123 register int i;
3124
3125 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3126 {
3127 searchbufs[i].buf.allocated = 100;
3128 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3129 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3130 searchbufs[i].regexp = Qnil;
3131 searchbufs[i].whitespace_regexp = Qnil;
3132 searchbufs[i].syntax_table = Qnil;
3133 staticpro (&searchbufs[i].regexp);
3134 staticpro (&searchbufs[i].whitespace_regexp);
3135 staticpro (&searchbufs[i].syntax_table);
3136 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3137 }
3138 searchbuf_head = &searchbufs[0];
3139
3140 Qsearch_failed = intern ("search-failed");
3141 staticpro (&Qsearch_failed);
3142 Qinvalid_regexp = intern ("invalid-regexp");
3143 staticpro (&Qinvalid_regexp);
3144
3145 Fput (Qsearch_failed, Qerror_conditions,
3146 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3147 Fput (Qsearch_failed, Qerror_message,
3148 build_string ("Search failed"));
3149
3150 Fput (Qinvalid_regexp, Qerror_conditions,
3151 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3152 Fput (Qinvalid_regexp, Qerror_message,
3153 build_string ("Invalid regexp"));
3154
3155 last_thing_searched = Qnil;
3156 staticpro (&last_thing_searched);
3157
3158 saved_last_thing_searched = Qnil;
3159 staticpro (&saved_last_thing_searched);
3160
3161 DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3162 doc: /* Regexp to substitute for bunches of spaces in regexp search.
3163 Some commands use this for user-specified regexps.
3164 Spaces that occur inside character classes or repetition operators
3165 or other such regexp constructs are not replaced with this.
3166 A value of nil (which is the normal value) means treat spaces literally. */);
3167 Vsearch_spaces_regexp = Qnil;
3168
3169 defsubr (&Slooking_at);
3170 defsubr (&Sposix_looking_at);
3171 defsubr (&Sstring_match);
3172 defsubr (&Sposix_string_match);
3173 defsubr (&Ssearch_forward);
3174 defsubr (&Ssearch_backward);
3175 defsubr (&Sword_search_forward);
3176 defsubr (&Sword_search_backward);
3177 defsubr (&Sre_search_forward);
3178 defsubr (&Sre_search_backward);
3179 defsubr (&Sposix_search_forward);
3180 defsubr (&Sposix_search_backward);
3181 defsubr (&Sreplace_match);
3182 defsubr (&Smatch_beginning);
3183 defsubr (&Smatch_end);
3184 defsubr (&Smatch_data);
3185 defsubr (&Sset_match_data);
3186 defsubr (&Sregexp_quote);
3187 }
3188
3189 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3190 (do not change this comment) */