Replace bcopy, bzero, bcmp by memcpy, memmove, memset, memcmp
[bpt/emacs.git] / src / search.c
1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5
6 This file is part of GNU Emacs.
7
8 GNU Emacs is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
20
21
22 #include <config.h>
23 #include <setjmp.h>
24 #include "lisp.h"
25 #include "syntax.h"
26 #include "category.h"
27 #include "buffer.h"
28 #include "character.h"
29 #include "charset.h"
30 #include "region-cache.h"
31 #include "commands.h"
32 #include "blockinput.h"
33 #include "intervals.h"
34
35 #include <sys/types.h>
36 #include "regex.h"
37
38 #define REGEXP_CACHE_SIZE 20
39
40 /* If the regexp is non-nil, then the buffer contains the compiled form
41 of that regexp, suitable for searching. */
42 struct regexp_cache
43 {
44 struct regexp_cache *next;
45 Lisp_Object regexp, whitespace_regexp;
46 /* Syntax table for which the regexp applies. We need this because
47 of character classes. If this is t, then the compiled pattern is valid
48 for any syntax-table. */
49 Lisp_Object syntax_table;
50 struct re_pattern_buffer buf;
51 char fastmap[0400];
52 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
53 char posix;
54 };
55
56 /* The instances of that struct. */
57 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
58
59 /* The head of the linked list; points to the most recently used buffer. */
60 struct regexp_cache *searchbuf_head;
61
62
63 /* Every call to re_match, etc., must pass &search_regs as the regs
64 argument unless you can show it is unnecessary (i.e., if re_match
65 is certainly going to be called again before region-around-match
66 can be called).
67
68 Since the registers are now dynamically allocated, we need to make
69 sure not to refer to the Nth register before checking that it has
70 been allocated by checking search_regs.num_regs.
71
72 The regex code keeps track of whether it has allocated the search
73 buffer using bits in the re_pattern_buffer. This means that whenever
74 you compile a new pattern, it completely forgets whether it has
75 allocated any registers, and will allocate new registers the next
76 time you call a searching or matching function. Therefore, we need
77 to call re_set_registers after compiling a new pattern or after
78 setting the match registers, so that the regex functions will be
79 able to free or re-allocate it properly. */
80 static struct re_registers search_regs;
81
82 /* The buffer in which the last search was performed, or
83 Qt if the last search was done in a string;
84 Qnil if no searching has been done yet. */
85 static Lisp_Object last_thing_searched;
86
87 /* error condition signaled when regexp compile_pattern fails */
88
89 Lisp_Object Qinvalid_regexp;
90
91 /* Error condition used for failing searches */
92 Lisp_Object Qsearch_failed;
93
94 Lisp_Object Vsearch_spaces_regexp;
95
96 /* If non-nil, the match data will not be changed during call to
97 searching or matching functions. This variable is for internal use
98 only. */
99 Lisp_Object Vinhibit_changing_match_data;
100
101 static void set_search_regs (EMACS_INT, EMACS_INT);
102 static void save_search_regs (void);
103 static EMACS_INT simple_search (int, unsigned char *, int, int,
104 Lisp_Object, EMACS_INT, EMACS_INT,
105 EMACS_INT, EMACS_INT);
106 static EMACS_INT boyer_moore (int, unsigned char *, int, int,
107 Lisp_Object, Lisp_Object,
108 EMACS_INT, EMACS_INT,
109 EMACS_INT, EMACS_INT, int);
110 static EMACS_INT search_buffer (Lisp_Object, EMACS_INT, EMACS_INT,
111 EMACS_INT, EMACS_INT, int, int,
112 Lisp_Object, Lisp_Object, int);
113 static void matcher_overflow (void) NO_RETURN;
114
115 static void
116 matcher_overflow (void)
117 {
118 error ("Stack overflow in regexp matcher");
119 }
120
121 /* Compile a regexp and signal a Lisp error if anything goes wrong.
122 PATTERN is the pattern to compile.
123 CP is the place to put the result.
124 TRANSLATE is a translation table for ignoring case, or nil for none.
125 REGP is the structure that says where to store the "register"
126 values that will result from matching this pattern.
127 If it is 0, we should compile the pattern not to record any
128 subexpression bounds.
129 POSIX is nonzero if we want full backtracking (POSIX style)
130 for this pattern. 0 means backtrack only enough to get a valid match.
131
132 The behavior also depends on Vsearch_spaces_regexp. */
133
134 static void
135 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, Lisp_Object translate, struct re_registers *regp, int posix)
136 {
137 char *val;
138 reg_syntax_t old;
139
140 cp->regexp = Qnil;
141 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
142 cp->posix = posix;
143 cp->buf.multibyte = STRING_MULTIBYTE (pattern);
144 cp->buf.charset_unibyte = charset_unibyte;
145 if (STRINGP (Vsearch_spaces_regexp))
146 cp->whitespace_regexp = Vsearch_spaces_regexp;
147 else
148 cp->whitespace_regexp = Qnil;
149
150 /* rms: I think BLOCK_INPUT is not needed here any more,
151 because regex.c defines malloc to call xmalloc.
152 Using BLOCK_INPUT here means the debugger won't run if an error occurs.
153 So let's turn it off. */
154 /* BLOCK_INPUT; */
155 old = re_set_syntax (RE_SYNTAX_EMACS
156 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
157
158 if (STRINGP (Vsearch_spaces_regexp))
159 re_set_whitespace_regexp (SDATA (Vsearch_spaces_regexp));
160 else
161 re_set_whitespace_regexp (NULL);
162
163 val = (char *) re_compile_pattern ((char *) SDATA (pattern),
164 SBYTES (pattern), &cp->buf);
165
166 /* If the compiled pattern hard codes some of the contents of the
167 syntax-table, it can only be reused with *this* syntax table. */
168 cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
169
170 re_set_whitespace_regexp (NULL);
171
172 re_set_syntax (old);
173 /* UNBLOCK_INPUT; */
174 if (val)
175 xsignal1 (Qinvalid_regexp, build_string (val));
176
177 cp->regexp = Fcopy_sequence (pattern);
178 }
179
180 /* Shrink each compiled regexp buffer in the cache
181 to the size actually used right now.
182 This is called from garbage collection. */
183
184 void
185 shrink_regexp_cache (void)
186 {
187 struct regexp_cache *cp;
188
189 for (cp = searchbuf_head; cp != 0; cp = cp->next)
190 {
191 cp->buf.allocated = cp->buf.used;
192 cp->buf.buffer
193 = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
194 }
195 }
196
197 /* Clear the regexp cache w.r.t. a particular syntax table,
198 because it was changed.
199 There is no danger of memory leak here because re_compile_pattern
200 automagically manages the memory in each re_pattern_buffer struct,
201 based on its `allocated' and `buffer' values. */
202 void
203 clear_regexp_cache (void)
204 {
205 int i;
206
207 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
208 /* It's tempting to compare with the syntax-table we've actually changed,
209 but it's not sufficient because char-table inheritance means that
210 modifying one syntax-table can change others at the same time. */
211 if (!EQ (searchbufs[i].syntax_table, Qt))
212 searchbufs[i].regexp = Qnil;
213 }
214
215 /* Compile a regexp if necessary, but first check to see if there's one in
216 the cache.
217 PATTERN is the pattern to compile.
218 TRANSLATE is a translation table for ignoring case, or nil for none.
219 REGP is the structure that says where to store the "register"
220 values that will result from matching this pattern.
221 If it is 0, we should compile the pattern not to record any
222 subexpression bounds.
223 POSIX is nonzero if we want full backtracking (POSIX style)
224 for this pattern. 0 means backtrack only enough to get a valid match. */
225
226 struct re_pattern_buffer *
227 compile_pattern (Lisp_Object pattern, struct re_registers *regp, Lisp_Object translate, int posix, int multibyte)
228 {
229 struct regexp_cache *cp, **cpp;
230
231 for (cpp = &searchbuf_head; ; cpp = &cp->next)
232 {
233 cp = *cpp;
234 /* Entries are initialized to nil, and may be set to nil by
235 compile_pattern_1 if the pattern isn't valid. Don't apply
236 string accessors in those cases. However, compile_pattern_1
237 is only applied to the cache entry we pick here to reuse. So
238 nil should never appear before a non-nil entry. */
239 if (NILP (cp->regexp))
240 goto compile_it;
241 if (SCHARS (cp->regexp) == SCHARS (pattern)
242 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
243 && !NILP (Fstring_equal (cp->regexp, pattern))
244 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
245 && cp->posix == posix
246 && (EQ (cp->syntax_table, Qt)
247 || EQ (cp->syntax_table, current_buffer->syntax_table))
248 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
249 && cp->buf.charset_unibyte == charset_unibyte)
250 break;
251
252 /* If we're at the end of the cache, compile into the nil cell
253 we found, or the last (least recently used) cell with a
254 string value. */
255 if (cp->next == 0)
256 {
257 compile_it:
258 compile_pattern_1 (cp, pattern, translate, regp, posix);
259 break;
260 }
261 }
262
263 /* When we get here, cp (aka *cpp) contains the compiled pattern,
264 either because we found it in the cache or because we just compiled it.
265 Move it to the front of the queue to mark it as most recently used. */
266 *cpp = cp->next;
267 cp->next = searchbuf_head;
268 searchbuf_head = cp;
269
270 /* Advise the searching functions about the space we have allocated
271 for register data. */
272 if (regp)
273 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
274
275 /* The compiled pattern can be used both for multibyte and unibyte
276 target. But, we have to tell which the pattern is used for. */
277 cp->buf.target_multibyte = multibyte;
278
279 return &cp->buf;
280 }
281
282 \f
283 static Lisp_Object
284 looking_at_1 (Lisp_Object string, int posix)
285 {
286 Lisp_Object val;
287 unsigned char *p1, *p2;
288 EMACS_INT s1, s2;
289 register int i;
290 struct re_pattern_buffer *bufp;
291
292 if (running_asynch_code)
293 save_search_regs ();
294
295 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
296 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
297 = current_buffer->case_eqv_table;
298
299 CHECK_STRING (string);
300 bufp = compile_pattern (string,
301 (NILP (Vinhibit_changing_match_data)
302 ? &search_regs : NULL),
303 (!NILP (current_buffer->case_fold_search)
304 ? current_buffer->case_canon_table : Qnil),
305 posix,
306 !NILP (current_buffer->enable_multibyte_characters));
307
308 immediate_quit = 1;
309 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
310
311 /* Get pointers and sizes of the two strings
312 that make up the visible portion of the buffer. */
313
314 p1 = BEGV_ADDR;
315 s1 = GPT_BYTE - BEGV_BYTE;
316 p2 = GAP_END_ADDR;
317 s2 = ZV_BYTE - GPT_BYTE;
318 if (s1 < 0)
319 {
320 p2 = p1;
321 s2 = ZV_BYTE - BEGV_BYTE;
322 s1 = 0;
323 }
324 if (s2 < 0)
325 {
326 s1 = ZV_BYTE - BEGV_BYTE;
327 s2 = 0;
328 }
329
330 re_match_object = Qnil;
331
332 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
333 PT_BYTE - BEGV_BYTE,
334 (NILP (Vinhibit_changing_match_data)
335 ? &search_regs : NULL),
336 ZV_BYTE - BEGV_BYTE);
337 immediate_quit = 0;
338
339 if (i == -2)
340 matcher_overflow ();
341
342 val = (0 <= i ? Qt : Qnil);
343 if (NILP (Vinhibit_changing_match_data) && i >= 0)
344 for (i = 0; i < search_regs.num_regs; i++)
345 if (search_regs.start[i] >= 0)
346 {
347 search_regs.start[i]
348 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
349 search_regs.end[i]
350 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
351 }
352
353 /* Set last_thing_searched only when match data is changed. */
354 if (NILP (Vinhibit_changing_match_data))
355 XSETBUFFER (last_thing_searched, current_buffer);
356
357 return val;
358 }
359
360 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
361 doc: /* Return t if text after point matches regular expression REGEXP.
362 This function modifies the match data that `match-beginning',
363 `match-end' and `match-data' access; save and restore the match
364 data if you want to preserve them. */)
365 (regexp)
366 Lisp_Object regexp;
367 {
368 return looking_at_1 (regexp, 0);
369 }
370
371 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
372 doc: /* Return t if text after point matches regular expression REGEXP.
373 Find the longest match, in accord with Posix regular expression rules.
374 This function modifies the match data that `match-beginning',
375 `match-end' and `match-data' access; save and restore the match
376 data if you want to preserve them. */)
377 (regexp)
378 Lisp_Object regexp;
379 {
380 return looking_at_1 (regexp, 1);
381 }
382 \f
383 static Lisp_Object
384 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, int posix)
385 {
386 int val;
387 struct re_pattern_buffer *bufp;
388 EMACS_INT pos, pos_byte;
389 int i;
390
391 if (running_asynch_code)
392 save_search_regs ();
393
394 CHECK_STRING (regexp);
395 CHECK_STRING (string);
396
397 if (NILP (start))
398 pos = 0, pos_byte = 0;
399 else
400 {
401 int len = SCHARS (string);
402
403 CHECK_NUMBER (start);
404 pos = XINT (start);
405 if (pos < 0 && -pos <= len)
406 pos = len + pos;
407 else if (0 > pos || pos > len)
408 args_out_of_range (string, start);
409 pos_byte = string_char_to_byte (string, pos);
410 }
411
412 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
413 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
414 = current_buffer->case_eqv_table;
415
416 bufp = compile_pattern (regexp,
417 (NILP (Vinhibit_changing_match_data)
418 ? &search_regs : NULL),
419 (!NILP (current_buffer->case_fold_search)
420 ? current_buffer->case_canon_table : Qnil),
421 posix,
422 STRING_MULTIBYTE (string));
423 immediate_quit = 1;
424 re_match_object = string;
425
426 val = re_search (bufp, (char *) SDATA (string),
427 SBYTES (string), pos_byte,
428 SBYTES (string) - pos_byte,
429 (NILP (Vinhibit_changing_match_data)
430 ? &search_regs : NULL));
431 immediate_quit = 0;
432
433 /* Set last_thing_searched only when match data is changed. */
434 if (NILP (Vinhibit_changing_match_data))
435 last_thing_searched = Qt;
436
437 if (val == -2)
438 matcher_overflow ();
439 if (val < 0) return Qnil;
440
441 if (NILP (Vinhibit_changing_match_data))
442 for (i = 0; i < search_regs.num_regs; i++)
443 if (search_regs.start[i] >= 0)
444 {
445 search_regs.start[i]
446 = string_byte_to_char (string, search_regs.start[i]);
447 search_regs.end[i]
448 = string_byte_to_char (string, search_regs.end[i]);
449 }
450
451 return make_number (string_byte_to_char (string, val));
452 }
453
454 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
455 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
456 Matching ignores case if `case-fold-search' is non-nil.
457 If third arg START is non-nil, start search at that index in STRING.
458 For index of first char beyond the match, do (match-end 0).
459 `match-end' and `match-beginning' also give indices of substrings
460 matched by parenthesis constructs in the pattern.
461
462 You can use the function `match-string' to extract the substrings
463 matched by the parenthesis constructions in REGEXP. */)
464 (regexp, string, start)
465 Lisp_Object regexp, string, start;
466 {
467 return string_match_1 (regexp, string, start, 0);
468 }
469
470 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
471 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
472 Find the longest match, in accord with Posix regular expression rules.
473 Case is ignored if `case-fold-search' is non-nil in the current buffer.
474 If third arg START is non-nil, start search at that index in STRING.
475 For index of first char beyond the match, do (match-end 0).
476 `match-end' and `match-beginning' also give indices of substrings
477 matched by parenthesis constructs in the pattern. */)
478 (regexp, string, start)
479 Lisp_Object regexp, string, start;
480 {
481 return string_match_1 (regexp, string, start, 1);
482 }
483
484 /* Match REGEXP against STRING, searching all of STRING,
485 and return the index of the match, or negative on failure.
486 This does not clobber the match data. */
487
488 int
489 fast_string_match (Lisp_Object regexp, Lisp_Object string)
490 {
491 int val;
492 struct re_pattern_buffer *bufp;
493
494 bufp = compile_pattern (regexp, 0, Qnil,
495 0, STRING_MULTIBYTE (string));
496 immediate_quit = 1;
497 re_match_object = string;
498
499 val = re_search (bufp, (char *) SDATA (string),
500 SBYTES (string), 0,
501 SBYTES (string), 0);
502 immediate_quit = 0;
503 return val;
504 }
505
506 /* Match REGEXP against STRING, searching all of STRING ignoring case,
507 and return the index of the match, or negative on failure.
508 This does not clobber the match data.
509 We assume that STRING contains single-byte characters. */
510
511 extern Lisp_Object Vascii_downcase_table;
512
513 int
514 fast_c_string_match_ignore_case (Lisp_Object regexp, const char *string)
515 {
516 int val;
517 struct re_pattern_buffer *bufp;
518 int len = strlen (string);
519
520 regexp = string_make_unibyte (regexp);
521 re_match_object = Qt;
522 bufp = compile_pattern (regexp, 0,
523 Vascii_canon_table, 0,
524 0);
525 immediate_quit = 1;
526 val = re_search (bufp, string, len, 0, len, 0);
527 immediate_quit = 0;
528 return val;
529 }
530
531 /* Like fast_string_match but ignore case. */
532
533 int
534 fast_string_match_ignore_case (Lisp_Object regexp, Lisp_Object string)
535 {
536 int val;
537 struct re_pattern_buffer *bufp;
538
539 bufp = compile_pattern (regexp, 0, Vascii_canon_table,
540 0, STRING_MULTIBYTE (string));
541 immediate_quit = 1;
542 re_match_object = string;
543
544 val = re_search (bufp, (char *) SDATA (string),
545 SBYTES (string), 0,
546 SBYTES (string), 0);
547 immediate_quit = 0;
548 return val;
549 }
550 \f
551 /* Match REGEXP against the characters after POS to LIMIT, and return
552 the number of matched characters. If STRING is non-nil, match
553 against the characters in it. In that case, POS and LIMIT are
554 indices into the string. This function doesn't modify the match
555 data. */
556
557 EMACS_INT
558 fast_looking_at (Lisp_Object regexp, EMACS_INT pos, EMACS_INT pos_byte, EMACS_INT limit, EMACS_INT limit_byte, Lisp_Object string)
559 {
560 int multibyte;
561 struct re_pattern_buffer *buf;
562 unsigned char *p1, *p2;
563 EMACS_INT s1, s2;
564 EMACS_INT len;
565
566 if (STRINGP (string))
567 {
568 if (pos_byte < 0)
569 pos_byte = string_char_to_byte (string, pos);
570 if (limit_byte < 0)
571 limit_byte = string_char_to_byte (string, limit);
572 p1 = NULL;
573 s1 = 0;
574 p2 = SDATA (string);
575 s2 = SBYTES (string);
576 re_match_object = string;
577 multibyte = STRING_MULTIBYTE (string);
578 }
579 else
580 {
581 if (pos_byte < 0)
582 pos_byte = CHAR_TO_BYTE (pos);
583 if (limit_byte < 0)
584 limit_byte = CHAR_TO_BYTE (limit);
585 pos_byte -= BEGV_BYTE;
586 limit_byte -= BEGV_BYTE;
587 p1 = BEGV_ADDR;
588 s1 = GPT_BYTE - BEGV_BYTE;
589 p2 = GAP_END_ADDR;
590 s2 = ZV_BYTE - GPT_BYTE;
591 if (s1 < 0)
592 {
593 p2 = p1;
594 s2 = ZV_BYTE - BEGV_BYTE;
595 s1 = 0;
596 }
597 if (s2 < 0)
598 {
599 s1 = ZV_BYTE - BEGV_BYTE;
600 s2 = 0;
601 }
602 re_match_object = Qnil;
603 multibyte = ! NILP (current_buffer->enable_multibyte_characters);
604 }
605
606 buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
607 immediate_quit = 1;
608 len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
609 pos_byte, NULL, limit_byte);
610 immediate_quit = 0;
611
612 return len;
613 }
614
615 \f
616 /* The newline cache: remembering which sections of text have no newlines. */
617
618 /* If the user has requested newline caching, make sure it's on.
619 Otherwise, make sure it's off.
620 This is our cheezy way of associating an action with the change of
621 state of a buffer-local variable. */
622 static void
623 newline_cache_on_off (struct buffer *buf)
624 {
625 if (NILP (buf->cache_long_line_scans))
626 {
627 /* It should be off. */
628 if (buf->newline_cache)
629 {
630 free_region_cache (buf->newline_cache);
631 buf->newline_cache = 0;
632 }
633 }
634 else
635 {
636 /* It should be on. */
637 if (buf->newline_cache == 0)
638 buf->newline_cache = new_region_cache ();
639 }
640 }
641
642 \f
643 /* Search for COUNT instances of the character TARGET between START and END.
644
645 If COUNT is positive, search forwards; END must be >= START.
646 If COUNT is negative, search backwards for the -COUNTth instance;
647 END must be <= START.
648 If COUNT is zero, do anything you please; run rogue, for all I care.
649
650 If END is zero, use BEGV or ZV instead, as appropriate for the
651 direction indicated by COUNT.
652
653 If we find COUNT instances, set *SHORTAGE to zero, and return the
654 position past the COUNTth match. Note that for reverse motion
655 this is not the same as the usual convention for Emacs motion commands.
656
657 If we don't find COUNT instances before reaching END, set *SHORTAGE
658 to the number of TARGETs left unfound, and return END.
659
660 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
661 except when inside redisplay. */
662
663 int
664 scan_buffer (register int target, EMACS_INT start, EMACS_INT end, int count, int *shortage, int allow_quit)
665 {
666 struct region_cache *newline_cache;
667 int direction;
668
669 if (count > 0)
670 {
671 direction = 1;
672 if (! end) end = ZV;
673 }
674 else
675 {
676 direction = -1;
677 if (! end) end = BEGV;
678 }
679
680 newline_cache_on_off (current_buffer);
681 newline_cache = current_buffer->newline_cache;
682
683 if (shortage != 0)
684 *shortage = 0;
685
686 immediate_quit = allow_quit;
687
688 if (count > 0)
689 while (start != end)
690 {
691 /* Our innermost scanning loop is very simple; it doesn't know
692 about gaps, buffer ends, or the newline cache. ceiling is
693 the position of the last character before the next such
694 obstacle --- the last character the dumb search loop should
695 examine. */
696 EMACS_INT ceiling_byte = CHAR_TO_BYTE (end) - 1;
697 EMACS_INT start_byte = CHAR_TO_BYTE (start);
698 EMACS_INT tem;
699
700 /* If we're looking for a newline, consult the newline cache
701 to see where we can avoid some scanning. */
702 if (target == '\n' && newline_cache)
703 {
704 int next_change;
705 immediate_quit = 0;
706 while (region_cache_forward
707 (current_buffer, newline_cache, start_byte, &next_change))
708 start_byte = next_change;
709 immediate_quit = allow_quit;
710
711 /* START should never be after END. */
712 if (start_byte > ceiling_byte)
713 start_byte = ceiling_byte;
714
715 /* Now the text after start is an unknown region, and
716 next_change is the position of the next known region. */
717 ceiling_byte = min (next_change - 1, ceiling_byte);
718 }
719
720 /* The dumb loop can only scan text stored in contiguous
721 bytes. BUFFER_CEILING_OF returns the last character
722 position that is contiguous, so the ceiling is the
723 position after that. */
724 tem = BUFFER_CEILING_OF (start_byte);
725 ceiling_byte = min (tem, ceiling_byte);
726
727 {
728 /* The termination address of the dumb loop. */
729 register unsigned char *ceiling_addr
730 = BYTE_POS_ADDR (ceiling_byte) + 1;
731 register unsigned char *cursor
732 = BYTE_POS_ADDR (start_byte);
733 unsigned char *base = cursor;
734
735 while (cursor < ceiling_addr)
736 {
737 unsigned char *scan_start = cursor;
738
739 /* The dumb loop. */
740 while (*cursor != target && ++cursor < ceiling_addr)
741 ;
742
743 /* If we're looking for newlines, cache the fact that
744 the region from start to cursor is free of them. */
745 if (target == '\n' && newline_cache)
746 know_region_cache (current_buffer, newline_cache,
747 start_byte + scan_start - base,
748 start_byte + cursor - base);
749
750 /* Did we find the target character? */
751 if (cursor < ceiling_addr)
752 {
753 if (--count == 0)
754 {
755 immediate_quit = 0;
756 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
757 }
758 cursor++;
759 }
760 }
761
762 start = BYTE_TO_CHAR (start_byte + cursor - base);
763 }
764 }
765 else
766 while (start > end)
767 {
768 /* The last character to check before the next obstacle. */
769 EMACS_INT ceiling_byte = CHAR_TO_BYTE (end);
770 EMACS_INT start_byte = CHAR_TO_BYTE (start);
771 EMACS_INT tem;
772
773 /* Consult the newline cache, if appropriate. */
774 if (target == '\n' && newline_cache)
775 {
776 int next_change;
777 immediate_quit = 0;
778 while (region_cache_backward
779 (current_buffer, newline_cache, start_byte, &next_change))
780 start_byte = next_change;
781 immediate_quit = allow_quit;
782
783 /* Start should never be at or before end. */
784 if (start_byte <= ceiling_byte)
785 start_byte = ceiling_byte + 1;
786
787 /* Now the text before start is an unknown region, and
788 next_change is the position of the next known region. */
789 ceiling_byte = max (next_change, ceiling_byte);
790 }
791
792 /* Stop scanning before the gap. */
793 tem = BUFFER_FLOOR_OF (start_byte - 1);
794 ceiling_byte = max (tem, ceiling_byte);
795
796 {
797 /* The termination address of the dumb loop. */
798 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
799 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
800 unsigned char *base = cursor;
801
802 while (cursor >= ceiling_addr)
803 {
804 unsigned char *scan_start = cursor;
805
806 while (*cursor != target && --cursor >= ceiling_addr)
807 ;
808
809 /* If we're looking for newlines, cache the fact that
810 the region from after the cursor to start is free of them. */
811 if (target == '\n' && newline_cache)
812 know_region_cache (current_buffer, newline_cache,
813 start_byte + cursor - base,
814 start_byte + scan_start - base);
815
816 /* Did we find the target character? */
817 if (cursor >= ceiling_addr)
818 {
819 if (++count >= 0)
820 {
821 immediate_quit = 0;
822 return BYTE_TO_CHAR (start_byte + cursor - base);
823 }
824 cursor--;
825 }
826 }
827
828 start = BYTE_TO_CHAR (start_byte + cursor - base);
829 }
830 }
831
832 immediate_quit = 0;
833 if (shortage != 0)
834 *shortage = count * direction;
835 return start;
836 }
837 \f
838 /* Search for COUNT instances of a line boundary, which means either a
839 newline or (if selective display enabled) a carriage return.
840 Start at START. If COUNT is negative, search backwards.
841
842 We report the resulting position by calling TEMP_SET_PT_BOTH.
843
844 If we find COUNT instances. we position after (always after,
845 even if scanning backwards) the COUNTth match, and return 0.
846
847 If we don't find COUNT instances before reaching the end of the
848 buffer (or the beginning, if scanning backwards), we return
849 the number of line boundaries left unfound, and position at
850 the limit we bumped up against.
851
852 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
853 except in special cases. */
854
855 int
856 scan_newline (EMACS_INT start, EMACS_INT start_byte, EMACS_INT limit, EMACS_INT limit_byte, register int count, int allow_quit)
857 {
858 int direction = ((count > 0) ? 1 : -1);
859
860 register unsigned char *cursor;
861 unsigned char *base;
862
863 EMACS_INT ceiling;
864 register unsigned char *ceiling_addr;
865
866 int old_immediate_quit = immediate_quit;
867
868 /* The code that follows is like scan_buffer
869 but checks for either newline or carriage return. */
870
871 if (allow_quit)
872 immediate_quit++;
873
874 start_byte = CHAR_TO_BYTE (start);
875
876 if (count > 0)
877 {
878 while (start_byte < limit_byte)
879 {
880 ceiling = BUFFER_CEILING_OF (start_byte);
881 ceiling = min (limit_byte - 1, ceiling);
882 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
883 base = (cursor = BYTE_POS_ADDR (start_byte));
884 while (1)
885 {
886 while (*cursor != '\n' && ++cursor != ceiling_addr)
887 ;
888
889 if (cursor != ceiling_addr)
890 {
891 if (--count == 0)
892 {
893 immediate_quit = old_immediate_quit;
894 start_byte = start_byte + cursor - base + 1;
895 start = BYTE_TO_CHAR (start_byte);
896 TEMP_SET_PT_BOTH (start, start_byte);
897 return 0;
898 }
899 else
900 if (++cursor == ceiling_addr)
901 break;
902 }
903 else
904 break;
905 }
906 start_byte += cursor - base;
907 }
908 }
909 else
910 {
911 while (start_byte > limit_byte)
912 {
913 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
914 ceiling = max (limit_byte, ceiling);
915 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
916 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
917 while (1)
918 {
919 while (--cursor != ceiling_addr && *cursor != '\n')
920 ;
921
922 if (cursor != ceiling_addr)
923 {
924 if (++count == 0)
925 {
926 immediate_quit = old_immediate_quit;
927 /* Return the position AFTER the match we found. */
928 start_byte = start_byte + cursor - base + 1;
929 start = BYTE_TO_CHAR (start_byte);
930 TEMP_SET_PT_BOTH (start, start_byte);
931 return 0;
932 }
933 }
934 else
935 break;
936 }
937 /* Here we add 1 to compensate for the last decrement
938 of CURSOR, which took it past the valid range. */
939 start_byte += cursor - base + 1;
940 }
941 }
942
943 TEMP_SET_PT_BOTH (limit, limit_byte);
944 immediate_quit = old_immediate_quit;
945
946 return count * direction;
947 }
948
949 int
950 find_next_newline_no_quit (EMACS_INT from, int cnt)
951 {
952 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
953 }
954
955 /* Like find_next_newline, but returns position before the newline,
956 not after, and only search up to TO. This isn't just
957 find_next_newline (...)-1, because you might hit TO. */
958
959 int
960 find_before_next_newline (EMACS_INT from, EMACS_INT to, int cnt)
961 {
962 int shortage;
963 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
964
965 if (shortage == 0)
966 pos--;
967
968 return pos;
969 }
970 \f
971 /* Subroutines of Lisp buffer search functions. */
972
973 static Lisp_Object
974 search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count, int direction, int RE, int posix)
975 {
976 register int np;
977 int lim, lim_byte;
978 int n = direction;
979
980 if (!NILP (count))
981 {
982 CHECK_NUMBER (count);
983 n *= XINT (count);
984 }
985
986 CHECK_STRING (string);
987 if (NILP (bound))
988 {
989 if (n > 0)
990 lim = ZV, lim_byte = ZV_BYTE;
991 else
992 lim = BEGV, lim_byte = BEGV_BYTE;
993 }
994 else
995 {
996 CHECK_NUMBER_COERCE_MARKER (bound);
997 lim = XINT (bound);
998 if (n > 0 ? lim < PT : lim > PT)
999 error ("Invalid search bound (wrong side of point)");
1000 if (lim > ZV)
1001 lim = ZV, lim_byte = ZV_BYTE;
1002 else if (lim < BEGV)
1003 lim = BEGV, lim_byte = BEGV_BYTE;
1004 else
1005 lim_byte = CHAR_TO_BYTE (lim);
1006 }
1007
1008 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
1009 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
1010 = current_buffer->case_eqv_table;
1011
1012 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
1013 (!NILP (current_buffer->case_fold_search)
1014 ? current_buffer->case_canon_table
1015 : Qnil),
1016 (!NILP (current_buffer->case_fold_search)
1017 ? current_buffer->case_eqv_table
1018 : Qnil),
1019 posix);
1020 if (np <= 0)
1021 {
1022 if (NILP (noerror))
1023 xsignal1 (Qsearch_failed, string);
1024
1025 if (!EQ (noerror, Qt))
1026 {
1027 if (lim < BEGV || lim > ZV)
1028 abort ();
1029 SET_PT_BOTH (lim, lim_byte);
1030 return Qnil;
1031 #if 0 /* This would be clean, but maybe programs depend on
1032 a value of nil here. */
1033 np = lim;
1034 #endif
1035 }
1036 else
1037 return Qnil;
1038 }
1039
1040 if (np < BEGV || np > ZV)
1041 abort ();
1042
1043 SET_PT (np);
1044
1045 return make_number (np);
1046 }
1047 \f
1048 /* Return 1 if REGEXP it matches just one constant string. */
1049
1050 static int
1051 trivial_regexp_p (Lisp_Object regexp)
1052 {
1053 int len = SBYTES (regexp);
1054 unsigned char *s = SDATA (regexp);
1055 while (--len >= 0)
1056 {
1057 switch (*s++)
1058 {
1059 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1060 return 0;
1061 case '\\':
1062 if (--len < 0)
1063 return 0;
1064 switch (*s++)
1065 {
1066 case '|': case '(': case ')': case '`': case '\'': case 'b':
1067 case 'B': case '<': case '>': case 'w': case 'W': case 's':
1068 case 'S': case '=': case '{': case '}': case '_':
1069 case 'c': case 'C': /* for categoryspec and notcategoryspec */
1070 case '1': case '2': case '3': case '4': case '5':
1071 case '6': case '7': case '8': case '9':
1072 return 0;
1073 }
1074 }
1075 }
1076 return 1;
1077 }
1078
1079 /* Search for the n'th occurrence of STRING in the current buffer,
1080 starting at position POS and stopping at position LIM,
1081 treating STRING as a literal string if RE is false or as
1082 a regular expression if RE is true.
1083
1084 If N is positive, searching is forward and LIM must be greater than POS.
1085 If N is negative, searching is backward and LIM must be less than POS.
1086
1087 Returns -x if x occurrences remain to be found (x > 0),
1088 or else the position at the beginning of the Nth occurrence
1089 (if searching backward) or the end (if searching forward).
1090
1091 POSIX is nonzero if we want full backtracking (POSIX style)
1092 for this pattern. 0 means backtrack only enough to get a valid match. */
1093
1094 #define TRANSLATE(out, trt, d) \
1095 do \
1096 { \
1097 if (! NILP (trt)) \
1098 { \
1099 Lisp_Object temp; \
1100 temp = Faref (trt, make_number (d)); \
1101 if (INTEGERP (temp)) \
1102 out = XINT (temp); \
1103 else \
1104 out = d; \
1105 } \
1106 else \
1107 out = d; \
1108 } \
1109 while (0)
1110
1111 /* Only used in search_buffer, to record the end position of the match
1112 when searching regexps and SEARCH_REGS should not be changed
1113 (i.e. Vinhibit_changing_match_data is non-nil). */
1114 static struct re_registers search_regs_1;
1115
1116 static EMACS_INT
1117 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1118 RE, trt, inverse_trt, posix)
1119 Lisp_Object string;
1120 EMACS_INT pos;
1121 EMACS_INT pos_byte;
1122 EMACS_INT lim;
1123 EMACS_INT lim_byte;
1124 int n;
1125 int RE;
1126 Lisp_Object trt;
1127 Lisp_Object inverse_trt;
1128 int posix;
1129 {
1130 int len = SCHARS (string);
1131 int len_byte = SBYTES (string);
1132 register int i;
1133
1134 if (running_asynch_code)
1135 save_search_regs ();
1136
1137 /* Searching 0 times means don't move. */
1138 /* Null string is found at starting position. */
1139 if (len == 0 || n == 0)
1140 {
1141 set_search_regs (pos_byte, 0);
1142 return pos;
1143 }
1144
1145 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1146 {
1147 unsigned char *p1, *p2;
1148 int s1, s2;
1149 struct re_pattern_buffer *bufp;
1150
1151 bufp = compile_pattern (string,
1152 (NILP (Vinhibit_changing_match_data)
1153 ? &search_regs : &search_regs_1),
1154 trt, posix,
1155 !NILP (current_buffer->enable_multibyte_characters));
1156
1157 immediate_quit = 1; /* Quit immediately if user types ^G,
1158 because letting this function finish
1159 can take too long. */
1160 QUIT; /* Do a pending quit right away,
1161 to avoid paradoxical behavior */
1162 /* Get pointers and sizes of the two strings
1163 that make up the visible portion of the buffer. */
1164
1165 p1 = BEGV_ADDR;
1166 s1 = GPT_BYTE - BEGV_BYTE;
1167 p2 = GAP_END_ADDR;
1168 s2 = ZV_BYTE - GPT_BYTE;
1169 if (s1 < 0)
1170 {
1171 p2 = p1;
1172 s2 = ZV_BYTE - BEGV_BYTE;
1173 s1 = 0;
1174 }
1175 if (s2 < 0)
1176 {
1177 s1 = ZV_BYTE - BEGV_BYTE;
1178 s2 = 0;
1179 }
1180 re_match_object = Qnil;
1181
1182 while (n < 0)
1183 {
1184 int val;
1185 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1186 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1187 (NILP (Vinhibit_changing_match_data)
1188 ? &search_regs : &search_regs_1),
1189 /* Don't allow match past current point */
1190 pos_byte - BEGV_BYTE);
1191 if (val == -2)
1192 {
1193 matcher_overflow ();
1194 }
1195 if (val >= 0)
1196 {
1197 if (NILP (Vinhibit_changing_match_data))
1198 {
1199 pos_byte = search_regs.start[0] + BEGV_BYTE;
1200 for (i = 0; i < search_regs.num_regs; i++)
1201 if (search_regs.start[i] >= 0)
1202 {
1203 search_regs.start[i]
1204 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1205 search_regs.end[i]
1206 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1207 }
1208 XSETBUFFER (last_thing_searched, current_buffer);
1209 /* Set pos to the new position. */
1210 pos = search_regs.start[0];
1211 }
1212 else
1213 {
1214 pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1215 /* Set pos to the new position. */
1216 pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1217 }
1218 }
1219 else
1220 {
1221 immediate_quit = 0;
1222 return (n);
1223 }
1224 n++;
1225 }
1226 while (n > 0)
1227 {
1228 int val;
1229 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1230 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1231 (NILP (Vinhibit_changing_match_data)
1232 ? &search_regs : &search_regs_1),
1233 lim_byte - BEGV_BYTE);
1234 if (val == -2)
1235 {
1236 matcher_overflow ();
1237 }
1238 if (val >= 0)
1239 {
1240 if (NILP (Vinhibit_changing_match_data))
1241 {
1242 pos_byte = search_regs.end[0] + BEGV_BYTE;
1243 for (i = 0; i < search_regs.num_regs; i++)
1244 if (search_regs.start[i] >= 0)
1245 {
1246 search_regs.start[i]
1247 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1248 search_regs.end[i]
1249 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1250 }
1251 XSETBUFFER (last_thing_searched, current_buffer);
1252 pos = search_regs.end[0];
1253 }
1254 else
1255 {
1256 pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1257 pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1258 }
1259 }
1260 else
1261 {
1262 immediate_quit = 0;
1263 return (0 - n);
1264 }
1265 n--;
1266 }
1267 immediate_quit = 0;
1268 return (pos);
1269 }
1270 else /* non-RE case */
1271 {
1272 unsigned char *raw_pattern, *pat;
1273 int raw_pattern_size;
1274 int raw_pattern_size_byte;
1275 unsigned char *patbuf;
1276 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1277 unsigned char *base_pat;
1278 /* Set to positive if we find a non-ASCII char that need
1279 translation. Otherwise set to zero later. */
1280 int char_base = -1;
1281 int boyer_moore_ok = 1;
1282
1283 /* MULTIBYTE says whether the text to be searched is multibyte.
1284 We must convert PATTERN to match that, or we will not really
1285 find things right. */
1286
1287 if (multibyte == STRING_MULTIBYTE (string))
1288 {
1289 raw_pattern = (unsigned char *) SDATA (string);
1290 raw_pattern_size = SCHARS (string);
1291 raw_pattern_size_byte = SBYTES (string);
1292 }
1293 else if (multibyte)
1294 {
1295 raw_pattern_size = SCHARS (string);
1296 raw_pattern_size_byte
1297 = count_size_as_multibyte (SDATA (string),
1298 raw_pattern_size);
1299 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1300 copy_text (SDATA (string), raw_pattern,
1301 SCHARS (string), 0, 1);
1302 }
1303 else
1304 {
1305 /* Converting multibyte to single-byte.
1306
1307 ??? Perhaps this conversion should be done in a special way
1308 by subtracting nonascii-insert-offset from each non-ASCII char,
1309 so that only the multibyte chars which really correspond to
1310 the chosen single-byte character set can possibly match. */
1311 raw_pattern_size = SCHARS (string);
1312 raw_pattern_size_byte = SCHARS (string);
1313 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1314 copy_text (SDATA (string), raw_pattern,
1315 SBYTES (string), 1, 0);
1316 }
1317
1318 /* Copy and optionally translate the pattern. */
1319 len = raw_pattern_size;
1320 len_byte = raw_pattern_size_byte;
1321 patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
1322 pat = patbuf;
1323 base_pat = raw_pattern;
1324 if (multibyte)
1325 {
1326 /* Fill patbuf by translated characters in STRING while
1327 checking if we can use boyer-moore search. If TRT is
1328 non-nil, we can use boyer-moore search only if TRT can be
1329 represented by the byte array of 256 elements. For that,
1330 all non-ASCII case-equivalents of all case-senstive
1331 characters in STRING must belong to the same charset and
1332 row. */
1333
1334 while (--len >= 0)
1335 {
1336 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1337 int c, translated, inverse;
1338 int in_charlen, charlen;
1339
1340 /* If we got here and the RE flag is set, it's because we're
1341 dealing with a regexp known to be trivial, so the backslash
1342 just quotes the next character. */
1343 if (RE && *base_pat == '\\')
1344 {
1345 len--;
1346 raw_pattern_size--;
1347 len_byte--;
1348 base_pat++;
1349 }
1350
1351 c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
1352
1353 if (NILP (trt))
1354 {
1355 str = base_pat;
1356 charlen = in_charlen;
1357 }
1358 else
1359 {
1360 /* Translate the character. */
1361 TRANSLATE (translated, trt, c);
1362 charlen = CHAR_STRING (translated, str_base);
1363 str = str_base;
1364
1365 /* Check if C has any other case-equivalents. */
1366 TRANSLATE (inverse, inverse_trt, c);
1367 /* If so, check if we can use boyer-moore. */
1368 if (c != inverse && boyer_moore_ok)
1369 {
1370 /* Check if all equivalents belong to the same
1371 group of characters. Note that the check of C
1372 itself is done by the last iteration. */
1373 int this_char_base = -1;
1374
1375 while (boyer_moore_ok)
1376 {
1377 if (ASCII_BYTE_P (inverse))
1378 {
1379 if (this_char_base > 0)
1380 boyer_moore_ok = 0;
1381 else
1382 this_char_base = 0;
1383 }
1384 else if (CHAR_BYTE8_P (inverse))
1385 /* Boyer-moore search can't handle a
1386 translation of an eight-bit
1387 character. */
1388 boyer_moore_ok = 0;
1389 else if (this_char_base < 0)
1390 {
1391 this_char_base = inverse & ~0x3F;
1392 if (char_base < 0)
1393 char_base = this_char_base;
1394 else if (this_char_base != char_base)
1395 boyer_moore_ok = 0;
1396 }
1397 else if ((inverse & ~0x3F) != this_char_base)
1398 boyer_moore_ok = 0;
1399 if (c == inverse)
1400 break;
1401 TRANSLATE (inverse, inverse_trt, inverse);
1402 }
1403 }
1404 }
1405
1406 /* Store this character into the translated pattern. */
1407 memcpy (pat, str, charlen);
1408 pat += charlen;
1409 base_pat += in_charlen;
1410 len_byte -= in_charlen;
1411 }
1412
1413 /* If char_base is still negative we didn't find any translated
1414 non-ASCII characters. */
1415 if (char_base < 0)
1416 char_base = 0;
1417 }
1418 else
1419 {
1420 /* Unibyte buffer. */
1421 char_base = 0;
1422 while (--len >= 0)
1423 {
1424 int c, translated;
1425
1426 /* If we got here and the RE flag is set, it's because we're
1427 dealing with a regexp known to be trivial, so the backslash
1428 just quotes the next character. */
1429 if (RE && *base_pat == '\\')
1430 {
1431 len--;
1432 raw_pattern_size--;
1433 base_pat++;
1434 }
1435 c = *base_pat++;
1436 TRANSLATE (translated, trt, c);
1437 *pat++ = translated;
1438 }
1439 }
1440
1441 len_byte = pat - patbuf;
1442 len = raw_pattern_size;
1443 pat = base_pat = patbuf;
1444
1445 if (boyer_moore_ok)
1446 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1447 pos, pos_byte, lim, lim_byte,
1448 char_base);
1449 else
1450 return simple_search (n, pat, len, len_byte, trt,
1451 pos, pos_byte, lim, lim_byte);
1452 }
1453 }
1454 \f
1455 /* Do a simple string search N times for the string PAT,
1456 whose length is LEN/LEN_BYTE,
1457 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1458 TRT is the translation table.
1459
1460 Return the character position where the match is found.
1461 Otherwise, if M matches remained to be found, return -M.
1462
1463 This kind of search works regardless of what is in PAT and
1464 regardless of what is in TRT. It is used in cases where
1465 boyer_moore cannot work. */
1466
1467 static EMACS_INT
1468 simple_search (int n, unsigned char *pat, int len, int len_byte, Lisp_Object trt, EMACS_INT pos, EMACS_INT pos_byte, EMACS_INT lim, EMACS_INT lim_byte)
1469 {
1470 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1471 int forward = n > 0;
1472 /* Number of buffer bytes matched. Note that this may be different
1473 from len_byte in a multibyte buffer. */
1474 int match_byte;
1475
1476 if (lim > pos && multibyte)
1477 while (n > 0)
1478 {
1479 while (1)
1480 {
1481 /* Try matching at position POS. */
1482 EMACS_INT this_pos = pos;
1483 EMACS_INT this_pos_byte = pos_byte;
1484 int this_len = len;
1485 unsigned char *p = pat;
1486 if (pos + len > lim || pos_byte + len_byte > lim_byte)
1487 goto stop;
1488
1489 while (this_len > 0)
1490 {
1491 int charlen, buf_charlen;
1492 int pat_ch, buf_ch;
1493
1494 pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
1495 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1496 buf_charlen);
1497 TRANSLATE (buf_ch, trt, buf_ch);
1498
1499 if (buf_ch != pat_ch)
1500 break;
1501
1502 this_len--;
1503 p += charlen;
1504
1505 this_pos_byte += buf_charlen;
1506 this_pos++;
1507 }
1508
1509 if (this_len == 0)
1510 {
1511 match_byte = this_pos_byte - pos_byte;
1512 pos += len;
1513 pos_byte += match_byte;
1514 break;
1515 }
1516
1517 INC_BOTH (pos, pos_byte);
1518 }
1519
1520 n--;
1521 }
1522 else if (lim > pos)
1523 while (n > 0)
1524 {
1525 while (1)
1526 {
1527 /* Try matching at position POS. */
1528 EMACS_INT this_pos = pos;
1529 int this_len = len;
1530 unsigned char *p = pat;
1531
1532 if (pos + len > lim)
1533 goto stop;
1534
1535 while (this_len > 0)
1536 {
1537 int pat_ch = *p++;
1538 int buf_ch = FETCH_BYTE (this_pos);
1539 TRANSLATE (buf_ch, trt, buf_ch);
1540
1541 if (buf_ch != pat_ch)
1542 break;
1543
1544 this_len--;
1545 this_pos++;
1546 }
1547
1548 if (this_len == 0)
1549 {
1550 match_byte = len;
1551 pos += len;
1552 break;
1553 }
1554
1555 pos++;
1556 }
1557
1558 n--;
1559 }
1560 /* Backwards search. */
1561 else if (lim < pos && multibyte)
1562 while (n < 0)
1563 {
1564 while (1)
1565 {
1566 /* Try matching at position POS. */
1567 EMACS_INT this_pos = pos;
1568 EMACS_INT this_pos_byte = pos_byte;
1569 int this_len = len;
1570 const unsigned char *p = pat + len_byte;
1571
1572 if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
1573 goto stop;
1574
1575 while (this_len > 0)
1576 {
1577 int charlen;
1578 int pat_ch, buf_ch;
1579
1580 DEC_BOTH (this_pos, this_pos_byte);
1581 PREV_CHAR_BOUNDARY (p, pat);
1582 pat_ch = STRING_CHAR (p);
1583 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
1584 TRANSLATE (buf_ch, trt, buf_ch);
1585
1586 if (buf_ch != pat_ch)
1587 break;
1588
1589 this_len--;
1590 }
1591
1592 if (this_len == 0)
1593 {
1594 match_byte = pos_byte - this_pos_byte;
1595 pos = this_pos;
1596 pos_byte = this_pos_byte;
1597 break;
1598 }
1599
1600 DEC_BOTH (pos, pos_byte);
1601 }
1602
1603 n++;
1604 }
1605 else if (lim < pos)
1606 while (n < 0)
1607 {
1608 while (1)
1609 {
1610 /* Try matching at position POS. */
1611 EMACS_INT this_pos = pos - len;
1612 int this_len = len;
1613 unsigned char *p = pat;
1614
1615 if (this_pos < lim)
1616 goto stop;
1617
1618 while (this_len > 0)
1619 {
1620 int pat_ch = *p++;
1621 int buf_ch = FETCH_BYTE (this_pos);
1622 TRANSLATE (buf_ch, trt, buf_ch);
1623
1624 if (buf_ch != pat_ch)
1625 break;
1626 this_len--;
1627 this_pos++;
1628 }
1629
1630 if (this_len == 0)
1631 {
1632 match_byte = len;
1633 pos -= len;
1634 break;
1635 }
1636
1637 pos--;
1638 }
1639
1640 n++;
1641 }
1642
1643 stop:
1644 if (n == 0)
1645 {
1646 if (forward)
1647 set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1648 else
1649 set_search_regs (multibyte ? pos_byte : pos, match_byte);
1650
1651 return pos;
1652 }
1653 else if (n > 0)
1654 return -n;
1655 else
1656 return n;
1657 }
1658 \f
1659 /* Do Boyer-Moore search N times for the string BASE_PAT,
1660 whose length is LEN/LEN_BYTE,
1661 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1662 DIRECTION says which direction we search in.
1663 TRT and INVERSE_TRT are translation tables.
1664 Characters in PAT are already translated by TRT.
1665
1666 This kind of search works if all the characters in BASE_PAT that
1667 have nontrivial translation are the same aside from the last byte.
1668 This makes it possible to translate just the last byte of a
1669 character, and do so after just a simple test of the context.
1670 CHAR_BASE is nonzero if there is such a non-ASCII character.
1671
1672 If that criterion is not satisfied, do not call this function. */
1673
1674 static EMACS_INT
1675 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1676 pos, pos_byte, lim, lim_byte, char_base)
1677 int n;
1678 unsigned char *base_pat;
1679 int len, len_byte;
1680 Lisp_Object trt;
1681 Lisp_Object inverse_trt;
1682 EMACS_INT pos, pos_byte;
1683 EMACS_INT lim, lim_byte;
1684 int char_base;
1685 {
1686 int direction = ((n > 0) ? 1 : -1);
1687 register int dirlen;
1688 EMACS_INT limit;
1689 int stride_for_teases = 0;
1690 int BM_tab[0400];
1691 register unsigned char *cursor, *p_limit;
1692 register int i, j;
1693 unsigned char *pat, *pat_end;
1694 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1695
1696 unsigned char simple_translate[0400];
1697 /* These are set to the preceding bytes of a byte to be translated
1698 if char_base is nonzero. As the maximum byte length of a
1699 multibyte character is 5, we have to check at most four previous
1700 bytes. */
1701 int translate_prev_byte1 = 0;
1702 int translate_prev_byte2 = 0;
1703 int translate_prev_byte3 = 0;
1704 int translate_prev_byte4 = 0;
1705
1706 /* The general approach is that we are going to maintain that we know
1707 the first (closest to the present position, in whatever direction
1708 we're searching) character that could possibly be the last
1709 (furthest from present position) character of a valid match. We
1710 advance the state of our knowledge by looking at that character
1711 and seeing whether it indeed matches the last character of the
1712 pattern. If it does, we take a closer look. If it does not, we
1713 move our pointer (to putative last characters) as far as is
1714 logically possible. This amount of movement, which I call a
1715 stride, will be the length of the pattern if the actual character
1716 appears nowhere in the pattern, otherwise it will be the distance
1717 from the last occurrence of that character to the end of the
1718 pattern. If the amount is zero we have a possible match. */
1719
1720 /* Here we make a "mickey mouse" BM table. The stride of the search
1721 is determined only by the last character of the putative match.
1722 If that character does not match, we will stride the proper
1723 distance to propose a match that superimposes it on the last
1724 instance of a character that matches it (per trt), or misses
1725 it entirely if there is none. */
1726
1727 dirlen = len_byte * direction;
1728
1729 /* Record position after the end of the pattern. */
1730 pat_end = base_pat + len_byte;
1731 /* BASE_PAT points to a character that we start scanning from.
1732 It is the first character in a forward search,
1733 the last character in a backward search. */
1734 if (direction < 0)
1735 base_pat = pat_end - 1;
1736
1737 /* A character that does not appear in the pattern induces a
1738 stride equal to the pattern length. */
1739 for (i = 0; i < 0400; i++)
1740 BM_tab[i] = dirlen;
1741
1742 /* We use this for translation, instead of TRT itself.
1743 We fill this in to handle the characters that actually
1744 occur in the pattern. Others don't matter anyway! */
1745 for (i = 0; i < 0400; i++)
1746 simple_translate[i] = i;
1747
1748 if (char_base)
1749 {
1750 /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE. Only a
1751 byte following them are the target of translation. */
1752 unsigned char str[MAX_MULTIBYTE_LENGTH];
1753 int len = CHAR_STRING (char_base, str);
1754
1755 translate_prev_byte1 = str[len - 2];
1756 if (len > 2)
1757 {
1758 translate_prev_byte2 = str[len - 3];
1759 if (len > 3)
1760 {
1761 translate_prev_byte3 = str[len - 4];
1762 if (len > 4)
1763 translate_prev_byte4 = str[len - 5];
1764 }
1765 }
1766 }
1767
1768 i = 0;
1769 while (i != dirlen)
1770 {
1771 unsigned char *ptr = base_pat + i;
1772 i += direction;
1773 if (! NILP (trt))
1774 {
1775 /* If the byte currently looking at is the last of a
1776 character to check case-equivalents, set CH to that
1777 character. An ASCII character and a non-ASCII character
1778 matching with CHAR_BASE are to be checked. */
1779 int ch = -1;
1780
1781 if (ASCII_BYTE_P (*ptr) || ! multibyte)
1782 ch = *ptr;
1783 else if (char_base
1784 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1785 {
1786 unsigned char *charstart = ptr - 1;
1787
1788 while (! (CHAR_HEAD_P (*charstart)))
1789 charstart--;
1790 ch = STRING_CHAR (charstart);
1791 if (char_base != (ch & ~0x3F))
1792 ch = -1;
1793 }
1794
1795 if (ch >= 0200)
1796 j = (ch & 0x3F) | 0200;
1797 else
1798 j = *ptr;
1799
1800 if (i == dirlen)
1801 stride_for_teases = BM_tab[j];
1802
1803 BM_tab[j] = dirlen - i;
1804 /* A translation table is accompanied by its inverse -- see */
1805 /* comment following downcase_table for details */
1806 if (ch >= 0)
1807 {
1808 int starting_ch = ch;
1809 int starting_j = j;
1810
1811 while (1)
1812 {
1813 TRANSLATE (ch, inverse_trt, ch);
1814 if (ch >= 0200)
1815 j = (ch & 0x3F) | 0200;
1816 else
1817 j = ch;
1818
1819 /* For all the characters that map into CH,
1820 set up simple_translate to map the last byte
1821 into STARTING_J. */
1822 simple_translate[j] = starting_j;
1823 if (ch == starting_ch)
1824 break;
1825 BM_tab[j] = dirlen - i;
1826 }
1827 }
1828 }
1829 else
1830 {
1831 j = *ptr;
1832
1833 if (i == dirlen)
1834 stride_for_teases = BM_tab[j];
1835 BM_tab[j] = dirlen - i;
1836 }
1837 /* stride_for_teases tells how much to stride if we get a
1838 match on the far character but are subsequently
1839 disappointed, by recording what the stride would have been
1840 for that character if the last character had been
1841 different. */
1842 }
1843 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1844 /* loop invariant - POS_BYTE points at where last char (first
1845 char if reverse) of pattern would align in a possible match. */
1846 while (n != 0)
1847 {
1848 EMACS_INT tail_end;
1849 unsigned char *tail_end_ptr;
1850
1851 /* It's been reported that some (broken) compiler thinks that
1852 Boolean expressions in an arithmetic context are unsigned.
1853 Using an explicit ?1:0 prevents this. */
1854 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1855 < 0)
1856 return (n * (0 - direction));
1857 /* First we do the part we can by pointers (maybe nothing) */
1858 QUIT;
1859 pat = base_pat;
1860 limit = pos_byte - dirlen + direction;
1861 if (direction > 0)
1862 {
1863 limit = BUFFER_CEILING_OF (limit);
1864 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1865 can take on without hitting edge of buffer or the gap. */
1866 limit = min (limit, pos_byte + 20000);
1867 limit = min (limit, lim_byte - 1);
1868 }
1869 else
1870 {
1871 limit = BUFFER_FLOOR_OF (limit);
1872 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1873 can take on without hitting edge of buffer or the gap. */
1874 limit = max (limit, pos_byte - 20000);
1875 limit = max (limit, lim_byte);
1876 }
1877 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1878 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1879
1880 if ((limit - pos_byte) * direction > 20)
1881 {
1882 unsigned char *p2;
1883
1884 p_limit = BYTE_POS_ADDR (limit);
1885 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1886 /* In this loop, pos + cursor - p2 is the surrogate for pos. */
1887 while (1) /* use one cursor setting as long as i can */
1888 {
1889 if (direction > 0) /* worth duplicating */
1890 {
1891 while (cursor <= p_limit)
1892 {
1893 if (BM_tab[*cursor] == 0)
1894 goto hit;
1895 cursor += BM_tab[*cursor];
1896 }
1897 }
1898 else
1899 {
1900 while (cursor >= p_limit)
1901 {
1902 if (BM_tab[*cursor] == 0)
1903 goto hit;
1904 cursor += BM_tab[*cursor];
1905 }
1906 }
1907 /* If you are here, cursor is beyond the end of the
1908 searched region. You fail to match within the
1909 permitted region and would otherwise try a character
1910 beyond that region. */
1911 break;
1912
1913 hit:
1914 i = dirlen - direction;
1915 if (! NILP (trt))
1916 {
1917 while ((i -= direction) + direction != 0)
1918 {
1919 int ch;
1920 cursor -= direction;
1921 /* Translate only the last byte of a character. */
1922 if (! multibyte
1923 || ((cursor == tail_end_ptr
1924 || CHAR_HEAD_P (cursor[1]))
1925 && (CHAR_HEAD_P (cursor[0])
1926 /* Check if this is the last byte of
1927 a translable character. */
1928 || (translate_prev_byte1 == cursor[-1]
1929 && (CHAR_HEAD_P (translate_prev_byte1)
1930 || (translate_prev_byte2 == cursor[-2]
1931 && (CHAR_HEAD_P (translate_prev_byte2)
1932 || (translate_prev_byte3 == cursor[-3]))))))))
1933 ch = simple_translate[*cursor];
1934 else
1935 ch = *cursor;
1936 if (pat[i] != ch)
1937 break;
1938 }
1939 }
1940 else
1941 {
1942 while ((i -= direction) + direction != 0)
1943 {
1944 cursor -= direction;
1945 if (pat[i] != *cursor)
1946 break;
1947 }
1948 }
1949 cursor += dirlen - i - direction; /* fix cursor */
1950 if (i + direction == 0)
1951 {
1952 EMACS_INT position, start, end;
1953
1954 cursor -= direction;
1955
1956 position = pos_byte + cursor - p2 + ((direction > 0)
1957 ? 1 - len_byte : 0);
1958 set_search_regs (position, len_byte);
1959
1960 if (NILP (Vinhibit_changing_match_data))
1961 {
1962 start = search_regs.start[0];
1963 end = search_regs.end[0];
1964 }
1965 else
1966 /* If Vinhibit_changing_match_data is non-nil,
1967 search_regs will not be changed. So let's
1968 compute start and end here. */
1969 {
1970 start = BYTE_TO_CHAR (position);
1971 end = BYTE_TO_CHAR (position + len_byte);
1972 }
1973
1974 if ((n -= direction) != 0)
1975 cursor += dirlen; /* to resume search */
1976 else
1977 return direction > 0 ? end : start;
1978 }
1979 else
1980 cursor += stride_for_teases; /* <sigh> we lose - */
1981 }
1982 pos_byte += cursor - p2;
1983 }
1984 else
1985 /* Now we'll pick up a clump that has to be done the hard
1986 way because it covers a discontinuity. */
1987 {
1988 limit = ((direction > 0)
1989 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1990 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1991 limit = ((direction > 0)
1992 ? min (limit + len_byte, lim_byte - 1)
1993 : max (limit - len_byte, lim_byte));
1994 /* LIMIT is now the last value POS_BYTE can have
1995 and still be valid for a possible match. */
1996 while (1)
1997 {
1998 /* This loop can be coded for space rather than
1999 speed because it will usually run only once.
2000 (the reach is at most len + 21, and typically
2001 does not exceed len). */
2002 while ((limit - pos_byte) * direction >= 0)
2003 {
2004 int ch = FETCH_BYTE (pos_byte);
2005 if (BM_tab[ch] == 0)
2006 goto hit2;
2007 pos_byte += BM_tab[ch];
2008 }
2009 break; /* ran off the end */
2010
2011 hit2:
2012 /* Found what might be a match. */
2013 i = dirlen - direction;
2014 while ((i -= direction) + direction != 0)
2015 {
2016 int ch;
2017 unsigned char *ptr;
2018 pos_byte -= direction;
2019 ptr = BYTE_POS_ADDR (pos_byte);
2020 /* Translate only the last byte of a character. */
2021 if (! multibyte
2022 || ((ptr == tail_end_ptr
2023 || CHAR_HEAD_P (ptr[1]))
2024 && (CHAR_HEAD_P (ptr[0])
2025 /* Check if this is the last byte of a
2026 translable character. */
2027 || (translate_prev_byte1 == ptr[-1]
2028 && (CHAR_HEAD_P (translate_prev_byte1)
2029 || (translate_prev_byte2 == ptr[-2]
2030 && (CHAR_HEAD_P (translate_prev_byte2)
2031 || translate_prev_byte3 == ptr[-3])))))))
2032 ch = simple_translate[*ptr];
2033 else
2034 ch = *ptr;
2035 if (pat[i] != ch)
2036 break;
2037 }
2038 /* Above loop has moved POS_BYTE part or all the way
2039 back to the first pos (last pos if reverse).
2040 Set it once again at the last (first if reverse) char. */
2041 pos_byte += dirlen - i - direction;
2042 if (i + direction == 0)
2043 {
2044 EMACS_INT position, start, end;
2045 pos_byte -= direction;
2046
2047 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2048 set_search_regs (position, len_byte);
2049
2050 if (NILP (Vinhibit_changing_match_data))
2051 {
2052 start = search_regs.start[0];
2053 end = search_regs.end[0];
2054 }
2055 else
2056 /* If Vinhibit_changing_match_data is non-nil,
2057 search_regs will not be changed. So let's
2058 compute start and end here. */
2059 {
2060 start = BYTE_TO_CHAR (position);
2061 end = BYTE_TO_CHAR (position + len_byte);
2062 }
2063
2064 if ((n -= direction) != 0)
2065 pos_byte += dirlen; /* to resume search */
2066 else
2067 return direction > 0 ? end : start;
2068 }
2069 else
2070 pos_byte += stride_for_teases;
2071 }
2072 }
2073 /* We have done one clump. Can we continue? */
2074 if ((lim_byte - pos_byte) * direction < 0)
2075 return ((0 - n) * direction);
2076 }
2077 return BYTE_TO_CHAR (pos_byte);
2078 }
2079
2080 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2081 for the overall match just found in the current buffer.
2082 Also clear out the match data for registers 1 and up. */
2083
2084 static void
2085 set_search_regs (EMACS_INT beg_byte, EMACS_INT nbytes)
2086 {
2087 int i;
2088
2089 if (!NILP (Vinhibit_changing_match_data))
2090 return;
2091
2092 /* Make sure we have registers in which to store
2093 the match position. */
2094 if (search_regs.num_regs == 0)
2095 {
2096 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2097 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2098 search_regs.num_regs = 2;
2099 }
2100
2101 /* Clear out the other registers. */
2102 for (i = 1; i < search_regs.num_regs; i++)
2103 {
2104 search_regs.start[i] = -1;
2105 search_regs.end[i] = -1;
2106 }
2107
2108 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2109 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2110 XSETBUFFER (last_thing_searched, current_buffer);
2111 }
2112 \f
2113 /* Given STRING, a string of words separated by word delimiters,
2114 compute a regexp that matches those exact words separated by
2115 arbitrary punctuation. If LAX is nonzero, the end of the string
2116 need not match a word boundary unless it ends in whitespace. */
2117
2118 static Lisp_Object
2119 wordify (Lisp_Object string, int lax)
2120 {
2121 register unsigned char *p, *o;
2122 register int i, i_byte, len, punct_count = 0, word_count = 0;
2123 Lisp_Object val;
2124 int prev_c = 0;
2125 int adjust, whitespace_at_end;
2126
2127 CHECK_STRING (string);
2128 p = SDATA (string);
2129 len = SCHARS (string);
2130
2131 for (i = 0, i_byte = 0; i < len; )
2132 {
2133 int c;
2134
2135 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2136
2137 if (SYNTAX (c) != Sword)
2138 {
2139 punct_count++;
2140 if (i > 0 && SYNTAX (prev_c) == Sword)
2141 word_count++;
2142 }
2143
2144 prev_c = c;
2145 }
2146
2147 if (SYNTAX (prev_c) == Sword)
2148 {
2149 word_count++;
2150 whitespace_at_end = 0;
2151 }
2152 else
2153 whitespace_at_end = 1;
2154
2155 if (!word_count)
2156 return empty_unibyte_string;
2157
2158 adjust = - punct_count + 5 * (word_count - 1)
2159 + ((lax && !whitespace_at_end) ? 2 : 4);
2160 if (STRING_MULTIBYTE (string))
2161 val = make_uninit_multibyte_string (len + adjust,
2162 SBYTES (string)
2163 + adjust);
2164 else
2165 val = make_uninit_string (len + adjust);
2166
2167 o = SDATA (val);
2168 *o++ = '\\';
2169 *o++ = 'b';
2170 prev_c = 0;
2171
2172 for (i = 0, i_byte = 0; i < len; )
2173 {
2174 int c;
2175 int i_byte_orig = i_byte;
2176
2177 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2178
2179 if (SYNTAX (c) == Sword)
2180 {
2181 memcpy (o, SDATA (string) + i_byte_orig, i_byte - i_byte_orig);
2182 o += i_byte - i_byte_orig;
2183 }
2184 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2185 {
2186 *o++ = '\\';
2187 *o++ = 'W';
2188 *o++ = '\\';
2189 *o++ = 'W';
2190 *o++ = '*';
2191 }
2192
2193 prev_c = c;
2194 }
2195
2196 if (!lax || whitespace_at_end)
2197 {
2198 *o++ = '\\';
2199 *o++ = 'b';
2200 }
2201
2202 return val;
2203 }
2204 \f
2205 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2206 "MSearch backward: ",
2207 doc: /* Search backward from point for STRING.
2208 Set point to the beginning of the occurrence found, and return point.
2209 An optional second argument bounds the search; it is a buffer position.
2210 The match found must not extend before that position.
2211 Optional third argument, if t, means if fail just return nil (no error).
2212 If not nil and not t, position at limit of search and return nil.
2213 Optional fourth argument is repeat count--search for successive occurrences.
2214
2215 Search case-sensitivity is determined by the value of the variable
2216 `case-fold-search', which see.
2217
2218 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2219 (string, bound, noerror, count)
2220 Lisp_Object string, bound, noerror, count;
2221 {
2222 return search_command (string, bound, noerror, count, -1, 0, 0);
2223 }
2224
2225 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2226 doc: /* Search forward from point for STRING.
2227 Set point to the end of the occurrence found, and return point.
2228 An optional second argument bounds the search; it is a buffer position.
2229 The match found must not extend after that position. A value of nil is
2230 equivalent to (point-max).
2231 Optional third argument, if t, means if fail just return nil (no error).
2232 If not nil and not t, move to limit of search and return nil.
2233 Optional fourth argument is repeat count--search for successive occurrences.
2234
2235 Search case-sensitivity is determined by the value of the variable
2236 `case-fold-search', which see.
2237
2238 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2239 (string, bound, noerror, count)
2240 Lisp_Object string, bound, noerror, count;
2241 {
2242 return search_command (string, bound, noerror, count, 1, 0, 0);
2243 }
2244
2245 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2246 "sWord search backward: ",
2247 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2248 Set point to the beginning of the occurrence found, and return point.
2249 An optional second argument bounds the search; it is a buffer position.
2250 The match found must not extend before that position.
2251 Optional third argument, if t, means if fail just return nil (no error).
2252 If not nil and not t, move to limit of search and return nil.
2253 Optional fourth argument is repeat count--search for successive occurrences. */)
2254 (string, bound, noerror, count)
2255 Lisp_Object string, bound, noerror, count;
2256 {
2257 return search_command (wordify (string, 0), bound, noerror, count, -1, 1, 0);
2258 }
2259
2260 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2261 "sWord search: ",
2262 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2263 Set point to the end of the occurrence found, and return point.
2264 An optional second argument bounds the search; it is a buffer position.
2265 The match found must not extend after that position.
2266 Optional third argument, if t, means if fail just return nil (no error).
2267 If not nil and not t, move to limit of search and return nil.
2268 Optional fourth argument is repeat count--search for successive occurrences. */)
2269 (string, bound, noerror, count)
2270 Lisp_Object string, bound, noerror, count;
2271 {
2272 return search_command (wordify (string, 0), bound, noerror, count, 1, 1, 0);
2273 }
2274
2275 DEFUN ("word-search-backward-lax", Fword_search_backward_lax, Sword_search_backward_lax, 1, 4,
2276 "sWord search backward: ",
2277 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2278 Set point to the beginning of the occurrence found, and return point.
2279
2280 Unlike `word-search-backward', the end of STRING need not match a word
2281 boundary unless it ends in whitespace.
2282
2283 An optional second argument bounds the search; it is a buffer position.
2284 The match found must not extend before that position.
2285 Optional third argument, if t, means if fail just return nil (no error).
2286 If not nil and not t, move to limit of search and return nil.
2287 Optional fourth argument is repeat count--search for successive occurrences. */)
2288 (string, bound, noerror, count)
2289 Lisp_Object string, bound, noerror, count;
2290 {
2291 return search_command (wordify (string, 1), bound, noerror, count, -1, 1, 0);
2292 }
2293
2294 DEFUN ("word-search-forward-lax", Fword_search_forward_lax, Sword_search_forward_lax, 1, 4,
2295 "sWord search: ",
2296 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2297 Set point to the end of the occurrence found, and return point.
2298
2299 Unlike `word-search-forward', the end of STRING need not match a word
2300 boundary unless it ends in whitespace.
2301
2302 An optional second argument bounds the search; it is a buffer position.
2303 The match found must not extend after that position.
2304 Optional third argument, if t, means if fail just return nil (no error).
2305 If not nil and not t, move to limit of search and return nil.
2306 Optional fourth argument is repeat count--search for successive occurrences. */)
2307 (string, bound, noerror, count)
2308 Lisp_Object string, bound, noerror, count;
2309 {
2310 return search_command (wordify (string, 1), bound, noerror, count, 1, 1, 0);
2311 }
2312
2313 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2314 "sRE search backward: ",
2315 doc: /* Search backward from point for match for regular expression REGEXP.
2316 Set point to the beginning of the match, and return point.
2317 The match found is the one starting last in the buffer
2318 and yet ending before the origin of the search.
2319 An optional second argument bounds the search; it is a buffer position.
2320 The match found must start at or after that position.
2321 Optional third argument, if t, means if fail just return nil (no error).
2322 If not nil and not t, move to limit of search and return nil.
2323 Optional fourth argument is repeat count--search for successive occurrences.
2324 See also the functions `match-beginning', `match-end', `match-string',
2325 and `replace-match'. */)
2326 (regexp, bound, noerror, count)
2327 Lisp_Object regexp, bound, noerror, count;
2328 {
2329 return search_command (regexp, bound, noerror, count, -1, 1, 0);
2330 }
2331
2332 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2333 "sRE search: ",
2334 doc: /* Search forward from point for regular expression REGEXP.
2335 Set point to the end of the occurrence found, and return point.
2336 An optional second argument bounds the search; it is a buffer position.
2337 The match found must not extend after that position.
2338 Optional third argument, if t, means if fail just return nil (no error).
2339 If not nil and not t, move to limit of search and return nil.
2340 Optional fourth argument is repeat count--search for successive occurrences.
2341 See also the functions `match-beginning', `match-end', `match-string',
2342 and `replace-match'. */)
2343 (regexp, bound, noerror, count)
2344 Lisp_Object regexp, bound, noerror, count;
2345 {
2346 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2347 }
2348
2349 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2350 "sPosix search backward: ",
2351 doc: /* Search backward from point for match for regular expression REGEXP.
2352 Find the longest match in accord with Posix regular expression rules.
2353 Set point to the beginning of the match, and return point.
2354 The match found is the one starting last in the buffer
2355 and yet ending before the origin of the search.
2356 An optional second argument bounds the search; it is a buffer position.
2357 The match found must start at or after that position.
2358 Optional third argument, if t, means if fail just return nil (no error).
2359 If not nil and not t, move to limit of search and return nil.
2360 Optional fourth argument is repeat count--search for successive occurrences.
2361 See also the functions `match-beginning', `match-end', `match-string',
2362 and `replace-match'. */)
2363 (regexp, bound, noerror, count)
2364 Lisp_Object regexp, bound, noerror, count;
2365 {
2366 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2367 }
2368
2369 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2370 "sPosix search: ",
2371 doc: /* Search forward from point for regular expression REGEXP.
2372 Find the longest match in accord with Posix regular expression rules.
2373 Set point to the end of the occurrence found, and return point.
2374 An optional second argument bounds the search; it is a buffer position.
2375 The match found must not extend after that position.
2376 Optional third argument, if t, means if fail just return nil (no error).
2377 If not nil and not t, move to limit of search and return nil.
2378 Optional fourth argument is repeat count--search for successive occurrences.
2379 See also the functions `match-beginning', `match-end', `match-string',
2380 and `replace-match'. */)
2381 (regexp, bound, noerror, count)
2382 Lisp_Object regexp, bound, noerror, count;
2383 {
2384 return search_command (regexp, bound, noerror, count, 1, 1, 1);
2385 }
2386 \f
2387 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2388 doc: /* Replace text matched by last search with NEWTEXT.
2389 Leave point at the end of the replacement text.
2390
2391 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2392 Otherwise maybe capitalize the whole text, or maybe just word initials,
2393 based on the replaced text.
2394 If the replaced text has only capital letters
2395 and has at least one multiletter word, convert NEWTEXT to all caps.
2396 Otherwise if all words are capitalized in the replaced text,
2397 capitalize each word in NEWTEXT.
2398
2399 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2400 Otherwise treat `\\' as special:
2401 `\\&' in NEWTEXT means substitute original matched text.
2402 `\\N' means substitute what matched the Nth `\\(...\\)'.
2403 If Nth parens didn't match, substitute nothing.
2404 `\\\\' means insert one `\\'.
2405 Case conversion does not apply to these substitutions.
2406
2407 FIXEDCASE and LITERAL are optional arguments.
2408
2409 The optional fourth argument STRING can be a string to modify.
2410 This is meaningful when the previous match was done against STRING,
2411 using `string-match'. When used this way, `replace-match'
2412 creates and returns a new string made by copying STRING and replacing
2413 the part of STRING that was matched.
2414
2415 The optional fifth argument SUBEXP specifies a subexpression;
2416 it says to replace just that subexpression with NEWTEXT,
2417 rather than replacing the entire matched text.
2418 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2419 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2420 NEWTEXT in place of subexp N.
2421 This is useful only after a regular expression search or match,
2422 since only regular expressions have distinguished subexpressions. */)
2423 (newtext, fixedcase, literal, string, subexp)
2424 Lisp_Object newtext, fixedcase, literal, string, subexp;
2425 {
2426 enum { nochange, all_caps, cap_initial } case_action;
2427 register int pos, pos_byte;
2428 int some_multiletter_word;
2429 int some_lowercase;
2430 int some_uppercase;
2431 int some_nonuppercase_initial;
2432 register int c, prevc;
2433 int sub;
2434 EMACS_INT opoint, newpoint;
2435
2436 CHECK_STRING (newtext);
2437
2438 if (! NILP (string))
2439 CHECK_STRING (string);
2440
2441 case_action = nochange; /* We tried an initialization */
2442 /* but some C compilers blew it */
2443
2444 if (search_regs.num_regs <= 0)
2445 error ("`replace-match' called before any match found");
2446
2447 if (NILP (subexp))
2448 sub = 0;
2449 else
2450 {
2451 CHECK_NUMBER (subexp);
2452 sub = XINT (subexp);
2453 if (sub < 0 || sub >= search_regs.num_regs)
2454 args_out_of_range (subexp, make_number (search_regs.num_regs));
2455 }
2456
2457 if (NILP (string))
2458 {
2459 if (search_regs.start[sub] < BEGV
2460 || search_regs.start[sub] > search_regs.end[sub]
2461 || search_regs.end[sub] > ZV)
2462 args_out_of_range (make_number (search_regs.start[sub]),
2463 make_number (search_regs.end[sub]));
2464 }
2465 else
2466 {
2467 if (search_regs.start[sub] < 0
2468 || search_regs.start[sub] > search_regs.end[sub]
2469 || search_regs.end[sub] > SCHARS (string))
2470 args_out_of_range (make_number (search_regs.start[sub]),
2471 make_number (search_regs.end[sub]));
2472 }
2473
2474 if (NILP (fixedcase))
2475 {
2476 /* Decide how to casify by examining the matched text. */
2477 EMACS_INT last;
2478
2479 pos = search_regs.start[sub];
2480 last = search_regs.end[sub];
2481
2482 if (NILP (string))
2483 pos_byte = CHAR_TO_BYTE (pos);
2484 else
2485 pos_byte = string_char_to_byte (string, pos);
2486
2487 prevc = '\n';
2488 case_action = all_caps;
2489
2490 /* some_multiletter_word is set nonzero if any original word
2491 is more than one letter long. */
2492 some_multiletter_word = 0;
2493 some_lowercase = 0;
2494 some_nonuppercase_initial = 0;
2495 some_uppercase = 0;
2496
2497 while (pos < last)
2498 {
2499 if (NILP (string))
2500 {
2501 c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2502 INC_BOTH (pos, pos_byte);
2503 }
2504 else
2505 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2506
2507 if (LOWERCASEP (c))
2508 {
2509 /* Cannot be all caps if any original char is lower case */
2510
2511 some_lowercase = 1;
2512 if (SYNTAX (prevc) != Sword)
2513 some_nonuppercase_initial = 1;
2514 else
2515 some_multiletter_word = 1;
2516 }
2517 else if (UPPERCASEP (c))
2518 {
2519 some_uppercase = 1;
2520 if (SYNTAX (prevc) != Sword)
2521 ;
2522 else
2523 some_multiletter_word = 1;
2524 }
2525 else
2526 {
2527 /* If the initial is a caseless word constituent,
2528 treat that like a lowercase initial. */
2529 if (SYNTAX (prevc) != Sword)
2530 some_nonuppercase_initial = 1;
2531 }
2532
2533 prevc = c;
2534 }
2535
2536 /* Convert to all caps if the old text is all caps
2537 and has at least one multiletter word. */
2538 if (! some_lowercase && some_multiletter_word)
2539 case_action = all_caps;
2540 /* Capitalize each word, if the old text has all capitalized words. */
2541 else if (!some_nonuppercase_initial && some_multiletter_word)
2542 case_action = cap_initial;
2543 else if (!some_nonuppercase_initial && some_uppercase)
2544 /* Should x -> yz, operating on X, give Yz or YZ?
2545 We'll assume the latter. */
2546 case_action = all_caps;
2547 else
2548 case_action = nochange;
2549 }
2550
2551 /* Do replacement in a string. */
2552 if (!NILP (string))
2553 {
2554 Lisp_Object before, after;
2555
2556 before = Fsubstring (string, make_number (0),
2557 make_number (search_regs.start[sub]));
2558 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2559
2560 /* Substitute parts of the match into NEWTEXT
2561 if desired. */
2562 if (NILP (literal))
2563 {
2564 EMACS_INT lastpos = 0;
2565 EMACS_INT lastpos_byte = 0;
2566 /* We build up the substituted string in ACCUM. */
2567 Lisp_Object accum;
2568 Lisp_Object middle;
2569 int length = SBYTES (newtext);
2570
2571 accum = Qnil;
2572
2573 for (pos_byte = 0, pos = 0; pos_byte < length;)
2574 {
2575 int substart = -1;
2576 int subend = 0;
2577 int delbackslash = 0;
2578
2579 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2580
2581 if (c == '\\')
2582 {
2583 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2584
2585 if (c == '&')
2586 {
2587 substart = search_regs.start[sub];
2588 subend = search_regs.end[sub];
2589 }
2590 else if (c >= '1' && c <= '9')
2591 {
2592 if (search_regs.start[c - '0'] >= 0
2593 && c <= search_regs.num_regs + '0')
2594 {
2595 substart = search_regs.start[c - '0'];
2596 subend = search_regs.end[c - '0'];
2597 }
2598 else
2599 {
2600 /* If that subexp did not match,
2601 replace \\N with nothing. */
2602 substart = 0;
2603 subend = 0;
2604 }
2605 }
2606 else if (c == '\\')
2607 delbackslash = 1;
2608 else
2609 error ("Invalid use of `\\' in replacement text");
2610 }
2611 if (substart >= 0)
2612 {
2613 if (pos - 2 != lastpos)
2614 middle = substring_both (newtext, lastpos,
2615 lastpos_byte,
2616 pos - 2, pos_byte - 2);
2617 else
2618 middle = Qnil;
2619 accum = concat3 (accum, middle,
2620 Fsubstring (string,
2621 make_number (substart),
2622 make_number (subend)));
2623 lastpos = pos;
2624 lastpos_byte = pos_byte;
2625 }
2626 else if (delbackslash)
2627 {
2628 middle = substring_both (newtext, lastpos,
2629 lastpos_byte,
2630 pos - 1, pos_byte - 1);
2631
2632 accum = concat2 (accum, middle);
2633 lastpos = pos;
2634 lastpos_byte = pos_byte;
2635 }
2636 }
2637
2638 if (pos != lastpos)
2639 middle = substring_both (newtext, lastpos,
2640 lastpos_byte,
2641 pos, pos_byte);
2642 else
2643 middle = Qnil;
2644
2645 newtext = concat2 (accum, middle);
2646 }
2647
2648 /* Do case substitution in NEWTEXT if desired. */
2649 if (case_action == all_caps)
2650 newtext = Fupcase (newtext);
2651 else if (case_action == cap_initial)
2652 newtext = Fupcase_initials (newtext);
2653
2654 return concat3 (before, newtext, after);
2655 }
2656
2657 /* Record point, then move (quietly) to the start of the match. */
2658 if (PT >= search_regs.end[sub])
2659 opoint = PT - ZV;
2660 else if (PT > search_regs.start[sub])
2661 opoint = search_regs.end[sub] - ZV;
2662 else
2663 opoint = PT;
2664
2665 /* If we want non-literal replacement,
2666 perform substitution on the replacement string. */
2667 if (NILP (literal))
2668 {
2669 int length = SBYTES (newtext);
2670 unsigned char *substed;
2671 int substed_alloc_size, substed_len;
2672 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2673 int str_multibyte = STRING_MULTIBYTE (newtext);
2674 Lisp_Object rev_tbl;
2675 int really_changed = 0;
2676
2677 rev_tbl = Qnil;
2678
2679 substed_alloc_size = length * 2 + 100;
2680 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2681 substed_len = 0;
2682
2683 /* Go thru NEWTEXT, producing the actual text to insert in
2684 SUBSTED while adjusting multibyteness to that of the current
2685 buffer. */
2686
2687 for (pos_byte = 0, pos = 0; pos_byte < length;)
2688 {
2689 unsigned char str[MAX_MULTIBYTE_LENGTH];
2690 unsigned char *add_stuff = NULL;
2691 int add_len = 0;
2692 int idx = -1;
2693
2694 if (str_multibyte)
2695 {
2696 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2697 if (!buf_multibyte)
2698 c = multibyte_char_to_unibyte (c, rev_tbl);
2699 }
2700 else
2701 {
2702 /* Note that we don't have to increment POS. */
2703 c = SREF (newtext, pos_byte++);
2704 if (buf_multibyte)
2705 MAKE_CHAR_MULTIBYTE (c);
2706 }
2707
2708 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2709 or set IDX to a match index, which means put that part
2710 of the buffer text into SUBSTED. */
2711
2712 if (c == '\\')
2713 {
2714 really_changed = 1;
2715
2716 if (str_multibyte)
2717 {
2718 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2719 pos, pos_byte);
2720 if (!buf_multibyte && !ASCII_CHAR_P (c))
2721 c = multibyte_char_to_unibyte (c, rev_tbl);
2722 }
2723 else
2724 {
2725 c = SREF (newtext, pos_byte++);
2726 if (buf_multibyte)
2727 MAKE_CHAR_MULTIBYTE (c);
2728 }
2729
2730 if (c == '&')
2731 idx = sub;
2732 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2733 {
2734 if (search_regs.start[c - '0'] >= 1)
2735 idx = c - '0';
2736 }
2737 else if (c == '\\')
2738 add_len = 1, add_stuff = "\\";
2739 else
2740 {
2741 xfree (substed);
2742 error ("Invalid use of `\\' in replacement text");
2743 }
2744 }
2745 else
2746 {
2747 add_len = CHAR_STRING (c, str);
2748 add_stuff = str;
2749 }
2750
2751 /* If we want to copy part of a previous match,
2752 set up ADD_STUFF and ADD_LEN to point to it. */
2753 if (idx >= 0)
2754 {
2755 EMACS_INT begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2756 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2757 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2758 move_gap (search_regs.start[idx]);
2759 add_stuff = BYTE_POS_ADDR (begbyte);
2760 }
2761
2762 /* Now the stuff we want to add to SUBSTED
2763 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2764
2765 /* Make sure SUBSTED is big enough. */
2766 if (substed_len + add_len >= substed_alloc_size)
2767 {
2768 substed_alloc_size = substed_len + add_len + 500;
2769 substed = (unsigned char *) xrealloc (substed,
2770 substed_alloc_size + 1);
2771 }
2772
2773 /* Now add to the end of SUBSTED. */
2774 if (add_stuff)
2775 {
2776 memcpy (substed + substed_len, add_stuff, add_len);
2777 substed_len += add_len;
2778 }
2779 }
2780
2781 if (really_changed)
2782 {
2783 if (buf_multibyte)
2784 {
2785 int nchars = multibyte_chars_in_text (substed, substed_len);
2786
2787 newtext = make_multibyte_string (substed, nchars, substed_len);
2788 }
2789 else
2790 newtext = make_unibyte_string (substed, substed_len);
2791 }
2792 xfree (substed);
2793 }
2794
2795 /* Replace the old text with the new in the cleanest possible way. */
2796 replace_range (search_regs.start[sub], search_regs.end[sub],
2797 newtext, 1, 0, 1);
2798 newpoint = search_regs.start[sub] + SCHARS (newtext);
2799
2800 if (case_action == all_caps)
2801 Fupcase_region (make_number (search_regs.start[sub]),
2802 make_number (newpoint));
2803 else if (case_action == cap_initial)
2804 Fupcase_initials_region (make_number (search_regs.start[sub]),
2805 make_number (newpoint));
2806
2807 /* Adjust search data for this change. */
2808 {
2809 EMACS_INT oldend = search_regs.end[sub];
2810 EMACS_INT oldstart = search_regs.start[sub];
2811 EMACS_INT change = newpoint - search_regs.end[sub];
2812 int i;
2813
2814 for (i = 0; i < search_regs.num_regs; i++)
2815 {
2816 if (search_regs.start[i] >= oldend)
2817 search_regs.start[i] += change;
2818 else if (search_regs.start[i] > oldstart)
2819 search_regs.start[i] = oldstart;
2820 if (search_regs.end[i] >= oldend)
2821 search_regs.end[i] += change;
2822 else if (search_regs.end[i] > oldstart)
2823 search_regs.end[i] = oldstart;
2824 }
2825 }
2826
2827 /* Put point back where it was in the text. */
2828 if (opoint <= 0)
2829 TEMP_SET_PT (opoint + ZV);
2830 else
2831 TEMP_SET_PT (opoint);
2832
2833 /* Now move point "officially" to the start of the inserted replacement. */
2834 move_if_not_intangible (newpoint);
2835
2836 return Qnil;
2837 }
2838 \f
2839 static Lisp_Object
2840 match_limit (Lisp_Object num, int beginningp)
2841 {
2842 register int n;
2843
2844 CHECK_NUMBER (num);
2845 n = XINT (num);
2846 if (n < 0)
2847 args_out_of_range (num, make_number (0));
2848 if (search_regs.num_regs <= 0)
2849 error ("No match data, because no search succeeded");
2850 if (n >= search_regs.num_regs
2851 || search_regs.start[n] < 0)
2852 return Qnil;
2853 return (make_number ((beginningp) ? search_regs.start[n]
2854 : search_regs.end[n]));
2855 }
2856
2857 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2858 doc: /* Return position of start of text matched by last search.
2859 SUBEXP, a number, specifies which parenthesized expression in the last
2860 regexp.
2861 Value is nil if SUBEXPth pair didn't match, or there were less than
2862 SUBEXP pairs.
2863 Zero means the entire text matched by the whole regexp or whole string. */)
2864 (subexp)
2865 Lisp_Object subexp;
2866 {
2867 return match_limit (subexp, 1);
2868 }
2869
2870 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2871 doc: /* Return position of end of text matched by last search.
2872 SUBEXP, a number, specifies which parenthesized expression in the last
2873 regexp.
2874 Value is nil if SUBEXPth pair didn't match, or there were less than
2875 SUBEXP pairs.
2876 Zero means the entire text matched by the whole regexp or whole string. */)
2877 (subexp)
2878 Lisp_Object subexp;
2879 {
2880 return match_limit (subexp, 0);
2881 }
2882
2883 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2884 doc: /* Return a list containing all info on what the last search matched.
2885 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2886 All the elements are markers or nil (nil if the Nth pair didn't match)
2887 if the last match was on a buffer; integers or nil if a string was matched.
2888 Use `set-match-data' to reinstate the data in this list.
2889
2890 If INTEGERS (the optional first argument) is non-nil, always use
2891 integers \(rather than markers) to represent buffer positions. In
2892 this case, and if the last match was in a buffer, the buffer will get
2893 stored as one additional element at the end of the list.
2894
2895 If REUSE is a list, reuse it as part of the value. If REUSE is long
2896 enough to hold all the values, and if INTEGERS is non-nil, no consing
2897 is done.
2898
2899 If optional third arg RESEAT is non-nil, any previous markers on the
2900 REUSE list will be modified to point to nowhere.
2901
2902 Return value is undefined if the last search failed. */)
2903 (integers, reuse, reseat)
2904 Lisp_Object integers, reuse, reseat;
2905 {
2906 Lisp_Object tail, prev;
2907 Lisp_Object *data;
2908 int i, len;
2909
2910 if (!NILP (reseat))
2911 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2912 if (MARKERP (XCAR (tail)))
2913 {
2914 unchain_marker (XMARKER (XCAR (tail)));
2915 XSETCAR (tail, Qnil);
2916 }
2917
2918 if (NILP (last_thing_searched))
2919 return Qnil;
2920
2921 prev = Qnil;
2922
2923 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2924 * sizeof (Lisp_Object));
2925
2926 len = 0;
2927 for (i = 0; i < search_regs.num_regs; i++)
2928 {
2929 int start = search_regs.start[i];
2930 if (start >= 0)
2931 {
2932 if (EQ (last_thing_searched, Qt)
2933 || ! NILP (integers))
2934 {
2935 XSETFASTINT (data[2 * i], start);
2936 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2937 }
2938 else if (BUFFERP (last_thing_searched))
2939 {
2940 data[2 * i] = Fmake_marker ();
2941 Fset_marker (data[2 * i],
2942 make_number (start),
2943 last_thing_searched);
2944 data[2 * i + 1] = Fmake_marker ();
2945 Fset_marker (data[2 * i + 1],
2946 make_number (search_regs.end[i]),
2947 last_thing_searched);
2948 }
2949 else
2950 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2951 abort ();
2952
2953 len = 2 * i + 2;
2954 }
2955 else
2956 data[2 * i] = data[2 * i + 1] = Qnil;
2957 }
2958
2959 if (BUFFERP (last_thing_searched) && !NILP (integers))
2960 {
2961 data[len] = last_thing_searched;
2962 len++;
2963 }
2964
2965 /* If REUSE is not usable, cons up the values and return them. */
2966 if (! CONSP (reuse))
2967 return Flist (len, data);
2968
2969 /* If REUSE is a list, store as many value elements as will fit
2970 into the elements of REUSE. */
2971 for (i = 0, tail = reuse; CONSP (tail);
2972 i++, tail = XCDR (tail))
2973 {
2974 if (i < len)
2975 XSETCAR (tail, data[i]);
2976 else
2977 XSETCAR (tail, Qnil);
2978 prev = tail;
2979 }
2980
2981 /* If we couldn't fit all value elements into REUSE,
2982 cons up the rest of them and add them to the end of REUSE. */
2983 if (i < len)
2984 XSETCDR (prev, Flist (len - i, data + i));
2985
2986 return reuse;
2987 }
2988
2989 /* We used to have an internal use variant of `reseat' described as:
2990
2991 If RESEAT is `evaporate', put the markers back on the free list
2992 immediately. No other references to the markers must exist in this
2993 case, so it is used only internally on the unwind stack and
2994 save-match-data from Lisp.
2995
2996 But it was ill-conceived: those supposedly-internal markers get exposed via
2997 the undo-list, so freeing them here is unsafe. */
2998
2999 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
3000 doc: /* Set internal data on last search match from elements of LIST.
3001 LIST should have been created by calling `match-data' previously.
3002
3003 If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
3004 (list, reseat)
3005 register Lisp_Object list, reseat;
3006 {
3007 register int i;
3008 register Lisp_Object marker;
3009
3010 if (running_asynch_code)
3011 save_search_regs ();
3012
3013 CHECK_LIST (list);
3014
3015 /* Unless we find a marker with a buffer or an explicit buffer
3016 in LIST, assume that this match data came from a string. */
3017 last_thing_searched = Qt;
3018
3019 /* Allocate registers if they don't already exist. */
3020 {
3021 int length = XFASTINT (Flength (list)) / 2;
3022
3023 if (length > search_regs.num_regs)
3024 {
3025 if (search_regs.num_regs == 0)
3026 {
3027 search_regs.start
3028 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3029 search_regs.end
3030 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3031 }
3032 else
3033 {
3034 search_regs.start
3035 = (regoff_t *) xrealloc (search_regs.start,
3036 length * sizeof (regoff_t));
3037 search_regs.end
3038 = (regoff_t *) xrealloc (search_regs.end,
3039 length * sizeof (regoff_t));
3040 }
3041
3042 for (i = search_regs.num_regs; i < length; i++)
3043 search_regs.start[i] = -1;
3044
3045 search_regs.num_regs = length;
3046 }
3047
3048 for (i = 0; CONSP (list); i++)
3049 {
3050 marker = XCAR (list);
3051 if (BUFFERP (marker))
3052 {
3053 last_thing_searched = marker;
3054 break;
3055 }
3056 if (i >= length)
3057 break;
3058 if (NILP (marker))
3059 {
3060 search_regs.start[i] = -1;
3061 list = XCDR (list);
3062 }
3063 else
3064 {
3065 EMACS_INT from;
3066 Lisp_Object m;
3067
3068 m = marker;
3069 if (MARKERP (marker))
3070 {
3071 if (XMARKER (marker)->buffer == 0)
3072 XSETFASTINT (marker, 0);
3073 else
3074 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3075 }
3076
3077 CHECK_NUMBER_COERCE_MARKER (marker);
3078 from = XINT (marker);
3079
3080 if (!NILP (reseat) && MARKERP (m))
3081 {
3082 unchain_marker (XMARKER (m));
3083 XSETCAR (list, Qnil);
3084 }
3085
3086 if ((list = XCDR (list), !CONSP (list)))
3087 break;
3088
3089 m = marker = XCAR (list);
3090
3091 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3092 XSETFASTINT (marker, 0);
3093
3094 CHECK_NUMBER_COERCE_MARKER (marker);
3095 search_regs.start[i] = from;
3096 search_regs.end[i] = XINT (marker);
3097
3098 if (!NILP (reseat) && MARKERP (m))
3099 {
3100 unchain_marker (XMARKER (m));
3101 XSETCAR (list, Qnil);
3102 }
3103 }
3104 list = XCDR (list);
3105 }
3106
3107 for (; i < search_regs.num_regs; i++)
3108 search_regs.start[i] = -1;
3109 }
3110
3111 return Qnil;
3112 }
3113
3114 /* If non-zero the match data have been saved in saved_search_regs
3115 during the execution of a sentinel or filter. */
3116 static int search_regs_saved;
3117 static struct re_registers saved_search_regs;
3118 static Lisp_Object saved_last_thing_searched;
3119
3120 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3121 if asynchronous code (filter or sentinel) is running. */
3122 static void
3123 save_search_regs (void)
3124 {
3125 if (!search_regs_saved)
3126 {
3127 saved_search_regs.num_regs = search_regs.num_regs;
3128 saved_search_regs.start = search_regs.start;
3129 saved_search_regs.end = search_regs.end;
3130 saved_last_thing_searched = last_thing_searched;
3131 last_thing_searched = Qnil;
3132 search_regs.num_regs = 0;
3133 search_regs.start = 0;
3134 search_regs.end = 0;
3135
3136 search_regs_saved = 1;
3137 }
3138 }
3139
3140 /* Called upon exit from filters and sentinels. */
3141 void
3142 restore_search_regs (void)
3143 {
3144 if (search_regs_saved)
3145 {
3146 if (search_regs.num_regs > 0)
3147 {
3148 xfree (search_regs.start);
3149 xfree (search_regs.end);
3150 }
3151 search_regs.num_regs = saved_search_regs.num_regs;
3152 search_regs.start = saved_search_regs.start;
3153 search_regs.end = saved_search_regs.end;
3154 last_thing_searched = saved_last_thing_searched;
3155 saved_last_thing_searched = Qnil;
3156 search_regs_saved = 0;
3157 }
3158 }
3159
3160 static Lisp_Object
3161 unwind_set_match_data (Lisp_Object list)
3162 {
3163 /* It is NOT ALWAYS safe to free (evaporate) the markers immediately. */
3164 return Fset_match_data (list, Qt);
3165 }
3166
3167 /* Called to unwind protect the match data. */
3168 void
3169 record_unwind_save_match_data (void)
3170 {
3171 record_unwind_protect (unwind_set_match_data,
3172 Fmatch_data (Qnil, Qnil, Qnil));
3173 }
3174
3175 /* Quote a string to inactivate reg-expr chars */
3176
3177 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3178 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
3179 (string)
3180 Lisp_Object string;
3181 {
3182 register unsigned char *in, *out, *end;
3183 register unsigned char *temp;
3184 int backslashes_added = 0;
3185
3186 CHECK_STRING (string);
3187
3188 temp = (unsigned char *) alloca (SBYTES (string) * 2);
3189
3190 /* Now copy the data into the new string, inserting escapes. */
3191
3192 in = SDATA (string);
3193 end = in + SBYTES (string);
3194 out = temp;
3195
3196 for (; in != end; in++)
3197 {
3198 if (*in == '['
3199 || *in == '*' || *in == '.' || *in == '\\'
3200 || *in == '?' || *in == '+'
3201 || *in == '^' || *in == '$')
3202 *out++ = '\\', backslashes_added++;
3203 *out++ = *in;
3204 }
3205
3206 return make_specified_string (temp,
3207 SCHARS (string) + backslashes_added,
3208 out - temp,
3209 STRING_MULTIBYTE (string));
3210 }
3211 \f
3212 void
3213 syms_of_search (void)
3214 {
3215 register int i;
3216
3217 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3218 {
3219 searchbufs[i].buf.allocated = 100;
3220 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3221 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3222 searchbufs[i].regexp = Qnil;
3223 searchbufs[i].whitespace_regexp = Qnil;
3224 searchbufs[i].syntax_table = Qnil;
3225 staticpro (&searchbufs[i].regexp);
3226 staticpro (&searchbufs[i].whitespace_regexp);
3227 staticpro (&searchbufs[i].syntax_table);
3228 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3229 }
3230 searchbuf_head = &searchbufs[0];
3231
3232 Qsearch_failed = intern_c_string ("search-failed");
3233 staticpro (&Qsearch_failed);
3234 Qinvalid_regexp = intern_c_string ("invalid-regexp");
3235 staticpro (&Qinvalid_regexp);
3236
3237 Fput (Qsearch_failed, Qerror_conditions,
3238 pure_cons (Qsearch_failed, pure_cons (Qerror, Qnil)));
3239 Fput (Qsearch_failed, Qerror_message,
3240 make_pure_c_string ("Search failed"));
3241
3242 Fput (Qinvalid_regexp, Qerror_conditions,
3243 pure_cons (Qinvalid_regexp, pure_cons (Qerror, Qnil)));
3244 Fput (Qinvalid_regexp, Qerror_message,
3245 make_pure_c_string ("Invalid regexp"));
3246
3247 last_thing_searched = Qnil;
3248 staticpro (&last_thing_searched);
3249
3250 saved_last_thing_searched = Qnil;
3251 staticpro (&saved_last_thing_searched);
3252
3253 DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3254 doc: /* Regexp to substitute for bunches of spaces in regexp search.
3255 Some commands use this for user-specified regexps.
3256 Spaces that occur inside character classes or repetition operators
3257 or other such regexp constructs are not replaced with this.
3258 A value of nil (which is the normal value) means treat spaces literally. */);
3259 Vsearch_spaces_regexp = Qnil;
3260
3261 DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data,
3262 doc: /* Internal use only.
3263 If non-nil, the primitive searching and matching functions
3264 such as `looking-at', `string-match', `re-search-forward', etc.,
3265 do not set the match data. The proper way to use this variable
3266 is to bind it with `let' around a small expression. */);
3267 Vinhibit_changing_match_data = Qnil;
3268
3269 defsubr (&Slooking_at);
3270 defsubr (&Sposix_looking_at);
3271 defsubr (&Sstring_match);
3272 defsubr (&Sposix_string_match);
3273 defsubr (&Ssearch_forward);
3274 defsubr (&Ssearch_backward);
3275 defsubr (&Sword_search_forward);
3276 defsubr (&Sword_search_backward);
3277 defsubr (&Sword_search_forward_lax);
3278 defsubr (&Sword_search_backward_lax);
3279 defsubr (&Sre_search_forward);
3280 defsubr (&Sre_search_backward);
3281 defsubr (&Sposix_search_forward);
3282 defsubr (&Sposix_search_backward);
3283 defsubr (&Sreplace_match);
3284 defsubr (&Smatch_beginning);
3285 defsubr (&Smatch_end);
3286 defsubr (&Smatch_data);
3287 defsubr (&Sset_match_data);
3288 defsubr (&Sregexp_quote);
3289 }
3290
3291 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3292 (do not change this comment) */