Convert (most) functions in src to standard C.
[bpt/emacs.git] / src / search.c
1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5
6 This file is part of GNU Emacs.
7
8 GNU Emacs is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
20
21
22 #include <config.h>
23 #include <setjmp.h>
24 #include "lisp.h"
25 #include "syntax.h"
26 #include "category.h"
27 #include "buffer.h"
28 #include "character.h"
29 #include "charset.h"
30 #include "region-cache.h"
31 #include "commands.h"
32 #include "blockinput.h"
33 #include "intervals.h"
34
35 #include <sys/types.h>
36 #include "regex.h"
37
38 #define REGEXP_CACHE_SIZE 20
39
40 /* If the regexp is non-nil, then the buffer contains the compiled form
41 of that regexp, suitable for searching. */
42 struct regexp_cache
43 {
44 struct regexp_cache *next;
45 Lisp_Object regexp, whitespace_regexp;
46 /* Syntax table for which the regexp applies. We need this because
47 of character classes. If this is t, then the compiled pattern is valid
48 for any syntax-table. */
49 Lisp_Object syntax_table;
50 struct re_pattern_buffer buf;
51 char fastmap[0400];
52 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
53 char posix;
54 };
55
56 /* The instances of that struct. */
57 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
58
59 /* The head of the linked list; points to the most recently used buffer. */
60 struct regexp_cache *searchbuf_head;
61
62
63 /* Every call to re_match, etc., must pass &search_regs as the regs
64 argument unless you can show it is unnecessary (i.e., if re_match
65 is certainly going to be called again before region-around-match
66 can be called).
67
68 Since the registers are now dynamically allocated, we need to make
69 sure not to refer to the Nth register before checking that it has
70 been allocated by checking search_regs.num_regs.
71
72 The regex code keeps track of whether it has allocated the search
73 buffer using bits in the re_pattern_buffer. This means that whenever
74 you compile a new pattern, it completely forgets whether it has
75 allocated any registers, and will allocate new registers the next
76 time you call a searching or matching function. Therefore, we need
77 to call re_set_registers after compiling a new pattern or after
78 setting the match registers, so that the regex functions will be
79 able to free or re-allocate it properly. */
80 static struct re_registers search_regs;
81
82 /* The buffer in which the last search was performed, or
83 Qt if the last search was done in a string;
84 Qnil if no searching has been done yet. */
85 static Lisp_Object last_thing_searched;
86
87 /* error condition signaled when regexp compile_pattern fails */
88
89 Lisp_Object Qinvalid_regexp;
90
91 /* Error condition used for failing searches */
92 Lisp_Object Qsearch_failed;
93
94 Lisp_Object Vsearch_spaces_regexp;
95
96 /* If non-nil, the match data will not be changed during call to
97 searching or matching functions. This variable is for internal use
98 only. */
99 Lisp_Object Vinhibit_changing_match_data;
100
101 static void set_search_regs (EMACS_INT, EMACS_INT);
102 static void save_search_regs (void);
103 static EMACS_INT simple_search (int, unsigned char *, int, int,
104 Lisp_Object, EMACS_INT, EMACS_INT,
105 EMACS_INT, EMACS_INT);
106 static EMACS_INT boyer_moore (int, unsigned char *, int, int,
107 Lisp_Object, Lisp_Object,
108 EMACS_INT, EMACS_INT,
109 EMACS_INT, EMACS_INT, int);
110 static EMACS_INT search_buffer (Lisp_Object, EMACS_INT, EMACS_INT,
111 EMACS_INT, EMACS_INT, int, int,
112 Lisp_Object, Lisp_Object, int);
113 static void matcher_overflow (void) NO_RETURN;
114
115 static void
116 matcher_overflow (void)
117 {
118 error ("Stack overflow in regexp matcher");
119 }
120
121 /* Compile a regexp and signal a Lisp error if anything goes wrong.
122 PATTERN is the pattern to compile.
123 CP is the place to put the result.
124 TRANSLATE is a translation table for ignoring case, or nil for none.
125 REGP is the structure that says where to store the "register"
126 values that will result from matching this pattern.
127 If it is 0, we should compile the pattern not to record any
128 subexpression bounds.
129 POSIX is nonzero if we want full backtracking (POSIX style)
130 for this pattern. 0 means backtrack only enough to get a valid match.
131
132 The behavior also depends on Vsearch_spaces_regexp. */
133
134 static void
135 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, Lisp_Object translate, struct re_registers *regp, int posix)
136 {
137 char *val;
138 reg_syntax_t old;
139
140 cp->regexp = Qnil;
141 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
142 cp->posix = posix;
143 cp->buf.multibyte = STRING_MULTIBYTE (pattern);
144 cp->buf.charset_unibyte = charset_unibyte;
145 if (STRINGP (Vsearch_spaces_regexp))
146 cp->whitespace_regexp = Vsearch_spaces_regexp;
147 else
148 cp->whitespace_regexp = Qnil;
149
150 /* rms: I think BLOCK_INPUT is not needed here any more,
151 because regex.c defines malloc to call xmalloc.
152 Using BLOCK_INPUT here means the debugger won't run if an error occurs.
153 So let's turn it off. */
154 /* BLOCK_INPUT; */
155 old = re_set_syntax (RE_SYNTAX_EMACS
156 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
157
158 if (STRINGP (Vsearch_spaces_regexp))
159 re_set_whitespace_regexp (SDATA (Vsearch_spaces_regexp));
160 else
161 re_set_whitespace_regexp (NULL);
162
163 val = (char *) re_compile_pattern ((char *) SDATA (pattern),
164 SBYTES (pattern), &cp->buf);
165
166 /* If the compiled pattern hard codes some of the contents of the
167 syntax-table, it can only be reused with *this* syntax table. */
168 cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
169
170 re_set_whitespace_regexp (NULL);
171
172 re_set_syntax (old);
173 /* UNBLOCK_INPUT; */
174 if (val)
175 xsignal1 (Qinvalid_regexp, build_string (val));
176
177 cp->regexp = Fcopy_sequence (pattern);
178 }
179
180 /* Shrink each compiled regexp buffer in the cache
181 to the size actually used right now.
182 This is called from garbage collection. */
183
184 void
185 shrink_regexp_cache (void)
186 {
187 struct regexp_cache *cp;
188
189 for (cp = searchbuf_head; cp != 0; cp = cp->next)
190 {
191 cp->buf.allocated = cp->buf.used;
192 cp->buf.buffer
193 = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
194 }
195 }
196
197 /* Clear the regexp cache w.r.t. a particular syntax table,
198 because it was changed.
199 There is no danger of memory leak here because re_compile_pattern
200 automagically manages the memory in each re_pattern_buffer struct,
201 based on its `allocated' and `buffer' values. */
202 void
203 clear_regexp_cache (void)
204 {
205 int i;
206
207 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
208 /* It's tempting to compare with the syntax-table we've actually changed,
209 but it's not sufficient because char-table inheritance means that
210 modifying one syntax-table can change others at the same time. */
211 if (!EQ (searchbufs[i].syntax_table, Qt))
212 searchbufs[i].regexp = Qnil;
213 }
214
215 /* Compile a regexp if necessary, but first check to see if there's one in
216 the cache.
217 PATTERN is the pattern to compile.
218 TRANSLATE is a translation table for ignoring case, or nil for none.
219 REGP is the structure that says where to store the "register"
220 values that will result from matching this pattern.
221 If it is 0, we should compile the pattern not to record any
222 subexpression bounds.
223 POSIX is nonzero if we want full backtracking (POSIX style)
224 for this pattern. 0 means backtrack only enough to get a valid match. */
225
226 struct re_pattern_buffer *
227 compile_pattern (Lisp_Object pattern, struct re_registers *regp, Lisp_Object translate, int posix, int multibyte)
228 {
229 struct regexp_cache *cp, **cpp;
230
231 for (cpp = &searchbuf_head; ; cpp = &cp->next)
232 {
233 cp = *cpp;
234 /* Entries are initialized to nil, and may be set to nil by
235 compile_pattern_1 if the pattern isn't valid. Don't apply
236 string accessors in those cases. However, compile_pattern_1
237 is only applied to the cache entry we pick here to reuse. So
238 nil should never appear before a non-nil entry. */
239 if (NILP (cp->regexp))
240 goto compile_it;
241 if (SCHARS (cp->regexp) == SCHARS (pattern)
242 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
243 && !NILP (Fstring_equal (cp->regexp, pattern))
244 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
245 && cp->posix == posix
246 && (EQ (cp->syntax_table, Qt)
247 || EQ (cp->syntax_table, current_buffer->syntax_table))
248 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
249 && cp->buf.charset_unibyte == charset_unibyte)
250 break;
251
252 /* If we're at the end of the cache, compile into the nil cell
253 we found, or the last (least recently used) cell with a
254 string value. */
255 if (cp->next == 0)
256 {
257 compile_it:
258 compile_pattern_1 (cp, pattern, translate, regp, posix);
259 break;
260 }
261 }
262
263 /* When we get here, cp (aka *cpp) contains the compiled pattern,
264 either because we found it in the cache or because we just compiled it.
265 Move it to the front of the queue to mark it as most recently used. */
266 *cpp = cp->next;
267 cp->next = searchbuf_head;
268 searchbuf_head = cp;
269
270 /* Advise the searching functions about the space we have allocated
271 for register data. */
272 if (regp)
273 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
274
275 /* The compiled pattern can be used both for multibyte and unibyte
276 target. But, we have to tell which the pattern is used for. */
277 cp->buf.target_multibyte = multibyte;
278
279 return &cp->buf;
280 }
281
282 \f
283 static Lisp_Object
284 looking_at_1 (Lisp_Object string, int posix)
285 {
286 Lisp_Object val;
287 unsigned char *p1, *p2;
288 EMACS_INT s1, s2;
289 register int i;
290 struct re_pattern_buffer *bufp;
291
292 if (running_asynch_code)
293 save_search_regs ();
294
295 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
296 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
297 = current_buffer->case_eqv_table;
298
299 CHECK_STRING (string);
300 bufp = compile_pattern (string,
301 (NILP (Vinhibit_changing_match_data)
302 ? &search_regs : NULL),
303 (!NILP (current_buffer->case_fold_search)
304 ? current_buffer->case_canon_table : Qnil),
305 posix,
306 !NILP (current_buffer->enable_multibyte_characters));
307
308 immediate_quit = 1;
309 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
310
311 /* Get pointers and sizes of the two strings
312 that make up the visible portion of the buffer. */
313
314 p1 = BEGV_ADDR;
315 s1 = GPT_BYTE - BEGV_BYTE;
316 p2 = GAP_END_ADDR;
317 s2 = ZV_BYTE - GPT_BYTE;
318 if (s1 < 0)
319 {
320 p2 = p1;
321 s2 = ZV_BYTE - BEGV_BYTE;
322 s1 = 0;
323 }
324 if (s2 < 0)
325 {
326 s1 = ZV_BYTE - BEGV_BYTE;
327 s2 = 0;
328 }
329
330 re_match_object = Qnil;
331
332 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
333 PT_BYTE - BEGV_BYTE,
334 (NILP (Vinhibit_changing_match_data)
335 ? &search_regs : NULL),
336 ZV_BYTE - BEGV_BYTE);
337 immediate_quit = 0;
338
339 if (i == -2)
340 matcher_overflow ();
341
342 val = (0 <= i ? Qt : Qnil);
343 if (NILP (Vinhibit_changing_match_data) && i >= 0)
344 for (i = 0; i < search_regs.num_regs; i++)
345 if (search_regs.start[i] >= 0)
346 {
347 search_regs.start[i]
348 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
349 search_regs.end[i]
350 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
351 }
352
353 /* Set last_thing_searched only when match data is changed. */
354 if (NILP (Vinhibit_changing_match_data))
355 XSETBUFFER (last_thing_searched, current_buffer);
356
357 return val;
358 }
359
360 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
361 doc: /* Return t if text after point matches regular expression REGEXP.
362 This function modifies the match data that `match-beginning',
363 `match-end' and `match-data' access; save and restore the match
364 data if you want to preserve them. */)
365 (regexp)
366 Lisp_Object regexp;
367 {
368 return looking_at_1 (regexp, 0);
369 }
370
371 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
372 doc: /* Return t if text after point matches regular expression REGEXP.
373 Find the longest match, in accord with Posix regular expression rules.
374 This function modifies the match data that `match-beginning',
375 `match-end' and `match-data' access; save and restore the match
376 data if you want to preserve them. */)
377 (regexp)
378 Lisp_Object regexp;
379 {
380 return looking_at_1 (regexp, 1);
381 }
382 \f
383 static Lisp_Object
384 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, int posix)
385 {
386 int val;
387 struct re_pattern_buffer *bufp;
388 EMACS_INT pos, pos_byte;
389 int i;
390
391 if (running_asynch_code)
392 save_search_regs ();
393
394 CHECK_STRING (regexp);
395 CHECK_STRING (string);
396
397 if (NILP (start))
398 pos = 0, pos_byte = 0;
399 else
400 {
401 int len = SCHARS (string);
402
403 CHECK_NUMBER (start);
404 pos = XINT (start);
405 if (pos < 0 && -pos <= len)
406 pos = len + pos;
407 else if (0 > pos || pos > len)
408 args_out_of_range (string, start);
409 pos_byte = string_char_to_byte (string, pos);
410 }
411
412 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
413 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
414 = current_buffer->case_eqv_table;
415
416 bufp = compile_pattern (regexp,
417 (NILP (Vinhibit_changing_match_data)
418 ? &search_regs : NULL),
419 (!NILP (current_buffer->case_fold_search)
420 ? current_buffer->case_canon_table : Qnil),
421 posix,
422 STRING_MULTIBYTE (string));
423 immediate_quit = 1;
424 re_match_object = string;
425
426 val = re_search (bufp, (char *) SDATA (string),
427 SBYTES (string), pos_byte,
428 SBYTES (string) - pos_byte,
429 (NILP (Vinhibit_changing_match_data)
430 ? &search_regs : NULL));
431 immediate_quit = 0;
432
433 /* Set last_thing_searched only when match data is changed. */
434 if (NILP (Vinhibit_changing_match_data))
435 last_thing_searched = Qt;
436
437 if (val == -2)
438 matcher_overflow ();
439 if (val < 0) return Qnil;
440
441 if (NILP (Vinhibit_changing_match_data))
442 for (i = 0; i < search_regs.num_regs; i++)
443 if (search_regs.start[i] >= 0)
444 {
445 search_regs.start[i]
446 = string_byte_to_char (string, search_regs.start[i]);
447 search_regs.end[i]
448 = string_byte_to_char (string, search_regs.end[i]);
449 }
450
451 return make_number (string_byte_to_char (string, val));
452 }
453
454 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
455 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
456 Matching ignores case if `case-fold-search' is non-nil.
457 If third arg START is non-nil, start search at that index in STRING.
458 For index of first char beyond the match, do (match-end 0).
459 `match-end' and `match-beginning' also give indices of substrings
460 matched by parenthesis constructs in the pattern.
461
462 You can use the function `match-string' to extract the substrings
463 matched by the parenthesis constructions in REGEXP. */)
464 (regexp, string, start)
465 Lisp_Object regexp, string, start;
466 {
467 return string_match_1 (regexp, string, start, 0);
468 }
469
470 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
471 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
472 Find the longest match, in accord with Posix regular expression rules.
473 Case is ignored if `case-fold-search' is non-nil in the current buffer.
474 If third arg START is non-nil, start search at that index in STRING.
475 For index of first char beyond the match, do (match-end 0).
476 `match-end' and `match-beginning' also give indices of substrings
477 matched by parenthesis constructs in the pattern. */)
478 (regexp, string, start)
479 Lisp_Object regexp, string, start;
480 {
481 return string_match_1 (regexp, string, start, 1);
482 }
483
484 /* Match REGEXP against STRING, searching all of STRING,
485 and return the index of the match, or negative on failure.
486 This does not clobber the match data. */
487
488 int
489 fast_string_match (Lisp_Object regexp, Lisp_Object string)
490 {
491 int val;
492 struct re_pattern_buffer *bufp;
493
494 bufp = compile_pattern (regexp, 0, Qnil,
495 0, STRING_MULTIBYTE (string));
496 immediate_quit = 1;
497 re_match_object = string;
498
499 val = re_search (bufp, (char *) SDATA (string),
500 SBYTES (string), 0,
501 SBYTES (string), 0);
502 immediate_quit = 0;
503 return val;
504 }
505
506 /* Match REGEXP against STRING, searching all of STRING ignoring case,
507 and return the index of the match, or negative on failure.
508 This does not clobber the match data.
509 We assume that STRING contains single-byte characters. */
510
511 extern Lisp_Object Vascii_downcase_table;
512
513 int
514 fast_c_string_match_ignore_case (Lisp_Object regexp, const char *string)
515 {
516 int val;
517 struct re_pattern_buffer *bufp;
518 int len = strlen (string);
519
520 regexp = string_make_unibyte (regexp);
521 re_match_object = Qt;
522 bufp = compile_pattern (regexp, 0,
523 Vascii_canon_table, 0,
524 0);
525 immediate_quit = 1;
526 val = re_search (bufp, string, len, 0, len, 0);
527 immediate_quit = 0;
528 return val;
529 }
530
531 /* Like fast_string_match but ignore case. */
532
533 int
534 fast_string_match_ignore_case (Lisp_Object regexp, Lisp_Object string)
535 {
536 int val;
537 struct re_pattern_buffer *bufp;
538
539 bufp = compile_pattern (regexp, 0, Vascii_canon_table,
540 0, STRING_MULTIBYTE (string));
541 immediate_quit = 1;
542 re_match_object = string;
543
544 val = re_search (bufp, (char *) SDATA (string),
545 SBYTES (string), 0,
546 SBYTES (string), 0);
547 immediate_quit = 0;
548 return val;
549 }
550 \f
551 /* Match REGEXP against the characters after POS to LIMIT, and return
552 the number of matched characters. If STRING is non-nil, match
553 against the characters in it. In that case, POS and LIMIT are
554 indices into the string. This function doesn't modify the match
555 data. */
556
557 EMACS_INT
558 fast_looking_at (Lisp_Object regexp, EMACS_INT pos, EMACS_INT pos_byte, EMACS_INT limit, EMACS_INT limit_byte, Lisp_Object string)
559 {
560 int multibyte;
561 struct re_pattern_buffer *buf;
562 unsigned char *p1, *p2;
563 EMACS_INT s1, s2;
564 EMACS_INT len;
565
566 if (STRINGP (string))
567 {
568 if (pos_byte < 0)
569 pos_byte = string_char_to_byte (string, pos);
570 if (limit_byte < 0)
571 limit_byte = string_char_to_byte (string, limit);
572 p1 = NULL;
573 s1 = 0;
574 p2 = SDATA (string);
575 s2 = SBYTES (string);
576 re_match_object = string;
577 multibyte = STRING_MULTIBYTE (string);
578 }
579 else
580 {
581 if (pos_byte < 0)
582 pos_byte = CHAR_TO_BYTE (pos);
583 if (limit_byte < 0)
584 limit_byte = CHAR_TO_BYTE (limit);
585 pos_byte -= BEGV_BYTE;
586 limit_byte -= BEGV_BYTE;
587 p1 = BEGV_ADDR;
588 s1 = GPT_BYTE - BEGV_BYTE;
589 p2 = GAP_END_ADDR;
590 s2 = ZV_BYTE - GPT_BYTE;
591 if (s1 < 0)
592 {
593 p2 = p1;
594 s2 = ZV_BYTE - BEGV_BYTE;
595 s1 = 0;
596 }
597 if (s2 < 0)
598 {
599 s1 = ZV_BYTE - BEGV_BYTE;
600 s2 = 0;
601 }
602 re_match_object = Qnil;
603 multibyte = ! NILP (current_buffer->enable_multibyte_characters);
604 }
605
606 buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
607 immediate_quit = 1;
608 len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
609 pos_byte, NULL, limit_byte);
610 immediate_quit = 0;
611
612 return len;
613 }
614
615 \f
616 /* The newline cache: remembering which sections of text have no newlines. */
617
618 /* If the user has requested newline caching, make sure it's on.
619 Otherwise, make sure it's off.
620 This is our cheezy way of associating an action with the change of
621 state of a buffer-local variable. */
622 static void
623 newline_cache_on_off (struct buffer *buf)
624 {
625 if (NILP (buf->cache_long_line_scans))
626 {
627 /* It should be off. */
628 if (buf->newline_cache)
629 {
630 free_region_cache (buf->newline_cache);
631 buf->newline_cache = 0;
632 }
633 }
634 else
635 {
636 /* It should be on. */
637 if (buf->newline_cache == 0)
638 buf->newline_cache = new_region_cache ();
639 }
640 }
641
642 \f
643 /* Search for COUNT instances of the character TARGET between START and END.
644
645 If COUNT is positive, search forwards; END must be >= START.
646 If COUNT is negative, search backwards for the -COUNTth instance;
647 END must be <= START.
648 If COUNT is zero, do anything you please; run rogue, for all I care.
649
650 If END is zero, use BEGV or ZV instead, as appropriate for the
651 direction indicated by COUNT.
652
653 If we find COUNT instances, set *SHORTAGE to zero, and return the
654 position past the COUNTth match. Note that for reverse motion
655 this is not the same as the usual convention for Emacs motion commands.
656
657 If we don't find COUNT instances before reaching END, set *SHORTAGE
658 to the number of TARGETs left unfound, and return END.
659
660 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
661 except when inside redisplay. */
662
663 int
664 scan_buffer (register int target, EMACS_INT start, EMACS_INT end, int count, int *shortage, int allow_quit)
665 {
666 struct region_cache *newline_cache;
667 int direction;
668
669 if (count > 0)
670 {
671 direction = 1;
672 if (! end) end = ZV;
673 }
674 else
675 {
676 direction = -1;
677 if (! end) end = BEGV;
678 }
679
680 newline_cache_on_off (current_buffer);
681 newline_cache = current_buffer->newline_cache;
682
683 if (shortage != 0)
684 *shortage = 0;
685
686 immediate_quit = allow_quit;
687
688 if (count > 0)
689 while (start != end)
690 {
691 /* Our innermost scanning loop is very simple; it doesn't know
692 about gaps, buffer ends, or the newline cache. ceiling is
693 the position of the last character before the next such
694 obstacle --- the last character the dumb search loop should
695 examine. */
696 EMACS_INT ceiling_byte = CHAR_TO_BYTE (end) - 1;
697 EMACS_INT start_byte = CHAR_TO_BYTE (start);
698 EMACS_INT tem;
699
700 /* If we're looking for a newline, consult the newline cache
701 to see where we can avoid some scanning. */
702 if (target == '\n' && newline_cache)
703 {
704 int next_change;
705 immediate_quit = 0;
706 while (region_cache_forward
707 (current_buffer, newline_cache, start_byte, &next_change))
708 start_byte = next_change;
709 immediate_quit = allow_quit;
710
711 /* START should never be after END. */
712 if (start_byte > ceiling_byte)
713 start_byte = ceiling_byte;
714
715 /* Now the text after start is an unknown region, and
716 next_change is the position of the next known region. */
717 ceiling_byte = min (next_change - 1, ceiling_byte);
718 }
719
720 /* The dumb loop can only scan text stored in contiguous
721 bytes. BUFFER_CEILING_OF returns the last character
722 position that is contiguous, so the ceiling is the
723 position after that. */
724 tem = BUFFER_CEILING_OF (start_byte);
725 ceiling_byte = min (tem, ceiling_byte);
726
727 {
728 /* The termination address of the dumb loop. */
729 register unsigned char *ceiling_addr
730 = BYTE_POS_ADDR (ceiling_byte) + 1;
731 register unsigned char *cursor
732 = BYTE_POS_ADDR (start_byte);
733 unsigned char *base = cursor;
734
735 while (cursor < ceiling_addr)
736 {
737 unsigned char *scan_start = cursor;
738
739 /* The dumb loop. */
740 while (*cursor != target && ++cursor < ceiling_addr)
741 ;
742
743 /* If we're looking for newlines, cache the fact that
744 the region from start to cursor is free of them. */
745 if (target == '\n' && newline_cache)
746 know_region_cache (current_buffer, newline_cache,
747 start_byte + scan_start - base,
748 start_byte + cursor - base);
749
750 /* Did we find the target character? */
751 if (cursor < ceiling_addr)
752 {
753 if (--count == 0)
754 {
755 immediate_quit = 0;
756 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
757 }
758 cursor++;
759 }
760 }
761
762 start = BYTE_TO_CHAR (start_byte + cursor - base);
763 }
764 }
765 else
766 while (start > end)
767 {
768 /* The last character to check before the next obstacle. */
769 EMACS_INT ceiling_byte = CHAR_TO_BYTE (end);
770 EMACS_INT start_byte = CHAR_TO_BYTE (start);
771 EMACS_INT tem;
772
773 /* Consult the newline cache, if appropriate. */
774 if (target == '\n' && newline_cache)
775 {
776 int next_change;
777 immediate_quit = 0;
778 while (region_cache_backward
779 (current_buffer, newline_cache, start_byte, &next_change))
780 start_byte = next_change;
781 immediate_quit = allow_quit;
782
783 /* Start should never be at or before end. */
784 if (start_byte <= ceiling_byte)
785 start_byte = ceiling_byte + 1;
786
787 /* Now the text before start is an unknown region, and
788 next_change is the position of the next known region. */
789 ceiling_byte = max (next_change, ceiling_byte);
790 }
791
792 /* Stop scanning before the gap. */
793 tem = BUFFER_FLOOR_OF (start_byte - 1);
794 ceiling_byte = max (tem, ceiling_byte);
795
796 {
797 /* The termination address of the dumb loop. */
798 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
799 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
800 unsigned char *base = cursor;
801
802 while (cursor >= ceiling_addr)
803 {
804 unsigned char *scan_start = cursor;
805
806 while (*cursor != target && --cursor >= ceiling_addr)
807 ;
808
809 /* If we're looking for newlines, cache the fact that
810 the region from after the cursor to start is free of them. */
811 if (target == '\n' && newline_cache)
812 know_region_cache (current_buffer, newline_cache,
813 start_byte + cursor - base,
814 start_byte + scan_start - base);
815
816 /* Did we find the target character? */
817 if (cursor >= ceiling_addr)
818 {
819 if (++count >= 0)
820 {
821 immediate_quit = 0;
822 return BYTE_TO_CHAR (start_byte + cursor - base);
823 }
824 cursor--;
825 }
826 }
827
828 start = BYTE_TO_CHAR (start_byte + cursor - base);
829 }
830 }
831
832 immediate_quit = 0;
833 if (shortage != 0)
834 *shortage = count * direction;
835 return start;
836 }
837 \f
838 /* Search for COUNT instances of a line boundary, which means either a
839 newline or (if selective display enabled) a carriage return.
840 Start at START. If COUNT is negative, search backwards.
841
842 We report the resulting position by calling TEMP_SET_PT_BOTH.
843
844 If we find COUNT instances. we position after (always after,
845 even if scanning backwards) the COUNTth match, and return 0.
846
847 If we don't find COUNT instances before reaching the end of the
848 buffer (or the beginning, if scanning backwards), we return
849 the number of line boundaries left unfound, and position at
850 the limit we bumped up against.
851
852 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
853 except in special cases. */
854
855 int
856 scan_newline (EMACS_INT start, EMACS_INT start_byte, EMACS_INT limit, EMACS_INT limit_byte, register int count, int allow_quit)
857 {
858 int direction = ((count > 0) ? 1 : -1);
859
860 register unsigned char *cursor;
861 unsigned char *base;
862
863 EMACS_INT ceiling;
864 register unsigned char *ceiling_addr;
865
866 int old_immediate_quit = immediate_quit;
867
868 /* The code that follows is like scan_buffer
869 but checks for either newline or carriage return. */
870
871 if (allow_quit)
872 immediate_quit++;
873
874 start_byte = CHAR_TO_BYTE (start);
875
876 if (count > 0)
877 {
878 while (start_byte < limit_byte)
879 {
880 ceiling = BUFFER_CEILING_OF (start_byte);
881 ceiling = min (limit_byte - 1, ceiling);
882 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
883 base = (cursor = BYTE_POS_ADDR (start_byte));
884 while (1)
885 {
886 while (*cursor != '\n' && ++cursor != ceiling_addr)
887 ;
888
889 if (cursor != ceiling_addr)
890 {
891 if (--count == 0)
892 {
893 immediate_quit = old_immediate_quit;
894 start_byte = start_byte + cursor - base + 1;
895 start = BYTE_TO_CHAR (start_byte);
896 TEMP_SET_PT_BOTH (start, start_byte);
897 return 0;
898 }
899 else
900 if (++cursor == ceiling_addr)
901 break;
902 }
903 else
904 break;
905 }
906 start_byte += cursor - base;
907 }
908 }
909 else
910 {
911 while (start_byte > limit_byte)
912 {
913 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
914 ceiling = max (limit_byte, ceiling);
915 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
916 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
917 while (1)
918 {
919 while (--cursor != ceiling_addr && *cursor != '\n')
920 ;
921
922 if (cursor != ceiling_addr)
923 {
924 if (++count == 0)
925 {
926 immediate_quit = old_immediate_quit;
927 /* Return the position AFTER the match we found. */
928 start_byte = start_byte + cursor - base + 1;
929 start = BYTE_TO_CHAR (start_byte);
930 TEMP_SET_PT_BOTH (start, start_byte);
931 return 0;
932 }
933 }
934 else
935 break;
936 }
937 /* Here we add 1 to compensate for the last decrement
938 of CURSOR, which took it past the valid range. */
939 start_byte += cursor - base + 1;
940 }
941 }
942
943 TEMP_SET_PT_BOTH (limit, limit_byte);
944 immediate_quit = old_immediate_quit;
945
946 return count * direction;
947 }
948
949 int
950 find_next_newline_no_quit (EMACS_INT from, int cnt)
951 {
952 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
953 }
954
955 /* Like find_next_newline, but returns position before the newline,
956 not after, and only search up to TO. This isn't just
957 find_next_newline (...)-1, because you might hit TO. */
958
959 int
960 find_before_next_newline (EMACS_INT from, EMACS_INT to, int cnt)
961 {
962 int shortage;
963 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
964
965 if (shortage == 0)
966 pos--;
967
968 return pos;
969 }
970 \f
971 /* Subroutines of Lisp buffer search functions. */
972
973 static Lisp_Object
974 search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count, int direction, int RE, int posix)
975 {
976 register int np;
977 int lim, lim_byte;
978 int n = direction;
979
980 if (!NILP (count))
981 {
982 CHECK_NUMBER (count);
983 n *= XINT (count);
984 }
985
986 CHECK_STRING (string);
987 if (NILP (bound))
988 {
989 if (n > 0)
990 lim = ZV, lim_byte = ZV_BYTE;
991 else
992 lim = BEGV, lim_byte = BEGV_BYTE;
993 }
994 else
995 {
996 CHECK_NUMBER_COERCE_MARKER (bound);
997 lim = XINT (bound);
998 if (n > 0 ? lim < PT : lim > PT)
999 error ("Invalid search bound (wrong side of point)");
1000 if (lim > ZV)
1001 lim = ZV, lim_byte = ZV_BYTE;
1002 else if (lim < BEGV)
1003 lim = BEGV, lim_byte = BEGV_BYTE;
1004 else
1005 lim_byte = CHAR_TO_BYTE (lim);
1006 }
1007
1008 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
1009 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
1010 = current_buffer->case_eqv_table;
1011
1012 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
1013 (!NILP (current_buffer->case_fold_search)
1014 ? current_buffer->case_canon_table
1015 : Qnil),
1016 (!NILP (current_buffer->case_fold_search)
1017 ? current_buffer->case_eqv_table
1018 : Qnil),
1019 posix);
1020 if (np <= 0)
1021 {
1022 if (NILP (noerror))
1023 xsignal1 (Qsearch_failed, string);
1024
1025 if (!EQ (noerror, Qt))
1026 {
1027 if (lim < BEGV || lim > ZV)
1028 abort ();
1029 SET_PT_BOTH (lim, lim_byte);
1030 return Qnil;
1031 #if 0 /* This would be clean, but maybe programs depend on
1032 a value of nil here. */
1033 np = lim;
1034 #endif
1035 }
1036 else
1037 return Qnil;
1038 }
1039
1040 if (np < BEGV || np > ZV)
1041 abort ();
1042
1043 SET_PT (np);
1044
1045 return make_number (np);
1046 }
1047 \f
1048 /* Return 1 if REGEXP it matches just one constant string. */
1049
1050 static int
1051 trivial_regexp_p (Lisp_Object regexp)
1052 {
1053 int len = SBYTES (regexp);
1054 unsigned char *s = SDATA (regexp);
1055 while (--len >= 0)
1056 {
1057 switch (*s++)
1058 {
1059 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1060 return 0;
1061 case '\\':
1062 if (--len < 0)
1063 return 0;
1064 switch (*s++)
1065 {
1066 case '|': case '(': case ')': case '`': case '\'': case 'b':
1067 case 'B': case '<': case '>': case 'w': case 'W': case 's':
1068 case 'S': case '=': case '{': case '}': case '_':
1069 case 'c': case 'C': /* for categoryspec and notcategoryspec */
1070 case '1': case '2': case '3': case '4': case '5':
1071 case '6': case '7': case '8': case '9':
1072 return 0;
1073 }
1074 }
1075 }
1076 return 1;
1077 }
1078
1079 /* Search for the n'th occurrence of STRING in the current buffer,
1080 starting at position POS and stopping at position LIM,
1081 treating STRING as a literal string if RE is false or as
1082 a regular expression if RE is true.
1083
1084 If N is positive, searching is forward and LIM must be greater than POS.
1085 If N is negative, searching is backward and LIM must be less than POS.
1086
1087 Returns -x if x occurrences remain to be found (x > 0),
1088 or else the position at the beginning of the Nth occurrence
1089 (if searching backward) or the end (if searching forward).
1090
1091 POSIX is nonzero if we want full backtracking (POSIX style)
1092 for this pattern. 0 means backtrack only enough to get a valid match. */
1093
1094 #define TRANSLATE(out, trt, d) \
1095 do \
1096 { \
1097 if (! NILP (trt)) \
1098 { \
1099 Lisp_Object temp; \
1100 temp = Faref (trt, make_number (d)); \
1101 if (INTEGERP (temp)) \
1102 out = XINT (temp); \
1103 else \
1104 out = d; \
1105 } \
1106 else \
1107 out = d; \
1108 } \
1109 while (0)
1110
1111 /* Only used in search_buffer, to record the end position of the match
1112 when searching regexps and SEARCH_REGS should not be changed
1113 (i.e. Vinhibit_changing_match_data is non-nil). */
1114 static struct re_registers search_regs_1;
1115
1116 static EMACS_INT
1117 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1118 RE, trt, inverse_trt, posix)
1119 Lisp_Object string;
1120 EMACS_INT pos;
1121 EMACS_INT pos_byte;
1122 EMACS_INT lim;
1123 EMACS_INT lim_byte;
1124 int n;
1125 int RE;
1126 Lisp_Object trt;
1127 Lisp_Object inverse_trt;
1128 int posix;
1129 {
1130 int len = SCHARS (string);
1131 int len_byte = SBYTES (string);
1132 register int i;
1133
1134 if (running_asynch_code)
1135 save_search_regs ();
1136
1137 /* Searching 0 times means don't move. */
1138 /* Null string is found at starting position. */
1139 if (len == 0 || n == 0)
1140 {
1141 set_search_regs (pos_byte, 0);
1142 return pos;
1143 }
1144
1145 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1146 {
1147 unsigned char *p1, *p2;
1148 int s1, s2;
1149 struct re_pattern_buffer *bufp;
1150
1151 bufp = compile_pattern (string,
1152 (NILP (Vinhibit_changing_match_data)
1153 ? &search_regs : &search_regs_1),
1154 trt, posix,
1155 !NILP (current_buffer->enable_multibyte_characters));
1156
1157 immediate_quit = 1; /* Quit immediately if user types ^G,
1158 because letting this function finish
1159 can take too long. */
1160 QUIT; /* Do a pending quit right away,
1161 to avoid paradoxical behavior */
1162 /* Get pointers and sizes of the two strings
1163 that make up the visible portion of the buffer. */
1164
1165 p1 = BEGV_ADDR;
1166 s1 = GPT_BYTE - BEGV_BYTE;
1167 p2 = GAP_END_ADDR;
1168 s2 = ZV_BYTE - GPT_BYTE;
1169 if (s1 < 0)
1170 {
1171 p2 = p1;
1172 s2 = ZV_BYTE - BEGV_BYTE;
1173 s1 = 0;
1174 }
1175 if (s2 < 0)
1176 {
1177 s1 = ZV_BYTE - BEGV_BYTE;
1178 s2 = 0;
1179 }
1180 re_match_object = Qnil;
1181
1182 while (n < 0)
1183 {
1184 int val;
1185 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1186 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1187 (NILP (Vinhibit_changing_match_data)
1188 ? &search_regs : &search_regs_1),
1189 /* Don't allow match past current point */
1190 pos_byte - BEGV_BYTE);
1191 if (val == -2)
1192 {
1193 matcher_overflow ();
1194 }
1195 if (val >= 0)
1196 {
1197 if (NILP (Vinhibit_changing_match_data))
1198 {
1199 pos_byte = search_regs.start[0] + BEGV_BYTE;
1200 for (i = 0; i < search_regs.num_regs; i++)
1201 if (search_regs.start[i] >= 0)
1202 {
1203 search_regs.start[i]
1204 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1205 search_regs.end[i]
1206 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1207 }
1208 XSETBUFFER (last_thing_searched, current_buffer);
1209 /* Set pos to the new position. */
1210 pos = search_regs.start[0];
1211 }
1212 else
1213 {
1214 pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1215 /* Set pos to the new position. */
1216 pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1217 }
1218 }
1219 else
1220 {
1221 immediate_quit = 0;
1222 return (n);
1223 }
1224 n++;
1225 }
1226 while (n > 0)
1227 {
1228 int val;
1229 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1230 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1231 (NILP (Vinhibit_changing_match_data)
1232 ? &search_regs : &search_regs_1),
1233 lim_byte - BEGV_BYTE);
1234 if (val == -2)
1235 {
1236 matcher_overflow ();
1237 }
1238 if (val >= 0)
1239 {
1240 if (NILP (Vinhibit_changing_match_data))
1241 {
1242 pos_byte = search_regs.end[0] + BEGV_BYTE;
1243 for (i = 0; i < search_regs.num_regs; i++)
1244 if (search_regs.start[i] >= 0)
1245 {
1246 search_regs.start[i]
1247 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1248 search_regs.end[i]
1249 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1250 }
1251 XSETBUFFER (last_thing_searched, current_buffer);
1252 pos = search_regs.end[0];
1253 }
1254 else
1255 {
1256 pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1257 pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1258 }
1259 }
1260 else
1261 {
1262 immediate_quit = 0;
1263 return (0 - n);
1264 }
1265 n--;
1266 }
1267 immediate_quit = 0;
1268 return (pos);
1269 }
1270 else /* non-RE case */
1271 {
1272 unsigned char *raw_pattern, *pat;
1273 int raw_pattern_size;
1274 int raw_pattern_size_byte;
1275 unsigned char *patbuf;
1276 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1277 unsigned char *base_pat;
1278 /* Set to positive if we find a non-ASCII char that need
1279 translation. Otherwise set to zero later. */
1280 int char_base = -1;
1281 int boyer_moore_ok = 1;
1282
1283 /* MULTIBYTE says whether the text to be searched is multibyte.
1284 We must convert PATTERN to match that, or we will not really
1285 find things right. */
1286
1287 if (multibyte == STRING_MULTIBYTE (string))
1288 {
1289 raw_pattern = (unsigned char *) SDATA (string);
1290 raw_pattern_size = SCHARS (string);
1291 raw_pattern_size_byte = SBYTES (string);
1292 }
1293 else if (multibyte)
1294 {
1295 raw_pattern_size = SCHARS (string);
1296 raw_pattern_size_byte
1297 = count_size_as_multibyte (SDATA (string),
1298 raw_pattern_size);
1299 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1300 copy_text (SDATA (string), raw_pattern,
1301 SCHARS (string), 0, 1);
1302 }
1303 else
1304 {
1305 /* Converting multibyte to single-byte.
1306
1307 ??? Perhaps this conversion should be done in a special way
1308 by subtracting nonascii-insert-offset from each non-ASCII char,
1309 so that only the multibyte chars which really correspond to
1310 the chosen single-byte character set can possibly match. */
1311 raw_pattern_size = SCHARS (string);
1312 raw_pattern_size_byte = SCHARS (string);
1313 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1314 copy_text (SDATA (string), raw_pattern,
1315 SBYTES (string), 1, 0);
1316 }
1317
1318 /* Copy and optionally translate the pattern. */
1319 len = raw_pattern_size;
1320 len_byte = raw_pattern_size_byte;
1321 patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
1322 pat = patbuf;
1323 base_pat = raw_pattern;
1324 if (multibyte)
1325 {
1326 /* Fill patbuf by translated characters in STRING while
1327 checking if we can use boyer-moore search. If TRT is
1328 non-nil, we can use boyer-moore search only if TRT can be
1329 represented by the byte array of 256 elements. For that,
1330 all non-ASCII case-equivalents of all case-senstive
1331 characters in STRING must belong to the same charset and
1332 row. */
1333
1334 while (--len >= 0)
1335 {
1336 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1337 int c, translated, inverse;
1338 int in_charlen, charlen;
1339
1340 /* If we got here and the RE flag is set, it's because we're
1341 dealing with a regexp known to be trivial, so the backslash
1342 just quotes the next character. */
1343 if (RE && *base_pat == '\\')
1344 {
1345 len--;
1346 raw_pattern_size--;
1347 len_byte--;
1348 base_pat++;
1349 }
1350
1351 c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
1352
1353 if (NILP (trt))
1354 {
1355 str = base_pat;
1356 charlen = in_charlen;
1357 }
1358 else
1359 {
1360 /* Translate the character. */
1361 TRANSLATE (translated, trt, c);
1362 charlen = CHAR_STRING (translated, str_base);
1363 str = str_base;
1364
1365 /* Check if C has any other case-equivalents. */
1366 TRANSLATE (inverse, inverse_trt, c);
1367 /* If so, check if we can use boyer-moore. */
1368 if (c != inverse && boyer_moore_ok)
1369 {
1370 /* Check if all equivalents belong to the same
1371 group of characters. Note that the check of C
1372 itself is done by the last iteration. */
1373 int this_char_base = -1;
1374
1375 while (boyer_moore_ok)
1376 {
1377 if (ASCII_BYTE_P (inverse))
1378 {
1379 if (this_char_base > 0)
1380 boyer_moore_ok = 0;
1381 else
1382 this_char_base = 0;
1383 }
1384 else if (CHAR_BYTE8_P (inverse))
1385 /* Boyer-moore search can't handle a
1386 translation of an eight-bit
1387 character. */
1388 boyer_moore_ok = 0;
1389 else if (this_char_base < 0)
1390 {
1391 this_char_base = inverse & ~0x3F;
1392 if (char_base < 0)
1393 char_base = this_char_base;
1394 else if (this_char_base != char_base)
1395 boyer_moore_ok = 0;
1396 }
1397 else if ((inverse & ~0x3F) != this_char_base)
1398 boyer_moore_ok = 0;
1399 if (c == inverse)
1400 break;
1401 TRANSLATE (inverse, inverse_trt, inverse);
1402 }
1403 }
1404 }
1405
1406 /* Store this character into the translated pattern. */
1407 bcopy (str, pat, charlen);
1408 pat += charlen;
1409 base_pat += in_charlen;
1410 len_byte -= in_charlen;
1411 }
1412
1413 /* If char_base is still negative we didn't find any translated
1414 non-ASCII characters. */
1415 if (char_base < 0)
1416 char_base = 0;
1417 }
1418 else
1419 {
1420 /* Unibyte buffer. */
1421 char_base = 0;
1422 while (--len >= 0)
1423 {
1424 int c, translated;
1425
1426 /* If we got here and the RE flag is set, it's because we're
1427 dealing with a regexp known to be trivial, so the backslash
1428 just quotes the next character. */
1429 if (RE && *base_pat == '\\')
1430 {
1431 len--;
1432 raw_pattern_size--;
1433 base_pat++;
1434 }
1435 c = *base_pat++;
1436 TRANSLATE (translated, trt, c);
1437 *pat++ = translated;
1438 }
1439 }
1440
1441 len_byte = pat - patbuf;
1442 len = raw_pattern_size;
1443 pat = base_pat = patbuf;
1444
1445 if (boyer_moore_ok)
1446 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1447 pos, pos_byte, lim, lim_byte,
1448 char_base);
1449 else
1450 return simple_search (n, pat, len, len_byte, trt,
1451 pos, pos_byte, lim, lim_byte);
1452 }
1453 }
1454 \f
1455 /* Do a simple string search N times for the string PAT,
1456 whose length is LEN/LEN_BYTE,
1457 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1458 TRT is the translation table.
1459
1460 Return the character position where the match is found.
1461 Otherwise, if M matches remained to be found, return -M.
1462
1463 This kind of search works regardless of what is in PAT and
1464 regardless of what is in TRT. It is used in cases where
1465 boyer_moore cannot work. */
1466
1467 static EMACS_INT
1468 simple_search (int n, unsigned char *pat, int len, int len_byte, Lisp_Object trt, EMACS_INT pos, EMACS_INT pos_byte, EMACS_INT lim, EMACS_INT lim_byte)
1469 {
1470 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1471 int forward = n > 0;
1472 /* Number of buffer bytes matched. Note that this may be different
1473 from len_byte in a multibyte buffer. */
1474 int match_byte;
1475
1476 if (lim > pos && multibyte)
1477 while (n > 0)
1478 {
1479 while (1)
1480 {
1481 /* Try matching at position POS. */
1482 EMACS_INT this_pos = pos;
1483 EMACS_INT this_pos_byte = pos_byte;
1484 int this_len = len;
1485 unsigned char *p = pat;
1486 if (pos + len > lim || pos_byte + len_byte > lim_byte)
1487 goto stop;
1488
1489 while (this_len > 0)
1490 {
1491 int charlen, buf_charlen;
1492 int pat_ch, buf_ch;
1493
1494 pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
1495 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1496 buf_charlen);
1497 TRANSLATE (buf_ch, trt, buf_ch);
1498
1499 if (buf_ch != pat_ch)
1500 break;
1501
1502 this_len--;
1503 p += charlen;
1504
1505 this_pos_byte += buf_charlen;
1506 this_pos++;
1507 }
1508
1509 if (this_len == 0)
1510 {
1511 match_byte = this_pos_byte - pos_byte;
1512 pos += len;
1513 pos_byte += match_byte;
1514 break;
1515 }
1516
1517 INC_BOTH (pos, pos_byte);
1518 }
1519
1520 n--;
1521 }
1522 else if (lim > pos)
1523 while (n > 0)
1524 {
1525 while (1)
1526 {
1527 /* Try matching at position POS. */
1528 EMACS_INT this_pos = pos;
1529 int this_len = len;
1530 unsigned char *p = pat;
1531
1532 if (pos + len > lim)
1533 goto stop;
1534
1535 while (this_len > 0)
1536 {
1537 int pat_ch = *p++;
1538 int buf_ch = FETCH_BYTE (this_pos);
1539 TRANSLATE (buf_ch, trt, buf_ch);
1540
1541 if (buf_ch != pat_ch)
1542 break;
1543
1544 this_len--;
1545 this_pos++;
1546 }
1547
1548 if (this_len == 0)
1549 {
1550 match_byte = len;
1551 pos += len;
1552 break;
1553 }
1554
1555 pos++;
1556 }
1557
1558 n--;
1559 }
1560 /* Backwards search. */
1561 else if (lim < pos && multibyte)
1562 while (n < 0)
1563 {
1564 while (1)
1565 {
1566 /* Try matching at position POS. */
1567 EMACS_INT this_pos = pos;
1568 EMACS_INT this_pos_byte = pos_byte;
1569 int this_len = len;
1570 const unsigned char *p = pat + len_byte;
1571
1572 if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
1573 goto stop;
1574
1575 while (this_len > 0)
1576 {
1577 int charlen;
1578 int pat_ch, buf_ch;
1579
1580 DEC_BOTH (this_pos, this_pos_byte);
1581 PREV_CHAR_BOUNDARY (p, pat);
1582 pat_ch = STRING_CHAR (p);
1583 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
1584 TRANSLATE (buf_ch, trt, buf_ch);
1585
1586 if (buf_ch != pat_ch)
1587 break;
1588
1589 this_len--;
1590 }
1591
1592 if (this_len == 0)
1593 {
1594 match_byte = pos_byte - this_pos_byte;
1595 pos = this_pos;
1596 pos_byte = this_pos_byte;
1597 break;
1598 }
1599
1600 DEC_BOTH (pos, pos_byte);
1601 }
1602
1603 n++;
1604 }
1605 else if (lim < pos)
1606 while (n < 0)
1607 {
1608 while (1)
1609 {
1610 /* Try matching at position POS. */
1611 EMACS_INT this_pos = pos - len;
1612 int this_len = len;
1613 unsigned char *p = pat;
1614
1615 if (this_pos < lim)
1616 goto stop;
1617
1618 while (this_len > 0)
1619 {
1620 int pat_ch = *p++;
1621 int buf_ch = FETCH_BYTE (this_pos);
1622 TRANSLATE (buf_ch, trt, buf_ch);
1623
1624 if (buf_ch != pat_ch)
1625 break;
1626 this_len--;
1627 this_pos++;
1628 }
1629
1630 if (this_len == 0)
1631 {
1632 match_byte = len;
1633 pos -= len;
1634 break;
1635 }
1636
1637 pos--;
1638 }
1639
1640 n++;
1641 }
1642
1643 stop:
1644 if (n == 0)
1645 {
1646 if (forward)
1647 set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1648 else
1649 set_search_regs (multibyte ? pos_byte : pos, match_byte);
1650
1651 return pos;
1652 }
1653 else if (n > 0)
1654 return -n;
1655 else
1656 return n;
1657 }
1658 \f
1659 /* Do Boyer-Moore search N times for the string BASE_PAT,
1660 whose length is LEN/LEN_BYTE,
1661 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1662 DIRECTION says which direction we search in.
1663 TRT and INVERSE_TRT are translation tables.
1664 Characters in PAT are already translated by TRT.
1665
1666 This kind of search works if all the characters in BASE_PAT that
1667 have nontrivial translation are the same aside from the last byte.
1668 This makes it possible to translate just the last byte of a
1669 character, and do so after just a simple test of the context.
1670 CHAR_BASE is nonzero if there is such a non-ASCII character.
1671
1672 If that criterion is not satisfied, do not call this function. */
1673
1674 static EMACS_INT
1675 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1676 pos, pos_byte, lim, lim_byte, char_base)
1677 int n;
1678 unsigned char *base_pat;
1679 int len, len_byte;
1680 Lisp_Object trt;
1681 Lisp_Object inverse_trt;
1682 EMACS_INT pos, pos_byte;
1683 EMACS_INT lim, lim_byte;
1684 int char_base;
1685 {
1686 int direction = ((n > 0) ? 1 : -1);
1687 register int dirlen;
1688 EMACS_INT limit;
1689 int stride_for_teases = 0;
1690 int BM_tab[0400];
1691 register unsigned char *cursor, *p_limit;
1692 register int i, j;
1693 unsigned char *pat, *pat_end;
1694 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1695
1696 unsigned char simple_translate[0400];
1697 /* These are set to the preceding bytes of a byte to be translated
1698 if char_base is nonzero. As the maximum byte length of a
1699 multibyte character is 5, we have to check at most four previous
1700 bytes. */
1701 int translate_prev_byte1 = 0;
1702 int translate_prev_byte2 = 0;
1703 int translate_prev_byte3 = 0;
1704 int translate_prev_byte4 = 0;
1705
1706 /* The general approach is that we are going to maintain that we know
1707 the first (closest to the present position, in whatever direction
1708 we're searching) character that could possibly be the last
1709 (furthest from present position) character of a valid match. We
1710 advance the state of our knowledge by looking at that character
1711 and seeing whether it indeed matches the last character of the
1712 pattern. If it does, we take a closer look. If it does not, we
1713 move our pointer (to putative last characters) as far as is
1714 logically possible. This amount of movement, which I call a
1715 stride, will be the length of the pattern if the actual character
1716 appears nowhere in the pattern, otherwise it will be the distance
1717 from the last occurrence of that character to the end of the
1718 pattern. If the amount is zero we have a possible match. */
1719
1720 /* Here we make a "mickey mouse" BM table. The stride of the search
1721 is determined only by the last character of the putative match.
1722 If that character does not match, we will stride the proper
1723 distance to propose a match that superimposes it on the last
1724 instance of a character that matches it (per trt), or misses
1725 it entirely if there is none. */
1726
1727 dirlen = len_byte * direction;
1728
1729 /* Record position after the end of the pattern. */
1730 pat_end = base_pat + len_byte;
1731 /* BASE_PAT points to a character that we start scanning from.
1732 It is the first character in a forward search,
1733 the last character in a backward search. */
1734 if (direction < 0)
1735 base_pat = pat_end - 1;
1736
1737 /* A character that does not appear in the pattern induces a
1738 stride equal to the pattern length. */
1739 for (i = 0; i < 0400; i++)
1740 BM_tab[i] = dirlen;
1741
1742 /* We use this for translation, instead of TRT itself.
1743 We fill this in to handle the characters that actually
1744 occur in the pattern. Others don't matter anyway! */
1745 for (i = 0; i < 0400; i++)
1746 simple_translate[i] = i;
1747
1748 if (char_base)
1749 {
1750 /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE. Only a
1751 byte following them are the target of translation. */
1752 unsigned char str[MAX_MULTIBYTE_LENGTH];
1753 int len = CHAR_STRING (char_base, str);
1754
1755 translate_prev_byte1 = str[len - 2];
1756 if (len > 2)
1757 {
1758 translate_prev_byte2 = str[len - 3];
1759 if (len > 3)
1760 {
1761 translate_prev_byte3 = str[len - 4];
1762 if (len > 4)
1763 translate_prev_byte4 = str[len - 5];
1764 }
1765 }
1766 }
1767
1768 i = 0;
1769 while (i != dirlen)
1770 {
1771 unsigned char *ptr = base_pat + i;
1772 i += direction;
1773 if (! NILP (trt))
1774 {
1775 /* If the byte currently looking at is the last of a
1776 character to check case-equivalents, set CH to that
1777 character. An ASCII character and a non-ASCII character
1778 matching with CHAR_BASE are to be checked. */
1779 int ch = -1;
1780
1781 if (ASCII_BYTE_P (*ptr) || ! multibyte)
1782 ch = *ptr;
1783 else if (char_base
1784 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1785 {
1786 unsigned char *charstart = ptr - 1;
1787
1788 while (! (CHAR_HEAD_P (*charstart)))
1789 charstart--;
1790 ch = STRING_CHAR (charstart);
1791 if (char_base != (ch & ~0x3F))
1792 ch = -1;
1793 }
1794
1795 if (ch >= 0200)
1796 j = (ch & 0x3F) | 0200;
1797 else
1798 j = *ptr;
1799
1800 if (i == dirlen)
1801 stride_for_teases = BM_tab[j];
1802
1803 BM_tab[j] = dirlen - i;
1804 /* A translation table is accompanied by its inverse -- see */
1805 /* comment following downcase_table for details */
1806 if (ch >= 0)
1807 {
1808 int starting_ch = ch;
1809 int starting_j = j;
1810
1811 while (1)
1812 {
1813 TRANSLATE (ch, inverse_trt, ch);
1814 if (ch >= 0200)
1815 j = (ch & 0x3F) | 0200;
1816 else
1817 j = ch;
1818
1819 /* For all the characters that map into CH,
1820 set up simple_translate to map the last byte
1821 into STARTING_J. */
1822 simple_translate[j] = starting_j;
1823 if (ch == starting_ch)
1824 break;
1825 BM_tab[j] = dirlen - i;
1826 }
1827 }
1828 }
1829 else
1830 {
1831 j = *ptr;
1832
1833 if (i == dirlen)
1834 stride_for_teases = BM_tab[j];
1835 BM_tab[j] = dirlen - i;
1836 }
1837 /* stride_for_teases tells how much to stride if we get a
1838 match on the far character but are subsequently
1839 disappointed, by recording what the stride would have been
1840 for that character if the last character had been
1841 different. */
1842 }
1843 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1844 /* loop invariant - POS_BYTE points at where last char (first
1845 char if reverse) of pattern would align in a possible match. */
1846 while (n != 0)
1847 {
1848 EMACS_INT tail_end;
1849 unsigned char *tail_end_ptr;
1850
1851 /* It's been reported that some (broken) compiler thinks that
1852 Boolean expressions in an arithmetic context are unsigned.
1853 Using an explicit ?1:0 prevents this. */
1854 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1855 < 0)
1856 return (n * (0 - direction));
1857 /* First we do the part we can by pointers (maybe nothing) */
1858 QUIT;
1859 pat = base_pat;
1860 limit = pos_byte - dirlen + direction;
1861 if (direction > 0)
1862 {
1863 limit = BUFFER_CEILING_OF (limit);
1864 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1865 can take on without hitting edge of buffer or the gap. */
1866 limit = min (limit, pos_byte + 20000);
1867 limit = min (limit, lim_byte - 1);
1868 }
1869 else
1870 {
1871 limit = BUFFER_FLOOR_OF (limit);
1872 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1873 can take on without hitting edge of buffer or the gap. */
1874 limit = max (limit, pos_byte - 20000);
1875 limit = max (limit, lim_byte);
1876 }
1877 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1878 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1879
1880 if ((limit - pos_byte) * direction > 20)
1881 {
1882 unsigned char *p2;
1883
1884 p_limit = BYTE_POS_ADDR (limit);
1885 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1886 /* In this loop, pos + cursor - p2 is the surrogate for pos. */
1887 while (1) /* use one cursor setting as long as i can */
1888 {
1889 if (direction > 0) /* worth duplicating */
1890 {
1891 while (cursor <= p_limit)
1892 {
1893 if (BM_tab[*cursor] == 0)
1894 goto hit;
1895 cursor += BM_tab[*cursor];
1896 }
1897 }
1898 else
1899 {
1900 while (cursor >= p_limit)
1901 {
1902 if (BM_tab[*cursor] == 0)
1903 goto hit;
1904 cursor += BM_tab[*cursor];
1905 }
1906 }
1907 /* If you are here, cursor is beyond the end of the
1908 searched region. You fail to match within the
1909 permitted region and would otherwise try a character
1910 beyond that region. */
1911 break;
1912
1913 hit:
1914 i = dirlen - direction;
1915 if (! NILP (trt))
1916 {
1917 while ((i -= direction) + direction != 0)
1918 {
1919 int ch;
1920 cursor -= direction;
1921 /* Translate only the last byte of a character. */
1922 if (! multibyte
1923 || ((cursor == tail_end_ptr
1924 || CHAR_HEAD_P (cursor[1]))
1925 && (CHAR_HEAD_P (cursor[0])
1926 /* Check if this is the last byte of
1927 a translable character. */
1928 || (translate_prev_byte1 == cursor[-1]
1929 && (CHAR_HEAD_P (translate_prev_byte1)
1930 || (translate_prev_byte2 == cursor[-2]
1931 && (CHAR_HEAD_P (translate_prev_byte2)
1932 || (translate_prev_byte3 == cursor[-3]))))))))
1933 ch = simple_translate[*cursor];
1934 else
1935 ch = *cursor;
1936 if (pat[i] != ch)
1937 break;
1938 }
1939 }
1940 else
1941 {
1942 while ((i -= direction) + direction != 0)
1943 {
1944 cursor -= direction;
1945 if (pat[i] != *cursor)
1946 break;
1947 }
1948 }
1949 cursor += dirlen - i - direction; /* fix cursor */
1950 if (i + direction == 0)
1951 {
1952 EMACS_INT position, start, end;
1953
1954 cursor -= direction;
1955
1956 position = pos_byte + cursor - p2 + ((direction > 0)
1957 ? 1 - len_byte : 0);
1958 set_search_regs (position, len_byte);
1959
1960 if (NILP (Vinhibit_changing_match_data))
1961 {
1962 start = search_regs.start[0];
1963 end = search_regs.end[0];
1964 }
1965 else
1966 /* If Vinhibit_changing_match_data is non-nil,
1967 search_regs will not be changed. So let's
1968 compute start and end here. */
1969 {
1970 start = BYTE_TO_CHAR (position);
1971 end = BYTE_TO_CHAR (position + len_byte);
1972 }
1973
1974 if ((n -= direction) != 0)
1975 cursor += dirlen; /* to resume search */
1976 else
1977 return direction > 0 ? end : start;
1978 }
1979 else
1980 cursor += stride_for_teases; /* <sigh> we lose - */
1981 }
1982 pos_byte += cursor - p2;
1983 }
1984 else
1985 /* Now we'll pick up a clump that has to be done the hard
1986 way because it covers a discontinuity. */
1987 {
1988 limit = ((direction > 0)
1989 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1990 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1991 limit = ((direction > 0)
1992 ? min (limit + len_byte, lim_byte - 1)
1993 : max (limit - len_byte, lim_byte));
1994 /* LIMIT is now the last value POS_BYTE can have
1995 and still be valid for a possible match. */
1996 while (1)
1997 {
1998 /* This loop can be coded for space rather than
1999 speed because it will usually run only once.
2000 (the reach is at most len + 21, and typically
2001 does not exceed len). */
2002 while ((limit - pos_byte) * direction >= 0)
2003 {
2004 int ch = FETCH_BYTE (pos_byte);
2005 if (BM_tab[ch] == 0)
2006 goto hit2;
2007 pos_byte += BM_tab[ch];
2008 }
2009 break; /* ran off the end */
2010
2011 hit2:
2012 /* Found what might be a match. */
2013 i = dirlen - direction;
2014 while ((i -= direction) + direction != 0)
2015 {
2016 int ch;
2017 unsigned char *ptr;
2018 pos_byte -= direction;
2019 ptr = BYTE_POS_ADDR (pos_byte);
2020 /* Translate only the last byte of a character. */
2021 if (! multibyte
2022 || ((ptr == tail_end_ptr
2023 || CHAR_HEAD_P (ptr[1]))
2024 && (CHAR_HEAD_P (ptr[0])
2025 /* Check if this is the last byte of a
2026 translable character. */
2027 || (translate_prev_byte1 == ptr[-1]
2028 && (CHAR_HEAD_P (translate_prev_byte1)
2029 || (translate_prev_byte2 == ptr[-2]
2030 && (CHAR_HEAD_P (translate_prev_byte2)
2031 || translate_prev_byte3 == ptr[-3])))))))
2032 ch = simple_translate[*ptr];
2033 else
2034 ch = *ptr;
2035 if (pat[i] != ch)
2036 break;
2037 }
2038 /* Above loop has moved POS_BYTE part or all the way
2039 back to the first pos (last pos if reverse).
2040 Set it once again at the last (first if reverse) char. */
2041 pos_byte += dirlen - i - direction;
2042 if (i + direction == 0)
2043 {
2044 EMACS_INT position, start, end;
2045 pos_byte -= direction;
2046
2047 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2048 set_search_regs (position, len_byte);
2049
2050 if (NILP (Vinhibit_changing_match_data))
2051 {
2052 start = search_regs.start[0];
2053 end = search_regs.end[0];
2054 }
2055 else
2056 /* If Vinhibit_changing_match_data is non-nil,
2057 search_regs will not be changed. So let's
2058 compute start and end here. */
2059 {
2060 start = BYTE_TO_CHAR (position);
2061 end = BYTE_TO_CHAR (position + len_byte);
2062 }
2063
2064 if ((n -= direction) != 0)
2065 pos_byte += dirlen; /* to resume search */
2066 else
2067 return direction > 0 ? end : start;
2068 }
2069 else
2070 pos_byte += stride_for_teases;
2071 }
2072 }
2073 /* We have done one clump. Can we continue? */
2074 if ((lim_byte - pos_byte) * direction < 0)
2075 return ((0 - n) * direction);
2076 }
2077 return BYTE_TO_CHAR (pos_byte);
2078 }
2079
2080 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2081 for the overall match just found in the current buffer.
2082 Also clear out the match data for registers 1 and up. */
2083
2084 static void
2085 set_search_regs (EMACS_INT beg_byte, EMACS_INT nbytes)
2086 {
2087 int i;
2088
2089 if (!NILP (Vinhibit_changing_match_data))
2090 return;
2091
2092 /* Make sure we have registers in which to store
2093 the match position. */
2094 if (search_regs.num_regs == 0)
2095 {
2096 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2097 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2098 search_regs.num_regs = 2;
2099 }
2100
2101 /* Clear out the other registers. */
2102 for (i = 1; i < search_regs.num_regs; i++)
2103 {
2104 search_regs.start[i] = -1;
2105 search_regs.end[i] = -1;
2106 }
2107
2108 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2109 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2110 XSETBUFFER (last_thing_searched, current_buffer);
2111 }
2112 \f
2113 /* Given STRING, a string of words separated by word delimiters,
2114 compute a regexp that matches those exact words separated by
2115 arbitrary punctuation. If LAX is nonzero, the end of the string
2116 need not match a word boundary unless it ends in whitespace. */
2117
2118 static Lisp_Object
2119 wordify (Lisp_Object string, int lax)
2120 {
2121 register unsigned char *p, *o;
2122 register int i, i_byte, len, punct_count = 0, word_count = 0;
2123 Lisp_Object val;
2124 int prev_c = 0;
2125 int adjust, whitespace_at_end;
2126
2127 CHECK_STRING (string);
2128 p = SDATA (string);
2129 len = SCHARS (string);
2130
2131 for (i = 0, i_byte = 0; i < len; )
2132 {
2133 int c;
2134
2135 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2136
2137 if (SYNTAX (c) != Sword)
2138 {
2139 punct_count++;
2140 if (i > 0 && SYNTAX (prev_c) == Sword)
2141 word_count++;
2142 }
2143
2144 prev_c = c;
2145 }
2146
2147 if (SYNTAX (prev_c) == Sword)
2148 {
2149 word_count++;
2150 whitespace_at_end = 0;
2151 }
2152 else
2153 whitespace_at_end = 1;
2154
2155 if (!word_count)
2156 return empty_unibyte_string;
2157
2158 adjust = - punct_count + 5 * (word_count - 1)
2159 + ((lax && !whitespace_at_end) ? 2 : 4);
2160 if (STRING_MULTIBYTE (string))
2161 val = make_uninit_multibyte_string (len + adjust,
2162 SBYTES (string)
2163 + adjust);
2164 else
2165 val = make_uninit_string (len + adjust);
2166
2167 o = SDATA (val);
2168 *o++ = '\\';
2169 *o++ = 'b';
2170 prev_c = 0;
2171
2172 for (i = 0, i_byte = 0; i < len; )
2173 {
2174 int c;
2175 int i_byte_orig = i_byte;
2176
2177 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2178
2179 if (SYNTAX (c) == Sword)
2180 {
2181 bcopy (SDATA (string) + i_byte_orig, o,
2182 i_byte - i_byte_orig);
2183 o += i_byte - i_byte_orig;
2184 }
2185 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2186 {
2187 *o++ = '\\';
2188 *o++ = 'W';
2189 *o++ = '\\';
2190 *o++ = 'W';
2191 *o++ = '*';
2192 }
2193
2194 prev_c = c;
2195 }
2196
2197 if (!lax || whitespace_at_end)
2198 {
2199 *o++ = '\\';
2200 *o++ = 'b';
2201 }
2202
2203 return val;
2204 }
2205 \f
2206 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2207 "MSearch backward: ",
2208 doc: /* Search backward from point for STRING.
2209 Set point to the beginning of the occurrence found, and return point.
2210 An optional second argument bounds the search; it is a buffer position.
2211 The match found must not extend before that position.
2212 Optional third argument, if t, means if fail just return nil (no error).
2213 If not nil and not t, position at limit of search and return nil.
2214 Optional fourth argument is repeat count--search for successive occurrences.
2215
2216 Search case-sensitivity is determined by the value of the variable
2217 `case-fold-search', which see.
2218
2219 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2220 (string, bound, noerror, count)
2221 Lisp_Object string, bound, noerror, count;
2222 {
2223 return search_command (string, bound, noerror, count, -1, 0, 0);
2224 }
2225
2226 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2227 doc: /* Search forward from point for STRING.
2228 Set point to the end of the occurrence found, and return point.
2229 An optional second argument bounds the search; it is a buffer position.
2230 The match found must not extend after that position. A value of nil is
2231 equivalent to (point-max).
2232 Optional third argument, if t, means if fail just return nil (no error).
2233 If not nil and not t, move to limit of search and return nil.
2234 Optional fourth argument is repeat count--search for successive occurrences.
2235
2236 Search case-sensitivity is determined by the value of the variable
2237 `case-fold-search', which see.
2238
2239 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2240 (string, bound, noerror, count)
2241 Lisp_Object string, bound, noerror, count;
2242 {
2243 return search_command (string, bound, noerror, count, 1, 0, 0);
2244 }
2245
2246 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2247 "sWord search backward: ",
2248 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2249 Set point to the beginning of the occurrence found, and return point.
2250 An optional second argument bounds the search; it is a buffer position.
2251 The match found must not extend before that position.
2252 Optional third argument, if t, means if fail just return nil (no error).
2253 If not nil and not t, move to limit of search and return nil.
2254 Optional fourth argument is repeat count--search for successive occurrences. */)
2255 (string, bound, noerror, count)
2256 Lisp_Object string, bound, noerror, count;
2257 {
2258 return search_command (wordify (string, 0), bound, noerror, count, -1, 1, 0);
2259 }
2260
2261 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2262 "sWord search: ",
2263 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2264 Set point to the end of the occurrence found, and return point.
2265 An optional second argument bounds the search; it is a buffer position.
2266 The match found must not extend after that position.
2267 Optional third argument, if t, means if fail just return nil (no error).
2268 If not nil and not t, move to limit of search and return nil.
2269 Optional fourth argument is repeat count--search for successive occurrences. */)
2270 (string, bound, noerror, count)
2271 Lisp_Object string, bound, noerror, count;
2272 {
2273 return search_command (wordify (string, 0), bound, noerror, count, 1, 1, 0);
2274 }
2275
2276 DEFUN ("word-search-backward-lax", Fword_search_backward_lax, Sword_search_backward_lax, 1, 4,
2277 "sWord search backward: ",
2278 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2279 Set point to the beginning of the occurrence found, and return point.
2280
2281 Unlike `word-search-backward', the end of STRING need not match a word
2282 boundary unless it ends in whitespace.
2283
2284 An optional second argument bounds the search; it is a buffer position.
2285 The match found must not extend before that position.
2286 Optional third argument, if t, means if fail just return nil (no error).
2287 If not nil and not t, move to limit of search and return nil.
2288 Optional fourth argument is repeat count--search for successive occurrences. */)
2289 (string, bound, noerror, count)
2290 Lisp_Object string, bound, noerror, count;
2291 {
2292 return search_command (wordify (string, 1), bound, noerror, count, -1, 1, 0);
2293 }
2294
2295 DEFUN ("word-search-forward-lax", Fword_search_forward_lax, Sword_search_forward_lax, 1, 4,
2296 "sWord search: ",
2297 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2298 Set point to the end of the occurrence found, and return point.
2299
2300 Unlike `word-search-forward', the end of STRING need not match a word
2301 boundary unless it ends in whitespace.
2302
2303 An optional second argument bounds the search; it is a buffer position.
2304 The match found must not extend after that position.
2305 Optional third argument, if t, means if fail just return nil (no error).
2306 If not nil and not t, move to limit of search and return nil.
2307 Optional fourth argument is repeat count--search for successive occurrences. */)
2308 (string, bound, noerror, count)
2309 Lisp_Object string, bound, noerror, count;
2310 {
2311 return search_command (wordify (string, 1), bound, noerror, count, 1, 1, 0);
2312 }
2313
2314 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2315 "sRE search backward: ",
2316 doc: /* Search backward from point for match for regular expression REGEXP.
2317 Set point to the beginning of the match, and return point.
2318 The match found is the one starting last in the buffer
2319 and yet ending before the origin of the search.
2320 An optional second argument bounds the search; it is a buffer position.
2321 The match found must start at or after that position.
2322 Optional third argument, if t, means if fail just return nil (no error).
2323 If not nil and not t, move to limit of search and return nil.
2324 Optional fourth argument is repeat count--search for successive occurrences.
2325 See also the functions `match-beginning', `match-end', `match-string',
2326 and `replace-match'. */)
2327 (regexp, bound, noerror, count)
2328 Lisp_Object regexp, bound, noerror, count;
2329 {
2330 return search_command (regexp, bound, noerror, count, -1, 1, 0);
2331 }
2332
2333 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2334 "sRE search: ",
2335 doc: /* Search forward from point for regular expression REGEXP.
2336 Set point to the end of the occurrence found, and return point.
2337 An optional second argument bounds the search; it is a buffer position.
2338 The match found must not extend after that position.
2339 Optional third argument, if t, means if fail just return nil (no error).
2340 If not nil and not t, move to limit of search and return nil.
2341 Optional fourth argument is repeat count--search for successive occurrences.
2342 See also the functions `match-beginning', `match-end', `match-string',
2343 and `replace-match'. */)
2344 (regexp, bound, noerror, count)
2345 Lisp_Object regexp, bound, noerror, count;
2346 {
2347 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2348 }
2349
2350 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2351 "sPosix search backward: ",
2352 doc: /* Search backward from point for match for regular expression REGEXP.
2353 Find the longest match in accord with Posix regular expression rules.
2354 Set point to the beginning of the match, and return point.
2355 The match found is the one starting last in the buffer
2356 and yet ending before the origin of the search.
2357 An optional second argument bounds the search; it is a buffer position.
2358 The match found must start at or after that position.
2359 Optional third argument, if t, means if fail just return nil (no error).
2360 If not nil and not t, move to limit of search and return nil.
2361 Optional fourth argument is repeat count--search for successive occurrences.
2362 See also the functions `match-beginning', `match-end', `match-string',
2363 and `replace-match'. */)
2364 (regexp, bound, noerror, count)
2365 Lisp_Object regexp, bound, noerror, count;
2366 {
2367 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2368 }
2369
2370 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2371 "sPosix search: ",
2372 doc: /* Search forward from point for regular expression REGEXP.
2373 Find the longest match in accord with Posix regular expression rules.
2374 Set point to the end of the occurrence found, and return point.
2375 An optional second argument bounds the search; it is a buffer position.
2376 The match found must not extend after that position.
2377 Optional third argument, if t, means if fail just return nil (no error).
2378 If not nil and not t, move to limit of search and return nil.
2379 Optional fourth argument is repeat count--search for successive occurrences.
2380 See also the functions `match-beginning', `match-end', `match-string',
2381 and `replace-match'. */)
2382 (regexp, bound, noerror, count)
2383 Lisp_Object regexp, bound, noerror, count;
2384 {
2385 return search_command (regexp, bound, noerror, count, 1, 1, 1);
2386 }
2387 \f
2388 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2389 doc: /* Replace text matched by last search with NEWTEXT.
2390 Leave point at the end of the replacement text.
2391
2392 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2393 Otherwise maybe capitalize the whole text, or maybe just word initials,
2394 based on the replaced text.
2395 If the replaced text has only capital letters
2396 and has at least one multiletter word, convert NEWTEXT to all caps.
2397 Otherwise if all words are capitalized in the replaced text,
2398 capitalize each word in NEWTEXT.
2399
2400 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2401 Otherwise treat `\\' as special:
2402 `\\&' in NEWTEXT means substitute original matched text.
2403 `\\N' means substitute what matched the Nth `\\(...\\)'.
2404 If Nth parens didn't match, substitute nothing.
2405 `\\\\' means insert one `\\'.
2406 Case conversion does not apply to these substitutions.
2407
2408 FIXEDCASE and LITERAL are optional arguments.
2409
2410 The optional fourth argument STRING can be a string to modify.
2411 This is meaningful when the previous match was done against STRING,
2412 using `string-match'. When used this way, `replace-match'
2413 creates and returns a new string made by copying STRING and replacing
2414 the part of STRING that was matched.
2415
2416 The optional fifth argument SUBEXP specifies a subexpression;
2417 it says to replace just that subexpression with NEWTEXT,
2418 rather than replacing the entire matched text.
2419 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2420 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2421 NEWTEXT in place of subexp N.
2422 This is useful only after a regular expression search or match,
2423 since only regular expressions have distinguished subexpressions. */)
2424 (newtext, fixedcase, literal, string, subexp)
2425 Lisp_Object newtext, fixedcase, literal, string, subexp;
2426 {
2427 enum { nochange, all_caps, cap_initial } case_action;
2428 register int pos, pos_byte;
2429 int some_multiletter_word;
2430 int some_lowercase;
2431 int some_uppercase;
2432 int some_nonuppercase_initial;
2433 register int c, prevc;
2434 int sub;
2435 EMACS_INT opoint, newpoint;
2436
2437 CHECK_STRING (newtext);
2438
2439 if (! NILP (string))
2440 CHECK_STRING (string);
2441
2442 case_action = nochange; /* We tried an initialization */
2443 /* but some C compilers blew it */
2444
2445 if (search_regs.num_regs <= 0)
2446 error ("`replace-match' called before any match found");
2447
2448 if (NILP (subexp))
2449 sub = 0;
2450 else
2451 {
2452 CHECK_NUMBER (subexp);
2453 sub = XINT (subexp);
2454 if (sub < 0 || sub >= search_regs.num_regs)
2455 args_out_of_range (subexp, make_number (search_regs.num_regs));
2456 }
2457
2458 if (NILP (string))
2459 {
2460 if (search_regs.start[sub] < BEGV
2461 || search_regs.start[sub] > search_regs.end[sub]
2462 || search_regs.end[sub] > ZV)
2463 args_out_of_range (make_number (search_regs.start[sub]),
2464 make_number (search_regs.end[sub]));
2465 }
2466 else
2467 {
2468 if (search_regs.start[sub] < 0
2469 || search_regs.start[sub] > search_regs.end[sub]
2470 || search_regs.end[sub] > SCHARS (string))
2471 args_out_of_range (make_number (search_regs.start[sub]),
2472 make_number (search_regs.end[sub]));
2473 }
2474
2475 if (NILP (fixedcase))
2476 {
2477 /* Decide how to casify by examining the matched text. */
2478 EMACS_INT last;
2479
2480 pos = search_regs.start[sub];
2481 last = search_regs.end[sub];
2482
2483 if (NILP (string))
2484 pos_byte = CHAR_TO_BYTE (pos);
2485 else
2486 pos_byte = string_char_to_byte (string, pos);
2487
2488 prevc = '\n';
2489 case_action = all_caps;
2490
2491 /* some_multiletter_word is set nonzero if any original word
2492 is more than one letter long. */
2493 some_multiletter_word = 0;
2494 some_lowercase = 0;
2495 some_nonuppercase_initial = 0;
2496 some_uppercase = 0;
2497
2498 while (pos < last)
2499 {
2500 if (NILP (string))
2501 {
2502 c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2503 INC_BOTH (pos, pos_byte);
2504 }
2505 else
2506 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2507
2508 if (LOWERCASEP (c))
2509 {
2510 /* Cannot be all caps if any original char is lower case */
2511
2512 some_lowercase = 1;
2513 if (SYNTAX (prevc) != Sword)
2514 some_nonuppercase_initial = 1;
2515 else
2516 some_multiletter_word = 1;
2517 }
2518 else if (UPPERCASEP (c))
2519 {
2520 some_uppercase = 1;
2521 if (SYNTAX (prevc) != Sword)
2522 ;
2523 else
2524 some_multiletter_word = 1;
2525 }
2526 else
2527 {
2528 /* If the initial is a caseless word constituent,
2529 treat that like a lowercase initial. */
2530 if (SYNTAX (prevc) != Sword)
2531 some_nonuppercase_initial = 1;
2532 }
2533
2534 prevc = c;
2535 }
2536
2537 /* Convert to all caps if the old text is all caps
2538 and has at least one multiletter word. */
2539 if (! some_lowercase && some_multiletter_word)
2540 case_action = all_caps;
2541 /* Capitalize each word, if the old text has all capitalized words. */
2542 else if (!some_nonuppercase_initial && some_multiletter_word)
2543 case_action = cap_initial;
2544 else if (!some_nonuppercase_initial && some_uppercase)
2545 /* Should x -> yz, operating on X, give Yz or YZ?
2546 We'll assume the latter. */
2547 case_action = all_caps;
2548 else
2549 case_action = nochange;
2550 }
2551
2552 /* Do replacement in a string. */
2553 if (!NILP (string))
2554 {
2555 Lisp_Object before, after;
2556
2557 before = Fsubstring (string, make_number (0),
2558 make_number (search_regs.start[sub]));
2559 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2560
2561 /* Substitute parts of the match into NEWTEXT
2562 if desired. */
2563 if (NILP (literal))
2564 {
2565 EMACS_INT lastpos = 0;
2566 EMACS_INT lastpos_byte = 0;
2567 /* We build up the substituted string in ACCUM. */
2568 Lisp_Object accum;
2569 Lisp_Object middle;
2570 int length = SBYTES (newtext);
2571
2572 accum = Qnil;
2573
2574 for (pos_byte = 0, pos = 0; pos_byte < length;)
2575 {
2576 int substart = -1;
2577 int subend = 0;
2578 int delbackslash = 0;
2579
2580 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2581
2582 if (c == '\\')
2583 {
2584 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2585
2586 if (c == '&')
2587 {
2588 substart = search_regs.start[sub];
2589 subend = search_regs.end[sub];
2590 }
2591 else if (c >= '1' && c <= '9')
2592 {
2593 if (search_regs.start[c - '0'] >= 0
2594 && c <= search_regs.num_regs + '0')
2595 {
2596 substart = search_regs.start[c - '0'];
2597 subend = search_regs.end[c - '0'];
2598 }
2599 else
2600 {
2601 /* If that subexp did not match,
2602 replace \\N with nothing. */
2603 substart = 0;
2604 subend = 0;
2605 }
2606 }
2607 else if (c == '\\')
2608 delbackslash = 1;
2609 else
2610 error ("Invalid use of `\\' in replacement text");
2611 }
2612 if (substart >= 0)
2613 {
2614 if (pos - 2 != lastpos)
2615 middle = substring_both (newtext, lastpos,
2616 lastpos_byte,
2617 pos - 2, pos_byte - 2);
2618 else
2619 middle = Qnil;
2620 accum = concat3 (accum, middle,
2621 Fsubstring (string,
2622 make_number (substart),
2623 make_number (subend)));
2624 lastpos = pos;
2625 lastpos_byte = pos_byte;
2626 }
2627 else if (delbackslash)
2628 {
2629 middle = substring_both (newtext, lastpos,
2630 lastpos_byte,
2631 pos - 1, pos_byte - 1);
2632
2633 accum = concat2 (accum, middle);
2634 lastpos = pos;
2635 lastpos_byte = pos_byte;
2636 }
2637 }
2638
2639 if (pos != lastpos)
2640 middle = substring_both (newtext, lastpos,
2641 lastpos_byte,
2642 pos, pos_byte);
2643 else
2644 middle = Qnil;
2645
2646 newtext = concat2 (accum, middle);
2647 }
2648
2649 /* Do case substitution in NEWTEXT if desired. */
2650 if (case_action == all_caps)
2651 newtext = Fupcase (newtext);
2652 else if (case_action == cap_initial)
2653 newtext = Fupcase_initials (newtext);
2654
2655 return concat3 (before, newtext, after);
2656 }
2657
2658 /* Record point, then move (quietly) to the start of the match. */
2659 if (PT >= search_regs.end[sub])
2660 opoint = PT - ZV;
2661 else if (PT > search_regs.start[sub])
2662 opoint = search_regs.end[sub] - ZV;
2663 else
2664 opoint = PT;
2665
2666 /* If we want non-literal replacement,
2667 perform substitution on the replacement string. */
2668 if (NILP (literal))
2669 {
2670 int length = SBYTES (newtext);
2671 unsigned char *substed;
2672 int substed_alloc_size, substed_len;
2673 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2674 int str_multibyte = STRING_MULTIBYTE (newtext);
2675 Lisp_Object rev_tbl;
2676 int really_changed = 0;
2677
2678 rev_tbl = Qnil;
2679
2680 substed_alloc_size = length * 2 + 100;
2681 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2682 substed_len = 0;
2683
2684 /* Go thru NEWTEXT, producing the actual text to insert in
2685 SUBSTED while adjusting multibyteness to that of the current
2686 buffer. */
2687
2688 for (pos_byte = 0, pos = 0; pos_byte < length;)
2689 {
2690 unsigned char str[MAX_MULTIBYTE_LENGTH];
2691 unsigned char *add_stuff = NULL;
2692 int add_len = 0;
2693 int idx = -1;
2694
2695 if (str_multibyte)
2696 {
2697 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2698 if (!buf_multibyte)
2699 c = multibyte_char_to_unibyte (c, rev_tbl);
2700 }
2701 else
2702 {
2703 /* Note that we don't have to increment POS. */
2704 c = SREF (newtext, pos_byte++);
2705 if (buf_multibyte)
2706 MAKE_CHAR_MULTIBYTE (c);
2707 }
2708
2709 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2710 or set IDX to a match index, which means put that part
2711 of the buffer text into SUBSTED. */
2712
2713 if (c == '\\')
2714 {
2715 really_changed = 1;
2716
2717 if (str_multibyte)
2718 {
2719 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2720 pos, pos_byte);
2721 if (!buf_multibyte && !ASCII_CHAR_P (c))
2722 c = multibyte_char_to_unibyte (c, rev_tbl);
2723 }
2724 else
2725 {
2726 c = SREF (newtext, pos_byte++);
2727 if (buf_multibyte)
2728 MAKE_CHAR_MULTIBYTE (c);
2729 }
2730
2731 if (c == '&')
2732 idx = sub;
2733 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2734 {
2735 if (search_regs.start[c - '0'] >= 1)
2736 idx = c - '0';
2737 }
2738 else if (c == '\\')
2739 add_len = 1, add_stuff = "\\";
2740 else
2741 {
2742 xfree (substed);
2743 error ("Invalid use of `\\' in replacement text");
2744 }
2745 }
2746 else
2747 {
2748 add_len = CHAR_STRING (c, str);
2749 add_stuff = str;
2750 }
2751
2752 /* If we want to copy part of a previous match,
2753 set up ADD_STUFF and ADD_LEN to point to it. */
2754 if (idx >= 0)
2755 {
2756 EMACS_INT begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2757 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2758 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2759 move_gap (search_regs.start[idx]);
2760 add_stuff = BYTE_POS_ADDR (begbyte);
2761 }
2762
2763 /* Now the stuff we want to add to SUBSTED
2764 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2765
2766 /* Make sure SUBSTED is big enough. */
2767 if (substed_len + add_len >= substed_alloc_size)
2768 {
2769 substed_alloc_size = substed_len + add_len + 500;
2770 substed = (unsigned char *) xrealloc (substed,
2771 substed_alloc_size + 1);
2772 }
2773
2774 /* Now add to the end of SUBSTED. */
2775 if (add_stuff)
2776 {
2777 bcopy (add_stuff, substed + substed_len, add_len);
2778 substed_len += add_len;
2779 }
2780 }
2781
2782 if (really_changed)
2783 {
2784 if (buf_multibyte)
2785 {
2786 int nchars = multibyte_chars_in_text (substed, substed_len);
2787
2788 newtext = make_multibyte_string (substed, nchars, substed_len);
2789 }
2790 else
2791 newtext = make_unibyte_string (substed, substed_len);
2792 }
2793 xfree (substed);
2794 }
2795
2796 /* Replace the old text with the new in the cleanest possible way. */
2797 replace_range (search_regs.start[sub], search_regs.end[sub],
2798 newtext, 1, 0, 1);
2799 newpoint = search_regs.start[sub] + SCHARS (newtext);
2800
2801 if (case_action == all_caps)
2802 Fupcase_region (make_number (search_regs.start[sub]),
2803 make_number (newpoint));
2804 else if (case_action == cap_initial)
2805 Fupcase_initials_region (make_number (search_regs.start[sub]),
2806 make_number (newpoint));
2807
2808 /* Adjust search data for this change. */
2809 {
2810 EMACS_INT oldend = search_regs.end[sub];
2811 EMACS_INT oldstart = search_regs.start[sub];
2812 EMACS_INT change = newpoint - search_regs.end[sub];
2813 int i;
2814
2815 for (i = 0; i < search_regs.num_regs; i++)
2816 {
2817 if (search_regs.start[i] >= oldend)
2818 search_regs.start[i] += change;
2819 else if (search_regs.start[i] > oldstart)
2820 search_regs.start[i] = oldstart;
2821 if (search_regs.end[i] >= oldend)
2822 search_regs.end[i] += change;
2823 else if (search_regs.end[i] > oldstart)
2824 search_regs.end[i] = oldstart;
2825 }
2826 }
2827
2828 /* Put point back where it was in the text. */
2829 if (opoint <= 0)
2830 TEMP_SET_PT (opoint + ZV);
2831 else
2832 TEMP_SET_PT (opoint);
2833
2834 /* Now move point "officially" to the start of the inserted replacement. */
2835 move_if_not_intangible (newpoint);
2836
2837 return Qnil;
2838 }
2839 \f
2840 static Lisp_Object
2841 match_limit (Lisp_Object num, int beginningp)
2842 {
2843 register int n;
2844
2845 CHECK_NUMBER (num);
2846 n = XINT (num);
2847 if (n < 0)
2848 args_out_of_range (num, make_number (0));
2849 if (search_regs.num_regs <= 0)
2850 error ("No match data, because no search succeeded");
2851 if (n >= search_regs.num_regs
2852 || search_regs.start[n] < 0)
2853 return Qnil;
2854 return (make_number ((beginningp) ? search_regs.start[n]
2855 : search_regs.end[n]));
2856 }
2857
2858 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2859 doc: /* Return position of start of text matched by last search.
2860 SUBEXP, a number, specifies which parenthesized expression in the last
2861 regexp.
2862 Value is nil if SUBEXPth pair didn't match, or there were less than
2863 SUBEXP pairs.
2864 Zero means the entire text matched by the whole regexp or whole string. */)
2865 (subexp)
2866 Lisp_Object subexp;
2867 {
2868 return match_limit (subexp, 1);
2869 }
2870
2871 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2872 doc: /* Return position of end of text matched by last search.
2873 SUBEXP, a number, specifies which parenthesized expression in the last
2874 regexp.
2875 Value is nil if SUBEXPth pair didn't match, or there were less than
2876 SUBEXP pairs.
2877 Zero means the entire text matched by the whole regexp or whole string. */)
2878 (subexp)
2879 Lisp_Object subexp;
2880 {
2881 return match_limit (subexp, 0);
2882 }
2883
2884 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2885 doc: /* Return a list containing all info on what the last search matched.
2886 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2887 All the elements are markers or nil (nil if the Nth pair didn't match)
2888 if the last match was on a buffer; integers or nil if a string was matched.
2889 Use `set-match-data' to reinstate the data in this list.
2890
2891 If INTEGERS (the optional first argument) is non-nil, always use
2892 integers \(rather than markers) to represent buffer positions. In
2893 this case, and if the last match was in a buffer, the buffer will get
2894 stored as one additional element at the end of the list.
2895
2896 If REUSE is a list, reuse it as part of the value. If REUSE is long
2897 enough to hold all the values, and if INTEGERS is non-nil, no consing
2898 is done.
2899
2900 If optional third arg RESEAT is non-nil, any previous markers on the
2901 REUSE list will be modified to point to nowhere.
2902
2903 Return value is undefined if the last search failed. */)
2904 (integers, reuse, reseat)
2905 Lisp_Object integers, reuse, reseat;
2906 {
2907 Lisp_Object tail, prev;
2908 Lisp_Object *data;
2909 int i, len;
2910
2911 if (!NILP (reseat))
2912 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2913 if (MARKERP (XCAR (tail)))
2914 {
2915 unchain_marker (XMARKER (XCAR (tail)));
2916 XSETCAR (tail, Qnil);
2917 }
2918
2919 if (NILP (last_thing_searched))
2920 return Qnil;
2921
2922 prev = Qnil;
2923
2924 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2925 * sizeof (Lisp_Object));
2926
2927 len = 0;
2928 for (i = 0; i < search_regs.num_regs; i++)
2929 {
2930 int start = search_regs.start[i];
2931 if (start >= 0)
2932 {
2933 if (EQ (last_thing_searched, Qt)
2934 || ! NILP (integers))
2935 {
2936 XSETFASTINT (data[2 * i], start);
2937 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2938 }
2939 else if (BUFFERP (last_thing_searched))
2940 {
2941 data[2 * i] = Fmake_marker ();
2942 Fset_marker (data[2 * i],
2943 make_number (start),
2944 last_thing_searched);
2945 data[2 * i + 1] = Fmake_marker ();
2946 Fset_marker (data[2 * i + 1],
2947 make_number (search_regs.end[i]),
2948 last_thing_searched);
2949 }
2950 else
2951 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2952 abort ();
2953
2954 len = 2 * i + 2;
2955 }
2956 else
2957 data[2 * i] = data[2 * i + 1] = Qnil;
2958 }
2959
2960 if (BUFFERP (last_thing_searched) && !NILP (integers))
2961 {
2962 data[len] = last_thing_searched;
2963 len++;
2964 }
2965
2966 /* If REUSE is not usable, cons up the values and return them. */
2967 if (! CONSP (reuse))
2968 return Flist (len, data);
2969
2970 /* If REUSE is a list, store as many value elements as will fit
2971 into the elements of REUSE. */
2972 for (i = 0, tail = reuse; CONSP (tail);
2973 i++, tail = XCDR (tail))
2974 {
2975 if (i < len)
2976 XSETCAR (tail, data[i]);
2977 else
2978 XSETCAR (tail, Qnil);
2979 prev = tail;
2980 }
2981
2982 /* If we couldn't fit all value elements into REUSE,
2983 cons up the rest of them and add them to the end of REUSE. */
2984 if (i < len)
2985 XSETCDR (prev, Flist (len - i, data + i));
2986
2987 return reuse;
2988 }
2989
2990 /* We used to have an internal use variant of `reseat' described as:
2991
2992 If RESEAT is `evaporate', put the markers back on the free list
2993 immediately. No other references to the markers must exist in this
2994 case, so it is used only internally on the unwind stack and
2995 save-match-data from Lisp.
2996
2997 But it was ill-conceived: those supposedly-internal markers get exposed via
2998 the undo-list, so freeing them here is unsafe. */
2999
3000 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
3001 doc: /* Set internal data on last search match from elements of LIST.
3002 LIST should have been created by calling `match-data' previously.
3003
3004 If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
3005 (list, reseat)
3006 register Lisp_Object list, reseat;
3007 {
3008 register int i;
3009 register Lisp_Object marker;
3010
3011 if (running_asynch_code)
3012 save_search_regs ();
3013
3014 CHECK_LIST (list);
3015
3016 /* Unless we find a marker with a buffer or an explicit buffer
3017 in LIST, assume that this match data came from a string. */
3018 last_thing_searched = Qt;
3019
3020 /* Allocate registers if they don't already exist. */
3021 {
3022 int length = XFASTINT (Flength (list)) / 2;
3023
3024 if (length > search_regs.num_regs)
3025 {
3026 if (search_regs.num_regs == 0)
3027 {
3028 search_regs.start
3029 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3030 search_regs.end
3031 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3032 }
3033 else
3034 {
3035 search_regs.start
3036 = (regoff_t *) xrealloc (search_regs.start,
3037 length * sizeof (regoff_t));
3038 search_regs.end
3039 = (regoff_t *) xrealloc (search_regs.end,
3040 length * sizeof (regoff_t));
3041 }
3042
3043 for (i = search_regs.num_regs; i < length; i++)
3044 search_regs.start[i] = -1;
3045
3046 search_regs.num_regs = length;
3047 }
3048
3049 for (i = 0; CONSP (list); i++)
3050 {
3051 marker = XCAR (list);
3052 if (BUFFERP (marker))
3053 {
3054 last_thing_searched = marker;
3055 break;
3056 }
3057 if (i >= length)
3058 break;
3059 if (NILP (marker))
3060 {
3061 search_regs.start[i] = -1;
3062 list = XCDR (list);
3063 }
3064 else
3065 {
3066 EMACS_INT from;
3067 Lisp_Object m;
3068
3069 m = marker;
3070 if (MARKERP (marker))
3071 {
3072 if (XMARKER (marker)->buffer == 0)
3073 XSETFASTINT (marker, 0);
3074 else
3075 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3076 }
3077
3078 CHECK_NUMBER_COERCE_MARKER (marker);
3079 from = XINT (marker);
3080
3081 if (!NILP (reseat) && MARKERP (m))
3082 {
3083 unchain_marker (XMARKER (m));
3084 XSETCAR (list, Qnil);
3085 }
3086
3087 if ((list = XCDR (list), !CONSP (list)))
3088 break;
3089
3090 m = marker = XCAR (list);
3091
3092 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3093 XSETFASTINT (marker, 0);
3094
3095 CHECK_NUMBER_COERCE_MARKER (marker);
3096 search_regs.start[i] = from;
3097 search_regs.end[i] = XINT (marker);
3098
3099 if (!NILP (reseat) && MARKERP (m))
3100 {
3101 unchain_marker (XMARKER (m));
3102 XSETCAR (list, Qnil);
3103 }
3104 }
3105 list = XCDR (list);
3106 }
3107
3108 for (; i < search_regs.num_regs; i++)
3109 search_regs.start[i] = -1;
3110 }
3111
3112 return Qnil;
3113 }
3114
3115 /* If non-zero the match data have been saved in saved_search_regs
3116 during the execution of a sentinel or filter. */
3117 static int search_regs_saved;
3118 static struct re_registers saved_search_regs;
3119 static Lisp_Object saved_last_thing_searched;
3120
3121 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3122 if asynchronous code (filter or sentinel) is running. */
3123 static void
3124 save_search_regs (void)
3125 {
3126 if (!search_regs_saved)
3127 {
3128 saved_search_regs.num_regs = search_regs.num_regs;
3129 saved_search_regs.start = search_regs.start;
3130 saved_search_regs.end = search_regs.end;
3131 saved_last_thing_searched = last_thing_searched;
3132 last_thing_searched = Qnil;
3133 search_regs.num_regs = 0;
3134 search_regs.start = 0;
3135 search_regs.end = 0;
3136
3137 search_regs_saved = 1;
3138 }
3139 }
3140
3141 /* Called upon exit from filters and sentinels. */
3142 void
3143 restore_search_regs (void)
3144 {
3145 if (search_regs_saved)
3146 {
3147 if (search_regs.num_regs > 0)
3148 {
3149 xfree (search_regs.start);
3150 xfree (search_regs.end);
3151 }
3152 search_regs.num_regs = saved_search_regs.num_regs;
3153 search_regs.start = saved_search_regs.start;
3154 search_regs.end = saved_search_regs.end;
3155 last_thing_searched = saved_last_thing_searched;
3156 saved_last_thing_searched = Qnil;
3157 search_regs_saved = 0;
3158 }
3159 }
3160
3161 static Lisp_Object
3162 unwind_set_match_data (Lisp_Object list)
3163 {
3164 /* It is NOT ALWAYS safe to free (evaporate) the markers immediately. */
3165 return Fset_match_data (list, Qt);
3166 }
3167
3168 /* Called to unwind protect the match data. */
3169 void
3170 record_unwind_save_match_data (void)
3171 {
3172 record_unwind_protect (unwind_set_match_data,
3173 Fmatch_data (Qnil, Qnil, Qnil));
3174 }
3175
3176 /* Quote a string to inactivate reg-expr chars */
3177
3178 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3179 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
3180 (string)
3181 Lisp_Object string;
3182 {
3183 register unsigned char *in, *out, *end;
3184 register unsigned char *temp;
3185 int backslashes_added = 0;
3186
3187 CHECK_STRING (string);
3188
3189 temp = (unsigned char *) alloca (SBYTES (string) * 2);
3190
3191 /* Now copy the data into the new string, inserting escapes. */
3192
3193 in = SDATA (string);
3194 end = in + SBYTES (string);
3195 out = temp;
3196
3197 for (; in != end; in++)
3198 {
3199 if (*in == '['
3200 || *in == '*' || *in == '.' || *in == '\\'
3201 || *in == '?' || *in == '+'
3202 || *in == '^' || *in == '$')
3203 *out++ = '\\', backslashes_added++;
3204 *out++ = *in;
3205 }
3206
3207 return make_specified_string (temp,
3208 SCHARS (string) + backslashes_added,
3209 out - temp,
3210 STRING_MULTIBYTE (string));
3211 }
3212 \f
3213 void
3214 syms_of_search (void)
3215 {
3216 register int i;
3217
3218 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3219 {
3220 searchbufs[i].buf.allocated = 100;
3221 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3222 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3223 searchbufs[i].regexp = Qnil;
3224 searchbufs[i].whitespace_regexp = Qnil;
3225 searchbufs[i].syntax_table = Qnil;
3226 staticpro (&searchbufs[i].regexp);
3227 staticpro (&searchbufs[i].whitespace_regexp);
3228 staticpro (&searchbufs[i].syntax_table);
3229 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3230 }
3231 searchbuf_head = &searchbufs[0];
3232
3233 Qsearch_failed = intern_c_string ("search-failed");
3234 staticpro (&Qsearch_failed);
3235 Qinvalid_regexp = intern_c_string ("invalid-regexp");
3236 staticpro (&Qinvalid_regexp);
3237
3238 Fput (Qsearch_failed, Qerror_conditions,
3239 pure_cons (Qsearch_failed, pure_cons (Qerror, Qnil)));
3240 Fput (Qsearch_failed, Qerror_message,
3241 make_pure_c_string ("Search failed"));
3242
3243 Fput (Qinvalid_regexp, Qerror_conditions,
3244 pure_cons (Qinvalid_regexp, pure_cons (Qerror, Qnil)));
3245 Fput (Qinvalid_regexp, Qerror_message,
3246 make_pure_c_string ("Invalid regexp"));
3247
3248 last_thing_searched = Qnil;
3249 staticpro (&last_thing_searched);
3250
3251 saved_last_thing_searched = Qnil;
3252 staticpro (&saved_last_thing_searched);
3253
3254 DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3255 doc: /* Regexp to substitute for bunches of spaces in regexp search.
3256 Some commands use this for user-specified regexps.
3257 Spaces that occur inside character classes or repetition operators
3258 or other such regexp constructs are not replaced with this.
3259 A value of nil (which is the normal value) means treat spaces literally. */);
3260 Vsearch_spaces_regexp = Qnil;
3261
3262 DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data,
3263 doc: /* Internal use only.
3264 If non-nil, the primitive searching and matching functions
3265 such as `looking-at', `string-match', `re-search-forward', etc.,
3266 do not set the match data. The proper way to use this variable
3267 is to bind it with `let' around a small expression. */);
3268 Vinhibit_changing_match_data = Qnil;
3269
3270 defsubr (&Slooking_at);
3271 defsubr (&Sposix_looking_at);
3272 defsubr (&Sstring_match);
3273 defsubr (&Sposix_string_match);
3274 defsubr (&Ssearch_forward);
3275 defsubr (&Ssearch_backward);
3276 defsubr (&Sword_search_forward);
3277 defsubr (&Sword_search_backward);
3278 defsubr (&Sword_search_forward_lax);
3279 defsubr (&Sword_search_backward_lax);
3280 defsubr (&Sre_search_forward);
3281 defsubr (&Sre_search_backward);
3282 defsubr (&Sposix_search_forward);
3283 defsubr (&Sposix_search_backward);
3284 defsubr (&Sreplace_match);
3285 defsubr (&Smatch_beginning);
3286 defsubr (&Smatch_end);
3287 defsubr (&Smatch_data);
3288 defsubr (&Sset_match_data);
3289 defsubr (&Sregexp_quote);
3290 }
3291
3292 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3293 (do not change this comment) */