Fix -Wimplicit warnings.
[bpt/emacs.git] / src / search.c
1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 86, 87, 93, 94, 97, 1998 Free Software Foundation, Inc.
3
4 This file is part of GNU Emacs.
5
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21
22 #include <config.h>
23 #ifdef STDC_HEADERS
24 #include <stdlib.h>
25 #endif
26 #include "lisp.h"
27 #include "syntax.h"
28 #include "category.h"
29 #include "buffer.h"
30 #include "charset.h"
31 #include "region-cache.h"
32 #include "commands.h"
33 #include "blockinput.h"
34 #include "intervals.h"
35
36 #include <sys/types.h>
37 #include "regex.h"
38
39 #define min(a, b) ((a) < (b) ? (a) : (b))
40 #define max(a, b) ((a) > (b) ? (a) : (b))
41
42 #define REGEXP_CACHE_SIZE 20
43
44 /* If the regexp is non-nil, then the buffer contains the compiled form
45 of that regexp, suitable for searching. */
46 struct regexp_cache
47 {
48 struct regexp_cache *next;
49 Lisp_Object regexp;
50 struct re_pattern_buffer buf;
51 char fastmap[0400];
52 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
53 char posix;
54 };
55
56 /* The instances of that struct. */
57 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
58
59 /* The head of the linked list; points to the most recently used buffer. */
60 struct regexp_cache *searchbuf_head;
61
62
63 /* Every call to re_match, etc., must pass &search_regs as the regs
64 argument unless you can show it is unnecessary (i.e., if re_match
65 is certainly going to be called again before region-around-match
66 can be called).
67
68 Since the registers are now dynamically allocated, we need to make
69 sure not to refer to the Nth register before checking that it has
70 been allocated by checking search_regs.num_regs.
71
72 The regex code keeps track of whether it has allocated the search
73 buffer using bits in the re_pattern_buffer. This means that whenever
74 you compile a new pattern, it completely forgets whether it has
75 allocated any registers, and will allocate new registers the next
76 time you call a searching or matching function. Therefore, we need
77 to call re_set_registers after compiling a new pattern or after
78 setting the match registers, so that the regex functions will be
79 able to free or re-allocate it properly. */
80 static struct re_registers search_regs;
81
82 /* The buffer in which the last search was performed, or
83 Qt if the last search was done in a string;
84 Qnil if no searching has been done yet. */
85 static Lisp_Object last_thing_searched;
86
87 /* error condition signaled when regexp compile_pattern fails */
88
89 Lisp_Object Qinvalid_regexp;
90
91 static void set_search_regs ();
92 static void save_search_regs ();
93 static int simple_search ();
94 static int boyer_moore ();
95 static int search_buffer ();
96
97 static void
98 matcher_overflow ()
99 {
100 error ("Stack overflow in regexp matcher");
101 }
102
103 #ifdef __STDC__
104 #define CONST const
105 #else
106 #define CONST
107 #endif
108
109 /* Compile a regexp and signal a Lisp error if anything goes wrong.
110 PATTERN is the pattern to compile.
111 CP is the place to put the result.
112 TRANSLATE is a translation table for ignoring case, or nil for none.
113 REGP is the structure that says where to store the "register"
114 values that will result from matching this pattern.
115 If it is 0, we should compile the pattern not to record any
116 subexpression bounds.
117 POSIX is nonzero if we want full backtracking (POSIX style)
118 for this pattern. 0 means backtrack only enough to get a valid match.
119 MULTIBYTE is nonzero if we want to handle multibyte characters in
120 PATTERN. 0 means all multibyte characters are recognized just as
121 sequences of binary data. */
122
123 static void
124 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
125 struct regexp_cache *cp;
126 Lisp_Object pattern;
127 Lisp_Object translate;
128 struct re_registers *regp;
129 int posix;
130 int multibyte;
131 {
132 char *raw_pattern;
133 int raw_pattern_size;
134 char *val;
135 reg_syntax_t old;
136
137 /* MULTIBYTE says whether the text to be searched is multibyte.
138 We must convert PATTERN to match that, or we will not really
139 find things right. */
140
141 if (multibyte == STRING_MULTIBYTE (pattern))
142 {
143 raw_pattern = (char *) XSTRING (pattern)->data;
144 raw_pattern_size = STRING_BYTES (XSTRING (pattern));
145 }
146 else if (multibyte)
147 {
148 raw_pattern_size = count_size_as_multibyte (XSTRING (pattern)->data,
149 XSTRING (pattern)->size);
150 raw_pattern = (char *) alloca (raw_pattern_size + 1);
151 copy_text (XSTRING (pattern)->data, raw_pattern,
152 XSTRING (pattern)->size, 0, 1);
153 }
154 else
155 {
156 /* Converting multibyte to single-byte.
157
158 ??? Perhaps this conversion should be done in a special way
159 by subtracting nonascii-insert-offset from each non-ASCII char,
160 so that only the multibyte chars which really correspond to
161 the chosen single-byte character set can possibly match. */
162 raw_pattern_size = XSTRING (pattern)->size;
163 raw_pattern = (char *) alloca (raw_pattern_size + 1);
164 copy_text (XSTRING (pattern)->data, raw_pattern,
165 STRING_BYTES (XSTRING (pattern)), 1, 0);
166 }
167
168 cp->regexp = Qnil;
169 cp->buf.translate = (! NILP (translate) ? translate : 0);
170 cp->posix = posix;
171 cp->buf.multibyte = multibyte;
172 BLOCK_INPUT;
173 old = re_set_syntax (RE_SYNTAX_EMACS
174 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
175 val = (char *) re_compile_pattern (raw_pattern, raw_pattern_size, &cp->buf);
176 re_set_syntax (old);
177 UNBLOCK_INPUT;
178 if (val)
179 Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
180
181 cp->regexp = Fcopy_sequence (pattern);
182 }
183
184 /* Compile a regexp if necessary, but first check to see if there's one in
185 the cache.
186 PATTERN is the pattern to compile.
187 TRANSLATE is a translation table for ignoring case, or nil for none.
188 REGP is the structure that says where to store the "register"
189 values that will result from matching this pattern.
190 If it is 0, we should compile the pattern not to record any
191 subexpression bounds.
192 POSIX is nonzero if we want full backtracking (POSIX style)
193 for this pattern. 0 means backtrack only enough to get a valid match. */
194
195 struct re_pattern_buffer *
196 compile_pattern (pattern, regp, translate, posix, multibyte)
197 Lisp_Object pattern;
198 struct re_registers *regp;
199 Lisp_Object translate;
200 int posix, multibyte;
201 {
202 struct regexp_cache *cp, **cpp;
203
204 for (cpp = &searchbuf_head; ; cpp = &cp->next)
205 {
206 cp = *cpp;
207 if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
208 && !NILP (Fstring_equal (cp->regexp, pattern))
209 && cp->buf.translate == (! NILP (translate) ? translate : 0)
210 && cp->posix == posix
211 && cp->buf.multibyte == multibyte)
212 break;
213
214 /* If we're at the end of the cache, compile into the last cell. */
215 if (cp->next == 0)
216 {
217 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
218 break;
219 }
220 }
221
222 /* When we get here, cp (aka *cpp) contains the compiled pattern,
223 either because we found it in the cache or because we just compiled it.
224 Move it to the front of the queue to mark it as most recently used. */
225 *cpp = cp->next;
226 cp->next = searchbuf_head;
227 searchbuf_head = cp;
228
229 /* Advise the searching functions about the space we have allocated
230 for register data. */
231 if (regp)
232 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
233
234 return &cp->buf;
235 }
236
237 /* Error condition used for failing searches */
238 Lisp_Object Qsearch_failed;
239
240 Lisp_Object
241 signal_failure (arg)
242 Lisp_Object arg;
243 {
244 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
245 return Qnil;
246 }
247 \f
248 static Lisp_Object
249 looking_at_1 (string, posix)
250 Lisp_Object string;
251 int posix;
252 {
253 Lisp_Object val;
254 unsigned char *p1, *p2;
255 int s1, s2;
256 register int i;
257 struct re_pattern_buffer *bufp;
258
259 if (running_asynch_code)
260 save_search_regs ();
261
262 CHECK_STRING (string, 0);
263 bufp = compile_pattern (string, &search_regs,
264 (!NILP (current_buffer->case_fold_search)
265 ? DOWNCASE_TABLE : Qnil),
266 posix,
267 !NILP (current_buffer->enable_multibyte_characters));
268
269 immediate_quit = 1;
270 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
271
272 /* Get pointers and sizes of the two strings
273 that make up the visible portion of the buffer. */
274
275 p1 = BEGV_ADDR;
276 s1 = GPT_BYTE - BEGV_BYTE;
277 p2 = GAP_END_ADDR;
278 s2 = ZV_BYTE - GPT_BYTE;
279 if (s1 < 0)
280 {
281 p2 = p1;
282 s2 = ZV_BYTE - BEGV_BYTE;
283 s1 = 0;
284 }
285 if (s2 < 0)
286 {
287 s1 = ZV_BYTE - BEGV_BYTE;
288 s2 = 0;
289 }
290
291 re_match_object = Qnil;
292
293 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
294 PT_BYTE - BEGV_BYTE, &search_regs,
295 ZV_BYTE - BEGV_BYTE);
296 if (i == -2)
297 matcher_overflow ();
298
299 val = (0 <= i ? Qt : Qnil);
300 if (i >= 0)
301 for (i = 0; i < search_regs.num_regs; i++)
302 if (search_regs.start[i] >= 0)
303 {
304 search_regs.start[i]
305 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
306 search_regs.end[i]
307 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
308 }
309 XSETBUFFER (last_thing_searched, current_buffer);
310 immediate_quit = 0;
311 return val;
312 }
313
314 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
315 "Return t if text after point matches regular expression REGEXP.\n\
316 This function modifies the match data that `match-beginning',\n\
317 `match-end' and `match-data' access; save and restore the match\n\
318 data if you want to preserve them.")
319 (regexp)
320 Lisp_Object regexp;
321 {
322 return looking_at_1 (regexp, 0);
323 }
324
325 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
326 "Return t if text after point matches regular expression REGEXP.\n\
327 Find the longest match, in accord with Posix regular expression rules.\n\
328 This function modifies the match data that `match-beginning',\n\
329 `match-end' and `match-data' access; save and restore the match\n\
330 data if you want to preserve them.")
331 (regexp)
332 Lisp_Object regexp;
333 {
334 return looking_at_1 (regexp, 1);
335 }
336 \f
337 static Lisp_Object
338 string_match_1 (regexp, string, start, posix)
339 Lisp_Object regexp, string, start;
340 int posix;
341 {
342 int val;
343 struct re_pattern_buffer *bufp;
344 int pos, pos_byte;
345 int i;
346
347 if (running_asynch_code)
348 save_search_regs ();
349
350 CHECK_STRING (regexp, 0);
351 CHECK_STRING (string, 1);
352
353 if (NILP (start))
354 pos = 0, pos_byte = 0;
355 else
356 {
357 int len = XSTRING (string)->size;
358
359 CHECK_NUMBER (start, 2);
360 pos = XINT (start);
361 if (pos < 0 && -pos <= len)
362 pos = len + pos;
363 else if (0 > pos || pos > len)
364 args_out_of_range (string, start);
365 pos_byte = string_char_to_byte (string, pos);
366 }
367
368 bufp = compile_pattern (regexp, &search_regs,
369 (!NILP (current_buffer->case_fold_search)
370 ? DOWNCASE_TABLE : Qnil),
371 posix,
372 STRING_MULTIBYTE (string));
373 immediate_quit = 1;
374 re_match_object = string;
375
376 val = re_search (bufp, (char *) XSTRING (string)->data,
377 STRING_BYTES (XSTRING (string)), pos_byte,
378 STRING_BYTES (XSTRING (string)) - pos_byte,
379 &search_regs);
380 immediate_quit = 0;
381 last_thing_searched = Qt;
382 if (val == -2)
383 matcher_overflow ();
384 if (val < 0) return Qnil;
385
386 for (i = 0; i < search_regs.num_regs; i++)
387 if (search_regs.start[i] >= 0)
388 {
389 search_regs.start[i]
390 = string_byte_to_char (string, search_regs.start[i]);
391 search_regs.end[i]
392 = string_byte_to_char (string, search_regs.end[i]);
393 }
394
395 return make_number (string_byte_to_char (string, val));
396 }
397
398 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
399 "Return index of start of first match for REGEXP in STRING, or nil.\n\
400 If third arg START is non-nil, start search at that index in STRING.\n\
401 For index of first char beyond the match, do (match-end 0).\n\
402 `match-end' and `match-beginning' also give indices of substrings\n\
403 matched by parenthesis constructs in the pattern.")
404 (regexp, string, start)
405 Lisp_Object regexp, string, start;
406 {
407 return string_match_1 (regexp, string, start, 0);
408 }
409
410 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
411 "Return index of start of first match for REGEXP in STRING, or nil.\n\
412 Find the longest match, in accord with Posix regular expression rules.\n\
413 If third arg START is non-nil, start search at that index in STRING.\n\
414 For index of first char beyond the match, do (match-end 0).\n\
415 `match-end' and `match-beginning' also give indices of substrings\n\
416 matched by parenthesis constructs in the pattern.")
417 (regexp, string, start)
418 Lisp_Object regexp, string, start;
419 {
420 return string_match_1 (regexp, string, start, 1);
421 }
422
423 /* Match REGEXP against STRING, searching all of STRING,
424 and return the index of the match, or negative on failure.
425 This does not clobber the match data. */
426
427 int
428 fast_string_match (regexp, string)
429 Lisp_Object regexp, string;
430 {
431 int val;
432 struct re_pattern_buffer *bufp;
433
434 bufp = compile_pattern (regexp, 0, Qnil,
435 0, STRING_MULTIBYTE (string));
436 immediate_quit = 1;
437 re_match_object = string;
438
439 val = re_search (bufp, (char *) XSTRING (string)->data,
440 STRING_BYTES (XSTRING (string)), 0,
441 STRING_BYTES (XSTRING (string)), 0);
442 immediate_quit = 0;
443 return val;
444 }
445
446 /* Match REGEXP against STRING, searching all of STRING ignoring case,
447 and return the index of the match, or negative on failure.
448 This does not clobber the match data.
449 We assume that STRING contains single-byte characters. */
450
451 extern Lisp_Object Vascii_downcase_table;
452
453 int
454 fast_c_string_match_ignore_case (regexp, string)
455 Lisp_Object regexp;
456 char *string;
457 {
458 int val;
459 struct re_pattern_buffer *bufp;
460 int len = strlen (string);
461
462 regexp = string_make_unibyte (regexp);
463 re_match_object = Qt;
464 bufp = compile_pattern (regexp, 0,
465 Vascii_downcase_table, 0,
466 0);
467 immediate_quit = 1;
468 val = re_search (bufp, string, len, 0, len, 0);
469 immediate_quit = 0;
470 return val;
471 }
472 \f
473 /* The newline cache: remembering which sections of text have no newlines. */
474
475 /* If the user has requested newline caching, make sure it's on.
476 Otherwise, make sure it's off.
477 This is our cheezy way of associating an action with the change of
478 state of a buffer-local variable. */
479 static void
480 newline_cache_on_off (buf)
481 struct buffer *buf;
482 {
483 if (NILP (buf->cache_long_line_scans))
484 {
485 /* It should be off. */
486 if (buf->newline_cache)
487 {
488 free_region_cache (buf->newline_cache);
489 buf->newline_cache = 0;
490 }
491 }
492 else
493 {
494 /* It should be on. */
495 if (buf->newline_cache == 0)
496 buf->newline_cache = new_region_cache ();
497 }
498 }
499
500 \f
501 /* Search for COUNT instances of the character TARGET between START and END.
502
503 If COUNT is positive, search forwards; END must be >= START.
504 If COUNT is negative, search backwards for the -COUNTth instance;
505 END must be <= START.
506 If COUNT is zero, do anything you please; run rogue, for all I care.
507
508 If END is zero, use BEGV or ZV instead, as appropriate for the
509 direction indicated by COUNT.
510
511 If we find COUNT instances, set *SHORTAGE to zero, and return the
512 position after the COUNTth match. Note that for reverse motion
513 this is not the same as the usual convention for Emacs motion commands.
514
515 If we don't find COUNT instances before reaching END, set *SHORTAGE
516 to the number of TARGETs left unfound, and return END.
517
518 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
519 except when inside redisplay. */
520
521 int
522 scan_buffer (target, start, end, count, shortage, allow_quit)
523 register int target;
524 int start, end;
525 int count;
526 int *shortage;
527 int allow_quit;
528 {
529 struct region_cache *newline_cache;
530 int direction;
531
532 if (count > 0)
533 {
534 direction = 1;
535 if (! end) end = ZV;
536 }
537 else
538 {
539 direction = -1;
540 if (! end) end = BEGV;
541 }
542
543 newline_cache_on_off (current_buffer);
544 newline_cache = current_buffer->newline_cache;
545
546 if (shortage != 0)
547 *shortage = 0;
548
549 immediate_quit = allow_quit;
550
551 if (count > 0)
552 while (start != end)
553 {
554 /* Our innermost scanning loop is very simple; it doesn't know
555 about gaps, buffer ends, or the newline cache. ceiling is
556 the position of the last character before the next such
557 obstacle --- the last character the dumb search loop should
558 examine. */
559 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
560 int start_byte = CHAR_TO_BYTE (start);
561 int tem;
562
563 /* If we're looking for a newline, consult the newline cache
564 to see where we can avoid some scanning. */
565 if (target == '\n' && newline_cache)
566 {
567 int next_change;
568 immediate_quit = 0;
569 while (region_cache_forward
570 (current_buffer, newline_cache, start_byte, &next_change))
571 start_byte = next_change;
572 immediate_quit = allow_quit;
573
574 /* START should never be after END. */
575 if (start_byte > ceiling_byte)
576 start_byte = ceiling_byte;
577
578 /* Now the text after start is an unknown region, and
579 next_change is the position of the next known region. */
580 ceiling_byte = min (next_change - 1, ceiling_byte);
581 }
582
583 /* The dumb loop can only scan text stored in contiguous
584 bytes. BUFFER_CEILING_OF returns the last character
585 position that is contiguous, so the ceiling is the
586 position after that. */
587 tem = BUFFER_CEILING_OF (start_byte);
588 ceiling_byte = min (tem, ceiling_byte);
589
590 {
591 /* The termination address of the dumb loop. */
592 register unsigned char *ceiling_addr
593 = BYTE_POS_ADDR (ceiling_byte) + 1;
594 register unsigned char *cursor
595 = BYTE_POS_ADDR (start_byte);
596 unsigned char *base = cursor;
597
598 while (cursor < ceiling_addr)
599 {
600 unsigned char *scan_start = cursor;
601
602 /* The dumb loop. */
603 while (*cursor != target && ++cursor < ceiling_addr)
604 ;
605
606 /* If we're looking for newlines, cache the fact that
607 the region from start to cursor is free of them. */
608 if (target == '\n' && newline_cache)
609 know_region_cache (current_buffer, newline_cache,
610 start_byte + scan_start - base,
611 start_byte + cursor - base);
612
613 /* Did we find the target character? */
614 if (cursor < ceiling_addr)
615 {
616 if (--count == 0)
617 {
618 immediate_quit = 0;
619 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
620 }
621 cursor++;
622 }
623 }
624
625 start = BYTE_TO_CHAR (start_byte + cursor - base);
626 }
627 }
628 else
629 while (start > end)
630 {
631 /* The last character to check before the next obstacle. */
632 int ceiling_byte = CHAR_TO_BYTE (end);
633 int start_byte = CHAR_TO_BYTE (start);
634 int tem;
635
636 /* Consult the newline cache, if appropriate. */
637 if (target == '\n' && newline_cache)
638 {
639 int next_change;
640 immediate_quit = 0;
641 while (region_cache_backward
642 (current_buffer, newline_cache, start_byte, &next_change))
643 start_byte = next_change;
644 immediate_quit = allow_quit;
645
646 /* Start should never be at or before end. */
647 if (start_byte <= ceiling_byte)
648 start_byte = ceiling_byte + 1;
649
650 /* Now the text before start is an unknown region, and
651 next_change is the position of the next known region. */
652 ceiling_byte = max (next_change, ceiling_byte);
653 }
654
655 /* Stop scanning before the gap. */
656 tem = BUFFER_FLOOR_OF (start_byte - 1);
657 ceiling_byte = max (tem, ceiling_byte);
658
659 {
660 /* The termination address of the dumb loop. */
661 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
662 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
663 unsigned char *base = cursor;
664
665 while (cursor >= ceiling_addr)
666 {
667 unsigned char *scan_start = cursor;
668
669 while (*cursor != target && --cursor >= ceiling_addr)
670 ;
671
672 /* If we're looking for newlines, cache the fact that
673 the region from after the cursor to start is free of them. */
674 if (target == '\n' && newline_cache)
675 know_region_cache (current_buffer, newline_cache,
676 start_byte + cursor - base,
677 start_byte + scan_start - base);
678
679 /* Did we find the target character? */
680 if (cursor >= ceiling_addr)
681 {
682 if (++count >= 0)
683 {
684 immediate_quit = 0;
685 return BYTE_TO_CHAR (start_byte + cursor - base);
686 }
687 cursor--;
688 }
689 }
690
691 start = BYTE_TO_CHAR (start_byte + cursor - base);
692 }
693 }
694
695 immediate_quit = 0;
696 if (shortage != 0)
697 *shortage = count * direction;
698 return start;
699 }
700 \f
701 /* Search for COUNT instances of a line boundary, which means either a
702 newline or (if selective display enabled) a carriage return.
703 Start at START. If COUNT is negative, search backwards.
704
705 We report the resulting position by calling TEMP_SET_PT_BOTH.
706
707 If we find COUNT instances. we position after (always after,
708 even if scanning backwards) the COUNTth match, and return 0.
709
710 If we don't find COUNT instances before reaching the end of the
711 buffer (or the beginning, if scanning backwards), we return
712 the number of line boundaries left unfound, and position at
713 the limit we bumped up against.
714
715 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
716 except in special cases. */
717
718 int
719 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
720 int start, start_byte;
721 int limit, limit_byte;
722 register int count;
723 int allow_quit;
724 {
725 int direction = ((count > 0) ? 1 : -1);
726
727 register unsigned char *cursor;
728 unsigned char *base;
729
730 register int ceiling;
731 register unsigned char *ceiling_addr;
732
733 int old_immediate_quit = immediate_quit;
734
735 /* If we are not in selective display mode,
736 check only for newlines. */
737 int selective_display = (!NILP (current_buffer->selective_display)
738 && !INTEGERP (current_buffer->selective_display));
739
740 /* The code that follows is like scan_buffer
741 but checks for either newline or carriage return. */
742
743 if (allow_quit)
744 immediate_quit++;
745
746 start_byte = CHAR_TO_BYTE (start);
747
748 if (count > 0)
749 {
750 while (start_byte < limit_byte)
751 {
752 ceiling = BUFFER_CEILING_OF (start_byte);
753 ceiling = min (limit_byte - 1, ceiling);
754 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
755 base = (cursor = BYTE_POS_ADDR (start_byte));
756 while (1)
757 {
758 while (*cursor != '\n' && ++cursor != ceiling_addr)
759 ;
760
761 if (cursor != ceiling_addr)
762 {
763 if (--count == 0)
764 {
765 immediate_quit = old_immediate_quit;
766 start_byte = start_byte + cursor - base + 1;
767 start = BYTE_TO_CHAR (start_byte);
768 TEMP_SET_PT_BOTH (start, start_byte);
769 return 0;
770 }
771 else
772 if (++cursor == ceiling_addr)
773 break;
774 }
775 else
776 break;
777 }
778 start_byte += cursor - base;
779 }
780 }
781 else
782 {
783 while (start_byte > limit_byte)
784 {
785 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
786 ceiling = max (limit_byte, ceiling);
787 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
788 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
789 while (1)
790 {
791 while (--cursor != ceiling_addr && *cursor != '\n')
792 ;
793
794 if (cursor != ceiling_addr)
795 {
796 if (++count == 0)
797 {
798 immediate_quit = old_immediate_quit;
799 /* Return the position AFTER the match we found. */
800 start_byte = start_byte + cursor - base + 1;
801 start = BYTE_TO_CHAR (start_byte);
802 TEMP_SET_PT_BOTH (start, start_byte);
803 return 0;
804 }
805 }
806 else
807 break;
808 }
809 /* Here we add 1 to compensate for the last decrement
810 of CURSOR, which took it past the valid range. */
811 start_byte += cursor - base + 1;
812 }
813 }
814
815 TEMP_SET_PT_BOTH (limit, limit_byte);
816 immediate_quit = old_immediate_quit;
817
818 return count * direction;
819 }
820
821 int
822 find_next_newline_no_quit (from, cnt)
823 register int from, cnt;
824 {
825 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
826 }
827
828 /* Like find_next_newline, but returns position before the newline,
829 not after, and only search up to TO. This isn't just
830 find_next_newline (...)-1, because you might hit TO. */
831
832 int
833 find_before_next_newline (from, to, cnt)
834 int from, to, cnt;
835 {
836 int shortage;
837 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
838
839 if (shortage == 0)
840 pos--;
841
842 return pos;
843 }
844 \f
845 /* Subroutines of Lisp buffer search functions. */
846
847 static Lisp_Object
848 search_command (string, bound, noerror, count, direction, RE, posix)
849 Lisp_Object string, bound, noerror, count;
850 int direction;
851 int RE;
852 int posix;
853 {
854 register int np;
855 int lim, lim_byte;
856 int n = direction;
857
858 if (!NILP (count))
859 {
860 CHECK_NUMBER (count, 3);
861 n *= XINT (count);
862 }
863
864 CHECK_STRING (string, 0);
865 if (NILP (bound))
866 {
867 if (n > 0)
868 lim = ZV, lim_byte = ZV_BYTE;
869 else
870 lim = BEGV, lim_byte = BEGV_BYTE;
871 }
872 else
873 {
874 CHECK_NUMBER_COERCE_MARKER (bound, 1);
875 lim = XINT (bound);
876 if (n > 0 ? lim < PT : lim > PT)
877 error ("Invalid search bound (wrong side of point)");
878 if (lim > ZV)
879 lim = ZV, lim_byte = ZV_BYTE;
880 else if (lim < BEGV)
881 lim = BEGV, lim_byte = BEGV_BYTE;
882 else
883 lim_byte = CHAR_TO_BYTE (lim);
884 }
885
886 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
887 (!NILP (current_buffer->case_fold_search)
888 ? current_buffer->case_canon_table
889 : Qnil),
890 (!NILP (current_buffer->case_fold_search)
891 ? current_buffer->case_eqv_table
892 : Qnil),
893 posix);
894 if (np <= 0)
895 {
896 if (NILP (noerror))
897 return signal_failure (string);
898 if (!EQ (noerror, Qt))
899 {
900 if (lim < BEGV || lim > ZV)
901 abort ();
902 SET_PT_BOTH (lim, lim_byte);
903 return Qnil;
904 #if 0 /* This would be clean, but maybe programs depend on
905 a value of nil here. */
906 np = lim;
907 #endif
908 }
909 else
910 return Qnil;
911 }
912
913 if (np < BEGV || np > ZV)
914 abort ();
915
916 SET_PT (np);
917
918 return make_number (np);
919 }
920 \f
921 /* Return 1 if REGEXP it matches just one constant string. */
922
923 static int
924 trivial_regexp_p (regexp)
925 Lisp_Object regexp;
926 {
927 int len = STRING_BYTES (XSTRING (regexp));
928 unsigned char *s = XSTRING (regexp)->data;
929 unsigned char c;
930 while (--len >= 0)
931 {
932 switch (*s++)
933 {
934 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
935 return 0;
936 case '\\':
937 if (--len < 0)
938 return 0;
939 switch (*s++)
940 {
941 case '|': case '(': case ')': case '`': case '\'': case 'b':
942 case 'B': case '<': case '>': case 'w': case 'W': case 's':
943 case 'S': case '=':
944 case 'c': case 'C': /* for categoryspec and notcategoryspec */
945 case '1': case '2': case '3': case '4': case '5':
946 case '6': case '7': case '8': case '9':
947 return 0;
948 }
949 }
950 }
951 return 1;
952 }
953
954 /* Search for the n'th occurrence of STRING in the current buffer,
955 starting at position POS and stopping at position LIM,
956 treating STRING as a literal string if RE is false or as
957 a regular expression if RE is true.
958
959 If N is positive, searching is forward and LIM must be greater than POS.
960 If N is negative, searching is backward and LIM must be less than POS.
961
962 Returns -x if x occurrences remain to be found (x > 0),
963 or else the position at the beginning of the Nth occurrence
964 (if searching backward) or the end (if searching forward).
965
966 POSIX is nonzero if we want full backtracking (POSIX style)
967 for this pattern. 0 means backtrack only enough to get a valid match. */
968
969 #define TRANSLATE(out, trt, d) \
970 do \
971 { \
972 if (! NILP (trt)) \
973 { \
974 Lisp_Object temp; \
975 temp = Faref (trt, make_number (d)); \
976 if (INTEGERP (temp)) \
977 out = XINT (temp); \
978 else \
979 out = d; \
980 } \
981 else \
982 out = d; \
983 } \
984 while (0)
985
986 static int
987 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
988 RE, trt, inverse_trt, posix)
989 Lisp_Object string;
990 int pos;
991 int pos_byte;
992 int lim;
993 int lim_byte;
994 int n;
995 int RE;
996 Lisp_Object trt;
997 Lisp_Object inverse_trt;
998 int posix;
999 {
1000 int len = XSTRING (string)->size;
1001 int len_byte = STRING_BYTES (XSTRING (string));
1002 register int i;
1003
1004 if (running_asynch_code)
1005 save_search_regs ();
1006
1007 /* Null string is found at starting position. */
1008 if (len == 0)
1009 {
1010 set_search_regs (pos, 0);
1011 return pos;
1012 }
1013
1014 /* Searching 0 times means don't move. */
1015 if (n == 0)
1016 return pos;
1017
1018 if (RE && !trivial_regexp_p (string))
1019 {
1020 unsigned char *p1, *p2;
1021 int s1, s2;
1022 struct re_pattern_buffer *bufp;
1023
1024 bufp = compile_pattern (string, &search_regs, trt, posix,
1025 !NILP (current_buffer->enable_multibyte_characters));
1026
1027 immediate_quit = 1; /* Quit immediately if user types ^G,
1028 because letting this function finish
1029 can take too long. */
1030 QUIT; /* Do a pending quit right away,
1031 to avoid paradoxical behavior */
1032 /* Get pointers and sizes of the two strings
1033 that make up the visible portion of the buffer. */
1034
1035 p1 = BEGV_ADDR;
1036 s1 = GPT_BYTE - BEGV_BYTE;
1037 p2 = GAP_END_ADDR;
1038 s2 = ZV_BYTE - GPT_BYTE;
1039 if (s1 < 0)
1040 {
1041 p2 = p1;
1042 s2 = ZV_BYTE - BEGV_BYTE;
1043 s1 = 0;
1044 }
1045 if (s2 < 0)
1046 {
1047 s1 = ZV_BYTE - BEGV_BYTE;
1048 s2 = 0;
1049 }
1050 re_match_object = Qnil;
1051
1052 while (n < 0)
1053 {
1054 int val;
1055 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1056 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1057 &search_regs,
1058 /* Don't allow match past current point */
1059 pos_byte - BEGV_BYTE);
1060 if (val == -2)
1061 {
1062 matcher_overflow ();
1063 }
1064 if (val >= 0)
1065 {
1066 pos_byte = search_regs.start[0] + BEGV_BYTE;
1067 for (i = 0; i < search_regs.num_regs; i++)
1068 if (search_regs.start[i] >= 0)
1069 {
1070 search_regs.start[i]
1071 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1072 search_regs.end[i]
1073 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1074 }
1075 XSETBUFFER (last_thing_searched, current_buffer);
1076 /* Set pos to the new position. */
1077 pos = search_regs.start[0];
1078 }
1079 else
1080 {
1081 immediate_quit = 0;
1082 return (n);
1083 }
1084 n++;
1085 }
1086 while (n > 0)
1087 {
1088 int val;
1089 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1090 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1091 &search_regs,
1092 lim_byte - BEGV_BYTE);
1093 if (val == -2)
1094 {
1095 matcher_overflow ();
1096 }
1097 if (val >= 0)
1098 {
1099 pos_byte = search_regs.end[0] + BEGV_BYTE;
1100 for (i = 0; i < search_regs.num_regs; i++)
1101 if (search_regs.start[i] >= 0)
1102 {
1103 search_regs.start[i]
1104 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1105 search_regs.end[i]
1106 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1107 }
1108 XSETBUFFER (last_thing_searched, current_buffer);
1109 pos = search_regs.end[0];
1110 }
1111 else
1112 {
1113 immediate_quit = 0;
1114 return (0 - n);
1115 }
1116 n--;
1117 }
1118 immediate_quit = 0;
1119 return (pos);
1120 }
1121 else /* non-RE case */
1122 {
1123 unsigned char *raw_pattern, *pat;
1124 int raw_pattern_size;
1125 int raw_pattern_size_byte;
1126 unsigned char *patbuf;
1127 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1128 unsigned char *base_pat = XSTRING (string)->data;
1129 int charset_base = -1;
1130 int simple = 1;
1131
1132 /* MULTIBYTE says whether the text to be searched is multibyte.
1133 We must convert PATTERN to match that, or we will not really
1134 find things right. */
1135
1136 if (multibyte == STRING_MULTIBYTE (string))
1137 {
1138 raw_pattern = (char *) XSTRING (string)->data;
1139 raw_pattern_size = XSTRING (string)->size;
1140 raw_pattern_size_byte = STRING_BYTES (XSTRING (string));
1141 }
1142 else if (multibyte)
1143 {
1144 raw_pattern_size = XSTRING (string)->size;
1145 raw_pattern_size_byte
1146 = count_size_as_multibyte (XSTRING (string)->data,
1147 raw_pattern_size);
1148 raw_pattern = (char *) alloca (raw_pattern_size_byte + 1);
1149 copy_text (XSTRING (string)->data, raw_pattern,
1150 XSTRING (string)->size, 0, 1);
1151 }
1152 else
1153 {
1154 /* Converting multibyte to single-byte.
1155
1156 ??? Perhaps this conversion should be done in a special way
1157 by subtracting nonascii-insert-offset from each non-ASCII char,
1158 so that only the multibyte chars which really correspond to
1159 the chosen single-byte character set can possibly match. */
1160 raw_pattern_size = XSTRING (string)->size;
1161 raw_pattern_size_byte = XSTRING (string)->size;
1162 raw_pattern = (char *) alloca (raw_pattern_size + 1);
1163 copy_text (XSTRING (string)->data, raw_pattern,
1164 STRING_BYTES (XSTRING (string)), 1, 0);
1165 }
1166
1167 /* Copy and optionally translate the pattern. */
1168 len = raw_pattern_size;
1169 len_byte = raw_pattern_size_byte;
1170 patbuf = (unsigned char *) alloca (len_byte);
1171 pat = patbuf;
1172 base_pat = raw_pattern;
1173 if (multibyte)
1174 {
1175 while (--len >= 0)
1176 {
1177 unsigned char workbuf[4], *str;
1178 int c, translated, inverse;
1179 int in_charlen, charlen;
1180
1181 /* If we got here and the RE flag is set, it's because we're
1182 dealing with a regexp known to be trivial, so the backslash
1183 just quotes the next character. */
1184 if (RE && *base_pat == '\\')
1185 {
1186 len--;
1187 len_byte--;
1188 base_pat++;
1189 }
1190
1191 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1192 /* Translate the character, if requested. */
1193 TRANSLATE (translated, trt, c);
1194 /* If translation changed the byte-length, go back
1195 to the original character. */
1196 charlen = CHAR_STRING (translated, workbuf, str);
1197 if (in_charlen != charlen)
1198 {
1199 translated = c;
1200 charlen = CHAR_STRING (c, workbuf, str);
1201 }
1202
1203 TRANSLATE (inverse, inverse_trt, c);
1204
1205 /* Did this char actually get translated?
1206 Would any other char get translated into it? */
1207 if (translated != c || inverse != c)
1208 {
1209 /* Keep track of which character set row
1210 contains the characters that need translation. */
1211 int charset_base_code = c & ~0xff;
1212 if (charset_base == -1)
1213 charset_base = charset_base_code;
1214 else if (charset_base != charset_base_code)
1215 /* If two different rows appear, needing translation,
1216 then we cannot use boyer_moore search. */
1217 simple = 0;
1218 /* ??? Handa: this must do simple = 0
1219 if c is a composite character. */
1220 }
1221
1222 /* Store this character into the translated pattern. */
1223 bcopy (str, pat, charlen);
1224 pat += charlen;
1225 base_pat += in_charlen;
1226 len_byte -= in_charlen;
1227 }
1228 }
1229 else
1230 {
1231 while (--len >= 0)
1232 {
1233 int c, translated, inverse;
1234
1235 /* If we got here and the RE flag is set, it's because we're
1236 dealing with a regexp known to be trivial, so the backslash
1237 just quotes the next character. */
1238 if (RE && *base_pat == '\\')
1239 {
1240 len--;
1241 base_pat++;
1242 }
1243 c = *base_pat++;
1244 TRANSLATE (translated, trt, c);
1245 TRANSLATE (inverse, inverse_trt, c);
1246
1247 /* Did this char actually get translated?
1248 Would any other char get translated into it? */
1249 if (translated != c || inverse != c)
1250 {
1251 /* Keep track of which character set row
1252 contains the characters that need translation. */
1253 int charset_base_code = c & ~0xff;
1254 if (charset_base == -1)
1255 charset_base = charset_base_code;
1256 else if (charset_base != charset_base_code)
1257 /* If two different rows appear, needing translation,
1258 then we cannot use boyer_moore search. */
1259 simple = 0;
1260 }
1261 *pat++ = translated;
1262 }
1263 }
1264
1265 len_byte = pat - patbuf;
1266 len = raw_pattern_size;
1267 pat = base_pat = patbuf;
1268
1269 if (simple)
1270 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1271 pos, pos_byte, lim, lim_byte,
1272 charset_base);
1273 else
1274 return simple_search (n, pat, len, len_byte, trt,
1275 pos, pos_byte, lim, lim_byte);
1276 }
1277 }
1278 \f
1279 /* Do a simple string search N times for the string PAT,
1280 whose length is LEN/LEN_BYTE,
1281 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1282 TRT is the translation table.
1283
1284 Return the character position where the match is found.
1285 Otherwise, if M matches remained to be found, return -M.
1286
1287 This kind of search works regardless of what is in PAT and
1288 regardless of what is in TRT. It is used in cases where
1289 boyer_moore cannot work. */
1290
1291 static int
1292 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1293 int n;
1294 unsigned char *pat;
1295 int len, len_byte;
1296 Lisp_Object trt;
1297 int pos, pos_byte;
1298 int lim, lim_byte;
1299 {
1300 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1301 int forward = n > 0;
1302
1303 if (lim > pos && multibyte)
1304 while (n > 0)
1305 {
1306 while (1)
1307 {
1308 /* Try matching at position POS. */
1309 int this_pos = pos;
1310 int this_pos_byte = pos_byte;
1311 int this_len = len;
1312 int this_len_byte = len_byte;
1313 unsigned char *p = pat;
1314 if (pos + len > lim)
1315 goto stop;
1316
1317 while (this_len > 0)
1318 {
1319 int charlen, buf_charlen;
1320 int pat_ch, buf_ch;
1321
1322 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1323 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1324 ZV_BYTE - this_pos_byte,
1325 buf_charlen);
1326 TRANSLATE (buf_ch, trt, buf_ch);
1327
1328 if (buf_ch != pat_ch)
1329 break;
1330
1331 this_len_byte -= charlen;
1332 this_len--;
1333 p += charlen;
1334
1335 this_pos_byte += buf_charlen;
1336 this_pos++;
1337 }
1338
1339 if (this_len == 0)
1340 {
1341 pos += len;
1342 pos_byte += len_byte;
1343 break;
1344 }
1345
1346 INC_BOTH (pos, pos_byte);
1347 }
1348
1349 n--;
1350 }
1351 else if (lim > pos)
1352 while (n > 0)
1353 {
1354 while (1)
1355 {
1356 /* Try matching at position POS. */
1357 int this_pos = pos;
1358 int this_len = len;
1359 unsigned char *p = pat;
1360
1361 if (pos + len > lim)
1362 goto stop;
1363
1364 while (this_len > 0)
1365 {
1366 int pat_ch = *p++;
1367 int buf_ch = FETCH_BYTE (this_pos);
1368 TRANSLATE (buf_ch, trt, buf_ch);
1369
1370 if (buf_ch != pat_ch)
1371 break;
1372
1373 this_len--;
1374 this_pos++;
1375 }
1376
1377 if (this_len == 0)
1378 {
1379 pos += len;
1380 break;
1381 }
1382
1383 pos++;
1384 }
1385
1386 n--;
1387 }
1388 /* Backwards search. */
1389 else if (lim < pos && multibyte)
1390 while (n < 0)
1391 {
1392 while (1)
1393 {
1394 /* Try matching at position POS. */
1395 int this_pos = pos - len;
1396 int this_pos_byte = pos_byte - len_byte;
1397 int this_len = len;
1398 int this_len_byte = len_byte;
1399 unsigned char *p = pat;
1400
1401 if (pos - len < lim)
1402 goto stop;
1403
1404 while (this_len > 0)
1405 {
1406 int charlen, buf_charlen;
1407 int pat_ch, buf_ch;
1408
1409 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1410 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1411 ZV_BYTE - this_pos_byte,
1412 buf_charlen);
1413 TRANSLATE (buf_ch, trt, buf_ch);
1414
1415 if (buf_ch != pat_ch)
1416 break;
1417
1418 this_len_byte -= charlen;
1419 this_len--;
1420 p += charlen;
1421 this_pos_byte += buf_charlen;
1422 this_pos++;
1423 }
1424
1425 if (this_len == 0)
1426 {
1427 pos -= len;
1428 pos_byte -= len_byte;
1429 break;
1430 }
1431
1432 DEC_BOTH (pos, pos_byte);
1433 }
1434
1435 n++;
1436 }
1437 else if (lim < pos)
1438 while (n < 0)
1439 {
1440 while (1)
1441 {
1442 /* Try matching at position POS. */
1443 int this_pos = pos - len;
1444 int this_len = len;
1445 unsigned char *p = pat;
1446
1447 if (pos - len < lim)
1448 goto stop;
1449
1450 while (this_len > 0)
1451 {
1452 int pat_ch = *p++;
1453 int buf_ch = FETCH_BYTE (this_pos);
1454 TRANSLATE (buf_ch, trt, buf_ch);
1455
1456 if (buf_ch != pat_ch)
1457 break;
1458 this_len--;
1459 this_pos++;
1460 }
1461
1462 if (this_len == 0)
1463 {
1464 pos -= len;
1465 break;
1466 }
1467
1468 pos--;
1469 }
1470
1471 n++;
1472 }
1473
1474 stop:
1475 if (n == 0)
1476 {
1477 if (forward)
1478 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1479 else
1480 set_search_regs (multibyte ? pos_byte : pos, len_byte);
1481
1482 return pos;
1483 }
1484 else if (n > 0)
1485 return -n;
1486 else
1487 return n;
1488 }
1489 \f
1490 /* Do Boyer-Moore search N times for the string PAT,
1491 whose length is LEN/LEN_BYTE,
1492 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1493 DIRECTION says which direction we search in.
1494 TRT and INVERSE_TRT are translation tables.
1495
1496 This kind of search works if all the characters in PAT that have
1497 nontrivial translation are the same aside from the last byte. This
1498 makes it possible to translate just the last byte of a character,
1499 and do so after just a simple test of the context.
1500
1501 If that criterion is not satisfied, do not call this function. */
1502
1503 static int
1504 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1505 pos, pos_byte, lim, lim_byte, charset_base)
1506 int n;
1507 unsigned char *base_pat;
1508 int len, len_byte;
1509 Lisp_Object trt;
1510 Lisp_Object inverse_trt;
1511 int pos, pos_byte;
1512 int lim, lim_byte;
1513 int charset_base;
1514 {
1515 int direction = ((n > 0) ? 1 : -1);
1516 register int dirlen;
1517 int infinity, limit, k, stride_for_teases;
1518 register int *BM_tab;
1519 int *BM_tab_base;
1520 register unsigned char *cursor, *p_limit;
1521 register int i, j;
1522 unsigned char *pat;
1523 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1524
1525 unsigned char simple_translate[0400];
1526 int translate_prev_byte;
1527 int translate_anteprev_byte;
1528
1529 #ifdef C_ALLOCA
1530 int BM_tab_space[0400];
1531 BM_tab = &BM_tab_space[0];
1532 #else
1533 BM_tab = (int *) alloca (0400 * sizeof (int));
1534 #endif
1535 /* The general approach is that we are going to maintain that we know */
1536 /* the first (closest to the present position, in whatever direction */
1537 /* we're searching) character that could possibly be the last */
1538 /* (furthest from present position) character of a valid match. We */
1539 /* advance the state of our knowledge by looking at that character */
1540 /* and seeing whether it indeed matches the last character of the */
1541 /* pattern. If it does, we take a closer look. If it does not, we */
1542 /* move our pointer (to putative last characters) as far as is */
1543 /* logically possible. This amount of movement, which I call a */
1544 /* stride, will be the length of the pattern if the actual character */
1545 /* appears nowhere in the pattern, otherwise it will be the distance */
1546 /* from the last occurrence of that character to the end of the */
1547 /* pattern. */
1548 /* As a coding trick, an enormous stride is coded into the table for */
1549 /* characters that match the last character. This allows use of only */
1550 /* a single test, a test for having gone past the end of the */
1551 /* permissible match region, to test for both possible matches (when */
1552 /* the stride goes past the end immediately) and failure to */
1553 /* match (where you get nudged past the end one stride at a time). */
1554
1555 /* Here we make a "mickey mouse" BM table. The stride of the search */
1556 /* is determined only by the last character of the putative match. */
1557 /* If that character does not match, we will stride the proper */
1558 /* distance to propose a match that superimposes it on the last */
1559 /* instance of a character that matches it (per trt), or misses */
1560 /* it entirely if there is none. */
1561
1562 dirlen = len_byte * direction;
1563 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1564 if (direction < 0)
1565 pat = (base_pat += len_byte - 1);
1566 else
1567 pat = base_pat;
1568 BM_tab_base = BM_tab;
1569 BM_tab += 0400;
1570 j = dirlen; /* to get it in a register */
1571 /* A character that does not appear in the pattern induces a */
1572 /* stride equal to the pattern length. */
1573 while (BM_tab_base != BM_tab)
1574 {
1575 *--BM_tab = j;
1576 *--BM_tab = j;
1577 *--BM_tab = j;
1578 *--BM_tab = j;
1579 }
1580
1581 /* We use this for translation, instead of TRT itself.
1582 We fill this in to handle the characters that actually
1583 occur in the pattern. Others don't matter anyway! */
1584 bzero (simple_translate, sizeof simple_translate);
1585 for (i = 0; i < 0400; i++)
1586 simple_translate[i] = i;
1587
1588 i = 0;
1589 while (i != infinity)
1590 {
1591 unsigned char *ptr = pat + i;
1592 i += direction;
1593 if (i == dirlen)
1594 i = infinity;
1595 if (! NILP (trt))
1596 {
1597 int ch;
1598 int untranslated;
1599 int this_translated = 1;
1600
1601 if (multibyte
1602 && (ptr + 1 == pat + len_byte || CHAR_HEAD_P (ptr[1])))
1603 {
1604 unsigned char *charstart = ptr;
1605 while (! CHAR_HEAD_P (*charstart))
1606 charstart--;
1607 untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
1608 if (charset_base == (untranslated & ~0xff))
1609 {
1610 TRANSLATE (ch, trt, untranslated);
1611 if (! CHAR_HEAD_P (*ptr))
1612 {
1613 translate_prev_byte = ptr[-1];
1614 if (! CHAR_HEAD_P (translate_prev_byte))
1615 translate_anteprev_byte = ptr[-2];
1616 }
1617 }
1618 else
1619 {
1620 this_translated = 0;
1621 ch = *ptr;
1622 }
1623 }
1624 else if (!multibyte)
1625 TRANSLATE (ch, trt, *ptr);
1626 else
1627 {
1628 ch = *ptr;
1629 this_translated = 0;
1630 }
1631
1632 if (ch > 0400)
1633 j = ((unsigned char) ch) | 0200;
1634 else
1635 j = (unsigned char) ch;
1636
1637 if (i == infinity)
1638 stride_for_teases = BM_tab[j];
1639
1640 BM_tab[j] = dirlen - i;
1641 /* A translation table is accompanied by its inverse -- see */
1642 /* comment following downcase_table for details */
1643 if (this_translated)
1644 {
1645 int starting_ch = ch;
1646 int starting_j = j;
1647 while (1)
1648 {
1649 TRANSLATE (ch, inverse_trt, ch);
1650 if (ch > 0400)
1651 j = ((unsigned char) ch) | 0200;
1652 else
1653 j = (unsigned char) ch;
1654
1655 /* For all the characters that map into CH,
1656 set up simple_translate to map the last byte
1657 into STARTING_J. */
1658 simple_translate[j] = starting_j;
1659 if (ch == starting_ch)
1660 break;
1661 BM_tab[j] = dirlen - i;
1662 }
1663 }
1664 }
1665 else
1666 {
1667 j = *ptr;
1668
1669 if (i == infinity)
1670 stride_for_teases = BM_tab[j];
1671 BM_tab[j] = dirlen - i;
1672 }
1673 /* stride_for_teases tells how much to stride if we get a */
1674 /* match on the far character but are subsequently */
1675 /* disappointed, by recording what the stride would have been */
1676 /* for that character if the last character had been */
1677 /* different. */
1678 }
1679 infinity = dirlen - infinity;
1680 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1681 /* loop invariant - POS_BYTE points at where last char (first
1682 char if reverse) of pattern would align in a possible match. */
1683 while (n != 0)
1684 {
1685 int tail_end;
1686 unsigned char *tail_end_ptr;
1687
1688 /* It's been reported that some (broken) compiler thinks that
1689 Boolean expressions in an arithmetic context are unsigned.
1690 Using an explicit ?1:0 prevents this. */
1691 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1692 < 0)
1693 return (n * (0 - direction));
1694 /* First we do the part we can by pointers (maybe nothing) */
1695 QUIT;
1696 pat = base_pat;
1697 limit = pos_byte - dirlen + direction;
1698 if (direction > 0)
1699 {
1700 limit = BUFFER_CEILING_OF (limit);
1701 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1702 can take on without hitting edge of buffer or the gap. */
1703 limit = min (limit, pos_byte + 20000);
1704 limit = min (limit, lim_byte - 1);
1705 }
1706 else
1707 {
1708 limit = BUFFER_FLOOR_OF (limit);
1709 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1710 can take on without hitting edge of buffer or the gap. */
1711 limit = max (limit, pos_byte - 20000);
1712 limit = max (limit, lim_byte);
1713 }
1714 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1715 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1716
1717 if ((limit - pos_byte) * direction > 20)
1718 {
1719 unsigned char *p2;
1720
1721 p_limit = BYTE_POS_ADDR (limit);
1722 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1723 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1724 while (1) /* use one cursor setting as long as i can */
1725 {
1726 if (direction > 0) /* worth duplicating */
1727 {
1728 /* Use signed comparison if appropriate
1729 to make cursor+infinity sure to be > p_limit.
1730 Assuming that the buffer lies in a range of addresses
1731 that are all "positive" (as ints) or all "negative",
1732 either kind of comparison will work as long
1733 as we don't step by infinity. So pick the kind
1734 that works when we do step by infinity. */
1735 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1736 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1737 cursor += BM_tab[*cursor];
1738 else
1739 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1740 cursor += BM_tab[*cursor];
1741 }
1742 else
1743 {
1744 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1745 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1746 cursor += BM_tab[*cursor];
1747 else
1748 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1749 cursor += BM_tab[*cursor];
1750 }
1751 /* If you are here, cursor is beyond the end of the searched region. */
1752 /* This can happen if you match on the far character of the pattern, */
1753 /* because the "stride" of that character is infinity, a number able */
1754 /* to throw you well beyond the end of the search. It can also */
1755 /* happen if you fail to match within the permitted region and would */
1756 /* otherwise try a character beyond that region */
1757 if ((cursor - p_limit) * direction <= len_byte)
1758 break; /* a small overrun is genuine */
1759 cursor -= infinity; /* large overrun = hit */
1760 i = dirlen - direction;
1761 if (! NILP (trt))
1762 {
1763 while ((i -= direction) + direction != 0)
1764 {
1765 int ch;
1766 cursor -= direction;
1767 /* Translate only the last byte of a character. */
1768 if (! multibyte
1769 || ((cursor == tail_end_ptr
1770 || CHAR_HEAD_P (cursor[1]))
1771 && (CHAR_HEAD_P (cursor[0])
1772 || (translate_prev_byte == cursor[-1]
1773 && (CHAR_HEAD_P (translate_prev_byte)
1774 || translate_anteprev_byte == cursor[-2])))))
1775 ch = simple_translate[*cursor];
1776 else
1777 ch = *cursor;
1778 if (pat[i] != ch)
1779 break;
1780 }
1781 }
1782 else
1783 {
1784 while ((i -= direction) + direction != 0)
1785 {
1786 cursor -= direction;
1787 if (pat[i] != *cursor)
1788 break;
1789 }
1790 }
1791 cursor += dirlen - i - direction; /* fix cursor */
1792 if (i + direction == 0)
1793 {
1794 int position;
1795
1796 cursor -= direction;
1797
1798 position = pos_byte + cursor - p2 + ((direction > 0)
1799 ? 1 - len_byte : 0);
1800 set_search_regs (position, len_byte);
1801
1802 if ((n -= direction) != 0)
1803 cursor += dirlen; /* to resume search */
1804 else
1805 return ((direction > 0)
1806 ? search_regs.end[0] : search_regs.start[0]);
1807 }
1808 else
1809 cursor += stride_for_teases; /* <sigh> we lose - */
1810 }
1811 pos_byte += cursor - p2;
1812 }
1813 else
1814 /* Now we'll pick up a clump that has to be done the hard */
1815 /* way because it covers a discontinuity */
1816 {
1817 limit = ((direction > 0)
1818 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1819 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1820 limit = ((direction > 0)
1821 ? min (limit + len_byte, lim_byte - 1)
1822 : max (limit - len_byte, lim_byte));
1823 /* LIMIT is now the last value POS_BYTE can have
1824 and still be valid for a possible match. */
1825 while (1)
1826 {
1827 /* This loop can be coded for space rather than */
1828 /* speed because it will usually run only once. */
1829 /* (the reach is at most len + 21, and typically */
1830 /* does not exceed len) */
1831 while ((limit - pos_byte) * direction >= 0)
1832 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1833 /* now run the same tests to distinguish going off the */
1834 /* end, a match or a phony match. */
1835 if ((pos_byte - limit) * direction <= len_byte)
1836 break; /* ran off the end */
1837 /* Found what might be a match.
1838 Set POS_BYTE back to last (first if reverse) pos. */
1839 pos_byte -= infinity;
1840 i = dirlen - direction;
1841 while ((i -= direction) + direction != 0)
1842 {
1843 int ch;
1844 unsigned char *ptr;
1845 pos_byte -= direction;
1846 ptr = BYTE_POS_ADDR (pos_byte);
1847 /* Translate only the last byte of a character. */
1848 if (! multibyte
1849 || ((ptr == tail_end_ptr
1850 || CHAR_HEAD_P (ptr[1]))
1851 && (CHAR_HEAD_P (ptr[0])
1852 || (translate_prev_byte == ptr[-1]
1853 && (CHAR_HEAD_P (translate_prev_byte)
1854 || translate_anteprev_byte == ptr[-2])))))
1855 ch = simple_translate[*ptr];
1856 else
1857 ch = *ptr;
1858 if (pat[i] != ch)
1859 break;
1860 }
1861 /* Above loop has moved POS_BYTE part or all the way
1862 back to the first pos (last pos if reverse).
1863 Set it once again at the last (first if reverse) char. */
1864 pos_byte += dirlen - i- direction;
1865 if (i + direction == 0)
1866 {
1867 int position;
1868 pos_byte -= direction;
1869
1870 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1871
1872 set_search_regs (position, len_byte);
1873
1874 if ((n -= direction) != 0)
1875 pos_byte += dirlen; /* to resume search */
1876 else
1877 return ((direction > 0)
1878 ? search_regs.end[0] : search_regs.start[0]);
1879 }
1880 else
1881 pos_byte += stride_for_teases;
1882 }
1883 }
1884 /* We have done one clump. Can we continue? */
1885 if ((lim_byte - pos_byte) * direction < 0)
1886 return ((0 - n) * direction);
1887 }
1888 return BYTE_TO_CHAR (pos_byte);
1889 }
1890
1891 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1892 for a match just found in the current buffer. */
1893
1894 static void
1895 set_search_regs (beg_byte, nbytes)
1896 int beg_byte, nbytes;
1897 {
1898 /* Make sure we have registers in which to store
1899 the match position. */
1900 if (search_regs.num_regs == 0)
1901 {
1902 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1903 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1904 search_regs.num_regs = 2;
1905 }
1906
1907 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1908 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
1909 XSETBUFFER (last_thing_searched, current_buffer);
1910 }
1911 \f
1912 /* Given a string of words separated by word delimiters,
1913 compute a regexp that matches those exact words
1914 separated by arbitrary punctuation. */
1915
1916 static Lisp_Object
1917 wordify (string)
1918 Lisp_Object string;
1919 {
1920 register unsigned char *p, *o;
1921 register int i, i_byte, len, punct_count = 0, word_count = 0;
1922 Lisp_Object val;
1923 int prev_c = 0;
1924 int adjust;
1925
1926 CHECK_STRING (string, 0);
1927 p = XSTRING (string)->data;
1928 len = XSTRING (string)->size;
1929
1930 for (i = 0, i_byte = 0; i < len; )
1931 {
1932 int c;
1933
1934 if (STRING_MULTIBYTE (string))
1935 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1936 else
1937 c = XSTRING (string)->data[i++];
1938
1939 if (SYNTAX (c) != Sword)
1940 {
1941 punct_count++;
1942 if (i > 0 && SYNTAX (prev_c) == Sword)
1943 word_count++;
1944 }
1945
1946 prev_c = c;
1947 }
1948
1949 if (SYNTAX (prev_c) == Sword)
1950 word_count++;
1951 if (!word_count)
1952 return build_string ("");
1953
1954 adjust = - punct_count + 5 * (word_count - 1) + 4;
1955 val = make_uninit_multibyte_string (len + adjust,
1956 STRING_BYTES (XSTRING (string)) + adjust);
1957
1958 o = XSTRING (val)->data;
1959 *o++ = '\\';
1960 *o++ = 'b';
1961
1962 for (i = 0; i < STRING_BYTES (XSTRING (val)); i++)
1963 if (SYNTAX (p[i]) == Sword)
1964 *o++ = p[i];
1965 else if (i > 0 && SYNTAX (p[i-1]) == Sword && --word_count)
1966 {
1967 *o++ = '\\';
1968 *o++ = 'W';
1969 *o++ = '\\';
1970 *o++ = 'W';
1971 *o++ = '*';
1972 }
1973
1974 *o++ = '\\';
1975 *o++ = 'b';
1976
1977 return val;
1978 }
1979 \f
1980 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
1981 "MSearch backward: ",
1982 "Search backward from point for STRING.\n\
1983 Set point to the beginning of the occurrence found, and return point.\n\
1984 An optional second argument bounds the search; it is a buffer position.\n\
1985 The match found must not extend before that position.\n\
1986 Optional third argument, if t, means if fail just return nil (no error).\n\
1987 If not nil and not t, position at limit of search and return nil.\n\
1988 Optional fourth argument is repeat count--search for successive occurrences.\n\
1989 See also the functions `match-beginning', `match-end' and `replace-match'.")
1990 (string, bound, noerror, count)
1991 Lisp_Object string, bound, noerror, count;
1992 {
1993 return search_command (string, bound, noerror, count, -1, 0, 0);
1994 }
1995
1996 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
1997 "Search forward from point for STRING.\n\
1998 Set point to the end of the occurrence found, and return point.\n\
1999 An optional second argument bounds the search; it is a buffer position.\n\
2000 The match found must not extend after that position. nil is equivalent\n\
2001 to (point-max).\n\
2002 Optional third argument, if t, means if fail just return nil (no error).\n\
2003 If not nil and not t, move to limit of search and return nil.\n\
2004 Optional fourth argument is repeat count--search for successive occurrences.\n\
2005 See also the functions `match-beginning', `match-end' and `replace-match'.")
2006 (string, bound, noerror, count)
2007 Lisp_Object string, bound, noerror, count;
2008 {
2009 return search_command (string, bound, noerror, count, 1, 0, 0);
2010 }
2011
2012 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2013 "sWord search backward: ",
2014 "Search backward from point for STRING, ignoring differences in punctuation.\n\
2015 Set point to the beginning of the occurrence found, and return point.\n\
2016 An optional second argument bounds the search; it is a buffer position.\n\
2017 The match found must not extend before that position.\n\
2018 Optional third argument, if t, means if fail just return nil (no error).\n\
2019 If not nil and not t, move to limit of search and return nil.\n\
2020 Optional fourth argument is repeat count--search for successive occurrences.")
2021 (string, bound, noerror, count)
2022 Lisp_Object string, bound, noerror, count;
2023 {
2024 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2025 }
2026
2027 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2028 "sWord search: ",
2029 "Search forward from point for STRING, ignoring differences in punctuation.\n\
2030 Set point to the end of the occurrence found, and return point.\n\
2031 An optional second argument bounds the search; it is a buffer position.\n\
2032 The match found must not extend after that position.\n\
2033 Optional third argument, if t, means if fail just return nil (no error).\n\
2034 If not nil and not t, move to limit of search and return nil.\n\
2035 Optional fourth argument is repeat count--search for successive occurrences.")
2036 (string, bound, noerror, count)
2037 Lisp_Object string, bound, noerror, count;
2038 {
2039 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2040 }
2041
2042 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2043 "sRE search backward: ",
2044 "Search backward from point for match for regular expression REGEXP.\n\
2045 Set point to the beginning of the match, and return point.\n\
2046 The match found is the one starting last in the buffer\n\
2047 and yet ending before the origin of the search.\n\
2048 An optional second argument bounds the search; it is a buffer position.\n\
2049 The match found must start at or after that position.\n\
2050 Optional third argument, if t, means if fail just return nil (no error).\n\
2051 If not nil and not t, move to limit of search and return nil.\n\
2052 Optional fourth argument is repeat count--search for successive occurrences.\n\
2053 See also the functions `match-beginning', `match-end' and `replace-match'.")
2054 (regexp, bound, noerror, count)
2055 Lisp_Object regexp, bound, noerror, count;
2056 {
2057 return search_command (regexp, bound, noerror, count, -1, 1, 0);
2058 }
2059
2060 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2061 "sRE search: ",
2062 "Search forward from point for regular expression REGEXP.\n\
2063 Set point to the end of the occurrence found, and return point.\n\
2064 An optional second argument bounds the search; it is a buffer position.\n\
2065 The match found must not extend after that position.\n\
2066 Optional third argument, if t, means if fail just return nil (no error).\n\
2067 If not nil and not t, move to limit of search and return nil.\n\
2068 Optional fourth argument is repeat count--search for successive occurrences.\n\
2069 See also the functions `match-beginning', `match-end' and `replace-match'.")
2070 (regexp, bound, noerror, count)
2071 Lisp_Object regexp, bound, noerror, count;
2072 {
2073 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2074 }
2075
2076 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2077 "sPosix search backward: ",
2078 "Search backward from point for match for regular expression REGEXP.\n\
2079 Find the longest match in accord with Posix regular expression rules.\n\
2080 Set point to the beginning of the match, and return point.\n\
2081 The match found is the one starting last in the buffer\n\
2082 and yet ending before the origin of the search.\n\
2083 An optional second argument bounds the search; it is a buffer position.\n\
2084 The match found must start at or after that position.\n\
2085 Optional third argument, if t, means if fail just return nil (no error).\n\
2086 If not nil and not t, move to limit of search and return nil.\n\
2087 Optional fourth argument is repeat count--search for successive occurrences.\n\
2088 See also the functions `match-beginning', `match-end' and `replace-match'.")
2089 (regexp, bound, noerror, count)
2090 Lisp_Object regexp, bound, noerror, count;
2091 {
2092 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2093 }
2094
2095 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2096 "sPosix search: ",
2097 "Search forward from point for regular expression REGEXP.\n\
2098 Find the longest match in accord with Posix regular expression rules.\n\
2099 Set point to the end of the occurrence found, and return point.\n\
2100 An optional second argument bounds the search; it is a buffer position.\n\
2101 The match found must not extend after that position.\n\
2102 Optional third argument, if t, means if fail just return nil (no error).\n\
2103 If not nil and not t, move to limit of search and return nil.\n\
2104 Optional fourth argument is repeat count--search for successive occurrences.\n\
2105 See also the functions `match-beginning', `match-end' and `replace-match'.")
2106 (regexp, bound, noerror, count)
2107 Lisp_Object regexp, bound, noerror, count;
2108 {
2109 return search_command (regexp, bound, noerror, count, 1, 1, 1);
2110 }
2111 \f
2112 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2113 "Replace text matched by last search with NEWTEXT.\n\
2114 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
2115 Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
2116 based on the replaced text.\n\
2117 If the replaced text has only capital letters\n\
2118 and has at least one multiletter word, convert NEWTEXT to all caps.\n\
2119 If the replaced text has at least one word starting with a capital letter,\n\
2120 then capitalize each word in NEWTEXT.\n\n\
2121 If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
2122 Otherwise treat `\\' as special:\n\
2123 `\\&' in NEWTEXT means substitute original matched text.\n\
2124 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
2125 If Nth parens didn't match, substitute nothing.\n\
2126 `\\\\' means insert one `\\'.\n\
2127 FIXEDCASE and LITERAL are optional arguments.\n\
2128 Leaves point at end of replacement text.\n\
2129 \n\
2130 The optional fourth argument STRING can be a string to modify.\n\
2131 In that case, this function creates and returns a new string\n\
2132 which is made by replacing the part of STRING that was matched.\n\
2133 \n\
2134 The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
2135 It says to replace just that subexpression instead of the whole match.\n\
2136 This is useful only after a regular expression search or match\n\
2137 since only regular expressions have distinguished subexpressions.")
2138 (newtext, fixedcase, literal, string, subexp)
2139 Lisp_Object newtext, fixedcase, literal, string, subexp;
2140 {
2141 enum { nochange, all_caps, cap_initial } case_action;
2142 register int pos, last;
2143 int some_multiletter_word;
2144 int some_lowercase;
2145 int some_uppercase;
2146 int some_nonuppercase_initial;
2147 register int c, prevc;
2148 int inslen;
2149 int sub;
2150 int opoint, newpoint;
2151
2152 CHECK_STRING (newtext, 0);
2153
2154 if (! NILP (string))
2155 CHECK_STRING (string, 4);
2156
2157 case_action = nochange; /* We tried an initialization */
2158 /* but some C compilers blew it */
2159
2160 if (search_regs.num_regs <= 0)
2161 error ("replace-match called before any match found");
2162
2163 if (NILP (subexp))
2164 sub = 0;
2165 else
2166 {
2167 CHECK_NUMBER (subexp, 3);
2168 sub = XINT (subexp);
2169 if (sub < 0 || sub >= search_regs.num_regs)
2170 args_out_of_range (subexp, make_number (search_regs.num_regs));
2171 }
2172
2173 if (NILP (string))
2174 {
2175 if (search_regs.start[sub] < BEGV
2176 || search_regs.start[sub] > search_regs.end[sub]
2177 || search_regs.end[sub] > ZV)
2178 args_out_of_range (make_number (search_regs.start[sub]),
2179 make_number (search_regs.end[sub]));
2180 }
2181 else
2182 {
2183 if (search_regs.start[sub] < 0
2184 || search_regs.start[sub] > search_regs.end[sub]
2185 || search_regs.end[sub] > XSTRING (string)->size)
2186 args_out_of_range (make_number (search_regs.start[sub]),
2187 make_number (search_regs.end[sub]));
2188 }
2189
2190 if (NILP (fixedcase))
2191 {
2192 int beg;
2193 /* Decide how to casify by examining the matched text. */
2194
2195 if (NILP (string))
2196 last = CHAR_TO_BYTE (search_regs.end[sub]);
2197 else
2198 last = search_regs.end[sub];
2199
2200 if (NILP (string))
2201 beg = CHAR_TO_BYTE (search_regs.start[sub]);
2202 else
2203 beg = search_regs.start[sub];
2204
2205 prevc = '\n';
2206 case_action = all_caps;
2207
2208 /* some_multiletter_word is set nonzero if any original word
2209 is more than one letter long. */
2210 some_multiletter_word = 0;
2211 some_lowercase = 0;
2212 some_nonuppercase_initial = 0;
2213 some_uppercase = 0;
2214
2215 for (pos = beg; pos < last; pos++)
2216 {
2217 if (NILP (string))
2218 c = FETCH_BYTE (pos);
2219 else
2220 c = XSTRING (string)->data[pos];
2221
2222 if (LOWERCASEP (c))
2223 {
2224 /* Cannot be all caps if any original char is lower case */
2225
2226 some_lowercase = 1;
2227 if (SYNTAX (prevc) != Sword)
2228 some_nonuppercase_initial = 1;
2229 else
2230 some_multiletter_word = 1;
2231 }
2232 else if (!NOCASEP (c))
2233 {
2234 some_uppercase = 1;
2235 if (SYNTAX (prevc) != Sword)
2236 ;
2237 else
2238 some_multiletter_word = 1;
2239 }
2240 else
2241 {
2242 /* If the initial is a caseless word constituent,
2243 treat that like a lowercase initial. */
2244 if (SYNTAX (prevc) != Sword)
2245 some_nonuppercase_initial = 1;
2246 }
2247
2248 prevc = c;
2249 }
2250
2251 /* Convert to all caps if the old text is all caps
2252 and has at least one multiletter word. */
2253 if (! some_lowercase && some_multiletter_word)
2254 case_action = all_caps;
2255 /* Capitalize each word, if the old text has all capitalized words. */
2256 else if (!some_nonuppercase_initial && some_multiletter_word)
2257 case_action = cap_initial;
2258 else if (!some_nonuppercase_initial && some_uppercase)
2259 /* Should x -> yz, operating on X, give Yz or YZ?
2260 We'll assume the latter. */
2261 case_action = all_caps;
2262 else
2263 case_action = nochange;
2264 }
2265
2266 /* Do replacement in a string. */
2267 if (!NILP (string))
2268 {
2269 Lisp_Object before, after;
2270
2271 before = Fsubstring (string, make_number (0),
2272 make_number (search_regs.start[sub]));
2273 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2274
2275 /* Substitute parts of the match into NEWTEXT
2276 if desired. */
2277 if (NILP (literal))
2278 {
2279 int lastpos = -1;
2280 int lastpos_byte = -1;
2281 /* We build up the substituted string in ACCUM. */
2282 Lisp_Object accum;
2283 Lisp_Object middle;
2284 int pos_byte;
2285
2286 accum = Qnil;
2287
2288 for (pos_byte = 0, pos = 0; pos_byte < STRING_BYTES (XSTRING (newtext));)
2289 {
2290 int substart = -1;
2291 int subend;
2292 int delbackslash = 0;
2293
2294 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2295
2296 if (c == '\\')
2297 {
2298 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2299 if (c == '&')
2300 {
2301 substart = search_regs.start[sub];
2302 subend = search_regs.end[sub];
2303 }
2304 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2305 {
2306 if (search_regs.start[c - '0'] >= 0)
2307 {
2308 substart = search_regs.start[c - '0'];
2309 subend = search_regs.end[c - '0'];
2310 }
2311 }
2312 else if (c == '\\')
2313 delbackslash = 1;
2314 else
2315 error ("Invalid use of `\\' in replacement text");
2316 }
2317 if (substart >= 0)
2318 {
2319 if (pos - 1 != lastpos + 1)
2320 middle = substring_both (newtext, lastpos + 1,
2321 lastpos_byte + 1,
2322 pos - 1, pos_byte - 1);
2323 else
2324 middle = Qnil;
2325 accum = concat3 (accum, middle,
2326 Fsubstring (string,
2327 make_number (substart),
2328 make_number (subend)));
2329 lastpos = pos;
2330 lastpos_byte = pos_byte;
2331 }
2332 else if (delbackslash)
2333 {
2334 middle = substring_both (newtext, lastpos + 1,
2335 lastpos_byte + 1,
2336 pos, pos_byte);
2337
2338 accum = concat2 (accum, middle);
2339 lastpos = pos;
2340 lastpos_byte = pos_byte;
2341 }
2342 }
2343
2344 if (pos != lastpos + 1)
2345 middle = substring_both (newtext, lastpos + 1,
2346 lastpos_byte + 1,
2347 pos, pos_byte);
2348 else
2349 middle = Qnil;
2350
2351 newtext = concat2 (accum, middle);
2352 }
2353
2354 /* Do case substitution in NEWTEXT if desired. */
2355 if (case_action == all_caps)
2356 newtext = Fupcase (newtext);
2357 else if (case_action == cap_initial)
2358 newtext = Fupcase_initials (newtext);
2359
2360 return concat3 (before, newtext, after);
2361 }
2362
2363 /* Record point, the move (quietly) to the start of the match. */
2364 if (PT > search_regs.start[sub])
2365 opoint = PT - ZV;
2366 else
2367 opoint = PT;
2368
2369 TEMP_SET_PT (search_regs.start[sub]);
2370
2371 /* We insert the replacement text before the old text, and then
2372 delete the original text. This means that markers at the
2373 beginning or end of the original will float to the corresponding
2374 position in the replacement. */
2375 if (!NILP (literal))
2376 Finsert_and_inherit (1, &newtext);
2377 else
2378 {
2379 struct gcpro gcpro1;
2380 GCPRO1 (newtext);
2381
2382 for (pos = 0; pos < XSTRING (newtext)->size; pos++)
2383 {
2384 int offset = PT - search_regs.start[sub];
2385
2386 c = XSTRING (newtext)->data[pos];
2387 if (c == '\\')
2388 {
2389 c = XSTRING (newtext)->data[++pos];
2390 if (c == '&')
2391 Finsert_buffer_substring
2392 (Fcurrent_buffer (),
2393 make_number (search_regs.start[sub] + offset),
2394 make_number (search_regs.end[sub] + offset));
2395 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2396 {
2397 if (search_regs.start[c - '0'] >= 1)
2398 Finsert_buffer_substring
2399 (Fcurrent_buffer (),
2400 make_number (search_regs.start[c - '0'] + offset),
2401 make_number (search_regs.end[c - '0'] + offset));
2402 }
2403 else if (c == '\\')
2404 insert_char (c);
2405 else
2406 error ("Invalid use of `\\' in replacement text");
2407 }
2408 else
2409 insert_char (c);
2410 }
2411 UNGCPRO;
2412 }
2413
2414 inslen = PT - (search_regs.start[sub]);
2415 del_range (search_regs.start[sub] + inslen, search_regs.end[sub] + inslen);
2416
2417 if (case_action == all_caps)
2418 Fupcase_region (make_number (PT - inslen), make_number (PT));
2419 else if (case_action == cap_initial)
2420 Fupcase_initials_region (make_number (PT - inslen), make_number (PT));
2421
2422 newpoint = PT;
2423
2424 /* Put point back where it was in the text. */
2425 if (opoint <= 0)
2426 TEMP_SET_PT (opoint + ZV);
2427 else
2428 TEMP_SET_PT (opoint);
2429
2430 /* Now move point "officially" to the start of the inserted replacement. */
2431 move_if_not_intangible (newpoint);
2432
2433 return Qnil;
2434 }
2435 \f
2436 static Lisp_Object
2437 match_limit (num, beginningp)
2438 Lisp_Object num;
2439 int beginningp;
2440 {
2441 register int n;
2442
2443 CHECK_NUMBER (num, 0);
2444 n = XINT (num);
2445 if (n < 0 || n >= search_regs.num_regs)
2446 args_out_of_range (num, make_number (search_regs.num_regs));
2447 if (search_regs.num_regs <= 0
2448 || search_regs.start[n] < 0)
2449 return Qnil;
2450 return (make_number ((beginningp) ? search_regs.start[n]
2451 : search_regs.end[n]));
2452 }
2453
2454 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2455 "Return position of start of text matched by last search.\n\
2456 SUBEXP, a number, specifies which parenthesized expression in the last\n\
2457 regexp.\n\
2458 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2459 SUBEXP pairs.\n\
2460 Zero means the entire text matched by the whole regexp or whole string.")
2461 (subexp)
2462 Lisp_Object subexp;
2463 {
2464 return match_limit (subexp, 1);
2465 }
2466
2467 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2468 "Return position of end of text matched by last search.\n\
2469 SUBEXP, a number, specifies which parenthesized expression in the last\n\
2470 regexp.\n\
2471 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2472 SUBEXP pairs.\n\
2473 Zero means the entire text matched by the whole regexp or whole string.")
2474 (subexp)
2475 Lisp_Object subexp;
2476 {
2477 return match_limit (subexp, 0);
2478 }
2479
2480 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
2481 "Return a list containing all info on what the last search matched.\n\
2482 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
2483 All the elements are markers or nil (nil if the Nth pair didn't match)\n\
2484 if the last match was on a buffer; integers or nil if a string was matched.\n\
2485 Use `store-match-data' to reinstate the data in this list.\n\
2486 \n\
2487 If INTEGERS (the optional first argument) is non-nil, always use integers\n\
2488 \(rather than markers) to represent buffer positions.\n\
2489 If REUSE is a list, reuse it as part of the value. If REUSE is long enough\n\
2490 to hold all the values, and if INTEGERS is non-nil, no consing is done.")
2491 (integers, reuse)
2492 Lisp_Object integers, reuse;
2493 {
2494 Lisp_Object tail, prev;
2495 Lisp_Object *data;
2496 int i, len;
2497
2498 if (NILP (last_thing_searched))
2499 return Qnil;
2500
2501 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2502 * sizeof (Lisp_Object));
2503
2504 len = -1;
2505 for (i = 0; i < search_regs.num_regs; i++)
2506 {
2507 int start = search_regs.start[i];
2508 if (start >= 0)
2509 {
2510 if (EQ (last_thing_searched, Qt)
2511 || ! NILP (integers))
2512 {
2513 XSETFASTINT (data[2 * i], start);
2514 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2515 }
2516 else if (BUFFERP (last_thing_searched))
2517 {
2518 data[2 * i] = Fmake_marker ();
2519 Fset_marker (data[2 * i],
2520 make_number (start),
2521 last_thing_searched);
2522 data[2 * i + 1] = Fmake_marker ();
2523 Fset_marker (data[2 * i + 1],
2524 make_number (search_regs.end[i]),
2525 last_thing_searched);
2526 }
2527 else
2528 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2529 abort ();
2530
2531 len = i;
2532 }
2533 else
2534 data[2 * i] = data [2 * i + 1] = Qnil;
2535 }
2536
2537 /* If REUSE is not usable, cons up the values and return them. */
2538 if (! CONSP (reuse))
2539 return Flist (2 * len + 2, data);
2540
2541 /* If REUSE is a list, store as many value elements as will fit
2542 into the elements of REUSE. */
2543 for (i = 0, tail = reuse; CONSP (tail);
2544 i++, tail = XCONS (tail)->cdr)
2545 {
2546 if (i < 2 * len + 2)
2547 XCONS (tail)->car = data[i];
2548 else
2549 XCONS (tail)->car = Qnil;
2550 prev = tail;
2551 }
2552
2553 /* If we couldn't fit all value elements into REUSE,
2554 cons up the rest of them and add them to the end of REUSE. */
2555 if (i < 2 * len + 2)
2556 XCONS (prev)->cdr = Flist (2 * len + 2 - i, data + i);
2557
2558 return reuse;
2559 }
2560
2561
2562 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
2563 "Set internal data on last search match from elements of LIST.\n\
2564 LIST should have been created by calling `match-data' previously.")
2565 (list)
2566 register Lisp_Object list;
2567 {
2568 register int i;
2569 register Lisp_Object marker;
2570
2571 if (running_asynch_code)
2572 save_search_regs ();
2573
2574 if (!CONSP (list) && !NILP (list))
2575 list = wrong_type_argument (Qconsp, list);
2576
2577 /* Unless we find a marker with a buffer in LIST, assume that this
2578 match data came from a string. */
2579 last_thing_searched = Qt;
2580
2581 /* Allocate registers if they don't already exist. */
2582 {
2583 int length = XFASTINT (Flength (list)) / 2;
2584
2585 if (length > search_regs.num_regs)
2586 {
2587 if (search_regs.num_regs == 0)
2588 {
2589 search_regs.start
2590 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2591 search_regs.end
2592 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2593 }
2594 else
2595 {
2596 search_regs.start
2597 = (regoff_t *) xrealloc (search_regs.start,
2598 length * sizeof (regoff_t));
2599 search_regs.end
2600 = (regoff_t *) xrealloc (search_regs.end,
2601 length * sizeof (regoff_t));
2602 }
2603
2604 search_regs.num_regs = length;
2605 }
2606 }
2607
2608 for (i = 0; i < search_regs.num_regs; i++)
2609 {
2610 marker = Fcar (list);
2611 if (NILP (marker))
2612 {
2613 search_regs.start[i] = -1;
2614 list = Fcdr (list);
2615 }
2616 else
2617 {
2618 if (MARKERP (marker))
2619 {
2620 if (XMARKER (marker)->buffer == 0)
2621 XSETFASTINT (marker, 0);
2622 else
2623 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2624 }
2625
2626 CHECK_NUMBER_COERCE_MARKER (marker, 0);
2627 search_regs.start[i] = XINT (marker);
2628 list = Fcdr (list);
2629
2630 marker = Fcar (list);
2631 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2632 XSETFASTINT (marker, 0);
2633
2634 CHECK_NUMBER_COERCE_MARKER (marker, 0);
2635 search_regs.end[i] = XINT (marker);
2636 }
2637 list = Fcdr (list);
2638 }
2639
2640 return Qnil;
2641 }
2642
2643 /* If non-zero the match data have been saved in saved_search_regs
2644 during the execution of a sentinel or filter. */
2645 static int search_regs_saved;
2646 static struct re_registers saved_search_regs;
2647
2648 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2649 if asynchronous code (filter or sentinel) is running. */
2650 static void
2651 save_search_regs ()
2652 {
2653 if (!search_regs_saved)
2654 {
2655 saved_search_regs.num_regs = search_regs.num_regs;
2656 saved_search_regs.start = search_regs.start;
2657 saved_search_regs.end = search_regs.end;
2658 search_regs.num_regs = 0;
2659 search_regs.start = 0;
2660 search_regs.end = 0;
2661
2662 search_regs_saved = 1;
2663 }
2664 }
2665
2666 /* Called upon exit from filters and sentinels. */
2667 void
2668 restore_match_data ()
2669 {
2670 if (search_regs_saved)
2671 {
2672 if (search_regs.num_regs > 0)
2673 {
2674 xfree (search_regs.start);
2675 xfree (search_regs.end);
2676 }
2677 search_regs.num_regs = saved_search_regs.num_regs;
2678 search_regs.start = saved_search_regs.start;
2679 search_regs.end = saved_search_regs.end;
2680
2681 search_regs_saved = 0;
2682 }
2683 }
2684
2685 /* Quote a string to inactivate reg-expr chars */
2686
2687 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2688 "Return a regexp string which matches exactly STRING and nothing else.")
2689 (string)
2690 Lisp_Object string;
2691 {
2692 register unsigned char *in, *out, *end;
2693 register unsigned char *temp;
2694 int backslashes_added = 0;
2695
2696 CHECK_STRING (string, 0);
2697
2698 temp = (unsigned char *) alloca (STRING_BYTES (XSTRING (string)) * 2);
2699
2700 /* Now copy the data into the new string, inserting escapes. */
2701
2702 in = XSTRING (string)->data;
2703 end = in + STRING_BYTES (XSTRING (string));
2704 out = temp;
2705
2706 for (; in != end; in++)
2707 {
2708 if (*in == '[' || *in == ']'
2709 || *in == '*' || *in == '.' || *in == '\\'
2710 || *in == '?' || *in == '+'
2711 || *in == '^' || *in == '$')
2712 *out++ = '\\', backslashes_added++;
2713 *out++ = *in;
2714 }
2715
2716 return make_specified_string (temp,
2717 XSTRING (string)->size + backslashes_added,
2718 out - temp,
2719 STRING_MULTIBYTE (string));
2720 }
2721 \f
2722 void
2723 syms_of_search ()
2724 {
2725 register int i;
2726
2727 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2728 {
2729 searchbufs[i].buf.allocated = 100;
2730 searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
2731 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2732 searchbufs[i].regexp = Qnil;
2733 staticpro (&searchbufs[i].regexp);
2734 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2735 }
2736 searchbuf_head = &searchbufs[0];
2737
2738 Qsearch_failed = intern ("search-failed");
2739 staticpro (&Qsearch_failed);
2740 Qinvalid_regexp = intern ("invalid-regexp");
2741 staticpro (&Qinvalid_regexp);
2742
2743 Fput (Qsearch_failed, Qerror_conditions,
2744 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2745 Fput (Qsearch_failed, Qerror_message,
2746 build_string ("Search failed"));
2747
2748 Fput (Qinvalid_regexp, Qerror_conditions,
2749 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2750 Fput (Qinvalid_regexp, Qerror_message,
2751 build_string ("Invalid regexp"));
2752
2753 last_thing_searched = Qnil;
2754 staticpro (&last_thing_searched);
2755
2756 defsubr (&Slooking_at);
2757 defsubr (&Sposix_looking_at);
2758 defsubr (&Sstring_match);
2759 defsubr (&Sposix_string_match);
2760 defsubr (&Ssearch_forward);
2761 defsubr (&Ssearch_backward);
2762 defsubr (&Sword_search_forward);
2763 defsubr (&Sword_search_backward);
2764 defsubr (&Sre_search_forward);
2765 defsubr (&Sre_search_backward);
2766 defsubr (&Sposix_search_forward);
2767 defsubr (&Sposix_search_backward);
2768 defsubr (&Sreplace_match);
2769 defsubr (&Smatch_beginning);
2770 defsubr (&Smatch_end);
2771 defsubr (&Smatch_data);
2772 defsubr (&Sset_match_data);
2773 defsubr (&Sregexp_quote);
2774 }