Use STRING_BYTES and SET_STRING_BYTES.
[bpt/emacs.git] / src / search.c
1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 86, 87, 93, 94, 97, 1998 Free Software Foundation, Inc.
3
4 This file is part of GNU Emacs.
5
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21
22 #include <config.h>
23 #include "lisp.h"
24 #include "syntax.h"
25 #include "category.h"
26 #include "buffer.h"
27 #include "charset.h"
28 #include "region-cache.h"
29 #include "commands.h"
30 #include "blockinput.h"
31 #include "intervals.h"
32
33 #include <sys/types.h>
34 #include "regex.h"
35
36 #define REGEXP_CACHE_SIZE 20
37
38 /* If the regexp is non-nil, then the buffer contains the compiled form
39 of that regexp, suitable for searching. */
40 struct regexp_cache
41 {
42 struct regexp_cache *next;
43 Lisp_Object regexp;
44 struct re_pattern_buffer buf;
45 char fastmap[0400];
46 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
47 char posix;
48 };
49
50 /* The instances of that struct. */
51 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
52
53 /* The head of the linked list; points to the most recently used buffer. */
54 struct regexp_cache *searchbuf_head;
55
56
57 /* Every call to re_match, etc., must pass &search_regs as the regs
58 argument unless you can show it is unnecessary (i.e., if re_match
59 is certainly going to be called again before region-around-match
60 can be called).
61
62 Since the registers are now dynamically allocated, we need to make
63 sure not to refer to the Nth register before checking that it has
64 been allocated by checking search_regs.num_regs.
65
66 The regex code keeps track of whether it has allocated the search
67 buffer using bits in the re_pattern_buffer. This means that whenever
68 you compile a new pattern, it completely forgets whether it has
69 allocated any registers, and will allocate new registers the next
70 time you call a searching or matching function. Therefore, we need
71 to call re_set_registers after compiling a new pattern or after
72 setting the match registers, so that the regex functions will be
73 able to free or re-allocate it properly. */
74 static struct re_registers search_regs;
75
76 /* The buffer in which the last search was performed, or
77 Qt if the last search was done in a string;
78 Qnil if no searching has been done yet. */
79 static Lisp_Object last_thing_searched;
80
81 /* error condition signaled when regexp compile_pattern fails */
82
83 Lisp_Object Qinvalid_regexp;
84
85 static void set_search_regs ();
86 static void save_search_regs ();
87 static int simple_search ();
88 static int boyer_moore ();
89 static int search_buffer ();
90
91 static void
92 matcher_overflow ()
93 {
94 error ("Stack overflow in regexp matcher");
95 }
96
97 #ifdef __STDC__
98 #define CONST const
99 #else
100 #define CONST
101 #endif
102
103 /* Compile a regexp and signal a Lisp error if anything goes wrong.
104 PATTERN is the pattern to compile.
105 CP is the place to put the result.
106 TRANSLATE is a translation table for ignoring case, or nil for none.
107 REGP is the structure that says where to store the "register"
108 values that will result from matching this pattern.
109 If it is 0, we should compile the pattern not to record any
110 subexpression bounds.
111 POSIX is nonzero if we want full backtracking (POSIX style)
112 for this pattern. 0 means backtrack only enough to get a valid match.
113 MULTIBYTE is nonzero if we want to handle multibyte characters in
114 PATTERN. 0 means all multibyte characters are recognized just as
115 sequences of binary data. */
116
117 static void
118 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
119 struct regexp_cache *cp;
120 Lisp_Object pattern;
121 Lisp_Object translate;
122 struct re_registers *regp;
123 int posix;
124 int multibyte;
125 {
126 char *raw_pattern;
127 int raw_pattern_size;
128 char *val;
129 reg_syntax_t old;
130
131 /* MULTIBYTE says whether the text to be searched is multibyte.
132 We must convert PATTERN to match that, or we will not really
133 find things right. */
134
135 if (multibyte == STRING_MULTIBYTE (pattern))
136 {
137 raw_pattern = (char *) XSTRING (pattern)->data;
138 raw_pattern_size = STRING_BYTES (XSTRING (pattern));
139 }
140 else if (multibyte)
141 {
142 raw_pattern_size = count_size_as_multibyte (XSTRING (pattern)->data,
143 XSTRING (pattern)->size);
144 raw_pattern = (char *) alloca (raw_pattern_size + 1);
145 copy_text (XSTRING (pattern)->data, raw_pattern,
146 XSTRING (pattern)->size, 0, 1);
147 }
148 else
149 {
150 /* Converting multibyte to single-byte.
151
152 ??? Perhaps this conversion should be done in a special way
153 by subtracting nonascii-insert-offset from each non-ASCII char,
154 so that only the multibyte chars which really correspond to
155 the chosen single-byte character set can possibly match. */
156 raw_pattern_size = XSTRING (pattern)->size;
157 raw_pattern = (char *) alloca (raw_pattern_size + 1);
158 copy_text (XSTRING (pattern)->data, raw_pattern,
159 STRING_BYTES (XSTRING (pattern)), 1, 0);
160 }
161
162 cp->regexp = Qnil;
163 cp->buf.translate = (! NILP (translate) ? translate : 0);
164 cp->posix = posix;
165 cp->buf.multibyte = multibyte;
166 BLOCK_INPUT;
167 old = re_set_syntax (RE_SYNTAX_EMACS
168 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
169 val = (char *) re_compile_pattern (raw_pattern, raw_pattern_size, &cp->buf);
170 re_set_syntax (old);
171 UNBLOCK_INPUT;
172 if (val)
173 Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
174
175 cp->regexp = Fcopy_sequence (pattern);
176 }
177
178 /* Compile a regexp if necessary, but first check to see if there's one in
179 the cache.
180 PATTERN is the pattern to compile.
181 TRANSLATE is a translation table for ignoring case, or nil for none.
182 REGP is the structure that says where to store the "register"
183 values that will result from matching this pattern.
184 If it is 0, we should compile the pattern not to record any
185 subexpression bounds.
186 POSIX is nonzero if we want full backtracking (POSIX style)
187 for this pattern. 0 means backtrack only enough to get a valid match. */
188
189 struct re_pattern_buffer *
190 compile_pattern (pattern, regp, translate, posix, multibyte)
191 Lisp_Object pattern;
192 struct re_registers *regp;
193 Lisp_Object translate;
194 int posix, multibyte;
195 {
196 struct regexp_cache *cp, **cpp;
197
198 for (cpp = &searchbuf_head; ; cpp = &cp->next)
199 {
200 cp = *cpp;
201 if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
202 && !NILP (Fstring_equal (cp->regexp, pattern))
203 && cp->buf.translate == (! NILP (translate) ? translate : 0)
204 && cp->posix == posix
205 && cp->buf.multibyte == multibyte)
206 break;
207
208 /* If we're at the end of the cache, compile into the last cell. */
209 if (cp->next == 0)
210 {
211 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
212 break;
213 }
214 }
215
216 /* When we get here, cp (aka *cpp) contains the compiled pattern,
217 either because we found it in the cache or because we just compiled it.
218 Move it to the front of the queue to mark it as most recently used. */
219 *cpp = cp->next;
220 cp->next = searchbuf_head;
221 searchbuf_head = cp;
222
223 /* Advise the searching functions about the space we have allocated
224 for register data. */
225 if (regp)
226 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
227
228 return &cp->buf;
229 }
230
231 /* Error condition used for failing searches */
232 Lisp_Object Qsearch_failed;
233
234 Lisp_Object
235 signal_failure (arg)
236 Lisp_Object arg;
237 {
238 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
239 return Qnil;
240 }
241 \f
242 static Lisp_Object
243 looking_at_1 (string, posix)
244 Lisp_Object string;
245 int posix;
246 {
247 Lisp_Object val;
248 unsigned char *p1, *p2;
249 int s1, s2;
250 register int i;
251 struct re_pattern_buffer *bufp;
252
253 if (running_asynch_code)
254 save_search_regs ();
255
256 CHECK_STRING (string, 0);
257 bufp = compile_pattern (string, &search_regs,
258 (!NILP (current_buffer->case_fold_search)
259 ? DOWNCASE_TABLE : Qnil),
260 posix,
261 !NILP (current_buffer->enable_multibyte_characters));
262
263 immediate_quit = 1;
264 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
265
266 /* Get pointers and sizes of the two strings
267 that make up the visible portion of the buffer. */
268
269 p1 = BEGV_ADDR;
270 s1 = GPT_BYTE - BEGV_BYTE;
271 p2 = GAP_END_ADDR;
272 s2 = ZV_BYTE - GPT_BYTE;
273 if (s1 < 0)
274 {
275 p2 = p1;
276 s2 = ZV_BYTE - BEGV_BYTE;
277 s1 = 0;
278 }
279 if (s2 < 0)
280 {
281 s1 = ZV_BYTE - BEGV_BYTE;
282 s2 = 0;
283 }
284
285 re_match_object = Qnil;
286
287 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
288 PT_BYTE - BEGV_BYTE, &search_regs,
289 ZV_BYTE - BEGV_BYTE);
290 if (i == -2)
291 matcher_overflow ();
292
293 val = (0 <= i ? Qt : Qnil);
294 if (i >= 0)
295 for (i = 0; i < search_regs.num_regs; i++)
296 if (search_regs.start[i] >= 0)
297 {
298 search_regs.start[i]
299 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
300 search_regs.end[i]
301 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
302 }
303 XSETBUFFER (last_thing_searched, current_buffer);
304 immediate_quit = 0;
305 return val;
306 }
307
308 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
309 "Return t if text after point matches regular expression REGEXP.\n\
310 This function modifies the match data that `match-beginning',\n\
311 `match-end' and `match-data' access; save and restore the match\n\
312 data if you want to preserve them.")
313 (regexp)
314 Lisp_Object regexp;
315 {
316 return looking_at_1 (regexp, 0);
317 }
318
319 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
320 "Return t if text after point matches regular expression REGEXP.\n\
321 Find the longest match, in accord with Posix regular expression rules.\n\
322 This function modifies the match data that `match-beginning',\n\
323 `match-end' and `match-data' access; save and restore the match\n\
324 data if you want to preserve them.")
325 (regexp)
326 Lisp_Object regexp;
327 {
328 return looking_at_1 (regexp, 1);
329 }
330 \f
331 static Lisp_Object
332 string_match_1 (regexp, string, start, posix)
333 Lisp_Object regexp, string, start;
334 int posix;
335 {
336 int val;
337 struct re_pattern_buffer *bufp;
338 int pos, pos_byte;
339 int i;
340
341 if (running_asynch_code)
342 save_search_regs ();
343
344 CHECK_STRING (regexp, 0);
345 CHECK_STRING (string, 1);
346
347 if (NILP (start))
348 pos = 0, pos_byte = 0;
349 else
350 {
351 int len = XSTRING (string)->size;
352
353 CHECK_NUMBER (start, 2);
354 pos = XINT (start);
355 if (pos < 0 && -pos <= len)
356 pos = len + pos;
357 else if (0 > pos || pos > len)
358 args_out_of_range (string, start);
359 pos_byte = string_char_to_byte (string, pos);
360 }
361
362 bufp = compile_pattern (regexp, &search_regs,
363 (!NILP (current_buffer->case_fold_search)
364 ? DOWNCASE_TABLE : Qnil),
365 posix,
366 STRING_MULTIBYTE (string));
367 immediate_quit = 1;
368 re_match_object = string;
369
370 val = re_search (bufp, (char *) XSTRING (string)->data,
371 STRING_BYTES (XSTRING (string)), pos_byte,
372 STRING_BYTES (XSTRING (string)) - pos_byte,
373 &search_regs);
374 immediate_quit = 0;
375 last_thing_searched = Qt;
376 if (val == -2)
377 matcher_overflow ();
378 if (val < 0) return Qnil;
379
380 for (i = 0; i < search_regs.num_regs; i++)
381 if (search_regs.start[i] >= 0)
382 {
383 search_regs.start[i]
384 = string_byte_to_char (string, search_regs.start[i]);
385 search_regs.end[i]
386 = string_byte_to_char (string, search_regs.end[i]);
387 }
388
389 return make_number (string_byte_to_char (string, val));
390 }
391
392 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
393 "Return index of start of first match for REGEXP in STRING, or nil.\n\
394 If third arg START is non-nil, start search at that index in STRING.\n\
395 For index of first char beyond the match, do (match-end 0).\n\
396 `match-end' and `match-beginning' also give indices of substrings\n\
397 matched by parenthesis constructs in the pattern.")
398 (regexp, string, start)
399 Lisp_Object regexp, string, start;
400 {
401 return string_match_1 (regexp, string, start, 0);
402 }
403
404 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
405 "Return index of start of first match for REGEXP in STRING, or nil.\n\
406 Find the longest match, in accord with Posix regular expression rules.\n\
407 If third arg START is non-nil, start search at that index in STRING.\n\
408 For index of first char beyond the match, do (match-end 0).\n\
409 `match-end' and `match-beginning' also give indices of substrings\n\
410 matched by parenthesis constructs in the pattern.")
411 (regexp, string, start)
412 Lisp_Object regexp, string, start;
413 {
414 return string_match_1 (regexp, string, start, 1);
415 }
416
417 /* Match REGEXP against STRING, searching all of STRING,
418 and return the index of the match, or negative on failure.
419 This does not clobber the match data. */
420
421 int
422 fast_string_match (regexp, string)
423 Lisp_Object regexp, string;
424 {
425 int val;
426 struct re_pattern_buffer *bufp;
427
428 bufp = compile_pattern (regexp, 0, Qnil,
429 0, STRING_MULTIBYTE (string));
430 immediate_quit = 1;
431 re_match_object = string;
432
433 val = re_search (bufp, (char *) XSTRING (string)->data,
434 STRING_BYTES (XSTRING (string)), 0,
435 STRING_BYTES (XSTRING (string)), 0);
436 immediate_quit = 0;
437 return val;
438 }
439
440 /* Match REGEXP against STRING, searching all of STRING ignoring case,
441 and return the index of the match, or negative on failure.
442 This does not clobber the match data.
443 We assume that STRING contains single-byte characters. */
444
445 extern Lisp_Object Vascii_downcase_table;
446
447 int
448 fast_c_string_match_ignore_case (regexp, string)
449 Lisp_Object regexp;
450 char *string;
451 {
452 int val;
453 struct re_pattern_buffer *bufp;
454 int len = strlen (string);
455
456 regexp = string_make_unibyte (regexp);
457 re_match_object = Qt;
458 bufp = compile_pattern (regexp, 0,
459 Vascii_downcase_table, 0,
460 0);
461 immediate_quit = 1;
462 val = re_search (bufp, string, len, 0, len, 0);
463 immediate_quit = 0;
464 return val;
465 }
466 \f
467 /* max and min. */
468
469 static int
470 max (a, b)
471 int a, b;
472 {
473 return ((a > b) ? a : b);
474 }
475
476 static int
477 min (a, b)
478 int a, b;
479 {
480 return ((a < b) ? a : b);
481 }
482
483 \f
484 /* The newline cache: remembering which sections of text have no newlines. */
485
486 /* If the user has requested newline caching, make sure it's on.
487 Otherwise, make sure it's off.
488 This is our cheezy way of associating an action with the change of
489 state of a buffer-local variable. */
490 static void
491 newline_cache_on_off (buf)
492 struct buffer *buf;
493 {
494 if (NILP (buf->cache_long_line_scans))
495 {
496 /* It should be off. */
497 if (buf->newline_cache)
498 {
499 free_region_cache (buf->newline_cache);
500 buf->newline_cache = 0;
501 }
502 }
503 else
504 {
505 /* It should be on. */
506 if (buf->newline_cache == 0)
507 buf->newline_cache = new_region_cache ();
508 }
509 }
510
511 \f
512 /* Search for COUNT instances of the character TARGET between START and END.
513
514 If COUNT is positive, search forwards; END must be >= START.
515 If COUNT is negative, search backwards for the -COUNTth instance;
516 END must be <= START.
517 If COUNT is zero, do anything you please; run rogue, for all I care.
518
519 If END is zero, use BEGV or ZV instead, as appropriate for the
520 direction indicated by COUNT.
521
522 If we find COUNT instances, set *SHORTAGE to zero, and return the
523 position after the COUNTth match. Note that for reverse motion
524 this is not the same as the usual convention for Emacs motion commands.
525
526 If we don't find COUNT instances before reaching END, set *SHORTAGE
527 to the number of TARGETs left unfound, and return END.
528
529 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
530 except when inside redisplay. */
531
532 scan_buffer (target, start, end, count, shortage, allow_quit)
533 register int target;
534 int start, end;
535 int count;
536 int *shortage;
537 int allow_quit;
538 {
539 struct region_cache *newline_cache;
540 int direction;
541
542 if (count > 0)
543 {
544 direction = 1;
545 if (! end) end = ZV;
546 }
547 else
548 {
549 direction = -1;
550 if (! end) end = BEGV;
551 }
552
553 newline_cache_on_off (current_buffer);
554 newline_cache = current_buffer->newline_cache;
555
556 if (shortage != 0)
557 *shortage = 0;
558
559 immediate_quit = allow_quit;
560
561 if (count > 0)
562 while (start != end)
563 {
564 /* Our innermost scanning loop is very simple; it doesn't know
565 about gaps, buffer ends, or the newline cache. ceiling is
566 the position of the last character before the next such
567 obstacle --- the last character the dumb search loop should
568 examine. */
569 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
570 int start_byte = CHAR_TO_BYTE (start);
571
572 /* If we're looking for a newline, consult the newline cache
573 to see where we can avoid some scanning. */
574 if (target == '\n' && newline_cache)
575 {
576 int next_change;
577 immediate_quit = 0;
578 while (region_cache_forward
579 (current_buffer, newline_cache, start_byte, &next_change))
580 start_byte = next_change;
581 immediate_quit = allow_quit;
582
583 /* START should never be after END. */
584 if (start_byte > ceiling_byte)
585 start_byte = ceiling_byte;
586
587 /* Now the text after start is an unknown region, and
588 next_change is the position of the next known region. */
589 ceiling_byte = min (next_change - 1, ceiling_byte);
590 }
591
592 /* The dumb loop can only scan text stored in contiguous
593 bytes. BUFFER_CEILING_OF returns the last character
594 position that is contiguous, so the ceiling is the
595 position after that. */
596 ceiling_byte = min (BUFFER_CEILING_OF (start_byte), ceiling_byte);
597
598 {
599 /* The termination address of the dumb loop. */
600 register unsigned char *ceiling_addr
601 = BYTE_POS_ADDR (ceiling_byte) + 1;
602 register unsigned char *cursor
603 = BYTE_POS_ADDR (start_byte);
604 unsigned char *base = cursor;
605
606 while (cursor < ceiling_addr)
607 {
608 unsigned char *scan_start = cursor;
609
610 /* The dumb loop. */
611 while (*cursor != target && ++cursor < ceiling_addr)
612 ;
613
614 /* If we're looking for newlines, cache the fact that
615 the region from start to cursor is free of them. */
616 if (target == '\n' && newline_cache)
617 know_region_cache (current_buffer, newline_cache,
618 start_byte + scan_start - base,
619 start_byte + cursor - base);
620
621 /* Did we find the target character? */
622 if (cursor < ceiling_addr)
623 {
624 if (--count == 0)
625 {
626 immediate_quit = 0;
627 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
628 }
629 cursor++;
630 }
631 }
632
633 start = BYTE_TO_CHAR (start_byte + cursor - base);
634 }
635 }
636 else
637 while (start > end)
638 {
639 /* The last character to check before the next obstacle. */
640 int ceiling_byte = CHAR_TO_BYTE (end);
641 int start_byte = CHAR_TO_BYTE (start);
642
643 /* Consult the newline cache, if appropriate. */
644 if (target == '\n' && newline_cache)
645 {
646 int next_change;
647 immediate_quit = 0;
648 while (region_cache_backward
649 (current_buffer, newline_cache, start_byte, &next_change))
650 start_byte = next_change;
651 immediate_quit = allow_quit;
652
653 /* Start should never be at or before end. */
654 if (start_byte <= ceiling_byte)
655 start_byte = ceiling_byte + 1;
656
657 /* Now the text before start is an unknown region, and
658 next_change is the position of the next known region. */
659 ceiling_byte = max (next_change, ceiling_byte);
660 }
661
662 /* Stop scanning before the gap. */
663 ceiling_byte = max (BUFFER_FLOOR_OF (start_byte - 1), ceiling_byte);
664
665 {
666 /* The termination address of the dumb loop. */
667 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
668 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
669 unsigned char *base = cursor;
670
671 while (cursor >= ceiling_addr)
672 {
673 unsigned char *scan_start = cursor;
674
675 while (*cursor != target && --cursor >= ceiling_addr)
676 ;
677
678 /* If we're looking for newlines, cache the fact that
679 the region from after the cursor to start is free of them. */
680 if (target == '\n' && newline_cache)
681 know_region_cache (current_buffer, newline_cache,
682 start_byte + cursor - base,
683 start_byte + scan_start - base);
684
685 /* Did we find the target character? */
686 if (cursor >= ceiling_addr)
687 {
688 if (++count >= 0)
689 {
690 immediate_quit = 0;
691 return BYTE_TO_CHAR (start_byte + cursor - base);
692 }
693 cursor--;
694 }
695 }
696
697 start = BYTE_TO_CHAR (start_byte + cursor - base);
698 }
699 }
700
701 immediate_quit = 0;
702 if (shortage != 0)
703 *shortage = count * direction;
704 return start;
705 }
706 \f
707 /* Search for COUNT instances of a line boundary, which means either a
708 newline or (if selective display enabled) a carriage return.
709 Start at START. If COUNT is negative, search backwards.
710
711 We report the resulting position by calling TEMP_SET_PT_BOTH.
712
713 If we find COUNT instances. we position after (always after,
714 even if scanning backwards) the COUNTth match, and return 0.
715
716 If we don't find COUNT instances before reaching the end of the
717 buffer (or the beginning, if scanning backwards), we return
718 the number of line boundaries left unfound, and position at
719 the limit we bumped up against.
720
721 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
722 except in special cases. */
723
724 int
725 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
726 int start, start_byte;
727 int limit, limit_byte;
728 register int count;
729 int allow_quit;
730 {
731 int direction = ((count > 0) ? 1 : -1);
732
733 register unsigned char *cursor;
734 unsigned char *base;
735
736 register int ceiling;
737 register unsigned char *ceiling_addr;
738
739 int old_immediate_quit = immediate_quit;
740
741 /* If we are not in selective display mode,
742 check only for newlines. */
743 int selective_display = (!NILP (current_buffer->selective_display)
744 && !INTEGERP (current_buffer->selective_display));
745
746 /* The code that follows is like scan_buffer
747 but checks for either newline or carriage return. */
748
749 if (allow_quit)
750 immediate_quit++;
751
752 start_byte = CHAR_TO_BYTE (start);
753
754 if (count > 0)
755 {
756 while (start_byte < limit_byte)
757 {
758 ceiling = BUFFER_CEILING_OF (start_byte);
759 ceiling = min (limit_byte - 1, ceiling);
760 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
761 base = (cursor = BYTE_POS_ADDR (start_byte));
762 while (1)
763 {
764 while (*cursor != '\n' && ++cursor != ceiling_addr)
765 ;
766
767 if (cursor != ceiling_addr)
768 {
769 if (--count == 0)
770 {
771 immediate_quit = old_immediate_quit;
772 start_byte = start_byte + cursor - base + 1;
773 start = BYTE_TO_CHAR (start_byte);
774 TEMP_SET_PT_BOTH (start, start_byte);
775 return 0;
776 }
777 else
778 if (++cursor == ceiling_addr)
779 break;
780 }
781 else
782 break;
783 }
784 start_byte += cursor - base;
785 }
786 }
787 else
788 {
789 while (start_byte > limit_byte)
790 {
791 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
792 ceiling = max (limit_byte, ceiling);
793 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
794 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
795 while (1)
796 {
797 while (--cursor != ceiling_addr && *cursor != '\n')
798 ;
799
800 if (cursor != ceiling_addr)
801 {
802 if (++count == 0)
803 {
804 immediate_quit = old_immediate_quit;
805 /* Return the position AFTER the match we found. */
806 start_byte = start_byte + cursor - base + 1;
807 start = BYTE_TO_CHAR (start_byte);
808 TEMP_SET_PT_BOTH (start, start_byte);
809 return 0;
810 }
811 }
812 else
813 break;
814 }
815 /* Here we add 1 to compensate for the last decrement
816 of CURSOR, which took it past the valid range. */
817 start_byte += cursor - base + 1;
818 }
819 }
820
821 TEMP_SET_PT_BOTH (limit, limit_byte);
822 immediate_quit = old_immediate_quit;
823
824 return count * direction;
825 }
826
827 int
828 find_next_newline_no_quit (from, cnt)
829 register int from, cnt;
830 {
831 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
832 }
833
834 /* Like find_next_newline, but returns position before the newline,
835 not after, and only search up to TO. This isn't just
836 find_next_newline (...)-1, because you might hit TO. */
837
838 int
839 find_before_next_newline (from, to, cnt)
840 int from, to, cnt;
841 {
842 int shortage;
843 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
844
845 if (shortage == 0)
846 pos--;
847
848 return pos;
849 }
850 \f
851 /* Subroutines of Lisp buffer search functions. */
852
853 static Lisp_Object
854 search_command (string, bound, noerror, count, direction, RE, posix)
855 Lisp_Object string, bound, noerror, count;
856 int direction;
857 int RE;
858 int posix;
859 {
860 register int np;
861 int lim, lim_byte;
862 int n = direction;
863
864 if (!NILP (count))
865 {
866 CHECK_NUMBER (count, 3);
867 n *= XINT (count);
868 }
869
870 CHECK_STRING (string, 0);
871 if (NILP (bound))
872 {
873 if (n > 0)
874 lim = ZV, lim_byte = ZV_BYTE;
875 else
876 lim = BEGV, lim_byte = BEGV_BYTE;
877 }
878 else
879 {
880 CHECK_NUMBER_COERCE_MARKER (bound, 1);
881 lim = XINT (bound);
882 if (n > 0 ? lim < PT : lim > PT)
883 error ("Invalid search bound (wrong side of point)");
884 if (lim > ZV)
885 lim = ZV, lim_byte = ZV_BYTE;
886 else if (lim < BEGV)
887 lim = BEGV, lim_byte = BEGV_BYTE;
888 else
889 lim_byte = CHAR_TO_BYTE (lim);
890 }
891
892 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
893 (!NILP (current_buffer->case_fold_search)
894 ? current_buffer->case_canon_table
895 : Qnil),
896 (!NILP (current_buffer->case_fold_search)
897 ? current_buffer->case_eqv_table
898 : Qnil),
899 posix);
900 if (np <= 0)
901 {
902 if (NILP (noerror))
903 return signal_failure (string);
904 if (!EQ (noerror, Qt))
905 {
906 if (lim < BEGV || lim > ZV)
907 abort ();
908 SET_PT_BOTH (lim, lim_byte);
909 return Qnil;
910 #if 0 /* This would be clean, but maybe programs depend on
911 a value of nil here. */
912 np = lim;
913 #endif
914 }
915 else
916 return Qnil;
917 }
918
919 if (np < BEGV || np > ZV)
920 abort ();
921
922 SET_PT (np);
923
924 return make_number (np);
925 }
926 \f
927 /* Return 1 if REGEXP it matches just one constant string. */
928
929 static int
930 trivial_regexp_p (regexp)
931 Lisp_Object regexp;
932 {
933 int len = STRING_BYTES (XSTRING (regexp));
934 unsigned char *s = XSTRING (regexp)->data;
935 unsigned char c;
936 while (--len >= 0)
937 {
938 switch (*s++)
939 {
940 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
941 return 0;
942 case '\\':
943 if (--len < 0)
944 return 0;
945 switch (*s++)
946 {
947 case '|': case '(': case ')': case '`': case '\'': case 'b':
948 case 'B': case '<': case '>': case 'w': case 'W': case 's':
949 case 'S': case '=':
950 case 'c': case 'C': /* for categoryspec and notcategoryspec */
951 case '1': case '2': case '3': case '4': case '5':
952 case '6': case '7': case '8': case '9':
953 return 0;
954 }
955 }
956 }
957 return 1;
958 }
959
960 /* Search for the n'th occurrence of STRING in the current buffer,
961 starting at position POS and stopping at position LIM,
962 treating STRING as a literal string if RE is false or as
963 a regular expression if RE is true.
964
965 If N is positive, searching is forward and LIM must be greater than POS.
966 If N is negative, searching is backward and LIM must be less than POS.
967
968 Returns -x if x occurrences remain to be found (x > 0),
969 or else the position at the beginning of the Nth occurrence
970 (if searching backward) or the end (if searching forward).
971
972 POSIX is nonzero if we want full backtracking (POSIX style)
973 for this pattern. 0 means backtrack only enough to get a valid match. */
974
975 #define TRANSLATE(out, trt, d) \
976 do \
977 { \
978 if (! NILP (trt)) \
979 { \
980 Lisp_Object temp; \
981 temp = Faref (trt, make_number (d)); \
982 if (INTEGERP (temp)) \
983 out = XINT (temp); \
984 else \
985 out = d; \
986 } \
987 else \
988 out = d; \
989 } \
990 while (0)
991
992 static int
993 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
994 RE, trt, inverse_trt, posix)
995 Lisp_Object string;
996 int pos;
997 int pos_byte;
998 int lim;
999 int lim_byte;
1000 int n;
1001 int RE;
1002 Lisp_Object trt;
1003 Lisp_Object inverse_trt;
1004 int posix;
1005 {
1006 int len = XSTRING (string)->size;
1007 int len_byte = STRING_BYTES (XSTRING (string));
1008 register int i;
1009
1010 if (running_asynch_code)
1011 save_search_regs ();
1012
1013 /* Null string is found at starting position. */
1014 if (len == 0)
1015 {
1016 set_search_regs (pos, 0);
1017 return pos;
1018 }
1019
1020 /* Searching 0 times means don't move. */
1021 if (n == 0)
1022 return pos;
1023
1024 if (RE && !trivial_regexp_p (string))
1025 {
1026 unsigned char *p1, *p2;
1027 int s1, s2;
1028 struct re_pattern_buffer *bufp;
1029
1030 bufp = compile_pattern (string, &search_regs, trt, posix,
1031 !NILP (current_buffer->enable_multibyte_characters));
1032
1033 immediate_quit = 1; /* Quit immediately if user types ^G,
1034 because letting this function finish
1035 can take too long. */
1036 QUIT; /* Do a pending quit right away,
1037 to avoid paradoxical behavior */
1038 /* Get pointers and sizes of the two strings
1039 that make up the visible portion of the buffer. */
1040
1041 p1 = BEGV_ADDR;
1042 s1 = GPT_BYTE - BEGV_BYTE;
1043 p2 = GAP_END_ADDR;
1044 s2 = ZV_BYTE - GPT_BYTE;
1045 if (s1 < 0)
1046 {
1047 p2 = p1;
1048 s2 = ZV_BYTE - BEGV_BYTE;
1049 s1 = 0;
1050 }
1051 if (s2 < 0)
1052 {
1053 s1 = ZV_BYTE - BEGV_BYTE;
1054 s2 = 0;
1055 }
1056 re_match_object = Qnil;
1057
1058 while (n < 0)
1059 {
1060 int val;
1061 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1062 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1063 &search_regs,
1064 /* Don't allow match past current point */
1065 pos_byte - BEGV_BYTE);
1066 if (val == -2)
1067 {
1068 matcher_overflow ();
1069 }
1070 if (val >= 0)
1071 {
1072 pos_byte = search_regs.start[0] + BEGV_BYTE;
1073 for (i = 0; i < search_regs.num_regs; i++)
1074 if (search_regs.start[i] >= 0)
1075 {
1076 search_regs.start[i]
1077 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1078 search_regs.end[i]
1079 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1080 }
1081 XSETBUFFER (last_thing_searched, current_buffer);
1082 /* Set pos to the new position. */
1083 pos = search_regs.start[0];
1084 }
1085 else
1086 {
1087 immediate_quit = 0;
1088 return (n);
1089 }
1090 n++;
1091 }
1092 while (n > 0)
1093 {
1094 int val;
1095 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1096 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1097 &search_regs,
1098 lim_byte - BEGV_BYTE);
1099 if (val == -2)
1100 {
1101 matcher_overflow ();
1102 }
1103 if (val >= 0)
1104 {
1105 pos_byte = search_regs.end[0] + BEGV_BYTE;
1106 for (i = 0; i < search_regs.num_regs; i++)
1107 if (search_regs.start[i] >= 0)
1108 {
1109 search_regs.start[i]
1110 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1111 search_regs.end[i]
1112 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1113 }
1114 XSETBUFFER (last_thing_searched, current_buffer);
1115 pos = search_regs.end[0];
1116 }
1117 else
1118 {
1119 immediate_quit = 0;
1120 return (0 - n);
1121 }
1122 n--;
1123 }
1124 immediate_quit = 0;
1125 return (pos);
1126 }
1127 else /* non-RE case */
1128 {
1129 unsigned char *raw_pattern, *pat;
1130 int raw_pattern_size;
1131 int raw_pattern_size_byte;
1132 unsigned char *patbuf;
1133 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1134 unsigned char *base_pat = XSTRING (string)->data;
1135 int charset_base = -1;
1136 int simple = 1;
1137
1138 /* MULTIBYTE says whether the text to be searched is multibyte.
1139 We must convert PATTERN to match that, or we will not really
1140 find things right. */
1141
1142 if (multibyte == STRING_MULTIBYTE (string))
1143 {
1144 raw_pattern = (char *) XSTRING (string)->data;
1145 raw_pattern_size = XSTRING (string)->size;
1146 raw_pattern_size_byte = STRING_BYTES (XSTRING (string));
1147 }
1148 else if (multibyte)
1149 {
1150 raw_pattern_size = XSTRING (string)->size;
1151 raw_pattern_size_byte
1152 = count_size_as_multibyte (XSTRING (string)->data,
1153 raw_pattern_size);
1154 raw_pattern = (char *) alloca (raw_pattern_size_byte + 1);
1155 copy_text (XSTRING (string)->data, raw_pattern,
1156 XSTRING (string)->size, 0, 1);
1157 }
1158 else
1159 {
1160 /* Converting multibyte to single-byte.
1161
1162 ??? Perhaps this conversion should be done in a special way
1163 by subtracting nonascii-insert-offset from each non-ASCII char,
1164 so that only the multibyte chars which really correspond to
1165 the chosen single-byte character set can possibly match. */
1166 raw_pattern_size = XSTRING (string)->size;
1167 raw_pattern_size_byte = XSTRING (string)->size;
1168 raw_pattern = (char *) alloca (raw_pattern_size + 1);
1169 copy_text (XSTRING (string)->data, raw_pattern,
1170 STRING_BYTES (XSTRING (string)), 1, 0);
1171 }
1172
1173 /* Copy and optionally translate the pattern. */
1174 len = raw_pattern_size;
1175 len_byte = raw_pattern_size_byte;
1176 patbuf = (unsigned char *) alloca (len_byte);
1177 pat = patbuf;
1178 base_pat = raw_pattern;
1179 if (multibyte)
1180 {
1181 while (--len >= 0)
1182 {
1183 unsigned char workbuf[4], *str;
1184 int c, translated, inverse;
1185 int in_charlen, charlen;
1186
1187 /* If we got here and the RE flag is set, it's because we're
1188 dealing with a regexp known to be trivial, so the backslash
1189 just quotes the next character. */
1190 if (RE && *base_pat == '\\')
1191 {
1192 len--;
1193 len_byte--;
1194 base_pat++;
1195 }
1196
1197 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1198 /* Translate the character, if requested. */
1199 TRANSLATE (translated, trt, c);
1200 /* If translation changed the byte-length, go back
1201 to the original character. */
1202 charlen = CHAR_STRING (translated, workbuf, str);
1203 if (in_charlen != charlen)
1204 {
1205 translated = c;
1206 charlen = CHAR_STRING (c, workbuf, str);
1207 }
1208
1209 TRANSLATE (inverse, inverse_trt, c);
1210
1211 /* Did this char actually get translated?
1212 Would any other char get translated into it? */
1213 if (translated != c || inverse != c)
1214 {
1215 /* Keep track of which character set row
1216 contains the characters that need translation. */
1217 int charset_base_code = c & ~0xff;
1218 if (charset_base == -1)
1219 charset_base = charset_base_code;
1220 else if (charset_base != charset_base_code)
1221 /* If two different rows appear, needing translation,
1222 then we cannot use boyer_moore search. */
1223 simple = 0;
1224 /* ??? Handa: this must do simple = 0
1225 if c is a composite character. */
1226 }
1227
1228 /* Store this character into the translated pattern. */
1229 bcopy (str, pat, charlen);
1230 pat += charlen;
1231 base_pat += in_charlen;
1232 len_byte -= in_charlen;
1233 }
1234 }
1235 else
1236 {
1237 while (--len >= 0)
1238 {
1239 int c, translated, inverse;
1240
1241 /* If we got here and the RE flag is set, it's because we're
1242 dealing with a regexp known to be trivial, so the backslash
1243 just quotes the next character. */
1244 if (RE && *base_pat == '\\')
1245 {
1246 len--;
1247 base_pat++;
1248 }
1249 c = *base_pat++;
1250 TRANSLATE (translated, trt, c);
1251 TRANSLATE (inverse, inverse_trt, c);
1252
1253 /* Did this char actually get translated?
1254 Would any other char get translated into it? */
1255 if (translated != c || inverse != c)
1256 {
1257 /* Keep track of which character set row
1258 contains the characters that need translation. */
1259 int charset_base_code = c & ~0xff;
1260 if (charset_base == -1)
1261 charset_base = charset_base_code;
1262 else if (charset_base != charset_base_code)
1263 /* If two different rows appear, needing translation,
1264 then we cannot use boyer_moore search. */
1265 simple = 0;
1266 }
1267 *pat++ = translated;
1268 }
1269 }
1270
1271 len_byte = pat - patbuf;
1272 len = raw_pattern_size;
1273 pat = base_pat = patbuf;
1274
1275 if (simple)
1276 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1277 pos, pos_byte, lim, lim_byte,
1278 charset_base);
1279 else
1280 return simple_search (n, pat, len, len_byte, trt,
1281 pos, pos_byte, lim, lim_byte);
1282 }
1283 }
1284 \f
1285 /* Do a simple string search N times for the string PAT,
1286 whose length is LEN/LEN_BYTE,
1287 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1288 TRT is the translation table.
1289
1290 Return the character position where the match is found.
1291 Otherwise, if M matches remained to be found, return -M.
1292
1293 This kind of search works regardless of what is in PAT and
1294 regardless of what is in TRT. It is used in cases where
1295 boyer_moore cannot work. */
1296
1297 static int
1298 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1299 int n;
1300 unsigned char *pat;
1301 int len, len_byte;
1302 Lisp_Object trt;
1303 int pos, pos_byte;
1304 int lim, lim_byte;
1305 {
1306 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1307 int forward = n > 0;
1308
1309 if (lim > pos && multibyte)
1310 while (n > 0)
1311 {
1312 while (1)
1313 {
1314 /* Try matching at position POS. */
1315 int this_pos = pos;
1316 int this_pos_byte = pos_byte;
1317 int this_len = len;
1318 int this_len_byte = len_byte;
1319 unsigned char *p = pat;
1320 if (pos + len > lim)
1321 goto stop;
1322
1323 while (this_len > 0)
1324 {
1325 int charlen, buf_charlen;
1326 int pat_ch, buf_ch;
1327
1328 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1329 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1330 ZV_BYTE - this_pos_byte,
1331 buf_charlen);
1332 TRANSLATE (buf_ch, trt, buf_ch);
1333
1334 if (buf_ch != pat_ch)
1335 break;
1336
1337 this_len_byte -= charlen;
1338 this_len--;
1339 p += charlen;
1340
1341 this_pos_byte += buf_charlen;
1342 this_pos++;
1343 }
1344
1345 if (this_len == 0)
1346 {
1347 pos += len;
1348 pos_byte += len_byte;
1349 break;
1350 }
1351
1352 INC_BOTH (pos, pos_byte);
1353 }
1354
1355 n--;
1356 }
1357 else if (lim > pos)
1358 while (n > 0)
1359 {
1360 while (1)
1361 {
1362 /* Try matching at position POS. */
1363 int this_pos = pos;
1364 int this_len = len;
1365 unsigned char *p = pat;
1366
1367 if (pos + len > lim)
1368 goto stop;
1369
1370 while (this_len > 0)
1371 {
1372 int pat_ch = *p++;
1373 int buf_ch = FETCH_BYTE (this_pos);
1374 TRANSLATE (buf_ch, trt, buf_ch);
1375
1376 if (buf_ch != pat_ch)
1377 break;
1378
1379 this_len--;
1380 this_pos++;
1381 }
1382
1383 if (this_len == 0)
1384 {
1385 pos += len;
1386 break;
1387 }
1388
1389 pos++;
1390 }
1391
1392 n--;
1393 }
1394 /* Backwards search. */
1395 else if (lim < pos && multibyte)
1396 while (n < 0)
1397 {
1398 while (1)
1399 {
1400 /* Try matching at position POS. */
1401 int this_pos = pos - len;
1402 int this_pos_byte = pos_byte - len_byte;
1403 int this_len = len;
1404 int this_len_byte = len_byte;
1405 unsigned char *p = pat;
1406
1407 if (pos - len < lim)
1408 goto stop;
1409
1410 while (this_len > 0)
1411 {
1412 int charlen, buf_charlen;
1413 int pat_ch, buf_ch;
1414
1415 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1416 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1417 ZV_BYTE - this_pos_byte,
1418 buf_charlen);
1419 TRANSLATE (buf_ch, trt, buf_ch);
1420
1421 if (buf_ch != pat_ch)
1422 break;
1423
1424 this_len_byte -= charlen;
1425 this_len--;
1426 p += charlen;
1427 this_pos_byte += buf_charlen;
1428 this_pos++;
1429 }
1430
1431 if (this_len == 0)
1432 {
1433 pos -= len;
1434 pos_byte -= len_byte;
1435 break;
1436 }
1437
1438 DEC_BOTH (pos, pos_byte);
1439 }
1440
1441 n++;
1442 }
1443 else if (lim < pos)
1444 while (n < 0)
1445 {
1446 while (1)
1447 {
1448 /* Try matching at position POS. */
1449 int this_pos = pos - len;
1450 int this_len = len;
1451 unsigned char *p = pat;
1452
1453 if (pos - len < lim)
1454 goto stop;
1455
1456 while (this_len > 0)
1457 {
1458 int pat_ch = *p++;
1459 int buf_ch = FETCH_BYTE (this_pos);
1460 TRANSLATE (buf_ch, trt, buf_ch);
1461
1462 if (buf_ch != pat_ch)
1463 break;
1464 this_len--;
1465 this_pos++;
1466 }
1467
1468 if (this_len == 0)
1469 {
1470 pos -= len;
1471 break;
1472 }
1473
1474 pos--;
1475 }
1476
1477 n++;
1478 }
1479
1480 stop:
1481 if (n == 0)
1482 {
1483 if (forward)
1484 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1485 else
1486 set_search_regs (multibyte ? pos_byte : pos, len_byte);
1487
1488 return pos;
1489 }
1490 else if (n > 0)
1491 return -n;
1492 else
1493 return n;
1494 }
1495 \f
1496 /* Do Boyer-Moore search N times for the string PAT,
1497 whose length is LEN/LEN_BYTE,
1498 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1499 DIRECTION says which direction we search in.
1500 TRT and INVERSE_TRT are translation tables.
1501
1502 This kind of search works if all the characters in PAT that have
1503 nontrivial translation are the same aside from the last byte. This
1504 makes it possible to translate just the last byte of a character,
1505 and do so after just a simple test of the context.
1506
1507 If that criterion is not satisfied, do not call this function. */
1508
1509 static int
1510 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1511 pos, pos_byte, lim, lim_byte, charset_base)
1512 int n;
1513 unsigned char *base_pat;
1514 int len, len_byte;
1515 Lisp_Object trt;
1516 Lisp_Object inverse_trt;
1517 int pos, pos_byte;
1518 int lim, lim_byte;
1519 int charset_base;
1520 {
1521 int direction = ((n > 0) ? 1 : -1);
1522 register int dirlen;
1523 int infinity, limit, k, stride_for_teases;
1524 register int *BM_tab;
1525 int *BM_tab_base;
1526 register unsigned char *cursor, *p_limit;
1527 register int i, j;
1528 unsigned char *pat;
1529 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1530
1531 unsigned char simple_translate[0400];
1532 int translate_prev_byte;
1533 int translate_anteprev_byte;
1534
1535 #ifdef C_ALLOCA
1536 int BM_tab_space[0400];
1537 BM_tab = &BM_tab_space[0];
1538 #else
1539 BM_tab = (int *) alloca (0400 * sizeof (int));
1540 #endif
1541 /* The general approach is that we are going to maintain that we know */
1542 /* the first (closest to the present position, in whatever direction */
1543 /* we're searching) character that could possibly be the last */
1544 /* (furthest from present position) character of a valid match. We */
1545 /* advance the state of our knowledge by looking at that character */
1546 /* and seeing whether it indeed matches the last character of the */
1547 /* pattern. If it does, we take a closer look. If it does not, we */
1548 /* move our pointer (to putative last characters) as far as is */
1549 /* logically possible. This amount of movement, which I call a */
1550 /* stride, will be the length of the pattern if the actual character */
1551 /* appears nowhere in the pattern, otherwise it will be the distance */
1552 /* from the last occurrence of that character to the end of the */
1553 /* pattern. */
1554 /* As a coding trick, an enormous stride is coded into the table for */
1555 /* characters that match the last character. This allows use of only */
1556 /* a single test, a test for having gone past the end of the */
1557 /* permissible match region, to test for both possible matches (when */
1558 /* the stride goes past the end immediately) and failure to */
1559 /* match (where you get nudged past the end one stride at a time). */
1560
1561 /* Here we make a "mickey mouse" BM table. The stride of the search */
1562 /* is determined only by the last character of the putative match. */
1563 /* If that character does not match, we will stride the proper */
1564 /* distance to propose a match that superimposes it on the last */
1565 /* instance of a character that matches it (per trt), or misses */
1566 /* it entirely if there is none. */
1567
1568 dirlen = len_byte * direction;
1569 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1570 if (direction < 0)
1571 pat = (base_pat += len_byte - 1);
1572 else
1573 pat = base_pat;
1574 BM_tab_base = BM_tab;
1575 BM_tab += 0400;
1576 j = dirlen; /* to get it in a register */
1577 /* A character that does not appear in the pattern induces a */
1578 /* stride equal to the pattern length. */
1579 while (BM_tab_base != BM_tab)
1580 {
1581 *--BM_tab = j;
1582 *--BM_tab = j;
1583 *--BM_tab = j;
1584 *--BM_tab = j;
1585 }
1586
1587 /* We use this for translation, instead of TRT itself.
1588 We fill this in to handle the characters that actually
1589 occur in the pattern. Others don't matter anyway! */
1590 bzero (simple_translate, sizeof simple_translate);
1591 for (i = 0; i < 0400; i++)
1592 simple_translate[i] = i;
1593
1594 i = 0;
1595 while (i != infinity)
1596 {
1597 unsigned char *ptr = pat + i;
1598 i += direction;
1599 if (i == dirlen)
1600 i = infinity;
1601 if (! NILP (trt))
1602 {
1603 int ch;
1604 int untranslated;
1605 int this_translated = 1;
1606
1607 if (multibyte
1608 && (ptr + 1 == pat + len_byte || CHAR_HEAD_P (ptr[1])))
1609 {
1610 unsigned char *charstart = ptr;
1611 while (! CHAR_HEAD_P (*charstart))
1612 charstart--;
1613 untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
1614 if (charset_base == (untranslated & ~0xff))
1615 {
1616 TRANSLATE (ch, trt, untranslated);
1617 if (! CHAR_HEAD_P (*ptr))
1618 {
1619 translate_prev_byte = ptr[-1];
1620 if (! CHAR_HEAD_P (translate_prev_byte))
1621 translate_anteprev_byte = ptr[-2];
1622 }
1623 }
1624 else
1625 {
1626 this_translated = 0;
1627 ch = *ptr;
1628 }
1629 }
1630 else if (!multibyte)
1631 TRANSLATE (ch, trt, *ptr);
1632 else
1633 {
1634 ch = *ptr;
1635 this_translated = 0;
1636 }
1637
1638 if (ch > 0400)
1639 j = ((unsigned char) ch) | 0200;
1640 else
1641 j = (unsigned char) ch;
1642
1643 if (i == infinity)
1644 stride_for_teases = BM_tab[j];
1645
1646 BM_tab[j] = dirlen - i;
1647 /* A translation table is accompanied by its inverse -- see */
1648 /* comment following downcase_table for details */
1649 if (this_translated)
1650 {
1651 int starting_ch = ch;
1652 int starting_j = j;
1653 while (1)
1654 {
1655 TRANSLATE (ch, inverse_trt, ch);
1656 if (ch > 0400)
1657 j = ((unsigned char) ch) | 0200;
1658 else
1659 j = (unsigned char) ch;
1660
1661 /* For all the characters that map into CH,
1662 set up simple_translate to map the last byte
1663 into STARTING_J. */
1664 simple_translate[j] = starting_j;
1665 if (ch == starting_ch)
1666 break;
1667 BM_tab[j] = dirlen - i;
1668 }
1669 }
1670 }
1671 else
1672 {
1673 j = *ptr;
1674
1675 if (i == infinity)
1676 stride_for_teases = BM_tab[j];
1677 BM_tab[j] = dirlen - i;
1678 }
1679 /* stride_for_teases tells how much to stride if we get a */
1680 /* match on the far character but are subsequently */
1681 /* disappointed, by recording what the stride would have been */
1682 /* for that character if the last character had been */
1683 /* different. */
1684 }
1685 infinity = dirlen - infinity;
1686 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1687 /* loop invariant - POS_BYTE points at where last char (first
1688 char if reverse) of pattern would align in a possible match. */
1689 while (n != 0)
1690 {
1691 int tail_end;
1692 unsigned char *tail_end_ptr;
1693
1694 /* It's been reported that some (broken) compiler thinks that
1695 Boolean expressions in an arithmetic context are unsigned.
1696 Using an explicit ?1:0 prevents this. */
1697 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1698 < 0)
1699 return (n * (0 - direction));
1700 /* First we do the part we can by pointers (maybe nothing) */
1701 QUIT;
1702 pat = base_pat;
1703 limit = pos_byte - dirlen + direction;
1704 limit = ((direction > 0)
1705 ? BUFFER_CEILING_OF (limit)
1706 : BUFFER_FLOOR_OF (limit));
1707 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1708 can take on without hitting edge of buffer or the gap. */
1709 limit = ((direction > 0)
1710 ? min (lim_byte - 1, min (limit, pos_byte + 20000))
1711 : max (lim_byte, max (limit, pos_byte - 20000)));
1712 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1713 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1714
1715 if ((limit - pos_byte) * direction > 20)
1716 {
1717 unsigned char *p2;
1718
1719 p_limit = BYTE_POS_ADDR (limit);
1720 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1721 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1722 while (1) /* use one cursor setting as long as i can */
1723 {
1724 if (direction > 0) /* worth duplicating */
1725 {
1726 /* Use signed comparison if appropriate
1727 to make cursor+infinity sure to be > p_limit.
1728 Assuming that the buffer lies in a range of addresses
1729 that are all "positive" (as ints) or all "negative",
1730 either kind of comparison will work as long
1731 as we don't step by infinity. So pick the kind
1732 that works when we do step by infinity. */
1733 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1734 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1735 cursor += BM_tab[*cursor];
1736 else
1737 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1738 cursor += BM_tab[*cursor];
1739 }
1740 else
1741 {
1742 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1743 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1744 cursor += BM_tab[*cursor];
1745 else
1746 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1747 cursor += BM_tab[*cursor];
1748 }
1749 /* If you are here, cursor is beyond the end of the searched region. */
1750 /* This can happen if you match on the far character of the pattern, */
1751 /* because the "stride" of that character is infinity, a number able */
1752 /* to throw you well beyond the end of the search. It can also */
1753 /* happen if you fail to match within the permitted region and would */
1754 /* otherwise try a character beyond that region */
1755 if ((cursor - p_limit) * direction <= len_byte)
1756 break; /* a small overrun is genuine */
1757 cursor -= infinity; /* large overrun = hit */
1758 i = dirlen - direction;
1759 if (! NILP (trt))
1760 {
1761 while ((i -= direction) + direction != 0)
1762 {
1763 int ch;
1764 cursor -= direction;
1765 /* Translate only the last byte of a character. */
1766 if (! multibyte
1767 || ((cursor == tail_end_ptr
1768 || CHAR_HEAD_P (cursor[1]))
1769 && (CHAR_HEAD_P (cursor[0])
1770 || (translate_prev_byte == cursor[-1]
1771 && (CHAR_HEAD_P (translate_prev_byte)
1772 || translate_anteprev_byte == cursor[-2])))))
1773 ch = simple_translate[*cursor];
1774 else
1775 ch = *cursor;
1776 if (pat[i] != ch)
1777 break;
1778 }
1779 }
1780 else
1781 {
1782 while ((i -= direction) + direction != 0)
1783 {
1784 cursor -= direction;
1785 if (pat[i] != *cursor)
1786 break;
1787 }
1788 }
1789 cursor += dirlen - i - direction; /* fix cursor */
1790 if (i + direction == 0)
1791 {
1792 int position;
1793
1794 cursor -= direction;
1795
1796 position = pos_byte + cursor - p2 + ((direction > 0)
1797 ? 1 - len_byte : 0);
1798 set_search_regs (position, len_byte);
1799
1800 if ((n -= direction) != 0)
1801 cursor += dirlen; /* to resume search */
1802 else
1803 return ((direction > 0)
1804 ? search_regs.end[0] : search_regs.start[0]);
1805 }
1806 else
1807 cursor += stride_for_teases; /* <sigh> we lose - */
1808 }
1809 pos_byte += cursor - p2;
1810 }
1811 else
1812 /* Now we'll pick up a clump that has to be done the hard */
1813 /* way because it covers a discontinuity */
1814 {
1815 limit = ((direction > 0)
1816 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1817 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1818 limit = ((direction > 0)
1819 ? min (limit + len_byte, lim_byte - 1)
1820 : max (limit - len_byte, lim_byte));
1821 /* LIMIT is now the last value POS_BYTE can have
1822 and still be valid for a possible match. */
1823 while (1)
1824 {
1825 /* This loop can be coded for space rather than */
1826 /* speed because it will usually run only once. */
1827 /* (the reach is at most len + 21, and typically */
1828 /* does not exceed len) */
1829 while ((limit - pos_byte) * direction >= 0)
1830 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1831 /* now run the same tests to distinguish going off the */
1832 /* end, a match or a phony match. */
1833 if ((pos_byte - limit) * direction <= len_byte)
1834 break; /* ran off the end */
1835 /* Found what might be a match.
1836 Set POS_BYTE back to last (first if reverse) pos. */
1837 pos_byte -= infinity;
1838 i = dirlen - direction;
1839 while ((i -= direction) + direction != 0)
1840 {
1841 int ch;
1842 unsigned char *ptr;
1843 pos_byte -= direction;
1844 ptr = BYTE_POS_ADDR (pos_byte);
1845 /* Translate only the last byte of a character. */
1846 if (! multibyte
1847 || ((ptr == tail_end_ptr
1848 || CHAR_HEAD_P (ptr[1]))
1849 && (CHAR_HEAD_P (ptr[0])
1850 || (translate_prev_byte == ptr[-1]
1851 && (CHAR_HEAD_P (translate_prev_byte)
1852 || translate_anteprev_byte == ptr[-2])))))
1853 ch = simple_translate[*ptr];
1854 else
1855 ch = *ptr;
1856 if (pat[i] != ch)
1857 break;
1858 }
1859 /* Above loop has moved POS_BYTE part or all the way
1860 back to the first pos (last pos if reverse).
1861 Set it once again at the last (first if reverse) char. */
1862 pos_byte += dirlen - i- direction;
1863 if (i + direction == 0)
1864 {
1865 int position;
1866 pos_byte -= direction;
1867
1868 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1869
1870 set_search_regs (position, len_byte);
1871
1872 if ((n -= direction) != 0)
1873 pos_byte += dirlen; /* to resume search */
1874 else
1875 return ((direction > 0)
1876 ? search_regs.end[0] : search_regs.start[0]);
1877 }
1878 else
1879 pos_byte += stride_for_teases;
1880 }
1881 }
1882 /* We have done one clump. Can we continue? */
1883 if ((lim_byte - pos_byte) * direction < 0)
1884 return ((0 - n) * direction);
1885 }
1886 return BYTE_TO_CHAR (pos_byte);
1887 }
1888
1889 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1890 for a match just found in the current buffer. */
1891
1892 static void
1893 set_search_regs (beg_byte, nbytes)
1894 int beg_byte, nbytes;
1895 {
1896 /* Make sure we have registers in which to store
1897 the match position. */
1898 if (search_regs.num_regs == 0)
1899 {
1900 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1901 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1902 search_regs.num_regs = 2;
1903 }
1904
1905 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1906 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
1907 XSETBUFFER (last_thing_searched, current_buffer);
1908 }
1909 \f
1910 /* Given a string of words separated by word delimiters,
1911 compute a regexp that matches those exact words
1912 separated by arbitrary punctuation. */
1913
1914 static Lisp_Object
1915 wordify (string)
1916 Lisp_Object string;
1917 {
1918 register unsigned char *p, *o;
1919 register int i, i_byte, len, punct_count = 0, word_count = 0;
1920 Lisp_Object val;
1921 int prev_c = 0;
1922 int adjust;
1923
1924 CHECK_STRING (string, 0);
1925 p = XSTRING (string)->data;
1926 len = XSTRING (string)->size;
1927
1928 for (i = 0, i_byte = 0; i < len; )
1929 {
1930 int c;
1931
1932 if (STRING_MULTIBYTE (string))
1933 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1934 else
1935 c = XSTRING (string)->data[i++];
1936
1937 if (SYNTAX (c) != Sword)
1938 {
1939 punct_count++;
1940 if (i > 0 && SYNTAX (prev_c) == Sword)
1941 word_count++;
1942 }
1943
1944 prev_c = c;
1945 }
1946
1947 if (SYNTAX (prev_c) == Sword)
1948 word_count++;
1949 if (!word_count)
1950 return build_string ("");
1951
1952 adjust = - punct_count + 5 * (word_count - 1) + 4;
1953 val = make_uninit_multibyte_string (len + adjust,
1954 STRING_BYTES (XSTRING (string)) + adjust);
1955
1956 o = XSTRING (val)->data;
1957 *o++ = '\\';
1958 *o++ = 'b';
1959
1960 for (i = 0; i < STRING_BYTES (XSTRING (val)); i++)
1961 if (SYNTAX (p[i]) == Sword)
1962 *o++ = p[i];
1963 else if (i > 0 && SYNTAX (p[i-1]) == Sword && --word_count)
1964 {
1965 *o++ = '\\';
1966 *o++ = 'W';
1967 *o++ = '\\';
1968 *o++ = 'W';
1969 *o++ = '*';
1970 }
1971
1972 *o++ = '\\';
1973 *o++ = 'b';
1974
1975 return val;
1976 }
1977 \f
1978 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
1979 "MSearch backward: ",
1980 "Search backward from point for STRING.\n\
1981 Set point to the beginning of the occurrence found, and return point.\n\
1982 An optional second argument bounds the search; it is a buffer position.\n\
1983 The match found must not extend before that position.\n\
1984 Optional third argument, if t, means if fail just return nil (no error).\n\
1985 If not nil and not t, position at limit of search and return nil.\n\
1986 Optional fourth argument is repeat count--search for successive occurrences.\n\
1987 See also the functions `match-beginning', `match-end' and `replace-match'.")
1988 (string, bound, noerror, count)
1989 Lisp_Object string, bound, noerror, count;
1990 {
1991 return search_command (string, bound, noerror, count, -1, 0, 0);
1992 }
1993
1994 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
1995 "Search forward from point for STRING.\n\
1996 Set point to the end of the occurrence found, and return point.\n\
1997 An optional second argument bounds the search; it is a buffer position.\n\
1998 The match found must not extend after that position. nil is equivalent\n\
1999 to (point-max).\n\
2000 Optional third argument, if t, means if fail just return nil (no error).\n\
2001 If not nil and not t, move to limit of search and return nil.\n\
2002 Optional fourth argument is repeat count--search for successive occurrences.\n\
2003 See also the functions `match-beginning', `match-end' and `replace-match'.")
2004 (string, bound, noerror, count)
2005 Lisp_Object string, bound, noerror, count;
2006 {
2007 return search_command (string, bound, noerror, count, 1, 0, 0);
2008 }
2009
2010 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2011 "sWord search backward: ",
2012 "Search backward from point for STRING, ignoring differences in punctuation.\n\
2013 Set point to the beginning of the occurrence found, and return point.\n\
2014 An optional second argument bounds the search; it is a buffer position.\n\
2015 The match found must not extend before that position.\n\
2016 Optional third argument, if t, means if fail just return nil (no error).\n\
2017 If not nil and not t, move to limit of search and return nil.\n\
2018 Optional fourth argument is repeat count--search for successive occurrences.")
2019 (string, bound, noerror, count)
2020 Lisp_Object string, bound, noerror, count;
2021 {
2022 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2023 }
2024
2025 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2026 "sWord search: ",
2027 "Search forward from point for STRING, ignoring differences in punctuation.\n\
2028 Set point to the end of the occurrence found, and return point.\n\
2029 An optional second argument bounds the search; it is a buffer position.\n\
2030 The match found must not extend after that position.\n\
2031 Optional third argument, if t, means if fail just return nil (no error).\n\
2032 If not nil and not t, move to limit of search and return nil.\n\
2033 Optional fourth argument is repeat count--search for successive occurrences.")
2034 (string, bound, noerror, count)
2035 Lisp_Object string, bound, noerror, count;
2036 {
2037 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2038 }
2039
2040 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2041 "sRE search backward: ",
2042 "Search backward from point for match for regular expression REGEXP.\n\
2043 Set point to the beginning of the match, and return point.\n\
2044 The match found is the one starting last in the buffer\n\
2045 and yet ending before the origin of the search.\n\
2046 An optional second argument bounds the search; it is a buffer position.\n\
2047 The match found must start at or after that position.\n\
2048 Optional third argument, if t, means if fail just return nil (no error).\n\
2049 If not nil and not t, move to limit of search and return nil.\n\
2050 Optional fourth argument is repeat count--search for successive occurrences.\n\
2051 See also the functions `match-beginning', `match-end' and `replace-match'.")
2052 (regexp, bound, noerror, count)
2053 Lisp_Object regexp, bound, noerror, count;
2054 {
2055 return search_command (regexp, bound, noerror, count, -1, 1, 0);
2056 }
2057
2058 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2059 "sRE search: ",
2060 "Search forward from point for regular expression REGEXP.\n\
2061 Set point to the end of the occurrence found, and return point.\n\
2062 An optional second argument bounds the search; it is a buffer position.\n\
2063 The match found must not extend after that position.\n\
2064 Optional third argument, if t, means if fail just return nil (no error).\n\
2065 If not nil and not t, move to limit of search and return nil.\n\
2066 Optional fourth argument is repeat count--search for successive occurrences.\n\
2067 See also the functions `match-beginning', `match-end' and `replace-match'.")
2068 (regexp, bound, noerror, count)
2069 Lisp_Object regexp, bound, noerror, count;
2070 {
2071 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2072 }
2073
2074 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2075 "sPosix search backward: ",
2076 "Search backward from point for match for regular expression REGEXP.\n\
2077 Find the longest match in accord with Posix regular expression rules.\n\
2078 Set point to the beginning of the match, and return point.\n\
2079 The match found is the one starting last in the buffer\n\
2080 and yet ending before the origin of the search.\n\
2081 An optional second argument bounds the search; it is a buffer position.\n\
2082 The match found must start at or after that position.\n\
2083 Optional third argument, if t, means if fail just return nil (no error).\n\
2084 If not nil and not t, move to limit of search and return nil.\n\
2085 Optional fourth argument is repeat count--search for successive occurrences.\n\
2086 See also the functions `match-beginning', `match-end' and `replace-match'.")
2087 (regexp, bound, noerror, count)
2088 Lisp_Object regexp, bound, noerror, count;
2089 {
2090 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2091 }
2092
2093 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2094 "sPosix search: ",
2095 "Search forward from point for regular expression REGEXP.\n\
2096 Find the longest match in accord with Posix regular expression rules.\n\
2097 Set point to the end of the occurrence found, and return point.\n\
2098 An optional second argument bounds the search; it is a buffer position.\n\
2099 The match found must not extend after that position.\n\
2100 Optional third argument, if t, means if fail just return nil (no error).\n\
2101 If not nil and not t, move to limit of search and return nil.\n\
2102 Optional fourth argument is repeat count--search for successive occurrences.\n\
2103 See also the functions `match-beginning', `match-end' and `replace-match'.")
2104 (regexp, bound, noerror, count)
2105 Lisp_Object regexp, bound, noerror, count;
2106 {
2107 return search_command (regexp, bound, noerror, count, 1, 1, 1);
2108 }
2109 \f
2110 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2111 "Replace text matched by last search with NEWTEXT.\n\
2112 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
2113 Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
2114 based on the replaced text.\n\
2115 If the replaced text has only capital letters\n\
2116 and has at least one multiletter word, convert NEWTEXT to all caps.\n\
2117 If the replaced text has at least one word starting with a capital letter,\n\
2118 then capitalize each word in NEWTEXT.\n\n\
2119 If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
2120 Otherwise treat `\\' as special:\n\
2121 `\\&' in NEWTEXT means substitute original matched text.\n\
2122 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
2123 If Nth parens didn't match, substitute nothing.\n\
2124 `\\\\' means insert one `\\'.\n\
2125 FIXEDCASE and LITERAL are optional arguments.\n\
2126 Leaves point at end of replacement text.\n\
2127 \n\
2128 The optional fourth argument STRING can be a string to modify.\n\
2129 In that case, this function creates and returns a new string\n\
2130 which is made by replacing the part of STRING that was matched.\n\
2131 \n\
2132 The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
2133 It says to replace just that subexpression instead of the whole match.\n\
2134 This is useful only after a regular expression search or match\n\
2135 since only regular expressions have distinguished subexpressions.")
2136 (newtext, fixedcase, literal, string, subexp)
2137 Lisp_Object newtext, fixedcase, literal, string, subexp;
2138 {
2139 enum { nochange, all_caps, cap_initial } case_action;
2140 register int pos, last;
2141 int some_multiletter_word;
2142 int some_lowercase;
2143 int some_uppercase;
2144 int some_nonuppercase_initial;
2145 register int c, prevc;
2146 int inslen;
2147 int sub;
2148 int opoint, newpoint;
2149
2150 CHECK_STRING (newtext, 0);
2151
2152 if (! NILP (string))
2153 CHECK_STRING (string, 4);
2154
2155 case_action = nochange; /* We tried an initialization */
2156 /* but some C compilers blew it */
2157
2158 if (search_regs.num_regs <= 0)
2159 error ("replace-match called before any match found");
2160
2161 if (NILP (subexp))
2162 sub = 0;
2163 else
2164 {
2165 CHECK_NUMBER (subexp, 3);
2166 sub = XINT (subexp);
2167 if (sub < 0 || sub >= search_regs.num_regs)
2168 args_out_of_range (subexp, make_number (search_regs.num_regs));
2169 }
2170
2171 if (NILP (string))
2172 {
2173 if (search_regs.start[sub] < BEGV
2174 || search_regs.start[sub] > search_regs.end[sub]
2175 || search_regs.end[sub] > ZV)
2176 args_out_of_range (make_number (search_regs.start[sub]),
2177 make_number (search_regs.end[sub]));
2178 }
2179 else
2180 {
2181 if (search_regs.start[sub] < 0
2182 || search_regs.start[sub] > search_regs.end[sub]
2183 || search_regs.end[sub] > XSTRING (string)->size)
2184 args_out_of_range (make_number (search_regs.start[sub]),
2185 make_number (search_regs.end[sub]));
2186 }
2187
2188 if (NILP (fixedcase))
2189 {
2190 int beg;
2191 /* Decide how to casify by examining the matched text. */
2192
2193 if (NILP (string))
2194 last = CHAR_TO_BYTE (search_regs.end[sub]);
2195 else
2196 last = search_regs.end[sub];
2197
2198 if (NILP (string))
2199 beg = CHAR_TO_BYTE (search_regs.start[sub]);
2200 else
2201 beg = search_regs.start[sub];
2202
2203 prevc = '\n';
2204 case_action = all_caps;
2205
2206 /* some_multiletter_word is set nonzero if any original word
2207 is more than one letter long. */
2208 some_multiletter_word = 0;
2209 some_lowercase = 0;
2210 some_nonuppercase_initial = 0;
2211 some_uppercase = 0;
2212
2213 for (pos = beg; pos < last; pos++)
2214 {
2215 if (NILP (string))
2216 c = FETCH_BYTE (pos);
2217 else
2218 c = XSTRING (string)->data[pos];
2219
2220 if (LOWERCASEP (c))
2221 {
2222 /* Cannot be all caps if any original char is lower case */
2223
2224 some_lowercase = 1;
2225 if (SYNTAX (prevc) != Sword)
2226 some_nonuppercase_initial = 1;
2227 else
2228 some_multiletter_word = 1;
2229 }
2230 else if (!NOCASEP (c))
2231 {
2232 some_uppercase = 1;
2233 if (SYNTAX (prevc) != Sword)
2234 ;
2235 else
2236 some_multiletter_word = 1;
2237 }
2238 else
2239 {
2240 /* If the initial is a caseless word constituent,
2241 treat that like a lowercase initial. */
2242 if (SYNTAX (prevc) != Sword)
2243 some_nonuppercase_initial = 1;
2244 }
2245
2246 prevc = c;
2247 }
2248
2249 /* Convert to all caps if the old text is all caps
2250 and has at least one multiletter word. */
2251 if (! some_lowercase && some_multiletter_word)
2252 case_action = all_caps;
2253 /* Capitalize each word, if the old text has all capitalized words. */
2254 else if (!some_nonuppercase_initial && some_multiletter_word)
2255 case_action = cap_initial;
2256 else if (!some_nonuppercase_initial && some_uppercase)
2257 /* Should x -> yz, operating on X, give Yz or YZ?
2258 We'll assume the latter. */
2259 case_action = all_caps;
2260 else
2261 case_action = nochange;
2262 }
2263
2264 /* Do replacement in a string. */
2265 if (!NILP (string))
2266 {
2267 Lisp_Object before, after;
2268
2269 before = Fsubstring (string, make_number (0),
2270 make_number (search_regs.start[sub]));
2271 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2272
2273 /* Substitute parts of the match into NEWTEXT
2274 if desired. */
2275 if (NILP (literal))
2276 {
2277 int lastpos = -1;
2278 int lastpos_byte = -1;
2279 /* We build up the substituted string in ACCUM. */
2280 Lisp_Object accum;
2281 Lisp_Object middle;
2282 int pos_byte;
2283
2284 accum = Qnil;
2285
2286 for (pos_byte = 0, pos = 0; pos_byte < STRING_BYTES (XSTRING (newtext));)
2287 {
2288 int substart = -1;
2289 int subend;
2290 int delbackslash = 0;
2291
2292 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2293
2294 if (c == '\\')
2295 {
2296 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2297 if (c == '&')
2298 {
2299 substart = search_regs.start[sub];
2300 subend = search_regs.end[sub];
2301 }
2302 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2303 {
2304 if (search_regs.start[c - '0'] >= 0)
2305 {
2306 substart = search_regs.start[c - '0'];
2307 subend = search_regs.end[c - '0'];
2308 }
2309 }
2310 else if (c == '\\')
2311 delbackslash = 1;
2312 else
2313 error ("Invalid use of `\\' in replacement text");
2314 }
2315 if (substart >= 0)
2316 {
2317 if (pos - 1 != lastpos + 1)
2318 middle = substring_both (newtext, lastpos + 1,
2319 lastpos_byte + 1,
2320 pos - 1, pos_byte - 1);
2321 else
2322 middle = Qnil;
2323 accum = concat3 (accum, middle,
2324 Fsubstring (string,
2325 make_number (substart),
2326 make_number (subend)));
2327 lastpos = pos;
2328 lastpos_byte = pos_byte;
2329 }
2330 else if (delbackslash)
2331 {
2332 middle = substring_both (newtext, lastpos + 1,
2333 lastpos_byte + 1,
2334 pos, pos_byte);
2335
2336 accum = concat2 (accum, middle);
2337 lastpos = pos;
2338 lastpos_byte = pos_byte;
2339 }
2340 }
2341
2342 if (pos != lastpos + 1)
2343 middle = substring_both (newtext, lastpos + 1,
2344 lastpos_byte + 1,
2345 pos, pos_byte);
2346 else
2347 middle = Qnil;
2348
2349 newtext = concat2 (accum, middle);
2350 }
2351
2352 /* Do case substitution in NEWTEXT if desired. */
2353 if (case_action == all_caps)
2354 newtext = Fupcase (newtext);
2355 else if (case_action == cap_initial)
2356 newtext = Fupcase_initials (newtext);
2357
2358 return concat3 (before, newtext, after);
2359 }
2360
2361 /* Record point, the move (quietly) to the start of the match. */
2362 if (PT > search_regs.start[sub])
2363 opoint = PT - ZV;
2364 else
2365 opoint = PT;
2366
2367 TEMP_SET_PT (search_regs.start[sub]);
2368
2369 /* We insert the replacement text before the old text, and then
2370 delete the original text. This means that markers at the
2371 beginning or end of the original will float to the corresponding
2372 position in the replacement. */
2373 if (!NILP (literal))
2374 Finsert_and_inherit (1, &newtext);
2375 else
2376 {
2377 struct gcpro gcpro1;
2378 GCPRO1 (newtext);
2379
2380 for (pos = 0; pos < XSTRING (newtext)->size; pos++)
2381 {
2382 int offset = PT - search_regs.start[sub];
2383
2384 c = XSTRING (newtext)->data[pos];
2385 if (c == '\\')
2386 {
2387 c = XSTRING (newtext)->data[++pos];
2388 if (c == '&')
2389 Finsert_buffer_substring
2390 (Fcurrent_buffer (),
2391 make_number (search_regs.start[sub] + offset),
2392 make_number (search_regs.end[sub] + offset));
2393 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2394 {
2395 if (search_regs.start[c - '0'] >= 1)
2396 Finsert_buffer_substring
2397 (Fcurrent_buffer (),
2398 make_number (search_regs.start[c - '0'] + offset),
2399 make_number (search_regs.end[c - '0'] + offset));
2400 }
2401 else if (c == '\\')
2402 insert_char (c);
2403 else
2404 error ("Invalid use of `\\' in replacement text");
2405 }
2406 else
2407 insert_char (c);
2408 }
2409 UNGCPRO;
2410 }
2411
2412 inslen = PT - (search_regs.start[sub]);
2413 del_range (search_regs.start[sub] + inslen, search_regs.end[sub] + inslen);
2414
2415 if (case_action == all_caps)
2416 Fupcase_region (make_number (PT - inslen), make_number (PT));
2417 else if (case_action == cap_initial)
2418 Fupcase_initials_region (make_number (PT - inslen), make_number (PT));
2419
2420 newpoint = PT;
2421
2422 /* Put point back where it was in the text. */
2423 if (opoint <= 0)
2424 TEMP_SET_PT (opoint + ZV);
2425 else
2426 TEMP_SET_PT (opoint);
2427
2428 /* Now move point "officially" to the start of the inserted replacement. */
2429 move_if_not_intangible (newpoint);
2430
2431 return Qnil;
2432 }
2433 \f
2434 static Lisp_Object
2435 match_limit (num, beginningp)
2436 Lisp_Object num;
2437 int beginningp;
2438 {
2439 register int n;
2440
2441 CHECK_NUMBER (num, 0);
2442 n = XINT (num);
2443 if (n < 0 || n >= search_regs.num_regs)
2444 args_out_of_range (num, make_number (search_regs.num_regs));
2445 if (search_regs.num_regs <= 0
2446 || search_regs.start[n] < 0)
2447 return Qnil;
2448 return (make_number ((beginningp) ? search_regs.start[n]
2449 : search_regs.end[n]));
2450 }
2451
2452 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2453 "Return position of start of text matched by last search.\n\
2454 SUBEXP, a number, specifies which parenthesized expression in the last\n\
2455 regexp.\n\
2456 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2457 SUBEXP pairs.\n\
2458 Zero means the entire text matched by the whole regexp or whole string.")
2459 (subexp)
2460 Lisp_Object subexp;
2461 {
2462 return match_limit (subexp, 1);
2463 }
2464
2465 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2466 "Return position of end of text matched by last search.\n\
2467 SUBEXP, a number, specifies which parenthesized expression in the last\n\
2468 regexp.\n\
2469 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2470 SUBEXP pairs.\n\
2471 Zero means the entire text matched by the whole regexp or whole string.")
2472 (subexp)
2473 Lisp_Object subexp;
2474 {
2475 return match_limit (subexp, 0);
2476 }
2477
2478 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
2479 "Return a list containing all info on what the last search matched.\n\
2480 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
2481 All the elements are markers or nil (nil if the Nth pair didn't match)\n\
2482 if the last match was on a buffer; integers or nil if a string was matched.\n\
2483 Use `store-match-data' to reinstate the data in this list.\n\
2484 \n\
2485 If INTEGERS (the optional first argument) is non-nil, always use integers\n\
2486 \(rather than markers) to represent buffer positions.\n\
2487 If REUSE is a list, reuse it as part of the value. If REUSE is long enough\n\
2488 to hold all the values, and if INTEGERS is non-nil, no consing is done.")
2489 (integers, reuse)
2490 Lisp_Object integers, reuse;
2491 {
2492 Lisp_Object tail, prev;
2493 Lisp_Object *data;
2494 int i, len;
2495
2496 if (NILP (last_thing_searched))
2497 return Qnil;
2498
2499 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2500 * sizeof (Lisp_Object));
2501
2502 len = -1;
2503 for (i = 0; i < search_regs.num_regs; i++)
2504 {
2505 int start = search_regs.start[i];
2506 if (start >= 0)
2507 {
2508 if (EQ (last_thing_searched, Qt)
2509 || ! NILP (integers))
2510 {
2511 XSETFASTINT (data[2 * i], start);
2512 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2513 }
2514 else if (BUFFERP (last_thing_searched))
2515 {
2516 data[2 * i] = Fmake_marker ();
2517 Fset_marker (data[2 * i],
2518 make_number (start),
2519 last_thing_searched);
2520 data[2 * i + 1] = Fmake_marker ();
2521 Fset_marker (data[2 * i + 1],
2522 make_number (search_regs.end[i]),
2523 last_thing_searched);
2524 }
2525 else
2526 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2527 abort ();
2528
2529 len = i;
2530 }
2531 else
2532 data[2 * i] = data [2 * i + 1] = Qnil;
2533 }
2534
2535 /* If REUSE is not usable, cons up the values and return them. */
2536 if (! CONSP (reuse))
2537 return Flist (2 * len + 2, data);
2538
2539 /* If REUSE is a list, store as many value elements as will fit
2540 into the elements of REUSE. */
2541 for (i = 0, tail = reuse; CONSP (tail);
2542 i++, tail = XCONS (tail)->cdr)
2543 {
2544 if (i < 2 * len + 2)
2545 XCONS (tail)->car = data[i];
2546 else
2547 XCONS (tail)->car = Qnil;
2548 prev = tail;
2549 }
2550
2551 /* If we couldn't fit all value elements into REUSE,
2552 cons up the rest of them and add them to the end of REUSE. */
2553 if (i < 2 * len + 2)
2554 XCONS (prev)->cdr = Flist (2 * len + 2 - i, data + i);
2555
2556 return reuse;
2557 }
2558
2559
2560 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
2561 "Set internal data on last search match from elements of LIST.\n\
2562 LIST should have been created by calling `match-data' previously.")
2563 (list)
2564 register Lisp_Object list;
2565 {
2566 register int i;
2567 register Lisp_Object marker;
2568
2569 if (running_asynch_code)
2570 save_search_regs ();
2571
2572 if (!CONSP (list) && !NILP (list))
2573 list = wrong_type_argument (Qconsp, list);
2574
2575 /* Unless we find a marker with a buffer in LIST, assume that this
2576 match data came from a string. */
2577 last_thing_searched = Qt;
2578
2579 /* Allocate registers if they don't already exist. */
2580 {
2581 int length = XFASTINT (Flength (list)) / 2;
2582
2583 if (length > search_regs.num_regs)
2584 {
2585 if (search_regs.num_regs == 0)
2586 {
2587 search_regs.start
2588 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2589 search_regs.end
2590 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2591 }
2592 else
2593 {
2594 search_regs.start
2595 = (regoff_t *) xrealloc (search_regs.start,
2596 length * sizeof (regoff_t));
2597 search_regs.end
2598 = (regoff_t *) xrealloc (search_regs.end,
2599 length * sizeof (regoff_t));
2600 }
2601
2602 search_regs.num_regs = length;
2603 }
2604 }
2605
2606 for (i = 0; i < search_regs.num_regs; i++)
2607 {
2608 marker = Fcar (list);
2609 if (NILP (marker))
2610 {
2611 search_regs.start[i] = -1;
2612 list = Fcdr (list);
2613 }
2614 else
2615 {
2616 if (MARKERP (marker))
2617 {
2618 if (XMARKER (marker)->buffer == 0)
2619 XSETFASTINT (marker, 0);
2620 else
2621 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2622 }
2623
2624 CHECK_NUMBER_COERCE_MARKER (marker, 0);
2625 search_regs.start[i] = XINT (marker);
2626 list = Fcdr (list);
2627
2628 marker = Fcar (list);
2629 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2630 XSETFASTINT (marker, 0);
2631
2632 CHECK_NUMBER_COERCE_MARKER (marker, 0);
2633 search_regs.end[i] = XINT (marker);
2634 }
2635 list = Fcdr (list);
2636 }
2637
2638 return Qnil;
2639 }
2640
2641 /* If non-zero the match data have been saved in saved_search_regs
2642 during the execution of a sentinel or filter. */
2643 static int search_regs_saved;
2644 static struct re_registers saved_search_regs;
2645
2646 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2647 if asynchronous code (filter or sentinel) is running. */
2648 static void
2649 save_search_regs ()
2650 {
2651 if (!search_regs_saved)
2652 {
2653 saved_search_regs.num_regs = search_regs.num_regs;
2654 saved_search_regs.start = search_regs.start;
2655 saved_search_regs.end = search_regs.end;
2656 search_regs.num_regs = 0;
2657 search_regs.start = 0;
2658 search_regs.end = 0;
2659
2660 search_regs_saved = 1;
2661 }
2662 }
2663
2664 /* Called upon exit from filters and sentinels. */
2665 void
2666 restore_match_data ()
2667 {
2668 if (search_regs_saved)
2669 {
2670 if (search_regs.num_regs > 0)
2671 {
2672 xfree (search_regs.start);
2673 xfree (search_regs.end);
2674 }
2675 search_regs.num_regs = saved_search_regs.num_regs;
2676 search_regs.start = saved_search_regs.start;
2677 search_regs.end = saved_search_regs.end;
2678
2679 search_regs_saved = 0;
2680 }
2681 }
2682
2683 /* Quote a string to inactivate reg-expr chars */
2684
2685 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2686 "Return a regexp string which matches exactly STRING and nothing else.")
2687 (string)
2688 Lisp_Object string;
2689 {
2690 register unsigned char *in, *out, *end;
2691 register unsigned char *temp;
2692 int backslashes_added = 0;
2693
2694 CHECK_STRING (string, 0);
2695
2696 temp = (unsigned char *) alloca (STRING_BYTES (XSTRING (string)) * 2);
2697
2698 /* Now copy the data into the new string, inserting escapes. */
2699
2700 in = XSTRING (string)->data;
2701 end = in + STRING_BYTES (XSTRING (string));
2702 out = temp;
2703
2704 for (; in != end; in++)
2705 {
2706 if (*in == '[' || *in == ']'
2707 || *in == '*' || *in == '.' || *in == '\\'
2708 || *in == '?' || *in == '+'
2709 || *in == '^' || *in == '$')
2710 *out++ = '\\', backslashes_added++;
2711 *out++ = *in;
2712 }
2713
2714 return make_multibyte_string (temp,
2715 XSTRING (string)->size + backslashes_added,
2716 out - temp);
2717 }
2718 \f
2719 syms_of_search ()
2720 {
2721 register int i;
2722
2723 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2724 {
2725 searchbufs[i].buf.allocated = 100;
2726 searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
2727 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2728 searchbufs[i].regexp = Qnil;
2729 staticpro (&searchbufs[i].regexp);
2730 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2731 }
2732 searchbuf_head = &searchbufs[0];
2733
2734 Qsearch_failed = intern ("search-failed");
2735 staticpro (&Qsearch_failed);
2736 Qinvalid_regexp = intern ("invalid-regexp");
2737 staticpro (&Qinvalid_regexp);
2738
2739 Fput (Qsearch_failed, Qerror_conditions,
2740 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2741 Fput (Qsearch_failed, Qerror_message,
2742 build_string ("Search failed"));
2743
2744 Fput (Qinvalid_regexp, Qerror_conditions,
2745 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2746 Fput (Qinvalid_regexp, Qerror_message,
2747 build_string ("Invalid regexp"));
2748
2749 last_thing_searched = Qnil;
2750 staticpro (&last_thing_searched);
2751
2752 defsubr (&Slooking_at);
2753 defsubr (&Sposix_looking_at);
2754 defsubr (&Sstring_match);
2755 defsubr (&Sposix_string_match);
2756 defsubr (&Ssearch_forward);
2757 defsubr (&Ssearch_backward);
2758 defsubr (&Sword_search_forward);
2759 defsubr (&Sword_search_backward);
2760 defsubr (&Sre_search_forward);
2761 defsubr (&Sre_search_backward);
2762 defsubr (&Sposix_search_forward);
2763 defsubr (&Sposix_search_backward);
2764 defsubr (&Sreplace_match);
2765 defsubr (&Smatch_beginning);
2766 defsubr (&Smatch_end);
2767 defsubr (&Smatch_data);
2768 defsubr (&Sset_match_data);
2769 defsubr (&Sregexp_quote);
2770 }