*** empty log message ***
[bpt/emacs.git] / src / search.c
1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 1986, 1987, 1992 Free Software Foundation, Inc.
3
4 This file is part of GNU Emacs.
5
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 1, or (at your option)
9 any later version.
10
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20
21 #include "config.h"
22 #include "lisp.h"
23 #include "syntax.h"
24 #include "buffer.h"
25 #include "commands.h"
26
27 #include <sys/types.h>
28 #include "regex.h"
29
30 #define max(a, b) ((a) > (b) ? (a) : (b))
31 #define min(a, b) ((a) < (b) ? (a) : (b))
32
33 /* We compile regexps into this buffer and then use it for searching. */
34
35 struct re_pattern_buffer searchbuf;
36
37 char search_fastmap[0400];
38
39 /* Last regexp we compiled */
40
41 Lisp_Object last_regexp;
42
43 /* Every call to re_match, etc., must pass &search_regs as the regs
44 argument unless you can show it is unnecessary (i.e., if re_match
45 is certainly going to be called again before region-around-match
46 can be called).
47
48 Since the registers are now dynamically allocated, we need to make
49 sure not to refer to the Nth register before checking that it has
50 been allocated. */
51
52 static struct re_registers search_regs;
53
54 /* Nonzero if search_regs are indices in a string; 0 if in a buffer. */
55
56 static int search_regs_from_string;
57
58 /* error condition signalled when regexp compile_pattern fails */
59
60 Lisp_Object Qinvalid_regexp;
61
62 static void
63 matcher_overflow ()
64 {
65 error ("Stack overflow in regexp matcher");
66 }
67
68 #ifdef __STDC__
69 #define CONST const
70 #else
71 #define CONST
72 #endif
73
74 /* Compile a regexp and signal a Lisp error if anything goes wrong. */
75
76 compile_pattern (pattern, bufp, translate)
77 Lisp_Object pattern;
78 struct re_pattern_buffer *bufp;
79 char *translate;
80 {
81 CONST char *val;
82 Lisp_Object dummy;
83
84 if (EQ (pattern, last_regexp)
85 && translate == bufp->translate)
86 return;
87 last_regexp = Qnil;
88 bufp->translate = translate;
89 val = re_compile_pattern ((char *) XSTRING (pattern)->data,
90 XSTRING (pattern)->size,
91 bufp);
92 if (val)
93 {
94 dummy = build_string (val);
95 while (1)
96 Fsignal (Qinvalid_regexp, Fcons (dummy, Qnil));
97 }
98 last_regexp = pattern;
99 return;
100 }
101
102 /* Error condition used for failing searches */
103 Lisp_Object Qsearch_failed;
104
105 Lisp_Object
106 signal_failure (arg)
107 Lisp_Object arg;
108 {
109 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
110 return Qnil;
111 }
112 \f
113 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
114 "Return t if text after point matches regular expression PAT.\n\
115 This function modifies the match data that `match-beginning',\n\
116 `match-end' and `match-data' access; save and restore the match\n\
117 data if you want to preserve them.")
118 (string)
119 Lisp_Object string;
120 {
121 Lisp_Object val;
122 unsigned char *p1, *p2;
123 int s1, s2;
124 register int i;
125
126 CHECK_STRING (string, 0);
127 compile_pattern (string, &searchbuf,
128 !NILP (current_buffer->case_fold_search) ? DOWNCASE_TABLE : 0);
129
130 immediate_quit = 1;
131 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
132
133 /* Get pointers and sizes of the two strings
134 that make up the visible portion of the buffer. */
135
136 p1 = BEGV_ADDR;
137 s1 = GPT - BEGV;
138 p2 = GAP_END_ADDR;
139 s2 = ZV - GPT;
140 if (s1 < 0)
141 {
142 p2 = p1;
143 s2 = ZV - BEGV;
144 s1 = 0;
145 }
146 if (s2 < 0)
147 {
148 s1 = ZV - BEGV;
149 s2 = 0;
150 }
151
152 i = re_match_2 (&searchbuf, (char *) p1, s1, (char *) p2, s2,
153 point - BEGV, &search_regs,
154 ZV - BEGV);
155 if (i == -2)
156 matcher_overflow ();
157
158 val = (0 <= i ? Qt : Qnil);
159 for (i = 0; i < search_regs.num_regs; i++)
160 if (search_regs.start[i] >= 0)
161 {
162 search_regs.start[i] += BEGV;
163 search_regs.end[i] += BEGV;
164 }
165 search_regs_from_string = 0;
166 immediate_quit = 0;
167 return val;
168 }
169
170 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
171 "Return index of start of first match for REGEXP in STRING, or nil.\n\
172 If third arg START is non-nil, start search at that index in STRING.\n\
173 For index of first char beyond the match, do (match-end 0).\n\
174 `match-end' and `match-beginning' also give indices of substrings\n\
175 matched by parenthesis constructs in the pattern.")
176 (regexp, string, start)
177 Lisp_Object regexp, string, start;
178 {
179 int val;
180 int s;
181
182 CHECK_STRING (regexp, 0);
183 CHECK_STRING (string, 1);
184
185 if (NILP (start))
186 s = 0;
187 else
188 {
189 int len = XSTRING (string)->size;
190
191 CHECK_NUMBER (start, 2);
192 s = XINT (start);
193 if (s < 0 && -s <= len)
194 s = len - s;
195 else if (0 > s || s > len)
196 args_out_of_range (string, start);
197 }
198
199 compile_pattern (regexp, &searchbuf,
200 !NILP (current_buffer->case_fold_search) ? DOWNCASE_TABLE : 0);
201 immediate_quit = 1;
202 val = re_search (&searchbuf, (char *) XSTRING (string)->data,
203 XSTRING (string)->size, s, XSTRING (string)->size - s,
204 &search_regs);
205 immediate_quit = 0;
206 search_regs_from_string = 1;
207 if (val == -2)
208 matcher_overflow ();
209 if (val < 0) return Qnil;
210 return make_number (val);
211 }
212 \f
213 /* Search for COUNT instances of the character TARGET, starting at START.
214 If COUNT is negative, search backwards.
215
216 If we find COUNT instances, set *SHORTAGE to zero, and return the
217 position of the COUNTth character.
218
219 If we don't find COUNT instances before reaching the end of the
220 buffer (or the beginning, if scanning backwards), set *SHORTAGE to
221 the number of TARGETs left unfound, and return the end of the
222 buffer we bumped up against. */
223
224 scan_buffer (target, start, count, shortage)
225 int *shortage, start;
226 register int count, target;
227 {
228 int limit = ((count > 0) ? ZV - 1 : BEGV);
229 int direction = ((count > 0) ? 1 : -1);
230
231 register unsigned char *cursor;
232 unsigned char *base;
233
234 register int ceiling;
235 register unsigned char *ceiling_addr;
236
237 if (shortage != 0)
238 *shortage = 0;
239
240 immediate_quit = 1;
241
242 if (count > 0)
243 while (start != limit + 1)
244 {
245 ceiling = BUFFER_CEILING_OF (start);
246 ceiling = min (limit, ceiling);
247 ceiling_addr = &FETCH_CHAR (ceiling) + 1;
248 base = (cursor = &FETCH_CHAR (start));
249 while (1)
250 {
251 while (*cursor != target && ++cursor != ceiling_addr)
252 ;
253 if (cursor != ceiling_addr)
254 {
255 if (--count == 0)
256 {
257 immediate_quit = 0;
258 return (start + cursor - base + 1);
259 }
260 else
261 if (++cursor == ceiling_addr)
262 break;
263 }
264 else
265 break;
266 }
267 start += cursor - base;
268 }
269 else
270 {
271 start--; /* first character we scan */
272 while (start > limit - 1)
273 { /* we WILL scan under start */
274 ceiling = BUFFER_FLOOR_OF (start);
275 ceiling = max (limit, ceiling);
276 ceiling_addr = &FETCH_CHAR (ceiling) - 1;
277 base = (cursor = &FETCH_CHAR (start));
278 cursor++;
279 while (1)
280 {
281 while (--cursor != ceiling_addr && *cursor != target)
282 ;
283 if (cursor != ceiling_addr)
284 {
285 if (++count == 0)
286 {
287 immediate_quit = 0;
288 return (start + cursor - base + 1);
289 }
290 }
291 else
292 break;
293 }
294 start += cursor - base;
295 }
296 }
297 immediate_quit = 0;
298 if (shortage != 0)
299 *shortage = count * direction;
300 return (start + ((direction == 1 ? 0 : 1)));
301 }
302
303 int
304 find_next_newline (from, cnt)
305 register int from, cnt;
306 {
307 return (scan_buffer ('\n', from, cnt, (int *) 0));
308 }
309 \f
310 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
311 "Move point forward, stopping before a char not in CHARS, or at position LIM.\n\
312 CHARS is like the inside of a `[...]' in a regular expression\n\
313 except that `]' is never special and `\\' quotes `^', `-' or `\\'.\n\
314 Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
315 With arg \"^a-zA-Z\", skips nonletters stopping before first letter.")
316 (string, lim)
317 Lisp_Object string, lim;
318 {
319 skip_chars (1, string, lim);
320 return Qnil;
321 }
322
323 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
324 "Move point backward, stopping after a char not in CHARS, or at position LIM.\n\
325 See `skip-chars-forward' for details.")
326 (string, lim)
327 Lisp_Object string, lim;
328 {
329 skip_chars (0, string, lim);
330 return Qnil;
331 }
332
333 skip_chars (forwardp, string, lim)
334 int forwardp;
335 Lisp_Object string, lim;
336 {
337 register unsigned char *p, *pend;
338 register unsigned char c;
339 unsigned char fastmap[0400];
340 int negate = 0;
341 register int i;
342
343 CHECK_STRING (string, 0);
344
345 if (NILP (lim))
346 XSET (lim, Lisp_Int, forwardp ? ZV : BEGV);
347 else
348 CHECK_NUMBER_COERCE_MARKER (lim, 1);
349
350 #if 0 /* This breaks some things... jla. */
351 /* In any case, don't allow scan outside bounds of buffer. */
352 if (XFASTINT (lim) > ZV)
353 XFASTINT (lim) = ZV;
354 if (XFASTINT (lim) < BEGV)
355 XFASTINT (lim) = BEGV;
356 #endif
357
358 p = XSTRING (string)->data;
359 pend = p + XSTRING (string)->size;
360 bzero (fastmap, sizeof fastmap);
361
362 if (p != pend && *p == '^')
363 {
364 negate = 1; p++;
365 }
366
367 /* Find the characters specified and set their elements of fastmap. */
368
369 while (p != pend)
370 {
371 c = *p++;
372 if (c == '\\')
373 {
374 if (p == pend) break;
375 c = *p++;
376 }
377 if (p != pend && *p == '-')
378 {
379 p++;
380 if (p == pend) break;
381 while (c <= *p)
382 {
383 fastmap[c] = 1;
384 c++;
385 }
386 p++;
387 }
388 else
389 fastmap[c] = 1;
390 }
391
392 /* If ^ was the first character, complement the fastmap. */
393
394 if (negate)
395 for (i = 0; i < sizeof fastmap; i++)
396 fastmap[i] ^= 1;
397
398 immediate_quit = 1;
399 if (forwardp)
400 {
401 while (point < XINT (lim) && fastmap[FETCH_CHAR (point)])
402 SET_PT (point + 1);
403 }
404 else
405 {
406 while (point > XINT (lim) && fastmap[FETCH_CHAR (point - 1)])
407 SET_PT (point - 1);
408 }
409 immediate_quit = 0;
410 }
411 \f
412 /* Subroutines of Lisp buffer search functions. */
413
414 static Lisp_Object
415 search_command (string, bound, noerror, count, direction, RE)
416 Lisp_Object string, bound, noerror, count;
417 int direction;
418 int RE;
419 {
420 register int np;
421 int lim;
422 int n = direction;
423
424 if (!NILP (count))
425 {
426 CHECK_NUMBER (count, 3);
427 n *= XINT (count);
428 }
429
430 CHECK_STRING (string, 0);
431 if (NILP (bound))
432 lim = n > 0 ? ZV : BEGV;
433 else
434 {
435 CHECK_NUMBER_COERCE_MARKER (bound, 1);
436 lim = XINT (bound);
437 if (n > 0 ? lim < point : lim > point)
438 error ("Invalid search bound (wrong side of point)");
439 if (lim > ZV)
440 lim = ZV;
441 if (lim < BEGV)
442 lim = BEGV;
443 }
444
445 np = search_buffer (string, point, lim, n, RE,
446 (!NILP (current_buffer->case_fold_search)
447 ? XSTRING (current_buffer->case_canon_table)->data : 0),
448 (!NILP (current_buffer->case_fold_search)
449 ? XSTRING (current_buffer->case_eqv_table)->data : 0));
450 if (np <= 0)
451 {
452 if (NILP (noerror))
453 return signal_failure (string);
454 if (!EQ (noerror, Qt))
455 {
456 if (lim < BEGV || lim > ZV)
457 abort ();
458 SET_PT (lim);
459 }
460 return Qnil;
461 }
462
463 if (np < BEGV || np > ZV)
464 abort ();
465
466 SET_PT (np);
467
468 return make_number (np);
469 }
470 \f
471 /* search for the n'th occurrence of STRING in the current buffer,
472 starting at position POS and stopping at position LIM,
473 treating PAT as a literal string if RE is false or as
474 a regular expression if RE is true.
475
476 If N is positive, searching is forward and LIM must be greater than POS.
477 If N is negative, searching is backward and LIM must be less than POS.
478
479 Returns -x if only N-x occurrences found (x > 0),
480 or else the position at the beginning of the Nth occurrence
481 (if searching backward) or the end (if searching forward). */
482
483 search_buffer (string, pos, lim, n, RE, trt, inverse_trt)
484 Lisp_Object string;
485 int pos;
486 int lim;
487 int n;
488 int RE;
489 register unsigned char *trt;
490 register unsigned char *inverse_trt;
491 {
492 int len = XSTRING (string)->size;
493 unsigned char *base_pat = XSTRING (string)->data;
494 register int *BM_tab;
495 int *BM_tab_base;
496 register int direction = ((n > 0) ? 1 : -1);
497 register int dirlen;
498 int infinity, limit, k, stride_for_teases;
499 register unsigned char *pat, *cursor, *p_limit;
500 register int i, j;
501 unsigned char *p1, *p2;
502 int s1, s2;
503
504 /* Null string is found at starting position. */
505 if (!len)
506 return pos;
507
508 if (RE)
509 compile_pattern (string, &searchbuf, (char *) trt);
510
511 if (RE /* Here we detect whether the */
512 /* generality of an RE search is */
513 /* really needed. */
514 /* first item is "exact match" */
515 && *(searchbuf.buffer) == (char) RE_EXACTN_VALUE
516 && searchbuf.buffer[1] + 2 == searchbuf.used) /*first is ONLY item */
517 {
518 RE = 0; /* can do straight (non RE) search */
519 pat = (base_pat = (unsigned char *) searchbuf.buffer + 2);
520 /* trt already applied */
521 len = searchbuf.used - 2;
522 }
523 else if (!RE)
524 {
525 pat = (unsigned char *) alloca (len);
526
527 for (i = len; i--;) /* Copy the pattern; apply trt */
528 *pat++ = (((int) trt) ? trt [*base_pat++] : *base_pat++);
529 pat -= len; base_pat = pat;
530 }
531
532 if (RE)
533 {
534 immediate_quit = 1; /* Quit immediately if user types ^G,
535 because letting this function finish
536 can take too long. */
537 QUIT; /* Do a pending quit right away,
538 to avoid paradoxical behavior */
539 /* Get pointers and sizes of the two strings
540 that make up the visible portion of the buffer. */
541
542 p1 = BEGV_ADDR;
543 s1 = GPT - BEGV;
544 p2 = GAP_END_ADDR;
545 s2 = ZV - GPT;
546 if (s1 < 0)
547 {
548 p2 = p1;
549 s2 = ZV - BEGV;
550 s1 = 0;
551 }
552 if (s2 < 0)
553 {
554 s1 = ZV - BEGV;
555 s2 = 0;
556 }
557 while (n < 0)
558 {
559 int val = re_search_2 (&searchbuf, (char *) p1, s1, (char *) p2, s2,
560 pos - BEGV, lim - pos, &search_regs,
561 /* Don't allow match past current point */
562 pos - BEGV);
563 if (val == -2)
564 matcher_overflow ();
565 if (val >= 0)
566 {
567 j = BEGV;
568 for (i = 0; i < search_regs.num_regs; i++)
569 if (search_regs.start[i] >= 0)
570 {
571 search_regs.start[i] += j;
572 search_regs.end[i] += j;
573 }
574 search_regs_from_string = 0;
575 /* Set pos to the new position. */
576 pos = search_regs.start[0];
577 }
578 else
579 {
580 immediate_quit = 0;
581 return (n);
582 }
583 n++;
584 }
585 while (n > 0)
586 {
587 int val = re_search_2 (&searchbuf, (char *) p1, s1, (char *) p2, s2,
588 pos - BEGV, lim - pos, &search_regs,
589 lim - BEGV);
590 if (val == -2)
591 matcher_overflow ();
592 if (val >= 0)
593 {
594 j = BEGV;
595 for (i = 0; i < search_regs.num_regs; i++)
596 if (search_regs.start[i] >= 0)
597 {
598 search_regs.start[i] += j;
599 search_regs.end[i] += j;
600 }
601 search_regs_from_string = 0;
602 pos = search_regs.end[0];
603 }
604 else
605 {
606 immediate_quit = 0;
607 return (0 - n);
608 }
609 n--;
610 }
611 immediate_quit = 0;
612 return (pos);
613 }
614 else /* non-RE case */
615 {
616 #ifdef C_ALLOCA
617 int BM_tab_space[0400];
618 BM_tab = &BM_tab_space[0];
619 #else
620 BM_tab = (int *) alloca (0400 * sizeof (int));
621 #endif
622 /* The general approach is that we are going to maintain that we know */
623 /* the first (closest to the present position, in whatever direction */
624 /* we're searching) character that could possibly be the last */
625 /* (furthest from present position) character of a valid match. We */
626 /* advance the state of our knowledge by looking at that character */
627 /* and seeing whether it indeed matches the last character of the */
628 /* pattern. If it does, we take a closer look. If it does not, we */
629 /* move our pointer (to putative last characters) as far as is */
630 /* logically possible. This amount of movement, which I call a */
631 /* stride, will be the length of the pattern if the actual character */
632 /* appears nowhere in the pattern, otherwise it will be the distance */
633 /* from the last occurrence of that character to the end of the */
634 /* pattern. */
635 /* As a coding trick, an enormous stride is coded into the table for */
636 /* characters that match the last character. This allows use of only */
637 /* a single test, a test for having gone past the end of the */
638 /* permissible match region, to test for both possible matches (when */
639 /* the stride goes past the end immediately) and failure to */
640 /* match (where you get nudged past the end one stride at a time). */
641
642 /* Here we make a "mickey mouse" BM table. The stride of the search */
643 /* is determined only by the last character of the putative match. */
644 /* If that character does not match, we will stride the proper */
645 /* distance to propose a match that superimposes it on the last */
646 /* instance of a character that matches it (per trt), or misses */
647 /* it entirely if there is none. */
648
649 dirlen = len * direction;
650 infinity = dirlen - (lim + pos + len + len) * direction;
651 if (direction < 0)
652 pat = (base_pat += len - 1);
653 BM_tab_base = BM_tab;
654 BM_tab += 0400;
655 j = dirlen; /* to get it in a register */
656 /* A character that does not appear in the pattern induces a */
657 /* stride equal to the pattern length. */
658 while (BM_tab_base != BM_tab)
659 {
660 *--BM_tab = j;
661 *--BM_tab = j;
662 *--BM_tab = j;
663 *--BM_tab = j;
664 }
665 i = 0;
666 while (i != infinity)
667 {
668 j = pat[i]; i += direction;
669 if (i == dirlen) i = infinity;
670 if ((int) trt)
671 {
672 k = (j = trt[j]);
673 if (i == infinity)
674 stride_for_teases = BM_tab[j];
675 BM_tab[j] = dirlen - i;
676 /* A translation table is accompanied by its inverse -- see */
677 /* comment following downcase_table for details */
678 while ((j = inverse_trt[j]) != k)
679 BM_tab[j] = dirlen - i;
680 }
681 else
682 {
683 if (i == infinity)
684 stride_for_teases = BM_tab[j];
685 BM_tab[j] = dirlen - i;
686 }
687 /* stride_for_teases tells how much to stride if we get a */
688 /* match on the far character but are subsequently */
689 /* disappointed, by recording what the stride would have been */
690 /* for that character if the last character had been */
691 /* different. */
692 }
693 infinity = dirlen - infinity;
694 pos += dirlen - ((direction > 0) ? direction : 0);
695 /* loop invariant - pos points at where last char (first char if reverse)
696 of pattern would align in a possible match. */
697 while (n != 0)
698 {
699 if ((lim - pos - (direction > 0)) * direction < 0)
700 return (n * (0 - direction));
701 /* First we do the part we can by pointers (maybe nothing) */
702 QUIT;
703 pat = base_pat;
704 limit = pos - dirlen + direction;
705 limit = ((direction > 0)
706 ? BUFFER_CEILING_OF (limit)
707 : BUFFER_FLOOR_OF (limit));
708 /* LIMIT is now the last (not beyond-last!) value
709 POS can take on without hitting edge of buffer or the gap. */
710 limit = ((direction > 0)
711 ? min (lim - 1, min (limit, pos + 20000))
712 : max (lim, max (limit, pos - 20000)));
713 if ((limit - pos) * direction > 20)
714 {
715 p_limit = &FETCH_CHAR (limit);
716 p2 = (cursor = &FETCH_CHAR (pos));
717 /* In this loop, pos + cursor - p2 is the surrogate for pos */
718 while (1) /* use one cursor setting as long as i can */
719 {
720 if (direction > 0) /* worth duplicating */
721 {
722 /* Use signed comparison if appropriate
723 to make cursor+infinity sure to be > p_limit.
724 Assuming that the buffer lies in a range of addresses
725 that are all "positive" (as ints) or all "negative",
726 either kind of comparison will work as long
727 as we don't step by infinity. So pick the kind
728 that works when we do step by infinity. */
729 if ((int) (p_limit + infinity) > (int) p_limit)
730 while ((int) cursor <= (int) p_limit)
731 cursor += BM_tab[*cursor];
732 else
733 while ((unsigned int) cursor <= (unsigned int) p_limit)
734 cursor += BM_tab[*cursor];
735 }
736 else
737 {
738 if ((int) (p_limit + infinity) < (int) p_limit)
739 while ((int) cursor >= (int) p_limit)
740 cursor += BM_tab[*cursor];
741 else
742 while ((unsigned int) cursor >= (unsigned int) p_limit)
743 cursor += BM_tab[*cursor];
744 }
745 /* If you are here, cursor is beyond the end of the searched region. */
746 /* This can happen if you match on the far character of the pattern, */
747 /* because the "stride" of that character is infinity, a number able */
748 /* to throw you well beyond the end of the search. It can also */
749 /* happen if you fail to match within the permitted region and would */
750 /* otherwise try a character beyond that region */
751 if ((cursor - p_limit) * direction <= len)
752 break; /* a small overrun is genuine */
753 cursor -= infinity; /* large overrun = hit */
754 i = dirlen - direction;
755 if ((int) trt)
756 {
757 while ((i -= direction) + direction != 0)
758 if (pat[i] != trt[*(cursor -= direction)])
759 break;
760 }
761 else
762 {
763 while ((i -= direction) + direction != 0)
764 if (pat[i] != *(cursor -= direction))
765 break;
766 }
767 cursor += dirlen - i - direction; /* fix cursor */
768 if (i + direction == 0)
769 {
770 cursor -= direction;
771 search_regs.start[0]
772 = pos + cursor - p2 + ((direction > 0)
773 ? 1 - len : 0);
774 search_regs.end[0] = len + search_regs.start[0];
775 search_regs_from_string = 0;
776 if ((n -= direction) != 0)
777 cursor += dirlen; /* to resume search */
778 else
779 return ((direction > 0)
780 ? search_regs.end[0] : search_regs.start[0]);
781 }
782 else
783 cursor += stride_for_teases; /* <sigh> we lose - */
784 }
785 pos += cursor - p2;
786 }
787 else
788 /* Now we'll pick up a clump that has to be done the hard */
789 /* way because it covers a discontinuity */
790 {
791 limit = ((direction > 0)
792 ? BUFFER_CEILING_OF (pos - dirlen + 1)
793 : BUFFER_FLOOR_OF (pos - dirlen - 1));
794 limit = ((direction > 0)
795 ? min (limit + len, lim - 1)
796 : max (limit - len, lim));
797 /* LIMIT is now the last value POS can have
798 and still be valid for a possible match. */
799 while (1)
800 {
801 /* This loop can be coded for space rather than */
802 /* speed because it will usually run only once. */
803 /* (the reach is at most len + 21, and typically */
804 /* does not exceed len) */
805 while ((limit - pos) * direction >= 0)
806 pos += BM_tab[FETCH_CHAR(pos)];
807 /* now run the same tests to distinguish going off the */
808 /* end, a match or a phoney match. */
809 if ((pos - limit) * direction <= len)
810 break; /* ran off the end */
811 /* Found what might be a match.
812 Set POS back to last (first if reverse) char pos. */
813 pos -= infinity;
814 i = dirlen - direction;
815 while ((i -= direction) + direction != 0)
816 {
817 pos -= direction;
818 if (pat[i] != (((int) trt)
819 ? trt[FETCH_CHAR(pos)]
820 : FETCH_CHAR (pos)))
821 break;
822 }
823 /* Above loop has moved POS part or all the way
824 back to the first char pos (last char pos if reverse).
825 Set it once again at the last (first if reverse) char. */
826 pos += dirlen - i- direction;
827 if (i + direction == 0)
828 {
829 pos -= direction;
830 search_regs.start[0]
831 = pos + ((direction > 0) ? 1 - len : 0);
832 search_regs.end[0] = len + search_regs.start[0];
833 search_regs_from_string = 0;
834 if ((n -= direction) != 0)
835 pos += dirlen; /* to resume search */
836 else
837 return ((direction > 0)
838 ? search_regs.end[0] : search_regs.start[0]);
839 }
840 else
841 pos += stride_for_teases;
842 }
843 }
844 /* We have done one clump. Can we continue? */
845 if ((lim - pos) * direction < 0)
846 return ((0 - n) * direction);
847 }
848 return pos;
849 }
850 }
851 \f
852 /* Given a string of words separated by word delimiters,
853 compute a regexp that matches those exact words
854 separated by arbitrary punctuation. */
855
856 static Lisp_Object
857 wordify (string)
858 Lisp_Object string;
859 {
860 register unsigned char *p, *o;
861 register int i, len, punct_count = 0, word_count = 0;
862 Lisp_Object val;
863
864 CHECK_STRING (string, 0);
865 p = XSTRING (string)->data;
866 len = XSTRING (string)->size;
867
868 for (i = 0; i < len; i++)
869 if (SYNTAX (p[i]) != Sword)
870 {
871 punct_count++;
872 if (i > 0 && SYNTAX (p[i-1]) == Sword) word_count++;
873 }
874 if (SYNTAX (p[len-1]) == Sword) word_count++;
875 if (!word_count) return build_string ("");
876
877 val = make_string (p, len - punct_count + 5 * (word_count - 1) + 4);
878
879 o = XSTRING (val)->data;
880 *o++ = '\\';
881 *o++ = 'b';
882
883 for (i = 0; i < len; i++)
884 if (SYNTAX (p[i]) == Sword)
885 *o++ = p[i];
886 else if (i > 0 && SYNTAX (p[i-1]) == Sword && --word_count)
887 {
888 *o++ = '\\';
889 *o++ = 'W';
890 *o++ = '\\';
891 *o++ = 'W';
892 *o++ = '*';
893 }
894
895 *o++ = '\\';
896 *o++ = 'b';
897
898 return val;
899 }
900 \f
901 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
902 "sSearch backward: ",
903 "Search backward from point for STRING.\n\
904 Set point to the beginning of the occurrence found, and return point.\n\
905 An optional second argument bounds the search; it is a buffer position.\n\
906 The match found must not extend before that position.\n\
907 Optional third argument, if t, means if fail just return nil (no error).\n\
908 If not nil and not t, position at limit of search and return nil.\n\
909 Optional fourth argument is repeat count--search for successive occurrences.\n\
910 See also the functions `match-beginning', `match-end' and `replace-match'.")
911 (string, bound, noerror, count)
912 Lisp_Object string, bound, noerror, count;
913 {
914 return search_command (string, bound, noerror, count, -1, 0);
915 }
916
917 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "sSearch: ",
918 "Search forward from point for STRING.\n\
919 Set point to the end of the occurrence found, and return point.\n\
920 An optional second argument bounds the search; it is a buffer position.\n\
921 The match found must not extend after that position. nil is equivalent\n\
922 to (point-max).\n\
923 Optional third argument, if t, means if fail just return nil (no error).\n\
924 If not nil and not t, move to limit of search and return nil.\n\
925 Optional fourth argument is repeat count--search for successive occurrences.\n\
926 See also the functions `match-beginning', `match-end' and `replace-match'.")
927 (string, bound, noerror, count)
928 Lisp_Object string, bound, noerror, count;
929 {
930 return search_command (string, bound, noerror, count, 1, 0);
931 }
932
933 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
934 "sWord search backward: ",
935 "Search backward from point for STRING, ignoring differences in punctuation.\n\
936 Set point to the beginning of the occurrence found, and return point.\n\
937 An optional second argument bounds the search; it is a buffer position.\n\
938 The match found must not extend before that position.\n\
939 Optional third argument, if t, means if fail just return nil (no error).\n\
940 If not nil and not t, move to limit of search and return nil.\n\
941 Optional fourth argument is repeat count--search for successive occurrences.")
942 (string, bound, noerror, count)
943 Lisp_Object string, bound, noerror, count;
944 {
945 return search_command (wordify (string), bound, noerror, count, -1, 1);
946 }
947
948 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
949 "sWord search: ",
950 "Search forward from point for STRING, ignoring differences in punctuation.\n\
951 Set point to the end of the occurrence found, and return point.\n\
952 An optional second argument bounds the search; it is a buffer position.\n\
953 The match found must not extend after that position.\n\
954 Optional third argument, if t, means if fail just return nil (no error).\n\
955 If not nil and not t, move to limit of search and return nil.\n\
956 Optional fourth argument is repeat count--search for successive occurrences.")
957 (string, bound, noerror, count)
958 Lisp_Object string, bound, noerror, count;
959 {
960 return search_command (wordify (string), bound, noerror, count, 1, 1);
961 }
962
963 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
964 "sRE search backward: ",
965 "Search backward from point for match for regular expression REGEXP.\n\
966 Set point to the beginning of the match, and return point.\n\
967 The match found is the one starting last in the buffer\n\
968 and yet ending before the place the origin of the search.\n\
969 An optional second argument bounds the search; it is a buffer position.\n\
970 The match found must start at or after that position.\n\
971 Optional third argument, if t, means if fail just return nil (no error).\n\
972 If not nil and not t, move to limit of search and return nil.\n\
973 Optional fourth argument is repeat count--search for successive occurrences.\n\
974 See also the functions `match-beginning', `match-end' and `replace-match'.")
975 (string, bound, noerror, count)
976 Lisp_Object string, bound, noerror, count;
977 {
978 return search_command (string, bound, noerror, count, -1, 1);
979 }
980
981 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
982 "sRE search: ",
983 "Search forward from point for regular expression REGEXP.\n\
984 Set point to the end of the occurrence found, and return point.\n\
985 An optional second argument bounds the search; it is a buffer position.\n\
986 The match found must not extend after that position.\n\
987 Optional third argument, if t, means if fail just return nil (no error).\n\
988 If not nil and not t, move to limit of search and return nil.\n\
989 Optional fourth argument is repeat count--search for successive occurrences.\n\
990 See also the functions `match-beginning', `match-end' and `replace-match'.")
991 (string, bound, noerror, count)
992 Lisp_Object string, bound, noerror, count;
993 {
994 return search_command (string, bound, noerror, count, 1, 1);
995 }
996 \f
997 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 3, 0,
998 "Replace text matched by last search with NEWTEXT.\n\
999 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
1000 Otherwise convert to all caps or cap initials, like replaced text.\n\
1001 If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
1002 Otherwise treat `\\' as special:\n\
1003 `\\&' in NEWTEXT means substitute original matched text.\n\
1004 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
1005 If Nth parens didn't match, substitute nothing.\n\
1006 `\\\\' means insert one `\\'.\n\
1007 Leaves point at end of replacement text.")
1008 (string, fixedcase, literal)
1009 Lisp_Object string, fixedcase, literal;
1010 {
1011 enum { nochange, all_caps, cap_initial } case_action;
1012 register int pos, last;
1013 int some_multiletter_word;
1014 int some_letter = 0;
1015 register int c, prevc;
1016 int inslen;
1017
1018 CHECK_STRING (string, 0);
1019
1020 case_action = nochange; /* We tried an initialization */
1021 /* but some C compilers blew it */
1022
1023 if (search_regs.num_regs <= 0)
1024 error ("replace-match called before any match found");
1025
1026 if (search_regs.start[0] < BEGV
1027 || search_regs.start[0] > search_regs.end[0]
1028 || search_regs.end[0] > ZV)
1029 args_out_of_range(make_number (search_regs.start[0]),
1030 make_number (search_regs.end[0]));
1031
1032 if (NILP (fixedcase))
1033 {
1034 /* Decide how to casify by examining the matched text. */
1035
1036 last = search_regs.end[0];
1037 prevc = '\n';
1038 case_action = all_caps;
1039
1040 /* some_multiletter_word is set nonzero if any original word
1041 is more than one letter long. */
1042 some_multiletter_word = 0;
1043
1044 for (pos = search_regs.start[0]; pos < last; pos++)
1045 {
1046 c = FETCH_CHAR (pos);
1047 if (LOWERCASEP (c))
1048 {
1049 /* Cannot be all caps if any original char is lower case */
1050
1051 case_action = cap_initial;
1052 if (SYNTAX (prevc) != Sword)
1053 {
1054 /* Cannot even be cap initials
1055 if some original initial is lower case */
1056 case_action = nochange;
1057 break;
1058 }
1059 else
1060 some_multiletter_word = 1;
1061 }
1062 else if (!NOCASEP (c))
1063 {
1064 some_letter = 1;
1065 if (!some_multiletter_word && SYNTAX (prevc) == Sword)
1066 some_multiletter_word = 1;
1067 }
1068
1069 prevc = c;
1070 }
1071
1072 /* Do not make new text all caps
1073 if the original text contained only single letter words. */
1074 if (case_action == all_caps && !some_multiletter_word)
1075 case_action = cap_initial;
1076
1077 if (!some_letter) case_action = nochange;
1078 }
1079
1080 SET_PT (search_regs.end[0]);
1081 if (!NILP (literal))
1082 Finsert (1, &string);
1083 else
1084 {
1085 struct gcpro gcpro1;
1086 GCPRO1 (string);
1087
1088 for (pos = 0; pos < XSTRING (string)->size; pos++)
1089 {
1090 c = XSTRING (string)->data[pos];
1091 if (c == '\\')
1092 {
1093 c = XSTRING (string)->data[++pos];
1094 if (c == '&')
1095 Finsert_buffer_substring (Fcurrent_buffer (),
1096 make_number (search_regs.start[0]),
1097 make_number (search_regs.end[0]));
1098 else if (c >= '1' && c <= search_regs.num_regs + '0')
1099 {
1100 if (search_regs.start[c - '0'] >= 1)
1101 Finsert_buffer_substring (Fcurrent_buffer (),
1102 make_number (search_regs.start[c - '0']),
1103 make_number (search_regs.end[c - '0']));
1104 }
1105 else
1106 insert_char (c);
1107 }
1108 else
1109 insert_char (c);
1110 }
1111 UNGCPRO;
1112 }
1113
1114 inslen = point - (search_regs.end[0]);
1115 del_range (search_regs.start[0], search_regs.end[0]);
1116
1117 if (case_action == all_caps)
1118 Fupcase_region (make_number (point - inslen), make_number (point));
1119 else if (case_action == cap_initial)
1120 upcase_initials_region (make_number (point - inslen), make_number (point));
1121 return Qnil;
1122 }
1123 \f
1124 static Lisp_Object
1125 match_limit (num, beginningp)
1126 Lisp_Object num;
1127 int beginningp;
1128 {
1129 register int n;
1130
1131 CHECK_NUMBER (num, 0);
1132 n = XINT (num);
1133 if (n < 0 || n >= search_regs.num_regs)
1134 args_out_of_range (num, make_number (search_regs.num_regs));
1135 if (search_regs.num_regs <= 0
1136 || search_regs.start[n] < 0)
1137 return Qnil;
1138 return (make_number ((beginningp) ? search_regs.start[n]
1139 : search_regs.end[n]));
1140 }
1141
1142 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
1143 "Return position of start of text matched by last search.\n\
1144 ARG, a number, specifies which parenthesized expression in the last regexp.\n\
1145 Value is nil if ARGth pair didn't match, or there were less than ARG pairs.\n\
1146 Zero means the entire text matched by the whole regexp or whole string.")
1147 (num)
1148 Lisp_Object num;
1149 {
1150 return match_limit (num, 1);
1151 }
1152
1153 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
1154 "Return position of end of text matched by last search.\n\
1155 ARG, a number, specifies which parenthesized expression in the last regexp.\n\
1156 Value is nil if ARGth pair didn't match, or there were less than ARG pairs.\n\
1157 Zero means the entire text matched by the whole regexp or whole string.")
1158 (num)
1159 Lisp_Object num;
1160 {
1161 return match_limit (num, 0);
1162 }
1163
1164 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 0, 0,
1165 "Return a list containing all info on what the last search matched.\n\
1166 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
1167 All the elements are markers or nil (nil if the Nth pair didn't match)\n\
1168 if the last match was on a buffer; integers or nil if a string was matched.\n\
1169 Use `store-match-data' to reinstate the data in this list.")
1170 ()
1171 {
1172 Lisp_Object *data;
1173 int i, len;
1174
1175 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
1176 * sizeof (Lisp_Object));
1177
1178 len = -1;
1179 for (i = 0; i < search_regs.num_regs; i++)
1180 {
1181 int start = search_regs.start[i];
1182 if (start >= 0)
1183 {
1184 if (search_regs_from_string)
1185 {
1186 XFASTINT (data[2 * i]) = start;
1187 XFASTINT (data[2 * i + 1]) = search_regs.end[i];
1188 }
1189 else
1190 {
1191 data[2 * i] = Fmake_marker ();
1192 Fset_marker (data[2 * i], make_number (start), Qnil);
1193 data[2 * i + 1] = Fmake_marker ();
1194 Fset_marker (data[2 * i + 1],
1195 make_number (search_regs.end[i]), Qnil);
1196 }
1197 len = i;
1198 }
1199 else
1200 data[2 * i] = data [2 * i + 1] = Qnil;
1201 }
1202 return Flist (2 * len + 2, data);
1203 }
1204
1205
1206 DEFUN ("store-match-data", Fstore_match_data, Sstore_match_data, 1, 1, 0,
1207 "Set internal data on last search match from elements of LIST.\n\
1208 LIST should have been created by calling `match-data' previously.")
1209 (list)
1210 register Lisp_Object list;
1211 {
1212 register int i;
1213 register Lisp_Object marker;
1214
1215 if (!CONSP (list) && !NILP (list))
1216 list = wrong_type_argument (Qconsp, list, 0);
1217
1218 /* Allocate registers if they don't already exist. */
1219 {
1220 int length = Flength (list) / 2;
1221
1222 if (length > search_regs.num_regs)
1223 {
1224 if (search_regs.start)
1225 search_regs.start =
1226 (regoff_t *) realloc (search_regs.start,
1227 length * sizeof (regoff_t));
1228 else
1229 search_regs.start = (regoff_t *) malloc (length * sizeof (regoff_t));
1230 if (search_regs.end)
1231 search_regs.end =
1232 (regoff_t *) realloc (search_regs.end,
1233 length * sizeof (regoff_t));
1234 else
1235 search_regs.end = (regoff_t *) malloc (length * sizeof (regoff_t));
1236
1237 search_regs.num_regs = length;
1238 }
1239 }
1240
1241 for (i = 0; i < search_regs.num_regs; i++)
1242 {
1243 marker = Fcar (list);
1244 if (NILP (marker))
1245 {
1246 search_regs.start[i] = -1;
1247 list = Fcdr (list);
1248 }
1249 else
1250 {
1251 if (XTYPE (marker) == Lisp_Marker
1252 && XMARKER (marker)->buffer == 0)
1253 XFASTINT (marker) = 0;
1254
1255 CHECK_NUMBER_COERCE_MARKER (marker, 0);
1256 search_regs.start[i] = XINT (marker);
1257 list = Fcdr (list);
1258
1259 marker = Fcar (list);
1260 if (XTYPE (marker) == Lisp_Marker
1261 && XMARKER (marker)->buffer == 0)
1262 XFASTINT (marker) = 0;
1263
1264 CHECK_NUMBER_COERCE_MARKER (marker, 0);
1265 search_regs.end[i] = XINT (marker);
1266 }
1267 list = Fcdr (list);
1268 }
1269
1270 return Qnil;
1271 }
1272
1273 /* Quote a string to inactivate reg-expr chars */
1274
1275 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
1276 "Return a regexp string which matches exactly STRING and nothing else.")
1277 (str)
1278 Lisp_Object str;
1279 {
1280 register unsigned char *in, *out, *end;
1281 register unsigned char *temp;
1282
1283 CHECK_STRING (str, 0);
1284
1285 temp = (unsigned char *) alloca (XSTRING (str)->size * 2);
1286
1287 /* Now copy the data into the new string, inserting escapes. */
1288
1289 in = XSTRING (str)->data;
1290 end = in + XSTRING (str)->size;
1291 out = temp;
1292
1293 for (; in != end; in++)
1294 {
1295 if (*in == '[' || *in == ']'
1296 || *in == '*' || *in == '.' || *in == '\\'
1297 || *in == '?' || *in == '+'
1298 || *in == '^' || *in == '$')
1299 *out++ = '\\';
1300 *out++ = *in;
1301 }
1302
1303 return make_string (temp, out - temp);
1304 }
1305 \f
1306 syms_of_search ()
1307 {
1308 register int i;
1309
1310 searchbuf.allocated = 100;
1311 searchbuf.buffer = (unsigned char *) malloc (searchbuf.allocated);
1312 searchbuf.fastmap = search_fastmap;
1313
1314 Qsearch_failed = intern ("search-failed");
1315 staticpro (&Qsearch_failed);
1316 Qinvalid_regexp = intern ("invalid-regexp");
1317 staticpro (&Qinvalid_regexp);
1318
1319 Fput (Qsearch_failed, Qerror_conditions,
1320 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
1321 Fput (Qsearch_failed, Qerror_message,
1322 build_string ("Search failed"));
1323
1324 Fput (Qinvalid_regexp, Qerror_conditions,
1325 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
1326 Fput (Qinvalid_regexp, Qerror_message,
1327 build_string ("Invalid regexp"));
1328
1329 last_regexp = Qnil;
1330 staticpro (&last_regexp);
1331
1332 defsubr (&Sstring_match);
1333 defsubr (&Slooking_at);
1334 defsubr (&Sskip_chars_forward);
1335 defsubr (&Sskip_chars_backward);
1336 defsubr (&Ssearch_forward);
1337 defsubr (&Ssearch_backward);
1338 defsubr (&Sword_search_forward);
1339 defsubr (&Sword_search_backward);
1340 defsubr (&Sre_search_forward);
1341 defsubr (&Sre_search_backward);
1342 defsubr (&Sreplace_match);
1343 defsubr (&Smatch_beginning);
1344 defsubr (&Smatch_end);
1345 defsubr (&Smatch_data);
1346 defsubr (&Sstore_match_data);
1347 defsubr (&Sregexp_quote);
1348 }