*** empty log message ***
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23
JB
1/* String search routines for GNU Emacs.
2 Copyright (C) 1985, 1986, 1987, 1992 Free Software Foundation, Inc.
3
4This file is part of GNU Emacs.
5
6GNU Emacs is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 1, or (at your option)
9any later version.
10
11GNU Emacs is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU Emacs; see the file COPYING. If not, write to
18the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20
21#include "config.h"
22#include "lisp.h"
23#include "syntax.h"
24#include "buffer.h"
25#include "commands.h"
4746118a 26
ca1d1d23
JB
27#include <sys/types.h>
28#include "regex.h"
29
30#define max(a, b) ((a) > (b) ? (a) : (b))
31#define min(a, b) ((a) < (b) ? (a) : (b))
32
33/* We compile regexps into this buffer and then use it for searching. */
34
35struct re_pattern_buffer searchbuf;
36
37char search_fastmap[0400];
38
39/* Last regexp we compiled */
40
41Lisp_Object last_regexp;
42
4746118a
JB
43/* Every call to re_match, etc., must pass &search_regs as the regs
44 argument unless you can show it is unnecessary (i.e., if re_match
45 is certainly going to be called again before region-around-match
46 can be called).
47
48 Since the registers are now dynamically allocated, we need to make
49 sure not to refer to the Nth register before checking that it has
50 been allocated. */
ca1d1d23
JB
51
52static struct re_registers search_regs;
53
54/* Nonzero if search_regs are indices in a string; 0 if in a buffer. */
55
56static int search_regs_from_string;
57
58/* error condition signalled when regexp compile_pattern fails */
59
60Lisp_Object Qinvalid_regexp;
61
62static void
63matcher_overflow ()
64{
65 error ("Stack overflow in regexp matcher");
66}
67
68#ifdef __STDC__
69#define CONST const
70#else
71#define CONST
72#endif
73
74/* Compile a regexp and signal a Lisp error if anything goes wrong. */
75
76compile_pattern (pattern, bufp, translate)
77 Lisp_Object pattern;
78 struct re_pattern_buffer *bufp;
79 char *translate;
80{
81 CONST char *val;
82 Lisp_Object dummy;
83
84 if (EQ (pattern, last_regexp)
85 && translate == bufp->translate)
86 return;
87 last_regexp = Qnil;
88 bufp->translate = translate;
89 val = re_compile_pattern ((char *) XSTRING (pattern)->data,
90 XSTRING (pattern)->size,
91 bufp);
92 if (val)
93 {
94 dummy = build_string (val);
95 while (1)
96 Fsignal (Qinvalid_regexp, Fcons (dummy, Qnil));
97 }
98 last_regexp = pattern;
99 return;
100}
101
102/* Error condition used for failing searches */
103Lisp_Object Qsearch_failed;
104
105Lisp_Object
106signal_failure (arg)
107 Lisp_Object arg;
108{
109 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
110 return Qnil;
111}
112\f
113DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
114 "Return t if text after point matches regular expression PAT.")
115 (string)
116 Lisp_Object string;
117{
118 Lisp_Object val;
119 unsigned char *p1, *p2;
120 int s1, s2;
121 register int i;
122
123 CHECK_STRING (string, 0);
124 compile_pattern (string, &searchbuf,
125 !NILP (current_buffer->case_fold_search) ? DOWNCASE_TABLE : 0);
126
127 immediate_quit = 1;
128 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
129
130 /* Get pointers and sizes of the two strings
131 that make up the visible portion of the buffer. */
132
133 p1 = BEGV_ADDR;
134 s1 = GPT - BEGV;
135 p2 = GAP_END_ADDR;
136 s2 = ZV - GPT;
137 if (s1 < 0)
138 {
139 p2 = p1;
140 s2 = ZV - BEGV;
141 s1 = 0;
142 }
143 if (s2 < 0)
144 {
145 s1 = ZV - BEGV;
146 s2 = 0;
147 }
148
149 i = re_match_2 (&searchbuf, (char *) p1, s1, (char *) p2, s2,
150 point - BEGV, &search_regs,
151 ZV - BEGV);
152 if (i == -2)
153 matcher_overflow ();
154
155 val = (0 <= i ? Qt : Qnil);
4746118a 156 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
157 if (search_regs.start[i] >= 0)
158 {
159 search_regs.start[i] += BEGV;
160 search_regs.end[i] += BEGV;
161 }
162 search_regs_from_string = 0;
163 immediate_quit = 0;
164 return val;
165}
166
167DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
168 "Return index of start of first match for REGEXP in STRING, or nil.\n\
169If third arg START is non-nil, start search at that index in STRING.\n\
170For index of first char beyond the match, do (match-end 0).\n\
171`match-end' and `match-beginning' also give indices of substrings\n\
172matched by parenthesis constructs in the pattern.")
173 (regexp, string, start)
174 Lisp_Object regexp, string, start;
175{
176 int val;
177 int s;
178
179 CHECK_STRING (regexp, 0);
180 CHECK_STRING (string, 1);
181
182 if (NILP (start))
183 s = 0;
184 else
185 {
186 int len = XSTRING (string)->size;
187
188 CHECK_NUMBER (start, 2);
189 s = XINT (start);
190 if (s < 0 && -s <= len)
191 s = len - s;
192 else if (0 > s || s > len)
193 args_out_of_range (string, start);
194 }
195
196 compile_pattern (regexp, &searchbuf,
197 !NILP (current_buffer->case_fold_search) ? DOWNCASE_TABLE : 0);
198 immediate_quit = 1;
199 val = re_search (&searchbuf, (char *) XSTRING (string)->data,
200 XSTRING (string)->size, s, XSTRING (string)->size - s,
201 &search_regs);
202 immediate_quit = 0;
203 search_regs_from_string = 1;
204 if (val == -2)
205 matcher_overflow ();
206 if (val < 0) return Qnil;
207 return make_number (val);
208}
209\f
210scan_buffer (target, pos, cnt, shortage)
211 int *shortage, pos;
212 register int cnt, target;
213{
214 int lim = ((cnt > 0) ? ZV - 1 : BEGV);
215 int direction = ((cnt > 0) ? 1 : -1);
216 register int lim0;
217 unsigned char *base;
218 register unsigned char *cursor, *limit;
219
220 if (shortage != 0)
221 *shortage = 0;
222
223 immediate_quit = 1;
224
225 if (cnt > 0)
226 while (pos != lim + 1)
227 {
228 lim0 = BUFFER_CEILING_OF (pos);
229 lim0 = min (lim, lim0);
230 limit = &FETCH_CHAR (lim0) + 1;
231 base = (cursor = &FETCH_CHAR (pos));
232 while (1)
233 {
234 while (*cursor != target && ++cursor != limit)
235 ;
236 if (cursor != limit)
237 {
238 if (--cnt == 0)
239 {
240 immediate_quit = 0;
241 return (pos + cursor - base + 1);
242 }
243 else
244 if (++cursor == limit)
245 break;
246 }
247 else
248 break;
249 }
250 pos += cursor - base;
251 }
252 else
253 {
254 pos--; /* first character we scan */
255 while (pos > lim - 1)
256 { /* we WILL scan under pos */
257 lim0 = BUFFER_FLOOR_OF (pos);
258 lim0 = max (lim, lim0);
259 limit = &FETCH_CHAR (lim0) - 1;
260 base = (cursor = &FETCH_CHAR (pos));
261 cursor++;
262 while (1)
263 {
264 while (--cursor != limit && *cursor != target)
265 ;
266 if (cursor != limit)
267 {
268 if (++cnt == 0)
269 {
270 immediate_quit = 0;
271 return (pos + cursor - base + 1);
272 }
273 }
274 else
275 break;
276 }
277 pos += cursor - base;
278 }
279 }
280 immediate_quit = 0;
281 if (shortage != 0)
282 *shortage = cnt * direction;
283 return (pos + ((direction == 1 ? 0 : 1)));
284}
285
286int
287find_next_newline (from, cnt)
288 register int from, cnt;
289{
290 return (scan_buffer ('\n', from, cnt, (int *) 0));
291}
292\f
293DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
294 "Move point forward, stopping before a char not in CHARS, or at position LIM.\n\
295CHARS is like the inside of a `[...]' in a regular expression\n\
296except that `]' is never special and `\\' quotes `^', `-' or `\\'.\n\
297Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
298With arg \"^a-zA-Z\", skips nonletters stopping before first letter.")
299 (string, lim)
300 Lisp_Object string, lim;
301{
302 skip_chars (1, string, lim);
303 return Qnil;
304}
305
306DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
307 "Move point backward, stopping after a char not in CHARS, or at position LIM.\n\
308See `skip-chars-forward' for details.")
309 (string, lim)
310 Lisp_Object string, lim;
311{
312 skip_chars (0, string, lim);
313 return Qnil;
314}
315
316skip_chars (forwardp, string, lim)
317 int forwardp;
318 Lisp_Object string, lim;
319{
320 register unsigned char *p, *pend;
321 register unsigned char c;
322 unsigned char fastmap[0400];
323 int negate = 0;
324 register int i;
325
326 CHECK_STRING (string, 0);
327
328 if (NILP (lim))
329 XSET (lim, Lisp_Int, forwardp ? ZV : BEGV);
330 else
331 CHECK_NUMBER_COERCE_MARKER (lim, 1);
332
333#if 0 /* This breaks some things... jla. */
334 /* In any case, don't allow scan outside bounds of buffer. */
335 if (XFASTINT (lim) > ZV)
336 XFASTINT (lim) = ZV;
337 if (XFASTINT (lim) < BEGV)
338 XFASTINT (lim) = BEGV;
339#endif
340
341 p = XSTRING (string)->data;
342 pend = p + XSTRING (string)->size;
343 bzero (fastmap, sizeof fastmap);
344
345 if (p != pend && *p == '^')
346 {
347 negate = 1; p++;
348 }
349
350 /* Find the characters specified and set their elements of fastmap. */
351
352 while (p != pend)
353 {
354 c = *p++;
355 if (c == '\\')
356 {
357 if (p == pend) break;
358 c = *p++;
359 }
360 if (p != pend && *p == '-')
361 {
362 p++;
363 if (p == pend) break;
364 while (c <= *p)
365 {
366 fastmap[c] = 1;
367 c++;
368 }
369 p++;
370 }
371 else
372 fastmap[c] = 1;
373 }
374
375 /* If ^ was the first character, complement the fastmap. */
376
377 if (negate)
378 for (i = 0; i < sizeof fastmap; i++)
379 fastmap[i] ^= 1;
380
381 immediate_quit = 1;
382 if (forwardp)
383 {
384 while (point < XINT (lim) && fastmap[FETCH_CHAR (point)])
385 SET_PT (point + 1);
386 }
387 else
388 {
389 while (point > XINT (lim) && fastmap[FETCH_CHAR (point - 1)])
390 SET_PT (point - 1);
391 }
392 immediate_quit = 0;
393}
394\f
395/* Subroutines of Lisp buffer search functions. */
396
397static Lisp_Object
398search_command (string, bound, noerror, count, direction, RE)
399 Lisp_Object string, bound, noerror, count;
400 int direction;
401 int RE;
402{
403 register int np;
404 int lim;
405 int n = direction;
406
407 if (!NILP (count))
408 {
409 CHECK_NUMBER (count, 3);
410 n *= XINT (count);
411 }
412
413 CHECK_STRING (string, 0);
414 if (NILP (bound))
415 lim = n > 0 ? ZV : BEGV;
416 else
417 {
418 CHECK_NUMBER_COERCE_MARKER (bound, 1);
419 lim = XINT (bound);
420 if (n > 0 ? lim < point : lim > point)
421 error ("Invalid search bound (wrong side of point)");
422 if (lim > ZV)
423 lim = ZV;
424 if (lim < BEGV)
425 lim = BEGV;
426 }
427
428 np = search_buffer (string, point, lim, n, RE,
429 (!NILP (current_buffer->case_fold_search)
430 ? XSTRING (current_buffer->case_canon_table)->data : 0),
431 (!NILP (current_buffer->case_fold_search)
432 ? XSTRING (current_buffer->case_eqv_table)->data : 0));
433 if (np <= 0)
434 {
435 if (NILP (noerror))
436 return signal_failure (string);
437 if (!EQ (noerror, Qt))
438 {
439 if (lim < BEGV || lim > ZV)
440 abort ();
441 SET_PT (lim);
442 }
443 return Qnil;
444 }
445
446 if (np < BEGV || np > ZV)
447 abort ();
448
449 SET_PT (np);
450
451 return make_number (np);
452}
453\f
454/* search for the n'th occurrence of STRING in the current buffer,
455 starting at position POS and stopping at position LIM,
456 treating PAT as a literal string if RE is false or as
457 a regular expression if RE is true.
458
459 If N is positive, searching is forward and LIM must be greater than POS.
460 If N is negative, searching is backward and LIM must be less than POS.
461
462 Returns -x if only N-x occurrences found (x > 0),
463 or else the position at the beginning of the Nth occurrence
464 (if searching backward) or the end (if searching forward). */
465
466search_buffer (string, pos, lim, n, RE, trt, inverse_trt)
467 Lisp_Object string;
468 int pos;
469 int lim;
470 int n;
471 int RE;
472 register unsigned char *trt;
473 register unsigned char *inverse_trt;
474{
475 int len = XSTRING (string)->size;
476 unsigned char *base_pat = XSTRING (string)->data;
477 register int *BM_tab;
478 int *BM_tab_base;
479 register int direction = ((n > 0) ? 1 : -1);
480 register int dirlen;
481 int infinity, limit, k, stride_for_teases;
482 register unsigned char *pat, *cursor, *p_limit;
483 register int i, j;
484 unsigned char *p1, *p2;
485 int s1, s2;
486
487 /* Null string is found at starting position. */
488 if (!len)
489 return pos;
490
491 if (RE)
492 compile_pattern (string, &searchbuf, (char *) trt);
493
494 if (RE /* Here we detect whether the */
495 /* generality of an RE search is */
496 /* really needed. */
497 /* first item is "exact match" */
4746118a 498 && *(searchbuf.buffer) == (char) RE_EXACTN_VALUE
ca1d1d23
JB
499 && searchbuf.buffer[1] + 2 == searchbuf.used) /*first is ONLY item */
500 {
501 RE = 0; /* can do straight (non RE) search */
502 pat = (base_pat = (unsigned char *) searchbuf.buffer + 2);
503 /* trt already applied */
504 len = searchbuf.used - 2;
505 }
506 else if (!RE)
507 {
508 pat = (unsigned char *) alloca (len);
509
510 for (i = len; i--;) /* Copy the pattern; apply trt */
511 *pat++ = (((int) trt) ? trt [*base_pat++] : *base_pat++);
512 pat -= len; base_pat = pat;
513 }
514
515 if (RE)
516 {
517 immediate_quit = 1; /* Quit immediately if user types ^G,
518 because letting this function finish
519 can take too long. */
520 QUIT; /* Do a pending quit right away,
521 to avoid paradoxical behavior */
522 /* Get pointers and sizes of the two strings
523 that make up the visible portion of the buffer. */
524
525 p1 = BEGV_ADDR;
526 s1 = GPT - BEGV;
527 p2 = GAP_END_ADDR;
528 s2 = ZV - GPT;
529 if (s1 < 0)
530 {
531 p2 = p1;
532 s2 = ZV - BEGV;
533 s1 = 0;
534 }
535 if (s2 < 0)
536 {
537 s1 = ZV - BEGV;
538 s2 = 0;
539 }
540 while (n < 0)
541 {
542 int val = re_search_2 (&searchbuf, (char *) p1, s1, (char *) p2, s2,
543 pos - BEGV, lim - pos, &search_regs,
544 /* Don't allow match past current point */
545 pos - BEGV);
546 if (val == -2)
547 matcher_overflow ();
548 if (val >= 0)
549 {
550 j = BEGV;
4746118a 551 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
552 if (search_regs.start[i] >= 0)
553 {
554 search_regs.start[i] += j;
555 search_regs.end[i] += j;
556 }
557 search_regs_from_string = 0;
558 /* Set pos to the new position. */
559 pos = search_regs.start[0];
560 }
561 else
562 {
563 immediate_quit = 0;
564 return (n);
565 }
566 n++;
567 }
568 while (n > 0)
569 {
570 int val = re_search_2 (&searchbuf, (char *) p1, s1, (char *) p2, s2,
571 pos - BEGV, lim - pos, &search_regs,
572 lim - BEGV);
573 if (val == -2)
574 matcher_overflow ();
575 if (val >= 0)
576 {
577 j = BEGV;
4746118a 578 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
579 if (search_regs.start[i] >= 0)
580 {
581 search_regs.start[i] += j;
582 search_regs.end[i] += j;
583 }
584 search_regs_from_string = 0;
585 pos = search_regs.end[0];
586 }
587 else
588 {
589 immediate_quit = 0;
590 return (0 - n);
591 }
592 n--;
593 }
594 immediate_quit = 0;
595 return (pos);
596 }
597 else /* non-RE case */
598 {
599#ifdef C_ALLOCA
600 int BM_tab_space[0400];
601 BM_tab = &BM_tab_space[0];
602#else
603 BM_tab = (int *) alloca (0400 * sizeof (int));
604#endif
605 /* The general approach is that we are going to maintain that we know */
606 /* the first (closest to the present position, in whatever direction */
607 /* we're searching) character that could possibly be the last */
608 /* (furthest from present position) character of a valid match. We */
609 /* advance the state of our knowledge by looking at that character */
610 /* and seeing whether it indeed matches the last character of the */
611 /* pattern. If it does, we take a closer look. If it does not, we */
612 /* move our pointer (to putative last characters) as far as is */
613 /* logically possible. This amount of movement, which I call a */
614 /* stride, will be the length of the pattern if the actual character */
615 /* appears nowhere in the pattern, otherwise it will be the distance */
616 /* from the last occurrence of that character to the end of the */
617 /* pattern. */
618 /* As a coding trick, an enormous stride is coded into the table for */
619 /* characters that match the last character. This allows use of only */
620 /* a single test, a test for having gone past the end of the */
621 /* permissible match region, to test for both possible matches (when */
622 /* the stride goes past the end immediately) and failure to */
623 /* match (where you get nudged past the end one stride at a time). */
624
625 /* Here we make a "mickey mouse" BM table. The stride of the search */
626 /* is determined only by the last character of the putative match. */
627 /* If that character does not match, we will stride the proper */
628 /* distance to propose a match that superimposes it on the last */
629 /* instance of a character that matches it (per trt), or misses */
630 /* it entirely if there is none. */
631
632 dirlen = len * direction;
633 infinity = dirlen - (lim + pos + len + len) * direction;
634 if (direction < 0)
635 pat = (base_pat += len - 1);
636 BM_tab_base = BM_tab;
637 BM_tab += 0400;
638 j = dirlen; /* to get it in a register */
639 /* A character that does not appear in the pattern induces a */
640 /* stride equal to the pattern length. */
641 while (BM_tab_base != BM_tab)
642 {
643 *--BM_tab = j;
644 *--BM_tab = j;
645 *--BM_tab = j;
646 *--BM_tab = j;
647 }
648 i = 0;
649 while (i != infinity)
650 {
651 j = pat[i]; i += direction;
652 if (i == dirlen) i = infinity;
653 if ((int) trt)
654 {
655 k = (j = trt[j]);
656 if (i == infinity)
657 stride_for_teases = BM_tab[j];
658 BM_tab[j] = dirlen - i;
659 /* A translation table is accompanied by its inverse -- see */
660 /* comment following downcase_table for details */
661 while ((j = inverse_trt[j]) != k)
662 BM_tab[j] = dirlen - i;
663 }
664 else
665 {
666 if (i == infinity)
667 stride_for_teases = BM_tab[j];
668 BM_tab[j] = dirlen - i;
669 }
670 /* stride_for_teases tells how much to stride if we get a */
671 /* match on the far character but are subsequently */
672 /* disappointed, by recording what the stride would have been */
673 /* for that character if the last character had been */
674 /* different. */
675 }
676 infinity = dirlen - infinity;
677 pos += dirlen - ((direction > 0) ? direction : 0);
678 /* loop invariant - pos points at where last char (first char if reverse)
679 of pattern would align in a possible match. */
680 while (n != 0)
681 {
682 if ((lim - pos - (direction > 0)) * direction < 0)
683 return (n * (0 - direction));
684 /* First we do the part we can by pointers (maybe nothing) */
685 QUIT;
686 pat = base_pat;
687 limit = pos - dirlen + direction;
688 limit = ((direction > 0)
689 ? BUFFER_CEILING_OF (limit)
690 : BUFFER_FLOOR_OF (limit));
691 /* LIMIT is now the last (not beyond-last!) value
692 POS can take on without hitting edge of buffer or the gap. */
693 limit = ((direction > 0)
694 ? min (lim - 1, min (limit, pos + 20000))
695 : max (lim, max (limit, pos - 20000)));
696 if ((limit - pos) * direction > 20)
697 {
698 p_limit = &FETCH_CHAR (limit);
699 p2 = (cursor = &FETCH_CHAR (pos));
700 /* In this loop, pos + cursor - p2 is the surrogate for pos */
701 while (1) /* use one cursor setting as long as i can */
702 {
703 if (direction > 0) /* worth duplicating */
704 {
705 /* Use signed comparison if appropriate
706 to make cursor+infinity sure to be > p_limit.
707 Assuming that the buffer lies in a range of addresses
708 that are all "positive" (as ints) or all "negative",
709 either kind of comparison will work as long
710 as we don't step by infinity. So pick the kind
711 that works when we do step by infinity. */
712 if ((int) (p_limit + infinity) > (int) p_limit)
713 while ((int) cursor <= (int) p_limit)
714 cursor += BM_tab[*cursor];
715 else
716 while ((unsigned int) cursor <= (unsigned int) p_limit)
717 cursor += BM_tab[*cursor];
718 }
719 else
720 {
721 if ((int) (p_limit + infinity) < (int) p_limit)
722 while ((int) cursor >= (int) p_limit)
723 cursor += BM_tab[*cursor];
724 else
725 while ((unsigned int) cursor >= (unsigned int) p_limit)
726 cursor += BM_tab[*cursor];
727 }
728/* If you are here, cursor is beyond the end of the searched region. */
729 /* This can happen if you match on the far character of the pattern, */
730 /* because the "stride" of that character is infinity, a number able */
731 /* to throw you well beyond the end of the search. It can also */
732 /* happen if you fail to match within the permitted region and would */
733 /* otherwise try a character beyond that region */
734 if ((cursor - p_limit) * direction <= len)
735 break; /* a small overrun is genuine */
736 cursor -= infinity; /* large overrun = hit */
737 i = dirlen - direction;
738 if ((int) trt)
739 {
740 while ((i -= direction) + direction != 0)
741 if (pat[i] != trt[*(cursor -= direction)])
742 break;
743 }
744 else
745 {
746 while ((i -= direction) + direction != 0)
747 if (pat[i] != *(cursor -= direction))
748 break;
749 }
750 cursor += dirlen - i - direction; /* fix cursor */
751 if (i + direction == 0)
752 {
753 cursor -= direction;
754 search_regs.start[0]
755 = pos + cursor - p2 + ((direction > 0)
756 ? 1 - len : 0);
757 search_regs.end[0] = len + search_regs.start[0];
758 search_regs_from_string = 0;
759 if ((n -= direction) != 0)
760 cursor += dirlen; /* to resume search */
761 else
762 return ((direction > 0)
763 ? search_regs.end[0] : search_regs.start[0]);
764 }
765 else
766 cursor += stride_for_teases; /* <sigh> we lose - */
767 }
768 pos += cursor - p2;
769 }
770 else
771 /* Now we'll pick up a clump that has to be done the hard */
772 /* way because it covers a discontinuity */
773 {
774 limit = ((direction > 0)
775 ? BUFFER_CEILING_OF (pos - dirlen + 1)
776 : BUFFER_FLOOR_OF (pos - dirlen - 1));
777 limit = ((direction > 0)
778 ? min (limit + len, lim - 1)
779 : max (limit - len, lim));
780 /* LIMIT is now the last value POS can have
781 and still be valid for a possible match. */
782 while (1)
783 {
784 /* This loop can be coded for space rather than */
785 /* speed because it will usually run only once. */
786 /* (the reach is at most len + 21, and typically */
787 /* does not exceed len) */
788 while ((limit - pos) * direction >= 0)
789 pos += BM_tab[FETCH_CHAR(pos)];
790 /* now run the same tests to distinguish going off the */
791 /* end, a match or a phoney match. */
792 if ((pos - limit) * direction <= len)
793 break; /* ran off the end */
794 /* Found what might be a match.
795 Set POS back to last (first if reverse) char pos. */
796 pos -= infinity;
797 i = dirlen - direction;
798 while ((i -= direction) + direction != 0)
799 {
800 pos -= direction;
801 if (pat[i] != (((int) trt)
802 ? trt[FETCH_CHAR(pos)]
803 : FETCH_CHAR (pos)))
804 break;
805 }
806 /* Above loop has moved POS part or all the way
807 back to the first char pos (last char pos if reverse).
808 Set it once again at the last (first if reverse) char. */
809 pos += dirlen - i- direction;
810 if (i + direction == 0)
811 {
812 pos -= direction;
813 search_regs.start[0]
814 = pos + ((direction > 0) ? 1 - len : 0);
815 search_regs.end[0] = len + search_regs.start[0];
816 search_regs_from_string = 0;
817 if ((n -= direction) != 0)
818 pos += dirlen; /* to resume search */
819 else
820 return ((direction > 0)
821 ? search_regs.end[0] : search_regs.start[0]);
822 }
823 else
824 pos += stride_for_teases;
825 }
826 }
827 /* We have done one clump. Can we continue? */
828 if ((lim - pos) * direction < 0)
829 return ((0 - n) * direction);
830 }
831 return pos;
832 }
833}
834\f
835/* Given a string of words separated by word delimiters,
836 compute a regexp that matches those exact words
837 separated by arbitrary punctuation. */
838
839static Lisp_Object
840wordify (string)
841 Lisp_Object string;
842{
843 register unsigned char *p, *o;
844 register int i, len, punct_count = 0, word_count = 0;
845 Lisp_Object val;
846
847 CHECK_STRING (string, 0);
848 p = XSTRING (string)->data;
849 len = XSTRING (string)->size;
850
851 for (i = 0; i < len; i++)
852 if (SYNTAX (p[i]) != Sword)
853 {
854 punct_count++;
855 if (i > 0 && SYNTAX (p[i-1]) == Sword) word_count++;
856 }
857 if (SYNTAX (p[len-1]) == Sword) word_count++;
858 if (!word_count) return build_string ("");
859
860 val = make_string (p, len - punct_count + 5 * (word_count - 1) + 4);
861
862 o = XSTRING (val)->data;
863 *o++ = '\\';
864 *o++ = 'b';
865
866 for (i = 0; i < len; i++)
867 if (SYNTAX (p[i]) == Sword)
868 *o++ = p[i];
869 else if (i > 0 && SYNTAX (p[i-1]) == Sword && --word_count)
870 {
871 *o++ = '\\';
872 *o++ = 'W';
873 *o++ = '\\';
874 *o++ = 'W';
875 *o++ = '*';
876 }
877
878 *o++ = '\\';
879 *o++ = 'b';
880
881 return val;
882}
883\f
884DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
885 "sSearch backward: ",
886 "Search backward from point for STRING.\n\
887Set point to the beginning of the occurrence found, and return point.\n\
888An optional second argument bounds the search; it is a buffer position.\n\
889The match found must not extend before that position.\n\
890Optional third argument, if t, means if fail just return nil (no error).\n\
891 If not nil and not t, position at limit of search and return nil.\n\
892Optional fourth argument is repeat count--search for successive occurrences.\n\
893See also the functions `match-beginning', `match-end' and `replace-match'.")
894 (string, bound, noerror, count)
895 Lisp_Object string, bound, noerror, count;
896{
897 return search_command (string, bound, noerror, count, -1, 0);
898}
899
900DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "sSearch: ",
901 "Search forward from point for STRING.\n\
902Set point to the end of the occurrence found, and return point.\n\
903An optional second argument bounds the search; it is a buffer position.\n\
904The match found must not extend after that position. nil is equivalent\n\
905 to (point-max).\n\
906Optional third argument, if t, means if fail just return nil (no error).\n\
907 If not nil and not t, move to limit of search and return nil.\n\
908Optional fourth argument is repeat count--search for successive occurrences.\n\
909See also the functions `match-beginning', `match-end' and `replace-match'.")
910 (string, bound, noerror, count)
911 Lisp_Object string, bound, noerror, count;
912{
913 return search_command (string, bound, noerror, count, 1, 0);
914}
915
916DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
917 "sWord search backward: ",
918 "Search backward from point for STRING, ignoring differences in punctuation.\n\
919Set point to the beginning of the occurrence found, and return point.\n\
920An optional second argument bounds the search; it is a buffer position.\n\
921The match found must not extend before that position.\n\
922Optional third argument, if t, means if fail just return nil (no error).\n\
923 If not nil and not t, move to limit of search and return nil.\n\
924Optional fourth argument is repeat count--search for successive occurrences.")
925 (string, bound, noerror, count)
926 Lisp_Object string, bound, noerror, count;
927{
928 return search_command (wordify (string), bound, noerror, count, -1, 1);
929}
930
931DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
932 "sWord search: ",
933 "Search forward from point for STRING, ignoring differences in punctuation.\n\
934Set point to the end of the occurrence found, and return point.\n\
935An optional second argument bounds the search; it is a buffer position.\n\
936The match found must not extend after that position.\n\
937Optional third argument, if t, means if fail just return nil (no error).\n\
938 If not nil and not t, move to limit of search and return nil.\n\
939Optional fourth argument is repeat count--search for successive occurrences.")
940 (string, bound, noerror, count)
941 Lisp_Object string, bound, noerror, count;
942{
943 return search_command (wordify (string), bound, noerror, count, 1, 1);
944}
945
946DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
947 "sRE search backward: ",
948 "Search backward from point for match for regular expression REGEXP.\n\
949Set point to the beginning of the match, and return point.\n\
950The match found is the one starting last in the buffer\n\
951and yet ending before the place the origin of the search.\n\
952An optional second argument bounds the search; it is a buffer position.\n\
953The match found must start at or after that position.\n\
954Optional third argument, if t, means if fail just return nil (no error).\n\
955 If not nil and not t, move to limit of search and return nil.\n\
956Optional fourth argument is repeat count--search for successive occurrences.\n\
957See also the functions `match-beginning', `match-end' and `replace-match'.")
958 (string, bound, noerror, count)
959 Lisp_Object string, bound, noerror, count;
960{
961 return search_command (string, bound, noerror, count, -1, 1);
962}
963
964DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
965 "sRE search: ",
966 "Search forward from point for regular expression REGEXP.\n\
967Set point to the end of the occurrence found, and return point.\n\
968An optional second argument bounds the search; it is a buffer position.\n\
969The match found must not extend after that position.\n\
970Optional third argument, if t, means if fail just return nil (no error).\n\
971 If not nil and not t, move to limit of search and return nil.\n\
972Optional fourth argument is repeat count--search for successive occurrences.\n\
973See also the functions `match-beginning', `match-end' and `replace-match'.")
974 (string, bound, noerror, count)
975 Lisp_Object string, bound, noerror, count;
976{
977 return search_command (string, bound, noerror, count, 1, 1);
978}
979\f
980DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 3, 0,
981 "Replace text matched by last search with NEWTEXT.\n\
982If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
983Otherwise convert to all caps or cap initials, like replaced text.\n\
984If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
985Otherwise treat `\\' as special:\n\
986 `\\&' in NEWTEXT means substitute original matched text.\n\
987 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
988 If Nth parens didn't match, substitute nothing.\n\
989 `\\\\' means insert one `\\'.\n\
990Leaves point at end of replacement text.")
991 (string, fixedcase, literal)
992 Lisp_Object string, fixedcase, literal;
993{
994 enum { nochange, all_caps, cap_initial } case_action;
995 register int pos, last;
996 int some_multiletter_word;
997 int some_letter = 0;
998 register int c, prevc;
999 int inslen;
1000
1001 CHECK_STRING (string, 0);
1002
1003 case_action = nochange; /* We tried an initialization */
1004 /* but some C compilers blew it */
4746118a
JB
1005
1006 if (search_regs.num_regs <= 0)
1007 error ("replace-match called before any match found");
1008
ca1d1d23
JB
1009 if (search_regs.start[0] < BEGV
1010 || search_regs.start[0] > search_regs.end[0]
1011 || search_regs.end[0] > ZV)
1012 args_out_of_range(make_number (search_regs.start[0]),
1013 make_number (search_regs.end[0]));
1014
1015 if (NILP (fixedcase))
1016 {
1017 /* Decide how to casify by examining the matched text. */
1018
1019 last = search_regs.end[0];
1020 prevc = '\n';
1021 case_action = all_caps;
1022
1023 /* some_multiletter_word is set nonzero if any original word
1024 is more than one letter long. */
1025 some_multiletter_word = 0;
1026
1027 for (pos = search_regs.start[0]; pos < last; pos++)
1028 {
1029 c = FETCH_CHAR (pos);
1030 if (LOWERCASEP (c))
1031 {
1032 /* Cannot be all caps if any original char is lower case */
1033
1034 case_action = cap_initial;
1035 if (SYNTAX (prevc) != Sword)
1036 {
1037 /* Cannot even be cap initials
1038 if some original initial is lower case */
1039 case_action = nochange;
1040 break;
1041 }
1042 else
1043 some_multiletter_word = 1;
1044 }
1045 else if (!NOCASEP (c))
1046 {
1047 some_letter = 1;
1048 if (!some_multiletter_word && SYNTAX (prevc) == Sword)
1049 some_multiletter_word = 1;
1050 }
1051
1052 prevc = c;
1053 }
1054
1055 /* Do not make new text all caps
1056 if the original text contained only single letter words. */
1057 if (case_action == all_caps && !some_multiletter_word)
1058 case_action = cap_initial;
1059
1060 if (!some_letter) case_action = nochange;
1061 }
1062
1063 SET_PT (search_regs.end[0]);
1064 if (!NILP (literal))
1065 Finsert (1, &string);
1066 else
1067 {
1068 struct gcpro gcpro1;
1069 GCPRO1 (string);
1070
1071 for (pos = 0; pos < XSTRING (string)->size; pos++)
1072 {
1073 c = XSTRING (string)->data[pos];
1074 if (c == '\\')
1075 {
1076 c = XSTRING (string)->data[++pos];
1077 if (c == '&')
1078 Finsert_buffer_substring (Fcurrent_buffer (),
1079 make_number (search_regs.start[0]),
1080 make_number (search_regs.end[0]));
4746118a 1081 else if (c >= '1' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
1082 {
1083 if (search_regs.start[c - '0'] >= 1)
1084 Finsert_buffer_substring (Fcurrent_buffer (),
1085 make_number (search_regs.start[c - '0']),
1086 make_number (search_regs.end[c - '0']));
1087 }
1088 else
1089 insert_char (c);
1090 }
1091 else
1092 insert_char (c);
1093 }
1094 UNGCPRO;
1095 }
1096
1097 inslen = point - (search_regs.end[0]);
1098 del_range (search_regs.start[0], search_regs.end[0]);
1099
1100 if (case_action == all_caps)
1101 Fupcase_region (make_number (point - inslen), make_number (point));
1102 else if (case_action == cap_initial)
1103 upcase_initials_region (make_number (point - inslen), make_number (point));
1104 return Qnil;
1105}
1106\f
1107static Lisp_Object
1108match_limit (num, beginningp)
1109 Lisp_Object num;
1110 int beginningp;
1111{
1112 register int n;
1113
1114 CHECK_NUMBER (num, 0);
1115 n = XINT (num);
4746118a
JB
1116 if (n < 0 || n >= search_regs.num_regs)
1117 args_out_of_range (num, make_number (search_regs.num_regs));
1118 if (search_regs.num_regs <= 0
1119 || search_regs.start[n] < 0)
ca1d1d23
JB
1120 return Qnil;
1121 return (make_number ((beginningp) ? search_regs.start[n]
1122 : search_regs.end[n]));
1123}
1124
1125DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
1126 "Return position of start of text matched by last search.\n\
1127ARG, a number, specifies which parenthesized expression in the last regexp.\n\
1128 Value is nil if ARGth pair didn't match, or there were less than ARG pairs.\n\
1129Zero means the entire text matched by the whole regexp or whole string.")
1130 (num)
1131 Lisp_Object num;
1132{
1133 return match_limit (num, 1);
1134}
1135
1136DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
1137 "Return position of end of text matched by last search.\n\
1138ARG, a number, specifies which parenthesized expression in the last regexp.\n\
1139 Value is nil if ARGth pair didn't match, or there were less than ARG pairs.\n\
1140Zero means the entire text matched by the whole regexp or whole string.")
1141 (num)
1142 Lisp_Object num;
1143{
1144 return match_limit (num, 0);
1145}
1146
1147DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 0, 0,
1148 "Return a list containing all info on what the last search matched.\n\
1149Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
1150All the elements are markers or nil (nil if the Nth pair didn't match)\n\
1151if the last match was on a buffer; integers or nil if a string was matched.\n\
1152Use `store-match-data' to reinstate the data in this list.")
1153 ()
1154{
4746118a 1155 Lisp_Object *data;
ca1d1d23
JB
1156 int i, len;
1157
4746118a
JB
1158 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
1159 * sizeof (Lisp_Object));
1160
ca1d1d23 1161 len = -1;
4746118a 1162 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1163 {
1164 int start = search_regs.start[i];
1165 if (start >= 0)
1166 {
1167 if (search_regs_from_string)
1168 {
1169 XFASTINT (data[2 * i]) = start;
1170 XFASTINT (data[2 * i + 1]) = search_regs.end[i];
1171 }
1172 else
1173 {
1174 data[2 * i] = Fmake_marker ();
1175 Fset_marker (data[2 * i], make_number (start), Qnil);
1176 data[2 * i + 1] = Fmake_marker ();
1177 Fset_marker (data[2 * i + 1],
1178 make_number (search_regs.end[i]), Qnil);
1179 }
1180 len = i;
1181 }
1182 else
1183 data[2 * i] = data [2 * i + 1] = Qnil;
1184 }
1185 return Flist (2 * len + 2, data);
1186}
1187
1188
1189DEFUN ("store-match-data", Fstore_match_data, Sstore_match_data, 1, 1, 0,
1190 "Set internal data on last search match from elements of LIST.\n\
1191LIST should have been created by calling `match-data' previously.")
1192 (list)
1193 register Lisp_Object list;
1194{
1195 register int i;
1196 register Lisp_Object marker;
1197
1198 if (!CONSP (list) && !NILP (list))
1199 list = wrong_type_argument (Qconsp, list, 0);
1200
4746118a
JB
1201 /* Allocate registers if they don't already exist. */
1202 {
1203 int length = Flength (list) / 2;
1204
1205 if (length > search_regs.num_regs)
1206 {
1207 if (search_regs.start)
1208 search_regs.start =
1209 (regoff_t *) realloc (search_regs.start,
1210 length * sizeof (regoff_t));
1211 else
1212 search_regs.start = (regoff_t *) malloc (length * sizeof (regoff_t));
1213 if (search_regs.end)
1214 search_regs.end =
1215 (regoff_t *) realloc (search_regs.end,
1216 length * sizeof (regoff_t));
1217 else
1218 search_regs.end = (regoff_t *) malloc (length * sizeof (regoff_t));
1219
1220 search_regs.num_regs = length;
1221 }
1222 }
1223
1224 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1225 {
1226 marker = Fcar (list);
1227 if (NILP (marker))
1228 {
1229 search_regs.start[i] = -1;
1230 list = Fcdr (list);
1231 }
1232 else
1233 {
1234 if (XTYPE (marker) == Lisp_Marker
1235 && XMARKER (marker)->buffer == 0)
1236 XFASTINT (marker) = 0;
1237
1238 CHECK_NUMBER_COERCE_MARKER (marker, 0);
1239 search_regs.start[i] = XINT (marker);
1240 list = Fcdr (list);
1241
1242 marker = Fcar (list);
1243 if (XTYPE (marker) == Lisp_Marker
1244 && XMARKER (marker)->buffer == 0)
1245 XFASTINT (marker) = 0;
1246
1247 CHECK_NUMBER_COERCE_MARKER (marker, 0);
1248 search_regs.end[i] = XINT (marker);
1249 }
1250 list = Fcdr (list);
1251 }
1252
1253 return Qnil;
1254}
1255
1256/* Quote a string to inactivate reg-expr chars */
1257
1258DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
1259 "Return a regexp string which matches exactly STRING and nothing else.")
1260 (str)
1261 Lisp_Object str;
1262{
1263 register unsigned char *in, *out, *end;
1264 register unsigned char *temp;
1265
1266 CHECK_STRING (str, 0);
1267
1268 temp = (unsigned char *) alloca (XSTRING (str)->size * 2);
1269
1270 /* Now copy the data into the new string, inserting escapes. */
1271
1272 in = XSTRING (str)->data;
1273 end = in + XSTRING (str)->size;
1274 out = temp;
1275
1276 for (; in != end; in++)
1277 {
1278 if (*in == '[' || *in == ']'
1279 || *in == '*' || *in == '.' || *in == '\\'
1280 || *in == '?' || *in == '+'
1281 || *in == '^' || *in == '$')
1282 *out++ = '\\';
1283 *out++ = *in;
1284 }
1285
1286 return make_string (temp, out - temp);
1287}
1288\f
1289syms_of_search ()
1290{
1291 register int i;
1292
1293 searchbuf.allocated = 100;
8c0e7b73 1294 searchbuf.buffer = (unsigned char *) malloc (searchbuf.allocated);
ca1d1d23
JB
1295 searchbuf.fastmap = search_fastmap;
1296
1297 Qsearch_failed = intern ("search-failed");
1298 staticpro (&Qsearch_failed);
1299 Qinvalid_regexp = intern ("invalid-regexp");
1300 staticpro (&Qinvalid_regexp);
1301
1302 Fput (Qsearch_failed, Qerror_conditions,
1303 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
1304 Fput (Qsearch_failed, Qerror_message,
1305 build_string ("Search failed"));
1306
1307 Fput (Qinvalid_regexp, Qerror_conditions,
1308 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
1309 Fput (Qinvalid_regexp, Qerror_message,
1310 build_string ("Invalid regexp"));
1311
1312 last_regexp = Qnil;
1313 staticpro (&last_regexp);
1314
1315 defsubr (&Sstring_match);
1316 defsubr (&Slooking_at);
1317 defsubr (&Sskip_chars_forward);
1318 defsubr (&Sskip_chars_backward);
1319 defsubr (&Ssearch_forward);
1320 defsubr (&Ssearch_backward);
1321 defsubr (&Sword_search_forward);
1322 defsubr (&Sword_search_backward);
1323 defsubr (&Sre_search_forward);
1324 defsubr (&Sre_search_backward);
1325 defsubr (&Sreplace_match);
1326 defsubr (&Smatch_beginning);
1327 defsubr (&Smatch_end);
1328 defsubr (&Smatch_data);
1329 defsubr (&Sstore_match_data);
1330 defsubr (&Sregexp_quote);
1331}