1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 1986, 1987, 1993, 1994 Free Software Foundation, Inc.
4 This file is part of GNU Emacs.
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 1, or (at your option)
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
26 #include "blockinput.h"
28 #include <sys/types.h>
31 #define max(a, b) ((a) > (b) ? (a) : (b))
32 #define min(a, b) ((a) < (b) ? (a) : (b))
34 /* We compile regexps into this buffer and then use it for searching. */
36 struct re_pattern_buffer searchbuf
;
38 char search_fastmap
[0400];
40 /* Last regexp we compiled */
42 Lisp_Object last_regexp
;
44 /* Every call to re_match, etc., must pass &search_regs as the regs
45 argument unless you can show it is unnecessary (i.e., if re_match
46 is certainly going to be called again before region-around-match
49 Since the registers are now dynamically allocated, we need to make
50 sure not to refer to the Nth register before checking that it has
51 been allocated by checking search_regs.num_regs.
53 The regex code keeps track of whether it has allocated the search
54 buffer using bits in searchbuf. This means that whenever you
55 compile a new pattern, it completely forgets whether it has
56 allocated any registers, and will allocate new registers the next
57 time you call a searching or matching function. Therefore, we need
58 to call re_set_registers after compiling a new pattern or after
59 setting the match registers, so that the regex functions will be
60 able to free or re-allocate it properly. */
61 static struct re_registers search_regs
;
63 /* The buffer in which the last search was performed, or
64 Qt if the last search was done in a string;
65 Qnil if no searching has been done yet. */
66 static Lisp_Object last_thing_searched
;
68 /* error condition signalled when regexp compile_pattern fails */
70 Lisp_Object Qinvalid_regexp
;
72 static void set_search_regs ();
77 error ("Stack overflow in regexp matcher");
86 /* Compile a regexp and signal a Lisp error if anything goes wrong. */
88 compile_pattern (pattern
, bufp
, regp
, translate
)
90 struct re_pattern_buffer
*bufp
;
91 struct re_registers
*regp
;
97 if (EQ (pattern
, last_regexp
)
98 && translate
== bufp
->translate
)
102 bufp
->translate
= translate
;
104 val
= (CONST
char *) re_compile_pattern ((char *) XSTRING (pattern
)->data
,
105 XSTRING (pattern
)->size
, bufp
);
109 dummy
= build_string (val
);
111 Fsignal (Qinvalid_regexp
, Fcons (dummy
, Qnil
));
114 last_regexp
= pattern
;
116 /* Advise the searching functions about the space we have allocated
117 for register data. */
120 re_set_registers (bufp
, regp
, regp
->num_regs
, regp
->start
, regp
->end
);
126 /* Error condition used for failing searches */
127 Lisp_Object Qsearch_failed
;
133 Fsignal (Qsearch_failed
, Fcons (arg
, Qnil
));
137 DEFUN ("looking-at", Flooking_at
, Slooking_at
, 1, 1, 0,
138 "Return t if text after point matches regular expression PAT.\n\
139 This function modifies the match data that `match-beginning',\n\
140 `match-end' and `match-data' access; save and restore the match\n\
141 data if you want to preserve them.")
146 unsigned char *p1
, *p2
;
150 CHECK_STRING (string
, 0);
151 compile_pattern (string
, &searchbuf
, &search_regs
,
152 !NILP (current_buffer
->case_fold_search
) ? DOWNCASE_TABLE
: 0);
155 QUIT
; /* Do a pending quit right away, to avoid paradoxical behavior */
157 /* Get pointers and sizes of the two strings
158 that make up the visible portion of the buffer. */
176 i
= re_match_2 (&searchbuf
, (char *) p1
, s1
, (char *) p2
, s2
,
177 point
- BEGV
, &search_regs
,
182 val
= (0 <= i
? Qt
: Qnil
);
183 for (i
= 0; i
< search_regs
.num_regs
; i
++)
184 if (search_regs
.start
[i
] >= 0)
186 search_regs
.start
[i
] += BEGV
;
187 search_regs
.end
[i
] += BEGV
;
189 XSET (last_thing_searched
, Lisp_Buffer
, current_buffer
);
194 DEFUN ("string-match", Fstring_match
, Sstring_match
, 2, 3, 0,
195 "Return index of start of first match for REGEXP in STRING, or nil.\n\
196 If third arg START is non-nil, start search at that index in STRING.\n\
197 For index of first char beyond the match, do (match-end 0).\n\
198 `match-end' and `match-beginning' also give indices of substrings\n\
199 matched by parenthesis constructs in the pattern.")
200 (regexp
, string
, start
)
201 Lisp_Object regexp
, string
, start
;
206 CHECK_STRING (regexp
, 0);
207 CHECK_STRING (string
, 1);
213 int len
= XSTRING (string
)->size
;
215 CHECK_NUMBER (start
, 2);
217 if (s
< 0 && -s
<= len
)
219 else if (0 > s
|| s
> len
)
220 args_out_of_range (string
, start
);
223 compile_pattern (regexp
, &searchbuf
, &search_regs
,
224 !NILP (current_buffer
->case_fold_search
) ? DOWNCASE_TABLE
: 0);
226 val
= re_search (&searchbuf
, (char *) XSTRING (string
)->data
,
227 XSTRING (string
)->size
, s
, XSTRING (string
)->size
- s
,
230 last_thing_searched
= Qt
;
233 if (val
< 0) return Qnil
;
234 return make_number (val
);
237 /* Match REGEXP against STRING, searching all of STRING,
238 and return the index of the match, or negative on failure.
239 This does not clobber the match data. */
242 fast_string_match (regexp
, string
)
243 Lisp_Object regexp
, string
;
247 compile_pattern (regexp
, &searchbuf
, 0, 0);
249 val
= re_search (&searchbuf
, (char *) XSTRING (string
)->data
,
250 XSTRING (string
)->size
, 0, XSTRING (string
)->size
,
256 /* Search for COUNT instances of the character TARGET, starting at START.
257 If COUNT is negative, search backwards.
259 If we find COUNT instances, set *SHORTAGE to zero, and return the
260 position after the COUNTth match. Note that for reverse motion
261 this is not the same as the usual convention for Emacs motion commands.
263 If we don't find COUNT instances before reaching the end of the
264 buffer (or the beginning, if scanning backwards), set *SHORTAGE to
265 the number of TARGETs left unfound, and return the end of the
266 buffer we bumped up against.
268 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
269 except when inside redisplay. */
271 scan_buffer (target
, start
, count
, shortage
, allow_quit
)
272 int *shortage
, start
;
273 register int count
, target
;
276 int limit
= ((count
> 0) ? ZV
- 1 : BEGV
);
277 int direction
= ((count
> 0) ? 1 : -1);
279 register unsigned char *cursor
;
282 register int ceiling
;
283 register unsigned char *ceiling_addr
;
288 immediate_quit
= allow_quit
;
291 while (start
!= limit
+ 1)
293 ceiling
= BUFFER_CEILING_OF (start
);
294 ceiling
= min (limit
, ceiling
);
295 ceiling_addr
= &FETCH_CHAR (ceiling
) + 1;
296 base
= (cursor
= &FETCH_CHAR (start
));
299 while (*cursor
!= target
&& ++cursor
!= ceiling_addr
)
301 if (cursor
!= ceiling_addr
)
306 return (start
+ cursor
- base
+ 1);
309 if (++cursor
== ceiling_addr
)
315 start
+= cursor
- base
;
319 start
--; /* first character we scan */
320 while (start
> limit
- 1)
321 { /* we WILL scan under start */
322 ceiling
= BUFFER_FLOOR_OF (start
);
323 ceiling
= max (limit
, ceiling
);
324 ceiling_addr
= &FETCH_CHAR (ceiling
) - 1;
325 base
= (cursor
= &FETCH_CHAR (start
));
329 while (--cursor
!= ceiling_addr
&& *cursor
!= target
)
331 if (cursor
!= ceiling_addr
)
336 return (start
+ cursor
- base
+ 1);
342 start
+= cursor
- base
;
347 *shortage
= count
* direction
;
348 return (start
+ ((direction
== 1 ? 0 : 1)));
352 find_next_newline_no_quit (from
, cnt
)
353 register int from
, cnt
;
355 return scan_buffer ('\n', from
, cnt
, (int *) 0, 0);
359 find_next_newline (from
, cnt
)
360 register int from
, cnt
;
362 return scan_buffer ('\n', from
, cnt
, (int *) 0, 1);
365 Lisp_Object
skip_chars ();
367 DEFUN ("skip-chars-forward", Fskip_chars_forward
, Sskip_chars_forward
, 1, 2, 0,
368 "Move point forward, stopping before a char not in STRING, or at pos LIM.\n\
369 STRING is like the inside of a `[...]' in a regular expression\n\
370 except that `]' is never special and `\\' quotes `^', `-' or `\\'.\n\
371 Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
372 With arg \"^a-zA-Z\", skips nonletters stopping before first letter.\n\
373 Returns the distance traveled, either zero or positive.")
375 Lisp_Object string
, lim
;
377 return skip_chars (1, 0, string
, lim
);
380 DEFUN ("skip-chars-backward", Fskip_chars_backward
, Sskip_chars_backward
, 1, 2, 0,
381 "Move point backward, stopping after a char not in STRING, or at pos LIM.\n\
382 See `skip-chars-forward' for details.\n\
383 Returns the distance traveled, either zero or negative.")
385 Lisp_Object string
, lim
;
387 return skip_chars (0, 0, string
, lim
);
390 DEFUN ("skip-syntax-forward", Fskip_syntax_forward
, Sskip_syntax_forward
, 1, 2, 0,
391 "Move point forward across chars in specified syntax classes.\n\
392 SYNTAX is a string of syntax code characters.\n\
393 Stop before a char whose syntax is not in SYNTAX, or at position LIM.\n\
394 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
395 This function returns the distance traveled, either zero or positive.")
397 Lisp_Object syntax
, lim
;
399 return skip_chars (1, 1, syntax
, lim
);
402 DEFUN ("skip-syntax-backward", Fskip_syntax_backward
, Sskip_syntax_backward
, 1, 2, 0,
403 "Move point backward across chars in specified syntax classes.\n\
404 SYNTAX is a string of syntax code characters.\n\
405 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.\n\
406 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
407 This function returns the distance traveled, either zero or negative.")
409 Lisp_Object syntax
, lim
;
411 return skip_chars (0, 1, syntax
, lim
);
415 skip_chars (forwardp
, syntaxp
, string
, lim
)
416 int forwardp
, syntaxp
;
417 Lisp_Object string
, lim
;
419 register unsigned char *p
, *pend
;
420 register unsigned char c
;
421 unsigned char fastmap
[0400];
425 CHECK_STRING (string
, 0);
428 XSET (lim
, Lisp_Int
, forwardp
? ZV
: BEGV
);
430 CHECK_NUMBER_COERCE_MARKER (lim
, 1);
432 /* In any case, don't allow scan outside bounds of buffer. */
433 /* jla turned this off, for no known reason.
434 bfox turned the ZV part on, and rms turned the
435 BEGV part back on. */
438 if (XINT (lim
) < BEGV
)
439 XFASTINT (lim
) = BEGV
;
441 p
= XSTRING (string
)->data
;
442 pend
= p
+ XSTRING (string
)->size
;
443 bzero (fastmap
, sizeof fastmap
);
445 if (p
!= pend
&& *p
== '^')
450 /* Find the characters specified and set their elements of fastmap.
451 If syntaxp, each character counts as itself.
452 Otherwise, handle backslashes and ranges specially */
463 if (p
== pend
) break;
466 if (p
!= pend
&& *p
== '-')
469 if (p
== pend
) break;
482 if (syntaxp
&& fastmap
['-'] != 0)
485 /* If ^ was the first character, complement the fastmap. */
488 for (i
= 0; i
< sizeof fastmap
; i
++)
492 int start_point
= point
;
500 while (point
< XINT (lim
)
501 && fastmap
[(unsigned char) syntax_code_spec
[(int) SYNTAX (FETCH_CHAR (point
))]])
506 while (point
> XINT (lim
)
507 && fastmap
[(unsigned char) syntax_code_spec
[(int) SYNTAX (FETCH_CHAR (point
- 1))]])
515 while (point
< XINT (lim
) && fastmap
[FETCH_CHAR (point
)])
520 while (point
> XINT (lim
) && fastmap
[FETCH_CHAR (point
- 1)])
526 return make_number (point
- start_point
);
530 /* Subroutines of Lisp buffer search functions. */
533 search_command (string
, bound
, noerror
, count
, direction
, RE
)
534 Lisp_Object string
, bound
, noerror
, count
;
544 CHECK_NUMBER (count
, 3);
548 CHECK_STRING (string
, 0);
550 lim
= n
> 0 ? ZV
: BEGV
;
553 CHECK_NUMBER_COERCE_MARKER (bound
, 1);
555 if (n
> 0 ? lim
< point
: lim
> point
)
556 error ("Invalid search bound (wrong side of point)");
563 np
= search_buffer (string
, point
, lim
, n
, RE
,
564 (!NILP (current_buffer
->case_fold_search
)
565 ? XSTRING (current_buffer
->case_canon_table
)->data
: 0),
566 (!NILP (current_buffer
->case_fold_search
)
567 ? XSTRING (current_buffer
->case_eqv_table
)->data
: 0));
571 return signal_failure (string
);
572 if (!EQ (noerror
, Qt
))
574 if (lim
< BEGV
|| lim
> ZV
)
578 #if 0 /* This would be clean, but maybe programs depend on
579 a value of nil here. */
587 if (np
< BEGV
|| np
> ZV
)
592 return make_number (np
);
596 trivial_regexp_p (regexp
)
599 int len
= XSTRING (regexp
)->size
;
600 unsigned char *s
= XSTRING (regexp
)->data
;
606 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
613 case '|': case '(': case ')': case '`': case '\'': case 'b':
614 case 'B': case '<': case '>': case 'w': case 'W': case 's':
615 case 'S': case '1': case '2': case '3': case '4': case '5':
616 case '6': case '7': case '8': case '9':
624 /* Search for the n'th occurrence of STRING in the current buffer,
625 starting at position POS and stopping at position LIM,
626 treating PAT as a literal string if RE is false or as
627 a regular expression if RE is true.
629 If N is positive, searching is forward and LIM must be greater than POS.
630 If N is negative, searching is backward and LIM must be less than POS.
632 Returns -x if only N-x occurrences found (x > 0),
633 or else the position at the beginning of the Nth occurrence
634 (if searching backward) or the end (if searching forward). */
636 search_buffer (string
, pos
, lim
, n
, RE
, trt
, inverse_trt
)
642 register unsigned char *trt
;
643 register unsigned char *inverse_trt
;
645 int len
= XSTRING (string
)->size
;
646 unsigned char *base_pat
= XSTRING (string
)->data
;
647 register int *BM_tab
;
649 register int direction
= ((n
> 0) ? 1 : -1);
651 int infinity
, limit
, k
, stride_for_teases
;
652 register unsigned char *pat
, *cursor
, *p_limit
;
654 unsigned char *p1
, *p2
;
657 /* Null string is found at starting position. */
660 set_search_regs (pos
, 0);
664 /* Searching 0 times means don't move. */
668 if (RE
&& !trivial_regexp_p (string
))
670 compile_pattern (string
, &searchbuf
, &search_regs
, (char *) trt
);
672 immediate_quit
= 1; /* Quit immediately if user types ^G,
673 because letting this function finish
674 can take too long. */
675 QUIT
; /* Do a pending quit right away,
676 to avoid paradoxical behavior */
677 /* Get pointers and sizes of the two strings
678 that make up the visible portion of the buffer. */
698 val
= re_search_2 (&searchbuf
, (char *) p1
, s1
, (char *) p2
, s2
,
699 pos
- BEGV
, lim
- pos
, &search_regs
,
700 /* Don't allow match past current point */
709 for (i
= 0; i
< search_regs
.num_regs
; i
++)
710 if (search_regs
.start
[i
] >= 0)
712 search_regs
.start
[i
] += j
;
713 search_regs
.end
[i
] += j
;
715 XSET (last_thing_searched
, Lisp_Buffer
, current_buffer
);
716 /* Set pos to the new position. */
717 pos
= search_regs
.start
[0];
729 val
= re_search_2 (&searchbuf
, (char *) p1
, s1
, (char *) p2
, s2
,
730 pos
- BEGV
, lim
- pos
, &search_regs
,
739 for (i
= 0; i
< search_regs
.num_regs
; i
++)
740 if (search_regs
.start
[i
] >= 0)
742 search_regs
.start
[i
] += j
;
743 search_regs
.end
[i
] += j
;
745 XSET (last_thing_searched
, Lisp_Buffer
, current_buffer
);
746 pos
= search_regs
.end
[0];
758 else /* non-RE case */
761 int BM_tab_space
[0400];
762 BM_tab
= &BM_tab_space
[0];
764 BM_tab
= (int *) alloca (0400 * sizeof (int));
767 unsigned char *patbuf
= (unsigned char *) alloca (len
);
771 /* If we got here and the RE flag is set, it's because we're
772 dealing with a regexp known to be trivial, so the backslash
773 just quotes the next character. */
774 if (RE
&& *base_pat
== '\\')
779 *pat
++ = (trt
? trt
[*base_pat
++] : *base_pat
++);
782 pat
= base_pat
= patbuf
;
784 /* The general approach is that we are going to maintain that we know */
785 /* the first (closest to the present position, in whatever direction */
786 /* we're searching) character that could possibly be the last */
787 /* (furthest from present position) character of a valid match. We */
788 /* advance the state of our knowledge by looking at that character */
789 /* and seeing whether it indeed matches the last character of the */
790 /* pattern. If it does, we take a closer look. If it does not, we */
791 /* move our pointer (to putative last characters) as far as is */
792 /* logically possible. This amount of movement, which I call a */
793 /* stride, will be the length of the pattern if the actual character */
794 /* appears nowhere in the pattern, otherwise it will be the distance */
795 /* from the last occurrence of that character to the end of the */
797 /* As a coding trick, an enormous stride is coded into the table for */
798 /* characters that match the last character. This allows use of only */
799 /* a single test, a test for having gone past the end of the */
800 /* permissible match region, to test for both possible matches (when */
801 /* the stride goes past the end immediately) and failure to */
802 /* match (where you get nudged past the end one stride at a time). */
804 /* Here we make a "mickey mouse" BM table. The stride of the search */
805 /* is determined only by the last character of the putative match. */
806 /* If that character does not match, we will stride the proper */
807 /* distance to propose a match that superimposes it on the last */
808 /* instance of a character that matches it (per trt), or misses */
809 /* it entirely if there is none. */
811 dirlen
= len
* direction
;
812 infinity
= dirlen
- (lim
+ pos
+ len
+ len
) * direction
;
814 pat
= (base_pat
+= len
- 1);
815 BM_tab_base
= BM_tab
;
817 j
= dirlen
; /* to get it in a register */
818 /* A character that does not appear in the pattern induces a */
819 /* stride equal to the pattern length. */
820 while (BM_tab_base
!= BM_tab
)
828 while (i
!= infinity
)
830 j
= pat
[i
]; i
+= direction
;
831 if (i
== dirlen
) i
= infinity
;
836 stride_for_teases
= BM_tab
[j
];
837 BM_tab
[j
] = dirlen
- i
;
838 /* A translation table is accompanied by its inverse -- see */
839 /* comment following downcase_table for details */
840 while ((j
= inverse_trt
[j
]) != k
)
841 BM_tab
[j
] = dirlen
- i
;
846 stride_for_teases
= BM_tab
[j
];
847 BM_tab
[j
] = dirlen
- i
;
849 /* stride_for_teases tells how much to stride if we get a */
850 /* match on the far character but are subsequently */
851 /* disappointed, by recording what the stride would have been */
852 /* for that character if the last character had been */
855 infinity
= dirlen
- infinity
;
856 pos
+= dirlen
- ((direction
> 0) ? direction
: 0);
857 /* loop invariant - pos points at where last char (first char if reverse)
858 of pattern would align in a possible match. */
861 /* It's been reported that some (broken) compiler thinks that
862 Boolean expressions in an arithmetic context are unsigned.
863 Using an explicit ?1:0 prevents this. */
864 if ((lim
- pos
- ((direction
> 0) ? 1 : 0)) * direction
< 0)
865 return (n
* (0 - direction
));
866 /* First we do the part we can by pointers (maybe nothing) */
869 limit
= pos
- dirlen
+ direction
;
870 limit
= ((direction
> 0)
871 ? BUFFER_CEILING_OF (limit
)
872 : BUFFER_FLOOR_OF (limit
));
873 /* LIMIT is now the last (not beyond-last!) value
874 POS can take on without hitting edge of buffer or the gap. */
875 limit
= ((direction
> 0)
876 ? min (lim
- 1, min (limit
, pos
+ 20000))
877 : max (lim
, max (limit
, pos
- 20000)));
878 if ((limit
- pos
) * direction
> 20)
880 p_limit
= &FETCH_CHAR (limit
);
881 p2
= (cursor
= &FETCH_CHAR (pos
));
882 /* In this loop, pos + cursor - p2 is the surrogate for pos */
883 while (1) /* use one cursor setting as long as i can */
885 if (direction
> 0) /* worth duplicating */
887 /* Use signed comparison if appropriate
888 to make cursor+infinity sure to be > p_limit.
889 Assuming that the buffer lies in a range of addresses
890 that are all "positive" (as ints) or all "negative",
891 either kind of comparison will work as long
892 as we don't step by infinity. So pick the kind
893 that works when we do step by infinity. */
894 if ((int) (p_limit
+ infinity
) > (int) p_limit
)
895 while ((int) cursor
<= (int) p_limit
)
896 cursor
+= BM_tab
[*cursor
];
898 while ((unsigned int) cursor
<= (unsigned int) p_limit
)
899 cursor
+= BM_tab
[*cursor
];
903 if ((int) (p_limit
+ infinity
) < (int) p_limit
)
904 while ((int) cursor
>= (int) p_limit
)
905 cursor
+= BM_tab
[*cursor
];
907 while ((unsigned int) cursor
>= (unsigned int) p_limit
)
908 cursor
+= BM_tab
[*cursor
];
910 /* If you are here, cursor is beyond the end of the searched region. */
911 /* This can happen if you match on the far character of the pattern, */
912 /* because the "stride" of that character is infinity, a number able */
913 /* to throw you well beyond the end of the search. It can also */
914 /* happen if you fail to match within the permitted region and would */
915 /* otherwise try a character beyond that region */
916 if ((cursor
- p_limit
) * direction
<= len
)
917 break; /* a small overrun is genuine */
918 cursor
-= infinity
; /* large overrun = hit */
919 i
= dirlen
- direction
;
922 while ((i
-= direction
) + direction
!= 0)
923 if (pat
[i
] != trt
[*(cursor
-= direction
)])
928 while ((i
-= direction
) + direction
!= 0)
929 if (pat
[i
] != *(cursor
-= direction
))
932 cursor
+= dirlen
- i
- direction
; /* fix cursor */
933 if (i
+ direction
== 0)
937 set_search_regs (pos
+ cursor
- p2
+ ((direction
> 0)
941 if ((n
-= direction
) != 0)
942 cursor
+= dirlen
; /* to resume search */
944 return ((direction
> 0)
945 ? search_regs
.end
[0] : search_regs
.start
[0]);
948 cursor
+= stride_for_teases
; /* <sigh> we lose - */
953 /* Now we'll pick up a clump that has to be done the hard */
954 /* way because it covers a discontinuity */
956 limit
= ((direction
> 0)
957 ? BUFFER_CEILING_OF (pos
- dirlen
+ 1)
958 : BUFFER_FLOOR_OF (pos
- dirlen
- 1));
959 limit
= ((direction
> 0)
960 ? min (limit
+ len
, lim
- 1)
961 : max (limit
- len
, lim
));
962 /* LIMIT is now the last value POS can have
963 and still be valid for a possible match. */
966 /* This loop can be coded for space rather than */
967 /* speed because it will usually run only once. */
968 /* (the reach is at most len + 21, and typically */
969 /* does not exceed len) */
970 while ((limit
- pos
) * direction
>= 0)
971 pos
+= BM_tab
[FETCH_CHAR(pos
)];
972 /* now run the same tests to distinguish going off the */
973 /* end, a match or a phony match. */
974 if ((pos
- limit
) * direction
<= len
)
975 break; /* ran off the end */
976 /* Found what might be a match.
977 Set POS back to last (first if reverse) char pos. */
979 i
= dirlen
- direction
;
980 while ((i
-= direction
) + direction
!= 0)
983 if (pat
[i
] != (((int) trt
)
984 ? trt
[FETCH_CHAR(pos
)]
988 /* Above loop has moved POS part or all the way
989 back to the first char pos (last char pos if reverse).
990 Set it once again at the last (first if reverse) char. */
991 pos
+= dirlen
- i
- direction
;
992 if (i
+ direction
== 0)
996 set_search_regs (pos
+ ((direction
> 0) ? 1 - len
: 0),
999 if ((n
-= direction
) != 0)
1000 pos
+= dirlen
; /* to resume search */
1002 return ((direction
> 0)
1003 ? search_regs
.end
[0] : search_regs
.start
[0]);
1006 pos
+= stride_for_teases
;
1009 /* We have done one clump. Can we continue? */
1010 if ((lim
- pos
) * direction
< 0)
1011 return ((0 - n
) * direction
);
1017 /* Record beginning BEG and end BEG + LEN
1018 for a match just found in the current buffer. */
1021 set_search_regs (beg
, len
)
1024 /* Make sure we have registers in which to store
1025 the match position. */
1026 if (search_regs
.num_regs
== 0)
1028 regoff_t
*starts
, *ends
;
1030 starts
= (regoff_t
*) xmalloc (2 * sizeof (regoff_t
));
1031 ends
= (regoff_t
*) xmalloc (2 * sizeof (regoff_t
));
1033 re_set_registers (&searchbuf
,
1039 search_regs
.start
[0] = beg
;
1040 search_regs
.end
[0] = beg
+ len
;
1041 XSET (last_thing_searched
, Lisp_Buffer
, current_buffer
);
1044 /* Given a string of words separated by word delimiters,
1045 compute a regexp that matches those exact words
1046 separated by arbitrary punctuation. */
1052 register unsigned char *p
, *o
;
1053 register int i
, len
, punct_count
= 0, word_count
= 0;
1056 CHECK_STRING (string
, 0);
1057 p
= XSTRING (string
)->data
;
1058 len
= XSTRING (string
)->size
;
1060 for (i
= 0; i
< len
; i
++)
1061 if (SYNTAX (p
[i
]) != Sword
)
1064 if (i
> 0 && SYNTAX (p
[i
-1]) == Sword
) word_count
++;
1066 if (SYNTAX (p
[len
-1]) == Sword
) word_count
++;
1067 if (!word_count
) return build_string ("");
1069 val
= make_string (p
, len
- punct_count
+ 5 * (word_count
- 1) + 4);
1071 o
= XSTRING (val
)->data
;
1075 for (i
= 0; i
< len
; i
++)
1076 if (SYNTAX (p
[i
]) == Sword
)
1078 else if (i
> 0 && SYNTAX (p
[i
-1]) == Sword
&& --word_count
)
1093 DEFUN ("search-backward", Fsearch_backward
, Ssearch_backward
, 1, 4,
1094 "sSearch backward: ",
1095 "Search backward from point for STRING.\n\
1096 Set point to the beginning of the occurrence found, and return point.\n\
1097 An optional second argument bounds the search; it is a buffer position.\n\
1098 The match found must not extend before that position.\n\
1099 Optional third argument, if t, means if fail just return nil (no error).\n\
1100 If not nil and not t, position at limit of search and return nil.\n\
1101 Optional fourth argument is repeat count--search for successive occurrences.\n\
1102 See also the functions `match-beginning', `match-end' and `replace-match'.")
1103 (string
, bound
, noerror
, count
)
1104 Lisp_Object string
, bound
, noerror
, count
;
1106 return search_command (string
, bound
, noerror
, count
, -1, 0);
1109 DEFUN ("search-forward", Fsearch_forward
, Ssearch_forward
, 1, 4, "sSearch: ",
1110 "Search forward from point for STRING.\n\
1111 Set point to the end of the occurrence found, and return point.\n\
1112 An optional second argument bounds the search; it is a buffer position.\n\
1113 The match found must not extend after that position. nil is equivalent\n\
1115 Optional third argument, if t, means if fail just return nil (no error).\n\
1116 If not nil and not t, move to limit of search and return nil.\n\
1117 Optional fourth argument is repeat count--search for successive occurrences.\n\
1118 See also the functions `match-beginning', `match-end' and `replace-match'.")
1119 (string
, bound
, noerror
, count
)
1120 Lisp_Object string
, bound
, noerror
, count
;
1122 return search_command (string
, bound
, noerror
, count
, 1, 0);
1125 DEFUN ("word-search-backward", Fword_search_backward
, Sword_search_backward
, 1, 4,
1126 "sWord search backward: ",
1127 "Search backward from point for STRING, ignoring differences in punctuation.\n\
1128 Set point to the beginning of the occurrence found, and return point.\n\
1129 An optional second argument bounds the search; it is a buffer position.\n\
1130 The match found must not extend before that position.\n\
1131 Optional third argument, if t, means if fail just return nil (no error).\n\
1132 If not nil and not t, move to limit of search and return nil.\n\
1133 Optional fourth argument is repeat count--search for successive occurrences.")
1134 (string
, bound
, noerror
, count
)
1135 Lisp_Object string
, bound
, noerror
, count
;
1137 return search_command (wordify (string
), bound
, noerror
, count
, -1, 1);
1140 DEFUN ("word-search-forward", Fword_search_forward
, Sword_search_forward
, 1, 4,
1142 "Search forward from point for STRING, ignoring differences in punctuation.\n\
1143 Set point to the end of the occurrence found, and return point.\n\
1144 An optional second argument bounds the search; it is a buffer position.\n\
1145 The match found must not extend after that position.\n\
1146 Optional third argument, if t, means if fail just return nil (no error).\n\
1147 If not nil and not t, move to limit of search and return nil.\n\
1148 Optional fourth argument is repeat count--search for successive occurrences.")
1149 (string
, bound
, noerror
, count
)
1150 Lisp_Object string
, bound
, noerror
, count
;
1152 return search_command (wordify (string
), bound
, noerror
, count
, 1, 1);
1155 DEFUN ("re-search-backward", Fre_search_backward
, Sre_search_backward
, 1, 4,
1156 "sRE search backward: ",
1157 "Search backward from point for match for regular expression REGEXP.\n\
1158 Set point to the beginning of the match, and return point.\n\
1159 The match found is the one starting last in the buffer\n\
1160 and yet ending before the origin of the search.\n\
1161 An optional second argument bounds the search; it is a buffer position.\n\
1162 The match found must start at or after that position.\n\
1163 Optional third argument, if t, means if fail just return nil (no error).\n\
1164 If not nil and not t, move to limit of search and return nil.\n\
1165 Optional fourth argument is repeat count--search for successive occurrences.\n\
1166 See also the functions `match-beginning', `match-end' and `replace-match'.")
1167 (regexp
, bound
, noerror
, count
)
1168 Lisp_Object regexp
, bound
, noerror
, count
;
1170 return search_command (regexp
, bound
, noerror
, count
, -1, 1);
1173 DEFUN ("re-search-forward", Fre_search_forward
, Sre_search_forward
, 1, 4,
1175 "Search forward from point for regular expression REGEXP.\n\
1176 Set point to the end of the occurrence found, and return point.\n\
1177 An optional second argument bounds the search; it is a buffer position.\n\
1178 The match found must not extend after that position.\n\
1179 Optional third argument, if t, means if fail just return nil (no error).\n\
1180 If not nil and not t, move to limit of search and return nil.\n\
1181 Optional fourth argument is repeat count--search for successive occurrences.\n\
1182 See also the functions `match-beginning', `match-end' and `replace-match'.")
1183 (regexp
, bound
, noerror
, count
)
1184 Lisp_Object regexp
, bound
, noerror
, count
;
1186 return search_command (regexp
, bound
, noerror
, count
, 1, 1);
1189 DEFUN ("replace-match", Freplace_match
, Sreplace_match
, 1, 4, 0,
1190 "Replace text matched by last search with NEWTEXT.\n\
1191 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
1192 Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
1193 based on the replaced text.\n\
1194 If the replaced text has only capital letters\n\
1195 and has at least one multiletter word, convert NEWTEXT to all caps.\n\
1196 If the replaced text has at least one word starting with a capital letter,\n\
1197 then capitalize each word in NEWTEXT.\n\n\
1198 If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
1199 Otherwise treat `\\' as special:\n\
1200 `\\&' in NEWTEXT means substitute original matched text.\n\
1201 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
1202 If Nth parens didn't match, substitute nothing.\n\
1203 `\\\\' means insert one `\\'.\n\
1204 FIXEDCASE and LITERAL are optional arguments.\n\
1205 Leaves point at end of replacement text.\n\
1207 The optional fourth argument STRING can be a string to modify.\n\
1208 In that case, this function creates and returns a new string\n\
1209 which is made by replacing the part of STRING that was matched.")
1210 (newtext
, fixedcase
, literal
, string
)
1211 Lisp_Object newtext
, fixedcase
, literal
, string
;
1213 enum { nochange
, all_caps
, cap_initial
} case_action
;
1214 register int pos
, last
;
1215 int some_multiletter_word
;
1218 int some_nonuppercase_initial
;
1219 register int c
, prevc
;
1222 CHECK_STRING (newtext
, 0);
1224 if (! NILP (string
))
1225 CHECK_STRING (string
, 4);
1227 case_action
= nochange
; /* We tried an initialization */
1228 /* but some C compilers blew it */
1230 if (search_regs
.num_regs
<= 0)
1231 error ("replace-match called before any match found");
1235 if (search_regs
.start
[0] < BEGV
1236 || search_regs
.start
[0] > search_regs
.end
[0]
1237 || search_regs
.end
[0] > ZV
)
1238 args_out_of_range (make_number (search_regs
.start
[0]),
1239 make_number (search_regs
.end
[0]));
1243 if (search_regs
.start
[0] < 0
1244 || search_regs
.start
[0] > search_regs
.end
[0]
1245 || search_regs
.end
[0] > XSTRING (string
)->size
)
1246 args_out_of_range (make_number (search_regs
.start
[0]),
1247 make_number (search_regs
.end
[0]));
1250 if (NILP (fixedcase
))
1252 /* Decide how to casify by examining the matched text. */
1254 last
= search_regs
.end
[0];
1256 case_action
= all_caps
;
1258 /* some_multiletter_word is set nonzero if any original word
1259 is more than one letter long. */
1260 some_multiletter_word
= 0;
1262 some_nonuppercase_initial
= 0;
1265 for (pos
= search_regs
.start
[0]; pos
< last
; pos
++)
1268 c
= FETCH_CHAR (pos
);
1270 c
= XSTRING (string
)->data
[pos
];
1274 /* Cannot be all caps if any original char is lower case */
1277 if (SYNTAX (prevc
) != Sword
)
1278 some_nonuppercase_initial
= 1;
1280 some_multiletter_word
= 1;
1282 else if (!NOCASEP (c
))
1285 if (SYNTAX (prevc
) != Sword
)
1288 some_multiletter_word
= 1;
1292 /* If the initial is a caseless word constituent,
1293 treat that like a lowercase initial. */
1294 if (SYNTAX (prevc
) != Sword
)
1295 some_nonuppercase_initial
= 1;
1301 /* Convert to all caps if the old text is all caps
1302 and has at least one multiletter word. */
1303 if (! some_lowercase
&& some_multiletter_word
)
1304 case_action
= all_caps
;
1305 /* Capitalize each word, if the old text has all capitalized words. */
1306 else if (!some_nonuppercase_initial
&& some_multiletter_word
)
1307 case_action
= cap_initial
;
1308 else if (!some_nonuppercase_initial
&& some_uppercase
)
1309 /* Should x -> yz, operating on X, give Yz or YZ?
1310 We'll assume the latter. */
1311 case_action
= all_caps
;
1313 case_action
= nochange
;
1316 /* Do replacement in a string. */
1319 Lisp_Object before
, after
;
1321 before
= Fsubstring (string
, make_number (0),
1322 make_number (search_regs
.start
[0]));
1323 after
= Fsubstring (string
, make_number (search_regs
.end
[0]), Qnil
);
1325 /* Do case substitution into NEWTEXT if desired. */
1329 /* We build up the substituted string in ACCUM. */
1335 for (pos
= 0; pos
< XSTRING (newtext
)->size
; pos
++)
1340 c
= XSTRING (newtext
)->data
[pos
];
1343 c
= XSTRING (newtext
)->data
[++pos
];
1346 substart
= search_regs
.start
[0];
1347 subend
= search_regs
.end
[0];
1349 else if (c
>= '1' && c
<= '9' && c
<= search_regs
.num_regs
+ '0')
1351 if (search_regs
.start
[c
- '0'] >= 1)
1353 substart
= search_regs
.start
[c
- '0'];
1354 subend
= search_regs
.end
[c
- '0'];
1360 if (pos
- 1 != lastpos
+ 1)
1361 middle
= Fsubstring (newtext
, lastpos
+ 1, pos
- 1);
1364 accum
= concat3 (accum
, middle
,
1365 Fsubstring (string
, make_number (substart
),
1366 make_number (subend
)));
1371 if (pos
!= lastpos
+ 1)
1372 middle
= Fsubstring (newtext
, lastpos
+ 1, pos
);
1376 newtext
= concat2 (accum
, middle
);
1379 if (case_action
== all_caps
)
1380 newtext
= Fupcase (newtext
);
1381 else if (case_action
== cap_initial
)
1382 newtext
= upcase_initials (newtext
);
1384 return concat3 (before
, newtext
, after
);
1387 /* We insert the replacement text before the old text, and then
1388 delete the original text. This means that markers at the
1389 beginning or end of the original will float to the corresponding
1390 position in the replacement. */
1391 SET_PT (search_regs
.start
[0]);
1392 if (!NILP (literal
))
1393 Finsert_and_inherit (1, &newtext
);
1396 struct gcpro gcpro1
;
1399 for (pos
= 0; pos
< XSTRING (newtext
)->size
; pos
++)
1401 int offset
= point
- search_regs
.start
[0];
1403 c
= XSTRING (newtext
)->data
[pos
];
1406 c
= XSTRING (newtext
)->data
[++pos
];
1408 Finsert_buffer_substring
1409 (Fcurrent_buffer (),
1410 make_number (search_regs
.start
[0] + offset
),
1411 make_number (search_regs
.end
[0] + offset
));
1412 else if (c
>= '1' && c
<= '9' && c
<= search_regs
.num_regs
+ '0')
1414 if (search_regs
.start
[c
- '0'] >= 1)
1415 Finsert_buffer_substring
1416 (Fcurrent_buffer (),
1417 make_number (search_regs
.start
[c
- '0'] + offset
),
1418 make_number (search_regs
.end
[c
- '0'] + offset
));
1429 inslen
= point
- (search_regs
.start
[0]);
1430 del_range (search_regs
.start
[0] + inslen
, search_regs
.end
[0] + inslen
);
1432 if (case_action
== all_caps
)
1433 Fupcase_region (make_number (point
- inslen
), make_number (point
));
1434 else if (case_action
== cap_initial
)
1435 upcase_initials_region (make_number (point
- inslen
), make_number (point
));
1440 match_limit (num
, beginningp
)
1446 CHECK_NUMBER (num
, 0);
1448 if (n
< 0 || n
>= search_regs
.num_regs
)
1449 args_out_of_range (num
, make_number (search_regs
.num_regs
));
1450 if (search_regs
.num_regs
<= 0
1451 || search_regs
.start
[n
] < 0)
1453 return (make_number ((beginningp
) ? search_regs
.start
[n
]
1454 : search_regs
.end
[n
]));
1457 DEFUN ("match-beginning", Fmatch_beginning
, Smatch_beginning
, 1, 1, 0,
1458 "Return position of start of text matched by last search.\n\
1459 NUM specifies which parenthesized expression in the last regexp.\n\
1460 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.\n\
1461 Zero means the entire text matched by the whole regexp or whole string.")
1465 return match_limit (num
, 1);
1468 DEFUN ("match-end", Fmatch_end
, Smatch_end
, 1, 1, 0,
1469 "Return position of end of text matched by last search.\n\
1470 ARG, a number, specifies which parenthesized expression in the last regexp.\n\
1471 Value is nil if ARGth pair didn't match, or there were less than ARG pairs.\n\
1472 Zero means the entire text matched by the whole regexp or whole string.")
1476 return match_limit (num
, 0);
1479 DEFUN ("match-data", Fmatch_data
, Smatch_data
, 0, 0, 0,
1480 "Return a list containing all info on what the last search matched.\n\
1481 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
1482 All the elements are markers or nil (nil if the Nth pair didn't match)\n\
1483 if the last match was on a buffer; integers or nil if a string was matched.\n\
1484 Use `store-match-data' to reinstate the data in this list.")
1490 if (NILP (last_thing_searched
))
1491 error ("match-data called before any match found");
1493 data
= (Lisp_Object
*) alloca ((2 * search_regs
.num_regs
)
1494 * sizeof (Lisp_Object
));
1497 for (i
= 0; i
< search_regs
.num_regs
; i
++)
1499 int start
= search_regs
.start
[i
];
1502 if (EQ (last_thing_searched
, Qt
))
1504 XFASTINT (data
[2 * i
]) = start
;
1505 XFASTINT (data
[2 * i
+ 1]) = search_regs
.end
[i
];
1507 else if (BUFFERP (last_thing_searched
))
1509 data
[2 * i
] = Fmake_marker ();
1510 Fset_marker (data
[2 * i
],
1511 make_number (start
),
1512 last_thing_searched
);
1513 data
[2 * i
+ 1] = Fmake_marker ();
1514 Fset_marker (data
[2 * i
+ 1],
1515 make_number (search_regs
.end
[i
]),
1516 last_thing_searched
);
1519 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
1525 data
[2 * i
] = data
[2 * i
+ 1] = Qnil
;
1527 return Flist (2 * len
+ 2, data
);
1531 DEFUN ("store-match-data", Fstore_match_data
, Sstore_match_data
, 1, 1, 0,
1532 "Set internal data on last search match from elements of LIST.\n\
1533 LIST should have been created by calling `match-data' previously.")
1535 register Lisp_Object list
;
1538 register Lisp_Object marker
;
1540 if (!CONSP (list
) && !NILP (list
))
1541 list
= wrong_type_argument (Qconsp
, list
);
1543 /* Unless we find a marker with a buffer in LIST, assume that this
1544 match data came from a string. */
1545 last_thing_searched
= Qt
;
1547 /* Allocate registers if they don't already exist. */
1549 int length
= XFASTINT (Flength (list
)) / 2;
1551 if (length
> search_regs
.num_regs
)
1553 if (search_regs
.num_regs
== 0)
1556 = (regoff_t
*) xmalloc (length
* sizeof (regoff_t
));
1558 = (regoff_t
*) xmalloc (length
* sizeof (regoff_t
));
1563 = (regoff_t
*) xrealloc (search_regs
.start
,
1564 length
* sizeof (regoff_t
));
1566 = (regoff_t
*) xrealloc (search_regs
.end
,
1567 length
* sizeof (regoff_t
));
1571 re_set_registers (&searchbuf
, &search_regs
, length
,
1572 search_regs
.start
, search_regs
.end
);
1577 for (i
= 0; i
< search_regs
.num_regs
; i
++)
1579 marker
= Fcar (list
);
1582 search_regs
.start
[i
] = -1;
1587 if (MARKERP (marker
))
1589 if (XMARKER (marker
)->buffer
== 0)
1590 XFASTINT (marker
) = 0;
1592 XSET (last_thing_searched
, Lisp_Buffer
,
1593 XMARKER (marker
)->buffer
);
1596 CHECK_NUMBER_COERCE_MARKER (marker
, 0);
1597 search_regs
.start
[i
] = XINT (marker
);
1600 marker
= Fcar (list
);
1601 if (MARKERP (marker
) && XMARKER (marker
)->buffer
== 0)
1602 XFASTINT (marker
) = 0;
1604 CHECK_NUMBER_COERCE_MARKER (marker
, 0);
1605 search_regs
.end
[i
] = XINT (marker
);
1613 /* Quote a string to inactivate reg-expr chars */
1615 DEFUN ("regexp-quote", Fregexp_quote
, Sregexp_quote
, 1, 1, 0,
1616 "Return a regexp string which matches exactly STRING and nothing else.")
1620 register unsigned char *in
, *out
, *end
;
1621 register unsigned char *temp
;
1623 CHECK_STRING (str
, 0);
1625 temp
= (unsigned char *) alloca (XSTRING (str
)->size
* 2);
1627 /* Now copy the data into the new string, inserting escapes. */
1629 in
= XSTRING (str
)->data
;
1630 end
= in
+ XSTRING (str
)->size
;
1633 for (; in
!= end
; in
++)
1635 if (*in
== '[' || *in
== ']'
1636 || *in
== '*' || *in
== '.' || *in
== '\\'
1637 || *in
== '?' || *in
== '+'
1638 || *in
== '^' || *in
== '$')
1643 return make_string (temp
, out
- temp
);
1650 searchbuf
.allocated
= 100;
1651 searchbuf
.buffer
= (unsigned char *) malloc (searchbuf
.allocated
);
1652 searchbuf
.fastmap
= search_fastmap
;
1654 Qsearch_failed
= intern ("search-failed");
1655 staticpro (&Qsearch_failed
);
1656 Qinvalid_regexp
= intern ("invalid-regexp");
1657 staticpro (&Qinvalid_regexp
);
1659 Fput (Qsearch_failed
, Qerror_conditions
,
1660 Fcons (Qsearch_failed
, Fcons (Qerror
, Qnil
)));
1661 Fput (Qsearch_failed
, Qerror_message
,
1662 build_string ("Search failed"));
1664 Fput (Qinvalid_regexp
, Qerror_conditions
,
1665 Fcons (Qinvalid_regexp
, Fcons (Qerror
, Qnil
)));
1666 Fput (Qinvalid_regexp
, Qerror_message
,
1667 build_string ("Invalid regexp"));
1670 staticpro (&last_regexp
);
1672 last_thing_searched
= Qnil
;
1673 staticpro (&last_thing_searched
);
1675 defsubr (&Sstring_match
);
1676 defsubr (&Slooking_at
);
1677 defsubr (&Sskip_chars_forward
);
1678 defsubr (&Sskip_chars_backward
);
1679 defsubr (&Sskip_syntax_forward
);
1680 defsubr (&Sskip_syntax_backward
);
1681 defsubr (&Ssearch_forward
);
1682 defsubr (&Ssearch_backward
);
1683 defsubr (&Sword_search_forward
);
1684 defsubr (&Sword_search_backward
);
1685 defsubr (&Sre_search_forward
);
1686 defsubr (&Sre_search_backward
);
1687 defsubr (&Sreplace_match
);
1688 defsubr (&Smatch_beginning
);
1689 defsubr (&Smatch_end
);
1690 defsubr (&Smatch_data
);
1691 defsubr (&Sstore_match_data
);
1692 defsubr (&Sregexp_quote
);