* insdel.c (prepare_to_modify_buffer): Invalidate width run and
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
3a22ee35 2 Copyright (C) 1985, 1986, 1987, 1993, 1994 Free Software Foundation, Inc.
ca1d1d23
JB
3
4This file is part of GNU Emacs.
5
6GNU Emacs is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 1, or (at your option)
9any later version.
10
11GNU Emacs is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU Emacs; see the file COPYING. If not, write to
18the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20
18160b98 21#include <config.h>
ca1d1d23
JB
22#include "lisp.h"
23#include "syntax.h"
24#include "buffer.h"
25#include "commands.h"
9ac0d9e0 26#include "blockinput.h"
4746118a 27
ca1d1d23
JB
28#include <sys/types.h>
29#include "regex.h"
30
31#define max(a, b) ((a) > (b) ? (a) : (b))
32#define min(a, b) ((a) < (b) ? (a) : (b))
33
34/* We compile regexps into this buffer and then use it for searching. */
35
36struct re_pattern_buffer searchbuf;
37
38char search_fastmap[0400];
39
40/* Last regexp we compiled */
41
42Lisp_Object last_regexp;
43
4746118a
JB
44/* Every call to re_match, etc., must pass &search_regs as the regs
45 argument unless you can show it is unnecessary (i.e., if re_match
46 is certainly going to be called again before region-around-match
47 can be called).
48
49 Since the registers are now dynamically allocated, we need to make
50 sure not to refer to the Nth register before checking that it has
1113d9db
JB
51 been allocated by checking search_regs.num_regs.
52
53 The regex code keeps track of whether it has allocated the search
54 buffer using bits in searchbuf. This means that whenever you
55 compile a new pattern, it completely forgets whether it has
56 allocated any registers, and will allocate new registers the next
57 time you call a searching or matching function. Therefore, we need
58 to call re_set_registers after compiling a new pattern or after
59 setting the match registers, so that the regex functions will be
60 able to free or re-allocate it properly. */
ca1d1d23
JB
61static struct re_registers search_regs;
62
daa37602
JB
63/* The buffer in which the last search was performed, or
64 Qt if the last search was done in a string;
65 Qnil if no searching has been done yet. */
66static Lisp_Object last_thing_searched;
ca1d1d23
JB
67
68/* error condition signalled when regexp compile_pattern fails */
69
70Lisp_Object Qinvalid_regexp;
71
ca325161
RS
72static void set_search_regs ();
73
ca1d1d23
JB
74static void
75matcher_overflow ()
76{
77 error ("Stack overflow in regexp matcher");
78}
79
80#ifdef __STDC__
81#define CONST const
82#else
83#define CONST
84#endif
85
86/* Compile a regexp and signal a Lisp error if anything goes wrong. */
87
1113d9db 88compile_pattern (pattern, bufp, regp, translate)
ca1d1d23
JB
89 Lisp_Object pattern;
90 struct re_pattern_buffer *bufp;
1113d9db 91 struct re_registers *regp;
ca1d1d23
JB
92 char *translate;
93{
94 CONST char *val;
95 Lisp_Object dummy;
96
97 if (EQ (pattern, last_regexp)
98 && translate == bufp->translate)
99 return;
1113d9db 100
ca1d1d23
JB
101 last_regexp = Qnil;
102 bufp->translate = translate;
9ac0d9e0 103 BLOCK_INPUT;
b90d9e80
RS
104 val = (CONST char *) re_compile_pattern ((char *) XSTRING (pattern)->data,
105 XSTRING (pattern)->size, bufp);
9ac0d9e0 106 UNBLOCK_INPUT;
ca1d1d23
JB
107 if (val)
108 {
109 dummy = build_string (val);
110 while (1)
111 Fsignal (Qinvalid_regexp, Fcons (dummy, Qnil));
112 }
1113d9db 113
ca1d1d23 114 last_regexp = pattern;
1113d9db
JB
115
116 /* Advise the searching functions about the space we have allocated
117 for register data. */
9ac0d9e0 118 BLOCK_INPUT;
ebb9e16f
JB
119 if (regp)
120 re_set_registers (bufp, regp, regp->num_regs, regp->start, regp->end);
9ac0d9e0 121 UNBLOCK_INPUT;
1113d9db 122
ca1d1d23
JB
123 return;
124}
125
126/* Error condition used for failing searches */
127Lisp_Object Qsearch_failed;
128
129Lisp_Object
130signal_failure (arg)
131 Lisp_Object arg;
132{
133 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
134 return Qnil;
135}
136\f
137DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
e065a56e
JB
138 "Return t if text after point matches regular expression PAT.\n\
139This function modifies the match data that `match-beginning',\n\
140`match-end' and `match-data' access; save and restore the match\n\
fe99283d 141data if you want to preserve them.")
ca1d1d23
JB
142 (string)
143 Lisp_Object string;
144{
145 Lisp_Object val;
146 unsigned char *p1, *p2;
147 int s1, s2;
148 register int i;
149
150 CHECK_STRING (string, 0);
1113d9db 151 compile_pattern (string, &searchbuf, &search_regs,
ca1d1d23
JB
152 !NILP (current_buffer->case_fold_search) ? DOWNCASE_TABLE : 0);
153
154 immediate_quit = 1;
155 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
156
157 /* Get pointers and sizes of the two strings
158 that make up the visible portion of the buffer. */
159
160 p1 = BEGV_ADDR;
161 s1 = GPT - BEGV;
162 p2 = GAP_END_ADDR;
163 s2 = ZV - GPT;
164 if (s1 < 0)
165 {
166 p2 = p1;
167 s2 = ZV - BEGV;
168 s1 = 0;
169 }
170 if (s2 < 0)
171 {
172 s1 = ZV - BEGV;
173 s2 = 0;
174 }
175
176 i = re_match_2 (&searchbuf, (char *) p1, s1, (char *) p2, s2,
177 point - BEGV, &search_regs,
178 ZV - BEGV);
179 if (i == -2)
180 matcher_overflow ();
181
182 val = (0 <= i ? Qt : Qnil);
4746118a 183 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
184 if (search_regs.start[i] >= 0)
185 {
186 search_regs.start[i] += BEGV;
187 search_regs.end[i] += BEGV;
188 }
a3668d92 189 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
190 immediate_quit = 0;
191 return val;
192}
193
194DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
195 "Return index of start of first match for REGEXP in STRING, or nil.\n\
196If third arg START is non-nil, start search at that index in STRING.\n\
197For index of first char beyond the match, do (match-end 0).\n\
198`match-end' and `match-beginning' also give indices of substrings\n\
199matched by parenthesis constructs in the pattern.")
200 (regexp, string, start)
201 Lisp_Object regexp, string, start;
202{
203 int val;
204 int s;
205
206 CHECK_STRING (regexp, 0);
207 CHECK_STRING (string, 1);
208
209 if (NILP (start))
210 s = 0;
211 else
212 {
213 int len = XSTRING (string)->size;
214
215 CHECK_NUMBER (start, 2);
216 s = XINT (start);
217 if (s < 0 && -s <= len)
26faf9f4 218 s = len + s;
ca1d1d23
JB
219 else if (0 > s || s > len)
220 args_out_of_range (string, start);
221 }
222
1113d9db 223 compile_pattern (regexp, &searchbuf, &search_regs,
ca1d1d23
JB
224 !NILP (current_buffer->case_fold_search) ? DOWNCASE_TABLE : 0);
225 immediate_quit = 1;
226 val = re_search (&searchbuf, (char *) XSTRING (string)->data,
227 XSTRING (string)->size, s, XSTRING (string)->size - s,
228 &search_regs);
229 immediate_quit = 0;
daa37602 230 last_thing_searched = Qt;
ca1d1d23
JB
231 if (val == -2)
232 matcher_overflow ();
233 if (val < 0) return Qnil;
234 return make_number (val);
235}
e59a8453
RS
236
237/* Match REGEXP against STRING, searching all of STRING,
238 and return the index of the match, or negative on failure.
239 This does not clobber the match data. */
240
241int
242fast_string_match (regexp, string)
243 Lisp_Object regexp, string;
244{
245 int val;
246
247 compile_pattern (regexp, &searchbuf, 0, 0);
248 immediate_quit = 1;
249 val = re_search (&searchbuf, (char *) XSTRING (string)->data,
250 XSTRING (string)->size, 0, XSTRING (string)->size,
251 0);
252 immediate_quit = 0;
253 return val;
254}
ca1d1d23 255\f
ffd56f97
JB
256/* Search for COUNT instances of the character TARGET, starting at START.
257 If COUNT is negative, search backwards.
258
259 If we find COUNT instances, set *SHORTAGE to zero, and return the
5bfe95c9
RS
260 position after the COUNTth match. Note that for reverse motion
261 this is not the same as the usual convention for Emacs motion commands.
ffd56f97
JB
262
263 If we don't find COUNT instances before reaching the end of the
264 buffer (or the beginning, if scanning backwards), set *SHORTAGE to
265 the number of TARGETs left unfound, and return the end of the
087a5f81 266 buffer we bumped up against.
ffd56f97 267
087a5f81
RS
268 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
269 except when inside redisplay. */
270
271scan_buffer (target, start, count, shortage, allow_quit)
ffd56f97
JB
272 int *shortage, start;
273 register int count, target;
087a5f81 274 int allow_quit;
ca1d1d23 275{
ffd56f97
JB
276 int limit = ((count > 0) ? ZV - 1 : BEGV);
277 int direction = ((count > 0) ? 1 : -1);
278
279 register unsigned char *cursor;
ca1d1d23 280 unsigned char *base;
ffd56f97
JB
281
282 register int ceiling;
283 register unsigned char *ceiling_addr;
ca1d1d23
JB
284
285 if (shortage != 0)
286 *shortage = 0;
287
087a5f81 288 immediate_quit = allow_quit;
ca1d1d23 289
ffd56f97
JB
290 if (count > 0)
291 while (start != limit + 1)
ca1d1d23 292 {
ffd56f97
JB
293 ceiling = BUFFER_CEILING_OF (start);
294 ceiling = min (limit, ceiling);
295 ceiling_addr = &FETCH_CHAR (ceiling) + 1;
296 base = (cursor = &FETCH_CHAR (start));
ca1d1d23
JB
297 while (1)
298 {
ffd56f97 299 while (*cursor != target && ++cursor != ceiling_addr)
ca1d1d23 300 ;
ffd56f97 301 if (cursor != ceiling_addr)
ca1d1d23 302 {
ffd56f97 303 if (--count == 0)
ca1d1d23
JB
304 {
305 immediate_quit = 0;
ffd56f97 306 return (start + cursor - base + 1);
ca1d1d23
JB
307 }
308 else
ffd56f97 309 if (++cursor == ceiling_addr)
ca1d1d23
JB
310 break;
311 }
312 else
313 break;
314 }
ffd56f97 315 start += cursor - base;
ca1d1d23
JB
316 }
317 else
318 {
ffd56f97
JB
319 start--; /* first character we scan */
320 while (start > limit - 1)
321 { /* we WILL scan under start */
322 ceiling = BUFFER_FLOOR_OF (start);
323 ceiling = max (limit, ceiling);
324 ceiling_addr = &FETCH_CHAR (ceiling) - 1;
325 base = (cursor = &FETCH_CHAR (start));
ca1d1d23
JB
326 cursor++;
327 while (1)
328 {
ffd56f97 329 while (--cursor != ceiling_addr && *cursor != target)
ca1d1d23 330 ;
ffd56f97 331 if (cursor != ceiling_addr)
ca1d1d23 332 {
ffd56f97 333 if (++count == 0)
ca1d1d23
JB
334 {
335 immediate_quit = 0;
ffd56f97 336 return (start + cursor - base + 1);
ca1d1d23
JB
337 }
338 }
339 else
340 break;
341 }
ffd56f97 342 start += cursor - base;
ca1d1d23
JB
343 }
344 }
345 immediate_quit = 0;
346 if (shortage != 0)
ffd56f97
JB
347 *shortage = count * direction;
348 return (start + ((direction == 1 ? 0 : 1)));
ca1d1d23
JB
349}
350
63fa018d
RS
351int
352find_next_newline_no_quit (from, cnt)
353 register int from, cnt;
354{
355 return scan_buffer ('\n', from, cnt, (int *) 0, 0);
356}
357
ca1d1d23
JB
358int
359find_next_newline (from, cnt)
360 register int from, cnt;
361{
087a5f81 362 return scan_buffer ('\n', from, cnt, (int *) 0, 1);
ca1d1d23
JB
363}
364\f
c1dc99a1
JB
365Lisp_Object skip_chars ();
366
ca1d1d23 367DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
3acb9a69
RS
368 "Move point forward, stopping before a char not in STRING, or at pos LIM.\n\
369STRING is like the inside of a `[...]' in a regular expression\n\
ca1d1d23
JB
370except that `]' is never special and `\\' quotes `^', `-' or `\\'.\n\
371Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
c1dc99a1
JB
372With arg \"^a-zA-Z\", skips nonletters stopping before first letter.\n\
373Returns the distance traveled, either zero or positive.")
ca1d1d23
JB
374 (string, lim)
375 Lisp_Object string, lim;
376{
17431c60 377 return skip_chars (1, 0, string, lim);
ca1d1d23
JB
378}
379
380DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
3acb9a69 381 "Move point backward, stopping after a char not in STRING, or at pos LIM.\n\
c1dc99a1
JB
382See `skip-chars-forward' for details.\n\
383Returns the distance traveled, either zero or negative.")
ca1d1d23
JB
384 (string, lim)
385 Lisp_Object string, lim;
386{
17431c60
RS
387 return skip_chars (0, 0, string, lim);
388}
389
390DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
391 "Move point forward across chars in specified syntax classes.\n\
392SYNTAX is a string of syntax code characters.\n\
393Stop before a char whose syntax is not in SYNTAX, or at position LIM.\n\
394If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
395This function returns the distance traveled, either zero or positive.")
396 (syntax, lim)
397 Lisp_Object syntax, lim;
398{
399 return skip_chars (1, 1, syntax, lim);
400}
401
402DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
403 "Move point backward across chars in specified syntax classes.\n\
404SYNTAX is a string of syntax code characters.\n\
405Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.\n\
406If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
407This function returns the distance traveled, either zero or negative.")
408 (syntax, lim)
409 Lisp_Object syntax, lim;
410{
411 return skip_chars (0, 1, syntax, lim);
ca1d1d23
JB
412}
413
c1dc99a1 414Lisp_Object
17431c60
RS
415skip_chars (forwardp, syntaxp, string, lim)
416 int forwardp, syntaxp;
ca1d1d23
JB
417 Lisp_Object string, lim;
418{
419 register unsigned char *p, *pend;
420 register unsigned char c;
421 unsigned char fastmap[0400];
422 int negate = 0;
423 register int i;
424
425 CHECK_STRING (string, 0);
426
427 if (NILP (lim))
a3668d92 428 XSETINT (lim, forwardp ? ZV : BEGV);
ca1d1d23
JB
429 else
430 CHECK_NUMBER_COERCE_MARKER (lim, 1);
431
ca1d1d23 432 /* In any case, don't allow scan outside bounds of buffer. */
c5241910
RS
433 /* jla turned this off, for no known reason.
434 bfox turned the ZV part on, and rms turned the
435 BEGV part back on. */
436 if (XINT (lim) > ZV)
c235cce7 437 XSETFASTINT (lim, ZV);
c5241910 438 if (XINT (lim) < BEGV)
c235cce7 439 XSETFASTINT (lim, BEGV);
ca1d1d23
JB
440
441 p = XSTRING (string)->data;
442 pend = p + XSTRING (string)->size;
443 bzero (fastmap, sizeof fastmap);
444
445 if (p != pend && *p == '^')
446 {
447 negate = 1; p++;
448 }
449
17431c60
RS
450 /* Find the characters specified and set their elements of fastmap.
451 If syntaxp, each character counts as itself.
452 Otherwise, handle backslashes and ranges specially */
ca1d1d23
JB
453
454 while (p != pend)
455 {
456 c = *p++;
17431c60
RS
457 if (syntaxp)
458 fastmap[c] = 1;
459 else
ca1d1d23 460 {
17431c60 461 if (c == '\\')
ca1d1d23 462 {
17431c60
RS
463 if (p == pend) break;
464 c = *p++;
465 }
466 if (p != pend && *p == '-')
467 {
468 p++;
469 if (p == pend) break;
470 while (c <= *p)
471 {
472 fastmap[c] = 1;
473 c++;
474 }
475 p++;
ca1d1d23 476 }
17431c60
RS
477 else
478 fastmap[c] = 1;
ca1d1d23 479 }
ca1d1d23
JB
480 }
481
9239c6c1
RS
482 if (syntaxp && fastmap['-'] != 0)
483 fastmap[' '] = 1;
484
ca1d1d23
JB
485 /* If ^ was the first character, complement the fastmap. */
486
487 if (negate)
488 for (i = 0; i < sizeof fastmap; i++)
489 fastmap[i] ^= 1;
490
c1dc99a1
JB
491 {
492 int start_point = point;
493
494 immediate_quit = 1;
17431c60 495 if (syntaxp)
c1dc99a1 496 {
17431c60
RS
497
498 if (forwardp)
499 {
500 while (point < XINT (lim)
501 && fastmap[(unsigned char) syntax_code_spec[(int) SYNTAX (FETCH_CHAR (point))]])
502 SET_PT (point + 1);
503 }
504 else
505 {
506 while (point > XINT (lim)
507 && fastmap[(unsigned char) syntax_code_spec[(int) SYNTAX (FETCH_CHAR (point - 1))]])
508 SET_PT (point - 1);
509 }
c1dc99a1
JB
510 }
511 else
512 {
17431c60
RS
513 if (forwardp)
514 {
515 while (point < XINT (lim) && fastmap[FETCH_CHAR (point)])
516 SET_PT (point + 1);
517 }
518 else
519 {
520 while (point > XINT (lim) && fastmap[FETCH_CHAR (point - 1)])
521 SET_PT (point - 1);
522 }
c1dc99a1
JB
523 }
524 immediate_quit = 0;
525
526 return make_number (point - start_point);
527 }
ca1d1d23
JB
528}
529\f
530/* Subroutines of Lisp buffer search functions. */
531
532static Lisp_Object
533search_command (string, bound, noerror, count, direction, RE)
534 Lisp_Object string, bound, noerror, count;
535 int direction;
536 int RE;
537{
538 register int np;
539 int lim;
540 int n = direction;
541
542 if (!NILP (count))
543 {
544 CHECK_NUMBER (count, 3);
545 n *= XINT (count);
546 }
547
548 CHECK_STRING (string, 0);
549 if (NILP (bound))
550 lim = n > 0 ? ZV : BEGV;
551 else
552 {
553 CHECK_NUMBER_COERCE_MARKER (bound, 1);
554 lim = XINT (bound);
555 if (n > 0 ? lim < point : lim > point)
556 error ("Invalid search bound (wrong side of point)");
557 if (lim > ZV)
558 lim = ZV;
559 if (lim < BEGV)
560 lim = BEGV;
561 }
562
563 np = search_buffer (string, point, lim, n, RE,
564 (!NILP (current_buffer->case_fold_search)
565 ? XSTRING (current_buffer->case_canon_table)->data : 0),
566 (!NILP (current_buffer->case_fold_search)
567 ? XSTRING (current_buffer->case_eqv_table)->data : 0));
568 if (np <= 0)
569 {
570 if (NILP (noerror))
571 return signal_failure (string);
572 if (!EQ (noerror, Qt))
573 {
574 if (lim < BEGV || lim > ZV)
575 abort ();
a5f217b8
RS
576 SET_PT (lim);
577 return Qnil;
578#if 0 /* This would be clean, but maybe programs depend on
579 a value of nil here. */
481399bf 580 np = lim;
a5f217b8 581#endif
ca1d1d23 582 }
481399bf
RS
583 else
584 return Qnil;
ca1d1d23
JB
585 }
586
587 if (np < BEGV || np > ZV)
588 abort ();
589
590 SET_PT (np);
591
592 return make_number (np);
593}
594\f
b6d6a51c
KH
595static int
596trivial_regexp_p (regexp)
597 Lisp_Object regexp;
598{
599 int len = XSTRING (regexp)->size;
600 unsigned char *s = XSTRING (regexp)->data;
601 unsigned char c;
602 while (--len >= 0)
603 {
604 switch (*s++)
605 {
606 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
607 return 0;
608 case '\\':
609 if (--len < 0)
610 return 0;
611 switch (*s++)
612 {
613 case '|': case '(': case ')': case '`': case '\'': case 'b':
614 case 'B': case '<': case '>': case 'w': case 'W': case 's':
615 case 'S': case '1': case '2': case '3': case '4': case '5':
616 case '6': case '7': case '8': case '9':
617 return 0;
618 }
619 }
620 }
621 return 1;
622}
623
ca325161 624/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23
JB
625 starting at position POS and stopping at position LIM,
626 treating PAT as a literal string if RE is false or as
627 a regular expression if RE is true.
628
629 If N is positive, searching is forward and LIM must be greater than POS.
630 If N is negative, searching is backward and LIM must be less than POS.
631
632 Returns -x if only N-x occurrences found (x > 0),
633 or else the position at the beginning of the Nth occurrence
634 (if searching backward) or the end (if searching forward). */
635
636search_buffer (string, pos, lim, n, RE, trt, inverse_trt)
637 Lisp_Object string;
638 int pos;
639 int lim;
640 int n;
641 int RE;
642 register unsigned char *trt;
643 register unsigned char *inverse_trt;
644{
645 int len = XSTRING (string)->size;
646 unsigned char *base_pat = XSTRING (string)->data;
647 register int *BM_tab;
648 int *BM_tab_base;
649 register int direction = ((n > 0) ? 1 : -1);
650 register int dirlen;
651 int infinity, limit, k, stride_for_teases;
652 register unsigned char *pat, *cursor, *p_limit;
653 register int i, j;
654 unsigned char *p1, *p2;
655 int s1, s2;
656
657 /* Null string is found at starting position. */
3f57a499 658 if (len == 0)
ca325161
RS
659 {
660 set_search_regs (pos, 0);
661 return pos;
662 }
3f57a499
RS
663
664 /* Searching 0 times means don't move. */
665 if (n == 0)
ca1d1d23
JB
666 return pos;
667
b6d6a51c 668 if (RE && !trivial_regexp_p (string))
ca1d1d23 669 {
b6d6a51c 670 compile_pattern (string, &searchbuf, &search_regs, (char *) trt);
ca1d1d23 671
ca1d1d23
JB
672 immediate_quit = 1; /* Quit immediately if user types ^G,
673 because letting this function finish
674 can take too long. */
675 QUIT; /* Do a pending quit right away,
676 to avoid paradoxical behavior */
677 /* Get pointers and sizes of the two strings
678 that make up the visible portion of the buffer. */
679
680 p1 = BEGV_ADDR;
681 s1 = GPT - BEGV;
682 p2 = GAP_END_ADDR;
683 s2 = ZV - GPT;
684 if (s1 < 0)
685 {
686 p2 = p1;
687 s2 = ZV - BEGV;
688 s1 = 0;
689 }
690 if (s2 < 0)
691 {
692 s1 = ZV - BEGV;
693 s2 = 0;
694 }
695 while (n < 0)
696 {
42db823b 697 int val;
42db823b
RS
698 val = re_search_2 (&searchbuf, (char *) p1, s1, (char *) p2, s2,
699 pos - BEGV, lim - pos, &search_regs,
700 /* Don't allow match past current point */
701 pos - BEGV);
ca1d1d23 702 if (val == -2)
b6d6a51c
KH
703 {
704 matcher_overflow ();
705 }
ca1d1d23
JB
706 if (val >= 0)
707 {
708 j = BEGV;
4746118a 709 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
710 if (search_regs.start[i] >= 0)
711 {
712 search_regs.start[i] += j;
713 search_regs.end[i] += j;
714 }
a3668d92 715 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
716 /* Set pos to the new position. */
717 pos = search_regs.start[0];
718 }
719 else
720 {
721 immediate_quit = 0;
722 return (n);
723 }
724 n++;
725 }
726 while (n > 0)
727 {
42db823b 728 int val;
42db823b
RS
729 val = re_search_2 (&searchbuf, (char *) p1, s1, (char *) p2, s2,
730 pos - BEGV, lim - pos, &search_regs,
731 lim - BEGV);
ca1d1d23 732 if (val == -2)
b6d6a51c
KH
733 {
734 matcher_overflow ();
735 }
ca1d1d23
JB
736 if (val >= 0)
737 {
738 j = BEGV;
4746118a 739 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
740 if (search_regs.start[i] >= 0)
741 {
742 search_regs.start[i] += j;
743 search_regs.end[i] += j;
744 }
a3668d92 745 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
746 pos = search_regs.end[0];
747 }
748 else
749 {
750 immediate_quit = 0;
751 return (0 - n);
752 }
753 n--;
754 }
755 immediate_quit = 0;
756 return (pos);
757 }
758 else /* non-RE case */
759 {
760#ifdef C_ALLOCA
761 int BM_tab_space[0400];
762 BM_tab = &BM_tab_space[0];
763#else
764 BM_tab = (int *) alloca (0400 * sizeof (int));
765#endif
b6d6a51c
KH
766 {
767 unsigned char *patbuf = (unsigned char *) alloca (len);
768 pat = patbuf;
769 while (--len >= 0)
770 {
771 /* If we got here and the RE flag is set, it's because we're
772 dealing with a regexp known to be trivial, so the backslash
773 just quotes the next character. */
774 if (RE && *base_pat == '\\')
775 {
776 len--;
777 base_pat++;
778 }
779 *pat++ = (trt ? trt[*base_pat++] : *base_pat++);
780 }
781 len = pat - patbuf;
782 pat = base_pat = patbuf;
783 }
ca1d1d23
JB
784 /* The general approach is that we are going to maintain that we know */
785 /* the first (closest to the present position, in whatever direction */
786 /* we're searching) character that could possibly be the last */
787 /* (furthest from present position) character of a valid match. We */
788 /* advance the state of our knowledge by looking at that character */
789 /* and seeing whether it indeed matches the last character of the */
790 /* pattern. If it does, we take a closer look. If it does not, we */
791 /* move our pointer (to putative last characters) as far as is */
792 /* logically possible. This amount of movement, which I call a */
793 /* stride, will be the length of the pattern if the actual character */
794 /* appears nowhere in the pattern, otherwise it will be the distance */
795 /* from the last occurrence of that character to the end of the */
796 /* pattern. */
797 /* As a coding trick, an enormous stride is coded into the table for */
798 /* characters that match the last character. This allows use of only */
799 /* a single test, a test for having gone past the end of the */
800 /* permissible match region, to test for both possible matches (when */
801 /* the stride goes past the end immediately) and failure to */
802 /* match (where you get nudged past the end one stride at a time). */
803
804 /* Here we make a "mickey mouse" BM table. The stride of the search */
805 /* is determined only by the last character of the putative match. */
806 /* If that character does not match, we will stride the proper */
807 /* distance to propose a match that superimposes it on the last */
808 /* instance of a character that matches it (per trt), or misses */
809 /* it entirely if there is none. */
810
811 dirlen = len * direction;
812 infinity = dirlen - (lim + pos + len + len) * direction;
813 if (direction < 0)
814 pat = (base_pat += len - 1);
815 BM_tab_base = BM_tab;
816 BM_tab += 0400;
817 j = dirlen; /* to get it in a register */
818 /* A character that does not appear in the pattern induces a */
819 /* stride equal to the pattern length. */
820 while (BM_tab_base != BM_tab)
821 {
822 *--BM_tab = j;
823 *--BM_tab = j;
824 *--BM_tab = j;
825 *--BM_tab = j;
826 }
827 i = 0;
828 while (i != infinity)
829 {
830 j = pat[i]; i += direction;
831 if (i == dirlen) i = infinity;
832 if ((int) trt)
833 {
834 k = (j = trt[j]);
835 if (i == infinity)
836 stride_for_teases = BM_tab[j];
837 BM_tab[j] = dirlen - i;
838 /* A translation table is accompanied by its inverse -- see */
839 /* comment following downcase_table for details */
840 while ((j = inverse_trt[j]) != k)
841 BM_tab[j] = dirlen - i;
842 }
843 else
844 {
845 if (i == infinity)
846 stride_for_teases = BM_tab[j];
847 BM_tab[j] = dirlen - i;
848 }
849 /* stride_for_teases tells how much to stride if we get a */
850 /* match on the far character but are subsequently */
851 /* disappointed, by recording what the stride would have been */
852 /* for that character if the last character had been */
853 /* different. */
854 }
855 infinity = dirlen - infinity;
856 pos += dirlen - ((direction > 0) ? direction : 0);
857 /* loop invariant - pos points at where last char (first char if reverse)
858 of pattern would align in a possible match. */
859 while (n != 0)
860 {
b2c71fb4
KH
861 /* It's been reported that some (broken) compiler thinks that
862 Boolean expressions in an arithmetic context are unsigned.
863 Using an explicit ?1:0 prevents this. */
864 if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0)
ca1d1d23
JB
865 return (n * (0 - direction));
866 /* First we do the part we can by pointers (maybe nothing) */
867 QUIT;
868 pat = base_pat;
869 limit = pos - dirlen + direction;
870 limit = ((direction > 0)
871 ? BUFFER_CEILING_OF (limit)
872 : BUFFER_FLOOR_OF (limit));
873 /* LIMIT is now the last (not beyond-last!) value
874 POS can take on without hitting edge of buffer or the gap. */
875 limit = ((direction > 0)
876 ? min (lim - 1, min (limit, pos + 20000))
877 : max (lim, max (limit, pos - 20000)));
878 if ((limit - pos) * direction > 20)
879 {
880 p_limit = &FETCH_CHAR (limit);
881 p2 = (cursor = &FETCH_CHAR (pos));
882 /* In this loop, pos + cursor - p2 is the surrogate for pos */
883 while (1) /* use one cursor setting as long as i can */
884 {
885 if (direction > 0) /* worth duplicating */
886 {
887 /* Use signed comparison if appropriate
888 to make cursor+infinity sure to be > p_limit.
889 Assuming that the buffer lies in a range of addresses
890 that are all "positive" (as ints) or all "negative",
891 either kind of comparison will work as long
892 as we don't step by infinity. So pick the kind
893 that works when we do step by infinity. */
894 if ((int) (p_limit + infinity) > (int) p_limit)
895 while ((int) cursor <= (int) p_limit)
896 cursor += BM_tab[*cursor];
897 else
898 while ((unsigned int) cursor <= (unsigned int) p_limit)
899 cursor += BM_tab[*cursor];
900 }
901 else
902 {
903 if ((int) (p_limit + infinity) < (int) p_limit)
904 while ((int) cursor >= (int) p_limit)
905 cursor += BM_tab[*cursor];
906 else
907 while ((unsigned int) cursor >= (unsigned int) p_limit)
908 cursor += BM_tab[*cursor];
909 }
910/* If you are here, cursor is beyond the end of the searched region. */
911 /* This can happen if you match on the far character of the pattern, */
912 /* because the "stride" of that character is infinity, a number able */
913 /* to throw you well beyond the end of the search. It can also */
914 /* happen if you fail to match within the permitted region and would */
915 /* otherwise try a character beyond that region */
916 if ((cursor - p_limit) * direction <= len)
917 break; /* a small overrun is genuine */
918 cursor -= infinity; /* large overrun = hit */
919 i = dirlen - direction;
920 if ((int) trt)
921 {
922 while ((i -= direction) + direction != 0)
923 if (pat[i] != trt[*(cursor -= direction)])
924 break;
925 }
926 else
927 {
928 while ((i -= direction) + direction != 0)
929 if (pat[i] != *(cursor -= direction))
930 break;
931 }
932 cursor += dirlen - i - direction; /* fix cursor */
933 if (i + direction == 0)
934 {
935 cursor -= direction;
1113d9db 936
ca325161
RS
937 set_search_regs (pos + cursor - p2 + ((direction > 0)
938 ? 1 - len : 0),
939 len);
940
ca1d1d23
JB
941 if ((n -= direction) != 0)
942 cursor += dirlen; /* to resume search */
943 else
944 return ((direction > 0)
945 ? search_regs.end[0] : search_regs.start[0]);
946 }
947 else
948 cursor += stride_for_teases; /* <sigh> we lose - */
949 }
950 pos += cursor - p2;
951 }
952 else
953 /* Now we'll pick up a clump that has to be done the hard */
954 /* way because it covers a discontinuity */
955 {
956 limit = ((direction > 0)
957 ? BUFFER_CEILING_OF (pos - dirlen + 1)
958 : BUFFER_FLOOR_OF (pos - dirlen - 1));
959 limit = ((direction > 0)
960 ? min (limit + len, lim - 1)
961 : max (limit - len, lim));
962 /* LIMIT is now the last value POS can have
963 and still be valid for a possible match. */
964 while (1)
965 {
966 /* This loop can be coded for space rather than */
967 /* speed because it will usually run only once. */
968 /* (the reach is at most len + 21, and typically */
969 /* does not exceed len) */
970 while ((limit - pos) * direction >= 0)
971 pos += BM_tab[FETCH_CHAR(pos)];
972 /* now run the same tests to distinguish going off the */
eb8c3be9 973 /* end, a match or a phony match. */
ca1d1d23
JB
974 if ((pos - limit) * direction <= len)
975 break; /* ran off the end */
976 /* Found what might be a match.
977 Set POS back to last (first if reverse) char pos. */
978 pos -= infinity;
979 i = dirlen - direction;
980 while ((i -= direction) + direction != 0)
981 {
982 pos -= direction;
983 if (pat[i] != (((int) trt)
984 ? trt[FETCH_CHAR(pos)]
985 : FETCH_CHAR (pos)))
986 break;
987 }
988 /* Above loop has moved POS part or all the way
989 back to the first char pos (last char pos if reverse).
990 Set it once again at the last (first if reverse) char. */
991 pos += dirlen - i- direction;
992 if (i + direction == 0)
993 {
994 pos -= direction;
1113d9db 995
ca325161
RS
996 set_search_regs (pos + ((direction > 0) ? 1 - len : 0),
997 len);
998
ca1d1d23
JB
999 if ((n -= direction) != 0)
1000 pos += dirlen; /* to resume search */
1001 else
1002 return ((direction > 0)
1003 ? search_regs.end[0] : search_regs.start[0]);
1004 }
1005 else
1006 pos += stride_for_teases;
1007 }
1008 }
1009 /* We have done one clump. Can we continue? */
1010 if ((lim - pos) * direction < 0)
1011 return ((0 - n) * direction);
1012 }
1013 return pos;
1014 }
1015}
ca325161
RS
1016
1017/* Record beginning BEG and end BEG + LEN
1018 for a match just found in the current buffer. */
1019
1020static void
1021set_search_regs (beg, len)
1022 int beg, len;
1023{
1024 /* Make sure we have registers in which to store
1025 the match position. */
1026 if (search_regs.num_regs == 0)
1027 {
1028 regoff_t *starts, *ends;
1029
1030 starts = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1031 ends = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1032 BLOCK_INPUT;
1033 re_set_registers (&searchbuf,
1034 &search_regs,
1035 2, starts, ends);
1036 UNBLOCK_INPUT;
1037 }
1038
1039 search_regs.start[0] = beg;
1040 search_regs.end[0] = beg + len;
a3668d92 1041 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 1042}
ca1d1d23
JB
1043\f
1044/* Given a string of words separated by word delimiters,
1045 compute a regexp that matches those exact words
1046 separated by arbitrary punctuation. */
1047
1048static Lisp_Object
1049wordify (string)
1050 Lisp_Object string;
1051{
1052 register unsigned char *p, *o;
1053 register int i, len, punct_count = 0, word_count = 0;
1054 Lisp_Object val;
1055
1056 CHECK_STRING (string, 0);
1057 p = XSTRING (string)->data;
1058 len = XSTRING (string)->size;
1059
1060 for (i = 0; i < len; i++)
1061 if (SYNTAX (p[i]) != Sword)
1062 {
1063 punct_count++;
1064 if (i > 0 && SYNTAX (p[i-1]) == Sword) word_count++;
1065 }
1066 if (SYNTAX (p[len-1]) == Sword) word_count++;
1067 if (!word_count) return build_string ("");
1068
1069 val = make_string (p, len - punct_count + 5 * (word_count - 1) + 4);
1070
1071 o = XSTRING (val)->data;
1072 *o++ = '\\';
1073 *o++ = 'b';
1074
1075 for (i = 0; i < len; i++)
1076 if (SYNTAX (p[i]) == Sword)
1077 *o++ = p[i];
1078 else if (i > 0 && SYNTAX (p[i-1]) == Sword && --word_count)
1079 {
1080 *o++ = '\\';
1081 *o++ = 'W';
1082 *o++ = '\\';
1083 *o++ = 'W';
1084 *o++ = '*';
1085 }
1086
1087 *o++ = '\\';
1088 *o++ = 'b';
1089
1090 return val;
1091}
1092\f
1093DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
1094 "sSearch backward: ",
1095 "Search backward from point for STRING.\n\
1096Set point to the beginning of the occurrence found, and return point.\n\
1097An optional second argument bounds the search; it is a buffer position.\n\
1098The match found must not extend before that position.\n\
1099Optional third argument, if t, means if fail just return nil (no error).\n\
1100 If not nil and not t, position at limit of search and return nil.\n\
1101Optional fourth argument is repeat count--search for successive occurrences.\n\
1102See also the functions `match-beginning', `match-end' and `replace-match'.")
1103 (string, bound, noerror, count)
1104 Lisp_Object string, bound, noerror, count;
1105{
1106 return search_command (string, bound, noerror, count, -1, 0);
1107}
1108
1109DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "sSearch: ",
1110 "Search forward from point for STRING.\n\
1111Set point to the end of the occurrence found, and return point.\n\
1112An optional second argument bounds the search; it is a buffer position.\n\
1113The match found must not extend after that position. nil is equivalent\n\
1114 to (point-max).\n\
1115Optional third argument, if t, means if fail just return nil (no error).\n\
1116 If not nil and not t, move to limit of search and return nil.\n\
1117Optional fourth argument is repeat count--search for successive occurrences.\n\
1118See also the functions `match-beginning', `match-end' and `replace-match'.")
1119 (string, bound, noerror, count)
1120 Lisp_Object string, bound, noerror, count;
1121{
1122 return search_command (string, bound, noerror, count, 1, 0);
1123}
1124
1125DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
1126 "sWord search backward: ",
1127 "Search backward from point for STRING, ignoring differences in punctuation.\n\
1128Set point to the beginning of the occurrence found, and return point.\n\
1129An optional second argument bounds the search; it is a buffer position.\n\
1130The match found must not extend before that position.\n\
1131Optional third argument, if t, means if fail just return nil (no error).\n\
1132 If not nil and not t, move to limit of search and return nil.\n\
1133Optional fourth argument is repeat count--search for successive occurrences.")
1134 (string, bound, noerror, count)
1135 Lisp_Object string, bound, noerror, count;
1136{
1137 return search_command (wordify (string), bound, noerror, count, -1, 1);
1138}
1139
1140DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
1141 "sWord search: ",
1142 "Search forward from point for STRING, ignoring differences in punctuation.\n\
1143Set point to the end of the occurrence found, and return point.\n\
1144An optional second argument bounds the search; it is a buffer position.\n\
1145The match found must not extend after that position.\n\
1146Optional third argument, if t, means if fail just return nil (no error).\n\
1147 If not nil and not t, move to limit of search and return nil.\n\
1148Optional fourth argument is repeat count--search for successive occurrences.")
1149 (string, bound, noerror, count)
1150 Lisp_Object string, bound, noerror, count;
1151{
1152 return search_command (wordify (string), bound, noerror, count, 1, 1);
1153}
1154
1155DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
1156 "sRE search backward: ",
1157 "Search backward from point for match for regular expression REGEXP.\n\
1158Set point to the beginning of the match, and return point.\n\
1159The match found is the one starting last in the buffer\n\
19c0a730 1160and yet ending before the origin of the search.\n\
ca1d1d23
JB
1161An optional second argument bounds the search; it is a buffer position.\n\
1162The match found must start at or after that position.\n\
1163Optional third argument, if t, means if fail just return nil (no error).\n\
1164 If not nil and not t, move to limit of search and return nil.\n\
1165Optional fourth argument is repeat count--search for successive occurrences.\n\
1166See also the functions `match-beginning', `match-end' and `replace-match'.")
19c0a730
KH
1167 (regexp, bound, noerror, count)
1168 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 1169{
19c0a730 1170 return search_command (regexp, bound, noerror, count, -1, 1);
ca1d1d23
JB
1171}
1172
1173DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
1174 "sRE search: ",
1175 "Search forward from point for regular expression REGEXP.\n\
1176Set point to the end of the occurrence found, and return point.\n\
1177An optional second argument bounds the search; it is a buffer position.\n\
1178The match found must not extend after that position.\n\
1179Optional third argument, if t, means if fail just return nil (no error).\n\
1180 If not nil and not t, move to limit of search and return nil.\n\
1181Optional fourth argument is repeat count--search for successive occurrences.\n\
1182See also the functions `match-beginning', `match-end' and `replace-match'.")
19c0a730
KH
1183 (regexp, bound, noerror, count)
1184 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 1185{
19c0a730 1186 return search_command (regexp, bound, noerror, count, 1, 1);
ca1d1d23
JB
1187}
1188\f
080c45fd 1189DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 4, 0,
ca1d1d23
JB
1190 "Replace text matched by last search with NEWTEXT.\n\
1191If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
5b9cf4b2
RS
1192Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
1193based on the replaced text.\n\
1194If the replaced text has only capital letters\n\
1195and has at least one multiletter word, convert NEWTEXT to all caps.\n\
1196If the replaced text has at least one word starting with a capital letter,\n\
1197then capitalize each word in NEWTEXT.\n\n\
ca1d1d23
JB
1198If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
1199Otherwise treat `\\' as special:\n\
1200 `\\&' in NEWTEXT means substitute original matched text.\n\
1201 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
1202 If Nth parens didn't match, substitute nothing.\n\
1203 `\\\\' means insert one `\\'.\n\
1113d9db 1204FIXEDCASE and LITERAL are optional arguments.\n\
080c45fd
RS
1205Leaves point at end of replacement text.\n\
1206\n\
1207The optional fourth argument STRING can be a string to modify.\n\
1208In that case, this function creates and returns a new string\n\
1209which is made by replacing the part of STRING that was matched.")
1210 (newtext, fixedcase, literal, string)
1211 Lisp_Object newtext, fixedcase, literal, string;
ca1d1d23
JB
1212{
1213 enum { nochange, all_caps, cap_initial } case_action;
1214 register int pos, last;
1215 int some_multiletter_word;
97832bd0 1216 int some_lowercase;
73dc8771 1217 int some_uppercase;
208767c3 1218 int some_nonuppercase_initial;
ca1d1d23
JB
1219 register int c, prevc;
1220 int inslen;
1221
16fdc568 1222 CHECK_STRING (newtext, 0);
ca1d1d23 1223
080c45fd
RS
1224 if (! NILP (string))
1225 CHECK_STRING (string, 4);
1226
ca1d1d23
JB
1227 case_action = nochange; /* We tried an initialization */
1228 /* but some C compilers blew it */
4746118a
JB
1229
1230 if (search_regs.num_regs <= 0)
1231 error ("replace-match called before any match found");
1232
080c45fd
RS
1233 if (NILP (string))
1234 {
1235 if (search_regs.start[0] < BEGV
1236 || search_regs.start[0] > search_regs.end[0]
1237 || search_regs.end[0] > ZV)
1238 args_out_of_range (make_number (search_regs.start[0]),
1239 make_number (search_regs.end[0]));
1240 }
1241 else
1242 {
1243 if (search_regs.start[0] < 0
1244 || search_regs.start[0] > search_regs.end[0]
1245 || search_regs.end[0] > XSTRING (string)->size)
1246 args_out_of_range (make_number (search_regs.start[0]),
1247 make_number (search_regs.end[0]));
1248 }
ca1d1d23
JB
1249
1250 if (NILP (fixedcase))
1251 {
1252 /* Decide how to casify by examining the matched text. */
1253
1254 last = search_regs.end[0];
1255 prevc = '\n';
1256 case_action = all_caps;
1257
1258 /* some_multiletter_word is set nonzero if any original word
1259 is more than one letter long. */
1260 some_multiletter_word = 0;
97832bd0 1261 some_lowercase = 0;
208767c3 1262 some_nonuppercase_initial = 0;
73dc8771 1263 some_uppercase = 0;
ca1d1d23
JB
1264
1265 for (pos = search_regs.start[0]; pos < last; pos++)
1266 {
080c45fd
RS
1267 if (NILP (string))
1268 c = FETCH_CHAR (pos);
1269 else
1270 c = XSTRING (string)->data[pos];
1271
ca1d1d23
JB
1272 if (LOWERCASEP (c))
1273 {
1274 /* Cannot be all caps if any original char is lower case */
1275
97832bd0 1276 some_lowercase = 1;
ca1d1d23 1277 if (SYNTAX (prevc) != Sword)
208767c3 1278 some_nonuppercase_initial = 1;
ca1d1d23
JB
1279 else
1280 some_multiletter_word = 1;
1281 }
1282 else if (!NOCASEP (c))
1283 {
73dc8771 1284 some_uppercase = 1;
97832bd0 1285 if (SYNTAX (prevc) != Sword)
c4d460ce 1286 ;
97832bd0 1287 else
ca1d1d23
JB
1288 some_multiletter_word = 1;
1289 }
208767c3
RS
1290 else
1291 {
1292 /* If the initial is a caseless word constituent,
1293 treat that like a lowercase initial. */
1294 if (SYNTAX (prevc) != Sword)
1295 some_nonuppercase_initial = 1;
1296 }
ca1d1d23
JB
1297
1298 prevc = c;
1299 }
1300
97832bd0
RS
1301 /* Convert to all caps if the old text is all caps
1302 and has at least one multiletter word. */
1303 if (! some_lowercase && some_multiletter_word)
1304 case_action = all_caps;
c4d460ce 1305 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 1306 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 1307 case_action = cap_initial;
208767c3 1308 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
1309 /* Should x -> yz, operating on X, give Yz or YZ?
1310 We'll assume the latter. */
1311 case_action = all_caps;
97832bd0
RS
1312 else
1313 case_action = nochange;
ca1d1d23
JB
1314 }
1315
080c45fd
RS
1316 /* Do replacement in a string. */
1317 if (!NILP (string))
1318 {
1319 Lisp_Object before, after;
1320
1321 before = Fsubstring (string, make_number (0),
1322 make_number (search_regs.start[0]));
1323 after = Fsubstring (string, make_number (search_regs.end[0]), Qnil);
1324
1325 /* Do case substitution into NEWTEXT if desired. */
1326 if (NILP (literal))
1327 {
1328 int lastpos = -1;
1329 /* We build up the substituted string in ACCUM. */
1330 Lisp_Object accum;
1331 Lisp_Object middle;
1332
1333 accum = Qnil;
1334
1335 for (pos = 0; pos < XSTRING (newtext)->size; pos++)
1336 {
1337 int substart = -1;
1338 int subend;
1339
1340 c = XSTRING (newtext)->data[pos];
1341 if (c == '\\')
1342 {
1343 c = XSTRING (newtext)->data[++pos];
1344 if (c == '&')
1345 {
1346 substart = search_regs.start[0];
1347 subend = search_regs.end[0];
1348 }
1349 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
1350 {
1351 if (search_regs.start[c - '0'] >= 1)
1352 {
1353 substart = search_regs.start[c - '0'];
1354 subend = search_regs.end[c - '0'];
1355 }
1356 }
1357 }
1358 if (substart >= 0)
1359 {
1360 if (pos - 1 != lastpos + 1)
1361 middle = Fsubstring (newtext, lastpos + 1, pos - 1);
1362 else
1363 middle = Qnil;
1364 accum = concat3 (accum, middle,
1365 Fsubstring (string, make_number (substart),
1366 make_number (subend)));
1367 lastpos = pos;
1368 }
1369 }
1370
1371 if (pos != lastpos + 1)
1372 middle = Fsubstring (newtext, lastpos + 1, pos);
1373 else
1374 middle = Qnil;
1375
1376 newtext = concat2 (accum, middle);
1377 }
1378
1379 if (case_action == all_caps)
1380 newtext = Fupcase (newtext);
1381 else if (case_action == cap_initial)
1382 newtext = upcase_initials (newtext);
1383
1384 return concat3 (before, newtext, after);
1385 }
1386
9a76659d
JB
1387 /* We insert the replacement text before the old text, and then
1388 delete the original text. This means that markers at the
1389 beginning or end of the original will float to the corresponding
1390 position in the replacement. */
1391 SET_PT (search_regs.start[0]);
ca1d1d23 1392 if (!NILP (literal))
16fdc568 1393 Finsert_and_inherit (1, &newtext);
ca1d1d23
JB
1394 else
1395 {
1396 struct gcpro gcpro1;
16fdc568 1397 GCPRO1 (newtext);
ca1d1d23 1398
16fdc568 1399 for (pos = 0; pos < XSTRING (newtext)->size; pos++)
ca1d1d23 1400 {
9a76659d
JB
1401 int offset = point - search_regs.start[0];
1402
16fdc568 1403 c = XSTRING (newtext)->data[pos];
ca1d1d23
JB
1404 if (c == '\\')
1405 {
16fdc568 1406 c = XSTRING (newtext)->data[++pos];
ca1d1d23 1407 if (c == '&')
9a76659d
JB
1408 Finsert_buffer_substring
1409 (Fcurrent_buffer (),
1410 make_number (search_regs.start[0] + offset),
1411 make_number (search_regs.end[0] + offset));
78445046 1412 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
1413 {
1414 if (search_regs.start[c - '0'] >= 1)
9a76659d
JB
1415 Finsert_buffer_substring
1416 (Fcurrent_buffer (),
1417 make_number (search_regs.start[c - '0'] + offset),
1418 make_number (search_regs.end[c - '0'] + offset));
ca1d1d23
JB
1419 }
1420 else
1421 insert_char (c);
1422 }
1423 else
1424 insert_char (c);
1425 }
1426 UNGCPRO;
1427 }
1428
9a76659d
JB
1429 inslen = point - (search_regs.start[0]);
1430 del_range (search_regs.start[0] + inslen, search_regs.end[0] + inslen);
ca1d1d23
JB
1431
1432 if (case_action == all_caps)
1433 Fupcase_region (make_number (point - inslen), make_number (point));
1434 else if (case_action == cap_initial)
1435 upcase_initials_region (make_number (point - inslen), make_number (point));
1436 return Qnil;
1437}
1438\f
1439static Lisp_Object
1440match_limit (num, beginningp)
1441 Lisp_Object num;
1442 int beginningp;
1443{
1444 register int n;
1445
1446 CHECK_NUMBER (num, 0);
1447 n = XINT (num);
4746118a
JB
1448 if (n < 0 || n >= search_regs.num_regs)
1449 args_out_of_range (num, make_number (search_regs.num_regs));
1450 if (search_regs.num_regs <= 0
1451 || search_regs.start[n] < 0)
ca1d1d23
JB
1452 return Qnil;
1453 return (make_number ((beginningp) ? search_regs.start[n]
1454 : search_regs.end[n]));
1455}
1456
1457DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
1458 "Return position of start of text matched by last search.\n\
16fdc568
BF
1459NUM specifies which parenthesized expression in the last regexp.\n\
1460 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.\n\
ca1d1d23
JB
1461Zero means the entire text matched by the whole regexp or whole string.")
1462 (num)
1463 Lisp_Object num;
1464{
1465 return match_limit (num, 1);
1466}
1467
1468DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
1469 "Return position of end of text matched by last search.\n\
1470ARG, a number, specifies which parenthesized expression in the last regexp.\n\
1471 Value is nil if ARGth pair didn't match, or there were less than ARG pairs.\n\
1472Zero means the entire text matched by the whole regexp or whole string.")
1473 (num)
1474 Lisp_Object num;
1475{
1476 return match_limit (num, 0);
1477}
1478
1479DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 0, 0,
1480 "Return a list containing all info on what the last search matched.\n\
1481Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
1482All the elements are markers or nil (nil if the Nth pair didn't match)\n\
1483if the last match was on a buffer; integers or nil if a string was matched.\n\
1484Use `store-match-data' to reinstate the data in this list.")
1485 ()
1486{
4746118a 1487 Lisp_Object *data;
ca1d1d23
JB
1488 int i, len;
1489
daa37602
JB
1490 if (NILP (last_thing_searched))
1491 error ("match-data called before any match found");
1492
4746118a
JB
1493 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
1494 * sizeof (Lisp_Object));
1495
ca1d1d23 1496 len = -1;
4746118a 1497 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1498 {
1499 int start = search_regs.start[i];
1500 if (start >= 0)
1501 {
daa37602 1502 if (EQ (last_thing_searched, Qt))
ca1d1d23 1503 {
c235cce7
KH
1504 XSETFASTINT (data[2 * i], start);
1505 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 1506 }
0ed62dc7 1507 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
1508 {
1509 data[2 * i] = Fmake_marker ();
daa37602
JB
1510 Fset_marker (data[2 * i],
1511 make_number (start),
1512 last_thing_searched);
ca1d1d23
JB
1513 data[2 * i + 1] = Fmake_marker ();
1514 Fset_marker (data[2 * i + 1],
daa37602
JB
1515 make_number (search_regs.end[i]),
1516 last_thing_searched);
ca1d1d23 1517 }
daa37602
JB
1518 else
1519 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
1520 abort ();
1521
ca1d1d23
JB
1522 len = i;
1523 }
1524 else
1525 data[2 * i] = data [2 * i + 1] = Qnil;
1526 }
1527 return Flist (2 * len + 2, data);
1528}
1529
1530
1531DEFUN ("store-match-data", Fstore_match_data, Sstore_match_data, 1, 1, 0,
1532 "Set internal data on last search match from elements of LIST.\n\
1533LIST should have been created by calling `match-data' previously.")
1534 (list)
1535 register Lisp_Object list;
1536{
1537 register int i;
1538 register Lisp_Object marker;
1539
1540 if (!CONSP (list) && !NILP (list))
b37902c8 1541 list = wrong_type_argument (Qconsp, list);
ca1d1d23 1542
daa37602
JB
1543 /* Unless we find a marker with a buffer in LIST, assume that this
1544 match data came from a string. */
1545 last_thing_searched = Qt;
1546
4746118a
JB
1547 /* Allocate registers if they don't already exist. */
1548 {
d084e942 1549 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
1550
1551 if (length > search_regs.num_regs)
1552 {
1113d9db
JB
1553 if (search_regs.num_regs == 0)
1554 {
1555 search_regs.start
1556 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
1557 search_regs.end
1558 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
1559 }
4746118a 1560 else
1113d9db
JB
1561 {
1562 search_regs.start
1563 = (regoff_t *) xrealloc (search_regs.start,
1564 length * sizeof (regoff_t));
1565 search_regs.end
1566 = (regoff_t *) xrealloc (search_regs.end,
1567 length * sizeof (regoff_t));
1568 }
4746118a 1569
9ac0d9e0 1570 BLOCK_INPUT;
1113d9db
JB
1571 re_set_registers (&searchbuf, &search_regs, length,
1572 search_regs.start, search_regs.end);
9ac0d9e0 1573 UNBLOCK_INPUT;
4746118a
JB
1574 }
1575 }
1576
1577 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1578 {
1579 marker = Fcar (list);
1580 if (NILP (marker))
1581 {
1582 search_regs.start[i] = -1;
1583 list = Fcdr (list);
1584 }
1585 else
1586 {
0ed62dc7 1587 if (MARKERP (marker))
daa37602
JB
1588 {
1589 if (XMARKER (marker)->buffer == 0)
c235cce7 1590 XSETFASTINT (marker, 0);
daa37602 1591 else
a3668d92 1592 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
daa37602 1593 }
ca1d1d23
JB
1594
1595 CHECK_NUMBER_COERCE_MARKER (marker, 0);
1596 search_regs.start[i] = XINT (marker);
1597 list = Fcdr (list);
1598
1599 marker = Fcar (list);
0ed62dc7 1600 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
c235cce7 1601 XSETFASTINT (marker, 0);
ca1d1d23
JB
1602
1603 CHECK_NUMBER_COERCE_MARKER (marker, 0);
1604 search_regs.end[i] = XINT (marker);
1605 }
1606 list = Fcdr (list);
1607 }
1608
1609 return Qnil;
1610}
1611
1612/* Quote a string to inactivate reg-expr chars */
1613
1614DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
1615 "Return a regexp string which matches exactly STRING and nothing else.")
1616 (str)
1617 Lisp_Object str;
1618{
1619 register unsigned char *in, *out, *end;
1620 register unsigned char *temp;
1621
1622 CHECK_STRING (str, 0);
1623
1624 temp = (unsigned char *) alloca (XSTRING (str)->size * 2);
1625
1626 /* Now copy the data into the new string, inserting escapes. */
1627
1628 in = XSTRING (str)->data;
1629 end = in + XSTRING (str)->size;
1630 out = temp;
1631
1632 for (; in != end; in++)
1633 {
1634 if (*in == '[' || *in == ']'
1635 || *in == '*' || *in == '.' || *in == '\\'
1636 || *in == '?' || *in == '+'
1637 || *in == '^' || *in == '$')
1638 *out++ = '\\';
1639 *out++ = *in;
1640 }
1641
1642 return make_string (temp, out - temp);
1643}
1644\f
1645syms_of_search ()
1646{
1647 register int i;
1648
1649 searchbuf.allocated = 100;
8c0e7b73 1650 searchbuf.buffer = (unsigned char *) malloc (searchbuf.allocated);
ca1d1d23
JB
1651 searchbuf.fastmap = search_fastmap;
1652
1653 Qsearch_failed = intern ("search-failed");
1654 staticpro (&Qsearch_failed);
1655 Qinvalid_regexp = intern ("invalid-regexp");
1656 staticpro (&Qinvalid_regexp);
1657
1658 Fput (Qsearch_failed, Qerror_conditions,
1659 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
1660 Fput (Qsearch_failed, Qerror_message,
1661 build_string ("Search failed"));
1662
1663 Fput (Qinvalid_regexp, Qerror_conditions,
1664 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
1665 Fput (Qinvalid_regexp, Qerror_message,
1666 build_string ("Invalid regexp"));
1667
1668 last_regexp = Qnil;
1669 staticpro (&last_regexp);
1670
daa37602
JB
1671 last_thing_searched = Qnil;
1672 staticpro (&last_thing_searched);
1673
ca1d1d23
JB
1674 defsubr (&Sstring_match);
1675 defsubr (&Slooking_at);
1676 defsubr (&Sskip_chars_forward);
1677 defsubr (&Sskip_chars_backward);
17431c60
RS
1678 defsubr (&Sskip_syntax_forward);
1679 defsubr (&Sskip_syntax_backward);
ca1d1d23
JB
1680 defsubr (&Ssearch_forward);
1681 defsubr (&Ssearch_backward);
1682 defsubr (&Sword_search_forward);
1683 defsubr (&Sword_search_backward);
1684 defsubr (&Sre_search_forward);
1685 defsubr (&Sre_search_backward);
1686 defsubr (&Sreplace_match);
1687 defsubr (&Smatch_beginning);
1688 defsubr (&Smatch_end);
1689 defsubr (&Smatch_data);
1690 defsubr (&Sstore_match_data);
1691 defsubr (&Sregexp_quote);
1692}