(Fdirectory_files): Use the new calling convention for compile_pattern.
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
3a22ee35 2 Copyright (C) 1985, 1986, 1987, 1993, 1994 Free Software Foundation, Inc.
ca1d1d23
JB
3
4This file is part of GNU Emacs.
5
6GNU Emacs is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 1, or (at your option)
9any later version.
10
11GNU Emacs is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU Emacs; see the file COPYING. If not, write to
18the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20
18160b98 21#include <config.h>
ca1d1d23
JB
22#include "lisp.h"
23#include "syntax.h"
24#include "buffer.h"
9169c321 25#include "region-cache.h"
ca1d1d23 26#include "commands.h"
9ac0d9e0 27#include "blockinput.h"
4746118a 28
ca1d1d23
JB
29#include <sys/types.h>
30#include "regex.h"
31
ca1d1d23
JB
32/* We compile regexps into this buffer and then use it for searching. */
33
34struct re_pattern_buffer searchbuf;
35
36char search_fastmap[0400];
37
38/* Last regexp we compiled */
39
40Lisp_Object last_regexp;
41
4746118a
JB
42/* Every call to re_match, etc., must pass &search_regs as the regs
43 argument unless you can show it is unnecessary (i.e., if re_match
44 is certainly going to be called again before region-around-match
45 can be called).
46
47 Since the registers are now dynamically allocated, we need to make
48 sure not to refer to the Nth register before checking that it has
1113d9db
JB
49 been allocated by checking search_regs.num_regs.
50
51 The regex code keeps track of whether it has allocated the search
52 buffer using bits in searchbuf. This means that whenever you
53 compile a new pattern, it completely forgets whether it has
54 allocated any registers, and will allocate new registers the next
55 time you call a searching or matching function. Therefore, we need
56 to call re_set_registers after compiling a new pattern or after
57 setting the match registers, so that the regex functions will be
58 able to free or re-allocate it properly. */
ca1d1d23
JB
59static struct re_registers search_regs;
60
daa37602
JB
61/* The buffer in which the last search was performed, or
62 Qt if the last search was done in a string;
63 Qnil if no searching has been done yet. */
64static Lisp_Object last_thing_searched;
ca1d1d23
JB
65
66/* error condition signalled when regexp compile_pattern fails */
67
68Lisp_Object Qinvalid_regexp;
69
ca325161
RS
70static void set_search_regs ();
71
ca1d1d23
JB
72static void
73matcher_overflow ()
74{
75 error ("Stack overflow in regexp matcher");
76}
77
78#ifdef __STDC__
79#define CONST const
80#else
81#define CONST
82#endif
83
84/* Compile a regexp and signal a Lisp error if anything goes wrong. */
85
1113d9db 86compile_pattern (pattern, bufp, regp, translate)
ca1d1d23
JB
87 Lisp_Object pattern;
88 struct re_pattern_buffer *bufp;
1113d9db 89 struct re_registers *regp;
ca1d1d23
JB
90 char *translate;
91{
92 CONST char *val;
93 Lisp_Object dummy;
94
95 if (EQ (pattern, last_regexp)
96 && translate == bufp->translate)
97 return;
1113d9db 98
ca1d1d23
JB
99 last_regexp = Qnil;
100 bufp->translate = translate;
9ac0d9e0 101 BLOCK_INPUT;
b90d9e80
RS
102 val = (CONST char *) re_compile_pattern ((char *) XSTRING (pattern)->data,
103 XSTRING (pattern)->size, bufp);
9ac0d9e0 104 UNBLOCK_INPUT;
ca1d1d23
JB
105 if (val)
106 {
107 dummy = build_string (val);
108 while (1)
109 Fsignal (Qinvalid_regexp, Fcons (dummy, Qnil));
110 }
1113d9db 111
ca1d1d23 112 last_regexp = pattern;
1113d9db
JB
113
114 /* Advise the searching functions about the space we have allocated
115 for register data. */
9ac0d9e0 116 BLOCK_INPUT;
ebb9e16f
JB
117 if (regp)
118 re_set_registers (bufp, regp, regp->num_regs, regp->start, regp->end);
9ac0d9e0 119 UNBLOCK_INPUT;
1113d9db 120
ca1d1d23
JB
121 return;
122}
123
124/* Error condition used for failing searches */
125Lisp_Object Qsearch_failed;
126
127Lisp_Object
128signal_failure (arg)
129 Lisp_Object arg;
130{
131 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
132 return Qnil;
133}
134\f
135DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
e065a56e
JB
136 "Return t if text after point matches regular expression PAT.\n\
137This function modifies the match data that `match-beginning',\n\
138`match-end' and `match-data' access; save and restore the match\n\
fe99283d 139data if you want to preserve them.")
ca1d1d23
JB
140 (string)
141 Lisp_Object string;
142{
143 Lisp_Object val;
144 unsigned char *p1, *p2;
145 int s1, s2;
146 register int i;
147
148 CHECK_STRING (string, 0);
1113d9db 149 compile_pattern (string, &searchbuf, &search_regs,
ca1d1d23
JB
150 !NILP (current_buffer->case_fold_search) ? DOWNCASE_TABLE : 0);
151
152 immediate_quit = 1;
153 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
154
155 /* Get pointers and sizes of the two strings
156 that make up the visible portion of the buffer. */
157
158 p1 = BEGV_ADDR;
159 s1 = GPT - BEGV;
160 p2 = GAP_END_ADDR;
161 s2 = ZV - GPT;
162 if (s1 < 0)
163 {
164 p2 = p1;
165 s2 = ZV - BEGV;
166 s1 = 0;
167 }
168 if (s2 < 0)
169 {
170 s1 = ZV - BEGV;
171 s2 = 0;
172 }
173
174 i = re_match_2 (&searchbuf, (char *) p1, s1, (char *) p2, s2,
175 point - BEGV, &search_regs,
176 ZV - BEGV);
177 if (i == -2)
178 matcher_overflow ();
179
180 val = (0 <= i ? Qt : Qnil);
4746118a 181 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
182 if (search_regs.start[i] >= 0)
183 {
184 search_regs.start[i] += BEGV;
185 search_regs.end[i] += BEGV;
186 }
a3668d92 187 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
188 immediate_quit = 0;
189 return val;
190}
191
192DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
193 "Return index of start of first match for REGEXP in STRING, or nil.\n\
194If third arg START is non-nil, start search at that index in STRING.\n\
195For index of first char beyond the match, do (match-end 0).\n\
196`match-end' and `match-beginning' also give indices of substrings\n\
197matched by parenthesis constructs in the pattern.")
198 (regexp, string, start)
199 Lisp_Object regexp, string, start;
200{
201 int val;
202 int s;
203
204 CHECK_STRING (regexp, 0);
205 CHECK_STRING (string, 1);
206
207 if (NILP (start))
208 s = 0;
209 else
210 {
211 int len = XSTRING (string)->size;
212
213 CHECK_NUMBER (start, 2);
214 s = XINT (start);
215 if (s < 0 && -s <= len)
26faf9f4 216 s = len + s;
ca1d1d23
JB
217 else if (0 > s || s > len)
218 args_out_of_range (string, start);
219 }
220
1113d9db 221 compile_pattern (regexp, &searchbuf, &search_regs,
ca1d1d23
JB
222 !NILP (current_buffer->case_fold_search) ? DOWNCASE_TABLE : 0);
223 immediate_quit = 1;
224 val = re_search (&searchbuf, (char *) XSTRING (string)->data,
225 XSTRING (string)->size, s, XSTRING (string)->size - s,
226 &search_regs);
227 immediate_quit = 0;
daa37602 228 last_thing_searched = Qt;
ca1d1d23
JB
229 if (val == -2)
230 matcher_overflow ();
231 if (val < 0) return Qnil;
232 return make_number (val);
233}
e59a8453
RS
234
235/* Match REGEXP against STRING, searching all of STRING,
236 and return the index of the match, or negative on failure.
237 This does not clobber the match data. */
238
239int
240fast_string_match (regexp, string)
241 Lisp_Object regexp, string;
242{
243 int val;
244
245 compile_pattern (regexp, &searchbuf, 0, 0);
246 immediate_quit = 1;
247 val = re_search (&searchbuf, (char *) XSTRING (string)->data,
248 XSTRING (string)->size, 0, XSTRING (string)->size,
249 0);
250 immediate_quit = 0;
251 return val;
252}
ca1d1d23 253\f
9169c321
JB
254/* max and min. */
255
256static int
257max (a, b)
258 int a, b;
259{
260 return ((a > b) ? a : b);
261}
262
263static int
264min (a, b)
265 int a, b;
266{
267 return ((a < b) ? a : b);
268}
269
270\f
271/* The newline cache: remembering which sections of text have no newlines. */
272
273/* If the user has requested newline caching, make sure it's on.
274 Otherwise, make sure it's off.
275 This is our cheezy way of associating an action with the change of
276 state of a buffer-local variable. */
277static void
278newline_cache_on_off (buf)
279 struct buffer *buf;
280{
281 if (NILP (buf->cache_long_line_scans))
282 {
283 /* It should be off. */
284 if (buf->newline_cache)
285 {
286 free_region_cache (buf->newline_cache);
287 buf->newline_cache = 0;
288 }
289 }
290 else
291 {
292 /* It should be on. */
293 if (buf->newline_cache == 0)
294 buf->newline_cache = new_region_cache ();
295 }
296}
297
298\f
299/* Search for COUNT instances of the character TARGET between START and END.
300
301 If COUNT is positive, search forwards; END must be >= START.
302 If COUNT is negative, search backwards for the -COUNTth instance;
303 END must be <= START.
304 If COUNT is zero, do anything you please; run rogue, for all I care.
305
306 If END is zero, use BEGV or ZV instead, as appropriate for the
307 direction indicated by COUNT.
ffd56f97
JB
308
309 If we find COUNT instances, set *SHORTAGE to zero, and return the
5bfe95c9
RS
310 position after the COUNTth match. Note that for reverse motion
311 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 312
9169c321
JB
313 If we don't find COUNT instances before reaching END, set *SHORTAGE
314 to the number of TARGETs left unfound, and return END.
ffd56f97 315
087a5f81
RS
316 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
317 except when inside redisplay. */
318
9169c321
JB
319scan_buffer (target, start, end, count, shortage, allow_quit)
320 register int target;
321 int start, end;
322 int count;
323 int *shortage;
087a5f81 324 int allow_quit;
ca1d1d23 325{
9169c321
JB
326 struct region_cache *newline_cache;
327 int direction;
ffd56f97 328
9169c321
JB
329 if (count > 0)
330 {
331 direction = 1;
332 if (! end) end = ZV;
333 }
334 else
335 {
336 direction = -1;
337 if (! end) end = BEGV;
338 }
ffd56f97 339
9169c321
JB
340 newline_cache_on_off (current_buffer);
341 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
342
343 if (shortage != 0)
344 *shortage = 0;
345
087a5f81 346 immediate_quit = allow_quit;
ca1d1d23 347
ffd56f97 348 if (count > 0)
9169c321 349 while (start != end)
ca1d1d23 350 {
9169c321
JB
351 /* Our innermost scanning loop is very simple; it doesn't know
352 about gaps, buffer ends, or the newline cache. ceiling is
353 the position of the last character before the next such
354 obstacle --- the last character the dumb search loop should
355 examine. */
356 register int ceiling = end - 1;
357
358 /* If we're looking for a newline, consult the newline cache
359 to see where we can avoid some scanning. */
360 if (target == '\n' && newline_cache)
361 {
362 int next_change;
363 immediate_quit = 0;
364 while (region_cache_forward
365 (current_buffer, newline_cache, start, &next_change))
366 start = next_change;
cbe0db0d 367 immediate_quit = allow_quit;
9169c321
JB
368
369 /* start should never be after end. */
370 if (start >= end)
371 start = end - 1;
372
373 /* Now the text after start is an unknown region, and
374 next_change is the position of the next known region. */
375 ceiling = min (next_change - 1, ceiling);
376 }
377
378 /* The dumb loop can only scan text stored in contiguous
379 bytes. BUFFER_CEILING_OF returns the last character
380 position that is contiguous, so the ceiling is the
381 position after that. */
382 ceiling = min (BUFFER_CEILING_OF (start), ceiling);
383
384 {
385 /* The termination address of the dumb loop. */
386 register unsigned char *ceiling_addr = &FETCH_CHAR (ceiling) + 1;
387 register unsigned char *cursor = &FETCH_CHAR (start);
388 unsigned char *base = cursor;
389
390 while (cursor < ceiling_addr)
391 {
392 unsigned char *scan_start = cursor;
393
394 /* The dumb loop. */
395 while (*cursor != target && ++cursor < ceiling_addr)
396 ;
397
398 /* If we're looking for newlines, cache the fact that
399 the region from start to cursor is free of them. */
400 if (target == '\n' && newline_cache)
401 know_region_cache (current_buffer, newline_cache,
402 start + scan_start - base,
403 start + cursor - base);
404
405 /* Did we find the target character? */
406 if (cursor < ceiling_addr)
407 {
408 if (--count == 0)
409 {
410 immediate_quit = 0;
411 return (start + cursor - base + 1);
412 }
413 cursor++;
414 }
415 }
416
417 start += cursor - base;
418 }
ca1d1d23
JB
419 }
420 else
9169c321
JB
421 while (start > end)
422 {
423 /* The last character to check before the next obstacle. */
424 register int ceiling = end;
425
426 /* Consult the newline cache, if appropriate. */
427 if (target == '\n' && newline_cache)
428 {
429 int next_change;
430 immediate_quit = 0;
431 while (region_cache_backward
432 (current_buffer, newline_cache, start, &next_change))
433 start = next_change;
cbe0db0d 434 immediate_quit = allow_quit;
9169c321
JB
435
436 /* Start should never be at or before end. */
437 if (start <= end)
438 start = end + 1;
439
440 /* Now the text before start is an unknown region, and
441 next_change is the position of the next known region. */
442 ceiling = max (next_change, ceiling);
443 }
444
445 /* Stop scanning before the gap. */
446 ceiling = max (BUFFER_FLOOR_OF (start - 1), ceiling);
447
448 {
449 /* The termination address of the dumb loop. */
450 register unsigned char *ceiling_addr = &FETCH_CHAR (ceiling);
451 register unsigned char *cursor = &FETCH_CHAR (start - 1);
452 unsigned char *base = cursor;
453
454 while (cursor >= ceiling_addr)
455 {
456 unsigned char *scan_start = cursor;
457
458 while (*cursor != target && --cursor >= ceiling_addr)
459 ;
460
461 /* If we're looking for newlines, cache the fact that
462 the region from after the cursor to start is free of them. */
463 if (target == '\n' && newline_cache)
464 know_region_cache (current_buffer, newline_cache,
465 start + cursor - base,
466 start + scan_start - base);
467
468 /* Did we find the target character? */
469 if (cursor >= ceiling_addr)
470 {
471 if (++count >= 0)
472 {
473 immediate_quit = 0;
474 return (start + cursor - base);
475 }
476 cursor--;
477 }
478 }
479
480 start += cursor - base;
481 }
482 }
483
ca1d1d23
JB
484 immediate_quit = 0;
485 if (shortage != 0)
ffd56f97 486 *shortage = count * direction;
9169c321 487 return start;
ca1d1d23
JB
488}
489
63fa018d
RS
490int
491find_next_newline_no_quit (from, cnt)
492 register int from, cnt;
493{
9169c321 494 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
63fa018d
RS
495}
496
ca1d1d23
JB
497int
498find_next_newline (from, cnt)
499 register int from, cnt;
500{
9169c321
JB
501 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 1);
502}
503
504
505/* Like find_next_newline, but returns position before the newline,
506 not after, and only search up to TO. This isn't just
507 find_next_newline (...)-1, because you might hit TO. */
508int
509find_before_next_newline (from, to, cnt)
cbe0db0d 510 int from, to, cnt;
9169c321
JB
511{
512 int shortage;
513 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
514
515 if (shortage == 0)
516 pos--;
517
518 return pos;
ca1d1d23
JB
519}
520\f
c1dc99a1
JB
521Lisp_Object skip_chars ();
522
ca1d1d23 523DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
3acb9a69
RS
524 "Move point forward, stopping before a char not in STRING, or at pos LIM.\n\
525STRING is like the inside of a `[...]' in a regular expression\n\
ca1d1d23
JB
526except that `]' is never special and `\\' quotes `^', `-' or `\\'.\n\
527Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
c1dc99a1
JB
528With arg \"^a-zA-Z\", skips nonletters stopping before first letter.\n\
529Returns the distance traveled, either zero or positive.")
ca1d1d23
JB
530 (string, lim)
531 Lisp_Object string, lim;
532{
17431c60 533 return skip_chars (1, 0, string, lim);
ca1d1d23
JB
534}
535
536DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
3acb9a69 537 "Move point backward, stopping after a char not in STRING, or at pos LIM.\n\
c1dc99a1
JB
538See `skip-chars-forward' for details.\n\
539Returns the distance traveled, either zero or negative.")
ca1d1d23
JB
540 (string, lim)
541 Lisp_Object string, lim;
542{
17431c60
RS
543 return skip_chars (0, 0, string, lim);
544}
545
546DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
547 "Move point forward across chars in specified syntax classes.\n\
548SYNTAX is a string of syntax code characters.\n\
549Stop before a char whose syntax is not in SYNTAX, or at position LIM.\n\
550If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
551This function returns the distance traveled, either zero or positive.")
552 (syntax, lim)
553 Lisp_Object syntax, lim;
554{
555 return skip_chars (1, 1, syntax, lim);
556}
557
558DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
559 "Move point backward across chars in specified syntax classes.\n\
560SYNTAX is a string of syntax code characters.\n\
561Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.\n\
562If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
563This function returns the distance traveled, either zero or negative.")
564 (syntax, lim)
565 Lisp_Object syntax, lim;
566{
567 return skip_chars (0, 1, syntax, lim);
ca1d1d23
JB
568}
569
c1dc99a1 570Lisp_Object
17431c60
RS
571skip_chars (forwardp, syntaxp, string, lim)
572 int forwardp, syntaxp;
ca1d1d23
JB
573 Lisp_Object string, lim;
574{
575 register unsigned char *p, *pend;
576 register unsigned char c;
577 unsigned char fastmap[0400];
578 int negate = 0;
579 register int i;
580
581 CHECK_STRING (string, 0);
582
583 if (NILP (lim))
a3668d92 584 XSETINT (lim, forwardp ? ZV : BEGV);
ca1d1d23
JB
585 else
586 CHECK_NUMBER_COERCE_MARKER (lim, 1);
587
ca1d1d23 588 /* In any case, don't allow scan outside bounds of buffer. */
c5241910
RS
589 /* jla turned this off, for no known reason.
590 bfox turned the ZV part on, and rms turned the
591 BEGV part back on. */
592 if (XINT (lim) > ZV)
c235cce7 593 XSETFASTINT (lim, ZV);
c5241910 594 if (XINT (lim) < BEGV)
c235cce7 595 XSETFASTINT (lim, BEGV);
ca1d1d23
JB
596
597 p = XSTRING (string)->data;
598 pend = p + XSTRING (string)->size;
599 bzero (fastmap, sizeof fastmap);
600
601 if (p != pend && *p == '^')
602 {
603 negate = 1; p++;
604 }
605
17431c60
RS
606 /* Find the characters specified and set their elements of fastmap.
607 If syntaxp, each character counts as itself.
608 Otherwise, handle backslashes and ranges specially */
ca1d1d23
JB
609
610 while (p != pend)
611 {
612 c = *p++;
17431c60
RS
613 if (syntaxp)
614 fastmap[c] = 1;
615 else
ca1d1d23 616 {
17431c60 617 if (c == '\\')
ca1d1d23 618 {
17431c60
RS
619 if (p == pend) break;
620 c = *p++;
621 }
622 if (p != pend && *p == '-')
623 {
624 p++;
625 if (p == pend) break;
626 while (c <= *p)
627 {
628 fastmap[c] = 1;
629 c++;
630 }
631 p++;
ca1d1d23 632 }
17431c60
RS
633 else
634 fastmap[c] = 1;
ca1d1d23 635 }
ca1d1d23
JB
636 }
637
9239c6c1
RS
638 if (syntaxp && fastmap['-'] != 0)
639 fastmap[' '] = 1;
640
ca1d1d23
JB
641 /* If ^ was the first character, complement the fastmap. */
642
643 if (negate)
644 for (i = 0; i < sizeof fastmap; i++)
645 fastmap[i] ^= 1;
646
c1dc99a1
JB
647 {
648 int start_point = point;
649
650 immediate_quit = 1;
17431c60 651 if (syntaxp)
c1dc99a1 652 {
17431c60
RS
653
654 if (forwardp)
655 {
656 while (point < XINT (lim)
657 && fastmap[(unsigned char) syntax_code_spec[(int) SYNTAX (FETCH_CHAR (point))]])
658 SET_PT (point + 1);
659 }
660 else
661 {
662 while (point > XINT (lim)
663 && fastmap[(unsigned char) syntax_code_spec[(int) SYNTAX (FETCH_CHAR (point - 1))]])
664 SET_PT (point - 1);
665 }
c1dc99a1
JB
666 }
667 else
668 {
17431c60
RS
669 if (forwardp)
670 {
671 while (point < XINT (lim) && fastmap[FETCH_CHAR (point)])
672 SET_PT (point + 1);
673 }
674 else
675 {
676 while (point > XINT (lim) && fastmap[FETCH_CHAR (point - 1)])
677 SET_PT (point - 1);
678 }
c1dc99a1
JB
679 }
680 immediate_quit = 0;
681
682 return make_number (point - start_point);
683 }
ca1d1d23
JB
684}
685\f
686/* Subroutines of Lisp buffer search functions. */
687
688static Lisp_Object
689search_command (string, bound, noerror, count, direction, RE)
690 Lisp_Object string, bound, noerror, count;
691 int direction;
692 int RE;
693{
694 register int np;
695 int lim;
696 int n = direction;
697
698 if (!NILP (count))
699 {
700 CHECK_NUMBER (count, 3);
701 n *= XINT (count);
702 }
703
704 CHECK_STRING (string, 0);
705 if (NILP (bound))
706 lim = n > 0 ? ZV : BEGV;
707 else
708 {
709 CHECK_NUMBER_COERCE_MARKER (bound, 1);
710 lim = XINT (bound);
711 if (n > 0 ? lim < point : lim > point)
712 error ("Invalid search bound (wrong side of point)");
713 if (lim > ZV)
714 lim = ZV;
715 if (lim < BEGV)
716 lim = BEGV;
717 }
718
719 np = search_buffer (string, point, lim, n, RE,
720 (!NILP (current_buffer->case_fold_search)
721 ? XSTRING (current_buffer->case_canon_table)->data : 0),
722 (!NILP (current_buffer->case_fold_search)
723 ? XSTRING (current_buffer->case_eqv_table)->data : 0));
724 if (np <= 0)
725 {
726 if (NILP (noerror))
727 return signal_failure (string);
728 if (!EQ (noerror, Qt))
729 {
730 if (lim < BEGV || lim > ZV)
731 abort ();
a5f217b8
RS
732 SET_PT (lim);
733 return Qnil;
734#if 0 /* This would be clean, but maybe programs depend on
735 a value of nil here. */
481399bf 736 np = lim;
a5f217b8 737#endif
ca1d1d23 738 }
481399bf
RS
739 else
740 return Qnil;
ca1d1d23
JB
741 }
742
743 if (np < BEGV || np > ZV)
744 abort ();
745
746 SET_PT (np);
747
748 return make_number (np);
749}
750\f
b6d6a51c
KH
751static int
752trivial_regexp_p (regexp)
753 Lisp_Object regexp;
754{
755 int len = XSTRING (regexp)->size;
756 unsigned char *s = XSTRING (regexp)->data;
757 unsigned char c;
758 while (--len >= 0)
759 {
760 switch (*s++)
761 {
762 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
763 return 0;
764 case '\\':
765 if (--len < 0)
766 return 0;
767 switch (*s++)
768 {
769 case '|': case '(': case ')': case '`': case '\'': case 'b':
770 case 'B': case '<': case '>': case 'w': case 'W': case 's':
771 case 'S': case '1': case '2': case '3': case '4': case '5':
772 case '6': case '7': case '8': case '9':
773 return 0;
774 }
775 }
776 }
777 return 1;
778}
779
ca325161 780/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23
JB
781 starting at position POS and stopping at position LIM,
782 treating PAT as a literal string if RE is false or as
783 a regular expression if RE is true.
784
785 If N is positive, searching is forward and LIM must be greater than POS.
786 If N is negative, searching is backward and LIM must be less than POS.
787
788 Returns -x if only N-x occurrences found (x > 0),
789 or else the position at the beginning of the Nth occurrence
790 (if searching backward) or the end (if searching forward). */
791
792search_buffer (string, pos, lim, n, RE, trt, inverse_trt)
793 Lisp_Object string;
794 int pos;
795 int lim;
796 int n;
797 int RE;
798 register unsigned char *trt;
799 register unsigned char *inverse_trt;
800{
801 int len = XSTRING (string)->size;
802 unsigned char *base_pat = XSTRING (string)->data;
803 register int *BM_tab;
804 int *BM_tab_base;
805 register int direction = ((n > 0) ? 1 : -1);
806 register int dirlen;
807 int infinity, limit, k, stride_for_teases;
808 register unsigned char *pat, *cursor, *p_limit;
809 register int i, j;
810 unsigned char *p1, *p2;
811 int s1, s2;
812
813 /* Null string is found at starting position. */
3f57a499 814 if (len == 0)
ca325161
RS
815 {
816 set_search_regs (pos, 0);
817 return pos;
818 }
3f57a499
RS
819
820 /* Searching 0 times means don't move. */
821 if (n == 0)
ca1d1d23
JB
822 return pos;
823
b6d6a51c 824 if (RE && !trivial_regexp_p (string))
ca1d1d23 825 {
b6d6a51c 826 compile_pattern (string, &searchbuf, &search_regs, (char *) trt);
ca1d1d23 827
ca1d1d23
JB
828 immediate_quit = 1; /* Quit immediately if user types ^G,
829 because letting this function finish
830 can take too long. */
831 QUIT; /* Do a pending quit right away,
832 to avoid paradoxical behavior */
833 /* Get pointers and sizes of the two strings
834 that make up the visible portion of the buffer. */
835
836 p1 = BEGV_ADDR;
837 s1 = GPT - BEGV;
838 p2 = GAP_END_ADDR;
839 s2 = ZV - GPT;
840 if (s1 < 0)
841 {
842 p2 = p1;
843 s2 = ZV - BEGV;
844 s1 = 0;
845 }
846 if (s2 < 0)
847 {
848 s1 = ZV - BEGV;
849 s2 = 0;
850 }
851 while (n < 0)
852 {
42db823b 853 int val;
42db823b
RS
854 val = re_search_2 (&searchbuf, (char *) p1, s1, (char *) p2, s2,
855 pos - BEGV, lim - pos, &search_regs,
856 /* Don't allow match past current point */
857 pos - BEGV);
ca1d1d23 858 if (val == -2)
b6d6a51c
KH
859 {
860 matcher_overflow ();
861 }
ca1d1d23
JB
862 if (val >= 0)
863 {
864 j = BEGV;
4746118a 865 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
866 if (search_regs.start[i] >= 0)
867 {
868 search_regs.start[i] += j;
869 search_regs.end[i] += j;
870 }
a3668d92 871 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
872 /* Set pos to the new position. */
873 pos = search_regs.start[0];
874 }
875 else
876 {
877 immediate_quit = 0;
878 return (n);
879 }
880 n++;
881 }
882 while (n > 0)
883 {
42db823b 884 int val;
42db823b
RS
885 val = re_search_2 (&searchbuf, (char *) p1, s1, (char *) p2, s2,
886 pos - BEGV, lim - pos, &search_regs,
887 lim - BEGV);
ca1d1d23 888 if (val == -2)
b6d6a51c
KH
889 {
890 matcher_overflow ();
891 }
ca1d1d23
JB
892 if (val >= 0)
893 {
894 j = BEGV;
4746118a 895 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
896 if (search_regs.start[i] >= 0)
897 {
898 search_regs.start[i] += j;
899 search_regs.end[i] += j;
900 }
a3668d92 901 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
902 pos = search_regs.end[0];
903 }
904 else
905 {
906 immediate_quit = 0;
907 return (0 - n);
908 }
909 n--;
910 }
911 immediate_quit = 0;
912 return (pos);
913 }
914 else /* non-RE case */
915 {
916#ifdef C_ALLOCA
917 int BM_tab_space[0400];
918 BM_tab = &BM_tab_space[0];
919#else
920 BM_tab = (int *) alloca (0400 * sizeof (int));
921#endif
b6d6a51c
KH
922 {
923 unsigned char *patbuf = (unsigned char *) alloca (len);
924 pat = patbuf;
925 while (--len >= 0)
926 {
927 /* If we got here and the RE flag is set, it's because we're
928 dealing with a regexp known to be trivial, so the backslash
929 just quotes the next character. */
930 if (RE && *base_pat == '\\')
931 {
932 len--;
933 base_pat++;
934 }
935 *pat++ = (trt ? trt[*base_pat++] : *base_pat++);
936 }
937 len = pat - patbuf;
938 pat = base_pat = patbuf;
939 }
ca1d1d23
JB
940 /* The general approach is that we are going to maintain that we know */
941 /* the first (closest to the present position, in whatever direction */
942 /* we're searching) character that could possibly be the last */
943 /* (furthest from present position) character of a valid match. We */
944 /* advance the state of our knowledge by looking at that character */
945 /* and seeing whether it indeed matches the last character of the */
946 /* pattern. If it does, we take a closer look. If it does not, we */
947 /* move our pointer (to putative last characters) as far as is */
948 /* logically possible. This amount of movement, which I call a */
949 /* stride, will be the length of the pattern if the actual character */
950 /* appears nowhere in the pattern, otherwise it will be the distance */
951 /* from the last occurrence of that character to the end of the */
952 /* pattern. */
953 /* As a coding trick, an enormous stride is coded into the table for */
954 /* characters that match the last character. This allows use of only */
955 /* a single test, a test for having gone past the end of the */
956 /* permissible match region, to test for both possible matches (when */
957 /* the stride goes past the end immediately) and failure to */
958 /* match (where you get nudged past the end one stride at a time). */
959
960 /* Here we make a "mickey mouse" BM table. The stride of the search */
961 /* is determined only by the last character of the putative match. */
962 /* If that character does not match, we will stride the proper */
963 /* distance to propose a match that superimposes it on the last */
964 /* instance of a character that matches it (per trt), or misses */
965 /* it entirely if there is none. */
966
967 dirlen = len * direction;
968 infinity = dirlen - (lim + pos + len + len) * direction;
969 if (direction < 0)
970 pat = (base_pat += len - 1);
971 BM_tab_base = BM_tab;
972 BM_tab += 0400;
973 j = dirlen; /* to get it in a register */
974 /* A character that does not appear in the pattern induces a */
975 /* stride equal to the pattern length. */
976 while (BM_tab_base != BM_tab)
977 {
978 *--BM_tab = j;
979 *--BM_tab = j;
980 *--BM_tab = j;
981 *--BM_tab = j;
982 }
983 i = 0;
984 while (i != infinity)
985 {
986 j = pat[i]; i += direction;
987 if (i == dirlen) i = infinity;
988 if ((int) trt)
989 {
990 k = (j = trt[j]);
991 if (i == infinity)
992 stride_for_teases = BM_tab[j];
993 BM_tab[j] = dirlen - i;
994 /* A translation table is accompanied by its inverse -- see */
995 /* comment following downcase_table for details */
996 while ((j = inverse_trt[j]) != k)
997 BM_tab[j] = dirlen - i;
998 }
999 else
1000 {
1001 if (i == infinity)
1002 stride_for_teases = BM_tab[j];
1003 BM_tab[j] = dirlen - i;
1004 }
1005 /* stride_for_teases tells how much to stride if we get a */
1006 /* match on the far character but are subsequently */
1007 /* disappointed, by recording what the stride would have been */
1008 /* for that character if the last character had been */
1009 /* different. */
1010 }
1011 infinity = dirlen - infinity;
1012 pos += dirlen - ((direction > 0) ? direction : 0);
1013 /* loop invariant - pos points at where last char (first char if reverse)
1014 of pattern would align in a possible match. */
1015 while (n != 0)
1016 {
b2c71fb4
KH
1017 /* It's been reported that some (broken) compiler thinks that
1018 Boolean expressions in an arithmetic context are unsigned.
1019 Using an explicit ?1:0 prevents this. */
1020 if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0)
ca1d1d23
JB
1021 return (n * (0 - direction));
1022 /* First we do the part we can by pointers (maybe nothing) */
1023 QUIT;
1024 pat = base_pat;
1025 limit = pos - dirlen + direction;
1026 limit = ((direction > 0)
1027 ? BUFFER_CEILING_OF (limit)
1028 : BUFFER_FLOOR_OF (limit));
1029 /* LIMIT is now the last (not beyond-last!) value
1030 POS can take on without hitting edge of buffer or the gap. */
1031 limit = ((direction > 0)
1032 ? min (lim - 1, min (limit, pos + 20000))
1033 : max (lim, max (limit, pos - 20000)));
1034 if ((limit - pos) * direction > 20)
1035 {
1036 p_limit = &FETCH_CHAR (limit);
1037 p2 = (cursor = &FETCH_CHAR (pos));
1038 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1039 while (1) /* use one cursor setting as long as i can */
1040 {
1041 if (direction > 0) /* worth duplicating */
1042 {
1043 /* Use signed comparison if appropriate
1044 to make cursor+infinity sure to be > p_limit.
1045 Assuming that the buffer lies in a range of addresses
1046 that are all "positive" (as ints) or all "negative",
1047 either kind of comparison will work as long
1048 as we don't step by infinity. So pick the kind
1049 that works when we do step by infinity. */
1050 if ((int) (p_limit + infinity) > (int) p_limit)
1051 while ((int) cursor <= (int) p_limit)
1052 cursor += BM_tab[*cursor];
1053 else
1054 while ((unsigned int) cursor <= (unsigned int) p_limit)
1055 cursor += BM_tab[*cursor];
1056 }
1057 else
1058 {
1059 if ((int) (p_limit + infinity) < (int) p_limit)
1060 while ((int) cursor >= (int) p_limit)
1061 cursor += BM_tab[*cursor];
1062 else
1063 while ((unsigned int) cursor >= (unsigned int) p_limit)
1064 cursor += BM_tab[*cursor];
1065 }
1066/* If you are here, cursor is beyond the end of the searched region. */
1067 /* This can happen if you match on the far character of the pattern, */
1068 /* because the "stride" of that character is infinity, a number able */
1069 /* to throw you well beyond the end of the search. It can also */
1070 /* happen if you fail to match within the permitted region and would */
1071 /* otherwise try a character beyond that region */
1072 if ((cursor - p_limit) * direction <= len)
1073 break; /* a small overrun is genuine */
1074 cursor -= infinity; /* large overrun = hit */
1075 i = dirlen - direction;
1076 if ((int) trt)
1077 {
1078 while ((i -= direction) + direction != 0)
1079 if (pat[i] != trt[*(cursor -= direction)])
1080 break;
1081 }
1082 else
1083 {
1084 while ((i -= direction) + direction != 0)
1085 if (pat[i] != *(cursor -= direction))
1086 break;
1087 }
1088 cursor += dirlen - i - direction; /* fix cursor */
1089 if (i + direction == 0)
1090 {
1091 cursor -= direction;
1113d9db 1092
ca325161
RS
1093 set_search_regs (pos + cursor - p2 + ((direction > 0)
1094 ? 1 - len : 0),
1095 len);
1096
ca1d1d23
JB
1097 if ((n -= direction) != 0)
1098 cursor += dirlen; /* to resume search */
1099 else
1100 return ((direction > 0)
1101 ? search_regs.end[0] : search_regs.start[0]);
1102 }
1103 else
1104 cursor += stride_for_teases; /* <sigh> we lose - */
1105 }
1106 pos += cursor - p2;
1107 }
1108 else
1109 /* Now we'll pick up a clump that has to be done the hard */
1110 /* way because it covers a discontinuity */
1111 {
1112 limit = ((direction > 0)
1113 ? BUFFER_CEILING_OF (pos - dirlen + 1)
1114 : BUFFER_FLOOR_OF (pos - dirlen - 1));
1115 limit = ((direction > 0)
1116 ? min (limit + len, lim - 1)
1117 : max (limit - len, lim));
1118 /* LIMIT is now the last value POS can have
1119 and still be valid for a possible match. */
1120 while (1)
1121 {
1122 /* This loop can be coded for space rather than */
1123 /* speed because it will usually run only once. */
1124 /* (the reach is at most len + 21, and typically */
1125 /* does not exceed len) */
1126 while ((limit - pos) * direction >= 0)
1127 pos += BM_tab[FETCH_CHAR(pos)];
1128 /* now run the same tests to distinguish going off the */
eb8c3be9 1129 /* end, a match or a phony match. */
ca1d1d23
JB
1130 if ((pos - limit) * direction <= len)
1131 break; /* ran off the end */
1132 /* Found what might be a match.
1133 Set POS back to last (first if reverse) char pos. */
1134 pos -= infinity;
1135 i = dirlen - direction;
1136 while ((i -= direction) + direction != 0)
1137 {
1138 pos -= direction;
1139 if (pat[i] != (((int) trt)
1140 ? trt[FETCH_CHAR(pos)]
1141 : FETCH_CHAR (pos)))
1142 break;
1143 }
1144 /* Above loop has moved POS part or all the way
1145 back to the first char pos (last char pos if reverse).
1146 Set it once again at the last (first if reverse) char. */
1147 pos += dirlen - i- direction;
1148 if (i + direction == 0)
1149 {
1150 pos -= direction;
1113d9db 1151
ca325161
RS
1152 set_search_regs (pos + ((direction > 0) ? 1 - len : 0),
1153 len);
1154
ca1d1d23
JB
1155 if ((n -= direction) != 0)
1156 pos += dirlen; /* to resume search */
1157 else
1158 return ((direction > 0)
1159 ? search_regs.end[0] : search_regs.start[0]);
1160 }
1161 else
1162 pos += stride_for_teases;
1163 }
1164 }
1165 /* We have done one clump. Can we continue? */
1166 if ((lim - pos) * direction < 0)
1167 return ((0 - n) * direction);
1168 }
1169 return pos;
1170 }
1171}
ca325161
RS
1172
1173/* Record beginning BEG and end BEG + LEN
1174 for a match just found in the current buffer. */
1175
1176static void
1177set_search_regs (beg, len)
1178 int beg, len;
1179{
1180 /* Make sure we have registers in which to store
1181 the match position. */
1182 if (search_regs.num_regs == 0)
1183 {
1184 regoff_t *starts, *ends;
1185
1186 starts = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1187 ends = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1188 BLOCK_INPUT;
1189 re_set_registers (&searchbuf,
1190 &search_regs,
1191 2, starts, ends);
1192 UNBLOCK_INPUT;
1193 }
1194
1195 search_regs.start[0] = beg;
1196 search_regs.end[0] = beg + len;
a3668d92 1197 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 1198}
ca1d1d23
JB
1199\f
1200/* Given a string of words separated by word delimiters,
1201 compute a regexp that matches those exact words
1202 separated by arbitrary punctuation. */
1203
1204static Lisp_Object
1205wordify (string)
1206 Lisp_Object string;
1207{
1208 register unsigned char *p, *o;
1209 register int i, len, punct_count = 0, word_count = 0;
1210 Lisp_Object val;
1211
1212 CHECK_STRING (string, 0);
1213 p = XSTRING (string)->data;
1214 len = XSTRING (string)->size;
1215
1216 for (i = 0; i < len; i++)
1217 if (SYNTAX (p[i]) != Sword)
1218 {
1219 punct_count++;
1220 if (i > 0 && SYNTAX (p[i-1]) == Sword) word_count++;
1221 }
1222 if (SYNTAX (p[len-1]) == Sword) word_count++;
1223 if (!word_count) return build_string ("");
1224
1225 val = make_string (p, len - punct_count + 5 * (word_count - 1) + 4);
1226
1227 o = XSTRING (val)->data;
1228 *o++ = '\\';
1229 *o++ = 'b';
1230
1231 for (i = 0; i < len; i++)
1232 if (SYNTAX (p[i]) == Sword)
1233 *o++ = p[i];
1234 else if (i > 0 && SYNTAX (p[i-1]) == Sword && --word_count)
1235 {
1236 *o++ = '\\';
1237 *o++ = 'W';
1238 *o++ = '\\';
1239 *o++ = 'W';
1240 *o++ = '*';
1241 }
1242
1243 *o++ = '\\';
1244 *o++ = 'b';
1245
1246 return val;
1247}
1248\f
1249DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
1250 "sSearch backward: ",
1251 "Search backward from point for STRING.\n\
1252Set point to the beginning of the occurrence found, and return point.\n\
1253An optional second argument bounds the search; it is a buffer position.\n\
1254The match found must not extend before that position.\n\
1255Optional third argument, if t, means if fail just return nil (no error).\n\
1256 If not nil and not t, position at limit of search and return nil.\n\
1257Optional fourth argument is repeat count--search for successive occurrences.\n\
1258See also the functions `match-beginning', `match-end' and `replace-match'.")
1259 (string, bound, noerror, count)
1260 Lisp_Object string, bound, noerror, count;
1261{
1262 return search_command (string, bound, noerror, count, -1, 0);
1263}
1264
1265DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "sSearch: ",
1266 "Search forward from point for STRING.\n\
1267Set point to the end of the occurrence found, and return point.\n\
1268An optional second argument bounds the search; it is a buffer position.\n\
1269The match found must not extend after that position. nil is equivalent\n\
1270 to (point-max).\n\
1271Optional third argument, if t, means if fail just return nil (no error).\n\
1272 If not nil and not t, move to limit of search and return nil.\n\
1273Optional fourth argument is repeat count--search for successive occurrences.\n\
1274See also the functions `match-beginning', `match-end' and `replace-match'.")
1275 (string, bound, noerror, count)
1276 Lisp_Object string, bound, noerror, count;
1277{
1278 return search_command (string, bound, noerror, count, 1, 0);
1279}
1280
1281DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
1282 "sWord search backward: ",
1283 "Search backward from point for STRING, ignoring differences in punctuation.\n\
1284Set point to the beginning of the occurrence found, and return point.\n\
1285An optional second argument bounds the search; it is a buffer position.\n\
1286The match found must not extend before that position.\n\
1287Optional third argument, if t, means if fail just return nil (no error).\n\
1288 If not nil and not t, move to limit of search and return nil.\n\
1289Optional fourth argument is repeat count--search for successive occurrences.")
1290 (string, bound, noerror, count)
1291 Lisp_Object string, bound, noerror, count;
1292{
1293 return search_command (wordify (string), bound, noerror, count, -1, 1);
1294}
1295
1296DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
1297 "sWord search: ",
1298 "Search forward from point for STRING, ignoring differences in punctuation.\n\
1299Set point to the end of the occurrence found, and return point.\n\
1300An optional second argument bounds the search; it is a buffer position.\n\
1301The match found must not extend after that position.\n\
1302Optional third argument, if t, means if fail just return nil (no error).\n\
1303 If not nil and not t, move to limit of search and return nil.\n\
1304Optional fourth argument is repeat count--search for successive occurrences.")
1305 (string, bound, noerror, count)
1306 Lisp_Object string, bound, noerror, count;
1307{
1308 return search_command (wordify (string), bound, noerror, count, 1, 1);
1309}
1310
1311DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
1312 "sRE search backward: ",
1313 "Search backward from point for match for regular expression REGEXP.\n\
1314Set point to the beginning of the match, and return point.\n\
1315The match found is the one starting last in the buffer\n\
19c0a730 1316and yet ending before the origin of the search.\n\
ca1d1d23
JB
1317An optional second argument bounds the search; it is a buffer position.\n\
1318The match found must start at or after that position.\n\
1319Optional third argument, if t, means if fail just return nil (no error).\n\
1320 If not nil and not t, move to limit of search and return nil.\n\
1321Optional fourth argument is repeat count--search for successive occurrences.\n\
1322See also the functions `match-beginning', `match-end' and `replace-match'.")
19c0a730
KH
1323 (regexp, bound, noerror, count)
1324 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 1325{
19c0a730 1326 return search_command (regexp, bound, noerror, count, -1, 1);
ca1d1d23
JB
1327}
1328
1329DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
1330 "sRE search: ",
1331 "Search forward from point for regular expression REGEXP.\n\
1332Set point to the end of the occurrence found, and return point.\n\
1333An optional second argument bounds the search; it is a buffer position.\n\
1334The match found must not extend after that position.\n\
1335Optional third argument, if t, means if fail just return nil (no error).\n\
1336 If not nil and not t, move to limit of search and return nil.\n\
1337Optional fourth argument is repeat count--search for successive occurrences.\n\
1338See also the functions `match-beginning', `match-end' and `replace-match'.")
19c0a730
KH
1339 (regexp, bound, noerror, count)
1340 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 1341{
19c0a730 1342 return search_command (regexp, bound, noerror, count, 1, 1);
ca1d1d23
JB
1343}
1344\f
080c45fd 1345DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 4, 0,
ca1d1d23
JB
1346 "Replace text matched by last search with NEWTEXT.\n\
1347If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
5b9cf4b2
RS
1348Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
1349based on the replaced text.\n\
1350If the replaced text has only capital letters\n\
1351and has at least one multiletter word, convert NEWTEXT to all caps.\n\
1352If the replaced text has at least one word starting with a capital letter,\n\
1353then capitalize each word in NEWTEXT.\n\n\
ca1d1d23
JB
1354If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
1355Otherwise treat `\\' as special:\n\
1356 `\\&' in NEWTEXT means substitute original matched text.\n\
1357 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
1358 If Nth parens didn't match, substitute nothing.\n\
1359 `\\\\' means insert one `\\'.\n\
1113d9db 1360FIXEDCASE and LITERAL are optional arguments.\n\
080c45fd
RS
1361Leaves point at end of replacement text.\n\
1362\n\
1363The optional fourth argument STRING can be a string to modify.\n\
1364In that case, this function creates and returns a new string\n\
1365which is made by replacing the part of STRING that was matched.")
1366 (newtext, fixedcase, literal, string)
1367 Lisp_Object newtext, fixedcase, literal, string;
ca1d1d23
JB
1368{
1369 enum { nochange, all_caps, cap_initial } case_action;
1370 register int pos, last;
1371 int some_multiletter_word;
97832bd0 1372 int some_lowercase;
73dc8771 1373 int some_uppercase;
208767c3 1374 int some_nonuppercase_initial;
ca1d1d23
JB
1375 register int c, prevc;
1376 int inslen;
1377
16fdc568 1378 CHECK_STRING (newtext, 0);
ca1d1d23 1379
080c45fd
RS
1380 if (! NILP (string))
1381 CHECK_STRING (string, 4);
1382
ca1d1d23
JB
1383 case_action = nochange; /* We tried an initialization */
1384 /* but some C compilers blew it */
4746118a
JB
1385
1386 if (search_regs.num_regs <= 0)
1387 error ("replace-match called before any match found");
1388
080c45fd
RS
1389 if (NILP (string))
1390 {
1391 if (search_regs.start[0] < BEGV
1392 || search_regs.start[0] > search_regs.end[0]
1393 || search_regs.end[0] > ZV)
1394 args_out_of_range (make_number (search_regs.start[0]),
1395 make_number (search_regs.end[0]));
1396 }
1397 else
1398 {
1399 if (search_regs.start[0] < 0
1400 || search_regs.start[0] > search_regs.end[0]
1401 || search_regs.end[0] > XSTRING (string)->size)
1402 args_out_of_range (make_number (search_regs.start[0]),
1403 make_number (search_regs.end[0]));
1404 }
ca1d1d23
JB
1405
1406 if (NILP (fixedcase))
1407 {
1408 /* Decide how to casify by examining the matched text. */
1409
1410 last = search_regs.end[0];
1411 prevc = '\n';
1412 case_action = all_caps;
1413
1414 /* some_multiletter_word is set nonzero if any original word
1415 is more than one letter long. */
1416 some_multiletter_word = 0;
97832bd0 1417 some_lowercase = 0;
208767c3 1418 some_nonuppercase_initial = 0;
73dc8771 1419 some_uppercase = 0;
ca1d1d23
JB
1420
1421 for (pos = search_regs.start[0]; pos < last; pos++)
1422 {
080c45fd
RS
1423 if (NILP (string))
1424 c = FETCH_CHAR (pos);
1425 else
1426 c = XSTRING (string)->data[pos];
1427
ca1d1d23
JB
1428 if (LOWERCASEP (c))
1429 {
1430 /* Cannot be all caps if any original char is lower case */
1431
97832bd0 1432 some_lowercase = 1;
ca1d1d23 1433 if (SYNTAX (prevc) != Sword)
208767c3 1434 some_nonuppercase_initial = 1;
ca1d1d23
JB
1435 else
1436 some_multiletter_word = 1;
1437 }
1438 else if (!NOCASEP (c))
1439 {
73dc8771 1440 some_uppercase = 1;
97832bd0 1441 if (SYNTAX (prevc) != Sword)
c4d460ce 1442 ;
97832bd0 1443 else
ca1d1d23
JB
1444 some_multiletter_word = 1;
1445 }
208767c3
RS
1446 else
1447 {
1448 /* If the initial is a caseless word constituent,
1449 treat that like a lowercase initial. */
1450 if (SYNTAX (prevc) != Sword)
1451 some_nonuppercase_initial = 1;
1452 }
ca1d1d23
JB
1453
1454 prevc = c;
1455 }
1456
97832bd0
RS
1457 /* Convert to all caps if the old text is all caps
1458 and has at least one multiletter word. */
1459 if (! some_lowercase && some_multiletter_word)
1460 case_action = all_caps;
c4d460ce 1461 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 1462 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 1463 case_action = cap_initial;
208767c3 1464 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
1465 /* Should x -> yz, operating on X, give Yz or YZ?
1466 We'll assume the latter. */
1467 case_action = all_caps;
97832bd0
RS
1468 else
1469 case_action = nochange;
ca1d1d23
JB
1470 }
1471
080c45fd
RS
1472 /* Do replacement in a string. */
1473 if (!NILP (string))
1474 {
1475 Lisp_Object before, after;
1476
1477 before = Fsubstring (string, make_number (0),
1478 make_number (search_regs.start[0]));
1479 after = Fsubstring (string, make_number (search_regs.end[0]), Qnil);
1480
1481 /* Do case substitution into NEWTEXT if desired. */
1482 if (NILP (literal))
1483 {
1484 int lastpos = -1;
1485 /* We build up the substituted string in ACCUM. */
1486 Lisp_Object accum;
1487 Lisp_Object middle;
1488
1489 accum = Qnil;
1490
1491 for (pos = 0; pos < XSTRING (newtext)->size; pos++)
1492 {
1493 int substart = -1;
1494 int subend;
1495
1496 c = XSTRING (newtext)->data[pos];
1497 if (c == '\\')
1498 {
1499 c = XSTRING (newtext)->data[++pos];
1500 if (c == '&')
1501 {
1502 substart = search_regs.start[0];
1503 subend = search_regs.end[0];
1504 }
1505 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
1506 {
1507 if (search_regs.start[c - '0'] >= 1)
1508 {
1509 substart = search_regs.start[c - '0'];
1510 subend = search_regs.end[c - '0'];
1511 }
1512 }
1513 }
1514 if (substart >= 0)
1515 {
1516 if (pos - 1 != lastpos + 1)
1517 middle = Fsubstring (newtext, lastpos + 1, pos - 1);
1518 else
1519 middle = Qnil;
1520 accum = concat3 (accum, middle,
1521 Fsubstring (string, make_number (substart),
1522 make_number (subend)));
1523 lastpos = pos;
1524 }
1525 }
1526
1527 if (pos != lastpos + 1)
1528 middle = Fsubstring (newtext, lastpos + 1, pos);
1529 else
1530 middle = Qnil;
1531
1532 newtext = concat2 (accum, middle);
1533 }
1534
1535 if (case_action == all_caps)
1536 newtext = Fupcase (newtext);
1537 else if (case_action == cap_initial)
1538 newtext = upcase_initials (newtext);
1539
1540 return concat3 (before, newtext, after);
1541 }
1542
9a76659d
JB
1543 /* We insert the replacement text before the old text, and then
1544 delete the original text. This means that markers at the
1545 beginning or end of the original will float to the corresponding
1546 position in the replacement. */
1547 SET_PT (search_regs.start[0]);
ca1d1d23 1548 if (!NILP (literal))
16fdc568 1549 Finsert_and_inherit (1, &newtext);
ca1d1d23
JB
1550 else
1551 {
1552 struct gcpro gcpro1;
16fdc568 1553 GCPRO1 (newtext);
ca1d1d23 1554
16fdc568 1555 for (pos = 0; pos < XSTRING (newtext)->size; pos++)
ca1d1d23 1556 {
9a76659d
JB
1557 int offset = point - search_regs.start[0];
1558
16fdc568 1559 c = XSTRING (newtext)->data[pos];
ca1d1d23
JB
1560 if (c == '\\')
1561 {
16fdc568 1562 c = XSTRING (newtext)->data[++pos];
ca1d1d23 1563 if (c == '&')
9a76659d
JB
1564 Finsert_buffer_substring
1565 (Fcurrent_buffer (),
1566 make_number (search_regs.start[0] + offset),
1567 make_number (search_regs.end[0] + offset));
78445046 1568 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
1569 {
1570 if (search_regs.start[c - '0'] >= 1)
9a76659d
JB
1571 Finsert_buffer_substring
1572 (Fcurrent_buffer (),
1573 make_number (search_regs.start[c - '0'] + offset),
1574 make_number (search_regs.end[c - '0'] + offset));
ca1d1d23
JB
1575 }
1576 else
1577 insert_char (c);
1578 }
1579 else
1580 insert_char (c);
1581 }
1582 UNGCPRO;
1583 }
1584
9a76659d
JB
1585 inslen = point - (search_regs.start[0]);
1586 del_range (search_regs.start[0] + inslen, search_regs.end[0] + inslen);
ca1d1d23
JB
1587
1588 if (case_action == all_caps)
1589 Fupcase_region (make_number (point - inslen), make_number (point));
1590 else if (case_action == cap_initial)
1591 upcase_initials_region (make_number (point - inslen), make_number (point));
1592 return Qnil;
1593}
1594\f
1595static Lisp_Object
1596match_limit (num, beginningp)
1597 Lisp_Object num;
1598 int beginningp;
1599{
1600 register int n;
1601
1602 CHECK_NUMBER (num, 0);
1603 n = XINT (num);
4746118a
JB
1604 if (n < 0 || n >= search_regs.num_regs)
1605 args_out_of_range (num, make_number (search_regs.num_regs));
1606 if (search_regs.num_regs <= 0
1607 || search_regs.start[n] < 0)
ca1d1d23
JB
1608 return Qnil;
1609 return (make_number ((beginningp) ? search_regs.start[n]
1610 : search_regs.end[n]));
1611}
1612
1613DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
1614 "Return position of start of text matched by last search.\n\
16fdc568
BF
1615NUM specifies which parenthesized expression in the last regexp.\n\
1616 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.\n\
ca1d1d23
JB
1617Zero means the entire text matched by the whole regexp or whole string.")
1618 (num)
1619 Lisp_Object num;
1620{
1621 return match_limit (num, 1);
1622}
1623
1624DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
1625 "Return position of end of text matched by last search.\n\
1626ARG, a number, specifies which parenthesized expression in the last regexp.\n\
1627 Value is nil if ARGth pair didn't match, or there were less than ARG pairs.\n\
1628Zero means the entire text matched by the whole regexp or whole string.")
1629 (num)
1630 Lisp_Object num;
1631{
1632 return match_limit (num, 0);
1633}
1634
1635DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 0, 0,
1636 "Return a list containing all info on what the last search matched.\n\
1637Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
1638All the elements are markers or nil (nil if the Nth pair didn't match)\n\
1639if the last match was on a buffer; integers or nil if a string was matched.\n\
1640Use `store-match-data' to reinstate the data in this list.")
1641 ()
1642{
4746118a 1643 Lisp_Object *data;
ca1d1d23
JB
1644 int i, len;
1645
daa37602
JB
1646 if (NILP (last_thing_searched))
1647 error ("match-data called before any match found");
1648
4746118a
JB
1649 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
1650 * sizeof (Lisp_Object));
1651
ca1d1d23 1652 len = -1;
4746118a 1653 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1654 {
1655 int start = search_regs.start[i];
1656 if (start >= 0)
1657 {
daa37602 1658 if (EQ (last_thing_searched, Qt))
ca1d1d23 1659 {
c235cce7
KH
1660 XSETFASTINT (data[2 * i], start);
1661 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 1662 }
0ed62dc7 1663 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
1664 {
1665 data[2 * i] = Fmake_marker ();
daa37602
JB
1666 Fset_marker (data[2 * i],
1667 make_number (start),
1668 last_thing_searched);
ca1d1d23
JB
1669 data[2 * i + 1] = Fmake_marker ();
1670 Fset_marker (data[2 * i + 1],
daa37602
JB
1671 make_number (search_regs.end[i]),
1672 last_thing_searched);
ca1d1d23 1673 }
daa37602
JB
1674 else
1675 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
1676 abort ();
1677
ca1d1d23
JB
1678 len = i;
1679 }
1680 else
1681 data[2 * i] = data [2 * i + 1] = Qnil;
1682 }
1683 return Flist (2 * len + 2, data);
1684}
1685
1686
1687DEFUN ("store-match-data", Fstore_match_data, Sstore_match_data, 1, 1, 0,
1688 "Set internal data on last search match from elements of LIST.\n\
1689LIST should have been created by calling `match-data' previously.")
1690 (list)
1691 register Lisp_Object list;
1692{
1693 register int i;
1694 register Lisp_Object marker;
1695
1696 if (!CONSP (list) && !NILP (list))
b37902c8 1697 list = wrong_type_argument (Qconsp, list);
ca1d1d23 1698
daa37602
JB
1699 /* Unless we find a marker with a buffer in LIST, assume that this
1700 match data came from a string. */
1701 last_thing_searched = Qt;
1702
4746118a
JB
1703 /* Allocate registers if they don't already exist. */
1704 {
d084e942 1705 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
1706
1707 if (length > search_regs.num_regs)
1708 {
1113d9db
JB
1709 if (search_regs.num_regs == 0)
1710 {
1711 search_regs.start
1712 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
1713 search_regs.end
1714 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
1715 }
4746118a 1716 else
1113d9db
JB
1717 {
1718 search_regs.start
1719 = (regoff_t *) xrealloc (search_regs.start,
1720 length * sizeof (regoff_t));
1721 search_regs.end
1722 = (regoff_t *) xrealloc (search_regs.end,
1723 length * sizeof (regoff_t));
1724 }
4746118a 1725
9ac0d9e0 1726 BLOCK_INPUT;
1113d9db
JB
1727 re_set_registers (&searchbuf, &search_regs, length,
1728 search_regs.start, search_regs.end);
9ac0d9e0 1729 UNBLOCK_INPUT;
4746118a
JB
1730 }
1731 }
1732
1733 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1734 {
1735 marker = Fcar (list);
1736 if (NILP (marker))
1737 {
1738 search_regs.start[i] = -1;
1739 list = Fcdr (list);
1740 }
1741 else
1742 {
0ed62dc7 1743 if (MARKERP (marker))
daa37602
JB
1744 {
1745 if (XMARKER (marker)->buffer == 0)
c235cce7 1746 XSETFASTINT (marker, 0);
daa37602 1747 else
a3668d92 1748 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
daa37602 1749 }
ca1d1d23
JB
1750
1751 CHECK_NUMBER_COERCE_MARKER (marker, 0);
1752 search_regs.start[i] = XINT (marker);
1753 list = Fcdr (list);
1754
1755 marker = Fcar (list);
0ed62dc7 1756 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
c235cce7 1757 XSETFASTINT (marker, 0);
ca1d1d23
JB
1758
1759 CHECK_NUMBER_COERCE_MARKER (marker, 0);
1760 search_regs.end[i] = XINT (marker);
1761 }
1762 list = Fcdr (list);
1763 }
1764
1765 return Qnil;
1766}
1767
1768/* Quote a string to inactivate reg-expr chars */
1769
1770DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
1771 "Return a regexp string which matches exactly STRING and nothing else.")
1772 (str)
1773 Lisp_Object str;
1774{
1775 register unsigned char *in, *out, *end;
1776 register unsigned char *temp;
1777
1778 CHECK_STRING (str, 0);
1779
1780 temp = (unsigned char *) alloca (XSTRING (str)->size * 2);
1781
1782 /* Now copy the data into the new string, inserting escapes. */
1783
1784 in = XSTRING (str)->data;
1785 end = in + XSTRING (str)->size;
1786 out = temp;
1787
1788 for (; in != end; in++)
1789 {
1790 if (*in == '[' || *in == ']'
1791 || *in == '*' || *in == '.' || *in == '\\'
1792 || *in == '?' || *in == '+'
1793 || *in == '^' || *in == '$')
1794 *out++ = '\\';
1795 *out++ = *in;
1796 }
1797
1798 return make_string (temp, out - temp);
1799}
1800\f
1801syms_of_search ()
1802{
1803 register int i;
1804
1805 searchbuf.allocated = 100;
8c0e7b73 1806 searchbuf.buffer = (unsigned char *) malloc (searchbuf.allocated);
ca1d1d23
JB
1807 searchbuf.fastmap = search_fastmap;
1808
1809 Qsearch_failed = intern ("search-failed");
1810 staticpro (&Qsearch_failed);
1811 Qinvalid_regexp = intern ("invalid-regexp");
1812 staticpro (&Qinvalid_regexp);
1813
1814 Fput (Qsearch_failed, Qerror_conditions,
1815 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
1816 Fput (Qsearch_failed, Qerror_message,
1817 build_string ("Search failed"));
1818
1819 Fput (Qinvalid_regexp, Qerror_conditions,
1820 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
1821 Fput (Qinvalid_regexp, Qerror_message,
1822 build_string ("Invalid regexp"));
1823
1824 last_regexp = Qnil;
1825 staticpro (&last_regexp);
1826
daa37602
JB
1827 last_thing_searched = Qnil;
1828 staticpro (&last_thing_searched);
1829
ca1d1d23
JB
1830 defsubr (&Sstring_match);
1831 defsubr (&Slooking_at);
1832 defsubr (&Sskip_chars_forward);
1833 defsubr (&Sskip_chars_backward);
17431c60
RS
1834 defsubr (&Sskip_syntax_forward);
1835 defsubr (&Sskip_syntax_backward);
ca1d1d23
JB
1836 defsubr (&Ssearch_forward);
1837 defsubr (&Ssearch_backward);
1838 defsubr (&Sword_search_forward);
1839 defsubr (&Sword_search_backward);
1840 defsubr (&Sre_search_forward);
1841 defsubr (&Sre_search_backward);
1842 defsubr (&Sreplace_match);
1843 defsubr (&Smatch_beginning);
1844 defsubr (&Smatch_end);
1845 defsubr (&Smatch_data);
1846 defsubr (&Sstore_match_data);
1847 defsubr (&Sregexp_quote);
1848}