Update FSF's address in the preamble.
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
3a22ee35 2 Copyright (C) 1985, 1986, 1987, 1993, 1994 Free Software Foundation, Inc.
ca1d1d23
JB
3
4This file is part of GNU Emacs.
5
6GNU Emacs is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
7c938215 8the Free Software Foundation; either version 2, or (at your option)
ca1d1d23
JB
9any later version.
10
11GNU Emacs is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU Emacs; see the file COPYING. If not, write to
3b7ad313
EN
18the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19Boston, MA 02111-1307, USA. */
ca1d1d23
JB
20
21
18160b98 22#include <config.h>
ca1d1d23
JB
23#include "lisp.h"
24#include "syntax.h"
25#include "buffer.h"
9169c321 26#include "region-cache.h"
ca1d1d23 27#include "commands.h"
9ac0d9e0 28#include "blockinput.h"
4746118a 29
ca1d1d23
JB
30#include <sys/types.h>
31#include "regex.h"
32
487282dc 33#define REGEXP_CACHE_SIZE 5
ca1d1d23 34
487282dc
KH
35/* If the regexp is non-nil, then the buffer contains the compiled form
36 of that regexp, suitable for searching. */
37struct regexp_cache {
38 struct regexp_cache *next;
39 Lisp_Object regexp;
40 struct re_pattern_buffer buf;
41 char fastmap[0400];
b819a390
RS
42 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
43 char posix;
487282dc 44};
ca1d1d23 45
487282dc
KH
46/* The instances of that struct. */
47struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 48
487282dc
KH
49/* The head of the linked list; points to the most recently used buffer. */
50struct regexp_cache *searchbuf_head;
ca1d1d23 51
ca1d1d23 52
4746118a
JB
53/* Every call to re_match, etc., must pass &search_regs as the regs
54 argument unless you can show it is unnecessary (i.e., if re_match
55 is certainly going to be called again before region-around-match
56 can be called).
57
58 Since the registers are now dynamically allocated, we need to make
59 sure not to refer to the Nth register before checking that it has
1113d9db
JB
60 been allocated by checking search_regs.num_regs.
61
62 The regex code keeps track of whether it has allocated the search
487282dc
KH
63 buffer using bits in the re_pattern_buffer. This means that whenever
64 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
65 allocated any registers, and will allocate new registers the next
66 time you call a searching or matching function. Therefore, we need
67 to call re_set_registers after compiling a new pattern or after
68 setting the match registers, so that the regex functions will be
69 able to free or re-allocate it properly. */
ca1d1d23
JB
70static struct re_registers search_regs;
71
daa37602
JB
72/* The buffer in which the last search was performed, or
73 Qt if the last search was done in a string;
74 Qnil if no searching has been done yet. */
75static Lisp_Object last_thing_searched;
ca1d1d23 76
8e6208c5 77/* error condition signaled when regexp compile_pattern fails */
ca1d1d23
JB
78
79Lisp_Object Qinvalid_regexp;
80
ca325161 81static void set_search_regs ();
044f81f1 82static void save_search_regs ();
ca325161 83
b819a390
RS
84static int search_buffer ();
85
ca1d1d23
JB
86static void
87matcher_overflow ()
88{
89 error ("Stack overflow in regexp matcher");
90}
91
92#ifdef __STDC__
93#define CONST const
94#else
95#define CONST
96#endif
97
b819a390
RS
98/* Compile a regexp and signal a Lisp error if anything goes wrong.
99 PATTERN is the pattern to compile.
100 CP is the place to put the result.
101 TRANSLATE is a translation table for ignoring case, or NULL for none.
102 REGP is the structure that says where to store the "register"
103 values that will result from matching this pattern.
104 If it is 0, we should compile the pattern not to record any
105 subexpression bounds.
106 POSIX is nonzero if we want full backtracking (POSIX style)
107 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 108
487282dc 109static void
b819a390 110compile_pattern_1 (cp, pattern, translate, regp, posix)
487282dc 111 struct regexp_cache *cp;
ca1d1d23 112 Lisp_Object pattern;
b1428bd8 113 Lisp_Object *translate;
487282dc 114 struct re_registers *regp;
b819a390 115 int posix;
ca1d1d23
JB
116{
117 CONST char *val;
b819a390 118 reg_syntax_t old;
ca1d1d23 119
487282dc
KH
120 cp->regexp = Qnil;
121 cp->buf.translate = translate;
b819a390 122 cp->posix = posix;
9ac0d9e0 123 BLOCK_INPUT;
b819a390
RS
124 old = re_set_syntax (RE_SYNTAX_EMACS
125 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
b90d9e80 126 val = (CONST char *) re_compile_pattern ((char *) XSTRING (pattern)->data,
487282dc 127 XSTRING (pattern)->size, &cp->buf);
b819a390 128 re_set_syntax (old);
9ac0d9e0 129 UNBLOCK_INPUT;
ca1d1d23 130 if (val)
487282dc 131 Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
1113d9db 132
487282dc 133 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
134}
135
136/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
137 the cache.
138 PATTERN is the pattern to compile.
139 TRANSLATE is a translation table for ignoring case, or NULL for none.
140 REGP is the structure that says where to store the "register"
141 values that will result from matching this pattern.
142 If it is 0, we should compile the pattern not to record any
143 subexpression bounds.
144 POSIX is nonzero if we want full backtracking (POSIX style)
145 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
146
147struct re_pattern_buffer *
b819a390 148compile_pattern (pattern, regp, translate, posix)
487282dc
KH
149 Lisp_Object pattern;
150 struct re_registers *regp;
b1428bd8 151 Lisp_Object *translate;
b819a390 152 int posix;
487282dc
KH
153{
154 struct regexp_cache *cp, **cpp;
155
156 for (cpp = &searchbuf_head; ; cpp = &cp->next)
157 {
158 cp = *cpp;
159 if (!NILP (Fstring_equal (cp->regexp, pattern))
b819a390
RS
160 && cp->buf.translate == translate
161 && cp->posix == posix)
487282dc
KH
162 break;
163
164 /* If we're at the end of the cache, compile into the last cell. */
165 if (cp->next == 0)
166 {
b819a390 167 compile_pattern_1 (cp, pattern, translate, regp, posix);
487282dc
KH
168 break;
169 }
170 }
171
172 /* When we get here, cp (aka *cpp) contains the compiled pattern,
173 either because we found it in the cache or because we just compiled it.
174 Move it to the front of the queue to mark it as most recently used. */
175 *cpp = cp->next;
176 cp->next = searchbuf_head;
177 searchbuf_head = cp;
1113d9db 178
6639708c
RS
179 /* Advise the searching functions about the space we have allocated
180 for register data. */
181 if (regp)
182 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
183
487282dc 184 return &cp->buf;
ca1d1d23
JB
185}
186
187/* Error condition used for failing searches */
188Lisp_Object Qsearch_failed;
189
190Lisp_Object
191signal_failure (arg)
192 Lisp_Object arg;
193{
194 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
195 return Qnil;
196}
197\f
b819a390
RS
198static Lisp_Object
199looking_at_1 (string, posix)
ca1d1d23 200 Lisp_Object string;
b819a390 201 int posix;
ca1d1d23
JB
202{
203 Lisp_Object val;
204 unsigned char *p1, *p2;
205 int s1, s2;
206 register int i;
487282dc 207 struct re_pattern_buffer *bufp;
ca1d1d23 208
7074fde6
FP
209 if (running_asynch_code)
210 save_search_regs ();
211
ca1d1d23 212 CHECK_STRING (string, 0);
487282dc
KH
213 bufp = compile_pattern (string, &search_regs,
214 (!NILP (current_buffer->case_fold_search)
b819a390
RS
215 ? DOWNCASE_TABLE : 0),
216 posix);
ca1d1d23
JB
217
218 immediate_quit = 1;
219 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
220
221 /* Get pointers and sizes of the two strings
222 that make up the visible portion of the buffer. */
223
224 p1 = BEGV_ADDR;
225 s1 = GPT - BEGV;
226 p2 = GAP_END_ADDR;
227 s2 = ZV - GPT;
228 if (s1 < 0)
229 {
230 p2 = p1;
231 s2 = ZV - BEGV;
232 s1 = 0;
233 }
234 if (s2 < 0)
235 {
236 s1 = ZV - BEGV;
237 s2 = 0;
238 }
239
487282dc 240 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
ca1d1d23
JB
241 point - BEGV, &search_regs,
242 ZV - BEGV);
243 if (i == -2)
244 matcher_overflow ();
245
246 val = (0 <= i ? Qt : Qnil);
4746118a 247 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
248 if (search_regs.start[i] >= 0)
249 {
250 search_regs.start[i] += BEGV;
251 search_regs.end[i] += BEGV;
252 }
a3668d92 253 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
254 immediate_quit = 0;
255 return val;
256}
257
b819a390 258DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
94f94972 259 "Return t if text after point matches regular expression REGEXP.\n\
b819a390
RS
260This function modifies the match data that `match-beginning',\n\
261`match-end' and `match-data' access; save and restore the match\n\
262data if you want to preserve them.")
94f94972
RS
263 (regexp)
264 Lisp_Object regexp;
b819a390 265{
94f94972 266 return looking_at_1 (regexp, 0);
b819a390
RS
267}
268
269DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
94f94972 270 "Return t if text after point matches regular expression REGEXP.\n\
b819a390
RS
271Find the longest match, in accord with Posix regular expression rules.\n\
272This function modifies the match data that `match-beginning',\n\
273`match-end' and `match-data' access; save and restore the match\n\
274data if you want to preserve them.")
94f94972
RS
275 (regexp)
276 Lisp_Object regexp;
b819a390 277{
94f94972 278 return looking_at_1 (regexp, 1);
b819a390
RS
279}
280\f
281static Lisp_Object
282string_match_1 (regexp, string, start, posix)
ca1d1d23 283 Lisp_Object regexp, string, start;
b819a390 284 int posix;
ca1d1d23
JB
285{
286 int val;
287 int s;
487282dc 288 struct re_pattern_buffer *bufp;
ca1d1d23 289
7074fde6
FP
290 if (running_asynch_code)
291 save_search_regs ();
292
ca1d1d23
JB
293 CHECK_STRING (regexp, 0);
294 CHECK_STRING (string, 1);
295
296 if (NILP (start))
297 s = 0;
298 else
299 {
300 int len = XSTRING (string)->size;
301
302 CHECK_NUMBER (start, 2);
303 s = XINT (start);
304 if (s < 0 && -s <= len)
26faf9f4 305 s = len + s;
ca1d1d23
JB
306 else if (0 > s || s > len)
307 args_out_of_range (string, start);
308 }
309
487282dc
KH
310 bufp = compile_pattern (regexp, &search_regs,
311 (!NILP (current_buffer->case_fold_search)
b819a390 312 ? DOWNCASE_TABLE : 0),
24b704fa 313 posix);
ca1d1d23 314 immediate_quit = 1;
487282dc 315 val = re_search (bufp, (char *) XSTRING (string)->data,
ca1d1d23
JB
316 XSTRING (string)->size, s, XSTRING (string)->size - s,
317 &search_regs);
318 immediate_quit = 0;
daa37602 319 last_thing_searched = Qt;
ca1d1d23
JB
320 if (val == -2)
321 matcher_overflow ();
322 if (val < 0) return Qnil;
323 return make_number (val);
324}
e59a8453 325
b819a390
RS
326DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
327 "Return index of start of first match for REGEXP in STRING, or nil.\n\
328If third arg START is non-nil, start search at that index in STRING.\n\
329For index of first char beyond the match, do (match-end 0).\n\
330`match-end' and `match-beginning' also give indices of substrings\n\
331matched by parenthesis constructs in the pattern.")
332 (regexp, string, start)
333 Lisp_Object regexp, string, start;
334{
335 return string_match_1 (regexp, string, start, 0);
336}
337
338DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
339 "Return index of start of first match for REGEXP in STRING, or nil.\n\
340Find the longest match, in accord with Posix regular expression rules.\n\
341If third arg START is non-nil, start search at that index in STRING.\n\
342For index of first char beyond the match, do (match-end 0).\n\
343`match-end' and `match-beginning' also give indices of substrings\n\
344matched by parenthesis constructs in the pattern.")
345 (regexp, string, start)
346 Lisp_Object regexp, string, start;
347{
348 return string_match_1 (regexp, string, start, 1);
349}
350
e59a8453
RS
351/* Match REGEXP against STRING, searching all of STRING,
352 and return the index of the match, or negative on failure.
353 This does not clobber the match data. */
354
355int
356fast_string_match (regexp, string)
357 Lisp_Object regexp, string;
358{
359 int val;
487282dc 360 struct re_pattern_buffer *bufp;
e59a8453 361
b819a390 362 bufp = compile_pattern (regexp, 0, 0, 0);
e59a8453 363 immediate_quit = 1;
487282dc 364 val = re_search (bufp, (char *) XSTRING (string)->data,
e59a8453
RS
365 XSTRING (string)->size, 0, XSTRING (string)->size,
366 0);
367 immediate_quit = 0;
368 return val;
369}
ca1d1d23 370\f
9169c321
JB
371/* max and min. */
372
373static int
374max (a, b)
375 int a, b;
376{
377 return ((a > b) ? a : b);
378}
379
380static int
381min (a, b)
382 int a, b;
383{
384 return ((a < b) ? a : b);
385}
386
387\f
388/* The newline cache: remembering which sections of text have no newlines. */
389
390/* If the user has requested newline caching, make sure it's on.
391 Otherwise, make sure it's off.
392 This is our cheezy way of associating an action with the change of
393 state of a buffer-local variable. */
394static void
395newline_cache_on_off (buf)
396 struct buffer *buf;
397{
398 if (NILP (buf->cache_long_line_scans))
399 {
400 /* It should be off. */
401 if (buf->newline_cache)
402 {
403 free_region_cache (buf->newline_cache);
404 buf->newline_cache = 0;
405 }
406 }
407 else
408 {
409 /* It should be on. */
410 if (buf->newline_cache == 0)
411 buf->newline_cache = new_region_cache ();
412 }
413}
414
415\f
416/* Search for COUNT instances of the character TARGET between START and END.
417
418 If COUNT is positive, search forwards; END must be >= START.
419 If COUNT is negative, search backwards for the -COUNTth instance;
420 END must be <= START.
421 If COUNT is zero, do anything you please; run rogue, for all I care.
422
423 If END is zero, use BEGV or ZV instead, as appropriate for the
424 direction indicated by COUNT.
ffd56f97
JB
425
426 If we find COUNT instances, set *SHORTAGE to zero, and return the
5bfe95c9
RS
427 position after the COUNTth match. Note that for reverse motion
428 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 429
9169c321
JB
430 If we don't find COUNT instances before reaching END, set *SHORTAGE
431 to the number of TARGETs left unfound, and return END.
ffd56f97 432
087a5f81
RS
433 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
434 except when inside redisplay. */
435
9169c321
JB
436scan_buffer (target, start, end, count, shortage, allow_quit)
437 register int target;
438 int start, end;
439 int count;
440 int *shortage;
087a5f81 441 int allow_quit;
ca1d1d23 442{
9169c321
JB
443 struct region_cache *newline_cache;
444 int direction;
ffd56f97 445
9169c321
JB
446 if (count > 0)
447 {
448 direction = 1;
449 if (! end) end = ZV;
450 }
451 else
452 {
453 direction = -1;
454 if (! end) end = BEGV;
455 }
ffd56f97 456
9169c321
JB
457 newline_cache_on_off (current_buffer);
458 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
459
460 if (shortage != 0)
461 *shortage = 0;
462
087a5f81 463 immediate_quit = allow_quit;
ca1d1d23 464
ffd56f97 465 if (count > 0)
9169c321 466 while (start != end)
ca1d1d23 467 {
9169c321
JB
468 /* Our innermost scanning loop is very simple; it doesn't know
469 about gaps, buffer ends, or the newline cache. ceiling is
470 the position of the last character before the next such
471 obstacle --- the last character the dumb search loop should
472 examine. */
473 register int ceiling = end - 1;
474
475 /* If we're looking for a newline, consult the newline cache
476 to see where we can avoid some scanning. */
477 if (target == '\n' && newline_cache)
478 {
479 int next_change;
480 immediate_quit = 0;
481 while (region_cache_forward
482 (current_buffer, newline_cache, start, &next_change))
483 start = next_change;
cbe0db0d 484 immediate_quit = allow_quit;
9169c321
JB
485
486 /* start should never be after end. */
487 if (start >= end)
488 start = end - 1;
489
490 /* Now the text after start is an unknown region, and
491 next_change is the position of the next known region. */
492 ceiling = min (next_change - 1, ceiling);
493 }
494
495 /* The dumb loop can only scan text stored in contiguous
496 bytes. BUFFER_CEILING_OF returns the last character
497 position that is contiguous, so the ceiling is the
498 position after that. */
499 ceiling = min (BUFFER_CEILING_OF (start), ceiling);
500
501 {
502 /* The termination address of the dumb loop. */
503 register unsigned char *ceiling_addr = &FETCH_CHAR (ceiling) + 1;
504 register unsigned char *cursor = &FETCH_CHAR (start);
505 unsigned char *base = cursor;
506
507 while (cursor < ceiling_addr)
508 {
509 unsigned char *scan_start = cursor;
510
511 /* The dumb loop. */
512 while (*cursor != target && ++cursor < ceiling_addr)
513 ;
514
515 /* If we're looking for newlines, cache the fact that
516 the region from start to cursor is free of them. */
517 if (target == '\n' && newline_cache)
518 know_region_cache (current_buffer, newline_cache,
519 start + scan_start - base,
520 start + cursor - base);
521
522 /* Did we find the target character? */
523 if (cursor < ceiling_addr)
524 {
525 if (--count == 0)
526 {
527 immediate_quit = 0;
528 return (start + cursor - base + 1);
529 }
530 cursor++;
531 }
532 }
533
534 start += cursor - base;
535 }
ca1d1d23
JB
536 }
537 else
9169c321
JB
538 while (start > end)
539 {
540 /* The last character to check before the next obstacle. */
541 register int ceiling = end;
542
543 /* Consult the newline cache, if appropriate. */
544 if (target == '\n' && newline_cache)
545 {
546 int next_change;
547 immediate_quit = 0;
548 while (region_cache_backward
549 (current_buffer, newline_cache, start, &next_change))
550 start = next_change;
cbe0db0d 551 immediate_quit = allow_quit;
9169c321
JB
552
553 /* Start should never be at or before end. */
554 if (start <= end)
555 start = end + 1;
556
557 /* Now the text before start is an unknown region, and
558 next_change is the position of the next known region. */
559 ceiling = max (next_change, ceiling);
560 }
561
562 /* Stop scanning before the gap. */
563 ceiling = max (BUFFER_FLOOR_OF (start - 1), ceiling);
564
565 {
566 /* The termination address of the dumb loop. */
567 register unsigned char *ceiling_addr = &FETCH_CHAR (ceiling);
568 register unsigned char *cursor = &FETCH_CHAR (start - 1);
569 unsigned char *base = cursor;
570
571 while (cursor >= ceiling_addr)
572 {
573 unsigned char *scan_start = cursor;
574
575 while (*cursor != target && --cursor >= ceiling_addr)
576 ;
577
578 /* If we're looking for newlines, cache the fact that
579 the region from after the cursor to start is free of them. */
580 if (target == '\n' && newline_cache)
581 know_region_cache (current_buffer, newline_cache,
582 start + cursor - base,
583 start + scan_start - base);
584
585 /* Did we find the target character? */
586 if (cursor >= ceiling_addr)
587 {
588 if (++count >= 0)
589 {
590 immediate_quit = 0;
591 return (start + cursor - base);
592 }
593 cursor--;
594 }
595 }
596
597 start += cursor - base;
598 }
599 }
600
ca1d1d23
JB
601 immediate_quit = 0;
602 if (shortage != 0)
ffd56f97 603 *shortage = count * direction;
9169c321 604 return start;
ca1d1d23
JB
605}
606
63fa018d
RS
607int
608find_next_newline_no_quit (from, cnt)
609 register int from, cnt;
610{
9169c321 611 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
63fa018d
RS
612}
613
ca1d1d23
JB
614int
615find_next_newline (from, cnt)
616 register int from, cnt;
617{
9169c321
JB
618 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 1);
619}
620
621
622/* Like find_next_newline, but returns position before the newline,
623 not after, and only search up to TO. This isn't just
624 find_next_newline (...)-1, because you might hit TO. */
625int
626find_before_next_newline (from, to, cnt)
cbe0db0d 627 int from, to, cnt;
9169c321
JB
628{
629 int shortage;
630 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
631
632 if (shortage == 0)
633 pos--;
634
635 return pos;
ca1d1d23
JB
636}
637\f
c1dc99a1
JB
638Lisp_Object skip_chars ();
639
ca1d1d23 640DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
3acb9a69
RS
641 "Move point forward, stopping before a char not in STRING, or at pos LIM.\n\
642STRING is like the inside of a `[...]' in a regular expression\n\
ca1d1d23
JB
643except that `]' is never special and `\\' quotes `^', `-' or `\\'.\n\
644Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
c1dc99a1
JB
645With arg \"^a-zA-Z\", skips nonletters stopping before first letter.\n\
646Returns the distance traveled, either zero or positive.")
ca1d1d23
JB
647 (string, lim)
648 Lisp_Object string, lim;
649{
17431c60 650 return skip_chars (1, 0, string, lim);
ca1d1d23
JB
651}
652
653DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
3acb9a69 654 "Move point backward, stopping after a char not in STRING, or at pos LIM.\n\
c1dc99a1
JB
655See `skip-chars-forward' for details.\n\
656Returns the distance traveled, either zero or negative.")
ca1d1d23
JB
657 (string, lim)
658 Lisp_Object string, lim;
659{
17431c60
RS
660 return skip_chars (0, 0, string, lim);
661}
662
663DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
664 "Move point forward across chars in specified syntax classes.\n\
665SYNTAX is a string of syntax code characters.\n\
666Stop before a char whose syntax is not in SYNTAX, or at position LIM.\n\
667If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
668This function returns the distance traveled, either zero or positive.")
669 (syntax, lim)
670 Lisp_Object syntax, lim;
671{
672 return skip_chars (1, 1, syntax, lim);
673}
674
675DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
676 "Move point backward across chars in specified syntax classes.\n\
677SYNTAX is a string of syntax code characters.\n\
678Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.\n\
679If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
680This function returns the distance traveled, either zero or negative.")
681 (syntax, lim)
682 Lisp_Object syntax, lim;
683{
684 return skip_chars (0, 1, syntax, lim);
ca1d1d23
JB
685}
686
c1dc99a1 687Lisp_Object
17431c60
RS
688skip_chars (forwardp, syntaxp, string, lim)
689 int forwardp, syntaxp;
ca1d1d23
JB
690 Lisp_Object string, lim;
691{
692 register unsigned char *p, *pend;
693 register unsigned char c;
694 unsigned char fastmap[0400];
695 int negate = 0;
696 register int i;
697
698 CHECK_STRING (string, 0);
699
700 if (NILP (lim))
a3668d92 701 XSETINT (lim, forwardp ? ZV : BEGV);
ca1d1d23
JB
702 else
703 CHECK_NUMBER_COERCE_MARKER (lim, 1);
704
ca1d1d23 705 /* In any case, don't allow scan outside bounds of buffer. */
c5241910
RS
706 /* jla turned this off, for no known reason.
707 bfox turned the ZV part on, and rms turned the
708 BEGV part back on. */
709 if (XINT (lim) > ZV)
c235cce7 710 XSETFASTINT (lim, ZV);
c5241910 711 if (XINT (lim) < BEGV)
c235cce7 712 XSETFASTINT (lim, BEGV);
ca1d1d23
JB
713
714 p = XSTRING (string)->data;
715 pend = p + XSTRING (string)->size;
716 bzero (fastmap, sizeof fastmap);
717
718 if (p != pend && *p == '^')
719 {
720 negate = 1; p++;
721 }
722
17431c60
RS
723 /* Find the characters specified and set their elements of fastmap.
724 If syntaxp, each character counts as itself.
725 Otherwise, handle backslashes and ranges specially */
ca1d1d23
JB
726
727 while (p != pend)
728 {
729 c = *p++;
17431c60
RS
730 if (syntaxp)
731 fastmap[c] = 1;
732 else
ca1d1d23 733 {
17431c60 734 if (c == '\\')
ca1d1d23 735 {
17431c60
RS
736 if (p == pend) break;
737 c = *p++;
738 }
739 if (p != pend && *p == '-')
740 {
741 p++;
742 if (p == pend) break;
743 while (c <= *p)
744 {
745 fastmap[c] = 1;
746 c++;
747 }
748 p++;
ca1d1d23 749 }
17431c60
RS
750 else
751 fastmap[c] = 1;
ca1d1d23 752 }
ca1d1d23
JB
753 }
754
9239c6c1
RS
755 if (syntaxp && fastmap['-'] != 0)
756 fastmap[' '] = 1;
757
ca1d1d23
JB
758 /* If ^ was the first character, complement the fastmap. */
759
760 if (negate)
761 for (i = 0; i < sizeof fastmap; i++)
762 fastmap[i] ^= 1;
763
c1dc99a1
JB
764 {
765 int start_point = point;
766
767 immediate_quit = 1;
17431c60 768 if (syntaxp)
c1dc99a1 769 {
17431c60
RS
770
771 if (forwardp)
772 {
773 while (point < XINT (lim)
774 && fastmap[(unsigned char) syntax_code_spec[(int) SYNTAX (FETCH_CHAR (point))]])
775 SET_PT (point + 1);
776 }
777 else
778 {
779 while (point > XINT (lim)
780 && fastmap[(unsigned char) syntax_code_spec[(int) SYNTAX (FETCH_CHAR (point - 1))]])
781 SET_PT (point - 1);
782 }
c1dc99a1
JB
783 }
784 else
785 {
17431c60
RS
786 if (forwardp)
787 {
788 while (point < XINT (lim) && fastmap[FETCH_CHAR (point)])
789 SET_PT (point + 1);
790 }
791 else
792 {
793 while (point > XINT (lim) && fastmap[FETCH_CHAR (point - 1)])
794 SET_PT (point - 1);
795 }
c1dc99a1
JB
796 }
797 immediate_quit = 0;
798
799 return make_number (point - start_point);
800 }
ca1d1d23
JB
801}
802\f
803/* Subroutines of Lisp buffer search functions. */
804
805static Lisp_Object
b819a390 806search_command (string, bound, noerror, count, direction, RE, posix)
ca1d1d23
JB
807 Lisp_Object string, bound, noerror, count;
808 int direction;
809 int RE;
b819a390 810 int posix;
ca1d1d23
JB
811{
812 register int np;
813 int lim;
814 int n = direction;
815
816 if (!NILP (count))
817 {
818 CHECK_NUMBER (count, 3);
819 n *= XINT (count);
820 }
821
822 CHECK_STRING (string, 0);
823 if (NILP (bound))
824 lim = n > 0 ? ZV : BEGV;
825 else
826 {
827 CHECK_NUMBER_COERCE_MARKER (bound, 1);
828 lim = XINT (bound);
829 if (n > 0 ? lim < point : lim > point)
830 error ("Invalid search bound (wrong side of point)");
831 if (lim > ZV)
832 lim = ZV;
833 if (lim < BEGV)
834 lim = BEGV;
835 }
836
837 np = search_buffer (string, point, lim, n, RE,
838 (!NILP (current_buffer->case_fold_search)
b1428bd8
RS
839 ? XCHAR_TABLE (current_buffer->case_canon_table)->contents
840 : 0),
ca1d1d23 841 (!NILP (current_buffer->case_fold_search)
b1428bd8
RS
842 ? XCHAR_TABLE (current_buffer->case_eqv_table)->contents
843 : 0),
b819a390 844 posix);
ca1d1d23
JB
845 if (np <= 0)
846 {
847 if (NILP (noerror))
848 return signal_failure (string);
849 if (!EQ (noerror, Qt))
850 {
851 if (lim < BEGV || lim > ZV)
852 abort ();
a5f217b8
RS
853 SET_PT (lim);
854 return Qnil;
855#if 0 /* This would be clean, but maybe programs depend on
856 a value of nil here. */
481399bf 857 np = lim;
a5f217b8 858#endif
ca1d1d23 859 }
481399bf
RS
860 else
861 return Qnil;
ca1d1d23
JB
862 }
863
864 if (np < BEGV || np > ZV)
865 abort ();
866
867 SET_PT (np);
868
869 return make_number (np);
870}
871\f
b6d6a51c
KH
872static int
873trivial_regexp_p (regexp)
874 Lisp_Object regexp;
875{
876 int len = XSTRING (regexp)->size;
877 unsigned char *s = XSTRING (regexp)->data;
878 unsigned char c;
879 while (--len >= 0)
880 {
881 switch (*s++)
882 {
883 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
884 return 0;
885 case '\\':
886 if (--len < 0)
887 return 0;
888 switch (*s++)
889 {
890 case '|': case '(': case ')': case '`': case '\'': case 'b':
891 case 'B': case '<': case '>': case 'w': case 'W': case 's':
866f60fd
KH
892 case 'S': case '=':
893 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
894 case '6': case '7': case '8': case '9':
895 return 0;
896 }
897 }
898 }
899 return 1;
900}
901
ca325161 902/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 903 starting at position POS and stopping at position LIM,
b819a390 904 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
905 a regular expression if RE is true.
906
907 If N is positive, searching is forward and LIM must be greater than POS.
908 If N is negative, searching is backward and LIM must be less than POS.
909
910 Returns -x if only N-x occurrences found (x > 0),
911 or else the position at the beginning of the Nth occurrence
b819a390
RS
912 (if searching backward) or the end (if searching forward).
913
914 POSIX is nonzero if we want full backtracking (POSIX style)
915 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 916
b819a390
RS
917static int
918search_buffer (string, pos, lim, n, RE, trt, inverse_trt, posix)
ca1d1d23
JB
919 Lisp_Object string;
920 int pos;
921 int lim;
922 int n;
923 int RE;
b1428bd8
RS
924 Lisp_Object *trt;
925 Lisp_Object *inverse_trt;
b819a390 926 int posix;
ca1d1d23
JB
927{
928 int len = XSTRING (string)->size;
929 unsigned char *base_pat = XSTRING (string)->data;
930 register int *BM_tab;
931 int *BM_tab_base;
932 register int direction = ((n > 0) ? 1 : -1);
933 register int dirlen;
934 int infinity, limit, k, stride_for_teases;
935 register unsigned char *pat, *cursor, *p_limit;
936 register int i, j;
937 unsigned char *p1, *p2;
938 int s1, s2;
939
7074fde6
FP
940 if (running_asynch_code)
941 save_search_regs ();
942
ca1d1d23 943 /* Null string is found at starting position. */
3f57a499 944 if (len == 0)
ca325161
RS
945 {
946 set_search_regs (pos, 0);
947 return pos;
948 }
3f57a499
RS
949
950 /* Searching 0 times means don't move. */
951 if (n == 0)
ca1d1d23
JB
952 return pos;
953
b6d6a51c 954 if (RE && !trivial_regexp_p (string))
ca1d1d23 955 {
487282dc
KH
956 struct re_pattern_buffer *bufp;
957
b1428bd8 958 bufp = compile_pattern (string, &search_regs, trt, posix);
ca1d1d23 959
ca1d1d23
JB
960 immediate_quit = 1; /* Quit immediately if user types ^G,
961 because letting this function finish
962 can take too long. */
963 QUIT; /* Do a pending quit right away,
964 to avoid paradoxical behavior */
965 /* Get pointers and sizes of the two strings
966 that make up the visible portion of the buffer. */
967
968 p1 = BEGV_ADDR;
969 s1 = GPT - BEGV;
970 p2 = GAP_END_ADDR;
971 s2 = ZV - GPT;
972 if (s1 < 0)
973 {
974 p2 = p1;
975 s2 = ZV - BEGV;
976 s1 = 0;
977 }
978 if (s2 < 0)
979 {
980 s1 = ZV - BEGV;
981 s2 = 0;
982 }
983 while (n < 0)
984 {
42db823b 985 int val;
487282dc 986 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
42db823b
RS
987 pos - BEGV, lim - pos, &search_regs,
988 /* Don't allow match past current point */
989 pos - BEGV);
ca1d1d23 990 if (val == -2)
b6d6a51c
KH
991 {
992 matcher_overflow ();
993 }
ca1d1d23
JB
994 if (val >= 0)
995 {
996 j = BEGV;
4746118a 997 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
998 if (search_regs.start[i] >= 0)
999 {
1000 search_regs.start[i] += j;
1001 search_regs.end[i] += j;
1002 }
a3668d92 1003 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1004 /* Set pos to the new position. */
1005 pos = search_regs.start[0];
1006 }
1007 else
1008 {
1009 immediate_quit = 0;
1010 return (n);
1011 }
1012 n++;
1013 }
1014 while (n > 0)
1015 {
42db823b 1016 int val;
487282dc 1017 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
42db823b
RS
1018 pos - BEGV, lim - pos, &search_regs,
1019 lim - BEGV);
ca1d1d23 1020 if (val == -2)
b6d6a51c
KH
1021 {
1022 matcher_overflow ();
1023 }
ca1d1d23
JB
1024 if (val >= 0)
1025 {
1026 j = BEGV;
4746118a 1027 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1028 if (search_regs.start[i] >= 0)
1029 {
1030 search_regs.start[i] += j;
1031 search_regs.end[i] += j;
1032 }
a3668d92 1033 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1034 pos = search_regs.end[0];
1035 }
1036 else
1037 {
1038 immediate_quit = 0;
1039 return (0 - n);
1040 }
1041 n--;
1042 }
1043 immediate_quit = 0;
1044 return (pos);
1045 }
1046 else /* non-RE case */
1047 {
1048#ifdef C_ALLOCA
1049 int BM_tab_space[0400];
1050 BM_tab = &BM_tab_space[0];
1051#else
1052 BM_tab = (int *) alloca (0400 * sizeof (int));
1053#endif
b6d6a51c
KH
1054 {
1055 unsigned char *patbuf = (unsigned char *) alloca (len);
1056 pat = patbuf;
1057 while (--len >= 0)
1058 {
1059 /* If we got here and the RE flag is set, it's because we're
1060 dealing with a regexp known to be trivial, so the backslash
1061 just quotes the next character. */
1062 if (RE && *base_pat == '\\')
1063 {
1064 len--;
1065 base_pat++;
1066 }
1067 *pat++ = (trt ? trt[*base_pat++] : *base_pat++);
1068 }
1069 len = pat - patbuf;
1070 pat = base_pat = patbuf;
1071 }
ca1d1d23
JB
1072 /* The general approach is that we are going to maintain that we know */
1073 /* the first (closest to the present position, in whatever direction */
1074 /* we're searching) character that could possibly be the last */
1075 /* (furthest from present position) character of a valid match. We */
1076 /* advance the state of our knowledge by looking at that character */
1077 /* and seeing whether it indeed matches the last character of the */
1078 /* pattern. If it does, we take a closer look. If it does not, we */
1079 /* move our pointer (to putative last characters) as far as is */
1080 /* logically possible. This amount of movement, which I call a */
1081 /* stride, will be the length of the pattern if the actual character */
1082 /* appears nowhere in the pattern, otherwise it will be the distance */
1083 /* from the last occurrence of that character to the end of the */
1084 /* pattern. */
1085 /* As a coding trick, an enormous stride is coded into the table for */
1086 /* characters that match the last character. This allows use of only */
1087 /* a single test, a test for having gone past the end of the */
1088 /* permissible match region, to test for both possible matches (when */
1089 /* the stride goes past the end immediately) and failure to */
1090 /* match (where you get nudged past the end one stride at a time). */
1091
1092 /* Here we make a "mickey mouse" BM table. The stride of the search */
1093 /* is determined only by the last character of the putative match. */
1094 /* If that character does not match, we will stride the proper */
1095 /* distance to propose a match that superimposes it on the last */
1096 /* instance of a character that matches it (per trt), or misses */
1097 /* it entirely if there is none. */
1098
1099 dirlen = len * direction;
1100 infinity = dirlen - (lim + pos + len + len) * direction;
1101 if (direction < 0)
1102 pat = (base_pat += len - 1);
1103 BM_tab_base = BM_tab;
1104 BM_tab += 0400;
1105 j = dirlen; /* to get it in a register */
1106 /* A character that does not appear in the pattern induces a */
1107 /* stride equal to the pattern length. */
1108 while (BM_tab_base != BM_tab)
1109 {
1110 *--BM_tab = j;
1111 *--BM_tab = j;
1112 *--BM_tab = j;
1113 *--BM_tab = j;
1114 }
1115 i = 0;
1116 while (i != infinity)
1117 {
1118 j = pat[i]; i += direction;
1119 if (i == dirlen) i = infinity;
8d505039 1120 if (trt != 0)
ca1d1d23
JB
1121 {
1122 k = (j = trt[j]);
1123 if (i == infinity)
1124 stride_for_teases = BM_tab[j];
1125 BM_tab[j] = dirlen - i;
1126 /* A translation table is accompanied by its inverse -- see */
1127 /* comment following downcase_table for details */
b1428bd8 1128 while ((j = (unsigned char) inverse_trt[j]) != k)
ca1d1d23
JB
1129 BM_tab[j] = dirlen - i;
1130 }
1131 else
1132 {
1133 if (i == infinity)
1134 stride_for_teases = BM_tab[j];
1135 BM_tab[j] = dirlen - i;
1136 }
1137 /* stride_for_teases tells how much to stride if we get a */
1138 /* match on the far character but are subsequently */
1139 /* disappointed, by recording what the stride would have been */
1140 /* for that character if the last character had been */
1141 /* different. */
1142 }
1143 infinity = dirlen - infinity;
1144 pos += dirlen - ((direction > 0) ? direction : 0);
1145 /* loop invariant - pos points at where last char (first char if reverse)
1146 of pattern would align in a possible match. */
1147 while (n != 0)
1148 {
b2c71fb4
KH
1149 /* It's been reported that some (broken) compiler thinks that
1150 Boolean expressions in an arithmetic context are unsigned.
1151 Using an explicit ?1:0 prevents this. */
1152 if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0)
ca1d1d23
JB
1153 return (n * (0 - direction));
1154 /* First we do the part we can by pointers (maybe nothing) */
1155 QUIT;
1156 pat = base_pat;
1157 limit = pos - dirlen + direction;
1158 limit = ((direction > 0)
1159 ? BUFFER_CEILING_OF (limit)
1160 : BUFFER_FLOOR_OF (limit));
1161 /* LIMIT is now the last (not beyond-last!) value
1162 POS can take on without hitting edge of buffer or the gap. */
1163 limit = ((direction > 0)
1164 ? min (lim - 1, min (limit, pos + 20000))
1165 : max (lim, max (limit, pos - 20000)));
1166 if ((limit - pos) * direction > 20)
1167 {
1168 p_limit = &FETCH_CHAR (limit);
1169 p2 = (cursor = &FETCH_CHAR (pos));
1170 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1171 while (1) /* use one cursor setting as long as i can */
1172 {
1173 if (direction > 0) /* worth duplicating */
1174 {
1175 /* Use signed comparison if appropriate
1176 to make cursor+infinity sure to be > p_limit.
1177 Assuming that the buffer lies in a range of addresses
1178 that are all "positive" (as ints) or all "negative",
1179 either kind of comparison will work as long
1180 as we don't step by infinity. So pick the kind
1181 that works when we do step by infinity. */
8d505039 1182 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
9fa17f93 1183 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
ca1d1d23
JB
1184 cursor += BM_tab[*cursor];
1185 else
8d505039 1186 while ((unsigned EMACS_INT) cursor <= (unsigned EMACS_INT) p_limit)
ca1d1d23
JB
1187 cursor += BM_tab[*cursor];
1188 }
1189 else
1190 {
8d505039
RS
1191 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1192 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
ca1d1d23
JB
1193 cursor += BM_tab[*cursor];
1194 else
8d505039 1195 while ((unsigned EMACS_INT) cursor >= (unsigned EMACS_INT) p_limit)
ca1d1d23
JB
1196 cursor += BM_tab[*cursor];
1197 }
1198/* If you are here, cursor is beyond the end of the searched region. */
1199 /* This can happen if you match on the far character of the pattern, */
1200 /* because the "stride" of that character is infinity, a number able */
1201 /* to throw you well beyond the end of the search. It can also */
1202 /* happen if you fail to match within the permitted region and would */
1203 /* otherwise try a character beyond that region */
1204 if ((cursor - p_limit) * direction <= len)
1205 break; /* a small overrun is genuine */
1206 cursor -= infinity; /* large overrun = hit */
1207 i = dirlen - direction;
8d505039 1208 if (trt != 0)
ca1d1d23
JB
1209 {
1210 while ((i -= direction) + direction != 0)
1211 if (pat[i] != trt[*(cursor -= direction)])
1212 break;
1213 }
1214 else
1215 {
1216 while ((i -= direction) + direction != 0)
1217 if (pat[i] != *(cursor -= direction))
1218 break;
1219 }
1220 cursor += dirlen - i - direction; /* fix cursor */
1221 if (i + direction == 0)
1222 {
1223 cursor -= direction;
1113d9db 1224
ca325161
RS
1225 set_search_regs (pos + cursor - p2 + ((direction > 0)
1226 ? 1 - len : 0),
1227 len);
1228
ca1d1d23
JB
1229 if ((n -= direction) != 0)
1230 cursor += dirlen; /* to resume search */
1231 else
1232 return ((direction > 0)
1233 ? search_regs.end[0] : search_regs.start[0]);
1234 }
1235 else
1236 cursor += stride_for_teases; /* <sigh> we lose - */
1237 }
1238 pos += cursor - p2;
1239 }
1240 else
1241 /* Now we'll pick up a clump that has to be done the hard */
1242 /* way because it covers a discontinuity */
1243 {
1244 limit = ((direction > 0)
1245 ? BUFFER_CEILING_OF (pos - dirlen + 1)
1246 : BUFFER_FLOOR_OF (pos - dirlen - 1));
1247 limit = ((direction > 0)
1248 ? min (limit + len, lim - 1)
1249 : max (limit - len, lim));
1250 /* LIMIT is now the last value POS can have
1251 and still be valid for a possible match. */
1252 while (1)
1253 {
1254 /* This loop can be coded for space rather than */
1255 /* speed because it will usually run only once. */
1256 /* (the reach is at most len + 21, and typically */
1257 /* does not exceed len) */
1258 while ((limit - pos) * direction >= 0)
1259 pos += BM_tab[FETCH_CHAR(pos)];
1260 /* now run the same tests to distinguish going off the */
eb8c3be9 1261 /* end, a match or a phony match. */
ca1d1d23
JB
1262 if ((pos - limit) * direction <= len)
1263 break; /* ran off the end */
1264 /* Found what might be a match.
1265 Set POS back to last (first if reverse) char pos. */
1266 pos -= infinity;
1267 i = dirlen - direction;
1268 while ((i -= direction) + direction != 0)
1269 {
1270 pos -= direction;
8d505039 1271 if (pat[i] != (trt != 0
ca1d1d23
JB
1272 ? trt[FETCH_CHAR(pos)]
1273 : FETCH_CHAR (pos)))
1274 break;
1275 }
1276 /* Above loop has moved POS part or all the way
1277 back to the first char pos (last char pos if reverse).
1278 Set it once again at the last (first if reverse) char. */
1279 pos += dirlen - i- direction;
1280 if (i + direction == 0)
1281 {
1282 pos -= direction;
1113d9db 1283
ca325161
RS
1284 set_search_regs (pos + ((direction > 0) ? 1 - len : 0),
1285 len);
1286
ca1d1d23
JB
1287 if ((n -= direction) != 0)
1288 pos += dirlen; /* to resume search */
1289 else
1290 return ((direction > 0)
1291 ? search_regs.end[0] : search_regs.start[0]);
1292 }
1293 else
1294 pos += stride_for_teases;
1295 }
1296 }
1297 /* We have done one clump. Can we continue? */
1298 if ((lim - pos) * direction < 0)
1299 return ((0 - n) * direction);
1300 }
1301 return pos;
1302 }
1303}
ca325161
RS
1304
1305/* Record beginning BEG and end BEG + LEN
1306 for a match just found in the current buffer. */
1307
1308static void
1309set_search_regs (beg, len)
1310 int beg, len;
1311{
1312 /* Make sure we have registers in which to store
1313 the match position. */
1314 if (search_regs.num_regs == 0)
1315 {
2d4a771a
RS
1316 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1317 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 1318 search_regs.num_regs = 2;
ca325161
RS
1319 }
1320
1321 search_regs.start[0] = beg;
1322 search_regs.end[0] = beg + len;
a3668d92 1323 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 1324}
ca1d1d23
JB
1325\f
1326/* Given a string of words separated by word delimiters,
1327 compute a regexp that matches those exact words
1328 separated by arbitrary punctuation. */
1329
1330static Lisp_Object
1331wordify (string)
1332 Lisp_Object string;
1333{
1334 register unsigned char *p, *o;
1335 register int i, len, punct_count = 0, word_count = 0;
1336 Lisp_Object val;
1337
1338 CHECK_STRING (string, 0);
1339 p = XSTRING (string)->data;
1340 len = XSTRING (string)->size;
1341
1342 for (i = 0; i < len; i++)
1343 if (SYNTAX (p[i]) != Sword)
1344 {
1345 punct_count++;
1346 if (i > 0 && SYNTAX (p[i-1]) == Sword) word_count++;
1347 }
1348 if (SYNTAX (p[len-1]) == Sword) word_count++;
1349 if (!word_count) return build_string ("");
1350
1351 val = make_string (p, len - punct_count + 5 * (word_count - 1) + 4);
1352
1353 o = XSTRING (val)->data;
1354 *o++ = '\\';
1355 *o++ = 'b';
1356
1357 for (i = 0; i < len; i++)
1358 if (SYNTAX (p[i]) == Sword)
1359 *o++ = p[i];
1360 else if (i > 0 && SYNTAX (p[i-1]) == Sword && --word_count)
1361 {
1362 *o++ = '\\';
1363 *o++ = 'W';
1364 *o++ = '\\';
1365 *o++ = 'W';
1366 *o++ = '*';
1367 }
1368
1369 *o++ = '\\';
1370 *o++ = 'b';
1371
1372 return val;
1373}
1374\f
1375DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
1376 "sSearch backward: ",
1377 "Search backward from point for STRING.\n\
1378Set point to the beginning of the occurrence found, and return point.\n\
1379An optional second argument bounds the search; it is a buffer position.\n\
1380The match found must not extend before that position.\n\
1381Optional third argument, if t, means if fail just return nil (no error).\n\
1382 If not nil and not t, position at limit of search and return nil.\n\
1383Optional fourth argument is repeat count--search for successive occurrences.\n\
1384See also the functions `match-beginning', `match-end' and `replace-match'.")
1385 (string, bound, noerror, count)
1386 Lisp_Object string, bound, noerror, count;
1387{
b819a390 1388 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
1389}
1390
1391DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "sSearch: ",
1392 "Search forward from point for STRING.\n\
1393Set point to the end of the occurrence found, and return point.\n\
1394An optional second argument bounds the search; it is a buffer position.\n\
1395The match found must not extend after that position. nil is equivalent\n\
1396 to (point-max).\n\
1397Optional third argument, if t, means if fail just return nil (no error).\n\
1398 If not nil and not t, move to limit of search and return nil.\n\
1399Optional fourth argument is repeat count--search for successive occurrences.\n\
1400See also the functions `match-beginning', `match-end' and `replace-match'.")
1401 (string, bound, noerror, count)
1402 Lisp_Object string, bound, noerror, count;
1403{
b819a390 1404 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
1405}
1406
1407DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
1408 "sWord search backward: ",
1409 "Search backward from point for STRING, ignoring differences in punctuation.\n\
1410Set point to the beginning of the occurrence found, and return point.\n\
1411An optional second argument bounds the search; it is a buffer position.\n\
1412The match found must not extend before that position.\n\
1413Optional third argument, if t, means if fail just return nil (no error).\n\
1414 If not nil and not t, move to limit of search and return nil.\n\
1415Optional fourth argument is repeat count--search for successive occurrences.")
1416 (string, bound, noerror, count)
1417 Lisp_Object string, bound, noerror, count;
1418{
b819a390 1419 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
1420}
1421
1422DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
1423 "sWord search: ",
1424 "Search forward from point for STRING, ignoring differences in punctuation.\n\
1425Set point to the end of the occurrence found, and return point.\n\
1426An optional second argument bounds the search; it is a buffer position.\n\
1427The match found must not extend after that position.\n\
1428Optional third argument, if t, means if fail just return nil (no error).\n\
1429 If not nil and not t, move to limit of search and return nil.\n\
1430Optional fourth argument is repeat count--search for successive occurrences.")
1431 (string, bound, noerror, count)
1432 Lisp_Object string, bound, noerror, count;
1433{
b819a390 1434 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
1435}
1436
1437DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
1438 "sRE search backward: ",
1439 "Search backward from point for match for regular expression REGEXP.\n\
1440Set point to the beginning of the match, and return point.\n\
1441The match found is the one starting last in the buffer\n\
19c0a730 1442and yet ending before the origin of the search.\n\
ca1d1d23
JB
1443An optional second argument bounds the search; it is a buffer position.\n\
1444The match found must start at or after that position.\n\
1445Optional third argument, if t, means if fail just return nil (no error).\n\
1446 If not nil and not t, move to limit of search and return nil.\n\
1447Optional fourth argument is repeat count--search for successive occurrences.\n\
1448See also the functions `match-beginning', `match-end' and `replace-match'.")
19c0a730
KH
1449 (regexp, bound, noerror, count)
1450 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 1451{
b819a390 1452 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
1453}
1454
1455DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
1456 "sRE search: ",
1457 "Search forward from point for regular expression REGEXP.\n\
1458Set point to the end of the occurrence found, and return point.\n\
1459An optional second argument bounds the search; it is a buffer position.\n\
1460The match found must not extend after that position.\n\
1461Optional third argument, if t, means if fail just return nil (no error).\n\
1462 If not nil and not t, move to limit of search and return nil.\n\
1463Optional fourth argument is repeat count--search for successive occurrences.\n\
1464See also the functions `match-beginning', `match-end' and `replace-match'.")
19c0a730
KH
1465 (regexp, bound, noerror, count)
1466 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 1467{
b819a390
RS
1468 return search_command (regexp, bound, noerror, count, 1, 1, 0);
1469}
1470
1471DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
1472 "sPosix search backward: ",
1473 "Search backward from point for match for regular expression REGEXP.\n\
1474Find the longest match in accord with Posix regular expression rules.\n\
1475Set point to the beginning of the match, and return point.\n\
1476The match found is the one starting last in the buffer\n\
1477and yet ending before the origin of the search.\n\
1478An optional second argument bounds the search; it is a buffer position.\n\
1479The match found must start at or after that position.\n\
1480Optional third argument, if t, means if fail just return nil (no error).\n\
1481 If not nil and not t, move to limit of search and return nil.\n\
1482Optional fourth argument is repeat count--search for successive occurrences.\n\
1483See also the functions `match-beginning', `match-end' and `replace-match'.")
1484 (regexp, bound, noerror, count)
1485 Lisp_Object regexp, bound, noerror, count;
1486{
1487 return search_command (regexp, bound, noerror, count, -1, 1, 1);
1488}
1489
1490DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
1491 "sPosix search: ",
1492 "Search forward from point for regular expression REGEXP.\n\
1493Find the longest match in accord with Posix regular expression rules.\n\
1494Set point to the end of the occurrence found, and return point.\n\
1495An optional second argument bounds the search; it is a buffer position.\n\
1496The match found must not extend after that position.\n\
1497Optional third argument, if t, means if fail just return nil (no error).\n\
1498 If not nil and not t, move to limit of search and return nil.\n\
1499Optional fourth argument is repeat count--search for successive occurrences.\n\
1500See also the functions `match-beginning', `match-end' and `replace-match'.")
1501 (regexp, bound, noerror, count)
1502 Lisp_Object regexp, bound, noerror, count;
1503{
1504 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
1505}
1506\f
d7a5ad5f 1507DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
ca1d1d23
JB
1508 "Replace text matched by last search with NEWTEXT.\n\
1509If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
5b9cf4b2
RS
1510Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
1511based on the replaced text.\n\
1512If the replaced text has only capital letters\n\
1513and has at least one multiletter word, convert NEWTEXT to all caps.\n\
1514If the replaced text has at least one word starting with a capital letter,\n\
1515then capitalize each word in NEWTEXT.\n\n\
ca1d1d23
JB
1516If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
1517Otherwise treat `\\' as special:\n\
1518 `\\&' in NEWTEXT means substitute original matched text.\n\
1519 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
1520 If Nth parens didn't match, substitute nothing.\n\
1521 `\\\\' means insert one `\\'.\n\
1113d9db 1522FIXEDCASE and LITERAL are optional arguments.\n\
080c45fd
RS
1523Leaves point at end of replacement text.\n\
1524\n\
1525The optional fourth argument STRING can be a string to modify.\n\
1526In that case, this function creates and returns a new string\n\
d7a5ad5f
RS
1527which is made by replacing the part of STRING that was matched.\n\
1528\n\
1529The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
1530It says to replace just that subexpression instead of the whole match.\n\
1531This is useful only after a regular expression search or match\n\
1532since only regular expressions have distinguished subexpressions.")
1533 (newtext, fixedcase, literal, string, subexp)
1534 Lisp_Object newtext, fixedcase, literal, string, subexp;
ca1d1d23
JB
1535{
1536 enum { nochange, all_caps, cap_initial } case_action;
1537 register int pos, last;
1538 int some_multiletter_word;
97832bd0 1539 int some_lowercase;
73dc8771 1540 int some_uppercase;
208767c3 1541 int some_nonuppercase_initial;
ca1d1d23
JB
1542 register int c, prevc;
1543 int inslen;
d7a5ad5f 1544 int sub;
ca1d1d23 1545
16fdc568 1546 CHECK_STRING (newtext, 0);
ca1d1d23 1547
080c45fd
RS
1548 if (! NILP (string))
1549 CHECK_STRING (string, 4);
1550
ca1d1d23
JB
1551 case_action = nochange; /* We tried an initialization */
1552 /* but some C compilers blew it */
4746118a
JB
1553
1554 if (search_regs.num_regs <= 0)
1555 error ("replace-match called before any match found");
1556
d7a5ad5f
RS
1557 if (NILP (subexp))
1558 sub = 0;
1559 else
1560 {
1561 CHECK_NUMBER (subexp, 3);
1562 sub = XINT (subexp);
1563 if (sub < 0 || sub >= search_regs.num_regs)
1564 args_out_of_range (subexp, make_number (search_regs.num_regs));
1565 }
1566
080c45fd
RS
1567 if (NILP (string))
1568 {
d7a5ad5f
RS
1569 if (search_regs.start[sub] < BEGV
1570 || search_regs.start[sub] > search_regs.end[sub]
1571 || search_regs.end[sub] > ZV)
1572 args_out_of_range (make_number (search_regs.start[sub]),
1573 make_number (search_regs.end[sub]));
080c45fd
RS
1574 }
1575 else
1576 {
d7a5ad5f
RS
1577 if (search_regs.start[sub] < 0
1578 || search_regs.start[sub] > search_regs.end[sub]
1579 || search_regs.end[sub] > XSTRING (string)->size)
1580 args_out_of_range (make_number (search_regs.start[sub]),
1581 make_number (search_regs.end[sub]));
080c45fd 1582 }
ca1d1d23
JB
1583
1584 if (NILP (fixedcase))
1585 {
1586 /* Decide how to casify by examining the matched text. */
1587
d7a5ad5f 1588 last = search_regs.end[sub];
ca1d1d23
JB
1589 prevc = '\n';
1590 case_action = all_caps;
1591
1592 /* some_multiletter_word is set nonzero if any original word
1593 is more than one letter long. */
1594 some_multiletter_word = 0;
97832bd0 1595 some_lowercase = 0;
208767c3 1596 some_nonuppercase_initial = 0;
73dc8771 1597 some_uppercase = 0;
ca1d1d23 1598
d7a5ad5f 1599 for (pos = search_regs.start[sub]; pos < last; pos++)
ca1d1d23 1600 {
080c45fd
RS
1601 if (NILP (string))
1602 c = FETCH_CHAR (pos);
1603 else
1604 c = XSTRING (string)->data[pos];
1605
ca1d1d23
JB
1606 if (LOWERCASEP (c))
1607 {
1608 /* Cannot be all caps if any original char is lower case */
1609
97832bd0 1610 some_lowercase = 1;
ca1d1d23 1611 if (SYNTAX (prevc) != Sword)
208767c3 1612 some_nonuppercase_initial = 1;
ca1d1d23
JB
1613 else
1614 some_multiletter_word = 1;
1615 }
1616 else if (!NOCASEP (c))
1617 {
73dc8771 1618 some_uppercase = 1;
97832bd0 1619 if (SYNTAX (prevc) != Sword)
c4d460ce 1620 ;
97832bd0 1621 else
ca1d1d23
JB
1622 some_multiletter_word = 1;
1623 }
208767c3
RS
1624 else
1625 {
1626 /* If the initial is a caseless word constituent,
1627 treat that like a lowercase initial. */
1628 if (SYNTAX (prevc) != Sword)
1629 some_nonuppercase_initial = 1;
1630 }
ca1d1d23
JB
1631
1632 prevc = c;
1633 }
1634
97832bd0
RS
1635 /* Convert to all caps if the old text is all caps
1636 and has at least one multiletter word. */
1637 if (! some_lowercase && some_multiletter_word)
1638 case_action = all_caps;
c4d460ce 1639 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 1640 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 1641 case_action = cap_initial;
208767c3 1642 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
1643 /* Should x -> yz, operating on X, give Yz or YZ?
1644 We'll assume the latter. */
1645 case_action = all_caps;
97832bd0
RS
1646 else
1647 case_action = nochange;
ca1d1d23
JB
1648 }
1649
080c45fd
RS
1650 /* Do replacement in a string. */
1651 if (!NILP (string))
1652 {
1653 Lisp_Object before, after;
1654
1655 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
1656 make_number (search_regs.start[sub]));
1657 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd
RS
1658
1659 /* Do case substitution into NEWTEXT if desired. */
1660 if (NILP (literal))
1661 {
1662 int lastpos = -1;
1663 /* We build up the substituted string in ACCUM. */
1664 Lisp_Object accum;
1665 Lisp_Object middle;
1666
1667 accum = Qnil;
1668
1669 for (pos = 0; pos < XSTRING (newtext)->size; pos++)
1670 {
1671 int substart = -1;
1672 int subend;
1e79ec24 1673 int delbackslash = 0;
080c45fd
RS
1674
1675 c = XSTRING (newtext)->data[pos];
1676 if (c == '\\')
1677 {
1678 c = XSTRING (newtext)->data[++pos];
1679 if (c == '&')
1680 {
d7a5ad5f
RS
1681 substart = search_regs.start[sub];
1682 subend = search_regs.end[sub];
080c45fd
RS
1683 }
1684 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
1685 {
ad10348f 1686 if (search_regs.start[c - '0'] >= 0)
080c45fd
RS
1687 {
1688 substart = search_regs.start[c - '0'];
1689 subend = search_regs.end[c - '0'];
1690 }
1691 }
1e79ec24
KH
1692 else if (c == '\\')
1693 delbackslash = 1;
080c45fd
RS
1694 }
1695 if (substart >= 0)
1696 {
1697 if (pos - 1 != lastpos + 1)
1e79ec24
KH
1698 middle = Fsubstring (newtext,
1699 make_number (lastpos + 1),
1700 make_number (pos - 1));
080c45fd
RS
1701 else
1702 middle = Qnil;
1703 accum = concat3 (accum, middle,
1704 Fsubstring (string, make_number (substart),
1705 make_number (subend)));
1706 lastpos = pos;
1707 }
1e79ec24
KH
1708 else if (delbackslash)
1709 {
1710 middle = Fsubstring (newtext, make_number (lastpos + 1),
1711 make_number (pos));
1712 accum = concat2 (accum, middle);
1713 lastpos = pos;
1714 }
080c45fd
RS
1715 }
1716
1717 if (pos != lastpos + 1)
1e79ec24
KH
1718 middle = Fsubstring (newtext, make_number (lastpos + 1),
1719 make_number (pos));
080c45fd
RS
1720 else
1721 middle = Qnil;
1722
1723 newtext = concat2 (accum, middle);
1724 }
1725
1726 if (case_action == all_caps)
1727 newtext = Fupcase (newtext);
1728 else if (case_action == cap_initial)
2b2eead9 1729 newtext = Fupcase_initials (newtext);
080c45fd
RS
1730
1731 return concat3 (before, newtext, after);
1732 }
1733
9a76659d
JB
1734 /* We insert the replacement text before the old text, and then
1735 delete the original text. This means that markers at the
1736 beginning or end of the original will float to the corresponding
1737 position in the replacement. */
d7a5ad5f 1738 SET_PT (search_regs.start[sub]);
ca1d1d23 1739 if (!NILP (literal))
16fdc568 1740 Finsert_and_inherit (1, &newtext);
ca1d1d23
JB
1741 else
1742 {
1743 struct gcpro gcpro1;
16fdc568 1744 GCPRO1 (newtext);
ca1d1d23 1745
16fdc568 1746 for (pos = 0; pos < XSTRING (newtext)->size; pos++)
ca1d1d23 1747 {
d7a5ad5f 1748 int offset = point - search_regs.start[sub];
9a76659d 1749
16fdc568 1750 c = XSTRING (newtext)->data[pos];
ca1d1d23
JB
1751 if (c == '\\')
1752 {
16fdc568 1753 c = XSTRING (newtext)->data[++pos];
ca1d1d23 1754 if (c == '&')
9a76659d
JB
1755 Finsert_buffer_substring
1756 (Fcurrent_buffer (),
d7a5ad5f
RS
1757 make_number (search_regs.start[sub] + offset),
1758 make_number (search_regs.end[sub] + offset));
78445046 1759 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
1760 {
1761 if (search_regs.start[c - '0'] >= 1)
9a76659d
JB
1762 Finsert_buffer_substring
1763 (Fcurrent_buffer (),
1764 make_number (search_regs.start[c - '0'] + offset),
1765 make_number (search_regs.end[c - '0'] + offset));
ca1d1d23
JB
1766 }
1767 else
1768 insert_char (c);
1769 }
1770 else
1771 insert_char (c);
1772 }
1773 UNGCPRO;
1774 }
1775
d7a5ad5f
RS
1776 inslen = point - (search_regs.start[sub]);
1777 del_range (search_regs.start[sub] + inslen, search_regs.end[sub] + inslen);
ca1d1d23
JB
1778
1779 if (case_action == all_caps)
1780 Fupcase_region (make_number (point - inslen), make_number (point));
1781 else if (case_action == cap_initial)
2b2eead9 1782 Fupcase_initials_region (make_number (point - inslen), make_number (point));
ca1d1d23
JB
1783 return Qnil;
1784}
1785\f
1786static Lisp_Object
1787match_limit (num, beginningp)
1788 Lisp_Object num;
1789 int beginningp;
1790{
1791 register int n;
1792
1793 CHECK_NUMBER (num, 0);
1794 n = XINT (num);
4746118a
JB
1795 if (n < 0 || n >= search_regs.num_regs)
1796 args_out_of_range (num, make_number (search_regs.num_regs));
1797 if (search_regs.num_regs <= 0
1798 || search_regs.start[n] < 0)
ca1d1d23
JB
1799 return Qnil;
1800 return (make_number ((beginningp) ? search_regs.start[n]
1801 : search_regs.end[n]));
1802}
1803
1804DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
1805 "Return position of start of text matched by last search.\n\
5806161b
EN
1806SUBEXP, a number, specifies which parenthesized expression in the last\n\
1807 regexp.\n\
1808Value is nil if SUBEXPth pair didn't match, or there were less than\n\
1809 SUBEXP pairs.\n\
ca1d1d23 1810Zero means the entire text matched by the whole regexp or whole string.")
5806161b
EN
1811 (subexp)
1812 Lisp_Object subexp;
ca1d1d23 1813{
5806161b 1814 return match_limit (subexp, 1);
ca1d1d23
JB
1815}
1816
1817DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
1818 "Return position of end of text matched by last search.\n\
5806161b
EN
1819SUBEXP, a number, specifies which parenthesized expression in the last\n\
1820 regexp.\n\
1821Value is nil if SUBEXPth pair didn't match, or there were less than\n\
1822 SUBEXP pairs.\n\
ca1d1d23 1823Zero means the entire text matched by the whole regexp or whole string.")
5806161b
EN
1824 (subexp)
1825 Lisp_Object subexp;
ca1d1d23 1826{
5806161b 1827 return match_limit (subexp, 0);
ca1d1d23
JB
1828}
1829
1830DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 0, 0,
1831 "Return a list containing all info on what the last search matched.\n\
1832Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
1833All the elements are markers or nil (nil if the Nth pair didn't match)\n\
1834if the last match was on a buffer; integers or nil if a string was matched.\n\
1835Use `store-match-data' to reinstate the data in this list.")
1836 ()
1837{
4746118a 1838 Lisp_Object *data;
ca1d1d23
JB
1839 int i, len;
1840
daa37602
JB
1841 if (NILP (last_thing_searched))
1842 error ("match-data called before any match found");
1843
4746118a
JB
1844 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
1845 * sizeof (Lisp_Object));
1846
ca1d1d23 1847 len = -1;
4746118a 1848 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1849 {
1850 int start = search_regs.start[i];
1851 if (start >= 0)
1852 {
daa37602 1853 if (EQ (last_thing_searched, Qt))
ca1d1d23 1854 {
c235cce7
KH
1855 XSETFASTINT (data[2 * i], start);
1856 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 1857 }
0ed62dc7 1858 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
1859 {
1860 data[2 * i] = Fmake_marker ();
daa37602
JB
1861 Fset_marker (data[2 * i],
1862 make_number (start),
1863 last_thing_searched);
ca1d1d23
JB
1864 data[2 * i + 1] = Fmake_marker ();
1865 Fset_marker (data[2 * i + 1],
daa37602
JB
1866 make_number (search_regs.end[i]),
1867 last_thing_searched);
ca1d1d23 1868 }
daa37602
JB
1869 else
1870 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
1871 abort ();
1872
ca1d1d23
JB
1873 len = i;
1874 }
1875 else
1876 data[2 * i] = data [2 * i + 1] = Qnil;
1877 }
1878 return Flist (2 * len + 2, data);
1879}
1880
1881
1882DEFUN ("store-match-data", Fstore_match_data, Sstore_match_data, 1, 1, 0,
1883 "Set internal data on last search match from elements of LIST.\n\
1884LIST should have been created by calling `match-data' previously.")
1885 (list)
1886 register Lisp_Object list;
1887{
1888 register int i;
1889 register Lisp_Object marker;
1890
7074fde6
FP
1891 if (running_asynch_code)
1892 save_search_regs ();
1893
ca1d1d23 1894 if (!CONSP (list) && !NILP (list))
b37902c8 1895 list = wrong_type_argument (Qconsp, list);
ca1d1d23 1896
daa37602
JB
1897 /* Unless we find a marker with a buffer in LIST, assume that this
1898 match data came from a string. */
1899 last_thing_searched = Qt;
1900
4746118a
JB
1901 /* Allocate registers if they don't already exist. */
1902 {
d084e942 1903 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
1904
1905 if (length > search_regs.num_regs)
1906 {
1113d9db
JB
1907 if (search_regs.num_regs == 0)
1908 {
1909 search_regs.start
1910 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
1911 search_regs.end
1912 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
1913 }
4746118a 1914 else
1113d9db
JB
1915 {
1916 search_regs.start
1917 = (regoff_t *) xrealloc (search_regs.start,
1918 length * sizeof (regoff_t));
1919 search_regs.end
1920 = (regoff_t *) xrealloc (search_regs.end,
1921 length * sizeof (regoff_t));
1922 }
4746118a 1923
487282dc 1924 search_regs.num_regs = length;
4746118a
JB
1925 }
1926 }
1927
1928 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1929 {
1930 marker = Fcar (list);
1931 if (NILP (marker))
1932 {
1933 search_regs.start[i] = -1;
1934 list = Fcdr (list);
1935 }
1936 else
1937 {
0ed62dc7 1938 if (MARKERP (marker))
daa37602
JB
1939 {
1940 if (XMARKER (marker)->buffer == 0)
c235cce7 1941 XSETFASTINT (marker, 0);
daa37602 1942 else
a3668d92 1943 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
daa37602 1944 }
ca1d1d23
JB
1945
1946 CHECK_NUMBER_COERCE_MARKER (marker, 0);
1947 search_regs.start[i] = XINT (marker);
1948 list = Fcdr (list);
1949
1950 marker = Fcar (list);
0ed62dc7 1951 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
c235cce7 1952 XSETFASTINT (marker, 0);
ca1d1d23
JB
1953
1954 CHECK_NUMBER_COERCE_MARKER (marker, 0);
1955 search_regs.end[i] = XINT (marker);
1956 }
1957 list = Fcdr (list);
1958 }
1959
1960 return Qnil;
1961}
1962
7074fde6
FP
1963/* If non-zero the match data have been saved in saved_search_regs
1964 during the execution of a sentinel or filter. */
75ebf74b 1965static int search_regs_saved;
7074fde6
FP
1966static struct re_registers saved_search_regs;
1967
1968/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
1969 if asynchronous code (filter or sentinel) is running. */
1970static void
1971save_search_regs ()
1972{
1973 if (!search_regs_saved)
1974 {
1975 saved_search_regs.num_regs = search_regs.num_regs;
1976 saved_search_regs.start = search_regs.start;
1977 saved_search_regs.end = search_regs.end;
1978 search_regs.num_regs = 0;
2d4a771a
RS
1979 search_regs.start = 0;
1980 search_regs.end = 0;
7074fde6
FP
1981
1982 search_regs_saved = 1;
1983 }
1984}
1985
1986/* Called upon exit from filters and sentinels. */
1987void
1988restore_match_data ()
1989{
1990 if (search_regs_saved)
1991 {
1992 if (search_regs.num_regs > 0)
1993 {
1994 xfree (search_regs.start);
1995 xfree (search_regs.end);
1996 }
1997 search_regs.num_regs = saved_search_regs.num_regs;
1998 search_regs.start = saved_search_regs.start;
1999 search_regs.end = saved_search_regs.end;
2000
2001 search_regs_saved = 0;
2002 }
2003}
2004
ca1d1d23
JB
2005/* Quote a string to inactivate reg-expr chars */
2006
2007DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2008 "Return a regexp string which matches exactly STRING and nothing else.")
5806161b
EN
2009 (string)
2010 Lisp_Object string;
ca1d1d23
JB
2011{
2012 register unsigned char *in, *out, *end;
2013 register unsigned char *temp;
2014
5806161b 2015 CHECK_STRING (string, 0);
ca1d1d23 2016
5806161b 2017 temp = (unsigned char *) alloca (XSTRING (string)->size * 2);
ca1d1d23
JB
2018
2019 /* Now copy the data into the new string, inserting escapes. */
2020
5806161b
EN
2021 in = XSTRING (string)->data;
2022 end = in + XSTRING (string)->size;
ca1d1d23
JB
2023 out = temp;
2024
2025 for (; in != end; in++)
2026 {
2027 if (*in == '[' || *in == ']'
2028 || *in == '*' || *in == '.' || *in == '\\'
2029 || *in == '?' || *in == '+'
2030 || *in == '^' || *in == '$')
2031 *out++ = '\\';
2032 *out++ = *in;
2033 }
2034
2035 return make_string (temp, out - temp);
2036}
2037\f
2038syms_of_search ()
2039{
2040 register int i;
2041
487282dc
KH
2042 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2043 {
2044 searchbufs[i].buf.allocated = 100;
2045 searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
2046 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2047 searchbufs[i].regexp = Qnil;
2048 staticpro (&searchbufs[i].regexp);
2049 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2050 }
2051 searchbuf_head = &searchbufs[0];
ca1d1d23
JB
2052
2053 Qsearch_failed = intern ("search-failed");
2054 staticpro (&Qsearch_failed);
2055 Qinvalid_regexp = intern ("invalid-regexp");
2056 staticpro (&Qinvalid_regexp);
2057
2058 Fput (Qsearch_failed, Qerror_conditions,
2059 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2060 Fput (Qsearch_failed, Qerror_message,
2061 build_string ("Search failed"));
2062
2063 Fput (Qinvalid_regexp, Qerror_conditions,
2064 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2065 Fput (Qinvalid_regexp, Qerror_message,
2066 build_string ("Invalid regexp"));
2067
daa37602
JB
2068 last_thing_searched = Qnil;
2069 staticpro (&last_thing_searched);
2070
ca1d1d23 2071 defsubr (&Slooking_at);
b819a390
RS
2072 defsubr (&Sposix_looking_at);
2073 defsubr (&Sstring_match);
2074 defsubr (&Sposix_string_match);
ca1d1d23
JB
2075 defsubr (&Sskip_chars_forward);
2076 defsubr (&Sskip_chars_backward);
17431c60
RS
2077 defsubr (&Sskip_syntax_forward);
2078 defsubr (&Sskip_syntax_backward);
ca1d1d23
JB
2079 defsubr (&Ssearch_forward);
2080 defsubr (&Ssearch_backward);
2081 defsubr (&Sword_search_forward);
2082 defsubr (&Sword_search_backward);
2083 defsubr (&Sre_search_forward);
2084 defsubr (&Sre_search_backward);
b819a390
RS
2085 defsubr (&Sposix_search_forward);
2086 defsubr (&Sposix_search_backward);
ca1d1d23
JB
2087 defsubr (&Sreplace_match);
2088 defsubr (&Smatch_beginning);
2089 defsubr (&Smatch_end);
2090 defsubr (&Smatch_data);
2091 defsubr (&Sstore_match_data);
2092 defsubr (&Sregexp_quote);
2093}