Major cleanup of unclear or overly picturesque language.
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
3a22ee35 2 Copyright (C) 1985, 1986, 1987, 1993, 1994 Free Software Foundation, Inc.
ca1d1d23
JB
3
4This file is part of GNU Emacs.
5
6GNU Emacs is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
7c938215 8the Free Software Foundation; either version 2, or (at your option)
ca1d1d23
JB
9any later version.
10
11GNU Emacs is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU Emacs; see the file COPYING. If not, write to
3b7ad313
EN
18the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19Boston, MA 02111-1307, USA. */
ca1d1d23
JB
20
21
18160b98 22#include <config.h>
ca1d1d23
JB
23#include "lisp.h"
24#include "syntax.h"
25#include "buffer.h"
9169c321 26#include "region-cache.h"
ca1d1d23 27#include "commands.h"
9ac0d9e0 28#include "blockinput.h"
4746118a 29
ca1d1d23
JB
30#include <sys/types.h>
31#include "regex.h"
32
1d288aef 33#define REGEXP_CACHE_SIZE 20
ca1d1d23 34
487282dc
KH
35/* If the regexp is non-nil, then the buffer contains the compiled form
36 of that regexp, suitable for searching. */
1d288aef
RS
37struct regexp_cache
38{
487282dc
KH
39 struct regexp_cache *next;
40 Lisp_Object regexp;
41 struct re_pattern_buffer buf;
42 char fastmap[0400];
b819a390
RS
43 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
44 char posix;
487282dc 45};
ca1d1d23 46
487282dc
KH
47/* The instances of that struct. */
48struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 49
487282dc
KH
50/* The head of the linked list; points to the most recently used buffer. */
51struct regexp_cache *searchbuf_head;
ca1d1d23 52
ca1d1d23 53
4746118a
JB
54/* Every call to re_match, etc., must pass &search_regs as the regs
55 argument unless you can show it is unnecessary (i.e., if re_match
56 is certainly going to be called again before region-around-match
57 can be called).
58
59 Since the registers are now dynamically allocated, we need to make
60 sure not to refer to the Nth register before checking that it has
1113d9db
JB
61 been allocated by checking search_regs.num_regs.
62
63 The regex code keeps track of whether it has allocated the search
487282dc
KH
64 buffer using bits in the re_pattern_buffer. This means that whenever
65 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
66 allocated any registers, and will allocate new registers the next
67 time you call a searching or matching function. Therefore, we need
68 to call re_set_registers after compiling a new pattern or after
69 setting the match registers, so that the regex functions will be
70 able to free or re-allocate it properly. */
ca1d1d23
JB
71static struct re_registers search_regs;
72
daa37602
JB
73/* The buffer in which the last search was performed, or
74 Qt if the last search was done in a string;
75 Qnil if no searching has been done yet. */
76static Lisp_Object last_thing_searched;
ca1d1d23 77
8e6208c5 78/* error condition signaled when regexp compile_pattern fails */
ca1d1d23
JB
79
80Lisp_Object Qinvalid_regexp;
81
ca325161 82static void set_search_regs ();
044f81f1 83static void save_search_regs ();
ca325161 84
b819a390
RS
85static int search_buffer ();
86
ca1d1d23
JB
87static void
88matcher_overflow ()
89{
90 error ("Stack overflow in regexp matcher");
91}
92
93#ifdef __STDC__
94#define CONST const
95#else
96#define CONST
97#endif
98
b819a390
RS
99/* Compile a regexp and signal a Lisp error if anything goes wrong.
100 PATTERN is the pattern to compile.
101 CP is the place to put the result.
102 TRANSLATE is a translation table for ignoring case, or NULL for none.
103 REGP is the structure that says where to store the "register"
104 values that will result from matching this pattern.
105 If it is 0, we should compile the pattern not to record any
106 subexpression bounds.
107 POSIX is nonzero if we want full backtracking (POSIX style)
108 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 109
487282dc 110static void
b819a390 111compile_pattern_1 (cp, pattern, translate, regp, posix)
487282dc 112 struct regexp_cache *cp;
ca1d1d23 113 Lisp_Object pattern;
b1428bd8 114 Lisp_Object *translate;
487282dc 115 struct re_registers *regp;
b819a390 116 int posix;
ca1d1d23
JB
117{
118 CONST char *val;
b819a390 119 reg_syntax_t old;
ca1d1d23 120
487282dc
KH
121 cp->regexp = Qnil;
122 cp->buf.translate = translate;
b819a390 123 cp->posix = posix;
9ac0d9e0 124 BLOCK_INPUT;
b819a390
RS
125 old = re_set_syntax (RE_SYNTAX_EMACS
126 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
b90d9e80 127 val = (CONST char *) re_compile_pattern ((char *) XSTRING (pattern)->data,
487282dc 128 XSTRING (pattern)->size, &cp->buf);
b819a390 129 re_set_syntax (old);
9ac0d9e0 130 UNBLOCK_INPUT;
ca1d1d23 131 if (val)
487282dc 132 Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
1113d9db 133
487282dc 134 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
135}
136
137/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
138 the cache.
139 PATTERN is the pattern to compile.
140 TRANSLATE is a translation table for ignoring case, or NULL for none.
141 REGP is the structure that says where to store the "register"
142 values that will result from matching this pattern.
143 If it is 0, we should compile the pattern not to record any
144 subexpression bounds.
145 POSIX is nonzero if we want full backtracking (POSIX style)
146 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
147
148struct re_pattern_buffer *
b819a390 149compile_pattern (pattern, regp, translate, posix)
487282dc
KH
150 Lisp_Object pattern;
151 struct re_registers *regp;
b1428bd8 152 Lisp_Object *translate;
b819a390 153 int posix;
487282dc
KH
154{
155 struct regexp_cache *cp, **cpp;
156
157 for (cpp = &searchbuf_head; ; cpp = &cp->next)
158 {
159 cp = *cpp;
1d288aef
RS
160 if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
161 && !NILP (Fstring_equal (cp->regexp, pattern))
b819a390
RS
162 && cp->buf.translate == translate
163 && cp->posix == posix)
487282dc
KH
164 break;
165
166 /* If we're at the end of the cache, compile into the last cell. */
167 if (cp->next == 0)
168 {
b819a390 169 compile_pattern_1 (cp, pattern, translate, regp, posix);
487282dc
KH
170 break;
171 }
172 }
173
174 /* When we get here, cp (aka *cpp) contains the compiled pattern,
175 either because we found it in the cache or because we just compiled it.
176 Move it to the front of the queue to mark it as most recently used. */
177 *cpp = cp->next;
178 cp->next = searchbuf_head;
179 searchbuf_head = cp;
1113d9db 180
6639708c
RS
181 /* Advise the searching functions about the space we have allocated
182 for register data. */
183 if (regp)
184 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
185
487282dc 186 return &cp->buf;
ca1d1d23
JB
187}
188
189/* Error condition used for failing searches */
190Lisp_Object Qsearch_failed;
191
192Lisp_Object
193signal_failure (arg)
194 Lisp_Object arg;
195{
196 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
197 return Qnil;
198}
199\f
b819a390
RS
200static Lisp_Object
201looking_at_1 (string, posix)
ca1d1d23 202 Lisp_Object string;
b819a390 203 int posix;
ca1d1d23
JB
204{
205 Lisp_Object val;
206 unsigned char *p1, *p2;
207 int s1, s2;
208 register int i;
487282dc 209 struct re_pattern_buffer *bufp;
ca1d1d23 210
7074fde6
FP
211 if (running_asynch_code)
212 save_search_regs ();
213
ca1d1d23 214 CHECK_STRING (string, 0);
487282dc
KH
215 bufp = compile_pattern (string, &search_regs,
216 (!NILP (current_buffer->case_fold_search)
b819a390
RS
217 ? DOWNCASE_TABLE : 0),
218 posix);
ca1d1d23
JB
219
220 immediate_quit = 1;
221 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
222
223 /* Get pointers and sizes of the two strings
224 that make up the visible portion of the buffer. */
225
226 p1 = BEGV_ADDR;
227 s1 = GPT - BEGV;
228 p2 = GAP_END_ADDR;
229 s2 = ZV - GPT;
230 if (s1 < 0)
231 {
232 p2 = p1;
233 s2 = ZV - BEGV;
234 s1 = 0;
235 }
236 if (s2 < 0)
237 {
238 s1 = ZV - BEGV;
239 s2 = 0;
240 }
241
487282dc 242 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
6ec8bbd2 243 PT - BEGV, &search_regs,
ca1d1d23
JB
244 ZV - BEGV);
245 if (i == -2)
246 matcher_overflow ();
247
248 val = (0 <= i ? Qt : Qnil);
4746118a 249 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
250 if (search_regs.start[i] >= 0)
251 {
252 search_regs.start[i] += BEGV;
253 search_regs.end[i] += BEGV;
254 }
a3668d92 255 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
256 immediate_quit = 0;
257 return val;
258}
259
b819a390 260DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
94f94972 261 "Return t if text after point matches regular expression REGEXP.\n\
b819a390
RS
262This function modifies the match data that `match-beginning',\n\
263`match-end' and `match-data' access; save and restore the match\n\
264data if you want to preserve them.")
94f94972
RS
265 (regexp)
266 Lisp_Object regexp;
b819a390 267{
94f94972 268 return looking_at_1 (regexp, 0);
b819a390
RS
269}
270
271DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
94f94972 272 "Return t if text after point matches regular expression REGEXP.\n\
b819a390
RS
273Find the longest match, in accord with Posix regular expression rules.\n\
274This function modifies the match data that `match-beginning',\n\
275`match-end' and `match-data' access; save and restore the match\n\
276data if you want to preserve them.")
94f94972
RS
277 (regexp)
278 Lisp_Object regexp;
b819a390 279{
94f94972 280 return looking_at_1 (regexp, 1);
b819a390
RS
281}
282\f
283static Lisp_Object
284string_match_1 (regexp, string, start, posix)
ca1d1d23 285 Lisp_Object regexp, string, start;
b819a390 286 int posix;
ca1d1d23
JB
287{
288 int val;
289 int s;
487282dc 290 struct re_pattern_buffer *bufp;
ca1d1d23 291
7074fde6
FP
292 if (running_asynch_code)
293 save_search_regs ();
294
ca1d1d23
JB
295 CHECK_STRING (regexp, 0);
296 CHECK_STRING (string, 1);
297
298 if (NILP (start))
299 s = 0;
300 else
301 {
302 int len = XSTRING (string)->size;
303
304 CHECK_NUMBER (start, 2);
305 s = XINT (start);
306 if (s < 0 && -s <= len)
26faf9f4 307 s = len + s;
ca1d1d23
JB
308 else if (0 > s || s > len)
309 args_out_of_range (string, start);
310 }
311
487282dc
KH
312 bufp = compile_pattern (regexp, &search_regs,
313 (!NILP (current_buffer->case_fold_search)
b819a390 314 ? DOWNCASE_TABLE : 0),
24b704fa 315 posix);
ca1d1d23 316 immediate_quit = 1;
487282dc 317 val = re_search (bufp, (char *) XSTRING (string)->data,
ca1d1d23
JB
318 XSTRING (string)->size, s, XSTRING (string)->size - s,
319 &search_regs);
320 immediate_quit = 0;
daa37602 321 last_thing_searched = Qt;
ca1d1d23
JB
322 if (val == -2)
323 matcher_overflow ();
324 if (val < 0) return Qnil;
325 return make_number (val);
326}
e59a8453 327
b819a390
RS
328DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
329 "Return index of start of first match for REGEXP in STRING, or nil.\n\
330If third arg START is non-nil, start search at that index in STRING.\n\
331For index of first char beyond the match, do (match-end 0).\n\
332`match-end' and `match-beginning' also give indices of substrings\n\
333matched by parenthesis constructs in the pattern.")
334 (regexp, string, start)
335 Lisp_Object regexp, string, start;
336{
337 return string_match_1 (regexp, string, start, 0);
338}
339
340DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
341 "Return index of start of first match for REGEXP in STRING, or nil.\n\
342Find the longest match, in accord with Posix regular expression rules.\n\
343If third arg START is non-nil, start search at that index in STRING.\n\
344For index of first char beyond the match, do (match-end 0).\n\
345`match-end' and `match-beginning' also give indices of substrings\n\
346matched by parenthesis constructs in the pattern.")
347 (regexp, string, start)
348 Lisp_Object regexp, string, start;
349{
350 return string_match_1 (regexp, string, start, 1);
351}
352
e59a8453
RS
353/* Match REGEXP against STRING, searching all of STRING,
354 and return the index of the match, or negative on failure.
355 This does not clobber the match data. */
356
357int
358fast_string_match (regexp, string)
359 Lisp_Object regexp, string;
360{
361 int val;
487282dc 362 struct re_pattern_buffer *bufp;
e59a8453 363
b819a390 364 bufp = compile_pattern (regexp, 0, 0, 0);
e59a8453 365 immediate_quit = 1;
487282dc 366 val = re_search (bufp, (char *) XSTRING (string)->data,
e59a8453
RS
367 XSTRING (string)->size, 0, XSTRING (string)->size,
368 0);
369 immediate_quit = 0;
370 return val;
371}
ca1d1d23 372\f
9169c321
JB
373/* max and min. */
374
375static int
376max (a, b)
377 int a, b;
378{
379 return ((a > b) ? a : b);
380}
381
382static int
383min (a, b)
384 int a, b;
385{
386 return ((a < b) ? a : b);
387}
388
389\f
390/* The newline cache: remembering which sections of text have no newlines. */
391
392/* If the user has requested newline caching, make sure it's on.
393 Otherwise, make sure it's off.
394 This is our cheezy way of associating an action with the change of
395 state of a buffer-local variable. */
396static void
397newline_cache_on_off (buf)
398 struct buffer *buf;
399{
400 if (NILP (buf->cache_long_line_scans))
401 {
402 /* It should be off. */
403 if (buf->newline_cache)
404 {
405 free_region_cache (buf->newline_cache);
406 buf->newline_cache = 0;
407 }
408 }
409 else
410 {
411 /* It should be on. */
412 if (buf->newline_cache == 0)
413 buf->newline_cache = new_region_cache ();
414 }
415}
416
417\f
418/* Search for COUNT instances of the character TARGET between START and END.
419
420 If COUNT is positive, search forwards; END must be >= START.
421 If COUNT is negative, search backwards for the -COUNTth instance;
422 END must be <= START.
423 If COUNT is zero, do anything you please; run rogue, for all I care.
424
425 If END is zero, use BEGV or ZV instead, as appropriate for the
426 direction indicated by COUNT.
ffd56f97
JB
427
428 If we find COUNT instances, set *SHORTAGE to zero, and return the
5bfe95c9
RS
429 position after the COUNTth match. Note that for reverse motion
430 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 431
9169c321
JB
432 If we don't find COUNT instances before reaching END, set *SHORTAGE
433 to the number of TARGETs left unfound, and return END.
ffd56f97 434
087a5f81
RS
435 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
436 except when inside redisplay. */
437
9169c321
JB
438scan_buffer (target, start, end, count, shortage, allow_quit)
439 register int target;
440 int start, end;
441 int count;
442 int *shortage;
087a5f81 443 int allow_quit;
ca1d1d23 444{
9169c321
JB
445 struct region_cache *newline_cache;
446 int direction;
ffd56f97 447
9169c321
JB
448 if (count > 0)
449 {
450 direction = 1;
451 if (! end) end = ZV;
452 }
453 else
454 {
455 direction = -1;
456 if (! end) end = BEGV;
457 }
ffd56f97 458
9169c321
JB
459 newline_cache_on_off (current_buffer);
460 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
461
462 if (shortage != 0)
463 *shortage = 0;
464
087a5f81 465 immediate_quit = allow_quit;
ca1d1d23 466
ffd56f97 467 if (count > 0)
9169c321 468 while (start != end)
ca1d1d23 469 {
9169c321
JB
470 /* Our innermost scanning loop is very simple; it doesn't know
471 about gaps, buffer ends, or the newline cache. ceiling is
472 the position of the last character before the next such
473 obstacle --- the last character the dumb search loop should
474 examine. */
475 register int ceiling = end - 1;
476
477 /* If we're looking for a newline, consult the newline cache
478 to see where we can avoid some scanning. */
479 if (target == '\n' && newline_cache)
480 {
481 int next_change;
482 immediate_quit = 0;
483 while (region_cache_forward
484 (current_buffer, newline_cache, start, &next_change))
485 start = next_change;
cbe0db0d 486 immediate_quit = allow_quit;
9169c321
JB
487
488 /* start should never be after end. */
489 if (start >= end)
490 start = end - 1;
491
492 /* Now the text after start is an unknown region, and
493 next_change is the position of the next known region. */
494 ceiling = min (next_change - 1, ceiling);
495 }
496
497 /* The dumb loop can only scan text stored in contiguous
498 bytes. BUFFER_CEILING_OF returns the last character
499 position that is contiguous, so the ceiling is the
500 position after that. */
501 ceiling = min (BUFFER_CEILING_OF (start), ceiling);
502
503 {
504 /* The termination address of the dumb loop. */
505 register unsigned char *ceiling_addr = &FETCH_CHAR (ceiling) + 1;
506 register unsigned char *cursor = &FETCH_CHAR (start);
507 unsigned char *base = cursor;
508
509 while (cursor < ceiling_addr)
510 {
511 unsigned char *scan_start = cursor;
512
513 /* The dumb loop. */
514 while (*cursor != target && ++cursor < ceiling_addr)
515 ;
516
517 /* If we're looking for newlines, cache the fact that
518 the region from start to cursor is free of them. */
519 if (target == '\n' && newline_cache)
520 know_region_cache (current_buffer, newline_cache,
521 start + scan_start - base,
522 start + cursor - base);
523
524 /* Did we find the target character? */
525 if (cursor < ceiling_addr)
526 {
527 if (--count == 0)
528 {
529 immediate_quit = 0;
530 return (start + cursor - base + 1);
531 }
532 cursor++;
533 }
534 }
535
536 start += cursor - base;
537 }
ca1d1d23
JB
538 }
539 else
9169c321
JB
540 while (start > end)
541 {
542 /* The last character to check before the next obstacle. */
543 register int ceiling = end;
544
545 /* Consult the newline cache, if appropriate. */
546 if (target == '\n' && newline_cache)
547 {
548 int next_change;
549 immediate_quit = 0;
550 while (region_cache_backward
551 (current_buffer, newline_cache, start, &next_change))
552 start = next_change;
cbe0db0d 553 immediate_quit = allow_quit;
9169c321
JB
554
555 /* Start should never be at or before end. */
556 if (start <= end)
557 start = end + 1;
558
559 /* Now the text before start is an unknown region, and
560 next_change is the position of the next known region. */
561 ceiling = max (next_change, ceiling);
562 }
563
564 /* Stop scanning before the gap. */
565 ceiling = max (BUFFER_FLOOR_OF (start - 1), ceiling);
566
567 {
568 /* The termination address of the dumb loop. */
569 register unsigned char *ceiling_addr = &FETCH_CHAR (ceiling);
570 register unsigned char *cursor = &FETCH_CHAR (start - 1);
571 unsigned char *base = cursor;
572
573 while (cursor >= ceiling_addr)
574 {
575 unsigned char *scan_start = cursor;
576
577 while (*cursor != target && --cursor >= ceiling_addr)
578 ;
579
580 /* If we're looking for newlines, cache the fact that
581 the region from after the cursor to start is free of them. */
582 if (target == '\n' && newline_cache)
583 know_region_cache (current_buffer, newline_cache,
584 start + cursor - base,
585 start + scan_start - base);
586
587 /* Did we find the target character? */
588 if (cursor >= ceiling_addr)
589 {
590 if (++count >= 0)
591 {
592 immediate_quit = 0;
593 return (start + cursor - base);
594 }
595 cursor--;
596 }
597 }
598
599 start += cursor - base;
600 }
601 }
602
ca1d1d23
JB
603 immediate_quit = 0;
604 if (shortage != 0)
ffd56f97 605 *shortage = count * direction;
9169c321 606 return start;
ca1d1d23
JB
607}
608
63fa018d
RS
609int
610find_next_newline_no_quit (from, cnt)
611 register int from, cnt;
612{
9169c321 613 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
63fa018d
RS
614}
615
ca1d1d23
JB
616int
617find_next_newline (from, cnt)
618 register int from, cnt;
619{
9169c321
JB
620 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 1);
621}
622
623
624/* Like find_next_newline, but returns position before the newline,
625 not after, and only search up to TO. This isn't just
626 find_next_newline (...)-1, because you might hit TO. */
627int
628find_before_next_newline (from, to, cnt)
cbe0db0d 629 int from, to, cnt;
9169c321
JB
630{
631 int shortage;
632 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
633
634 if (shortage == 0)
635 pos--;
636
637 return pos;
ca1d1d23
JB
638}
639\f
c1dc99a1
JB
640Lisp_Object skip_chars ();
641
ca1d1d23 642DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
3acb9a69
RS
643 "Move point forward, stopping before a char not in STRING, or at pos LIM.\n\
644STRING is like the inside of a `[...]' in a regular expression\n\
ca1d1d23
JB
645except that `]' is never special and `\\' quotes `^', `-' or `\\'.\n\
646Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
c1dc99a1
JB
647With arg \"^a-zA-Z\", skips nonletters stopping before first letter.\n\
648Returns the distance traveled, either zero or positive.")
ca1d1d23
JB
649 (string, lim)
650 Lisp_Object string, lim;
651{
17431c60 652 return skip_chars (1, 0, string, lim);
ca1d1d23
JB
653}
654
655DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
3acb9a69 656 "Move point backward, stopping after a char not in STRING, or at pos LIM.\n\
c1dc99a1
JB
657See `skip-chars-forward' for details.\n\
658Returns the distance traveled, either zero or negative.")
ca1d1d23
JB
659 (string, lim)
660 Lisp_Object string, lim;
661{
17431c60
RS
662 return skip_chars (0, 0, string, lim);
663}
664
665DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
666 "Move point forward across chars in specified syntax classes.\n\
667SYNTAX is a string of syntax code characters.\n\
668Stop before a char whose syntax is not in SYNTAX, or at position LIM.\n\
669If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
670This function returns the distance traveled, either zero or positive.")
671 (syntax, lim)
672 Lisp_Object syntax, lim;
673{
674 return skip_chars (1, 1, syntax, lim);
675}
676
677DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
678 "Move point backward across chars in specified syntax classes.\n\
679SYNTAX is a string of syntax code characters.\n\
680Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.\n\
681If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
682This function returns the distance traveled, either zero or negative.")
683 (syntax, lim)
684 Lisp_Object syntax, lim;
685{
686 return skip_chars (0, 1, syntax, lim);
ca1d1d23
JB
687}
688
c1dc99a1 689Lisp_Object
17431c60
RS
690skip_chars (forwardp, syntaxp, string, lim)
691 int forwardp, syntaxp;
ca1d1d23
JB
692 Lisp_Object string, lim;
693{
694 register unsigned char *p, *pend;
695 register unsigned char c;
696 unsigned char fastmap[0400];
697 int negate = 0;
698 register int i;
699
700 CHECK_STRING (string, 0);
701
702 if (NILP (lim))
a3668d92 703 XSETINT (lim, forwardp ? ZV : BEGV);
ca1d1d23
JB
704 else
705 CHECK_NUMBER_COERCE_MARKER (lim, 1);
706
ca1d1d23 707 /* In any case, don't allow scan outside bounds of buffer. */
c5241910
RS
708 /* jla turned this off, for no known reason.
709 bfox turned the ZV part on, and rms turned the
710 BEGV part back on. */
711 if (XINT (lim) > ZV)
c235cce7 712 XSETFASTINT (lim, ZV);
c5241910 713 if (XINT (lim) < BEGV)
c235cce7 714 XSETFASTINT (lim, BEGV);
ca1d1d23
JB
715
716 p = XSTRING (string)->data;
717 pend = p + XSTRING (string)->size;
718 bzero (fastmap, sizeof fastmap);
719
720 if (p != pend && *p == '^')
721 {
722 negate = 1; p++;
723 }
724
17431c60
RS
725 /* Find the characters specified and set their elements of fastmap.
726 If syntaxp, each character counts as itself.
727 Otherwise, handle backslashes and ranges specially */
ca1d1d23
JB
728
729 while (p != pend)
730 {
731 c = *p++;
17431c60
RS
732 if (syntaxp)
733 fastmap[c] = 1;
734 else
ca1d1d23 735 {
17431c60 736 if (c == '\\')
ca1d1d23 737 {
17431c60
RS
738 if (p == pend) break;
739 c = *p++;
740 }
741 if (p != pend && *p == '-')
742 {
743 p++;
744 if (p == pend) break;
745 while (c <= *p)
746 {
747 fastmap[c] = 1;
748 c++;
749 }
750 p++;
ca1d1d23 751 }
17431c60
RS
752 else
753 fastmap[c] = 1;
ca1d1d23 754 }
ca1d1d23
JB
755 }
756
9239c6c1
RS
757 if (syntaxp && fastmap['-'] != 0)
758 fastmap[' '] = 1;
759
ca1d1d23
JB
760 /* If ^ was the first character, complement the fastmap. */
761
762 if (negate)
763 for (i = 0; i < sizeof fastmap; i++)
764 fastmap[i] ^= 1;
765
c1dc99a1 766 {
6ec8bbd2 767 int start_point = PT;
c1dc99a1
JB
768
769 immediate_quit = 1;
17431c60 770 if (syntaxp)
c1dc99a1 771 {
17431c60
RS
772
773 if (forwardp)
774 {
6ec8bbd2
KH
775 while (PT < XINT (lim)
776 && fastmap[(unsigned char) syntax_code_spec[(int) SYNTAX (FETCH_CHAR (PT))]])
777 SET_PT (PT + 1);
17431c60
RS
778 }
779 else
780 {
6ec8bbd2
KH
781 while (PT > XINT (lim)
782 && fastmap[(unsigned char) syntax_code_spec[(int) SYNTAX (FETCH_CHAR (PT - 1))]])
783 SET_PT (PT - 1);
17431c60 784 }
c1dc99a1
JB
785 }
786 else
787 {
17431c60
RS
788 if (forwardp)
789 {
6ec8bbd2
KH
790 while (PT < XINT (lim) && fastmap[FETCH_CHAR (PT)])
791 SET_PT (PT + 1);
17431c60
RS
792 }
793 else
794 {
6ec8bbd2
KH
795 while (PT > XINT (lim) && fastmap[FETCH_CHAR (PT - 1)])
796 SET_PT (PT - 1);
17431c60 797 }
c1dc99a1
JB
798 }
799 immediate_quit = 0;
800
6ec8bbd2 801 return make_number (PT - start_point);
c1dc99a1 802 }
ca1d1d23
JB
803}
804\f
805/* Subroutines of Lisp buffer search functions. */
806
807static Lisp_Object
b819a390 808search_command (string, bound, noerror, count, direction, RE, posix)
ca1d1d23
JB
809 Lisp_Object string, bound, noerror, count;
810 int direction;
811 int RE;
b819a390 812 int posix;
ca1d1d23
JB
813{
814 register int np;
815 int lim;
816 int n = direction;
817
818 if (!NILP (count))
819 {
820 CHECK_NUMBER (count, 3);
821 n *= XINT (count);
822 }
823
824 CHECK_STRING (string, 0);
825 if (NILP (bound))
826 lim = n > 0 ? ZV : BEGV;
827 else
828 {
829 CHECK_NUMBER_COERCE_MARKER (bound, 1);
830 lim = XINT (bound);
6ec8bbd2 831 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
832 error ("Invalid search bound (wrong side of point)");
833 if (lim > ZV)
834 lim = ZV;
835 if (lim < BEGV)
836 lim = BEGV;
837 }
838
6ec8bbd2 839 np = search_buffer (string, PT, lim, n, RE,
ca1d1d23 840 (!NILP (current_buffer->case_fold_search)
b1428bd8
RS
841 ? XCHAR_TABLE (current_buffer->case_canon_table)->contents
842 : 0),
ca1d1d23 843 (!NILP (current_buffer->case_fold_search)
b1428bd8
RS
844 ? XCHAR_TABLE (current_buffer->case_eqv_table)->contents
845 : 0),
b819a390 846 posix);
ca1d1d23
JB
847 if (np <= 0)
848 {
849 if (NILP (noerror))
850 return signal_failure (string);
851 if (!EQ (noerror, Qt))
852 {
853 if (lim < BEGV || lim > ZV)
854 abort ();
a5f217b8
RS
855 SET_PT (lim);
856 return Qnil;
857#if 0 /* This would be clean, but maybe programs depend on
858 a value of nil here. */
481399bf 859 np = lim;
a5f217b8 860#endif
ca1d1d23 861 }
481399bf
RS
862 else
863 return Qnil;
ca1d1d23
JB
864 }
865
866 if (np < BEGV || np > ZV)
867 abort ();
868
869 SET_PT (np);
870
871 return make_number (np);
872}
873\f
b6d6a51c
KH
874static int
875trivial_regexp_p (regexp)
876 Lisp_Object regexp;
877{
878 int len = XSTRING (regexp)->size;
879 unsigned char *s = XSTRING (regexp)->data;
880 unsigned char c;
881 while (--len >= 0)
882 {
883 switch (*s++)
884 {
885 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
886 return 0;
887 case '\\':
888 if (--len < 0)
889 return 0;
890 switch (*s++)
891 {
892 case '|': case '(': case ')': case '`': case '\'': case 'b':
893 case 'B': case '<': case '>': case 'w': case 'W': case 's':
866f60fd
KH
894 case 'S': case '=':
895 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
896 case '6': case '7': case '8': case '9':
897 return 0;
898 }
899 }
900 }
901 return 1;
902}
903
ca325161 904/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 905 starting at position POS and stopping at position LIM,
b819a390 906 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
907 a regular expression if RE is true.
908
909 If N is positive, searching is forward and LIM must be greater than POS.
910 If N is negative, searching is backward and LIM must be less than POS.
911
912 Returns -x if only N-x occurrences found (x > 0),
913 or else the position at the beginning of the Nth occurrence
b819a390
RS
914 (if searching backward) or the end (if searching forward).
915
916 POSIX is nonzero if we want full backtracking (POSIX style)
917 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 918
b819a390
RS
919static int
920search_buffer (string, pos, lim, n, RE, trt, inverse_trt, posix)
ca1d1d23
JB
921 Lisp_Object string;
922 int pos;
923 int lim;
924 int n;
925 int RE;
b1428bd8
RS
926 Lisp_Object *trt;
927 Lisp_Object *inverse_trt;
b819a390 928 int posix;
ca1d1d23
JB
929{
930 int len = XSTRING (string)->size;
931 unsigned char *base_pat = XSTRING (string)->data;
932 register int *BM_tab;
933 int *BM_tab_base;
934 register int direction = ((n > 0) ? 1 : -1);
935 register int dirlen;
936 int infinity, limit, k, stride_for_teases;
937 register unsigned char *pat, *cursor, *p_limit;
938 register int i, j;
939 unsigned char *p1, *p2;
940 int s1, s2;
941
7074fde6
FP
942 if (running_asynch_code)
943 save_search_regs ();
944
ca1d1d23 945 /* Null string is found at starting position. */
3f57a499 946 if (len == 0)
ca325161
RS
947 {
948 set_search_regs (pos, 0);
949 return pos;
950 }
3f57a499
RS
951
952 /* Searching 0 times means don't move. */
953 if (n == 0)
ca1d1d23
JB
954 return pos;
955
b6d6a51c 956 if (RE && !trivial_regexp_p (string))
ca1d1d23 957 {
487282dc
KH
958 struct re_pattern_buffer *bufp;
959
b1428bd8 960 bufp = compile_pattern (string, &search_regs, trt, posix);
ca1d1d23 961
ca1d1d23
JB
962 immediate_quit = 1; /* Quit immediately if user types ^G,
963 because letting this function finish
964 can take too long. */
965 QUIT; /* Do a pending quit right away,
966 to avoid paradoxical behavior */
967 /* Get pointers and sizes of the two strings
968 that make up the visible portion of the buffer. */
969
970 p1 = BEGV_ADDR;
971 s1 = GPT - BEGV;
972 p2 = GAP_END_ADDR;
973 s2 = ZV - GPT;
974 if (s1 < 0)
975 {
976 p2 = p1;
977 s2 = ZV - BEGV;
978 s1 = 0;
979 }
980 if (s2 < 0)
981 {
982 s1 = ZV - BEGV;
983 s2 = 0;
984 }
985 while (n < 0)
986 {
42db823b 987 int val;
487282dc 988 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
42db823b
RS
989 pos - BEGV, lim - pos, &search_regs,
990 /* Don't allow match past current point */
991 pos - BEGV);
ca1d1d23 992 if (val == -2)
b6d6a51c
KH
993 {
994 matcher_overflow ();
995 }
ca1d1d23
JB
996 if (val >= 0)
997 {
998 j = BEGV;
4746118a 999 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1000 if (search_regs.start[i] >= 0)
1001 {
1002 search_regs.start[i] += j;
1003 search_regs.end[i] += j;
1004 }
a3668d92 1005 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1006 /* Set pos to the new position. */
1007 pos = search_regs.start[0];
1008 }
1009 else
1010 {
1011 immediate_quit = 0;
1012 return (n);
1013 }
1014 n++;
1015 }
1016 while (n > 0)
1017 {
42db823b 1018 int val;
487282dc 1019 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
42db823b
RS
1020 pos - BEGV, lim - pos, &search_regs,
1021 lim - BEGV);
ca1d1d23 1022 if (val == -2)
b6d6a51c
KH
1023 {
1024 matcher_overflow ();
1025 }
ca1d1d23
JB
1026 if (val >= 0)
1027 {
1028 j = BEGV;
4746118a 1029 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1030 if (search_regs.start[i] >= 0)
1031 {
1032 search_regs.start[i] += j;
1033 search_regs.end[i] += j;
1034 }
a3668d92 1035 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1036 pos = search_regs.end[0];
1037 }
1038 else
1039 {
1040 immediate_quit = 0;
1041 return (0 - n);
1042 }
1043 n--;
1044 }
1045 immediate_quit = 0;
1046 return (pos);
1047 }
1048 else /* non-RE case */
1049 {
1050#ifdef C_ALLOCA
1051 int BM_tab_space[0400];
1052 BM_tab = &BM_tab_space[0];
1053#else
1054 BM_tab = (int *) alloca (0400 * sizeof (int));
1055#endif
b6d6a51c
KH
1056 {
1057 unsigned char *patbuf = (unsigned char *) alloca (len);
1058 pat = patbuf;
1059 while (--len >= 0)
1060 {
1061 /* If we got here and the RE flag is set, it's because we're
1062 dealing with a regexp known to be trivial, so the backslash
1063 just quotes the next character. */
1064 if (RE && *base_pat == '\\')
1065 {
1066 len--;
1067 base_pat++;
1068 }
1069 *pat++ = (trt ? trt[*base_pat++] : *base_pat++);
1070 }
1071 len = pat - patbuf;
1072 pat = base_pat = patbuf;
1073 }
ca1d1d23
JB
1074 /* The general approach is that we are going to maintain that we know */
1075 /* the first (closest to the present position, in whatever direction */
1076 /* we're searching) character that could possibly be the last */
1077 /* (furthest from present position) character of a valid match. We */
1078 /* advance the state of our knowledge by looking at that character */
1079 /* and seeing whether it indeed matches the last character of the */
1080 /* pattern. If it does, we take a closer look. If it does not, we */
1081 /* move our pointer (to putative last characters) as far as is */
1082 /* logically possible. This amount of movement, which I call a */
1083 /* stride, will be the length of the pattern if the actual character */
1084 /* appears nowhere in the pattern, otherwise it will be the distance */
1085 /* from the last occurrence of that character to the end of the */
1086 /* pattern. */
1087 /* As a coding trick, an enormous stride is coded into the table for */
1088 /* characters that match the last character. This allows use of only */
1089 /* a single test, a test for having gone past the end of the */
1090 /* permissible match region, to test for both possible matches (when */
1091 /* the stride goes past the end immediately) and failure to */
1092 /* match (where you get nudged past the end one stride at a time). */
1093
1094 /* Here we make a "mickey mouse" BM table. The stride of the search */
1095 /* is determined only by the last character of the putative match. */
1096 /* If that character does not match, we will stride the proper */
1097 /* distance to propose a match that superimposes it on the last */
1098 /* instance of a character that matches it (per trt), or misses */
1099 /* it entirely if there is none. */
1100
1101 dirlen = len * direction;
1102 infinity = dirlen - (lim + pos + len + len) * direction;
1103 if (direction < 0)
1104 pat = (base_pat += len - 1);
1105 BM_tab_base = BM_tab;
1106 BM_tab += 0400;
1107 j = dirlen; /* to get it in a register */
1108 /* A character that does not appear in the pattern induces a */
1109 /* stride equal to the pattern length. */
1110 while (BM_tab_base != BM_tab)
1111 {
1112 *--BM_tab = j;
1113 *--BM_tab = j;
1114 *--BM_tab = j;
1115 *--BM_tab = j;
1116 }
1117 i = 0;
1118 while (i != infinity)
1119 {
1120 j = pat[i]; i += direction;
1121 if (i == dirlen) i = infinity;
8d505039 1122 if (trt != 0)
ca1d1d23
JB
1123 {
1124 k = (j = trt[j]);
1125 if (i == infinity)
1126 stride_for_teases = BM_tab[j];
1127 BM_tab[j] = dirlen - i;
1128 /* A translation table is accompanied by its inverse -- see */
1129 /* comment following downcase_table for details */
b1428bd8 1130 while ((j = (unsigned char) inverse_trt[j]) != k)
ca1d1d23
JB
1131 BM_tab[j] = dirlen - i;
1132 }
1133 else
1134 {
1135 if (i == infinity)
1136 stride_for_teases = BM_tab[j];
1137 BM_tab[j] = dirlen - i;
1138 }
1139 /* stride_for_teases tells how much to stride if we get a */
1140 /* match on the far character but are subsequently */
1141 /* disappointed, by recording what the stride would have been */
1142 /* for that character if the last character had been */
1143 /* different. */
1144 }
1145 infinity = dirlen - infinity;
1146 pos += dirlen - ((direction > 0) ? direction : 0);
1147 /* loop invariant - pos points at where last char (first char if reverse)
1148 of pattern would align in a possible match. */
1149 while (n != 0)
1150 {
b2c71fb4
KH
1151 /* It's been reported that some (broken) compiler thinks that
1152 Boolean expressions in an arithmetic context are unsigned.
1153 Using an explicit ?1:0 prevents this. */
1154 if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0)
ca1d1d23
JB
1155 return (n * (0 - direction));
1156 /* First we do the part we can by pointers (maybe nothing) */
1157 QUIT;
1158 pat = base_pat;
1159 limit = pos - dirlen + direction;
1160 limit = ((direction > 0)
1161 ? BUFFER_CEILING_OF (limit)
1162 : BUFFER_FLOOR_OF (limit));
1163 /* LIMIT is now the last (not beyond-last!) value
1164 POS can take on without hitting edge of buffer or the gap. */
1165 limit = ((direction > 0)
1166 ? min (lim - 1, min (limit, pos + 20000))
1167 : max (lim, max (limit, pos - 20000)));
1168 if ((limit - pos) * direction > 20)
1169 {
1170 p_limit = &FETCH_CHAR (limit);
1171 p2 = (cursor = &FETCH_CHAR (pos));
1172 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1173 while (1) /* use one cursor setting as long as i can */
1174 {
1175 if (direction > 0) /* worth duplicating */
1176 {
1177 /* Use signed comparison if appropriate
1178 to make cursor+infinity sure to be > p_limit.
1179 Assuming that the buffer lies in a range of addresses
1180 that are all "positive" (as ints) or all "negative",
1181 either kind of comparison will work as long
1182 as we don't step by infinity. So pick the kind
1183 that works when we do step by infinity. */
8d505039 1184 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
9fa17f93 1185 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
ca1d1d23
JB
1186 cursor += BM_tab[*cursor];
1187 else
45b248b4 1188 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
ca1d1d23
JB
1189 cursor += BM_tab[*cursor];
1190 }
1191 else
1192 {
8d505039
RS
1193 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1194 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
ca1d1d23
JB
1195 cursor += BM_tab[*cursor];
1196 else
45b248b4 1197 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
ca1d1d23
JB
1198 cursor += BM_tab[*cursor];
1199 }
1200/* If you are here, cursor is beyond the end of the searched region. */
1201 /* This can happen if you match on the far character of the pattern, */
1202 /* because the "stride" of that character is infinity, a number able */
1203 /* to throw you well beyond the end of the search. It can also */
1204 /* happen if you fail to match within the permitted region and would */
1205 /* otherwise try a character beyond that region */
1206 if ((cursor - p_limit) * direction <= len)
1207 break; /* a small overrun is genuine */
1208 cursor -= infinity; /* large overrun = hit */
1209 i = dirlen - direction;
8d505039 1210 if (trt != 0)
ca1d1d23
JB
1211 {
1212 while ((i -= direction) + direction != 0)
1213 if (pat[i] != trt[*(cursor -= direction)])
1214 break;
1215 }
1216 else
1217 {
1218 while ((i -= direction) + direction != 0)
1219 if (pat[i] != *(cursor -= direction))
1220 break;
1221 }
1222 cursor += dirlen - i - direction; /* fix cursor */
1223 if (i + direction == 0)
1224 {
1225 cursor -= direction;
1113d9db 1226
ca325161
RS
1227 set_search_regs (pos + cursor - p2 + ((direction > 0)
1228 ? 1 - len : 0),
1229 len);
1230
ca1d1d23
JB
1231 if ((n -= direction) != 0)
1232 cursor += dirlen; /* to resume search */
1233 else
1234 return ((direction > 0)
1235 ? search_regs.end[0] : search_regs.start[0]);
1236 }
1237 else
1238 cursor += stride_for_teases; /* <sigh> we lose - */
1239 }
1240 pos += cursor - p2;
1241 }
1242 else
1243 /* Now we'll pick up a clump that has to be done the hard */
1244 /* way because it covers a discontinuity */
1245 {
1246 limit = ((direction > 0)
1247 ? BUFFER_CEILING_OF (pos - dirlen + 1)
1248 : BUFFER_FLOOR_OF (pos - dirlen - 1));
1249 limit = ((direction > 0)
1250 ? min (limit + len, lim - 1)
1251 : max (limit - len, lim));
1252 /* LIMIT is now the last value POS can have
1253 and still be valid for a possible match. */
1254 while (1)
1255 {
1256 /* This loop can be coded for space rather than */
1257 /* speed because it will usually run only once. */
1258 /* (the reach is at most len + 21, and typically */
1259 /* does not exceed len) */
1260 while ((limit - pos) * direction >= 0)
1261 pos += BM_tab[FETCH_CHAR(pos)];
1262 /* now run the same tests to distinguish going off the */
eb8c3be9 1263 /* end, a match or a phony match. */
ca1d1d23
JB
1264 if ((pos - limit) * direction <= len)
1265 break; /* ran off the end */
1266 /* Found what might be a match.
1267 Set POS back to last (first if reverse) char pos. */
1268 pos -= infinity;
1269 i = dirlen - direction;
1270 while ((i -= direction) + direction != 0)
1271 {
1272 pos -= direction;
8d505039 1273 if (pat[i] != (trt != 0
ca1d1d23
JB
1274 ? trt[FETCH_CHAR(pos)]
1275 : FETCH_CHAR (pos)))
1276 break;
1277 }
1278 /* Above loop has moved POS part or all the way
1279 back to the first char pos (last char pos if reverse).
1280 Set it once again at the last (first if reverse) char. */
1281 pos += dirlen - i- direction;
1282 if (i + direction == 0)
1283 {
1284 pos -= direction;
1113d9db 1285
ca325161
RS
1286 set_search_regs (pos + ((direction > 0) ? 1 - len : 0),
1287 len);
1288
ca1d1d23
JB
1289 if ((n -= direction) != 0)
1290 pos += dirlen; /* to resume search */
1291 else
1292 return ((direction > 0)
1293 ? search_regs.end[0] : search_regs.start[0]);
1294 }
1295 else
1296 pos += stride_for_teases;
1297 }
1298 }
1299 /* We have done one clump. Can we continue? */
1300 if ((lim - pos) * direction < 0)
1301 return ((0 - n) * direction);
1302 }
1303 return pos;
1304 }
1305}
ca325161
RS
1306
1307/* Record beginning BEG and end BEG + LEN
1308 for a match just found in the current buffer. */
1309
1310static void
1311set_search_regs (beg, len)
1312 int beg, len;
1313{
1314 /* Make sure we have registers in which to store
1315 the match position. */
1316 if (search_regs.num_regs == 0)
1317 {
2d4a771a
RS
1318 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1319 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 1320 search_regs.num_regs = 2;
ca325161
RS
1321 }
1322
1323 search_regs.start[0] = beg;
1324 search_regs.end[0] = beg + len;
a3668d92 1325 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 1326}
ca1d1d23
JB
1327\f
1328/* Given a string of words separated by word delimiters,
1329 compute a regexp that matches those exact words
1330 separated by arbitrary punctuation. */
1331
1332static Lisp_Object
1333wordify (string)
1334 Lisp_Object string;
1335{
1336 register unsigned char *p, *o;
1337 register int i, len, punct_count = 0, word_count = 0;
1338 Lisp_Object val;
1339
1340 CHECK_STRING (string, 0);
1341 p = XSTRING (string)->data;
1342 len = XSTRING (string)->size;
1343
1344 for (i = 0; i < len; i++)
1345 if (SYNTAX (p[i]) != Sword)
1346 {
1347 punct_count++;
1348 if (i > 0 && SYNTAX (p[i-1]) == Sword) word_count++;
1349 }
1350 if (SYNTAX (p[len-1]) == Sword) word_count++;
1351 if (!word_count) return build_string ("");
1352
1353 val = make_string (p, len - punct_count + 5 * (word_count - 1) + 4);
1354
1355 o = XSTRING (val)->data;
1356 *o++ = '\\';
1357 *o++ = 'b';
1358
1359 for (i = 0; i < len; i++)
1360 if (SYNTAX (p[i]) == Sword)
1361 *o++ = p[i];
1362 else if (i > 0 && SYNTAX (p[i-1]) == Sword && --word_count)
1363 {
1364 *o++ = '\\';
1365 *o++ = 'W';
1366 *o++ = '\\';
1367 *o++ = 'W';
1368 *o++ = '*';
1369 }
1370
1371 *o++ = '\\';
1372 *o++ = 'b';
1373
1374 return val;
1375}
1376\f
1377DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
1378 "sSearch backward: ",
1379 "Search backward from point for STRING.\n\
1380Set point to the beginning of the occurrence found, and return point.\n\
1381An optional second argument bounds the search; it is a buffer position.\n\
1382The match found must not extend before that position.\n\
1383Optional third argument, if t, means if fail just return nil (no error).\n\
1384 If not nil and not t, position at limit of search and return nil.\n\
1385Optional fourth argument is repeat count--search for successive occurrences.\n\
1386See also the functions `match-beginning', `match-end' and `replace-match'.")
1387 (string, bound, noerror, count)
1388 Lisp_Object string, bound, noerror, count;
1389{
b819a390 1390 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
1391}
1392
1393DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "sSearch: ",
1394 "Search forward from point for STRING.\n\
1395Set point to the end of the occurrence found, and return point.\n\
1396An optional second argument bounds the search; it is a buffer position.\n\
1397The match found must not extend after that position. nil is equivalent\n\
1398 to (point-max).\n\
1399Optional third argument, if t, means if fail just return nil (no error).\n\
1400 If not nil and not t, move to limit of search and return nil.\n\
1401Optional fourth argument is repeat count--search for successive occurrences.\n\
1402See also the functions `match-beginning', `match-end' and `replace-match'.")
1403 (string, bound, noerror, count)
1404 Lisp_Object string, bound, noerror, count;
1405{
b819a390 1406 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
1407}
1408
1409DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
1410 "sWord search backward: ",
1411 "Search backward from point for STRING, ignoring differences in punctuation.\n\
1412Set point to the beginning of the occurrence found, and return point.\n\
1413An optional second argument bounds the search; it is a buffer position.\n\
1414The match found must not extend before that position.\n\
1415Optional third argument, if t, means if fail just return nil (no error).\n\
1416 If not nil and not t, move to limit of search and return nil.\n\
1417Optional fourth argument is repeat count--search for successive occurrences.")
1418 (string, bound, noerror, count)
1419 Lisp_Object string, bound, noerror, count;
1420{
b819a390 1421 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
1422}
1423
1424DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
1425 "sWord search: ",
1426 "Search forward from point for STRING, ignoring differences in punctuation.\n\
1427Set point to the end of the occurrence found, and return point.\n\
1428An optional second argument bounds the search; it is a buffer position.\n\
1429The match found must not extend after that position.\n\
1430Optional third argument, if t, means if fail just return nil (no error).\n\
1431 If not nil and not t, move to limit of search and return nil.\n\
1432Optional fourth argument is repeat count--search for successive occurrences.")
1433 (string, bound, noerror, count)
1434 Lisp_Object string, bound, noerror, count;
1435{
b819a390 1436 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
1437}
1438
1439DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
1440 "sRE search backward: ",
1441 "Search backward from point for match for regular expression REGEXP.\n\
1442Set point to the beginning of the match, and return point.\n\
1443The match found is the one starting last in the buffer\n\
19c0a730 1444and yet ending before the origin of the search.\n\
ca1d1d23
JB
1445An optional second argument bounds the search; it is a buffer position.\n\
1446The match found must start at or after that position.\n\
1447Optional third argument, if t, means if fail just return nil (no error).\n\
1448 If not nil and not t, move to limit of search and return nil.\n\
1449Optional fourth argument is repeat count--search for successive occurrences.\n\
1450See also the functions `match-beginning', `match-end' and `replace-match'.")
19c0a730
KH
1451 (regexp, bound, noerror, count)
1452 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 1453{
b819a390 1454 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
1455}
1456
1457DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
1458 "sRE search: ",
1459 "Search forward from point for regular expression REGEXP.\n\
1460Set point to the end of the occurrence found, and return point.\n\
1461An optional second argument bounds the search; it is a buffer position.\n\
1462The match found must not extend after that position.\n\
1463Optional third argument, if t, means if fail just return nil (no error).\n\
1464 If not nil and not t, move to limit of search and return nil.\n\
1465Optional fourth argument is repeat count--search for successive occurrences.\n\
1466See also the functions `match-beginning', `match-end' and `replace-match'.")
19c0a730
KH
1467 (regexp, bound, noerror, count)
1468 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 1469{
b819a390
RS
1470 return search_command (regexp, bound, noerror, count, 1, 1, 0);
1471}
1472
1473DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
1474 "sPosix search backward: ",
1475 "Search backward from point for match for regular expression REGEXP.\n\
1476Find the longest match in accord with Posix regular expression rules.\n\
1477Set point to the beginning of the match, and return point.\n\
1478The match found is the one starting last in the buffer\n\
1479and yet ending before the origin of the search.\n\
1480An optional second argument bounds the search; it is a buffer position.\n\
1481The match found must start at or after that position.\n\
1482Optional third argument, if t, means if fail just return nil (no error).\n\
1483 If not nil and not t, move to limit of search and return nil.\n\
1484Optional fourth argument is repeat count--search for successive occurrences.\n\
1485See also the functions `match-beginning', `match-end' and `replace-match'.")
1486 (regexp, bound, noerror, count)
1487 Lisp_Object regexp, bound, noerror, count;
1488{
1489 return search_command (regexp, bound, noerror, count, -1, 1, 1);
1490}
1491
1492DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
1493 "sPosix search: ",
1494 "Search forward from point for regular expression REGEXP.\n\
1495Find the longest match in accord with Posix regular expression rules.\n\
1496Set point to the end of the occurrence found, and return point.\n\
1497An optional second argument bounds the search; it is a buffer position.\n\
1498The match found must not extend after that position.\n\
1499Optional third argument, if t, means if fail just return nil (no error).\n\
1500 If not nil and not t, move to limit of search and return nil.\n\
1501Optional fourth argument is repeat count--search for successive occurrences.\n\
1502See also the functions `match-beginning', `match-end' and `replace-match'.")
1503 (regexp, bound, noerror, count)
1504 Lisp_Object regexp, bound, noerror, count;
1505{
1506 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
1507}
1508\f
d7a5ad5f 1509DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
ca1d1d23
JB
1510 "Replace text matched by last search with NEWTEXT.\n\
1511If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
5b9cf4b2
RS
1512Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
1513based on the replaced text.\n\
1514If the replaced text has only capital letters\n\
1515and has at least one multiletter word, convert NEWTEXT to all caps.\n\
1516If the replaced text has at least one word starting with a capital letter,\n\
1517then capitalize each word in NEWTEXT.\n\n\
ca1d1d23
JB
1518If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
1519Otherwise treat `\\' as special:\n\
1520 `\\&' in NEWTEXT means substitute original matched text.\n\
1521 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
1522 If Nth parens didn't match, substitute nothing.\n\
1523 `\\\\' means insert one `\\'.\n\
1113d9db 1524FIXEDCASE and LITERAL are optional arguments.\n\
080c45fd
RS
1525Leaves point at end of replacement text.\n\
1526\n\
1527The optional fourth argument STRING can be a string to modify.\n\
1528In that case, this function creates and returns a new string\n\
d7a5ad5f
RS
1529which is made by replacing the part of STRING that was matched.\n\
1530\n\
1531The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
1532It says to replace just that subexpression instead of the whole match.\n\
1533This is useful only after a regular expression search or match\n\
1534since only regular expressions have distinguished subexpressions.")
1535 (newtext, fixedcase, literal, string, subexp)
1536 Lisp_Object newtext, fixedcase, literal, string, subexp;
ca1d1d23
JB
1537{
1538 enum { nochange, all_caps, cap_initial } case_action;
1539 register int pos, last;
1540 int some_multiletter_word;
97832bd0 1541 int some_lowercase;
73dc8771 1542 int some_uppercase;
208767c3 1543 int some_nonuppercase_initial;
ca1d1d23
JB
1544 register int c, prevc;
1545 int inslen;
d7a5ad5f 1546 int sub;
ca1d1d23 1547
16fdc568 1548 CHECK_STRING (newtext, 0);
ca1d1d23 1549
080c45fd
RS
1550 if (! NILP (string))
1551 CHECK_STRING (string, 4);
1552
ca1d1d23
JB
1553 case_action = nochange; /* We tried an initialization */
1554 /* but some C compilers blew it */
4746118a
JB
1555
1556 if (search_regs.num_regs <= 0)
1557 error ("replace-match called before any match found");
1558
d7a5ad5f
RS
1559 if (NILP (subexp))
1560 sub = 0;
1561 else
1562 {
1563 CHECK_NUMBER (subexp, 3);
1564 sub = XINT (subexp);
1565 if (sub < 0 || sub >= search_regs.num_regs)
1566 args_out_of_range (subexp, make_number (search_regs.num_regs));
1567 }
1568
080c45fd
RS
1569 if (NILP (string))
1570 {
d7a5ad5f
RS
1571 if (search_regs.start[sub] < BEGV
1572 || search_regs.start[sub] > search_regs.end[sub]
1573 || search_regs.end[sub] > ZV)
1574 args_out_of_range (make_number (search_regs.start[sub]),
1575 make_number (search_regs.end[sub]));
080c45fd
RS
1576 }
1577 else
1578 {
d7a5ad5f
RS
1579 if (search_regs.start[sub] < 0
1580 || search_regs.start[sub] > search_regs.end[sub]
1581 || search_regs.end[sub] > XSTRING (string)->size)
1582 args_out_of_range (make_number (search_regs.start[sub]),
1583 make_number (search_regs.end[sub]));
080c45fd 1584 }
ca1d1d23
JB
1585
1586 if (NILP (fixedcase))
1587 {
1588 /* Decide how to casify by examining the matched text. */
1589
d7a5ad5f 1590 last = search_regs.end[sub];
ca1d1d23
JB
1591 prevc = '\n';
1592 case_action = all_caps;
1593
1594 /* some_multiletter_word is set nonzero if any original word
1595 is more than one letter long. */
1596 some_multiletter_word = 0;
97832bd0 1597 some_lowercase = 0;
208767c3 1598 some_nonuppercase_initial = 0;
73dc8771 1599 some_uppercase = 0;
ca1d1d23 1600
d7a5ad5f 1601 for (pos = search_regs.start[sub]; pos < last; pos++)
ca1d1d23 1602 {
080c45fd
RS
1603 if (NILP (string))
1604 c = FETCH_CHAR (pos);
1605 else
1606 c = XSTRING (string)->data[pos];
1607
ca1d1d23
JB
1608 if (LOWERCASEP (c))
1609 {
1610 /* Cannot be all caps if any original char is lower case */
1611
97832bd0 1612 some_lowercase = 1;
ca1d1d23 1613 if (SYNTAX (prevc) != Sword)
208767c3 1614 some_nonuppercase_initial = 1;
ca1d1d23
JB
1615 else
1616 some_multiletter_word = 1;
1617 }
1618 else if (!NOCASEP (c))
1619 {
73dc8771 1620 some_uppercase = 1;
97832bd0 1621 if (SYNTAX (prevc) != Sword)
c4d460ce 1622 ;
97832bd0 1623 else
ca1d1d23
JB
1624 some_multiletter_word = 1;
1625 }
208767c3
RS
1626 else
1627 {
1628 /* If the initial is a caseless word constituent,
1629 treat that like a lowercase initial. */
1630 if (SYNTAX (prevc) != Sword)
1631 some_nonuppercase_initial = 1;
1632 }
ca1d1d23
JB
1633
1634 prevc = c;
1635 }
1636
97832bd0
RS
1637 /* Convert to all caps if the old text is all caps
1638 and has at least one multiletter word. */
1639 if (! some_lowercase && some_multiletter_word)
1640 case_action = all_caps;
c4d460ce 1641 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 1642 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 1643 case_action = cap_initial;
208767c3 1644 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
1645 /* Should x -> yz, operating on X, give Yz or YZ?
1646 We'll assume the latter. */
1647 case_action = all_caps;
97832bd0
RS
1648 else
1649 case_action = nochange;
ca1d1d23
JB
1650 }
1651
080c45fd
RS
1652 /* Do replacement in a string. */
1653 if (!NILP (string))
1654 {
1655 Lisp_Object before, after;
1656
1657 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
1658 make_number (search_regs.start[sub]));
1659 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd
RS
1660
1661 /* Do case substitution into NEWTEXT if desired. */
1662 if (NILP (literal))
1663 {
1664 int lastpos = -1;
1665 /* We build up the substituted string in ACCUM. */
1666 Lisp_Object accum;
1667 Lisp_Object middle;
1668
1669 accum = Qnil;
1670
1671 for (pos = 0; pos < XSTRING (newtext)->size; pos++)
1672 {
1673 int substart = -1;
1674 int subend;
1e79ec24 1675 int delbackslash = 0;
080c45fd
RS
1676
1677 c = XSTRING (newtext)->data[pos];
1678 if (c == '\\')
1679 {
1680 c = XSTRING (newtext)->data[++pos];
1681 if (c == '&')
1682 {
d7a5ad5f
RS
1683 substart = search_regs.start[sub];
1684 subend = search_regs.end[sub];
080c45fd
RS
1685 }
1686 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
1687 {
ad10348f 1688 if (search_regs.start[c - '0'] >= 0)
080c45fd
RS
1689 {
1690 substart = search_regs.start[c - '0'];
1691 subend = search_regs.end[c - '0'];
1692 }
1693 }
1e79ec24
KH
1694 else if (c == '\\')
1695 delbackslash = 1;
080c45fd
RS
1696 }
1697 if (substart >= 0)
1698 {
1699 if (pos - 1 != lastpos + 1)
1e79ec24
KH
1700 middle = Fsubstring (newtext,
1701 make_number (lastpos + 1),
1702 make_number (pos - 1));
080c45fd
RS
1703 else
1704 middle = Qnil;
1705 accum = concat3 (accum, middle,
1706 Fsubstring (string, make_number (substart),
1707 make_number (subend)));
1708 lastpos = pos;
1709 }
1e79ec24
KH
1710 else if (delbackslash)
1711 {
1712 middle = Fsubstring (newtext, make_number (lastpos + 1),
1713 make_number (pos));
1714 accum = concat2 (accum, middle);
1715 lastpos = pos;
1716 }
080c45fd
RS
1717 }
1718
1719 if (pos != lastpos + 1)
1e79ec24
KH
1720 middle = Fsubstring (newtext, make_number (lastpos + 1),
1721 make_number (pos));
080c45fd
RS
1722 else
1723 middle = Qnil;
1724
1725 newtext = concat2 (accum, middle);
1726 }
1727
1728 if (case_action == all_caps)
1729 newtext = Fupcase (newtext);
1730 else if (case_action == cap_initial)
2b2eead9 1731 newtext = Fupcase_initials (newtext);
080c45fd
RS
1732
1733 return concat3 (before, newtext, after);
1734 }
1735
9a76659d
JB
1736 /* We insert the replacement text before the old text, and then
1737 delete the original text. This means that markers at the
1738 beginning or end of the original will float to the corresponding
1739 position in the replacement. */
d7a5ad5f 1740 SET_PT (search_regs.start[sub]);
ca1d1d23 1741 if (!NILP (literal))
16fdc568 1742 Finsert_and_inherit (1, &newtext);
ca1d1d23
JB
1743 else
1744 {
1745 struct gcpro gcpro1;
16fdc568 1746 GCPRO1 (newtext);
ca1d1d23 1747
16fdc568 1748 for (pos = 0; pos < XSTRING (newtext)->size; pos++)
ca1d1d23 1749 {
6ec8bbd2 1750 int offset = PT - search_regs.start[sub];
9a76659d 1751
16fdc568 1752 c = XSTRING (newtext)->data[pos];
ca1d1d23
JB
1753 if (c == '\\')
1754 {
16fdc568 1755 c = XSTRING (newtext)->data[++pos];
ca1d1d23 1756 if (c == '&')
9a76659d
JB
1757 Finsert_buffer_substring
1758 (Fcurrent_buffer (),
d7a5ad5f
RS
1759 make_number (search_regs.start[sub] + offset),
1760 make_number (search_regs.end[sub] + offset));
78445046 1761 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
1762 {
1763 if (search_regs.start[c - '0'] >= 1)
9a76659d
JB
1764 Finsert_buffer_substring
1765 (Fcurrent_buffer (),
1766 make_number (search_regs.start[c - '0'] + offset),
1767 make_number (search_regs.end[c - '0'] + offset));
ca1d1d23
JB
1768 }
1769 else
1770 insert_char (c);
1771 }
1772 else
1773 insert_char (c);
1774 }
1775 UNGCPRO;
1776 }
1777
6ec8bbd2 1778 inslen = PT - (search_regs.start[sub]);
d7a5ad5f 1779 del_range (search_regs.start[sub] + inslen, search_regs.end[sub] + inslen);
ca1d1d23
JB
1780
1781 if (case_action == all_caps)
6ec8bbd2 1782 Fupcase_region (make_number (PT - inslen), make_number (PT));
ca1d1d23 1783 else if (case_action == cap_initial)
6ec8bbd2 1784 Fupcase_initials_region (make_number (PT - inslen), make_number (PT));
ca1d1d23
JB
1785 return Qnil;
1786}
1787\f
1788static Lisp_Object
1789match_limit (num, beginningp)
1790 Lisp_Object num;
1791 int beginningp;
1792{
1793 register int n;
1794
1795 CHECK_NUMBER (num, 0);
1796 n = XINT (num);
4746118a
JB
1797 if (n < 0 || n >= search_regs.num_regs)
1798 args_out_of_range (num, make_number (search_regs.num_regs));
1799 if (search_regs.num_regs <= 0
1800 || search_regs.start[n] < 0)
ca1d1d23
JB
1801 return Qnil;
1802 return (make_number ((beginningp) ? search_regs.start[n]
1803 : search_regs.end[n]));
1804}
1805
1806DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
1807 "Return position of start of text matched by last search.\n\
5806161b
EN
1808SUBEXP, a number, specifies which parenthesized expression in the last\n\
1809 regexp.\n\
1810Value is nil if SUBEXPth pair didn't match, or there were less than\n\
1811 SUBEXP pairs.\n\
ca1d1d23 1812Zero means the entire text matched by the whole regexp or whole string.")
5806161b
EN
1813 (subexp)
1814 Lisp_Object subexp;
ca1d1d23 1815{
5806161b 1816 return match_limit (subexp, 1);
ca1d1d23
JB
1817}
1818
1819DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
1820 "Return position of end of text matched by last search.\n\
5806161b
EN
1821SUBEXP, a number, specifies which parenthesized expression in the last\n\
1822 regexp.\n\
1823Value is nil if SUBEXPth pair didn't match, or there were less than\n\
1824 SUBEXP pairs.\n\
ca1d1d23 1825Zero means the entire text matched by the whole regexp or whole string.")
5806161b
EN
1826 (subexp)
1827 Lisp_Object subexp;
ca1d1d23 1828{
5806161b 1829 return match_limit (subexp, 0);
ca1d1d23
JB
1830}
1831
56256c2a 1832DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
ca1d1d23
JB
1833 "Return a list containing all info on what the last search matched.\n\
1834Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
1835All the elements are markers or nil (nil if the Nth pair didn't match)\n\
1836if the last match was on a buffer; integers or nil if a string was matched.\n\
56256c2a
RS
1837Use `store-match-data' to reinstate the data in this list.\n\
1838\n\
1839If INTEGERS (the optional first argument) is non-nil, always use integers\n\
1840(rather than markers) to represent buffer positions.\n\
1841If REUSE is a list, reuse it as part of the value. If REUSE is long enough\n\
1842to hold all the values, and if INTEGERS is non-nil, no consing is done.")
1843 (integers, reuse)
1844 Lisp_Object integers, reuse;
ca1d1d23 1845{
56256c2a 1846 Lisp_Object tail, prev;
4746118a 1847 Lisp_Object *data;
ca1d1d23
JB
1848 int i, len;
1849
daa37602 1850 if (NILP (last_thing_searched))
c36bcf1b 1851 return Qnil;
daa37602 1852
4746118a
JB
1853 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
1854 * sizeof (Lisp_Object));
1855
ca1d1d23 1856 len = -1;
4746118a 1857 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1858 {
1859 int start = search_regs.start[i];
1860 if (start >= 0)
1861 {
56256c2a
RS
1862 if (EQ (last_thing_searched, Qt)
1863 || ! NILP (integers))
ca1d1d23 1864 {
c235cce7
KH
1865 XSETFASTINT (data[2 * i], start);
1866 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 1867 }
0ed62dc7 1868 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
1869 {
1870 data[2 * i] = Fmake_marker ();
daa37602
JB
1871 Fset_marker (data[2 * i],
1872 make_number (start),
1873 last_thing_searched);
ca1d1d23
JB
1874 data[2 * i + 1] = Fmake_marker ();
1875 Fset_marker (data[2 * i + 1],
daa37602
JB
1876 make_number (search_regs.end[i]),
1877 last_thing_searched);
ca1d1d23 1878 }
daa37602
JB
1879 else
1880 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
1881 abort ();
1882
ca1d1d23
JB
1883 len = i;
1884 }
1885 else
1886 data[2 * i] = data [2 * i + 1] = Qnil;
1887 }
56256c2a
RS
1888
1889 /* If REUSE is not usable, cons up the values and return them. */
1890 if (! CONSP (reuse))
1891 return Flist (2 * len + 2, data);
1892
1893 /* If REUSE is a list, store as many value elements as will fit
1894 into the elements of REUSE. */
1895 for (i = 0, tail = reuse; CONSP (tail);
1896 i++, tail = XCONS (tail)->cdr)
1897 {
1898 if (i < 2 * len + 2)
1899 XCONS (tail)->car = data[i];
1900 else
1901 XCONS (tail)->car = Qnil;
1902 prev = tail;
1903 }
1904
1905 /* If we couldn't fit all value elements into REUSE,
1906 cons up the rest of them and add them to the end of REUSE. */
1907 if (i < 2 * len + 2)
1908 XCONS (prev)->cdr = Flist (2 * len + 2 - i, data + i);
1909
1910 return reuse;
ca1d1d23
JB
1911}
1912
1913
1914DEFUN ("store-match-data", Fstore_match_data, Sstore_match_data, 1, 1, 0,
1915 "Set internal data on last search match from elements of LIST.\n\
1916LIST should have been created by calling `match-data' previously.")
1917 (list)
1918 register Lisp_Object list;
1919{
1920 register int i;
1921 register Lisp_Object marker;
1922
7074fde6
FP
1923 if (running_asynch_code)
1924 save_search_regs ();
1925
ca1d1d23 1926 if (!CONSP (list) && !NILP (list))
b37902c8 1927 list = wrong_type_argument (Qconsp, list);
ca1d1d23 1928
daa37602
JB
1929 /* Unless we find a marker with a buffer in LIST, assume that this
1930 match data came from a string. */
1931 last_thing_searched = Qt;
1932
4746118a
JB
1933 /* Allocate registers if they don't already exist. */
1934 {
d084e942 1935 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
1936
1937 if (length > search_regs.num_regs)
1938 {
1113d9db
JB
1939 if (search_regs.num_regs == 0)
1940 {
1941 search_regs.start
1942 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
1943 search_regs.end
1944 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
1945 }
4746118a 1946 else
1113d9db
JB
1947 {
1948 search_regs.start
1949 = (regoff_t *) xrealloc (search_regs.start,
1950 length * sizeof (regoff_t));
1951 search_regs.end
1952 = (regoff_t *) xrealloc (search_regs.end,
1953 length * sizeof (regoff_t));
1954 }
4746118a 1955
487282dc 1956 search_regs.num_regs = length;
4746118a
JB
1957 }
1958 }
1959
1960 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1961 {
1962 marker = Fcar (list);
1963 if (NILP (marker))
1964 {
1965 search_regs.start[i] = -1;
1966 list = Fcdr (list);
1967 }
1968 else
1969 {
0ed62dc7 1970 if (MARKERP (marker))
daa37602
JB
1971 {
1972 if (XMARKER (marker)->buffer == 0)
c235cce7 1973 XSETFASTINT (marker, 0);
daa37602 1974 else
a3668d92 1975 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
daa37602 1976 }
ca1d1d23
JB
1977
1978 CHECK_NUMBER_COERCE_MARKER (marker, 0);
1979 search_regs.start[i] = XINT (marker);
1980 list = Fcdr (list);
1981
1982 marker = Fcar (list);
0ed62dc7 1983 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
c235cce7 1984 XSETFASTINT (marker, 0);
ca1d1d23
JB
1985
1986 CHECK_NUMBER_COERCE_MARKER (marker, 0);
1987 search_regs.end[i] = XINT (marker);
1988 }
1989 list = Fcdr (list);
1990 }
1991
1992 return Qnil;
1993}
1994
7074fde6
FP
1995/* If non-zero the match data have been saved in saved_search_regs
1996 during the execution of a sentinel or filter. */
75ebf74b 1997static int search_regs_saved;
7074fde6
FP
1998static struct re_registers saved_search_regs;
1999
2000/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2001 if asynchronous code (filter or sentinel) is running. */
2002static void
2003save_search_regs ()
2004{
2005 if (!search_regs_saved)
2006 {
2007 saved_search_regs.num_regs = search_regs.num_regs;
2008 saved_search_regs.start = search_regs.start;
2009 saved_search_regs.end = search_regs.end;
2010 search_regs.num_regs = 0;
2d4a771a
RS
2011 search_regs.start = 0;
2012 search_regs.end = 0;
7074fde6
FP
2013
2014 search_regs_saved = 1;
2015 }
2016}
2017
2018/* Called upon exit from filters and sentinels. */
2019void
2020restore_match_data ()
2021{
2022 if (search_regs_saved)
2023 {
2024 if (search_regs.num_regs > 0)
2025 {
2026 xfree (search_regs.start);
2027 xfree (search_regs.end);
2028 }
2029 search_regs.num_regs = saved_search_regs.num_regs;
2030 search_regs.start = saved_search_regs.start;
2031 search_regs.end = saved_search_regs.end;
2032
2033 search_regs_saved = 0;
2034 }
2035}
2036
ca1d1d23
JB
2037/* Quote a string to inactivate reg-expr chars */
2038
2039DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2040 "Return a regexp string which matches exactly STRING and nothing else.")
5806161b
EN
2041 (string)
2042 Lisp_Object string;
ca1d1d23
JB
2043{
2044 register unsigned char *in, *out, *end;
2045 register unsigned char *temp;
2046
5806161b 2047 CHECK_STRING (string, 0);
ca1d1d23 2048
5806161b 2049 temp = (unsigned char *) alloca (XSTRING (string)->size * 2);
ca1d1d23
JB
2050
2051 /* Now copy the data into the new string, inserting escapes. */
2052
5806161b
EN
2053 in = XSTRING (string)->data;
2054 end = in + XSTRING (string)->size;
ca1d1d23
JB
2055 out = temp;
2056
2057 for (; in != end; in++)
2058 {
2059 if (*in == '[' || *in == ']'
2060 || *in == '*' || *in == '.' || *in == '\\'
2061 || *in == '?' || *in == '+'
2062 || *in == '^' || *in == '$')
2063 *out++ = '\\';
2064 *out++ = *in;
2065 }
2066
2067 return make_string (temp, out - temp);
2068}
2069\f
2070syms_of_search ()
2071{
2072 register int i;
2073
487282dc
KH
2074 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2075 {
2076 searchbufs[i].buf.allocated = 100;
2077 searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
2078 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2079 searchbufs[i].regexp = Qnil;
2080 staticpro (&searchbufs[i].regexp);
2081 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2082 }
2083 searchbuf_head = &searchbufs[0];
ca1d1d23
JB
2084
2085 Qsearch_failed = intern ("search-failed");
2086 staticpro (&Qsearch_failed);
2087 Qinvalid_regexp = intern ("invalid-regexp");
2088 staticpro (&Qinvalid_regexp);
2089
2090 Fput (Qsearch_failed, Qerror_conditions,
2091 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2092 Fput (Qsearch_failed, Qerror_message,
2093 build_string ("Search failed"));
2094
2095 Fput (Qinvalid_regexp, Qerror_conditions,
2096 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2097 Fput (Qinvalid_regexp, Qerror_message,
2098 build_string ("Invalid regexp"));
2099
daa37602
JB
2100 last_thing_searched = Qnil;
2101 staticpro (&last_thing_searched);
2102
ca1d1d23 2103 defsubr (&Slooking_at);
b819a390
RS
2104 defsubr (&Sposix_looking_at);
2105 defsubr (&Sstring_match);
2106 defsubr (&Sposix_string_match);
ca1d1d23
JB
2107 defsubr (&Sskip_chars_forward);
2108 defsubr (&Sskip_chars_backward);
17431c60
RS
2109 defsubr (&Sskip_syntax_forward);
2110 defsubr (&Sskip_syntax_backward);
ca1d1d23
JB
2111 defsubr (&Ssearch_forward);
2112 defsubr (&Ssearch_backward);
2113 defsubr (&Sword_search_forward);
2114 defsubr (&Sword_search_backward);
2115 defsubr (&Sre_search_forward);
2116 defsubr (&Sre_search_backward);
b819a390
RS
2117 defsubr (&Sposix_search_forward);
2118 defsubr (&Sposix_search_backward);
ca1d1d23
JB
2119 defsubr (&Sreplace_match);
2120 defsubr (&Smatch_beginning);
2121 defsubr (&Smatch_end);
2122 defsubr (&Smatch_data);
2123 defsubr (&Sstore_match_data);
2124 defsubr (&Sregexp_quote);
2125}