(advance_to_char_boundary): New function.
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
31c8f881 2 Copyright (C) 1985, 86, 87, 93, 94, 97, 1998 Free Software Foundation, Inc.
ca1d1d23
JB
3
4This file is part of GNU Emacs.
5
6GNU Emacs is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
7c938215 8the Free Software Foundation; either version 2, or (at your option)
ca1d1d23
JB
9any later version.
10
11GNU Emacs is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU Emacs; see the file COPYING. If not, write to
3b7ad313
EN
18the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19Boston, MA 02111-1307, USA. */
ca1d1d23
JB
20
21
18160b98 22#include <config.h>
ca1d1d23
JB
23#include "lisp.h"
24#include "syntax.h"
5679531d 25#include "category.h"
ca1d1d23 26#include "buffer.h"
5679531d 27#include "charset.h"
9169c321 28#include "region-cache.h"
ca1d1d23 29#include "commands.h"
9ac0d9e0 30#include "blockinput.h"
bf1760bb 31#include "intervals.h"
4746118a 32
ca1d1d23
JB
33#include <sys/types.h>
34#include "regex.h"
35
1d288aef 36#define REGEXP_CACHE_SIZE 20
ca1d1d23 37
487282dc
KH
38/* If the regexp is non-nil, then the buffer contains the compiled form
39 of that regexp, suitable for searching. */
1d288aef
RS
40struct regexp_cache
41{
487282dc
KH
42 struct regexp_cache *next;
43 Lisp_Object regexp;
44 struct re_pattern_buffer buf;
45 char fastmap[0400];
b819a390
RS
46 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
47 char posix;
487282dc 48};
ca1d1d23 49
487282dc
KH
50/* The instances of that struct. */
51struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 52
487282dc
KH
53/* The head of the linked list; points to the most recently used buffer. */
54struct regexp_cache *searchbuf_head;
ca1d1d23 55
ca1d1d23 56
4746118a
JB
57/* Every call to re_match, etc., must pass &search_regs as the regs
58 argument unless you can show it is unnecessary (i.e., if re_match
59 is certainly going to be called again before region-around-match
60 can be called).
61
62 Since the registers are now dynamically allocated, we need to make
63 sure not to refer to the Nth register before checking that it has
1113d9db
JB
64 been allocated by checking search_regs.num_regs.
65
66 The regex code keeps track of whether it has allocated the search
487282dc
KH
67 buffer using bits in the re_pattern_buffer. This means that whenever
68 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
69 allocated any registers, and will allocate new registers the next
70 time you call a searching or matching function. Therefore, we need
71 to call re_set_registers after compiling a new pattern or after
72 setting the match registers, so that the regex functions will be
73 able to free or re-allocate it properly. */
ca1d1d23
JB
74static struct re_registers search_regs;
75
daa37602
JB
76/* The buffer in which the last search was performed, or
77 Qt if the last search was done in a string;
78 Qnil if no searching has been done yet. */
79static Lisp_Object last_thing_searched;
ca1d1d23 80
8e6208c5 81/* error condition signaled when regexp compile_pattern fails */
ca1d1d23
JB
82
83Lisp_Object Qinvalid_regexp;
84
ca325161 85static void set_search_regs ();
044f81f1 86static void save_search_regs ();
ca325161 87
b819a390
RS
88static int search_buffer ();
89
ca1d1d23
JB
90static void
91matcher_overflow ()
92{
93 error ("Stack overflow in regexp matcher");
94}
95
96#ifdef __STDC__
97#define CONST const
98#else
99#define CONST
100#endif
101
b819a390
RS
102/* Compile a regexp and signal a Lisp error if anything goes wrong.
103 PATTERN is the pattern to compile.
104 CP is the place to put the result.
105 TRANSLATE is a translation table for ignoring case, or NULL for none.
106 REGP is the structure that says where to store the "register"
107 values that will result from matching this pattern.
108 If it is 0, we should compile the pattern not to record any
109 subexpression bounds.
110 POSIX is nonzero if we want full backtracking (POSIX style)
5679531d
KH
111 for this pattern. 0 means backtrack only enough to get a valid match.
112 MULTIBYTE is nonzero if we want to handle multibyte characters in
113 PATTERN. 0 means all multibyte characters are recognized just as
114 sequences of binary data. */
ca1d1d23 115
487282dc 116static void
5679531d 117compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
487282dc 118 struct regexp_cache *cp;
ca1d1d23 119 Lisp_Object pattern;
b1428bd8 120 Lisp_Object *translate;
487282dc 121 struct re_registers *regp;
b819a390 122 int posix;
5679531d 123 int multibyte;
ca1d1d23 124{
f8bd51c4
KH
125 char *raw_pattern;
126 int raw_pattern_size;
d451e4db 127 char *val;
b819a390 128 reg_syntax_t old;
ca1d1d23 129
f8bd51c4
KH
130 /* MULTIBYTE says whether the text to be searched is multibyte.
131 We must convert PATTERN to match that, or we will not really
132 find things right. */
133
134 if (multibyte == STRING_MULTIBYTE (pattern))
135 {
136 raw_pattern = (char *) XSTRING (pattern)->data;
137 raw_pattern_size = XSTRING (pattern)->size_byte;
138 }
139 else if (multibyte)
140 {
141 raw_pattern_size = count_size_as_multibyte (XSTRING (pattern)->data,
142 XSTRING (pattern)->size);
143 raw_pattern = (char *) alloca (raw_pattern_size + 1);
144 copy_text (XSTRING (pattern)->data, raw_pattern,
145 XSTRING (pattern)->size, 0, 1);
146 }
147 else
148 {
149 /* Converting multibyte to single-byte.
150
151 ??? Perhaps this conversion should be done in a special way
152 by subtracting nonascii-insert-offset from each non-ASCII char,
153 so that only the multibyte chars which really correspond to
154 the chosen single-byte character set can possibly match. */
155 raw_pattern_size = XSTRING (pattern)->size;
156 raw_pattern = (char *) alloca (raw_pattern_size + 1);
157 copy_text (XSTRING (pattern)->data, raw_pattern,
158 XSTRING (pattern)->size, 1, 0);
159 }
160
487282dc
KH
161 cp->regexp = Qnil;
162 cp->buf.translate = translate;
b819a390 163 cp->posix = posix;
5679531d 164 cp->buf.multibyte = multibyte;
9ac0d9e0 165 BLOCK_INPUT;
b819a390
RS
166 old = re_set_syntax (RE_SYNTAX_EMACS
167 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
f8bd51c4 168 val = (char *) re_compile_pattern (raw_pattern, raw_pattern_size, &cp->buf);
b819a390 169 re_set_syntax (old);
9ac0d9e0 170 UNBLOCK_INPUT;
ca1d1d23 171 if (val)
487282dc 172 Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
1113d9db 173
487282dc 174 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
175}
176
177/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
178 the cache.
179 PATTERN is the pattern to compile.
180 TRANSLATE is a translation table for ignoring case, or NULL for none.
181 REGP is the structure that says where to store the "register"
182 values that will result from matching this pattern.
183 If it is 0, we should compile the pattern not to record any
184 subexpression bounds.
185 POSIX is nonzero if we want full backtracking (POSIX style)
186 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
187
188struct re_pattern_buffer *
0c8533c6 189compile_pattern (pattern, regp, translate, posix, multibyte)
487282dc
KH
190 Lisp_Object pattern;
191 struct re_registers *regp;
b1428bd8 192 Lisp_Object *translate;
0c8533c6 193 int posix, multibyte;
487282dc
KH
194{
195 struct regexp_cache *cp, **cpp;
196
197 for (cpp = &searchbuf_head; ; cpp = &cp->next)
198 {
199 cp = *cpp;
1d288aef
RS
200 if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
201 && !NILP (Fstring_equal (cp->regexp, pattern))
b819a390 202 && cp->buf.translate == translate
5679531d
KH
203 && cp->posix == posix
204 && cp->buf.multibyte == multibyte)
487282dc
KH
205 break;
206
207 /* If we're at the end of the cache, compile into the last cell. */
208 if (cp->next == 0)
209 {
5679531d 210 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
487282dc
KH
211 break;
212 }
213 }
214
215 /* When we get here, cp (aka *cpp) contains the compiled pattern,
216 either because we found it in the cache or because we just compiled it.
217 Move it to the front of the queue to mark it as most recently used. */
218 *cpp = cp->next;
219 cp->next = searchbuf_head;
220 searchbuf_head = cp;
1113d9db 221
6639708c
RS
222 /* Advise the searching functions about the space we have allocated
223 for register data. */
224 if (regp)
225 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
226
487282dc 227 return &cp->buf;
ca1d1d23
JB
228}
229
230/* Error condition used for failing searches */
231Lisp_Object Qsearch_failed;
232
233Lisp_Object
234signal_failure (arg)
235 Lisp_Object arg;
236{
237 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
238 return Qnil;
239}
240\f
b819a390
RS
241static Lisp_Object
242looking_at_1 (string, posix)
ca1d1d23 243 Lisp_Object string;
b819a390 244 int posix;
ca1d1d23
JB
245{
246 Lisp_Object val;
247 unsigned char *p1, *p2;
248 int s1, s2;
249 register int i;
487282dc 250 struct re_pattern_buffer *bufp;
ca1d1d23 251
7074fde6
FP
252 if (running_asynch_code)
253 save_search_regs ();
254
ca1d1d23 255 CHECK_STRING (string, 0);
487282dc
KH
256 bufp = compile_pattern (string, &search_regs,
257 (!NILP (current_buffer->case_fold_search)
b0eba991 258 ? XCHAR_TABLE (DOWNCASE_TABLE)->contents : 0),
0c8533c6
RS
259 posix,
260 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23
JB
261
262 immediate_quit = 1;
263 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
264
265 /* Get pointers and sizes of the two strings
266 that make up the visible portion of the buffer. */
267
268 p1 = BEGV_ADDR;
fa8ed3e0 269 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 270 p2 = GAP_END_ADDR;
fa8ed3e0 271 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
272 if (s1 < 0)
273 {
274 p2 = p1;
fa8ed3e0 275 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
276 s1 = 0;
277 }
278 if (s2 < 0)
279 {
fa8ed3e0 280 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
281 s2 = 0;
282 }
8bb43c28
RS
283
284 re_match_object = Qnil;
ca1d1d23 285
487282dc 286 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
fa8ed3e0
RS
287 PT_BYTE - BEGV_BYTE, &search_regs,
288 ZV_BYTE - BEGV_BYTE);
ca1d1d23
JB
289 if (i == -2)
290 matcher_overflow ();
291
292 val = (0 <= i ? Qt : Qnil);
fa8ed3e0
RS
293 if (i >= 0)
294 for (i = 0; i < search_regs.num_regs; i++)
295 if (search_regs.start[i] >= 0)
296 {
297 search_regs.start[i]
298 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
299 search_regs.end[i]
300 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
301 }
a3668d92 302 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
303 immediate_quit = 0;
304 return val;
305}
306
b819a390 307DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
94f94972 308 "Return t if text after point matches regular expression REGEXP.\n\
b819a390
RS
309This function modifies the match data that `match-beginning',\n\
310`match-end' and `match-data' access; save and restore the match\n\
311data if you want to preserve them.")
94f94972
RS
312 (regexp)
313 Lisp_Object regexp;
b819a390 314{
94f94972 315 return looking_at_1 (regexp, 0);
b819a390
RS
316}
317
318DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
94f94972 319 "Return t if text after point matches regular expression REGEXP.\n\
b819a390
RS
320Find the longest match, in accord with Posix regular expression rules.\n\
321This function modifies the match data that `match-beginning',\n\
322`match-end' and `match-data' access; save and restore the match\n\
323data if you want to preserve them.")
94f94972
RS
324 (regexp)
325 Lisp_Object regexp;
b819a390 326{
94f94972 327 return looking_at_1 (regexp, 1);
b819a390
RS
328}
329\f
330static Lisp_Object
331string_match_1 (regexp, string, start, posix)
ca1d1d23 332 Lisp_Object regexp, string, start;
b819a390 333 int posix;
ca1d1d23
JB
334{
335 int val;
487282dc 336 struct re_pattern_buffer *bufp;
0c8533c6
RS
337 int pos, pos_byte;
338 int i;
ca1d1d23 339
7074fde6
FP
340 if (running_asynch_code)
341 save_search_regs ();
342
ca1d1d23
JB
343 CHECK_STRING (regexp, 0);
344 CHECK_STRING (string, 1);
345
346 if (NILP (start))
0c8533c6 347 pos = 0, pos_byte = 0;
ca1d1d23
JB
348 else
349 {
350 int len = XSTRING (string)->size;
351
352 CHECK_NUMBER (start, 2);
0c8533c6
RS
353 pos = XINT (start);
354 if (pos < 0 && -pos <= len)
355 pos = len + pos;
356 else if (0 > pos || pos > len)
ca1d1d23 357 args_out_of_range (string, start);
0c8533c6 358 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
359 }
360
487282dc
KH
361 bufp = compile_pattern (regexp, &search_regs,
362 (!NILP (current_buffer->case_fold_search)
b0eba991 363 ? XCHAR_TABLE (DOWNCASE_TABLE)->contents : 0),
0c8533c6
RS
364 posix,
365 STRING_MULTIBYTE (string));
ca1d1d23 366 immediate_quit = 1;
8bb43c28
RS
367 re_match_object = string;
368
487282dc 369 val = re_search (bufp, (char *) XSTRING (string)->data,
0c8533c6
RS
370 XSTRING (string)->size_byte, pos_byte,
371 XSTRING (string)->size_byte - pos_byte,
ca1d1d23
JB
372 &search_regs);
373 immediate_quit = 0;
daa37602 374 last_thing_searched = Qt;
ca1d1d23
JB
375 if (val == -2)
376 matcher_overflow ();
377 if (val < 0) return Qnil;
0c8533c6
RS
378
379 for (i = 0; i < search_regs.num_regs; i++)
380 if (search_regs.start[i] >= 0)
381 {
382 search_regs.start[i]
383 = string_byte_to_char (string, search_regs.start[i]);
384 search_regs.end[i]
385 = string_byte_to_char (string, search_regs.end[i]);
386 }
387
388 return make_number (string_byte_to_char (string, val));
ca1d1d23 389}
e59a8453 390
b819a390
RS
391DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
392 "Return index of start of first match for REGEXP in STRING, or nil.\n\
393If third arg START is non-nil, start search at that index in STRING.\n\
394For index of first char beyond the match, do (match-end 0).\n\
395`match-end' and `match-beginning' also give indices of substrings\n\
396matched by parenthesis constructs in the pattern.")
397 (regexp, string, start)
398 Lisp_Object regexp, string, start;
399{
400 return string_match_1 (regexp, string, start, 0);
401}
402
403DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
404 "Return index of start of first match for REGEXP in STRING, or nil.\n\
405Find the longest match, in accord with Posix regular expression rules.\n\
406If third arg START is non-nil, start search at that index in STRING.\n\
407For index of first char beyond the match, do (match-end 0).\n\
408`match-end' and `match-beginning' also give indices of substrings\n\
409matched by parenthesis constructs in the pattern.")
410 (regexp, string, start)
411 Lisp_Object regexp, string, start;
412{
413 return string_match_1 (regexp, string, start, 1);
414}
415
e59a8453
RS
416/* Match REGEXP against STRING, searching all of STRING,
417 and return the index of the match, or negative on failure.
418 This does not clobber the match data. */
419
420int
421fast_string_match (regexp, string)
422 Lisp_Object regexp, string;
423{
424 int val;
487282dc 425 struct re_pattern_buffer *bufp;
e59a8453 426
0c8533c6 427 bufp = compile_pattern (regexp, 0, 0, 0, STRING_MULTIBYTE (string));
e59a8453 428 immediate_quit = 1;
8bb43c28
RS
429 re_match_object = string;
430
487282dc 431 val = re_search (bufp, (char *) XSTRING (string)->data,
4996330b 432 XSTRING (string)->size_byte, 0, XSTRING (string)->size_byte,
e59a8453
RS
433 0);
434 immediate_quit = 0;
435 return val;
436}
5679531d
KH
437
438/* Match REGEXP against STRING, searching all of STRING ignoring case,
439 and return the index of the match, or negative on failure.
0c8533c6
RS
440 This does not clobber the match data.
441 We assume that STRING contains single-byte characters. */
5679531d
KH
442
443extern Lisp_Object Vascii_downcase_table;
444
445int
b4577c63 446fast_c_string_match_ignore_case (regexp, string)
5679531d
KH
447 Lisp_Object regexp;
448 char *string;
449{
450 int val;
451 struct re_pattern_buffer *bufp;
452 int len = strlen (string);
453
0c8533c6 454 regexp = string_make_unibyte (regexp);
b4577c63 455 re_match_object = Qt;
5679531d 456 bufp = compile_pattern (regexp, 0,
0c8533c6 457 XCHAR_TABLE (Vascii_downcase_table)->contents, 0,
f8bd51c4 458 0);
5679531d
KH
459 immediate_quit = 1;
460 val = re_search (bufp, string, len, 0, len, 0);
461 immediate_quit = 0;
462 return val;
463}
ca1d1d23 464\f
9169c321
JB
465/* max and min. */
466
467static int
468max (a, b)
469 int a, b;
470{
471 return ((a > b) ? a : b);
472}
473
474static int
475min (a, b)
476 int a, b;
477{
478 return ((a < b) ? a : b);
479}
480
481\f
482/* The newline cache: remembering which sections of text have no newlines. */
483
484/* If the user has requested newline caching, make sure it's on.
485 Otherwise, make sure it's off.
486 This is our cheezy way of associating an action with the change of
487 state of a buffer-local variable. */
488static void
489newline_cache_on_off (buf)
490 struct buffer *buf;
491{
492 if (NILP (buf->cache_long_line_scans))
493 {
494 /* It should be off. */
495 if (buf->newline_cache)
496 {
497 free_region_cache (buf->newline_cache);
498 buf->newline_cache = 0;
499 }
500 }
501 else
502 {
503 /* It should be on. */
504 if (buf->newline_cache == 0)
505 buf->newline_cache = new_region_cache ();
506 }
507}
508
509\f
510/* Search for COUNT instances of the character TARGET between START and END.
511
512 If COUNT is positive, search forwards; END must be >= START.
513 If COUNT is negative, search backwards for the -COUNTth instance;
514 END must be <= START.
515 If COUNT is zero, do anything you please; run rogue, for all I care.
516
517 If END is zero, use BEGV or ZV instead, as appropriate for the
518 direction indicated by COUNT.
ffd56f97
JB
519
520 If we find COUNT instances, set *SHORTAGE to zero, and return the
5bfe95c9
RS
521 position after the COUNTth match. Note that for reverse motion
522 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 523
9169c321
JB
524 If we don't find COUNT instances before reaching END, set *SHORTAGE
525 to the number of TARGETs left unfound, and return END.
ffd56f97 526
087a5f81
RS
527 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
528 except when inside redisplay. */
529
9169c321
JB
530scan_buffer (target, start, end, count, shortage, allow_quit)
531 register int target;
532 int start, end;
533 int count;
534 int *shortage;
087a5f81 535 int allow_quit;
ca1d1d23 536{
9169c321
JB
537 struct region_cache *newline_cache;
538 int direction;
ffd56f97 539
9169c321
JB
540 if (count > 0)
541 {
542 direction = 1;
543 if (! end) end = ZV;
544 }
545 else
546 {
547 direction = -1;
548 if (! end) end = BEGV;
549 }
ffd56f97 550
9169c321
JB
551 newline_cache_on_off (current_buffer);
552 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
553
554 if (shortage != 0)
555 *shortage = 0;
556
087a5f81 557 immediate_quit = allow_quit;
ca1d1d23 558
ffd56f97 559 if (count > 0)
9169c321 560 while (start != end)
ca1d1d23 561 {
9169c321
JB
562 /* Our innermost scanning loop is very simple; it doesn't know
563 about gaps, buffer ends, or the newline cache. ceiling is
564 the position of the last character before the next such
565 obstacle --- the last character the dumb search loop should
566 examine. */
fa8ed3e0
RS
567 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
568 int start_byte = CHAR_TO_BYTE (start);
9169c321
JB
569
570 /* If we're looking for a newline, consult the newline cache
571 to see where we can avoid some scanning. */
572 if (target == '\n' && newline_cache)
573 {
574 int next_change;
575 immediate_quit = 0;
576 while (region_cache_forward
fa8ed3e0
RS
577 (current_buffer, newline_cache, start_byte, &next_change))
578 start_byte = next_change;
cbe0db0d 579 immediate_quit = allow_quit;
9169c321 580
fa8ed3e0
RS
581 /* START should never be after END. */
582 if (start_byte > ceiling_byte)
583 start_byte = ceiling_byte;
9169c321
JB
584
585 /* Now the text after start is an unknown region, and
586 next_change is the position of the next known region. */
fa8ed3e0 587 ceiling_byte = min (next_change - 1, ceiling_byte);
9169c321
JB
588 }
589
590 /* The dumb loop can only scan text stored in contiguous
591 bytes. BUFFER_CEILING_OF returns the last character
592 position that is contiguous, so the ceiling is the
593 position after that. */
fa8ed3e0 594 ceiling_byte = min (BUFFER_CEILING_OF (start_byte), ceiling_byte);
9169c321
JB
595
596 {
597 /* The termination address of the dumb loop. */
fa8ed3e0
RS
598 register unsigned char *ceiling_addr
599 = BYTE_POS_ADDR (ceiling_byte) + 1;
600 register unsigned char *cursor
601 = BYTE_POS_ADDR (start_byte);
9169c321
JB
602 unsigned char *base = cursor;
603
604 while (cursor < ceiling_addr)
605 {
606 unsigned char *scan_start = cursor;
607
608 /* The dumb loop. */
609 while (*cursor != target && ++cursor < ceiling_addr)
610 ;
611
612 /* If we're looking for newlines, cache the fact that
613 the region from start to cursor is free of them. */
614 if (target == '\n' && newline_cache)
615 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
616 start_byte + scan_start - base,
617 start_byte + cursor - base);
9169c321
JB
618
619 /* Did we find the target character? */
620 if (cursor < ceiling_addr)
621 {
622 if (--count == 0)
623 {
624 immediate_quit = 0;
fa8ed3e0 625 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
9169c321
JB
626 }
627 cursor++;
628 }
629 }
630
fa8ed3e0 631 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321 632 }
ca1d1d23
JB
633 }
634 else
9169c321
JB
635 while (start > end)
636 {
637 /* The last character to check before the next obstacle. */
fa8ed3e0
RS
638 int ceiling_byte = CHAR_TO_BYTE (end);
639 int start_byte = CHAR_TO_BYTE (start);
9169c321
JB
640
641 /* Consult the newline cache, if appropriate. */
642 if (target == '\n' && newline_cache)
643 {
644 int next_change;
645 immediate_quit = 0;
646 while (region_cache_backward
fa8ed3e0
RS
647 (current_buffer, newline_cache, start_byte, &next_change))
648 start_byte = next_change;
cbe0db0d 649 immediate_quit = allow_quit;
9169c321
JB
650
651 /* Start should never be at or before end. */
fa8ed3e0
RS
652 if (start_byte <= ceiling_byte)
653 start_byte = ceiling_byte + 1;
9169c321
JB
654
655 /* Now the text before start is an unknown region, and
656 next_change is the position of the next known region. */
fa8ed3e0 657 ceiling_byte = max (next_change, ceiling_byte);
9169c321
JB
658 }
659
660 /* Stop scanning before the gap. */
fa8ed3e0 661 ceiling_byte = max (BUFFER_FLOOR_OF (start_byte - 1), ceiling_byte);
9169c321
JB
662
663 {
664 /* The termination address of the dumb loop. */
fa8ed3e0
RS
665 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
666 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
667 unsigned char *base = cursor;
668
669 while (cursor >= ceiling_addr)
670 {
671 unsigned char *scan_start = cursor;
672
673 while (*cursor != target && --cursor >= ceiling_addr)
674 ;
675
676 /* If we're looking for newlines, cache the fact that
677 the region from after the cursor to start is free of them. */
678 if (target == '\n' && newline_cache)
679 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
680 start_byte + cursor - base,
681 start_byte + scan_start - base);
9169c321
JB
682
683 /* Did we find the target character? */
684 if (cursor >= ceiling_addr)
685 {
686 if (++count >= 0)
687 {
688 immediate_quit = 0;
fa8ed3e0 689 return BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
690 }
691 cursor--;
692 }
693 }
694
fa8ed3e0 695 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
696 }
697 }
698
ca1d1d23
JB
699 immediate_quit = 0;
700 if (shortage != 0)
ffd56f97 701 *shortage = count * direction;
9169c321 702 return start;
ca1d1d23 703}
fa8ed3e0
RS
704\f
705/* Search for COUNT instances of a line boundary, which means either a
706 newline or (if selective display enabled) a carriage return.
707 Start at START. If COUNT is negative, search backwards.
708
709 We report the resulting position by calling TEMP_SET_PT_BOTH.
710
711 If we find COUNT instances. we position after (always after,
712 even if scanning backwards) the COUNTth match, and return 0.
713
714 If we don't find COUNT instances before reaching the end of the
715 buffer (or the beginning, if scanning backwards), we return
716 the number of line boundaries left unfound, and position at
717 the limit we bumped up against.
718
719 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
d5d57b92 720 except in special cases. */
ca1d1d23 721
63fa018d 722int
fa8ed3e0
RS
723scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
724 int start, start_byte;
725 int limit, limit_byte;
726 register int count;
727 int allow_quit;
63fa018d 728{
fa8ed3e0
RS
729 int direction = ((count > 0) ? 1 : -1);
730
731 register unsigned char *cursor;
732 unsigned char *base;
733
734 register int ceiling;
735 register unsigned char *ceiling_addr;
736
d5d57b92
RS
737 int old_immediate_quit = immediate_quit;
738
fa8ed3e0
RS
739 /* If we are not in selective display mode,
740 check only for newlines. */
741 int selective_display = (!NILP (current_buffer->selective_display)
742 && !INTEGERP (current_buffer->selective_display));
743
744 /* The code that follows is like scan_buffer
745 but checks for either newline or carriage return. */
746
d5d57b92
RS
747 if (allow_quit)
748 immediate_quit++;
fa8ed3e0
RS
749
750 start_byte = CHAR_TO_BYTE (start);
751
752 if (count > 0)
753 {
754 while (start_byte < limit_byte)
755 {
756 ceiling = BUFFER_CEILING_OF (start_byte);
757 ceiling = min (limit_byte - 1, ceiling);
758 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
759 base = (cursor = BYTE_POS_ADDR (start_byte));
760 while (1)
761 {
762 while (*cursor != '\n' && ++cursor != ceiling_addr)
763 ;
764
765 if (cursor != ceiling_addr)
766 {
767 if (--count == 0)
768 {
d5d57b92 769 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
770 start_byte = start_byte + cursor - base + 1;
771 start = BYTE_TO_CHAR (start_byte);
772 TEMP_SET_PT_BOTH (start, start_byte);
773 return 0;
774 }
775 else
776 if (++cursor == ceiling_addr)
777 break;
778 }
779 else
780 break;
781 }
782 start_byte += cursor - base;
783 }
784 }
785 else
786 {
787 int start_byte = CHAR_TO_BYTE (start);
788 while (start_byte > limit_byte)
789 {
790 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
791 ceiling = max (limit_byte, ceiling);
792 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
793 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
794 while (1)
795 {
796 while (--cursor != ceiling_addr && *cursor != '\n')
797 ;
798
799 if (cursor != ceiling_addr)
800 {
801 if (++count == 0)
802 {
d5d57b92 803 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
804 /* Return the position AFTER the match we found. */
805 start_byte = start_byte + cursor - base + 1;
806 start = BYTE_TO_CHAR (start_byte);
807 TEMP_SET_PT_BOTH (start, start_byte);
808 return 0;
809 }
810 }
811 else
812 break;
813 }
814 /* Here we add 1 to compensate for the last decrement
815 of CURSOR, which took it past the valid range. */
816 start_byte += cursor - base + 1;
817 }
818 }
819
820 TEMP_SET_PT_BOTH (limit, limit_byte);
d5d57b92 821 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
822
823 return count * direction;
63fa018d
RS
824}
825
ca1d1d23 826int
fa8ed3e0 827find_next_newline_no_quit (from, cnt)
ca1d1d23
JB
828 register int from, cnt;
829{
fa8ed3e0 830 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
9169c321
JB
831}
832
9169c321
JB
833/* Like find_next_newline, but returns position before the newline,
834 not after, and only search up to TO. This isn't just
835 find_next_newline (...)-1, because you might hit TO. */
fa8ed3e0 836
9169c321
JB
837int
838find_before_next_newline (from, to, cnt)
cbe0db0d 839 int from, to, cnt;
9169c321
JB
840{
841 int shortage;
842 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
843
844 if (shortage == 0)
845 pos--;
846
847 return pos;
ca1d1d23
JB
848}
849\f
ca1d1d23
JB
850/* Subroutines of Lisp buffer search functions. */
851
852static Lisp_Object
b819a390 853search_command (string, bound, noerror, count, direction, RE, posix)
ca1d1d23
JB
854 Lisp_Object string, bound, noerror, count;
855 int direction;
856 int RE;
b819a390 857 int posix;
ca1d1d23
JB
858{
859 register int np;
9f43ad85 860 int lim, lim_byte;
ca1d1d23
JB
861 int n = direction;
862
863 if (!NILP (count))
864 {
865 CHECK_NUMBER (count, 3);
866 n *= XINT (count);
867 }
868
869 CHECK_STRING (string, 0);
870 if (NILP (bound))
9f43ad85
RS
871 {
872 if (n > 0)
873 lim = ZV, lim_byte = ZV_BYTE;
874 else
875 lim = BEGV, lim_byte = BEGV_BYTE;
876 }
ca1d1d23
JB
877 else
878 {
879 CHECK_NUMBER_COERCE_MARKER (bound, 1);
880 lim = XINT (bound);
9f43ad85 881 lim_byte = CHAR_TO_BYTE (lim);
6ec8bbd2 882 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
883 error ("Invalid search bound (wrong side of point)");
884 if (lim > ZV)
9f43ad85 885 lim = ZV, lim_byte = ZV_BYTE;
ca1d1d23 886 if (lim < BEGV)
9f43ad85 887 lim = BEGV, lim_byte = BEGV_BYTE;
ca1d1d23
JB
888 }
889
9f43ad85 890 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
ca1d1d23 891 (!NILP (current_buffer->case_fold_search)
b1428bd8
RS
892 ? XCHAR_TABLE (current_buffer->case_canon_table)->contents
893 : 0),
ca1d1d23 894 (!NILP (current_buffer->case_fold_search)
b1428bd8
RS
895 ? XCHAR_TABLE (current_buffer->case_eqv_table)->contents
896 : 0),
b819a390 897 posix);
ca1d1d23
JB
898 if (np <= 0)
899 {
900 if (NILP (noerror))
901 return signal_failure (string);
902 if (!EQ (noerror, Qt))
903 {
904 if (lim < BEGV || lim > ZV)
905 abort ();
9f43ad85 906 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
907 return Qnil;
908#if 0 /* This would be clean, but maybe programs depend on
909 a value of nil here. */
481399bf 910 np = lim;
a5f217b8 911#endif
ca1d1d23 912 }
481399bf
RS
913 else
914 return Qnil;
ca1d1d23
JB
915 }
916
917 if (np < BEGV || np > ZV)
918 abort ();
919
920 SET_PT (np);
921
922 return make_number (np);
923}
924\f
fa8ed3e0
RS
925/* Return 1 if REGEXP it matches just one constant string. */
926
b6d6a51c
KH
927static int
928trivial_regexp_p (regexp)
929 Lisp_Object regexp;
930{
4996330b 931 int len = XSTRING (regexp)->size_byte;
b6d6a51c
KH
932 unsigned char *s = XSTRING (regexp)->data;
933 unsigned char c;
934 while (--len >= 0)
935 {
936 switch (*s++)
937 {
938 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
939 return 0;
940 case '\\':
941 if (--len < 0)
942 return 0;
943 switch (*s++)
944 {
945 case '|': case '(': case ')': case '`': case '\'': case 'b':
946 case 'B': case '<': case '>': case 'w': case 'W': case 's':
866f60fd 947 case 'S': case '=':
5679531d 948 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 949 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
950 case '6': case '7': case '8': case '9':
951 return 0;
952 }
953 }
954 }
955 return 1;
956}
957
ca325161 958/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 959 starting at position POS and stopping at position LIM,
b819a390 960 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
961 a regular expression if RE is true.
962
963 If N is positive, searching is forward and LIM must be greater than POS.
964 If N is negative, searching is backward and LIM must be less than POS.
965
966 Returns -x if only N-x occurrences found (x > 0),
967 or else the position at the beginning of the Nth occurrence
b819a390
RS
968 (if searching backward) or the end (if searching forward).
969
970 POSIX is nonzero if we want full backtracking (POSIX style)
971 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 972
b819a390 973static int
9f43ad85
RS
974search_buffer (string, pos, pos_byte, lim, lim_byte, n,
975 RE, trt, inverse_trt, posix)
ca1d1d23
JB
976 Lisp_Object string;
977 int pos;
9f43ad85 978 int pos_byte;
ca1d1d23 979 int lim;
9f43ad85 980 int lim_byte;
ca1d1d23
JB
981 int n;
982 int RE;
b1428bd8
RS
983 Lisp_Object *trt;
984 Lisp_Object *inverse_trt;
b819a390 985 int posix;
ca1d1d23
JB
986{
987 int len = XSTRING (string)->size;
0c8533c6 988 int len_byte = XSTRING (string)->size_byte;
ca1d1d23
JB
989 unsigned char *base_pat = XSTRING (string)->data;
990 register int *BM_tab;
991 int *BM_tab_base;
992 register int direction = ((n > 0) ? 1 : -1);
993 register int dirlen;
994 int infinity, limit, k, stride_for_teases;
995 register unsigned char *pat, *cursor, *p_limit;
996 register int i, j;
997 unsigned char *p1, *p2;
998 int s1, s2;
999
7074fde6
FP
1000 if (running_asynch_code)
1001 save_search_regs ();
1002
ca1d1d23 1003 /* Null string is found at starting position. */
3f57a499 1004 if (len == 0)
ca325161
RS
1005 {
1006 set_search_regs (pos, 0);
1007 return pos;
1008 }
3f57a499
RS
1009
1010 /* Searching 0 times means don't move. */
1011 if (n == 0)
ca1d1d23
JB
1012 return pos;
1013
b6d6a51c 1014 if (RE && !trivial_regexp_p (string))
ca1d1d23 1015 {
487282dc
KH
1016 struct re_pattern_buffer *bufp;
1017
0c8533c6
RS
1018 bufp = compile_pattern (string, &search_regs, trt, posix,
1019 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23 1020
ca1d1d23
JB
1021 immediate_quit = 1; /* Quit immediately if user types ^G,
1022 because letting this function finish
1023 can take too long. */
1024 QUIT; /* Do a pending quit right away,
1025 to avoid paradoxical behavior */
1026 /* Get pointers and sizes of the two strings
1027 that make up the visible portion of the buffer. */
1028
1029 p1 = BEGV_ADDR;
fa8ed3e0 1030 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1031 p2 = GAP_END_ADDR;
fa8ed3e0 1032 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1033 if (s1 < 0)
1034 {
1035 p2 = p1;
fa8ed3e0 1036 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1037 s1 = 0;
1038 }
1039 if (s2 < 0)
1040 {
fa8ed3e0 1041 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1042 s2 = 0;
1043 }
8bb43c28
RS
1044 re_match_object = Qnil;
1045
ca1d1d23
JB
1046 while (n < 0)
1047 {
42db823b 1048 int val;
487282dc 1049 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1050 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1051 &search_regs,
42db823b 1052 /* Don't allow match past current point */
4996330b 1053 pos_byte - BEGV_BYTE);
ca1d1d23 1054 if (val == -2)
b6d6a51c
KH
1055 {
1056 matcher_overflow ();
1057 }
ca1d1d23
JB
1058 if (val >= 0)
1059 {
4746118a 1060 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1061 if (search_regs.start[i] >= 0)
1062 {
fa8ed3e0
RS
1063 search_regs.start[i]
1064 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1065 search_regs.end[i]
1066 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1067 }
a3668d92 1068 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1069 /* Set pos to the new position. */
1070 pos = search_regs.start[0];
1071 }
1072 else
1073 {
1074 immediate_quit = 0;
1075 return (n);
1076 }
1077 n++;
1078 }
1079 while (n > 0)
1080 {
42db823b 1081 int val;
487282dc 1082 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1083 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1084 &search_regs,
1085 lim_byte - BEGV_BYTE);
ca1d1d23 1086 if (val == -2)
b6d6a51c
KH
1087 {
1088 matcher_overflow ();
1089 }
ca1d1d23
JB
1090 if (val >= 0)
1091 {
4746118a 1092 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1093 if (search_regs.start[i] >= 0)
1094 {
fa8ed3e0
RS
1095 search_regs.start[i]
1096 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1097 search_regs.end[i]
1098 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1099 }
a3668d92 1100 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1101 pos = search_regs.end[0];
1102 }
1103 else
1104 {
1105 immediate_quit = 0;
1106 return (0 - n);
1107 }
1108 n--;
1109 }
1110 immediate_quit = 0;
1111 return (pos);
1112 }
1113 else /* non-RE case */
1114 {
1115#ifdef C_ALLOCA
1116 int BM_tab_space[0400];
1117 BM_tab = &BM_tab_space[0];
1118#else
1119 BM_tab = (int *) alloca (0400 * sizeof (int));
1120#endif
b6d6a51c 1121 {
f8bd51c4
KH
1122 unsigned char *raw_pattern;
1123 int raw_pattern_size;
1124 unsigned char *patbuf;
1125 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1126
1127 /* MULTIBYTE says whether the text to be searched is multibyte.
1128 We must convert PATTERN to match that, or we will not really
1129 find things right. */
1130
1131 if (multibyte == STRING_MULTIBYTE (string))
1132 {
1133 raw_pattern = (char *) XSTRING (string)->data;
1134 raw_pattern_size = XSTRING (string)->size_byte;
1135 }
1136 else if (multibyte)
1137 {
1138 raw_pattern_size = count_size_as_multibyte (XSTRING (string)->data,
1139 XSTRING (string)->size);
1140 raw_pattern = (char *) alloca (raw_pattern_size + 1);
1141 copy_text (XSTRING (string)->data, raw_pattern,
1142 XSTRING (string)->size, 0, 1);
1143 }
1144 else
1145 {
1146 /* Converting multibyte to single-byte.
1147
1148 ??? Perhaps this conversion should be done in a special way
1149 by subtracting nonascii-insert-offset from each non-ASCII char,
1150 so that only the multibyte chars which really correspond to
1151 the chosen single-byte character set can possibly match. */
1152 raw_pattern_size = XSTRING (string)->size;
1153 raw_pattern = (char *) alloca (raw_pattern_size + 1);
1154 copy_text (XSTRING (string)->data, raw_pattern,
1155 XSTRING (string)->size, 1, 0);
1156 }
1157
1158 len_byte = raw_pattern_size;
1159 patbuf = (unsigned char *) alloca (len_byte);
b6d6a51c 1160 pat = patbuf;
f8bd51c4 1161 base_pat = raw_pattern;
0c8533c6 1162 while (--len_byte >= 0)
b6d6a51c
KH
1163 {
1164 /* If we got here and the RE flag is set, it's because we're
1165 dealing with a regexp known to be trivial, so the backslash
1166 just quotes the next character. */
1167 if (RE && *base_pat == '\\')
1168 {
0c8533c6 1169 len_byte--;
b6d6a51c
KH
1170 base_pat++;
1171 }
2fd7a4a4 1172 *pat++ = (trt ? XINT (trt[*base_pat++]) : *base_pat++);
b6d6a51c 1173 }
0c8533c6 1174 len_byte = pat - patbuf;
b6d6a51c
KH
1175 pat = base_pat = patbuf;
1176 }
ca1d1d23
JB
1177 /* The general approach is that we are going to maintain that we know */
1178 /* the first (closest to the present position, in whatever direction */
1179 /* we're searching) character that could possibly be the last */
1180 /* (furthest from present position) character of a valid match. We */
1181 /* advance the state of our knowledge by looking at that character */
1182 /* and seeing whether it indeed matches the last character of the */
1183 /* pattern. If it does, we take a closer look. If it does not, we */
1184 /* move our pointer (to putative last characters) as far as is */
1185 /* logically possible. This amount of movement, which I call a */
1186 /* stride, will be the length of the pattern if the actual character */
1187 /* appears nowhere in the pattern, otherwise it will be the distance */
1188 /* from the last occurrence of that character to the end of the */
1189 /* pattern. */
1190 /* As a coding trick, an enormous stride is coded into the table for */
1191 /* characters that match the last character. This allows use of only */
1192 /* a single test, a test for having gone past the end of the */
1193 /* permissible match region, to test for both possible matches (when */
1194 /* the stride goes past the end immediately) and failure to */
1195 /* match (where you get nudged past the end one stride at a time). */
1196
1197 /* Here we make a "mickey mouse" BM table. The stride of the search */
1198 /* is determined only by the last character of the putative match. */
1199 /* If that character does not match, we will stride the proper */
1200 /* distance to propose a match that superimposes it on the last */
1201 /* instance of a character that matches it (per trt), or misses */
1202 /* it entirely if there is none. */
1203
0c8533c6
RS
1204 dirlen = len_byte * direction;
1205 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
ca1d1d23 1206 if (direction < 0)
0c8533c6 1207 pat = (base_pat += len_byte - 1);
ca1d1d23
JB
1208 BM_tab_base = BM_tab;
1209 BM_tab += 0400;
1210 j = dirlen; /* to get it in a register */
1211 /* A character that does not appear in the pattern induces a */
1212 /* stride equal to the pattern length. */
1213 while (BM_tab_base != BM_tab)
1214 {
1215 *--BM_tab = j;
1216 *--BM_tab = j;
1217 *--BM_tab = j;
1218 *--BM_tab = j;
1219 }
1220 i = 0;
1221 while (i != infinity)
1222 {
1223 j = pat[i]; i += direction;
1224 if (i == dirlen) i = infinity;
8d505039 1225 if (trt != 0)
ca1d1d23 1226 {
2fd7a4a4 1227 k = (j = XINT (trt[j]));
ca1d1d23
JB
1228 if (i == infinity)
1229 stride_for_teases = BM_tab[j];
1230 BM_tab[j] = dirlen - i;
1231 /* A translation table is accompanied by its inverse -- see */
1232 /* comment following downcase_table for details */
2fd7a4a4 1233 while ((j = (unsigned char) XINT (inverse_trt[j])) != k)
ca1d1d23
JB
1234 BM_tab[j] = dirlen - i;
1235 }
1236 else
1237 {
1238 if (i == infinity)
1239 stride_for_teases = BM_tab[j];
1240 BM_tab[j] = dirlen - i;
1241 }
1242 /* stride_for_teases tells how much to stride if we get a */
1243 /* match on the far character but are subsequently */
1244 /* disappointed, by recording what the stride would have been */
1245 /* for that character if the last character had been */
1246 /* different. */
1247 }
1248 infinity = dirlen - infinity;
fa8ed3e0
RS
1249 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1250 /* loop invariant - POS_BYTE points at where last char (first
1251 char if reverse) of pattern would align in a possible match. */
ca1d1d23
JB
1252 while (n != 0)
1253 {
b2c71fb4
KH
1254 /* It's been reported that some (broken) compiler thinks that
1255 Boolean expressions in an arithmetic context are unsigned.
1256 Using an explicit ?1:0 prevents this. */
fa8ed3e0
RS
1257 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1258 < 0)
ca1d1d23
JB
1259 return (n * (0 - direction));
1260 /* First we do the part we can by pointers (maybe nothing) */
1261 QUIT;
1262 pat = base_pat;
fa8ed3e0 1263 limit = pos_byte - dirlen + direction;
ca1d1d23
JB
1264 limit = ((direction > 0)
1265 ? BUFFER_CEILING_OF (limit)
1266 : BUFFER_FLOOR_OF (limit));
fa8ed3e0
RS
1267 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1268 can take on without hitting edge of buffer or the gap. */
ca1d1d23 1269 limit = ((direction > 0)
fa8ed3e0
RS
1270 ? min (lim_byte - 1, min (limit, pos_byte + 20000))
1271 : max (lim_byte, max (limit, pos_byte - 20000)));
1272 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1273 {
fa8ed3e0
RS
1274 p_limit = BYTE_POS_ADDR (limit);
1275 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
ca1d1d23
JB
1276 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1277 while (1) /* use one cursor setting as long as i can */
1278 {
1279 if (direction > 0) /* worth duplicating */
1280 {
1281 /* Use signed comparison if appropriate
1282 to make cursor+infinity sure to be > p_limit.
1283 Assuming that the buffer lies in a range of addresses
1284 that are all "positive" (as ints) or all "negative",
1285 either kind of comparison will work as long
1286 as we don't step by infinity. So pick the kind
1287 that works when we do step by infinity. */
8d505039 1288 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
9fa17f93 1289 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
ca1d1d23
JB
1290 cursor += BM_tab[*cursor];
1291 else
45b248b4 1292 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
ca1d1d23
JB
1293 cursor += BM_tab[*cursor];
1294 }
1295 else
1296 {
8d505039
RS
1297 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1298 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
ca1d1d23
JB
1299 cursor += BM_tab[*cursor];
1300 else
45b248b4 1301 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
ca1d1d23
JB
1302 cursor += BM_tab[*cursor];
1303 }
1304/* If you are here, cursor is beyond the end of the searched region. */
1305 /* This can happen if you match on the far character of the pattern, */
1306 /* because the "stride" of that character is infinity, a number able */
1307 /* to throw you well beyond the end of the search. It can also */
1308 /* happen if you fail to match within the permitted region and would */
1309 /* otherwise try a character beyond that region */
0c8533c6 1310 if ((cursor - p_limit) * direction <= len_byte)
ca1d1d23
JB
1311 break; /* a small overrun is genuine */
1312 cursor -= infinity; /* large overrun = hit */
1313 i = dirlen - direction;
8d505039 1314 if (trt != 0)
ca1d1d23
JB
1315 {
1316 while ((i -= direction) + direction != 0)
2fd7a4a4 1317 if (pat[i] != XINT (trt[*(cursor -= direction)]))
ca1d1d23
JB
1318 break;
1319 }
1320 else
1321 {
1322 while ((i -= direction) + direction != 0)
1323 if (pat[i] != *(cursor -= direction))
1324 break;
1325 }
1326 cursor += dirlen - i - direction; /* fix cursor */
1327 if (i + direction == 0)
1328 {
0c8533c6
RS
1329 int position;
1330
ca1d1d23 1331 cursor -= direction;
1113d9db 1332
0c8533c6
RS
1333 position = pos_byte + cursor - p2 + ((direction > 0)
1334 ? 1 - len_byte : 0);
1335 set_search_regs (position, len_byte);
ca325161 1336
ca1d1d23
JB
1337 if ((n -= direction) != 0)
1338 cursor += dirlen; /* to resume search */
1339 else
1340 return ((direction > 0)
1341 ? search_regs.end[0] : search_regs.start[0]);
1342 }
1343 else
1344 cursor += stride_for_teases; /* <sigh> we lose - */
1345 }
fa8ed3e0 1346 pos_byte += cursor - p2;
ca1d1d23
JB
1347 }
1348 else
1349 /* Now we'll pick up a clump that has to be done the hard */
1350 /* way because it covers a discontinuity */
1351 {
1352 limit = ((direction > 0)
fa8ed3e0
RS
1353 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1354 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
ca1d1d23 1355 limit = ((direction > 0)
0c8533c6
RS
1356 ? min (limit + len_byte, lim_byte - 1)
1357 : max (limit - len_byte, lim_byte));
fa8ed3e0 1358 /* LIMIT is now the last value POS_BYTE can have
ca1d1d23
JB
1359 and still be valid for a possible match. */
1360 while (1)
1361 {
1362 /* This loop can be coded for space rather than */
1363 /* speed because it will usually run only once. */
1364 /* (the reach is at most len + 21, and typically */
1365 /* does not exceed len) */
fa8ed3e0
RS
1366 while ((limit - pos_byte) * direction >= 0)
1367 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
ca1d1d23 1368 /* now run the same tests to distinguish going off the */
eb8c3be9 1369 /* end, a match or a phony match. */
0c8533c6 1370 if ((pos_byte - limit) * direction <= len_byte)
ca1d1d23
JB
1371 break; /* ran off the end */
1372 /* Found what might be a match.
fa8ed3e0
RS
1373 Set POS_BYTE back to last (first if reverse) pos. */
1374 pos_byte -= infinity;
ca1d1d23
JB
1375 i = dirlen - direction;
1376 while ((i -= direction) + direction != 0)
1377 {
fa8ed3e0 1378 pos_byte -= direction;
8d505039 1379 if (pat[i] != (trt != 0
fa8ed3e0
RS
1380 ? XINT (trt[FETCH_BYTE (pos_byte)])
1381 : FETCH_BYTE (pos_byte)))
ca1d1d23
JB
1382 break;
1383 }
fa8ed3e0
RS
1384 /* Above loop has moved POS_BYTE part or all the way
1385 back to the first pos (last pos if reverse).
ca1d1d23 1386 Set it once again at the last (first if reverse) char. */
fa8ed3e0 1387 pos_byte += dirlen - i- direction;
ca1d1d23
JB
1388 if (i + direction == 0)
1389 {
0c8533c6 1390 int position;
fa8ed3e0 1391 pos_byte -= direction;
1113d9db 1392
0c8533c6
RS
1393 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1394
1395 set_search_regs (position, len_byte);
ca325161 1396
ca1d1d23 1397 if ((n -= direction) != 0)
fa8ed3e0 1398 pos_byte += dirlen; /* to resume search */
ca1d1d23
JB
1399 else
1400 return ((direction > 0)
1401 ? search_regs.end[0] : search_regs.start[0]);
1402 }
1403 else
fa8ed3e0 1404 pos_byte += stride_for_teases;
ca1d1d23
JB
1405 }
1406 }
1407 /* We have done one clump. Can we continue? */
fa8ed3e0 1408 if ((lim_byte - pos_byte) * direction < 0)
ca1d1d23
JB
1409 return ((0 - n) * direction);
1410 }
fa8ed3e0 1411 return BYTE_TO_CHAR (pos_byte);
ca1d1d23
JB
1412 }
1413}
ca325161 1414
fa8ed3e0 1415/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
ca325161
RS
1416 for a match just found in the current buffer. */
1417
1418static void
fa8ed3e0
RS
1419set_search_regs (beg_byte, nbytes)
1420 int beg_byte, nbytes;
ca325161
RS
1421{
1422 /* Make sure we have registers in which to store
1423 the match position. */
1424 if (search_regs.num_regs == 0)
1425 {
2d4a771a
RS
1426 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1427 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 1428 search_regs.num_regs = 2;
ca325161
RS
1429 }
1430
fa8ed3e0
RS
1431 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1432 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 1433 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 1434}
ca1d1d23
JB
1435\f
1436/* Given a string of words separated by word delimiters,
1437 compute a regexp that matches those exact words
1438 separated by arbitrary punctuation. */
1439
1440static Lisp_Object
1441wordify (string)
1442 Lisp_Object string;
1443{
1444 register unsigned char *p, *o;
0c8533c6 1445 register int i, i_byte, len, punct_count = 0, word_count = 0;
ca1d1d23 1446 Lisp_Object val;
0c8533c6
RS
1447 int prev_c = 0;
1448 int adjust;
ca1d1d23
JB
1449
1450 CHECK_STRING (string, 0);
1451 p = XSTRING (string)->data;
1452 len = XSTRING (string)->size;
1453
0c8533c6
RS
1454 for (i = 0, i_byte = 0; i < len; )
1455 {
1456 int c;
1457
1458 if (STRING_MULTIBYTE (string))
1459 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1460 else
1461 c = XSTRING (string)->data[i++];
1462
1463 if (SYNTAX (c) != Sword)
1464 {
1465 punct_count++;
1466 if (i > 0 && SYNTAX (prev_c) == Sword)
1467 word_count++;
1468 }
ca1d1d23 1469
0c8533c6
RS
1470 prev_c = c;
1471 }
1472
1473 if (SYNTAX (prev_c) == Sword)
1474 word_count++;
1475 if (!word_count)
1476 return build_string ("");
1477
1478 adjust = - punct_count + 5 * (word_count - 1) + 4;
1479 val = make_uninit_multibyte_string (len + adjust,
1480 XSTRING (string)->size_byte + adjust);
ca1d1d23
JB
1481
1482 o = XSTRING (val)->data;
1483 *o++ = '\\';
1484 *o++ = 'b';
1485
0c8533c6 1486 for (i = 0; i < XSTRING (val)->size_byte; i++)
ca1d1d23
JB
1487 if (SYNTAX (p[i]) == Sword)
1488 *o++ = p[i];
1489 else if (i > 0 && SYNTAX (p[i-1]) == Sword && --word_count)
1490 {
1491 *o++ = '\\';
1492 *o++ = 'W';
1493 *o++ = '\\';
1494 *o++ = 'W';
1495 *o++ = '*';
1496 }
1497
1498 *o++ = '\\';
1499 *o++ = 'b';
1500
1501 return val;
1502}
1503\f
1504DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
6af43974 1505 "MSearch backward: ",
ca1d1d23
JB
1506 "Search backward from point for STRING.\n\
1507Set point to the beginning of the occurrence found, and return point.\n\
1508An optional second argument bounds the search; it is a buffer position.\n\
1509The match found must not extend before that position.\n\
1510Optional third argument, if t, means if fail just return nil (no error).\n\
1511 If not nil and not t, position at limit of search and return nil.\n\
1512Optional fourth argument is repeat count--search for successive occurrences.\n\
1513See also the functions `match-beginning', `match-end' and `replace-match'.")
1514 (string, bound, noerror, count)
1515 Lisp_Object string, bound, noerror, count;
1516{
b819a390 1517 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
1518}
1519
6af43974 1520DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
ca1d1d23
JB
1521 "Search forward from point for STRING.\n\
1522Set point to the end of the occurrence found, and return point.\n\
1523An optional second argument bounds the search; it is a buffer position.\n\
1524The match found must not extend after that position. nil is equivalent\n\
1525 to (point-max).\n\
1526Optional third argument, if t, means if fail just return nil (no error).\n\
1527 If not nil and not t, move to limit of search and return nil.\n\
1528Optional fourth argument is repeat count--search for successive occurrences.\n\
1529See also the functions `match-beginning', `match-end' and `replace-match'.")
1530 (string, bound, noerror, count)
1531 Lisp_Object string, bound, noerror, count;
1532{
b819a390 1533 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
1534}
1535
1536DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
1537 "sWord search backward: ",
1538 "Search backward from point for STRING, ignoring differences in punctuation.\n\
1539Set point to the beginning of the occurrence found, and return point.\n\
1540An optional second argument bounds the search; it is a buffer position.\n\
1541The match found must not extend before that position.\n\
1542Optional third argument, if t, means if fail just return nil (no error).\n\
1543 If not nil and not t, move to limit of search and return nil.\n\
1544Optional fourth argument is repeat count--search for successive occurrences.")
1545 (string, bound, noerror, count)
1546 Lisp_Object string, bound, noerror, count;
1547{
b819a390 1548 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
1549}
1550
1551DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
1552 "sWord search: ",
1553 "Search forward from point for STRING, ignoring differences in punctuation.\n\
1554Set point to the end of the occurrence found, and return point.\n\
1555An optional second argument bounds the search; it is a buffer position.\n\
1556The match found must not extend after that position.\n\
1557Optional third argument, if t, means if fail just return nil (no error).\n\
1558 If not nil and not t, move to limit of search and return nil.\n\
1559Optional fourth argument is repeat count--search for successive occurrences.")
1560 (string, bound, noerror, count)
1561 Lisp_Object string, bound, noerror, count;
1562{
b819a390 1563 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
1564}
1565
1566DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
1567 "sRE search backward: ",
1568 "Search backward from point for match for regular expression REGEXP.\n\
1569Set point to the beginning of the match, and return point.\n\
1570The match found is the one starting last in the buffer\n\
19c0a730 1571and yet ending before the origin of the search.\n\
ca1d1d23
JB
1572An optional second argument bounds the search; it is a buffer position.\n\
1573The match found must start at or after that position.\n\
1574Optional third argument, if t, means if fail just return nil (no error).\n\
1575 If not nil and not t, move to limit of search and return nil.\n\
1576Optional fourth argument is repeat count--search for successive occurrences.\n\
1577See also the functions `match-beginning', `match-end' and `replace-match'.")
19c0a730
KH
1578 (regexp, bound, noerror, count)
1579 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 1580{
b819a390 1581 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
1582}
1583
1584DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
1585 "sRE search: ",
1586 "Search forward from point for regular expression REGEXP.\n\
1587Set point to the end of the occurrence found, and return point.\n\
1588An optional second argument bounds the search; it is a buffer position.\n\
1589The match found must not extend after that position.\n\
1590Optional third argument, if t, means if fail just return nil (no error).\n\
1591 If not nil and not t, move to limit of search and return nil.\n\
1592Optional fourth argument is repeat count--search for successive occurrences.\n\
1593See also the functions `match-beginning', `match-end' and `replace-match'.")
19c0a730
KH
1594 (regexp, bound, noerror, count)
1595 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 1596{
b819a390
RS
1597 return search_command (regexp, bound, noerror, count, 1, 1, 0);
1598}
1599
1600DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
1601 "sPosix search backward: ",
1602 "Search backward from point for match for regular expression REGEXP.\n\
1603Find the longest match in accord with Posix regular expression rules.\n\
1604Set point to the beginning of the match, and return point.\n\
1605The match found is the one starting last in the buffer\n\
1606and yet ending before the origin of the search.\n\
1607An optional second argument bounds the search; it is a buffer position.\n\
1608The match found must start at or after that position.\n\
1609Optional third argument, if t, means if fail just return nil (no error).\n\
1610 If not nil and not t, move to limit of search and return nil.\n\
1611Optional fourth argument is repeat count--search for successive occurrences.\n\
1612See also the functions `match-beginning', `match-end' and `replace-match'.")
1613 (regexp, bound, noerror, count)
1614 Lisp_Object regexp, bound, noerror, count;
1615{
1616 return search_command (regexp, bound, noerror, count, -1, 1, 1);
1617}
1618
1619DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
1620 "sPosix search: ",
1621 "Search forward from point for regular expression REGEXP.\n\
1622Find the longest match in accord with Posix regular expression rules.\n\
1623Set point to the end of the occurrence found, and return point.\n\
1624An optional second argument bounds the search; it is a buffer position.\n\
1625The match found must not extend after that position.\n\
1626Optional third argument, if t, means if fail just return nil (no error).\n\
1627 If not nil and not t, move to limit of search and return nil.\n\
1628Optional fourth argument is repeat count--search for successive occurrences.\n\
1629See also the functions `match-beginning', `match-end' and `replace-match'.")
1630 (regexp, bound, noerror, count)
1631 Lisp_Object regexp, bound, noerror, count;
1632{
1633 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
1634}
1635\f
d7a5ad5f 1636DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
ca1d1d23
JB
1637 "Replace text matched by last search with NEWTEXT.\n\
1638If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
5b9cf4b2
RS
1639Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
1640based on the replaced text.\n\
1641If the replaced text has only capital letters\n\
1642and has at least one multiletter word, convert NEWTEXT to all caps.\n\
1643If the replaced text has at least one word starting with a capital letter,\n\
1644then capitalize each word in NEWTEXT.\n\n\
ca1d1d23
JB
1645If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
1646Otherwise treat `\\' as special:\n\
1647 `\\&' in NEWTEXT means substitute original matched text.\n\
1648 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
1649 If Nth parens didn't match, substitute nothing.\n\
1650 `\\\\' means insert one `\\'.\n\
1113d9db 1651FIXEDCASE and LITERAL are optional arguments.\n\
080c45fd
RS
1652Leaves point at end of replacement text.\n\
1653\n\
1654The optional fourth argument STRING can be a string to modify.\n\
1655In that case, this function creates and returns a new string\n\
d7a5ad5f
RS
1656which is made by replacing the part of STRING that was matched.\n\
1657\n\
1658The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
1659It says to replace just that subexpression instead of the whole match.\n\
1660This is useful only after a regular expression search or match\n\
1661since only regular expressions have distinguished subexpressions.")
1662 (newtext, fixedcase, literal, string, subexp)
1663 Lisp_Object newtext, fixedcase, literal, string, subexp;
ca1d1d23
JB
1664{
1665 enum { nochange, all_caps, cap_initial } case_action;
1666 register int pos, last;
1667 int some_multiletter_word;
97832bd0 1668 int some_lowercase;
73dc8771 1669 int some_uppercase;
208767c3 1670 int some_nonuppercase_initial;
ca1d1d23
JB
1671 register int c, prevc;
1672 int inslen;
d7a5ad5f 1673 int sub;
3e18eecf 1674 int opoint, newpoint;
ca1d1d23 1675
16fdc568 1676 CHECK_STRING (newtext, 0);
ca1d1d23 1677
080c45fd
RS
1678 if (! NILP (string))
1679 CHECK_STRING (string, 4);
1680
ca1d1d23
JB
1681 case_action = nochange; /* We tried an initialization */
1682 /* but some C compilers blew it */
4746118a
JB
1683
1684 if (search_regs.num_regs <= 0)
1685 error ("replace-match called before any match found");
1686
d7a5ad5f
RS
1687 if (NILP (subexp))
1688 sub = 0;
1689 else
1690 {
1691 CHECK_NUMBER (subexp, 3);
1692 sub = XINT (subexp);
1693 if (sub < 0 || sub >= search_regs.num_regs)
1694 args_out_of_range (subexp, make_number (search_regs.num_regs));
1695 }
1696
080c45fd
RS
1697 if (NILP (string))
1698 {
d7a5ad5f
RS
1699 if (search_regs.start[sub] < BEGV
1700 || search_regs.start[sub] > search_regs.end[sub]
1701 || search_regs.end[sub] > ZV)
1702 args_out_of_range (make_number (search_regs.start[sub]),
1703 make_number (search_regs.end[sub]));
080c45fd
RS
1704 }
1705 else
1706 {
d7a5ad5f
RS
1707 if (search_regs.start[sub] < 0
1708 || search_regs.start[sub] > search_regs.end[sub]
1709 || search_regs.end[sub] > XSTRING (string)->size)
1710 args_out_of_range (make_number (search_regs.start[sub]),
1711 make_number (search_regs.end[sub]));
080c45fd 1712 }
ca1d1d23
JB
1713
1714 if (NILP (fixedcase))
1715 {
fa8ed3e0 1716 int beg;
ca1d1d23
JB
1717 /* Decide how to casify by examining the matched text. */
1718
fa8ed3e0
RS
1719 if (NILP (string))
1720 last = CHAR_TO_BYTE (search_regs.end[sub]);
1721 else
1722 last = search_regs.end[sub];
1723
1724 if (NILP (string))
1725 beg = CHAR_TO_BYTE (search_regs.start[sub]);
1726 else
1727 beg = search_regs.start[sub];
1728
ca1d1d23
JB
1729 prevc = '\n';
1730 case_action = all_caps;
1731
1732 /* some_multiletter_word is set nonzero if any original word
1733 is more than one letter long. */
1734 some_multiletter_word = 0;
97832bd0 1735 some_lowercase = 0;
208767c3 1736 some_nonuppercase_initial = 0;
73dc8771 1737 some_uppercase = 0;
ca1d1d23 1738
fa8ed3e0 1739 for (pos = beg; pos < last; pos++)
ca1d1d23 1740 {
080c45fd 1741 if (NILP (string))
5679531d 1742 c = FETCH_BYTE (pos);
080c45fd
RS
1743 else
1744 c = XSTRING (string)->data[pos];
1745
ca1d1d23
JB
1746 if (LOWERCASEP (c))
1747 {
1748 /* Cannot be all caps if any original char is lower case */
1749
97832bd0 1750 some_lowercase = 1;
ca1d1d23 1751 if (SYNTAX (prevc) != Sword)
208767c3 1752 some_nonuppercase_initial = 1;
ca1d1d23
JB
1753 else
1754 some_multiletter_word = 1;
1755 }
1756 else if (!NOCASEP (c))
1757 {
73dc8771 1758 some_uppercase = 1;
97832bd0 1759 if (SYNTAX (prevc) != Sword)
c4d460ce 1760 ;
97832bd0 1761 else
ca1d1d23
JB
1762 some_multiletter_word = 1;
1763 }
208767c3
RS
1764 else
1765 {
1766 /* If the initial is a caseless word constituent,
1767 treat that like a lowercase initial. */
1768 if (SYNTAX (prevc) != Sword)
1769 some_nonuppercase_initial = 1;
1770 }
ca1d1d23
JB
1771
1772 prevc = c;
1773 }
1774
97832bd0
RS
1775 /* Convert to all caps if the old text is all caps
1776 and has at least one multiletter word. */
1777 if (! some_lowercase && some_multiletter_word)
1778 case_action = all_caps;
c4d460ce 1779 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 1780 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 1781 case_action = cap_initial;
208767c3 1782 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
1783 /* Should x -> yz, operating on X, give Yz or YZ?
1784 We'll assume the latter. */
1785 case_action = all_caps;
97832bd0
RS
1786 else
1787 case_action = nochange;
ca1d1d23
JB
1788 }
1789
080c45fd
RS
1790 /* Do replacement in a string. */
1791 if (!NILP (string))
1792 {
1793 Lisp_Object before, after;
1794
1795 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
1796 make_number (search_regs.start[sub]));
1797 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 1798
636a5e28
RS
1799 /* Substitute parts of the match into NEWTEXT
1800 if desired. */
080c45fd
RS
1801 if (NILP (literal))
1802 {
1803 int lastpos = -1;
0c8533c6 1804 int lastpos_byte = -1;
080c45fd
RS
1805 /* We build up the substituted string in ACCUM. */
1806 Lisp_Object accum;
1807 Lisp_Object middle;
0c8533c6 1808 int pos_byte;
080c45fd
RS
1809
1810 accum = Qnil;
1811
0c8533c6 1812 for (pos_byte = 0, pos = 0; pos_byte < XSTRING (newtext)->size_byte;)
080c45fd
RS
1813 {
1814 int substart = -1;
1815 int subend;
1e79ec24 1816 int delbackslash = 0;
080c45fd 1817
0c8533c6
RS
1818 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
1819
080c45fd
RS
1820 if (c == '\\')
1821 {
0c8533c6 1822 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
080c45fd
RS
1823 if (c == '&')
1824 {
d7a5ad5f
RS
1825 substart = search_regs.start[sub];
1826 subend = search_regs.end[sub];
080c45fd
RS
1827 }
1828 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
1829 {
ad10348f 1830 if (search_regs.start[c - '0'] >= 0)
080c45fd
RS
1831 {
1832 substart = search_regs.start[c - '0'];
1833 subend = search_regs.end[c - '0'];
1834 }
1835 }
1e79ec24
KH
1836 else if (c == '\\')
1837 delbackslash = 1;
636a5e28
RS
1838 else
1839 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
1840 }
1841 if (substart >= 0)
1842 {
1843 if (pos - 1 != lastpos + 1)
0c8533c6
RS
1844 middle = substring_both (newtext, lastpos + 1,
1845 lastpos_byte + 1,
1846 pos - 1, pos_byte - 1);
080c45fd
RS
1847 else
1848 middle = Qnil;
1849 accum = concat3 (accum, middle,
0c8533c6
RS
1850 Fsubstring (string,
1851 make_number (substart),
080c45fd
RS
1852 make_number (subend)));
1853 lastpos = pos;
0c8533c6 1854 lastpos_byte = pos_byte;
080c45fd 1855 }
1e79ec24
KH
1856 else if (delbackslash)
1857 {
0c8533c6
RS
1858 middle = substring_both (newtext, lastpos + 1,
1859 lastpos_byte + 1,
1860 pos, pos_byte);
1861
1e79ec24
KH
1862 accum = concat2 (accum, middle);
1863 lastpos = pos;
0c8533c6 1864 lastpos_byte = pos_byte;
1e79ec24 1865 }
080c45fd
RS
1866 }
1867
1868 if (pos != lastpos + 1)
0c8533c6
RS
1869 middle = substring_both (newtext, lastpos + 1,
1870 lastpos_byte + 1,
1871 pos, pos_byte);
080c45fd
RS
1872 else
1873 middle = Qnil;
1874
1875 newtext = concat2 (accum, middle);
1876 }
1877
636a5e28 1878 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
1879 if (case_action == all_caps)
1880 newtext = Fupcase (newtext);
1881 else if (case_action == cap_initial)
2b2eead9 1882 newtext = Fupcase_initials (newtext);
080c45fd
RS
1883
1884 return concat3 (before, newtext, after);
1885 }
1886
b0eba991
RS
1887 /* Record point, the move (quietly) to the start of the match. */
1888 if (PT > search_regs.start[sub])
1889 opoint = PT - ZV;
1890 else
1891 opoint = PT;
1892
fa8ed3e0 1893 TEMP_SET_PT (search_regs.start[sub]);
b0eba991 1894
9a76659d
JB
1895 /* We insert the replacement text before the old text, and then
1896 delete the original text. This means that markers at the
1897 beginning or end of the original will float to the corresponding
1898 position in the replacement. */
ca1d1d23 1899 if (!NILP (literal))
16fdc568 1900 Finsert_and_inherit (1, &newtext);
ca1d1d23
JB
1901 else
1902 {
1903 struct gcpro gcpro1;
16fdc568 1904 GCPRO1 (newtext);
ca1d1d23 1905
16fdc568 1906 for (pos = 0; pos < XSTRING (newtext)->size; pos++)
ca1d1d23 1907 {
6ec8bbd2 1908 int offset = PT - search_regs.start[sub];
9a76659d 1909
16fdc568 1910 c = XSTRING (newtext)->data[pos];
ca1d1d23
JB
1911 if (c == '\\')
1912 {
16fdc568 1913 c = XSTRING (newtext)->data[++pos];
ca1d1d23 1914 if (c == '&')
9a76659d
JB
1915 Finsert_buffer_substring
1916 (Fcurrent_buffer (),
d7a5ad5f
RS
1917 make_number (search_regs.start[sub] + offset),
1918 make_number (search_regs.end[sub] + offset));
78445046 1919 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
1920 {
1921 if (search_regs.start[c - '0'] >= 1)
9a76659d
JB
1922 Finsert_buffer_substring
1923 (Fcurrent_buffer (),
1924 make_number (search_regs.start[c - '0'] + offset),
1925 make_number (search_regs.end[c - '0'] + offset));
ca1d1d23 1926 }
636a5e28 1927 else if (c == '\\')
ca1d1d23 1928 insert_char (c);
636a5e28
RS
1929 else
1930 error ("Invalid use of `\\' in replacement text");
ca1d1d23
JB
1931 }
1932 else
1933 insert_char (c);
1934 }
1935 UNGCPRO;
1936 }
1937
6ec8bbd2 1938 inslen = PT - (search_regs.start[sub]);
d7a5ad5f 1939 del_range (search_regs.start[sub] + inslen, search_regs.end[sub] + inslen);
ca1d1d23
JB
1940
1941 if (case_action == all_caps)
6ec8bbd2 1942 Fupcase_region (make_number (PT - inslen), make_number (PT));
ca1d1d23 1943 else if (case_action == cap_initial)
6ec8bbd2 1944 Fupcase_initials_region (make_number (PT - inslen), make_number (PT));
b0eba991 1945
3e18eecf
RS
1946 newpoint = PT;
1947
b0eba991 1948 /* Put point back where it was in the text. */
8d808a65 1949 if (opoint <= 0)
fa8ed3e0 1950 TEMP_SET_PT (opoint + ZV);
b0eba991 1951 else
fa8ed3e0 1952 TEMP_SET_PT (opoint);
b0eba991
RS
1953
1954 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 1955 move_if_not_intangible (newpoint);
b0eba991 1956
ca1d1d23
JB
1957 return Qnil;
1958}
1959\f
1960static Lisp_Object
1961match_limit (num, beginningp)
1962 Lisp_Object num;
1963 int beginningp;
1964{
1965 register int n;
1966
1967 CHECK_NUMBER (num, 0);
1968 n = XINT (num);
4746118a
JB
1969 if (n < 0 || n >= search_regs.num_regs)
1970 args_out_of_range (num, make_number (search_regs.num_regs));
1971 if (search_regs.num_regs <= 0
1972 || search_regs.start[n] < 0)
ca1d1d23
JB
1973 return Qnil;
1974 return (make_number ((beginningp) ? search_regs.start[n]
1975 : search_regs.end[n]));
1976}
1977
1978DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
1979 "Return position of start of text matched by last search.\n\
5806161b
EN
1980SUBEXP, a number, specifies which parenthesized expression in the last\n\
1981 regexp.\n\
1982Value is nil if SUBEXPth pair didn't match, or there were less than\n\
1983 SUBEXP pairs.\n\
ca1d1d23 1984Zero means the entire text matched by the whole regexp or whole string.")
5806161b
EN
1985 (subexp)
1986 Lisp_Object subexp;
ca1d1d23 1987{
5806161b 1988 return match_limit (subexp, 1);
ca1d1d23
JB
1989}
1990
1991DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
1992 "Return position of end of text matched by last search.\n\
5806161b
EN
1993SUBEXP, a number, specifies which parenthesized expression in the last\n\
1994 regexp.\n\
1995Value is nil if SUBEXPth pair didn't match, or there were less than\n\
1996 SUBEXP pairs.\n\
ca1d1d23 1997Zero means the entire text matched by the whole regexp or whole string.")
5806161b
EN
1998 (subexp)
1999 Lisp_Object subexp;
ca1d1d23 2000{
5806161b 2001 return match_limit (subexp, 0);
ca1d1d23
JB
2002}
2003
56256c2a 2004DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
ca1d1d23
JB
2005 "Return a list containing all info on what the last search matched.\n\
2006Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
2007All the elements are markers or nil (nil if the Nth pair didn't match)\n\
2008if the last match was on a buffer; integers or nil if a string was matched.\n\
56256c2a
RS
2009Use `store-match-data' to reinstate the data in this list.\n\
2010\n\
2011If INTEGERS (the optional first argument) is non-nil, always use integers\n\
8ca821e9 2012\(rather than markers) to represent buffer positions.\n\
56256c2a
RS
2013If REUSE is a list, reuse it as part of the value. If REUSE is long enough\n\
2014to hold all the values, and if INTEGERS is non-nil, no consing is done.")
2015 (integers, reuse)
2016 Lisp_Object integers, reuse;
ca1d1d23 2017{
56256c2a 2018 Lisp_Object tail, prev;
4746118a 2019 Lisp_Object *data;
ca1d1d23
JB
2020 int i, len;
2021
daa37602 2022 if (NILP (last_thing_searched))
c36bcf1b 2023 return Qnil;
daa37602 2024
4746118a
JB
2025 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2026 * sizeof (Lisp_Object));
2027
ca1d1d23 2028 len = -1;
4746118a 2029 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2030 {
2031 int start = search_regs.start[i];
2032 if (start >= 0)
2033 {
56256c2a
RS
2034 if (EQ (last_thing_searched, Qt)
2035 || ! NILP (integers))
ca1d1d23 2036 {
c235cce7
KH
2037 XSETFASTINT (data[2 * i], start);
2038 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2039 }
0ed62dc7 2040 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2041 {
2042 data[2 * i] = Fmake_marker ();
daa37602
JB
2043 Fset_marker (data[2 * i],
2044 make_number (start),
2045 last_thing_searched);
ca1d1d23
JB
2046 data[2 * i + 1] = Fmake_marker ();
2047 Fset_marker (data[2 * i + 1],
daa37602
JB
2048 make_number (search_regs.end[i]),
2049 last_thing_searched);
ca1d1d23 2050 }
daa37602
JB
2051 else
2052 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2053 abort ();
2054
ca1d1d23
JB
2055 len = i;
2056 }
2057 else
2058 data[2 * i] = data [2 * i + 1] = Qnil;
2059 }
56256c2a
RS
2060
2061 /* If REUSE is not usable, cons up the values and return them. */
2062 if (! CONSP (reuse))
2063 return Flist (2 * len + 2, data);
2064
2065 /* If REUSE is a list, store as many value elements as will fit
2066 into the elements of REUSE. */
2067 for (i = 0, tail = reuse; CONSP (tail);
2068 i++, tail = XCONS (tail)->cdr)
2069 {
2070 if (i < 2 * len + 2)
2071 XCONS (tail)->car = data[i];
2072 else
2073 XCONS (tail)->car = Qnil;
2074 prev = tail;
2075 }
2076
2077 /* If we couldn't fit all value elements into REUSE,
2078 cons up the rest of them and add them to the end of REUSE. */
2079 if (i < 2 * len + 2)
2080 XCONS (prev)->cdr = Flist (2 * len + 2 - i, data + i);
2081
2082 return reuse;
ca1d1d23
JB
2083}
2084
2085
2086DEFUN ("store-match-data", Fstore_match_data, Sstore_match_data, 1, 1, 0,
2087 "Set internal data on last search match from elements of LIST.\n\
2088LIST should have been created by calling `match-data' previously.")
2089 (list)
2090 register Lisp_Object list;
2091{
2092 register int i;
2093 register Lisp_Object marker;
2094
7074fde6
FP
2095 if (running_asynch_code)
2096 save_search_regs ();
2097
ca1d1d23 2098 if (!CONSP (list) && !NILP (list))
b37902c8 2099 list = wrong_type_argument (Qconsp, list);
ca1d1d23 2100
daa37602
JB
2101 /* Unless we find a marker with a buffer in LIST, assume that this
2102 match data came from a string. */
2103 last_thing_searched = Qt;
2104
4746118a
JB
2105 /* Allocate registers if they don't already exist. */
2106 {
d084e942 2107 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
2108
2109 if (length > search_regs.num_regs)
2110 {
1113d9db
JB
2111 if (search_regs.num_regs == 0)
2112 {
2113 search_regs.start
2114 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2115 search_regs.end
2116 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2117 }
4746118a 2118 else
1113d9db
JB
2119 {
2120 search_regs.start
2121 = (regoff_t *) xrealloc (search_regs.start,
2122 length * sizeof (regoff_t));
2123 search_regs.end
2124 = (regoff_t *) xrealloc (search_regs.end,
2125 length * sizeof (regoff_t));
2126 }
4746118a 2127
487282dc 2128 search_regs.num_regs = length;
4746118a
JB
2129 }
2130 }
2131
2132 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2133 {
2134 marker = Fcar (list);
2135 if (NILP (marker))
2136 {
2137 search_regs.start[i] = -1;
2138 list = Fcdr (list);
2139 }
2140 else
2141 {
0ed62dc7 2142 if (MARKERP (marker))
daa37602
JB
2143 {
2144 if (XMARKER (marker)->buffer == 0)
c235cce7 2145 XSETFASTINT (marker, 0);
daa37602 2146 else
a3668d92 2147 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
daa37602 2148 }
ca1d1d23
JB
2149
2150 CHECK_NUMBER_COERCE_MARKER (marker, 0);
2151 search_regs.start[i] = XINT (marker);
2152 list = Fcdr (list);
2153
2154 marker = Fcar (list);
0ed62dc7 2155 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
c235cce7 2156 XSETFASTINT (marker, 0);
ca1d1d23
JB
2157
2158 CHECK_NUMBER_COERCE_MARKER (marker, 0);
2159 search_regs.end[i] = XINT (marker);
2160 }
2161 list = Fcdr (list);
2162 }
2163
2164 return Qnil;
2165}
2166
7074fde6
FP
2167/* If non-zero the match data have been saved in saved_search_regs
2168 during the execution of a sentinel or filter. */
75ebf74b 2169static int search_regs_saved;
7074fde6
FP
2170static struct re_registers saved_search_regs;
2171
2172/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2173 if asynchronous code (filter or sentinel) is running. */
2174static void
2175save_search_regs ()
2176{
2177 if (!search_regs_saved)
2178 {
2179 saved_search_regs.num_regs = search_regs.num_regs;
2180 saved_search_regs.start = search_regs.start;
2181 saved_search_regs.end = search_regs.end;
2182 search_regs.num_regs = 0;
2d4a771a
RS
2183 search_regs.start = 0;
2184 search_regs.end = 0;
7074fde6
FP
2185
2186 search_regs_saved = 1;
2187 }
2188}
2189
2190/* Called upon exit from filters and sentinels. */
2191void
2192restore_match_data ()
2193{
2194 if (search_regs_saved)
2195 {
2196 if (search_regs.num_regs > 0)
2197 {
2198 xfree (search_regs.start);
2199 xfree (search_regs.end);
2200 }
2201 search_regs.num_regs = saved_search_regs.num_regs;
2202 search_regs.start = saved_search_regs.start;
2203 search_regs.end = saved_search_regs.end;
2204
2205 search_regs_saved = 0;
2206 }
2207}
2208
ca1d1d23
JB
2209/* Quote a string to inactivate reg-expr chars */
2210
2211DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2212 "Return a regexp string which matches exactly STRING and nothing else.")
5806161b
EN
2213 (string)
2214 Lisp_Object string;
ca1d1d23
JB
2215{
2216 register unsigned char *in, *out, *end;
2217 register unsigned char *temp;
0c8533c6 2218 int backslashes_added = 0;
ca1d1d23 2219
5806161b 2220 CHECK_STRING (string, 0);
ca1d1d23 2221
0c8533c6 2222 temp = (unsigned char *) alloca (XSTRING (string)->size_byte * 2);
ca1d1d23
JB
2223
2224 /* Now copy the data into the new string, inserting escapes. */
2225
5806161b 2226 in = XSTRING (string)->data;
0c8533c6 2227 end = in + XSTRING (string)->size_byte;
ca1d1d23
JB
2228 out = temp;
2229
2230 for (; in != end; in++)
2231 {
2232 if (*in == '[' || *in == ']'
2233 || *in == '*' || *in == '.' || *in == '\\'
2234 || *in == '?' || *in == '+'
2235 || *in == '^' || *in == '$')
0c8533c6 2236 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
2237 *out++ = *in;
2238 }
2239
0c8533c6
RS
2240 return make_multibyte_string (temp,
2241 XSTRING (string)->size + backslashes_added,
2242 out - temp);
ca1d1d23
JB
2243}
2244\f
2245syms_of_search ()
2246{
2247 register int i;
2248
487282dc
KH
2249 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2250 {
2251 searchbufs[i].buf.allocated = 100;
2252 searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
2253 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2254 searchbufs[i].regexp = Qnil;
2255 staticpro (&searchbufs[i].regexp);
2256 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2257 }
2258 searchbuf_head = &searchbufs[0];
ca1d1d23
JB
2259
2260 Qsearch_failed = intern ("search-failed");
2261 staticpro (&Qsearch_failed);
2262 Qinvalid_regexp = intern ("invalid-regexp");
2263 staticpro (&Qinvalid_regexp);
2264
2265 Fput (Qsearch_failed, Qerror_conditions,
2266 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2267 Fput (Qsearch_failed, Qerror_message,
2268 build_string ("Search failed"));
2269
2270 Fput (Qinvalid_regexp, Qerror_conditions,
2271 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2272 Fput (Qinvalid_regexp, Qerror_message,
2273 build_string ("Invalid regexp"));
2274
daa37602
JB
2275 last_thing_searched = Qnil;
2276 staticpro (&last_thing_searched);
2277
ca1d1d23 2278 defsubr (&Slooking_at);
b819a390
RS
2279 defsubr (&Sposix_looking_at);
2280 defsubr (&Sstring_match);
2281 defsubr (&Sposix_string_match);
ca1d1d23
JB
2282 defsubr (&Ssearch_forward);
2283 defsubr (&Ssearch_backward);
2284 defsubr (&Sword_search_forward);
2285 defsubr (&Sword_search_backward);
2286 defsubr (&Sre_search_forward);
2287 defsubr (&Sre_search_backward);
b819a390
RS
2288 defsubr (&Sposix_search_forward);
2289 defsubr (&Sposix_search_backward);
ca1d1d23
JB
2290 defsubr (&Sreplace_match);
2291 defsubr (&Smatch_beginning);
2292 defsubr (&Smatch_end);
2293 defsubr (&Smatch_data);
2294 defsubr (&Sstore_match_data);
2295 defsubr (&Sregexp_quote);
2296}