*** empty log message ***
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
68c45bf0 2 Copyright (C) 1985, 86,87,93,94,97,98, 1999 Free Software Foundation, Inc.
ca1d1d23
JB
3
4This file is part of GNU Emacs.
5
6GNU Emacs is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
7c938215 8the Free Software Foundation; either version 2, or (at your option)
ca1d1d23
JB
9any later version.
10
11GNU Emacs is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU Emacs; see the file COPYING. If not, write to
3b7ad313
EN
18the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19Boston, MA 02111-1307, USA. */
ca1d1d23
JB
20
21
18160b98 22#include <config.h>
ca1d1d23
JB
23#include "lisp.h"
24#include "syntax.h"
5679531d 25#include "category.h"
ca1d1d23 26#include "buffer.h"
5679531d 27#include "charset.h"
9169c321 28#include "region-cache.h"
ca1d1d23 29#include "commands.h"
9ac0d9e0 30#include "blockinput.h"
bf1760bb 31#include "intervals.h"
4746118a 32
ca1d1d23
JB
33#include <sys/types.h>
34#include "regex.h"
35
1d288aef 36#define REGEXP_CACHE_SIZE 20
ca1d1d23 37
487282dc
KH
38/* If the regexp is non-nil, then the buffer contains the compiled form
39 of that regexp, suitable for searching. */
1d288aef
RS
40struct regexp_cache
41{
487282dc
KH
42 struct regexp_cache *next;
43 Lisp_Object regexp;
44 struct re_pattern_buffer buf;
45 char fastmap[0400];
b819a390
RS
46 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
47 char posix;
487282dc 48};
ca1d1d23 49
487282dc
KH
50/* The instances of that struct. */
51struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 52
487282dc
KH
53/* The head of the linked list; points to the most recently used buffer. */
54struct regexp_cache *searchbuf_head;
ca1d1d23 55
ca1d1d23 56
4746118a
JB
57/* Every call to re_match, etc., must pass &search_regs as the regs
58 argument unless you can show it is unnecessary (i.e., if re_match
59 is certainly going to be called again before region-around-match
60 can be called).
61
62 Since the registers are now dynamically allocated, we need to make
63 sure not to refer to the Nth register before checking that it has
1113d9db
JB
64 been allocated by checking search_regs.num_regs.
65
66 The regex code keeps track of whether it has allocated the search
487282dc
KH
67 buffer using bits in the re_pattern_buffer. This means that whenever
68 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
69 allocated any registers, and will allocate new registers the next
70 time you call a searching or matching function. Therefore, we need
71 to call re_set_registers after compiling a new pattern or after
72 setting the match registers, so that the regex functions will be
73 able to free or re-allocate it properly. */
ca1d1d23
JB
74static struct re_registers search_regs;
75
daa37602
JB
76/* The buffer in which the last search was performed, or
77 Qt if the last search was done in a string;
78 Qnil if no searching has been done yet. */
79static Lisp_Object last_thing_searched;
ca1d1d23 80
8e6208c5 81/* error condition signaled when regexp compile_pattern fails */
ca1d1d23
JB
82
83Lisp_Object Qinvalid_regexp;
84
ca325161 85static void set_search_regs ();
044f81f1 86static void save_search_regs ();
facdc750
RS
87static int simple_search ();
88static int boyer_moore ();
b819a390
RS
89static int search_buffer ();
90
ca1d1d23
JB
91static void
92matcher_overflow ()
93{
94 error ("Stack overflow in regexp matcher");
95}
96
b819a390
RS
97/* Compile a regexp and signal a Lisp error if anything goes wrong.
98 PATTERN is the pattern to compile.
99 CP is the place to put the result.
facdc750 100 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
101 REGP is the structure that says where to store the "register"
102 values that will result from matching this pattern.
103 If it is 0, we should compile the pattern not to record any
104 subexpression bounds.
105 POSIX is nonzero if we want full backtracking (POSIX style)
5679531d
KH
106 for this pattern. 0 means backtrack only enough to get a valid match.
107 MULTIBYTE is nonzero if we want to handle multibyte characters in
108 PATTERN. 0 means all multibyte characters are recognized just as
109 sequences of binary data. */
ca1d1d23 110
487282dc 111static void
5679531d 112compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
487282dc 113 struct regexp_cache *cp;
ca1d1d23 114 Lisp_Object pattern;
facdc750 115 Lisp_Object translate;
487282dc 116 struct re_registers *regp;
b819a390 117 int posix;
5679531d 118 int multibyte;
ca1d1d23 119{
7276d3d8 120 unsigned char *raw_pattern;
f8bd51c4 121 int raw_pattern_size;
d451e4db 122 char *val;
b819a390 123 reg_syntax_t old;
ca1d1d23 124
f8bd51c4
KH
125 /* MULTIBYTE says whether the text to be searched is multibyte.
126 We must convert PATTERN to match that, or we will not really
127 find things right. */
128
129 if (multibyte == STRING_MULTIBYTE (pattern))
130 {
d5db4077
KR
131 raw_pattern = (unsigned char *) SDATA (pattern);
132 raw_pattern_size = SBYTES (pattern);
f8bd51c4
KH
133 }
134 else if (multibyte)
135 {
d5db4077
KR
136 raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
137 SCHARS (pattern));
7276d3d8 138 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
139 copy_text (SDATA (pattern), raw_pattern,
140 SCHARS (pattern), 0, 1);
f8bd51c4
KH
141 }
142 else
143 {
144 /* Converting multibyte to single-byte.
145
146 ??? Perhaps this conversion should be done in a special way
147 by subtracting nonascii-insert-offset from each non-ASCII char,
148 so that only the multibyte chars which really correspond to
149 the chosen single-byte character set can possibly match. */
d5db4077 150 raw_pattern_size = SCHARS (pattern);
7276d3d8 151 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
152 copy_text (SDATA (pattern), raw_pattern,
153 SBYTES (pattern), 1, 0);
f8bd51c4
KH
154 }
155
487282dc 156 cp->regexp = Qnil;
59fab369 157 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
b819a390 158 cp->posix = posix;
5679531d 159 cp->buf.multibyte = multibyte;
9ac0d9e0 160 BLOCK_INPUT;
fb4a568d 161 old = re_set_syntax (RE_SYNTAX_EMACS
b819a390 162 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
7276d3d8
RS
163 val = (char *) re_compile_pattern ((char *)raw_pattern,
164 raw_pattern_size, &cp->buf);
b819a390 165 re_set_syntax (old);
9ac0d9e0 166 UNBLOCK_INPUT;
ca1d1d23 167 if (val)
487282dc 168 Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
1113d9db 169
487282dc 170 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
171}
172
6efc7887
RS
173/* Shrink each compiled regexp buffer in the cache
174 to the size actually used right now.
175 This is called from garbage collection. */
176
177void
178shrink_regexp_cache ()
179{
a968f437 180 struct regexp_cache *cp;
6efc7887
RS
181
182 for (cp = searchbuf_head; cp != 0; cp = cp->next)
183 {
184 cp->buf.allocated = cp->buf.used;
185 cp->buf.buffer
b23c0a83 186 = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
6efc7887
RS
187 }
188}
189
487282dc 190/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
191 the cache.
192 PATTERN is the pattern to compile.
facdc750 193 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
194 REGP is the structure that says where to store the "register"
195 values that will result from matching this pattern.
196 If it is 0, we should compile the pattern not to record any
197 subexpression bounds.
198 POSIX is nonzero if we want full backtracking (POSIX style)
199 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
200
201struct re_pattern_buffer *
0c8533c6 202compile_pattern (pattern, regp, translate, posix, multibyte)
487282dc
KH
203 Lisp_Object pattern;
204 struct re_registers *regp;
facdc750 205 Lisp_Object translate;
0c8533c6 206 int posix, multibyte;
487282dc
KH
207{
208 struct regexp_cache *cp, **cpp;
209
210 for (cpp = &searchbuf_head; ; cpp = &cp->next)
211 {
212 cp = *cpp;
f1b9c7c1
KR
213 /* Entries are initialized to nil, and may be set to nil by
214 compile_pattern_1 if the pattern isn't valid. Don't apply
49a5f770
KR
215 string accessors in those cases. However, compile_pattern_1
216 is only applied to the cache entry we pick here to reuse. So
217 nil should never appear before a non-nil entry. */
7c752c80 218 if (NILP (cp->regexp))
f1b9c7c1 219 goto compile_it;
d5db4077 220 if (SCHARS (cp->regexp) == SCHARS (pattern)
cf69b13e 221 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
1d288aef 222 && !NILP (Fstring_equal (cp->regexp, pattern))
59fab369 223 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
5679531d
KH
224 && cp->posix == posix
225 && cp->buf.multibyte == multibyte)
487282dc
KH
226 break;
227
f1b9c7c1
KR
228 /* If we're at the end of the cache, compile into the nil cell
229 we found, or the last (least recently used) cell with a
230 string value. */
487282dc
KH
231 if (cp->next == 0)
232 {
f1b9c7c1 233 compile_it:
5679531d 234 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
487282dc
KH
235 break;
236 }
237 }
238
239 /* When we get here, cp (aka *cpp) contains the compiled pattern,
240 either because we found it in the cache or because we just compiled it.
241 Move it to the front of the queue to mark it as most recently used. */
242 *cpp = cp->next;
243 cp->next = searchbuf_head;
244 searchbuf_head = cp;
1113d9db 245
6639708c
RS
246 /* Advise the searching functions about the space we have allocated
247 for register data. */
248 if (regp)
249 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
250
487282dc 251 return &cp->buf;
ca1d1d23
JB
252}
253
254/* Error condition used for failing searches */
255Lisp_Object Qsearch_failed;
256
257Lisp_Object
258signal_failure (arg)
259 Lisp_Object arg;
260{
261 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
262 return Qnil;
263}
264\f
b819a390
RS
265static Lisp_Object
266looking_at_1 (string, posix)
ca1d1d23 267 Lisp_Object string;
b819a390 268 int posix;
ca1d1d23
JB
269{
270 Lisp_Object val;
271 unsigned char *p1, *p2;
272 int s1, s2;
273 register int i;
487282dc 274 struct re_pattern_buffer *bufp;
ca1d1d23 275
7074fde6
FP
276 if (running_asynch_code)
277 save_search_regs ();
278
b7826503 279 CHECK_STRING (string);
487282dc
KH
280 bufp = compile_pattern (string, &search_regs,
281 (!NILP (current_buffer->case_fold_search)
facdc750 282 ? DOWNCASE_TABLE : Qnil),
0c8533c6
RS
283 posix,
284 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23
JB
285
286 immediate_quit = 1;
287 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
288
289 /* Get pointers and sizes of the two strings
290 that make up the visible portion of the buffer. */
291
292 p1 = BEGV_ADDR;
fa8ed3e0 293 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 294 p2 = GAP_END_ADDR;
fa8ed3e0 295 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
296 if (s1 < 0)
297 {
298 p2 = p1;
fa8ed3e0 299 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
300 s1 = 0;
301 }
302 if (s2 < 0)
303 {
fa8ed3e0 304 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
305 s2 = 0;
306 }
8bb43c28
RS
307
308 re_match_object = Qnil;
177c0ea7 309
487282dc 310 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
fa8ed3e0
RS
311 PT_BYTE - BEGV_BYTE, &search_regs,
312 ZV_BYTE - BEGV_BYTE);
de182d70 313 immediate_quit = 0;
177c0ea7 314
ca1d1d23
JB
315 if (i == -2)
316 matcher_overflow ();
317
318 val = (0 <= i ? Qt : Qnil);
fa8ed3e0
RS
319 if (i >= 0)
320 for (i = 0; i < search_regs.num_regs; i++)
321 if (search_regs.start[i] >= 0)
322 {
323 search_regs.start[i]
324 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
325 search_regs.end[i]
326 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
327 }
a3668d92 328 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
329 return val;
330}
331
b819a390 332DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
8c1a1077
PJ
333 doc: /* Return t if text after point matches regular expression REGEXP.
334This function modifies the match data that `match-beginning',
335`match-end' and `match-data' access; save and restore the match
336data if you want to preserve them. */)
337 (regexp)
94f94972 338 Lisp_Object regexp;
b819a390 339{
94f94972 340 return looking_at_1 (regexp, 0);
b819a390
RS
341}
342
343DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
8c1a1077
PJ
344 doc: /* Return t if text after point matches regular expression REGEXP.
345Find the longest match, in accord with Posix regular expression rules.
346This function modifies the match data that `match-beginning',
347`match-end' and `match-data' access; save and restore the match
348data if you want to preserve them. */)
349 (regexp)
94f94972 350 Lisp_Object regexp;
b819a390 351{
94f94972 352 return looking_at_1 (regexp, 1);
b819a390
RS
353}
354\f
355static Lisp_Object
356string_match_1 (regexp, string, start, posix)
ca1d1d23 357 Lisp_Object regexp, string, start;
b819a390 358 int posix;
ca1d1d23
JB
359{
360 int val;
487282dc 361 struct re_pattern_buffer *bufp;
0c8533c6
RS
362 int pos, pos_byte;
363 int i;
ca1d1d23 364
7074fde6
FP
365 if (running_asynch_code)
366 save_search_regs ();
367
b7826503
PJ
368 CHECK_STRING (regexp);
369 CHECK_STRING (string);
ca1d1d23
JB
370
371 if (NILP (start))
0c8533c6 372 pos = 0, pos_byte = 0;
ca1d1d23
JB
373 else
374 {
d5db4077 375 int len = SCHARS (string);
ca1d1d23 376
b7826503 377 CHECK_NUMBER (start);
0c8533c6
RS
378 pos = XINT (start);
379 if (pos < 0 && -pos <= len)
380 pos = len + pos;
381 else if (0 > pos || pos > len)
ca1d1d23 382 args_out_of_range (string, start);
0c8533c6 383 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
384 }
385
487282dc
KH
386 bufp = compile_pattern (regexp, &search_regs,
387 (!NILP (current_buffer->case_fold_search)
facdc750 388 ? DOWNCASE_TABLE : Qnil),
0c8533c6
RS
389 posix,
390 STRING_MULTIBYTE (string));
ca1d1d23 391 immediate_quit = 1;
8bb43c28 392 re_match_object = string;
177c0ea7 393
d5db4077
KR
394 val = re_search (bufp, (char *) SDATA (string),
395 SBYTES (string), pos_byte,
396 SBYTES (string) - pos_byte,
ca1d1d23
JB
397 &search_regs);
398 immediate_quit = 0;
daa37602 399 last_thing_searched = Qt;
ca1d1d23
JB
400 if (val == -2)
401 matcher_overflow ();
402 if (val < 0) return Qnil;
0c8533c6
RS
403
404 for (i = 0; i < search_regs.num_regs; i++)
405 if (search_regs.start[i] >= 0)
406 {
407 search_regs.start[i]
408 = string_byte_to_char (string, search_regs.start[i]);
409 search_regs.end[i]
410 = string_byte_to_char (string, search_regs.end[i]);
411 }
412
413 return make_number (string_byte_to_char (string, val));
ca1d1d23 414}
e59a8453 415
b819a390 416DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
8c1a1077
PJ
417 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
418Case is ignored if `case-fold-search' is non-nil in the current buffer.
419If third arg START is non-nil, start search at that index in STRING.
420For index of first char beyond the match, do (match-end 0).
421`match-end' and `match-beginning' also give indices of substrings
2bd2f32d
RS
422matched by parenthesis constructs in the pattern.
423
424You can use the function `match-string' to extract the substrings
425matched by the parenthesis constructions in REGEXP. */)
8c1a1077 426 (regexp, string, start)
b819a390
RS
427 Lisp_Object regexp, string, start;
428{
429 return string_match_1 (regexp, string, start, 0);
430}
431
432DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
8c1a1077
PJ
433 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
434Find the longest match, in accord with Posix regular expression rules.
435Case is ignored if `case-fold-search' is non-nil in the current buffer.
436If third arg START is non-nil, start search at that index in STRING.
437For index of first char beyond the match, do (match-end 0).
438`match-end' and `match-beginning' also give indices of substrings
439matched by parenthesis constructs in the pattern. */)
440 (regexp, string, start)
b819a390
RS
441 Lisp_Object regexp, string, start;
442{
443 return string_match_1 (regexp, string, start, 1);
444}
445
e59a8453
RS
446/* Match REGEXP against STRING, searching all of STRING,
447 and return the index of the match, or negative on failure.
448 This does not clobber the match data. */
449
450int
451fast_string_match (regexp, string)
452 Lisp_Object regexp, string;
453{
454 int val;
487282dc 455 struct re_pattern_buffer *bufp;
e59a8453 456
facdc750
RS
457 bufp = compile_pattern (regexp, 0, Qnil,
458 0, STRING_MULTIBYTE (string));
e59a8453 459 immediate_quit = 1;
8bb43c28 460 re_match_object = string;
177c0ea7 461
d5db4077
KR
462 val = re_search (bufp, (char *) SDATA (string),
463 SBYTES (string), 0,
464 SBYTES (string), 0);
e59a8453
RS
465 immediate_quit = 0;
466 return val;
467}
5679531d
KH
468
469/* Match REGEXP against STRING, searching all of STRING ignoring case,
470 and return the index of the match, or negative on failure.
0c8533c6
RS
471 This does not clobber the match data.
472 We assume that STRING contains single-byte characters. */
5679531d
KH
473
474extern Lisp_Object Vascii_downcase_table;
475
476int
b4577c63 477fast_c_string_match_ignore_case (regexp, string)
5679531d 478 Lisp_Object regexp;
96b80561 479 const char *string;
5679531d
KH
480{
481 int val;
482 struct re_pattern_buffer *bufp;
483 int len = strlen (string);
484
0c8533c6 485 regexp = string_make_unibyte (regexp);
b4577c63 486 re_match_object = Qt;
5679531d 487 bufp = compile_pattern (regexp, 0,
facdc750 488 Vascii_downcase_table, 0,
f8bd51c4 489 0);
5679531d
KH
490 immediate_quit = 1;
491 val = re_search (bufp, string, len, 0, len, 0);
492 immediate_quit = 0;
493 return val;
494}
ca1d1d23 495\f
9169c321
JB
496/* The newline cache: remembering which sections of text have no newlines. */
497
498/* If the user has requested newline caching, make sure it's on.
499 Otherwise, make sure it's off.
500 This is our cheezy way of associating an action with the change of
501 state of a buffer-local variable. */
502static void
503newline_cache_on_off (buf)
504 struct buffer *buf;
505{
506 if (NILP (buf->cache_long_line_scans))
507 {
508 /* It should be off. */
509 if (buf->newline_cache)
510 {
511 free_region_cache (buf->newline_cache);
512 buf->newline_cache = 0;
513 }
514 }
515 else
516 {
517 /* It should be on. */
518 if (buf->newline_cache == 0)
519 buf->newline_cache = new_region_cache ();
520 }
521}
522
523\f
524/* Search for COUNT instances of the character TARGET between START and END.
525
526 If COUNT is positive, search forwards; END must be >= START.
527 If COUNT is negative, search backwards for the -COUNTth instance;
528 END must be <= START.
529 If COUNT is zero, do anything you please; run rogue, for all I care.
530
531 If END is zero, use BEGV or ZV instead, as appropriate for the
532 direction indicated by COUNT.
ffd56f97
JB
533
534 If we find COUNT instances, set *SHORTAGE to zero, and return the
5bfe95c9
RS
535 position after the COUNTth match. Note that for reverse motion
536 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 537
9169c321
JB
538 If we don't find COUNT instances before reaching END, set *SHORTAGE
539 to the number of TARGETs left unfound, and return END.
ffd56f97 540
087a5f81
RS
541 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
542 except when inside redisplay. */
543
dfcf069d 544int
9169c321
JB
545scan_buffer (target, start, end, count, shortage, allow_quit)
546 register int target;
547 int start, end;
548 int count;
549 int *shortage;
087a5f81 550 int allow_quit;
ca1d1d23 551{
9169c321 552 struct region_cache *newline_cache;
177c0ea7 553 int direction;
ffd56f97 554
9169c321
JB
555 if (count > 0)
556 {
557 direction = 1;
558 if (! end) end = ZV;
559 }
560 else
561 {
562 direction = -1;
563 if (! end) end = BEGV;
564 }
ffd56f97 565
9169c321
JB
566 newline_cache_on_off (current_buffer);
567 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
568
569 if (shortage != 0)
570 *shortage = 0;
571
087a5f81 572 immediate_quit = allow_quit;
ca1d1d23 573
ffd56f97 574 if (count > 0)
9169c321 575 while (start != end)
ca1d1d23 576 {
9169c321
JB
577 /* Our innermost scanning loop is very simple; it doesn't know
578 about gaps, buffer ends, or the newline cache. ceiling is
579 the position of the last character before the next such
580 obstacle --- the last character the dumb search loop should
581 examine. */
fa8ed3e0
RS
582 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
583 int start_byte = CHAR_TO_BYTE (start);
67ce527d 584 int tem;
9169c321
JB
585
586 /* If we're looking for a newline, consult the newline cache
587 to see where we can avoid some scanning. */
588 if (target == '\n' && newline_cache)
589 {
590 int next_change;
591 immediate_quit = 0;
592 while (region_cache_forward
fa8ed3e0
RS
593 (current_buffer, newline_cache, start_byte, &next_change))
594 start_byte = next_change;
cbe0db0d 595 immediate_quit = allow_quit;
9169c321 596
fa8ed3e0
RS
597 /* START should never be after END. */
598 if (start_byte > ceiling_byte)
599 start_byte = ceiling_byte;
9169c321
JB
600
601 /* Now the text after start is an unknown region, and
602 next_change is the position of the next known region. */
fa8ed3e0 603 ceiling_byte = min (next_change - 1, ceiling_byte);
9169c321
JB
604 }
605
606 /* The dumb loop can only scan text stored in contiguous
607 bytes. BUFFER_CEILING_OF returns the last character
608 position that is contiguous, so the ceiling is the
609 position after that. */
67ce527d
KH
610 tem = BUFFER_CEILING_OF (start_byte);
611 ceiling_byte = min (tem, ceiling_byte);
9169c321
JB
612
613 {
177c0ea7 614 /* The termination address of the dumb loop. */
fa8ed3e0
RS
615 register unsigned char *ceiling_addr
616 = BYTE_POS_ADDR (ceiling_byte) + 1;
617 register unsigned char *cursor
618 = BYTE_POS_ADDR (start_byte);
9169c321
JB
619 unsigned char *base = cursor;
620
621 while (cursor < ceiling_addr)
622 {
623 unsigned char *scan_start = cursor;
624
625 /* The dumb loop. */
626 while (*cursor != target && ++cursor < ceiling_addr)
627 ;
628
629 /* If we're looking for newlines, cache the fact that
630 the region from start to cursor is free of them. */
631 if (target == '\n' && newline_cache)
632 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
633 start_byte + scan_start - base,
634 start_byte + cursor - base);
9169c321
JB
635
636 /* Did we find the target character? */
637 if (cursor < ceiling_addr)
638 {
639 if (--count == 0)
640 {
641 immediate_quit = 0;
fa8ed3e0 642 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
9169c321
JB
643 }
644 cursor++;
645 }
646 }
647
fa8ed3e0 648 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321 649 }
ca1d1d23
JB
650 }
651 else
9169c321
JB
652 while (start > end)
653 {
654 /* The last character to check before the next obstacle. */
fa8ed3e0
RS
655 int ceiling_byte = CHAR_TO_BYTE (end);
656 int start_byte = CHAR_TO_BYTE (start);
67ce527d 657 int tem;
9169c321
JB
658
659 /* Consult the newline cache, if appropriate. */
660 if (target == '\n' && newline_cache)
661 {
662 int next_change;
663 immediate_quit = 0;
664 while (region_cache_backward
fa8ed3e0
RS
665 (current_buffer, newline_cache, start_byte, &next_change))
666 start_byte = next_change;
cbe0db0d 667 immediate_quit = allow_quit;
9169c321
JB
668
669 /* Start should never be at or before end. */
fa8ed3e0
RS
670 if (start_byte <= ceiling_byte)
671 start_byte = ceiling_byte + 1;
9169c321
JB
672
673 /* Now the text before start is an unknown region, and
674 next_change is the position of the next known region. */
fa8ed3e0 675 ceiling_byte = max (next_change, ceiling_byte);
9169c321
JB
676 }
677
678 /* Stop scanning before the gap. */
67ce527d
KH
679 tem = BUFFER_FLOOR_OF (start_byte - 1);
680 ceiling_byte = max (tem, ceiling_byte);
9169c321
JB
681
682 {
683 /* The termination address of the dumb loop. */
fa8ed3e0
RS
684 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
685 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
686 unsigned char *base = cursor;
687
688 while (cursor >= ceiling_addr)
689 {
690 unsigned char *scan_start = cursor;
691
692 while (*cursor != target && --cursor >= ceiling_addr)
693 ;
694
695 /* If we're looking for newlines, cache the fact that
696 the region from after the cursor to start is free of them. */
697 if (target == '\n' && newline_cache)
698 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
699 start_byte + cursor - base,
700 start_byte + scan_start - base);
9169c321
JB
701
702 /* Did we find the target character? */
703 if (cursor >= ceiling_addr)
704 {
705 if (++count >= 0)
706 {
707 immediate_quit = 0;
fa8ed3e0 708 return BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
709 }
710 cursor--;
711 }
712 }
713
fa8ed3e0 714 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
715 }
716 }
717
ca1d1d23
JB
718 immediate_quit = 0;
719 if (shortage != 0)
ffd56f97 720 *shortage = count * direction;
9169c321 721 return start;
ca1d1d23 722}
fa8ed3e0
RS
723\f
724/* Search for COUNT instances of a line boundary, which means either a
725 newline or (if selective display enabled) a carriage return.
726 Start at START. If COUNT is negative, search backwards.
727
728 We report the resulting position by calling TEMP_SET_PT_BOTH.
729
730 If we find COUNT instances. we position after (always after,
731 even if scanning backwards) the COUNTth match, and return 0.
732
733 If we don't find COUNT instances before reaching the end of the
734 buffer (or the beginning, if scanning backwards), we return
735 the number of line boundaries left unfound, and position at
736 the limit we bumped up against.
737
738 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
d5d57b92 739 except in special cases. */
ca1d1d23 740
63fa018d 741int
fa8ed3e0
RS
742scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
743 int start, start_byte;
744 int limit, limit_byte;
745 register int count;
746 int allow_quit;
63fa018d 747{
fa8ed3e0
RS
748 int direction = ((count > 0) ? 1 : -1);
749
750 register unsigned char *cursor;
751 unsigned char *base;
752
753 register int ceiling;
754 register unsigned char *ceiling_addr;
755
d5d57b92
RS
756 int old_immediate_quit = immediate_quit;
757
fa8ed3e0
RS
758 /* The code that follows is like scan_buffer
759 but checks for either newline or carriage return. */
760
d5d57b92
RS
761 if (allow_quit)
762 immediate_quit++;
fa8ed3e0
RS
763
764 start_byte = CHAR_TO_BYTE (start);
765
766 if (count > 0)
767 {
768 while (start_byte < limit_byte)
769 {
770 ceiling = BUFFER_CEILING_OF (start_byte);
771 ceiling = min (limit_byte - 1, ceiling);
772 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
773 base = (cursor = BYTE_POS_ADDR (start_byte));
774 while (1)
775 {
776 while (*cursor != '\n' && ++cursor != ceiling_addr)
777 ;
778
779 if (cursor != ceiling_addr)
780 {
781 if (--count == 0)
782 {
d5d57b92 783 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
784 start_byte = start_byte + cursor - base + 1;
785 start = BYTE_TO_CHAR (start_byte);
786 TEMP_SET_PT_BOTH (start, start_byte);
787 return 0;
788 }
789 else
790 if (++cursor == ceiling_addr)
791 break;
792 }
793 else
794 break;
795 }
796 start_byte += cursor - base;
797 }
798 }
799 else
800 {
fa8ed3e0
RS
801 while (start_byte > limit_byte)
802 {
803 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
804 ceiling = max (limit_byte, ceiling);
805 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
806 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
807 while (1)
808 {
809 while (--cursor != ceiling_addr && *cursor != '\n')
810 ;
811
812 if (cursor != ceiling_addr)
813 {
814 if (++count == 0)
815 {
d5d57b92 816 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
817 /* Return the position AFTER the match we found. */
818 start_byte = start_byte + cursor - base + 1;
819 start = BYTE_TO_CHAR (start_byte);
820 TEMP_SET_PT_BOTH (start, start_byte);
821 return 0;
822 }
823 }
824 else
825 break;
826 }
827 /* Here we add 1 to compensate for the last decrement
828 of CURSOR, which took it past the valid range. */
829 start_byte += cursor - base + 1;
830 }
831 }
832
833 TEMP_SET_PT_BOTH (limit, limit_byte);
d5d57b92 834 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
835
836 return count * direction;
63fa018d
RS
837}
838
ca1d1d23 839int
fa8ed3e0 840find_next_newline_no_quit (from, cnt)
ca1d1d23
JB
841 register int from, cnt;
842{
fa8ed3e0 843 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
9169c321
JB
844}
845
9169c321
JB
846/* Like find_next_newline, but returns position before the newline,
847 not after, and only search up to TO. This isn't just
848 find_next_newline (...)-1, because you might hit TO. */
fa8ed3e0 849
9169c321
JB
850int
851find_before_next_newline (from, to, cnt)
cbe0db0d 852 int from, to, cnt;
9169c321
JB
853{
854 int shortage;
855 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
856
857 if (shortage == 0)
858 pos--;
177c0ea7 859
9169c321 860 return pos;
ca1d1d23
JB
861}
862\f
ca1d1d23
JB
863/* Subroutines of Lisp buffer search functions. */
864
865static Lisp_Object
b819a390 866search_command (string, bound, noerror, count, direction, RE, posix)
ca1d1d23
JB
867 Lisp_Object string, bound, noerror, count;
868 int direction;
869 int RE;
b819a390 870 int posix;
ca1d1d23
JB
871{
872 register int np;
9f43ad85 873 int lim, lim_byte;
ca1d1d23
JB
874 int n = direction;
875
876 if (!NILP (count))
877 {
b7826503 878 CHECK_NUMBER (count);
ca1d1d23
JB
879 n *= XINT (count);
880 }
881
b7826503 882 CHECK_STRING (string);
ca1d1d23 883 if (NILP (bound))
9f43ad85
RS
884 {
885 if (n > 0)
886 lim = ZV, lim_byte = ZV_BYTE;
887 else
888 lim = BEGV, lim_byte = BEGV_BYTE;
889 }
ca1d1d23
JB
890 else
891 {
b7826503 892 CHECK_NUMBER_COERCE_MARKER (bound);
ca1d1d23 893 lim = XINT (bound);
6ec8bbd2 894 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
895 error ("Invalid search bound (wrong side of point)");
896 if (lim > ZV)
9f43ad85 897 lim = ZV, lim_byte = ZV_BYTE;
588d2fd5 898 else if (lim < BEGV)
9f43ad85 899 lim = BEGV, lim_byte = BEGV_BYTE;
588d2fd5
KH
900 else
901 lim_byte = CHAR_TO_BYTE (lim);
ca1d1d23
JB
902 }
903
9f43ad85 904 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
ca1d1d23 905 (!NILP (current_buffer->case_fold_search)
facdc750 906 ? current_buffer->case_canon_table
3135e9fd 907 : Qnil),
ca1d1d23 908 (!NILP (current_buffer->case_fold_search)
facdc750 909 ? current_buffer->case_eqv_table
3135e9fd 910 : Qnil),
b819a390 911 posix);
ca1d1d23
JB
912 if (np <= 0)
913 {
914 if (NILP (noerror))
915 return signal_failure (string);
916 if (!EQ (noerror, Qt))
917 {
918 if (lim < BEGV || lim > ZV)
919 abort ();
9f43ad85 920 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
921 return Qnil;
922#if 0 /* This would be clean, but maybe programs depend on
923 a value of nil here. */
481399bf 924 np = lim;
a5f217b8 925#endif
ca1d1d23 926 }
481399bf
RS
927 else
928 return Qnil;
ca1d1d23
JB
929 }
930
931 if (np < BEGV || np > ZV)
932 abort ();
933
934 SET_PT (np);
935
936 return make_number (np);
937}
938\f
fa8ed3e0
RS
939/* Return 1 if REGEXP it matches just one constant string. */
940
b6d6a51c
KH
941static int
942trivial_regexp_p (regexp)
943 Lisp_Object regexp;
944{
d5db4077
KR
945 int len = SBYTES (regexp);
946 unsigned char *s = SDATA (regexp);
b6d6a51c
KH
947 while (--len >= 0)
948 {
949 switch (*s++)
950 {
951 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
952 return 0;
953 case '\\':
954 if (--len < 0)
955 return 0;
956 switch (*s++)
957 {
958 case '|': case '(': case ')': case '`': case '\'': case 'b':
959 case 'B': case '<': case '>': case 'w': case 'W': case 's':
29f89fe7 960 case 'S': case '=': case '{': case '}': case '_':
5679531d 961 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 962 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
963 case '6': case '7': case '8': case '9':
964 return 0;
965 }
966 }
967 }
968 return 1;
969}
970
ca325161 971/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 972 starting at position POS and stopping at position LIM,
b819a390 973 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
974 a regular expression if RE is true.
975
976 If N is positive, searching is forward and LIM must be greater than POS.
977 If N is negative, searching is backward and LIM must be less than POS.
978
facdc750 979 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 980 or else the position at the beginning of the Nth occurrence
b819a390
RS
981 (if searching backward) or the end (if searching forward).
982
983 POSIX is nonzero if we want full backtracking (POSIX style)
984 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 985
aff2ce94
RS
986#define TRANSLATE(out, trt, d) \
987do \
988 { \
989 if (! NILP (trt)) \
990 { \
991 Lisp_Object temp; \
992 temp = Faref (trt, make_number (d)); \
993 if (INTEGERP (temp)) \
994 out = XINT (temp); \
995 else \
996 out = d; \
997 } \
998 else \
999 out = d; \
1000 } \
1001while (0)
facdc750 1002
b819a390 1003static int
9f43ad85
RS
1004search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1005 RE, trt, inverse_trt, posix)
ca1d1d23
JB
1006 Lisp_Object string;
1007 int pos;
9f43ad85 1008 int pos_byte;
ca1d1d23 1009 int lim;
9f43ad85 1010 int lim_byte;
ca1d1d23
JB
1011 int n;
1012 int RE;
facdc750
RS
1013 Lisp_Object trt;
1014 Lisp_Object inverse_trt;
b819a390 1015 int posix;
ca1d1d23 1016{
d5db4077
KR
1017 int len = SCHARS (string);
1018 int len_byte = SBYTES (string);
facdc750 1019 register int i;
ca1d1d23 1020
7074fde6
FP
1021 if (running_asynch_code)
1022 save_search_regs ();
1023
a7e4cdde 1024 /* Searching 0 times means don't move. */
ca1d1d23 1025 /* Null string is found at starting position. */
a7e4cdde 1026 if (len == 0 || n == 0)
ca325161 1027 {
0353b28f 1028 set_search_regs (pos_byte, 0);
ca325161
RS
1029 return pos;
1030 }
3f57a499 1031
b6d6a51c 1032 if (RE && !trivial_regexp_p (string))
ca1d1d23 1033 {
facdc750
RS
1034 unsigned char *p1, *p2;
1035 int s1, s2;
487282dc
KH
1036 struct re_pattern_buffer *bufp;
1037
0c8533c6
RS
1038 bufp = compile_pattern (string, &search_regs, trt, posix,
1039 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23 1040
ca1d1d23
JB
1041 immediate_quit = 1; /* Quit immediately if user types ^G,
1042 because letting this function finish
1043 can take too long. */
1044 QUIT; /* Do a pending quit right away,
1045 to avoid paradoxical behavior */
1046 /* Get pointers and sizes of the two strings
1047 that make up the visible portion of the buffer. */
1048
1049 p1 = BEGV_ADDR;
fa8ed3e0 1050 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1051 p2 = GAP_END_ADDR;
fa8ed3e0 1052 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1053 if (s1 < 0)
1054 {
1055 p2 = p1;
fa8ed3e0 1056 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1057 s1 = 0;
1058 }
1059 if (s2 < 0)
1060 {
fa8ed3e0 1061 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1062 s2 = 0;
1063 }
8bb43c28 1064 re_match_object = Qnil;
177c0ea7 1065
ca1d1d23
JB
1066 while (n < 0)
1067 {
42db823b 1068 int val;
487282dc 1069 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1070 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1071 &search_regs,
42db823b 1072 /* Don't allow match past current point */
4996330b 1073 pos_byte - BEGV_BYTE);
ca1d1d23 1074 if (val == -2)
b6d6a51c
KH
1075 {
1076 matcher_overflow ();
1077 }
ca1d1d23
JB
1078 if (val >= 0)
1079 {
26aff150 1080 pos_byte = search_regs.start[0] + BEGV_BYTE;
4746118a 1081 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1082 if (search_regs.start[i] >= 0)
1083 {
fa8ed3e0
RS
1084 search_regs.start[i]
1085 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1086 search_regs.end[i]
1087 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1088 }
a3668d92 1089 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1090 /* Set pos to the new position. */
1091 pos = search_regs.start[0];
1092 }
1093 else
1094 {
1095 immediate_quit = 0;
1096 return (n);
1097 }
1098 n++;
1099 }
1100 while (n > 0)
1101 {
42db823b 1102 int val;
487282dc 1103 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1104 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1105 &search_regs,
1106 lim_byte - BEGV_BYTE);
ca1d1d23 1107 if (val == -2)
b6d6a51c
KH
1108 {
1109 matcher_overflow ();
1110 }
ca1d1d23
JB
1111 if (val >= 0)
1112 {
26aff150 1113 pos_byte = search_regs.end[0] + BEGV_BYTE;
4746118a 1114 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1115 if (search_regs.start[i] >= 0)
1116 {
fa8ed3e0
RS
1117 search_regs.start[i]
1118 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1119 search_regs.end[i]
1120 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1121 }
a3668d92 1122 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1123 pos = search_regs.end[0];
1124 }
1125 else
1126 {
1127 immediate_quit = 0;
1128 return (0 - n);
1129 }
1130 n--;
1131 }
1132 immediate_quit = 0;
1133 return (pos);
1134 }
1135 else /* non-RE case */
1136 {
facdc750
RS
1137 unsigned char *raw_pattern, *pat;
1138 int raw_pattern_size;
1139 int raw_pattern_size_byte;
1140 unsigned char *patbuf;
1141 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
d5db4077 1142 unsigned char *base_pat = SDATA (string);
facdc750 1143 int charset_base = -1;
040272ce 1144 int boyer_moore_ok = 1;
facdc750
RS
1145
1146 /* MULTIBYTE says whether the text to be searched is multibyte.
1147 We must convert PATTERN to match that, or we will not really
1148 find things right. */
1149
1150 if (multibyte == STRING_MULTIBYTE (string))
1151 {
d5db4077
KR
1152 raw_pattern = (unsigned char *) SDATA (string);
1153 raw_pattern_size = SCHARS (string);
1154 raw_pattern_size_byte = SBYTES (string);
facdc750
RS
1155 }
1156 else if (multibyte)
1157 {
d5db4077 1158 raw_pattern_size = SCHARS (string);
facdc750 1159 raw_pattern_size_byte
d5db4077 1160 = count_size_as_multibyte (SDATA (string),
facdc750 1161 raw_pattern_size);
7276d3d8 1162 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
d5db4077
KR
1163 copy_text (SDATA (string), raw_pattern,
1164 SCHARS (string), 0, 1);
facdc750
RS
1165 }
1166 else
1167 {
1168 /* Converting multibyte to single-byte.
1169
1170 ??? Perhaps this conversion should be done in a special way
1171 by subtracting nonascii-insert-offset from each non-ASCII char,
1172 so that only the multibyte chars which really correspond to
1173 the chosen single-byte character set can possibly match. */
d5db4077
KR
1174 raw_pattern_size = SCHARS (string);
1175 raw_pattern_size_byte = SCHARS (string);
7276d3d8 1176 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
1177 copy_text (SDATA (string), raw_pattern,
1178 SBYTES (string), 1, 0);
facdc750
RS
1179 }
1180
1181 /* Copy and optionally translate the pattern. */
1182 len = raw_pattern_size;
1183 len_byte = raw_pattern_size_byte;
1184 patbuf = (unsigned char *) alloca (len_byte);
1185 pat = patbuf;
1186 base_pat = raw_pattern;
1187 if (multibyte)
1188 {
1189 while (--len >= 0)
1190 {
daaa6ed8 1191 unsigned char str[MAX_MULTIBYTE_LENGTH];
aff2ce94 1192 int c, translated, inverse;
facdc750
RS
1193 int in_charlen, charlen;
1194
1195 /* If we got here and the RE flag is set, it's because we're
1196 dealing with a regexp known to be trivial, so the backslash
1197 just quotes the next character. */
1198 if (RE && *base_pat == '\\')
1199 {
1200 len--;
1201 len_byte--;
1202 base_pat++;
1203 }
1204
1205 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
040272ce 1206
facdc750 1207 /* Translate the character, if requested. */
aff2ce94 1208 TRANSLATE (translated, trt, c);
facdc750
RS
1209 /* If translation changed the byte-length, go back
1210 to the original character. */
daaa6ed8 1211 charlen = CHAR_STRING (translated, str);
facdc750
RS
1212 if (in_charlen != charlen)
1213 {
1214 translated = c;
daaa6ed8 1215 charlen = CHAR_STRING (c, str);
facdc750
RS
1216 }
1217
5ffaf437
RS
1218 /* If we are searching for something strange,
1219 an invalid multibyte code, don't use boyer-moore. */
1220 if (! ASCII_BYTE_P (translated)
1221 && (charlen == 1 /* 8bit code */
1222 || charlen != in_charlen /* invalid multibyte code */
1223 ))
1224 boyer_moore_ok = 0;
1225
aff2ce94
RS
1226 TRANSLATE (inverse, inverse_trt, c);
1227
facdc750
RS
1228 /* Did this char actually get translated?
1229 Would any other char get translated into it? */
aff2ce94 1230 if (translated != c || inverse != c)
facdc750
RS
1231 {
1232 /* Keep track of which character set row
1233 contains the characters that need translation. */
5ffaf437 1234 int charset_base_code = c & ~CHAR_FIELD3_MASK;
d2ac725b
KH
1235 int inverse_charset_base = inverse & ~CHAR_FIELD3_MASK;
1236
1237 if (charset_base_code != inverse_charset_base)
1238 boyer_moore_ok = 0;
1239 else if (charset_base == -1)
facdc750
RS
1240 charset_base = charset_base_code;
1241 else if (charset_base != charset_base_code)
1242 /* If two different rows appear, needing translation,
1243 then we cannot use boyer_moore search. */
040272ce 1244 boyer_moore_ok = 0;
aff2ce94 1245 }
facdc750
RS
1246
1247 /* Store this character into the translated pattern. */
1248 bcopy (str, pat, charlen);
1249 pat += charlen;
1250 base_pat += in_charlen;
1251 len_byte -= in_charlen;
1252 }
1253 }
1254 else
1255 {
040272ce
KH
1256 /* Unibyte buffer. */
1257 charset_base = 0;
facdc750
RS
1258 while (--len >= 0)
1259 {
040272ce 1260 int c, translated;
facdc750
RS
1261
1262 /* If we got here and the RE flag is set, it's because we're
1263 dealing with a regexp known to be trivial, so the backslash
1264 just quotes the next character. */
1265 if (RE && *base_pat == '\\')
1266 {
1267 len--;
1268 base_pat++;
1269 }
1270 c = *base_pat++;
aff2ce94 1271 TRANSLATE (translated, trt, c);
facdc750
RS
1272 *pat++ = translated;
1273 }
1274 }
1275
1276 len_byte = pat - patbuf;
1277 len = raw_pattern_size;
1278 pat = base_pat = patbuf;
1279
040272ce 1280 if (boyer_moore_ok)
facdc750 1281 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
aff2ce94
RS
1282 pos, pos_byte, lim, lim_byte,
1283 charset_base);
facdc750
RS
1284 else
1285 return simple_search (n, pat, len, len_byte, trt,
1286 pos, pos_byte, lim, lim_byte);
1287 }
1288}
1289\f
1290/* Do a simple string search N times for the string PAT,
1291 whose length is LEN/LEN_BYTE,
1292 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1293 TRT is the translation table.
f8bd51c4 1294
facdc750
RS
1295 Return the character position where the match is found.
1296 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1297
facdc750
RS
1298 This kind of search works regardless of what is in PAT and
1299 regardless of what is in TRT. It is used in cases where
1300 boyer_moore cannot work. */
1301
1302static int
1303simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1304 int n;
1305 unsigned char *pat;
1306 int len, len_byte;
1307 Lisp_Object trt;
1308 int pos, pos_byte;
1309 int lim, lim_byte;
1310{
1311 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
ab228c24 1312 int forward = n > 0;
facdc750
RS
1313
1314 if (lim > pos && multibyte)
1315 while (n > 0)
1316 {
1317 while (1)
f8bd51c4 1318 {
facdc750
RS
1319 /* Try matching at position POS. */
1320 int this_pos = pos;
1321 int this_pos_byte = pos_byte;
1322 int this_len = len;
1323 int this_len_byte = len_byte;
1324 unsigned char *p = pat;
1325 if (pos + len > lim)
1326 goto stop;
1327
1328 while (this_len > 0)
1329 {
1330 int charlen, buf_charlen;
ab228c24 1331 int pat_ch, buf_ch;
facdc750 1332
ab228c24 1333 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1334 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1335 ZV_BYTE - this_pos_byte,
1336 buf_charlen);
aff2ce94 1337 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1338
1339 if (buf_ch != pat_ch)
1340 break;
ab228c24
RS
1341
1342 this_len_byte -= charlen;
1343 this_len--;
1344 p += charlen;
1345
1346 this_pos_byte += buf_charlen;
1347 this_pos++;
facdc750
RS
1348 }
1349
1350 if (this_len == 0)
1351 {
1352 pos += len;
1353 pos_byte += len_byte;
1354 break;
1355 }
1356
1357 INC_BOTH (pos, pos_byte);
f8bd51c4 1358 }
facdc750
RS
1359
1360 n--;
1361 }
1362 else if (lim > pos)
1363 while (n > 0)
1364 {
1365 while (1)
f8bd51c4 1366 {
facdc750
RS
1367 /* Try matching at position POS. */
1368 int this_pos = pos;
1369 int this_len = len;
1370 unsigned char *p = pat;
1371
1372 if (pos + len > lim)
1373 goto stop;
1374
1375 while (this_len > 0)
1376 {
1377 int pat_ch = *p++;
1378 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1379 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1380
1381 if (buf_ch != pat_ch)
1382 break;
ab228c24
RS
1383
1384 this_len--;
1385 this_pos++;
facdc750
RS
1386 }
1387
1388 if (this_len == 0)
1389 {
1390 pos += len;
1391 break;
1392 }
1393
1394 pos++;
f8bd51c4 1395 }
facdc750
RS
1396
1397 n--;
1398 }
1399 /* Backwards search. */
1400 else if (lim < pos && multibyte)
1401 while (n < 0)
1402 {
1403 while (1)
f8bd51c4 1404 {
facdc750
RS
1405 /* Try matching at position POS. */
1406 int this_pos = pos - len;
1407 int this_pos_byte = pos_byte - len_byte;
1408 int this_len = len;
1409 int this_len_byte = len_byte;
1410 unsigned char *p = pat;
1411
1412 if (pos - len < lim)
1413 goto stop;
1414
1415 while (this_len > 0)
1416 {
1417 int charlen, buf_charlen;
ab228c24 1418 int pat_ch, buf_ch;
facdc750 1419
ab228c24 1420 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1421 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1422 ZV_BYTE - this_pos_byte,
1423 buf_charlen);
aff2ce94 1424 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1425
1426 if (buf_ch != pat_ch)
1427 break;
ab228c24
RS
1428
1429 this_len_byte -= charlen;
1430 this_len--;
1431 p += charlen;
1432 this_pos_byte += buf_charlen;
1433 this_pos++;
facdc750
RS
1434 }
1435
1436 if (this_len == 0)
1437 {
1438 pos -= len;
1439 pos_byte -= len_byte;
1440 break;
1441 }
1442
1443 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1444 }
1445
facdc750
RS
1446 n++;
1447 }
1448 else if (lim < pos)
1449 while (n < 0)
1450 {
1451 while (1)
b6d6a51c 1452 {
facdc750
RS
1453 /* Try matching at position POS. */
1454 int this_pos = pos - len;
1455 int this_len = len;
1456 unsigned char *p = pat;
1457
1458 if (pos - len < lim)
1459 goto stop;
1460
1461 while (this_len > 0)
1462 {
1463 int pat_ch = *p++;
1464 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1465 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1466
1467 if (buf_ch != pat_ch)
1468 break;
ab228c24
RS
1469 this_len--;
1470 this_pos++;
facdc750
RS
1471 }
1472
1473 if (this_len == 0)
b6d6a51c 1474 {
facdc750
RS
1475 pos -= len;
1476 break;
b6d6a51c 1477 }
facdc750
RS
1478
1479 pos--;
b6d6a51c 1480 }
facdc750
RS
1481
1482 n++;
b6d6a51c 1483 }
facdc750
RS
1484
1485 stop:
1486 if (n == 0)
aff2ce94 1487 {
ab228c24
RS
1488 if (forward)
1489 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1490 else
1491 set_search_regs (multibyte ? pos_byte : pos, len_byte);
aff2ce94
RS
1492
1493 return pos;
1494 }
facdc750
RS
1495 else if (n > 0)
1496 return -n;
1497 else
1498 return n;
1499}
1500\f
1501/* Do Boyer-Moore search N times for the string PAT,
1502 whose length is LEN/LEN_BYTE,
1503 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1504 DIRECTION says which direction we search in.
1505 TRT and INVERSE_TRT are translation tables.
1506
1507 This kind of search works if all the characters in PAT that have
1508 nontrivial translation are the same aside from the last byte. This
1509 makes it possible to translate just the last byte of a character,
1510 and do so after just a simple test of the context.
1511
1512 If that criterion is not satisfied, do not call this function. */
1513
1514static int
1515boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
aff2ce94 1516 pos, pos_byte, lim, lim_byte, charset_base)
facdc750
RS
1517 int n;
1518 unsigned char *base_pat;
1519 int len, len_byte;
1520 Lisp_Object trt;
1521 Lisp_Object inverse_trt;
1522 int pos, pos_byte;
1523 int lim, lim_byte;
aff2ce94 1524 int charset_base;
facdc750
RS
1525{
1526 int direction = ((n > 0) ? 1 : -1);
1527 register int dirlen;
a968f437 1528 int infinity, limit, stride_for_teases = 0;
facdc750
RS
1529 register int *BM_tab;
1530 int *BM_tab_base;
177c0ea7 1531 register unsigned char *cursor, *p_limit;
facdc750 1532 register int i, j;
cb6792d2 1533 unsigned char *pat, *pat_end;
facdc750
RS
1534 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1535
1536 unsigned char simple_translate[0400];
6bbd7a29
GM
1537 int translate_prev_byte = 0;
1538 int translate_anteprev_byte = 0;
facdc750
RS
1539
1540#ifdef C_ALLOCA
1541 int BM_tab_space[0400];
1542 BM_tab = &BM_tab_space[0];
1543#else
1544 BM_tab = (int *) alloca (0400 * sizeof (int));
1545#endif
1546 /* The general approach is that we are going to maintain that we know */
1547 /* the first (closest to the present position, in whatever direction */
1548 /* we're searching) character that could possibly be the last */
1549 /* (furthest from present position) character of a valid match. We */
1550 /* advance the state of our knowledge by looking at that character */
1551 /* and seeing whether it indeed matches the last character of the */
1552 /* pattern. If it does, we take a closer look. If it does not, we */
1553 /* move our pointer (to putative last characters) as far as is */
1554 /* logically possible. This amount of movement, which I call a */
1555 /* stride, will be the length of the pattern if the actual character */
1556 /* appears nowhere in the pattern, otherwise it will be the distance */
1557 /* from the last occurrence of that character to the end of the */
1558 /* pattern. */
1559 /* As a coding trick, an enormous stride is coded into the table for */
1560 /* characters that match the last character. This allows use of only */
1561 /* a single test, a test for having gone past the end of the */
1562 /* permissible match region, to test for both possible matches (when */
1563 /* the stride goes past the end immediately) and failure to */
177c0ea7 1564 /* match (where you get nudged past the end one stride at a time). */
facdc750
RS
1565
1566 /* Here we make a "mickey mouse" BM table. The stride of the search */
1567 /* is determined only by the last character of the putative match. */
1568 /* If that character does not match, we will stride the proper */
1569 /* distance to propose a match that superimposes it on the last */
1570 /* instance of a character that matches it (per trt), or misses */
177c0ea7 1571 /* it entirely if there is none. */
facdc750
RS
1572
1573 dirlen = len_byte * direction;
1574 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
cb6792d2
RS
1575
1576 /* Record position after the end of the pattern. */
1577 pat_end = base_pat + len_byte;
1578 /* BASE_PAT points to a character that we start scanning from.
1579 It is the first character in a forward search,
1580 the last character in a backward search. */
facdc750 1581 if (direction < 0)
cb6792d2
RS
1582 base_pat = pat_end - 1;
1583
facdc750
RS
1584 BM_tab_base = BM_tab;
1585 BM_tab += 0400;
1586 j = dirlen; /* to get it in a register */
1587 /* A character that does not appear in the pattern induces a */
1588 /* stride equal to the pattern length. */
1589 while (BM_tab_base != BM_tab)
1590 {
1591 *--BM_tab = j;
1592 *--BM_tab = j;
1593 *--BM_tab = j;
1594 *--BM_tab = j;
1595 }
1596
1597 /* We use this for translation, instead of TRT itself.
1598 We fill this in to handle the characters that actually
1599 occur in the pattern. Others don't matter anyway! */
1600 bzero (simple_translate, sizeof simple_translate);
1601 for (i = 0; i < 0400; i++)
1602 simple_translate[i] = i;
1603
1604 i = 0;
1605 while (i != infinity)
1606 {
cb6792d2 1607 unsigned char *ptr = base_pat + i;
facdc750
RS
1608 i += direction;
1609 if (i == dirlen)
1610 i = infinity;
1611 if (! NILP (trt))
ca1d1d23 1612 {
facdc750 1613 int ch;
aff2ce94 1614 int untranslated;
facdc750
RS
1615 int this_translated = 1;
1616
1617 if (multibyte
cb6792d2
RS
1618 /* Is *PTR the last byte of a character? */
1619 && (pat_end - ptr == 1 || CHAR_HEAD_P (ptr[1])))
ca1d1d23 1620 {
facdc750
RS
1621 unsigned char *charstart = ptr;
1622 while (! CHAR_HEAD_P (*charstart))
1623 charstart--;
aff2ce94 1624 untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
6397418a 1625 if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
facdc750 1626 {
ab228c24 1627 TRANSLATE (ch, trt, untranslated);
aff2ce94
RS
1628 if (! CHAR_HEAD_P (*ptr))
1629 {
1630 translate_prev_byte = ptr[-1];
1631 if (! CHAR_HEAD_P (translate_prev_byte))
1632 translate_anteprev_byte = ptr[-2];
1633 }
facdc750 1634 }
aff2ce94 1635 else
ab228c24
RS
1636 {
1637 this_translated = 0;
1638 ch = *ptr;
1639 }
ca1d1d23 1640 }
facdc750 1641 else if (!multibyte)
aff2ce94 1642 TRANSLATE (ch, trt, *ptr);
ca1d1d23
JB
1643 else
1644 {
facdc750
RS
1645 ch = *ptr;
1646 this_translated = 0;
ca1d1d23 1647 }
facdc750 1648
ab228c24
RS
1649 if (ch > 0400)
1650 j = ((unsigned char) ch) | 0200;
1651 else
1652 j = (unsigned char) ch;
1653
facdc750
RS
1654 if (i == infinity)
1655 stride_for_teases = BM_tab[j];
ab228c24 1656
facdc750
RS
1657 BM_tab[j] = dirlen - i;
1658 /* A translation table is accompanied by its inverse -- see */
177c0ea7 1659 /* comment following downcase_table for details */
facdc750 1660 if (this_translated)
ab228c24
RS
1661 {
1662 int starting_ch = ch;
1663 int starting_j = j;
1664 while (1)
1665 {
1666 TRANSLATE (ch, inverse_trt, ch);
1667 if (ch > 0400)
1668 j = ((unsigned char) ch) | 0200;
1669 else
1670 j = (unsigned char) ch;
1671
1672 /* For all the characters that map into CH,
1673 set up simple_translate to map the last byte
1674 into STARTING_J. */
1675 simple_translate[j] = starting_j;
1676 if (ch == starting_ch)
1677 break;
1678 BM_tab[j] = dirlen - i;
1679 }
1680 }
facdc750
RS
1681 }
1682 else
1683 {
1684 j = *ptr;
1685
1686 if (i == infinity)
1687 stride_for_teases = BM_tab[j];
1688 BM_tab[j] = dirlen - i;
ca1d1d23 1689 }
facdc750
RS
1690 /* stride_for_teases tells how much to stride if we get a */
1691 /* match on the far character but are subsequently */
1692 /* disappointed, by recording what the stride would have been */
1693 /* for that character if the last character had been */
1694 /* different. */
1695 }
1696 infinity = dirlen - infinity;
1697 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1698 /* loop invariant - POS_BYTE points at where last char (first
1699 char if reverse) of pattern would align in a possible match. */
1700 while (n != 0)
1701 {
1702 int tail_end;
1703 unsigned char *tail_end_ptr;
1704
1705 /* It's been reported that some (broken) compiler thinks that
1706 Boolean expressions in an arithmetic context are unsigned.
1707 Using an explicit ?1:0 prevents this. */
1708 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1709 < 0)
1710 return (n * (0 - direction));
1711 /* First we do the part we can by pointers (maybe nothing) */
1712 QUIT;
1713 pat = base_pat;
1714 limit = pos_byte - dirlen + direction;
67ce527d
KH
1715 if (direction > 0)
1716 {
1717 limit = BUFFER_CEILING_OF (limit);
1718 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1719 can take on without hitting edge of buffer or the gap. */
1720 limit = min (limit, pos_byte + 20000);
1721 limit = min (limit, lim_byte - 1);
1722 }
1723 else
1724 {
1725 limit = BUFFER_FLOOR_OF (limit);
1726 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1727 can take on without hitting edge of buffer or the gap. */
1728 limit = max (limit, pos_byte - 20000);
1729 limit = max (limit, lim_byte);
1730 }
facdc750
RS
1731 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1732 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1733
1734 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1735 {
facdc750
RS
1736 unsigned char *p2;
1737
1738 p_limit = BYTE_POS_ADDR (limit);
1739 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1740 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1741 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1742 {
facdc750 1743 if (direction > 0) /* worth duplicating */
ca1d1d23 1744 {
facdc750
RS
1745 /* Use signed comparison if appropriate
1746 to make cursor+infinity sure to be > p_limit.
1747 Assuming that the buffer lies in a range of addresses
1748 that are all "positive" (as ints) or all "negative",
1749 either kind of comparison will work as long
1750 as we don't step by infinity. So pick the kind
1751 that works when we do step by infinity. */
1752 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1753 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1754 cursor += BM_tab[*cursor];
ca1d1d23 1755 else
facdc750
RS
1756 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1757 cursor += BM_tab[*cursor];
1758 }
1759 else
1760 {
1761 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1762 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1763 cursor += BM_tab[*cursor];
1764 else
1765 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1766 cursor += BM_tab[*cursor];
1767 }
ca1d1d23 1768/* If you are here, cursor is beyond the end of the searched region. */
facdc750
RS
1769/* This can happen if you match on the far character of the pattern, */
1770/* because the "stride" of that character is infinity, a number able */
1771/* to throw you well beyond the end of the search. It can also */
1772/* happen if you fail to match within the permitted region and would */
1773/* otherwise try a character beyond that region */
1774 if ((cursor - p_limit) * direction <= len_byte)
1775 break; /* a small overrun is genuine */
1776 cursor -= infinity; /* large overrun = hit */
1777 i = dirlen - direction;
1778 if (! NILP (trt))
1779 {
1780 while ((i -= direction) + direction != 0)
ca1d1d23 1781 {
facdc750
RS
1782 int ch;
1783 cursor -= direction;
1784 /* Translate only the last byte of a character. */
1785 if (! multibyte
1786 || ((cursor == tail_end_ptr
1787 || CHAR_HEAD_P (cursor[1]))
1788 && (CHAR_HEAD_P (cursor[0])
1789 || (translate_prev_byte == cursor[-1]
1790 && (CHAR_HEAD_P (translate_prev_byte)
1791 || translate_anteprev_byte == cursor[-2])))))
1792 ch = simple_translate[*cursor];
1793 else
1794 ch = *cursor;
1795 if (pat[i] != ch)
1796 break;
ca1d1d23 1797 }
facdc750
RS
1798 }
1799 else
1800 {
1801 while ((i -= direction) + direction != 0)
ca1d1d23 1802 {
facdc750
RS
1803 cursor -= direction;
1804 if (pat[i] != *cursor)
1805 break;
ca1d1d23 1806 }
facdc750
RS
1807 }
1808 cursor += dirlen - i - direction; /* fix cursor */
1809 if (i + direction == 0)
1810 {
1811 int position;
0c8533c6 1812
facdc750 1813 cursor -= direction;
1113d9db 1814
facdc750
RS
1815 position = pos_byte + cursor - p2 + ((direction > 0)
1816 ? 1 - len_byte : 0);
1817 set_search_regs (position, len_byte);
ca325161 1818
facdc750
RS
1819 if ((n -= direction) != 0)
1820 cursor += dirlen; /* to resume search */
ca1d1d23 1821 else
facdc750
RS
1822 return ((direction > 0)
1823 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1824 }
facdc750
RS
1825 else
1826 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 1827 }
facdc750
RS
1828 pos_byte += cursor - p2;
1829 }
1830 else
1831 /* Now we'll pick up a clump that has to be done the hard */
1832 /* way because it covers a discontinuity */
1833 {
1834 limit = ((direction > 0)
1835 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1836 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1837 limit = ((direction > 0)
1838 ? min (limit + len_byte, lim_byte - 1)
1839 : max (limit - len_byte, lim_byte));
1840 /* LIMIT is now the last value POS_BYTE can have
1841 and still be valid for a possible match. */
1842 while (1)
ca1d1d23 1843 {
facdc750
RS
1844 /* This loop can be coded for space rather than */
1845 /* speed because it will usually run only once. */
1846 /* (the reach is at most len + 21, and typically */
177c0ea7 1847 /* does not exceed len) */
facdc750
RS
1848 while ((limit - pos_byte) * direction >= 0)
1849 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1850 /* now run the same tests to distinguish going off the */
1851 /* end, a match or a phony match. */
1852 if ((pos_byte - limit) * direction <= len_byte)
1853 break; /* ran off the end */
1854 /* Found what might be a match.
1855 Set POS_BYTE back to last (first if reverse) pos. */
1856 pos_byte -= infinity;
1857 i = dirlen - direction;
1858 while ((i -= direction) + direction != 0)
ca1d1d23 1859 {
facdc750
RS
1860 int ch;
1861 unsigned char *ptr;
1862 pos_byte -= direction;
1863 ptr = BYTE_POS_ADDR (pos_byte);
1864 /* Translate only the last byte of a character. */
1865 if (! multibyte
1866 || ((ptr == tail_end_ptr
1867 || CHAR_HEAD_P (ptr[1]))
1868 && (CHAR_HEAD_P (ptr[0])
1869 || (translate_prev_byte == ptr[-1]
1870 && (CHAR_HEAD_P (translate_prev_byte)
1871 || translate_anteprev_byte == ptr[-2])))))
1872 ch = simple_translate[*ptr];
1873 else
1874 ch = *ptr;
1875 if (pat[i] != ch)
1876 break;
1877 }
1878 /* Above loop has moved POS_BYTE part or all the way
1879 back to the first pos (last pos if reverse).
1880 Set it once again at the last (first if reverse) char. */
1881 pos_byte += dirlen - i- direction;
1882 if (i + direction == 0)
1883 {
1884 int position;
1885 pos_byte -= direction;
1113d9db 1886
facdc750 1887 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
0c8533c6 1888
facdc750 1889 set_search_regs (position, len_byte);
ca325161 1890
facdc750
RS
1891 if ((n -= direction) != 0)
1892 pos_byte += dirlen; /* to resume search */
ca1d1d23 1893 else
facdc750
RS
1894 return ((direction > 0)
1895 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1896 }
facdc750
RS
1897 else
1898 pos_byte += stride_for_teases;
1899 }
1900 }
1901 /* We have done one clump. Can we continue? */
1902 if ((lim_byte - pos_byte) * direction < 0)
1903 return ((0 - n) * direction);
ca1d1d23 1904 }
facdc750 1905 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 1906}
ca325161 1907
fa8ed3e0 1908/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
a7e4cdde
RS
1909 for the overall match just found in the current buffer.
1910 Also clear out the match data for registers 1 and up. */
ca325161
RS
1911
1912static void
fa8ed3e0
RS
1913set_search_regs (beg_byte, nbytes)
1914 int beg_byte, nbytes;
ca325161 1915{
a7e4cdde
RS
1916 int i;
1917
ca325161
RS
1918 /* Make sure we have registers in which to store
1919 the match position. */
1920 if (search_regs.num_regs == 0)
1921 {
2d4a771a
RS
1922 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1923 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 1924 search_regs.num_regs = 2;
ca325161
RS
1925 }
1926
a7e4cdde
RS
1927 /* Clear out the other registers. */
1928 for (i = 1; i < search_regs.num_regs; i++)
1929 {
1930 search_regs.start[i] = -1;
1931 search_regs.end[i] = -1;
1932 }
1933
fa8ed3e0
RS
1934 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1935 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 1936 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 1937}
ca1d1d23
JB
1938\f
1939/* Given a string of words separated by word delimiters,
1940 compute a regexp that matches those exact words
1941 separated by arbitrary punctuation. */
1942
1943static Lisp_Object
1944wordify (string)
1945 Lisp_Object string;
1946{
1947 register unsigned char *p, *o;
0c8533c6 1948 register int i, i_byte, len, punct_count = 0, word_count = 0;
ca1d1d23 1949 Lisp_Object val;
0c8533c6
RS
1950 int prev_c = 0;
1951 int adjust;
ca1d1d23 1952
b7826503 1953 CHECK_STRING (string);
d5db4077
KR
1954 p = SDATA (string);
1955 len = SCHARS (string);
ca1d1d23 1956
0c8533c6
RS
1957 for (i = 0, i_byte = 0; i < len; )
1958 {
1959 int c;
177c0ea7 1960
eb99a8dd 1961 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
0c8533c6
RS
1962
1963 if (SYNTAX (c) != Sword)
1964 {
1965 punct_count++;
1966 if (i > 0 && SYNTAX (prev_c) == Sword)
1967 word_count++;
1968 }
ca1d1d23 1969
0c8533c6
RS
1970 prev_c = c;
1971 }
1972
1973 if (SYNTAX (prev_c) == Sword)
1974 word_count++;
1975 if (!word_count)
b07b65aa 1976 return empty_string;
0c8533c6
RS
1977
1978 adjust = - punct_count + 5 * (word_count - 1) + 4;
8a2df937
RS
1979 if (STRING_MULTIBYTE (string))
1980 val = make_uninit_multibyte_string (len + adjust,
d5db4077 1981 SBYTES (string)
8a2df937
RS
1982 + adjust);
1983 else
1984 val = make_uninit_string (len + adjust);
ca1d1d23 1985
d5db4077 1986 o = SDATA (val);
ca1d1d23
JB
1987 *o++ = '\\';
1988 *o++ = 'b';
1e9582d4 1989 prev_c = 0;
ca1d1d23 1990
1e9582d4
RS
1991 for (i = 0, i_byte = 0; i < len; )
1992 {
1993 int c;
1994 int i_byte_orig = i_byte;
177c0ea7 1995
eb99a8dd 1996 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1e9582d4
RS
1997
1998 if (SYNTAX (c) == Sword)
1999 {
5d69fe10 2000 bcopy (SDATA (string) + i_byte_orig, o,
1e9582d4
RS
2001 i_byte - i_byte_orig);
2002 o += i_byte - i_byte_orig;
2003 }
2004 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2005 {
2006 *o++ = '\\';
2007 *o++ = 'W';
2008 *o++ = '\\';
2009 *o++ = 'W';
2010 *o++ = '*';
2011 }
2012
2013 prev_c = c;
2014 }
ca1d1d23
JB
2015
2016 *o++ = '\\';
2017 *o++ = 'b';
2018
2019 return val;
2020}
2021\f
2022DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
8c1a1077
PJ
2023 "MSearch backward: ",
2024 doc: /* Search backward from point for STRING.
2025Set point to the beginning of the occurrence found, and return point.
2026An optional second argument bounds the search; it is a buffer position.
2027The match found must not extend before that position.
2028Optional third argument, if t, means if fail just return nil (no error).
2029 If not nil and not t, position at limit of search and return nil.
2030Optional fourth argument is repeat count--search for successive occurrences.
2031
2032Search case-sensitivity is determined by the value of the variable
2033`case-fold-search', which see.
2034
2035See also the functions `match-beginning', `match-end' and `replace-match'. */)
2036 (string, bound, noerror, count)
ca1d1d23
JB
2037 Lisp_Object string, bound, noerror, count;
2038{
b819a390 2039 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
2040}
2041
6af43974 2042DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
8c1a1077
PJ
2043 doc: /* Search forward from point for STRING.
2044Set point to the end of the occurrence found, and return point.
2045An optional second argument bounds the search; it is a buffer position.
2046The match found must not extend after that position. nil is equivalent
2047 to (point-max).
2048Optional third argument, if t, means if fail just return nil (no error).
2049 If not nil and not t, move to limit of search and return nil.
2050Optional fourth argument is repeat count--search for successive occurrences.
2051
2052Search case-sensitivity is determined by the value of the variable
2053`case-fold-search', which see.
2054
2055See also the functions `match-beginning', `match-end' and `replace-match'. */)
2056 (string, bound, noerror, count)
ca1d1d23
JB
2057 Lisp_Object string, bound, noerror, count;
2058{
b819a390 2059 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
2060}
2061
2062DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
8c1a1077
PJ
2063 "sWord search backward: ",
2064 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2065Set point to the beginning of the occurrence found, and return point.
2066An optional second argument bounds the search; it is a buffer position.
2067The match found must not extend before that position.
2068Optional third argument, if t, means if fail just return nil (no error).
2069 If not nil and not t, move to limit of search and return nil.
2070Optional fourth argument is repeat count--search for successive occurrences. */)
2071 (string, bound, noerror, count)
ca1d1d23
JB
2072 Lisp_Object string, bound, noerror, count;
2073{
b819a390 2074 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2075}
2076
2077DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
8c1a1077
PJ
2078 "sWord search: ",
2079 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2080Set point to the end of the occurrence found, and return point.
2081An optional second argument bounds the search; it is a buffer position.
2082The match found must not extend after that position.
2083Optional third argument, if t, means if fail just return nil (no error).
2084 If not nil and not t, move to limit of search and return nil.
2085Optional fourth argument is repeat count--search for successive occurrences. */)
2086 (string, bound, noerror, count)
ca1d1d23
JB
2087 Lisp_Object string, bound, noerror, count;
2088{
b819a390 2089 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
2090}
2091
2092DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
8c1a1077
PJ
2093 "sRE search backward: ",
2094 doc: /* Search backward from point for match for regular expression REGEXP.
2095Set point to the beginning of the match, and return point.
2096The match found is the one starting last in the buffer
2097and yet ending before the origin of the search.
2098An optional second argument bounds the search; it is a buffer position.
2099The match found must start at or after that position.
2100Optional third argument, if t, means if fail just return nil (no error).
2101 If not nil and not t, move to limit of search and return nil.
2102Optional fourth argument is repeat count--search for successive occurrences.
2103See also the functions `match-beginning', `match-end', `match-string',
2104and `replace-match'. */)
2105 (regexp, bound, noerror, count)
19c0a730 2106 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2107{
b819a390 2108 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2109}
2110
2111DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
8c1a1077
PJ
2112 "sRE search: ",
2113 doc: /* Search forward from point for regular expression REGEXP.
2114Set point to the end of the occurrence found, and return point.
2115An optional second argument bounds the search; it is a buffer position.
2116The match found must not extend after that position.
2117Optional third argument, if t, means if fail just return nil (no error).
2118 If not nil and not t, move to limit of search and return nil.
2119Optional fourth argument is repeat count--search for successive occurrences.
2120See also the functions `match-beginning', `match-end', `match-string',
2121and `replace-match'. */)
2122 (regexp, bound, noerror, count)
19c0a730 2123 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2124{
b819a390
RS
2125 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2126}
2127
2128DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
8c1a1077
PJ
2129 "sPosix search backward: ",
2130 doc: /* Search backward from point for match for regular expression REGEXP.
2131Find the longest match in accord with Posix regular expression rules.
2132Set point to the beginning of the match, and return point.
2133The match found is the one starting last in the buffer
2134and yet ending before the origin of the search.
2135An optional second argument bounds the search; it is a buffer position.
2136The match found must start at or after that position.
2137Optional third argument, if t, means if fail just return nil (no error).
2138 If not nil and not t, move to limit of search and return nil.
2139Optional fourth argument is repeat count--search for successive occurrences.
2140See also the functions `match-beginning', `match-end', `match-string',
2141and `replace-match'. */)
2142 (regexp, bound, noerror, count)
b819a390
RS
2143 Lisp_Object regexp, bound, noerror, count;
2144{
2145 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2146}
2147
2148DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
8c1a1077
PJ
2149 "sPosix search: ",
2150 doc: /* Search forward from point for regular expression REGEXP.
2151Find the longest match in accord with Posix regular expression rules.
2152Set point to the end of the occurrence found, and return point.
2153An optional second argument bounds the search; it is a buffer position.
2154The match found must not extend after that position.
2155Optional third argument, if t, means if fail just return nil (no error).
2156 If not nil and not t, move to limit of search and return nil.
2157Optional fourth argument is repeat count--search for successive occurrences.
2158See also the functions `match-beginning', `match-end', `match-string',
2159and `replace-match'. */)
2160 (regexp, bound, noerror, count)
b819a390
RS
2161 Lisp_Object regexp, bound, noerror, count;
2162{
2163 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2164}
2165\f
d7a5ad5f 2166DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
8c1a1077 2167 doc: /* Replace text matched by last search with NEWTEXT.
4dd0c271
RS
2168Leave point at the end of the replacement text.
2169
8c1a1077
PJ
2170If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2171Otherwise maybe capitalize the whole text, or maybe just word initials,
2172based on the replaced text.
2173If the replaced text has only capital letters
2174and has at least one multiletter word, convert NEWTEXT to all caps.
4dd0c271
RS
2175Otherwise if all words are capitalized in the replaced text,
2176capitalize each word in NEWTEXT.
8c1a1077
PJ
2177
2178If third arg LITERAL is non-nil, insert NEWTEXT literally.
2179Otherwise treat `\\' as special:
2180 `\\&' in NEWTEXT means substitute original matched text.
2181 `\\N' means substitute what matched the Nth `\\(...\\)'.
2182 If Nth parens didn't match, substitute nothing.
2183 `\\\\' means insert one `\\'.
4dd0c271
RS
2184Case conversion does not apply to these substitutions.
2185
8c1a1077 2186FIXEDCASE and LITERAL are optional arguments.
8c1a1077
PJ
2187
2188The optional fourth argument STRING can be a string to modify.
2189This is meaningful when the previous match was done against STRING,
2190using `string-match'. When used this way, `replace-match'
2191creates and returns a new string made by copying STRING and replacing
2192the part of STRING that was matched.
2193
2194The optional fifth argument SUBEXP specifies a subexpression;
2195it says to replace just that subexpression with NEWTEXT,
2196rather than replacing the entire matched text.
2197This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2198`\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2199NEWTEXT in place of subexp N.
2200This is useful only after a regular expression search or match,
2201since only regular expressions have distinguished subexpressions. */)
2202 (newtext, fixedcase, literal, string, subexp)
d7a5ad5f 2203 Lisp_Object newtext, fixedcase, literal, string, subexp;
ca1d1d23
JB
2204{
2205 enum { nochange, all_caps, cap_initial } case_action;
ac3b28b1 2206 register int pos, pos_byte;
ca1d1d23 2207 int some_multiletter_word;
97832bd0 2208 int some_lowercase;
73dc8771 2209 int some_uppercase;
208767c3 2210 int some_nonuppercase_initial;
ca1d1d23 2211 register int c, prevc;
d7a5ad5f 2212 int sub;
3e18eecf 2213 int opoint, newpoint;
ca1d1d23 2214
b7826503 2215 CHECK_STRING (newtext);
ca1d1d23 2216
080c45fd 2217 if (! NILP (string))
b7826503 2218 CHECK_STRING (string);
080c45fd 2219
ca1d1d23
JB
2220 case_action = nochange; /* We tried an initialization */
2221 /* but some C compilers blew it */
4746118a
JB
2222
2223 if (search_regs.num_regs <= 0)
2224 error ("replace-match called before any match found");
2225
d7a5ad5f
RS
2226 if (NILP (subexp))
2227 sub = 0;
2228 else
2229 {
b7826503 2230 CHECK_NUMBER (subexp);
d7a5ad5f
RS
2231 sub = XINT (subexp);
2232 if (sub < 0 || sub >= search_regs.num_regs)
2233 args_out_of_range (subexp, make_number (search_regs.num_regs));
2234 }
2235
080c45fd
RS
2236 if (NILP (string))
2237 {
d7a5ad5f
RS
2238 if (search_regs.start[sub] < BEGV
2239 || search_regs.start[sub] > search_regs.end[sub]
2240 || search_regs.end[sub] > ZV)
2241 args_out_of_range (make_number (search_regs.start[sub]),
2242 make_number (search_regs.end[sub]));
080c45fd
RS
2243 }
2244 else
2245 {
d7a5ad5f
RS
2246 if (search_regs.start[sub] < 0
2247 || search_regs.start[sub] > search_regs.end[sub]
d5db4077 2248 || search_regs.end[sub] > SCHARS (string))
d7a5ad5f
RS
2249 args_out_of_range (make_number (search_regs.start[sub]),
2250 make_number (search_regs.end[sub]));
080c45fd 2251 }
ca1d1d23
JB
2252
2253 if (NILP (fixedcase))
2254 {
2255 /* Decide how to casify by examining the matched text. */
ac3b28b1 2256 int last;
ca1d1d23 2257
ac3b28b1
KH
2258 pos = search_regs.start[sub];
2259 last = search_regs.end[sub];
fa8ed3e0
RS
2260
2261 if (NILP (string))
ac3b28b1 2262 pos_byte = CHAR_TO_BYTE (pos);
fa8ed3e0 2263 else
ac3b28b1 2264 pos_byte = string_char_to_byte (string, pos);
fa8ed3e0 2265
ca1d1d23
JB
2266 prevc = '\n';
2267 case_action = all_caps;
2268
2269 /* some_multiletter_word is set nonzero if any original word
2270 is more than one letter long. */
2271 some_multiletter_word = 0;
97832bd0 2272 some_lowercase = 0;
208767c3 2273 some_nonuppercase_initial = 0;
73dc8771 2274 some_uppercase = 0;
ca1d1d23 2275
ac3b28b1 2276 while (pos < last)
ca1d1d23 2277 {
080c45fd 2278 if (NILP (string))
ac3b28b1
KH
2279 {
2280 c = FETCH_CHAR (pos_byte);
2281 INC_BOTH (pos, pos_byte);
2282 }
080c45fd 2283 else
ac3b28b1 2284 FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
080c45fd 2285
ca1d1d23
JB
2286 if (LOWERCASEP (c))
2287 {
2288 /* Cannot be all caps if any original char is lower case */
2289
97832bd0 2290 some_lowercase = 1;
ca1d1d23 2291 if (SYNTAX (prevc) != Sword)
208767c3 2292 some_nonuppercase_initial = 1;
ca1d1d23
JB
2293 else
2294 some_multiletter_word = 1;
2295 }
2296 else if (!NOCASEP (c))
2297 {
73dc8771 2298 some_uppercase = 1;
97832bd0 2299 if (SYNTAX (prevc) != Sword)
c4d460ce 2300 ;
97832bd0 2301 else
ca1d1d23
JB
2302 some_multiletter_word = 1;
2303 }
208767c3
RS
2304 else
2305 {
2306 /* If the initial is a caseless word constituent,
2307 treat that like a lowercase initial. */
2308 if (SYNTAX (prevc) != Sword)
2309 some_nonuppercase_initial = 1;
2310 }
ca1d1d23
JB
2311
2312 prevc = c;
2313 }
2314
97832bd0
RS
2315 /* Convert to all caps if the old text is all caps
2316 and has at least one multiletter word. */
2317 if (! some_lowercase && some_multiletter_word)
2318 case_action = all_caps;
c4d460ce 2319 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2320 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2321 case_action = cap_initial;
208767c3 2322 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2323 /* Should x -> yz, operating on X, give Yz or YZ?
2324 We'll assume the latter. */
2325 case_action = all_caps;
97832bd0
RS
2326 else
2327 case_action = nochange;
ca1d1d23
JB
2328 }
2329
080c45fd
RS
2330 /* Do replacement in a string. */
2331 if (!NILP (string))
2332 {
2333 Lisp_Object before, after;
2334
2335 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2336 make_number (search_regs.start[sub]));
2337 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2338
636a5e28
RS
2339 /* Substitute parts of the match into NEWTEXT
2340 if desired. */
080c45fd
RS
2341 if (NILP (literal))
2342 {
d131e79c
RS
2343 int lastpos = 0;
2344 int lastpos_byte = 0;
080c45fd
RS
2345 /* We build up the substituted string in ACCUM. */
2346 Lisp_Object accum;
2347 Lisp_Object middle;
d5db4077 2348 int length = SBYTES (newtext);
080c45fd
RS
2349
2350 accum = Qnil;
2351
ac3b28b1 2352 for (pos_byte = 0, pos = 0; pos_byte < length;)
080c45fd
RS
2353 {
2354 int substart = -1;
6bbd7a29 2355 int subend = 0;
1e79ec24 2356 int delbackslash = 0;
080c45fd 2357
0c8533c6
RS
2358 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2359
080c45fd
RS
2360 if (c == '\\')
2361 {
0c8533c6 2362 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
177c0ea7 2363
080c45fd
RS
2364 if (c == '&')
2365 {
d7a5ad5f
RS
2366 substart = search_regs.start[sub];
2367 subend = search_regs.end[sub];
080c45fd 2368 }
5fbbc83b 2369 else if (c >= '1' && c <= '9')
080c45fd 2370 {
5fbbc83b
RS
2371 if (search_regs.start[c - '0'] >= 0
2372 && c <= search_regs.num_regs + '0')
080c45fd
RS
2373 {
2374 substart = search_regs.start[c - '0'];
2375 subend = search_regs.end[c - '0'];
2376 }
5fbbc83b
RS
2377 else
2378 {
2379 /* If that subexp did not match,
2380 replace \\N with nothing. */
2381 substart = 0;
2382 subend = 0;
2383 }
080c45fd 2384 }
1e79ec24
KH
2385 else if (c == '\\')
2386 delbackslash = 1;
636a5e28
RS
2387 else
2388 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2389 }
2390 if (substart >= 0)
2391 {
d131e79c
RS
2392 if (pos - 2 != lastpos)
2393 middle = substring_both (newtext, lastpos,
2394 lastpos_byte,
2395 pos - 2, pos_byte - 2);
080c45fd
RS
2396 else
2397 middle = Qnil;
2398 accum = concat3 (accum, middle,
0c8533c6
RS
2399 Fsubstring (string,
2400 make_number (substart),
080c45fd
RS
2401 make_number (subend)));
2402 lastpos = pos;
0c8533c6 2403 lastpos_byte = pos_byte;
080c45fd 2404 }
1e79ec24
KH
2405 else if (delbackslash)
2406 {
d131e79c
RS
2407 middle = substring_both (newtext, lastpos,
2408 lastpos_byte,
2409 pos - 1, pos_byte - 1);
0c8533c6 2410
1e79ec24
KH
2411 accum = concat2 (accum, middle);
2412 lastpos = pos;
0c8533c6 2413 lastpos_byte = pos_byte;
1e79ec24 2414 }
080c45fd
RS
2415 }
2416
d131e79c
RS
2417 if (pos != lastpos)
2418 middle = substring_both (newtext, lastpos,
2419 lastpos_byte,
0c8533c6 2420 pos, pos_byte);
080c45fd
RS
2421 else
2422 middle = Qnil;
2423
2424 newtext = concat2 (accum, middle);
2425 }
2426
636a5e28 2427 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2428 if (case_action == all_caps)
2429 newtext = Fupcase (newtext);
2430 else if (case_action == cap_initial)
2b2eead9 2431 newtext = Fupcase_initials (newtext);
080c45fd
RS
2432
2433 return concat3 (before, newtext, after);
2434 }
2435
09c4719e 2436 /* Record point, then move (quietly) to the start of the match. */
9160906f 2437 if (PT >= search_regs.end[sub])
b0eba991 2438 opoint = PT - ZV;
9160906f
RS
2439 else if (PT > search_regs.start[sub])
2440 opoint = search_regs.end[sub] - ZV;
b0eba991
RS
2441 else
2442 opoint = PT;
2443
886ed6ec
RS
2444 /* If we want non-literal replacement,
2445 perform substitution on the replacement string. */
2446 if (NILP (literal))
ca1d1d23 2447 {
d5db4077 2448 int length = SBYTES (newtext);
68e69fbd
RS
2449 unsigned char *substed;
2450 int substed_alloc_size, substed_len;
3bc25e52
KH
2451 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2452 int str_multibyte = STRING_MULTIBYTE (newtext);
2453 Lisp_Object rev_tbl;
886ed6ec 2454 int really_changed = 0;
3bc25e52
KH
2455
2456 rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2457 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2458 make_number (0))
2459 : Qnil);
ac3b28b1 2460
68e69fbd
RS
2461 substed_alloc_size = length * 2 + 100;
2462 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2463 substed_len = 0;
2464
3bc25e52
KH
2465 /* Go thru NEWTEXT, producing the actual text to insert in
2466 SUBSTED while adjusting multibyteness to that of the current
2467 buffer. */
ca1d1d23 2468
ac3b28b1 2469 for (pos_byte = 0, pos = 0; pos_byte < length;)
ca1d1d23 2470 {
68e69fbd 2471 unsigned char str[MAX_MULTIBYTE_LENGTH];
f8ce8a0d
GM
2472 unsigned char *add_stuff = NULL;
2473 int add_len = 0;
68e69fbd 2474 int idx = -1;
9a76659d 2475
3bc25e52
KH
2476 if (str_multibyte)
2477 {
eb99a8dd 2478 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
3bc25e52
KH
2479 if (!buf_multibyte)
2480 c = multibyte_char_to_unibyte (c, rev_tbl);
2481 }
2482 else
2483 {
2484 /* Note that we don't have to increment POS. */
5d69fe10 2485 c = SREF (newtext, pos_byte++);
3bc25e52
KH
2486 if (buf_multibyte)
2487 c = unibyte_char_to_multibyte (c);
2488 }
ac3b28b1 2489
68e69fbd
RS
2490 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2491 or set IDX to a match index, which means put that part
2492 of the buffer text into SUBSTED. */
2493
ca1d1d23
JB
2494 if (c == '\\')
2495 {
886ed6ec
RS
2496 really_changed = 1;
2497
3bc25e52
KH
2498 if (str_multibyte)
2499 {
eb99a8dd
KH
2500 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2501 pos, pos_byte);
3bc25e52
KH
2502 if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2503 c = multibyte_char_to_unibyte (c, rev_tbl);
2504 }
2505 else
2506 {
d5db4077 2507 c = SREF (newtext, pos_byte++);
3bc25e52
KH
2508 if (buf_multibyte)
2509 c = unibyte_char_to_multibyte (c);
2510 }
2511
ca1d1d23 2512 if (c == '&')
68e69fbd 2513 idx = sub;
78445046 2514 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
2515 {
2516 if (search_regs.start[c - '0'] >= 1)
68e69fbd 2517 idx = c - '0';
ca1d1d23 2518 }
636a5e28 2519 else if (c == '\\')
68e69fbd 2520 add_len = 1, add_stuff = "\\";
636a5e28 2521 else
3bc25e52
KH
2522 {
2523 xfree (substed);
2524 error ("Invalid use of `\\' in replacement text");
2525 }
ca1d1d23
JB
2526 }
2527 else
68e69fbd
RS
2528 {
2529 add_len = CHAR_STRING (c, str);
2530 add_stuff = str;
2531 }
2532
2533 /* If we want to copy part of a previous match,
2534 set up ADD_STUFF and ADD_LEN to point to it. */
2535 if (idx >= 0)
2536 {
2537 int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2538 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2539 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2540 move_gap (search_regs.start[idx]);
2541 add_stuff = BYTE_POS_ADDR (begbyte);
2542 }
2543
2544 /* Now the stuff we want to add to SUBSTED
2545 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2546
2547 /* Make sure SUBSTED is big enough. */
2548 if (substed_len + add_len >= substed_alloc_size)
2549 {
2550 substed_alloc_size = substed_len + add_len + 500;
2551 substed = (unsigned char *) xrealloc (substed,
2552 substed_alloc_size + 1);
2553 }
2554
2555 /* Now add to the end of SUBSTED. */
f8ce8a0d
GM
2556 if (add_stuff)
2557 {
2558 bcopy (add_stuff, substed + substed_len, add_len);
2559 substed_len += add_len;
2560 }
ca1d1d23 2561 }
68e69fbd 2562
886ed6ec 2563 if (really_changed)
80460525
KH
2564 {
2565 if (buf_multibyte)
2566 {
2567 int nchars = multibyte_chars_in_text (substed, substed_len);
68e69fbd 2568
80460525
KH
2569 newtext = make_multibyte_string (substed, nchars, substed_len);
2570 }
2571 else
2572 newtext = make_unibyte_string (substed, substed_len);
2573 }
68e69fbd 2574 xfree (substed);
ca1d1d23
JB
2575 }
2576
886ed6ec
RS
2577 /* Replace the old text with the new in the cleanest possible way. */
2578 replace_range (search_regs.start[sub], search_regs.end[sub],
2579 newtext, 1, 0, 1);
d5db4077 2580 newpoint = search_regs.start[sub] + SCHARS (newtext);
ca1d1d23
JB
2581
2582 if (case_action == all_caps)
886ed6ec
RS
2583 Fupcase_region (make_number (search_regs.start[sub]),
2584 make_number (newpoint));
ca1d1d23 2585 else if (case_action == cap_initial)
886ed6ec
RS
2586 Fupcase_initials_region (make_number (search_regs.start[sub]),
2587 make_number (newpoint));
3e18eecf 2588
98e942e0
RS
2589 /* Adjust search data for this change. */
2590 {
5b88a2c5 2591 int oldend = search_regs.end[sub];
98e942e0
RS
2592 int change = newpoint - search_regs.end[sub];
2593 int i;
2594
2595 for (i = 0; i < search_regs.num_regs; i++)
2596 {
5b88a2c5 2597 if (search_regs.start[i] > oldend)
98e942e0 2598 search_regs.start[i] += change;
5b88a2c5 2599 if (search_regs.end[i] > oldend)
98e942e0
RS
2600 search_regs.end[i] += change;
2601 }
2602 }
2603
b0eba991 2604 /* Put point back where it was in the text. */
8d808a65 2605 if (opoint <= 0)
fa8ed3e0 2606 TEMP_SET_PT (opoint + ZV);
b0eba991 2607 else
fa8ed3e0 2608 TEMP_SET_PT (opoint);
b0eba991
RS
2609
2610 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2611 move_if_not_intangible (newpoint);
177c0ea7 2612
ca1d1d23
JB
2613 return Qnil;
2614}
2615\f
2616static Lisp_Object
2617match_limit (num, beginningp)
2618 Lisp_Object num;
2619 int beginningp;
2620{
2621 register int n;
2622
b7826503 2623 CHECK_NUMBER (num);
ca1d1d23 2624 n = XINT (num);
4746118a
JB
2625 if (n < 0 || n >= search_regs.num_regs)
2626 args_out_of_range (num, make_number (search_regs.num_regs));
2627 if (search_regs.num_regs <= 0
2628 || search_regs.start[n] < 0)
ca1d1d23
JB
2629 return Qnil;
2630 return (make_number ((beginningp) ? search_regs.start[n]
2631 : search_regs.end[n]));
2632}
2633
2634DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
8c1a1077
PJ
2635 doc: /* Return position of start of text matched by last search.
2636SUBEXP, a number, specifies which parenthesized expression in the last
2637 regexp.
2638Value is nil if SUBEXPth pair didn't match, or there were less than
2639 SUBEXP pairs.
2640Zero means the entire text matched by the whole regexp or whole string. */)
2641 (subexp)
5806161b 2642 Lisp_Object subexp;
ca1d1d23 2643{
5806161b 2644 return match_limit (subexp, 1);
ca1d1d23
JB
2645}
2646
2647DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
8c1a1077
PJ
2648 doc: /* Return position of end of text matched by last search.
2649SUBEXP, a number, specifies which parenthesized expression in the last
2650 regexp.
2651Value is nil if SUBEXPth pair didn't match, or there were less than
2652 SUBEXP pairs.
2653Zero means the entire text matched by the whole regexp or whole string. */)
2654 (subexp)
5806161b 2655 Lisp_Object subexp;
ca1d1d23 2656{
5806161b 2657 return match_limit (subexp, 0);
177c0ea7 2658}
ca1d1d23 2659
56256c2a 2660DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
8c1a1077
PJ
2661 doc: /* Return a list containing all info on what the last search matched.
2662Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2663All the elements are markers or nil (nil if the Nth pair didn't match)
2664if the last match was on a buffer; integers or nil if a string was matched.
2665Use `store-match-data' to reinstate the data in this list.
2666
2667If INTEGERS (the optional first argument) is non-nil, always use integers
2668\(rather than markers) to represent buffer positions.
2669If REUSE is a list, reuse it as part of the value. If REUSE is long enough
140a6b7e
KS
2670to hold all the values, and if INTEGERS is non-nil, no consing is done.
2671
2672Return value is undefined if the last search failed. */)
8c1a1077 2673 (integers, reuse)
56256c2a 2674 Lisp_Object integers, reuse;
ca1d1d23 2675{
56256c2a 2676 Lisp_Object tail, prev;
4746118a 2677 Lisp_Object *data;
ca1d1d23
JB
2678 int i, len;
2679
daa37602 2680 if (NILP (last_thing_searched))
c36bcf1b 2681 return Qnil;
daa37602 2682
6bbd7a29
GM
2683 prev = Qnil;
2684
4746118a
JB
2685 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2686 * sizeof (Lisp_Object));
2687
ca1d1d23 2688 len = -1;
4746118a 2689 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2690 {
2691 int start = search_regs.start[i];
2692 if (start >= 0)
2693 {
56256c2a
RS
2694 if (EQ (last_thing_searched, Qt)
2695 || ! NILP (integers))
ca1d1d23 2696 {
c235cce7
KH
2697 XSETFASTINT (data[2 * i], start);
2698 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2699 }
0ed62dc7 2700 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2701 {
2702 data[2 * i] = Fmake_marker ();
daa37602
JB
2703 Fset_marker (data[2 * i],
2704 make_number (start),
2705 last_thing_searched);
ca1d1d23
JB
2706 data[2 * i + 1] = Fmake_marker ();
2707 Fset_marker (data[2 * i + 1],
177c0ea7 2708 make_number (search_regs.end[i]),
daa37602 2709 last_thing_searched);
ca1d1d23 2710 }
daa37602
JB
2711 else
2712 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2713 abort ();
2714
ca1d1d23
JB
2715 len = i;
2716 }
2717 else
2718 data[2 * i] = data [2 * i + 1] = Qnil;
2719 }
56256c2a
RS
2720
2721 /* If REUSE is not usable, cons up the values and return them. */
2722 if (! CONSP (reuse))
2723 return Flist (2 * len + 2, data);
2724
2725 /* If REUSE is a list, store as many value elements as will fit
2726 into the elements of REUSE. */
2727 for (i = 0, tail = reuse; CONSP (tail);
c1d497be 2728 i++, tail = XCDR (tail))
56256c2a
RS
2729 {
2730 if (i < 2 * len + 2)
f3fbd155 2731 XSETCAR (tail, data[i]);
56256c2a 2732 else
f3fbd155 2733 XSETCAR (tail, Qnil);
56256c2a
RS
2734 prev = tail;
2735 }
2736
2737 /* If we couldn't fit all value elements into REUSE,
2738 cons up the rest of them and add them to the end of REUSE. */
2739 if (i < 2 * len + 2)
f3fbd155 2740 XSETCDR (prev, Flist (2 * len + 2 - i, data + i));
56256c2a
RS
2741
2742 return reuse;
ca1d1d23
JB
2743}
2744
2745
3f1c005b 2746DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
8c1a1077
PJ
2747 doc: /* Set internal data on last search match from elements of LIST.
2748LIST should have been created by calling `match-data' previously. */)
2749 (list)
ca1d1d23
JB
2750 register Lisp_Object list;
2751{
2752 register int i;
2753 register Lisp_Object marker;
2754
7074fde6
FP
2755 if (running_asynch_code)
2756 save_search_regs ();
2757
ca1d1d23 2758 if (!CONSP (list) && !NILP (list))
b37902c8 2759 list = wrong_type_argument (Qconsp, list);
ca1d1d23 2760
177c0ea7 2761 /* Unless we find a marker with a buffer in LIST, assume that this
daa37602
JB
2762 match data came from a string. */
2763 last_thing_searched = Qt;
2764
4746118a
JB
2765 /* Allocate registers if they don't already exist. */
2766 {
d084e942 2767 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
2768
2769 if (length > search_regs.num_regs)
2770 {
1113d9db
JB
2771 if (search_regs.num_regs == 0)
2772 {
2773 search_regs.start
2774 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2775 search_regs.end
2776 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2777 }
4746118a 2778 else
1113d9db
JB
2779 {
2780 search_regs.start
2781 = (regoff_t *) xrealloc (search_regs.start,
2782 length * sizeof (regoff_t));
2783 search_regs.end
2784 = (regoff_t *) xrealloc (search_regs.end,
2785 length * sizeof (regoff_t));
2786 }
4746118a 2787
e62371e9
KH
2788 for (i = search_regs.num_regs; i < length; i++)
2789 search_regs.start[i] = -1;
2790
487282dc 2791 search_regs.num_regs = length;
4746118a
JB
2792 }
2793 }
2794
2795 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2796 {
2797 marker = Fcar (list);
2798 if (NILP (marker))
2799 {
2800 search_regs.start[i] = -1;
2801 list = Fcdr (list);
2802 }
2803 else
2804 {
e62371e9
KH
2805 int from;
2806
0ed62dc7 2807 if (MARKERP (marker))
daa37602
JB
2808 {
2809 if (XMARKER (marker)->buffer == 0)
c235cce7 2810 XSETFASTINT (marker, 0);
daa37602 2811 else
a3668d92 2812 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
daa37602 2813 }
ca1d1d23 2814
b7826503 2815 CHECK_NUMBER_COERCE_MARKER (marker);
e62371e9 2816 from = XINT (marker);
ca1d1d23
JB
2817 list = Fcdr (list);
2818
2819 marker = Fcar (list);
0ed62dc7 2820 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
c235cce7 2821 XSETFASTINT (marker, 0);
ca1d1d23 2822
b7826503 2823 CHECK_NUMBER_COERCE_MARKER (marker);
e62371e9 2824 search_regs.start[i] = from;
ca1d1d23
JB
2825 search_regs.end[i] = XINT (marker);
2826 }
2827 list = Fcdr (list);
2828 }
2829
177c0ea7 2830 return Qnil;
ca1d1d23
JB
2831}
2832
7074fde6
FP
2833/* If non-zero the match data have been saved in saved_search_regs
2834 during the execution of a sentinel or filter. */
75ebf74b 2835static int search_regs_saved;
7074fde6
FP
2836static struct re_registers saved_search_regs;
2837
2838/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2839 if asynchronous code (filter or sentinel) is running. */
2840static void
2841save_search_regs ()
2842{
2843 if (!search_regs_saved)
2844 {
2845 saved_search_regs.num_regs = search_regs.num_regs;
2846 saved_search_regs.start = search_regs.start;
2847 saved_search_regs.end = search_regs.end;
2848 search_regs.num_regs = 0;
2d4a771a
RS
2849 search_regs.start = 0;
2850 search_regs.end = 0;
7074fde6
FP
2851
2852 search_regs_saved = 1;
2853 }
2854}
2855
2856/* Called upon exit from filters and sentinels. */
2857void
2858restore_match_data ()
2859{
2860 if (search_regs_saved)
2861 {
2862 if (search_regs.num_regs > 0)
2863 {
2864 xfree (search_regs.start);
2865 xfree (search_regs.end);
2866 }
2867 search_regs.num_regs = saved_search_regs.num_regs;
2868 search_regs.start = saved_search_regs.start;
2869 search_regs.end = saved_search_regs.end;
2870
2871 search_regs_saved = 0;
2872 }
2873}
2874
ca1d1d23
JB
2875/* Quote a string to inactivate reg-expr chars */
2876
2877DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
8c1a1077
PJ
2878 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
2879 (string)
5806161b 2880 Lisp_Object string;
ca1d1d23
JB
2881{
2882 register unsigned char *in, *out, *end;
2883 register unsigned char *temp;
0c8533c6 2884 int backslashes_added = 0;
ca1d1d23 2885
b7826503 2886 CHECK_STRING (string);
ca1d1d23 2887
d5db4077 2888 temp = (unsigned char *) alloca (SBYTES (string) * 2);
ca1d1d23
JB
2889
2890 /* Now copy the data into the new string, inserting escapes. */
2891
d5db4077
KR
2892 in = SDATA (string);
2893 end = in + SBYTES (string);
177c0ea7 2894 out = temp;
ca1d1d23
JB
2895
2896 for (; in != end; in++)
2897 {
2898 if (*in == '[' || *in == ']'
2899 || *in == '*' || *in == '.' || *in == '\\'
2900 || *in == '?' || *in == '+'
2901 || *in == '^' || *in == '$')
0c8533c6 2902 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
2903 *out++ = *in;
2904 }
2905
3f8100f1 2906 return make_specified_string (temp,
d5db4077 2907 SCHARS (string) + backslashes_added,
3f8100f1
RS
2908 out - temp,
2909 STRING_MULTIBYTE (string));
ca1d1d23 2910}
177c0ea7 2911\f
dfcf069d 2912void
ca1d1d23
JB
2913syms_of_search ()
2914{
2915 register int i;
2916
487282dc
KH
2917 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2918 {
2919 searchbufs[i].buf.allocated = 100;
b23c0a83 2920 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
487282dc
KH
2921 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2922 searchbufs[i].regexp = Qnil;
2923 staticpro (&searchbufs[i].regexp);
2924 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2925 }
2926 searchbuf_head = &searchbufs[0];
ca1d1d23
JB
2927
2928 Qsearch_failed = intern ("search-failed");
2929 staticpro (&Qsearch_failed);
2930 Qinvalid_regexp = intern ("invalid-regexp");
2931 staticpro (&Qinvalid_regexp);
2932
2933 Fput (Qsearch_failed, Qerror_conditions,
2934 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2935 Fput (Qsearch_failed, Qerror_message,
2936 build_string ("Search failed"));
2937
2938 Fput (Qinvalid_regexp, Qerror_conditions,
2939 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2940 Fput (Qinvalid_regexp, Qerror_message,
2941 build_string ("Invalid regexp"));
2942
daa37602
JB
2943 last_thing_searched = Qnil;
2944 staticpro (&last_thing_searched);
2945
ca1d1d23 2946 defsubr (&Slooking_at);
b819a390
RS
2947 defsubr (&Sposix_looking_at);
2948 defsubr (&Sstring_match);
2949 defsubr (&Sposix_string_match);
ca1d1d23
JB
2950 defsubr (&Ssearch_forward);
2951 defsubr (&Ssearch_backward);
2952 defsubr (&Sword_search_forward);
2953 defsubr (&Sword_search_backward);
2954 defsubr (&Sre_search_forward);
2955 defsubr (&Sre_search_backward);
b819a390
RS
2956 defsubr (&Sposix_search_forward);
2957 defsubr (&Sposix_search_backward);
ca1d1d23
JB
2958 defsubr (&Sreplace_match);
2959 defsubr (&Smatch_beginning);
2960 defsubr (&Smatch_end);
2961 defsubr (&Smatch_data);
3f1c005b 2962 defsubr (&Sset_match_data);
ca1d1d23
JB
2963 defsubr (&Sregexp_quote);
2964}
ab5796a9
MB
2965
2966/* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
2967 (do not change this comment) */