cp->posix = posix;
cp->buf.multibyte = multibyte;
BLOCK_INPUT;
- old = re_set_syntax (RE_SYNTAX_EMACS
+ old = re_set_syntax (RE_SYNTAX_EMACS | RE_CHAR_CLASSES
| (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
val = (char *) re_compile_pattern ((char *)raw_pattern,
raw_pattern_size, &cp->buf);
cp->regexp = Fcopy_sequence (pattern);
}
+/* Shrink each compiled regexp buffer in the cache
+   to the size actually used right now.
+   This is called from garbage collection.
+
+   Shrinking is best-effort: an entry is left exactly as it was
+   (same buffer, same `allocated') when there is nothing to keep
+   or when the reallocation fails, so every cache entry always
+   describes a valid buffer.  */
+
+void
+shrink_regexp_cache ()
+{
+  struct regexp_cache *cp;
+
+  for (cp = searchbuf_head; cp != 0; cp = cp->next)
+    {
+      unsigned char *shrunk;
+
+      /* realloc with a size of zero has implementation-defined
+	 results (it may free the block and return a null pointer),
+	 so do not try to shrink an entry that uses no bytes.  */
+      if (cp->buf.used == 0)
+	continue;
+
+      /* Keep the old pointer until we know the call succeeded;
+	 on failure the original block is still valid and owned
+	 by this entry.  */
+      shrunk = (unsigned char *) realloc (cp->buf.buffer, cp->buf.used);
+      if (shrunk == 0)
+	continue;
+
+      cp->buf.buffer = shrunk;
+      cp->buf.allocated = cp->buf.used;
+    }
+}
+
/* Compile a regexp if necessary, but first check to see if there's one in
the cache.
PATTERN is the pattern to compile.
DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
"Return index of start of first match for REGEXP in STRING, or nil.\n\
+Case is ignored if `case-fold-search' is non-nil in the current buffer.\n\
If third arg START is non-nil, start search at that index in STRING.\n\
For index of first char beyond the match, do (match-end 0).\n\
`match-end' and `match-beginning' also give indices of substrings\n\
DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
"Return index of start of first match for REGEXP in STRING, or nil.\n\
Find the longest match, in accord with Posix regular expression rules.\n\
+Case is ignored if `case-fold-search' is non-nil in the current buffer.\n\
If third arg START is non-nil, start search at that index in STRING.\n\
For index of first char beyond the match, do (match-end 0).\n\
`match-end' and `match-beginning' also give indices of substrings\n\
if (running_asynch_code)
save_search_regs ();
+ /* Searching 0 times means don't move. */
/* Null string is found at starting position. */
- if (len == 0)
+ if (len == 0 || n == 0)
{
set_search_regs (pos, 0);
return pos;
}
- /* Searching 0 times means don't move. */
- if (n == 0)
- return pos;
-
if (RE && !trivial_regexp_p (string))
{
unsigned char *p1, *p2;
int multibyte = !NILP (current_buffer->enable_multibyte_characters);
unsigned char *base_pat = XSTRING (string)->data;
int charset_base = -1;
- int simple = 1;
+ int boyer_moore_ok = 1;
/* MULTIBYTE says whether the text to be searched is multibyte.
We must convert PATTERN to match that, or we will not really
}
c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
+
/* Translate the character, if requested. */
TRANSLATE (translated, trt, c);
/* If translation changed the byte-length, go back
charlen = CHAR_STRING (c, workbuf, str);
}
+ /* If we are searching for something strange, such as an
+ 8-bit code or an invalid multibyte code, don't use boyer-moore. */
+ if (! ASCII_BYTE_P (translated)
+ && (charlen == 1 /* 8bit code */
+ || charlen != in_charlen /* invalid multibyte code */
+ ))
+ boyer_moore_ok = 0;
+
TRANSLATE (inverse, inverse_trt, c);
/* Did this char actually get translated?
{
/* Keep track of which character set row
contains the characters that need translation. */
- int charset_base_code = c & ~0xff;
+ int charset_base_code = c & ~CHAR_FIELD3_MASK;
if (charset_base == -1)
charset_base = charset_base_code;
else if (charset_base != charset_base_code)
/* If two different rows appear, needing translation,
then we cannot use boyer_moore search. */
- simple = 0;
- /* ??? Handa: this must do simple = 0
+ boyer_moore_ok = 0;
+ /* ??? Handa: this must do boyer_moore_ok = 0
if c is a composite character. */
}
}
else
{
+ /* Unibyte buffer. */
+ charset_base = 0;
while (--len >= 0)
{
- int c, translated, inverse;
+ int c, translated;
/* If we got here and the RE flag is set, it's because we're
dealing with a regexp known to be trivial, so the backslash
}
c = *base_pat++;
TRANSLATE (translated, trt, c);
- TRANSLATE (inverse, inverse_trt, c);
-
- /* Did this char actually get translated?
- Would any other char get translated into it? */
- if (translated != c || inverse != c)
- {
- /* Keep track of which character set row
- contains the characters that need translation. */
- int charset_base_code = c & ~0xff;
- if (charset_base == -1)
- charset_base = charset_base_code;
- else if (charset_base != charset_base_code)
- /* If two different rows appear, needing translation,
- then we cannot use boyer_moore search. */
- simple = 0;
- }
*pat++ = translated;
}
}
len = raw_pattern_size;
pat = base_pat = patbuf;
- if (simple)
+ if (boyer_moore_ok)
return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
pos, pos_byte, lim, lim_byte,
charset_base);
while (! CHAR_HEAD_P (*charstart))
charstart--;
untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
- if (charset_base == (untranslated & ~0xff))
+ if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
{
TRANSLATE (ch, trt, untranslated);
if (! CHAR_HEAD_P (*ptr))
}
/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
- for a match just found in the current buffer. */
+ for the overall match just found in the current buffer.
+ Also clear out the match data for registers 1 and up. */
static void
set_search_regs (beg_byte, nbytes)
int beg_byte, nbytes;
{
+ int i;
+
/* Make sure we have registers in which to store
the match position. */
if (search_regs.num_regs == 0)
search_regs.num_regs = 2;
}
+ /* Clear out the other registers. */
+ for (i = 1; i < search_regs.num_regs; i++)
+ {
+ search_regs.start[i] = -1;
+ search_regs.end[i] = -1;
+ }
+
search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
XSETBUFFER (last_thing_searched, current_buffer);
return build_string ("");
adjust = - punct_count + 5 * (word_count - 1) + 4;
- val = make_uninit_multibyte_string (len + adjust,
- STRING_BYTES (XSTRING (string)) + adjust);
+ if (STRING_MULTIBYTE (string))
+ val = make_uninit_multibyte_string (len + adjust,
+ STRING_BYTES (XSTRING (string))
+ + adjust);
+ else
+ val = make_uninit_string (len + adjust);
o = XSTRING (val)->data;
*o++ = '\\';
if (STRING_MULTIBYTE (string))
FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
else
- c = XSTRING (string)->data[i++];
+ {
+ c = XSTRING (string)->data[i++];
+ i_byte++;
+ }
if (SYNTAX (c) == Sword)
{
Lisp_Object newtext, fixedcase, literal, string, subexp;
{
enum { nochange, all_caps, cap_initial } case_action;
- register int pos, last;
+ register int pos, pos_byte;
int some_multiletter_word;
int some_lowercase;
int some_uppercase;
if (NILP (fixedcase))
{
- int beg;
/* Decide how to casify by examining the matched text. */
+ int last;
- if (NILP (string))
- last = CHAR_TO_BYTE (search_regs.end[sub]);
- else
- last = search_regs.end[sub];
+ pos = search_regs.start[sub];
+ last = search_regs.end[sub];
if (NILP (string))
- beg = CHAR_TO_BYTE (search_regs.start[sub]);
+ pos_byte = CHAR_TO_BYTE (pos);
else
- beg = search_regs.start[sub];
+ pos_byte = string_char_to_byte (string, pos);
prevc = '\n';
case_action = all_caps;
some_nonuppercase_initial = 0;
some_uppercase = 0;
- for (pos = beg; pos < last; pos++)
+ while (pos < last)
{
if (NILP (string))
- c = FETCH_BYTE (pos);
+ {
+ c = FETCH_CHAR (pos_byte);
+ INC_BOTH (pos, pos_byte);
+ }
else
- c = XSTRING (string)->data[pos];
+ FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
if (LOWERCASEP (c))
{
/* We build up the substituted string in ACCUM. */
Lisp_Object accum;
Lisp_Object middle;
- int pos_byte;
+ int length = STRING_BYTES (XSTRING (newtext));
accum = Qnil;
- for (pos_byte = 0, pos = 0; pos_byte < STRING_BYTES (XSTRING (newtext));)
+ for (pos_byte = 0, pos = 0; pos_byte < length;)
{
int substart = -1;
int subend;
}
/* Record point, the move (quietly) to the start of the match. */
- if (PT > search_regs.start[sub])
+ if (PT >= search_regs.end[sub])
opoint = PT - ZV;
+ else if (PT > search_regs.start[sub])
+ opoint = search_regs.end[sub] - ZV;
else
opoint = PT;
else
{
struct gcpro gcpro1;
+ int length = STRING_BYTES (XSTRING (newtext));
+
GCPRO1 (newtext);
- for (pos = 0; pos < XSTRING (newtext)->size; pos++)
+ for (pos_byte = 0, pos = 0; pos_byte < length;)
{
int offset = PT - search_regs.start[sub];
- c = XSTRING (newtext)->data[pos];
+ FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
+
if (c == '\\')
{
- c = XSTRING (newtext)->data[++pos];
+ FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
if (c == '&')
Finsert_buffer_substring
(Fcurrent_buffer (),