/* String search routines for GNU Emacs.
Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2002, 2003,
- 2004, 2005 Free Software Foundation, Inc.
+ 2004, 2005, 2006 Free Software Foundation, Inc.
This file is part of GNU Emacs.
Lisp_Object Qinvalid_regexp;
+/* Error condition used for failing searches */
+Lisp_Object Qsearch_failed;
+
Lisp_Object Vsearch_spaces_regexp;
static void set_search_regs ();
static int simple_search ();
static int boyer_moore ();
static int search_buffer ();
+static void matcher_overflow () NO_RETURN;
static void
matcher_overflow ()
re_set_syntax (old);
UNBLOCK_INPUT;
if (val)
- Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
+ xsignal1 (Qinvalid_regexp, build_string (val));
cp->regexp = Fcopy_sequence (pattern);
}
return &cp->buf;
}
-/* Error condition used for failing searches */
-Lisp_Object Qsearch_failed;
-
-Lisp_Object
-signal_failure (arg)
- Lisp_Object arg;
-{
- Fsignal (Qsearch_failed, Fcons (arg, Qnil));
- return Qnil;
-}
\f
static Lisp_Object
looking_at_1 (string, posix)
DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
doc: /* Return index of start of first match for REGEXP in STRING, or nil.
-Case is ignored if `case-fold-search' is non-nil in the current buffer.
+Matching ignores case if `case-fold-search' is non-nil.
If third arg START is non-nil, start search at that index in STRING.
For index of first char beyond the match, do (match-end 0).
`match-end' and `match-beginning' also give indices of substrings
if (np <= 0)
{
if (NILP (noerror))
- return signal_failure (string);
+ xsignal1 (Qsearch_failed, string);
+
if (!EQ (noerror, Qt))
{
if (lim < BEGV || lim > ZV)
int raw_pattern_size_byte;
unsigned char *patbuf;
int multibyte = !NILP (current_buffer->enable_multibyte_characters);
- unsigned char *base_pat = SDATA (string);
- /* Set to nozero if we find a non-ASCII char that need
- translation. */
- int charset_base = 0;
+ unsigned char *base_pat;
+ /* Set to positive if we find a non-ASCII char that needs
+ translation. Otherwise set to zero later. */
+ int charset_base = -1;
int boyer_moore_ok = 1;
/* MULTIBYTE says whether the text to be searched is multibyte.
always handle their translation. */
while (1)
{
- if (! ASCII_BYTE_P (inverse))
+ if (ASCII_BYTE_P (inverse))
{
- if (SINGLE_BYTE_CHAR_P (inverse))
- {
- /* Boyer-moore search can't handle a
- translation of an eight-bit
- character. */
- boyer_moore_ok = 0;
- break;
- }
- else if (charset_base == 0)
- charset_base = inverse & ~CHAR_FIELD3_MASK;
- else if ((inverse & ~CHAR_FIELD3_MASK)
- != charset_base)
+ if (charset_base > 0)
{
boyer_moore_ok = 0;
break;
}
+ charset_base = 0;
+ }
+ else if (SINGLE_BYTE_CHAR_P (inverse))
+ {
+ /* Boyer-Moore search can't handle a
+ translation of an eight-bit
+ character. */
+ boyer_moore_ok = 0;
+ break;
+ }
+ else if (charset_base < 0)
+ charset_base = inverse & ~CHAR_FIELD3_MASK;
+ else if ((inverse & ~CHAR_FIELD3_MASK)
+ != charset_base)
+ {
+ boyer_moore_ok = 0;
+ break;
}
if (c == inverse)
break;
}
}
}
+ if (charset_base < 0)
+ charset_base = 0;
/* Store this character into the translated pattern. */
bcopy (str, pat, charlen);
if (ASCII_BYTE_P (*ptr) || ! multibyte)
ch = *ptr;
else if (charset_base
- && (pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1]))
+ && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
{
unsigned char *charstart = ptr - 1;
ch = -1;
}
- if (ch > 0400)
+ if (ch >= 0400)
j = ((unsigned char) ch) | 0200;
else
j = *ptr;
while (1)
{
TRANSLATE (ch, inverse_trt, ch);
- if (ch > 0400)
+ if (ch >= 0400)
j = ((unsigned char) ch) | 0200;
else
j = (unsigned char) ch;
else
some_multiletter_word = 1;
}
- else if (!NOCASEP (c))
+ else if (UPPERCASEP (c))
{
some_uppercase = 1;
if (SYNTAX (prevc) != Sword)
if (running_asynch_code)
save_search_regs ();
- if (!CONSP (list) && !NILP (list))
- list = wrong_type_argument (Qconsp, list);
+ CHECK_LIST (list);
/* Unless we find a marker with a buffer or an explicit buffer
in LIST, assume that this match data came from a string. */
for (; in != end; in++)
{
- if (*in == '[' || *in == ']'
+ if (*in == '['
|| *in == '*' || *in == '.' || *in == '\\'
|| *in == '?' || *in == '+'
|| *in == '^' || *in == '$')