internationalization features.)
Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
- 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+ 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
that make sense only in Emacs. */
#ifdef emacs
+# include <setjmp.h>
# include "lisp.h"
# include "buffer.h"
# define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
# define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte)
-# define RE_STRING_CHAR(p, s, multibyte) \
- (multibyte ? (STRING_CHAR (p, s)) : (*(p)))
-# define RE_STRING_CHAR_AND_LENGTH(p, s, len, multibyte) \
- (multibyte ? (STRING_CHAR_AND_LENGTH (p, s, len)) : ((len) = 1, *(p)))
+# define RE_STRING_CHAR(p, multibyte) \
+ (multibyte ? (STRING_CHAR (p)) : (*(p)))
+# define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) \
+ (multibyte ? (STRING_CHAR_AND_LENGTH (p, len)) : ((len) = 1, *(p)))
# define RE_CHAR_TO_MULTIBYTE(c) UNIBYTE_TO_CHAR (c)
re_char *dtemp = (p) == (str2) ? (end1) : (p); \
re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \
while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp)); \
- c = STRING_CHAR (dtemp, (p) - dtemp); \
+ c = STRING_CHAR (dtemp); \
} \
else \
{ \
# define GET_CHAR_AFTER(c, p, len) \
do { \
if (target_multibyte) \
- (c) = STRING_CHAR_AND_LENGTH (p, 0, len); \
+ (c) = STRING_CHAR_AND_LENGTH (p, len); \
else \
{ \
(c) = *p; \
# endif
# define realloc xrealloc
-/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
- If nothing else has been done, use the method below. */
-# ifdef INHIBIT_STRING_HEADER
-# if !(defined HAVE_BZERO && defined HAVE_BCOPY)
-# if !defined bzero && !defined bcopy
-# undef INHIBIT_STRING_HEADER
-# endif
+/* This is the normal way of making sure we have memcpy, memcmp and memset. */
+# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
+# include <string.h>
+# else
+# include <strings.h>
+# ifndef memcmp
+# define memcmp(s1, s2, n) bcmp (s1, s2, n)
# endif
-# endif
-
-/* This is the normal way of making sure we have memcpy, memcmp and bzero.
- This is used in most programs--a few other programs avoid this
- by defining INHIBIT_STRING_HEADER. */
-# ifndef INHIBIT_STRING_HEADER
-# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
-# include <string.h>
-# ifndef bzero
-# ifndef _LIBC
-# define bzero(s, n) (memset (s, '\0', n), (s))
-# else
-# define bzero(s, n) __bzero (s, n)
-# endif
-# endif
-# else
-# include <strings.h>
-# ifndef memcmp
-# define memcmp(s1, s2, n) bcmp (s1, s2, n)
-# endif
-# ifndef memcpy
-# define memcpy(d, s, n) (bcopy (s, d, n), (d))
-# endif
+# ifndef memcpy
+# define memcpy(d, s, n) (bcopy (s, d, n), (d))
# endif
# endif
# define SWITCH_ENUM_CAST(x) (x)
/* Dummy macros for non-Emacs environments. */
-# define BASE_LEADING_CODE_P(c) (0)
# define CHAR_CHARSET(c) 0
# define CHARSET_LEADING_CODE_BASE(c) 0
# define MAX_MULTIBYTE_LENGTH 1
# define CHAR_HEAD_P(p) (1)
# define SINGLE_BYTE_CHAR_P(c) (1)
# define SAME_CHARSET_P(c1, c2) (1)
-# define MULTIBYTE_FORM_LENGTH(p, s) (1)
+# define BYTES_BY_CHAR_HEAD(p) (1)
# define PREV_CHAR_BOUNDARY(p, limit) ((p)--)
-# define STRING_CHAR(p, s) (*(p))
-# define RE_STRING_CHAR(p, s, multibyte) STRING_CHAR ((p), (s))
+# define STRING_CHAR(p) (*(p))
+# define RE_STRING_CHAR(p, multibyte) STRING_CHAR (p)
# define CHAR_STRING(c, s) (*(s) = (c), 1)
-# define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p))
-# define RE_STRING_CHAR_AND_LENGTH(p, s, len, multibyte) STRING_CHAR_AND_LENGTH ((p), (s), (len))
+# define STRING_CHAR_AND_LENGTH(p, actual_len) ((actual_len) = 1, *(p))
+# define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) STRING_CHAR_AND_LENGTH (p, len)
# define RE_CHAR_TO_MULTIBYTE(c) (c)
# define RE_CHAR_TO_UNIBYTE(c) (c)
# define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
if (done)
return;
- bzero (re_syntax_table, sizeof re_syntax_table);
+ memset (re_syntax_table, 0, sizeof re_syntax_table);
for (c = 0; c < CHAR_SET_SIZE; ++c)
if (ISALNUM (c))
defined in regex.h. We return the old syntax. */
reg_syntax_t
-re_set_syntax (syntax)
- reg_syntax_t syntax;
+re_set_syntax (reg_syntax_t syntax)
{
reg_syntax_t ret = re_syntax_options;
static re_char *whitespace_regexp;
/* Record REGEXP as the whitespace regexp by storing the pointer itself
   (cast to re_char *) in the file-scope variable whitespace_regexp.
   The string is not copied here.  */
void
-re_set_whitespace_regexp (regexp)
- const char *regexp;
+re_set_whitespace_regexp (const char *regexp)
{
whitespace_regexp = (re_char *) regexp;
}
do { \
int len; \
if (p == pend) return REG_EEND; \
- c = RE_STRING_CHAR_AND_LENGTH (p, pend - p, len, multibyte); \
+ c = RE_STRING_CHAR_AND_LENGTH (p, len, multibyte); \
p += len; \
} while (0)
} while (0)
-/* Both FROM and TO are mulitbyte characters. */
+/* Both FROM and TO are multibyte characters. */
#define SETUP_MULTIBYTE_RANGE(work_area, FROM, TO) \
do { \
/* Map a string to the char class it names (if any). */
re_wctype_t
-re_wctype (str)
- re_char *str;
+re_wctype (const re_char *str)
{
const char *string = str;
if (STREQ (string, "alnum")) return RECC_ALNUM;
/* True if CH is in the char class CC. */
boolean
-re_iswctype (ch, cc)
- int ch;
- re_wctype_t cc;
+re_iswctype (int ch, re_wctype_t cc)
{
switch (cc)
{
/* Return a bit-pattern to use in the range-table bits to match multibyte
chars of class CC. */
static int
-re_wctype_to_bit (cc)
- re_wctype_t cc;
+re_wctype_to_bit (re_wctype_t cc)
{
switch (cc)
{
/* Actually extend the space in WORK_AREA. */
static void
-extend_range_table_work_area (work_area)
- struct range_table_work_area *work_area;
+extend_range_table_work_area (struct range_table_work_area *work_area)
{
work_area->allocated += 16 * sizeof (int);
if (work_area->table)
} while (0)
static reg_errcode_t
-regex_compile (pattern, size, syntax, bufp)
- re_char *pattern;
- size_t size;
- reg_syntax_t syntax;
- struct re_pattern_buffer *bufp;
+regex_compile (const re_char *pattern, size_t size, reg_syntax_t syntax, struct re_pattern_buffer *bufp)
{
/* We fetch characters from PATTERN here. */
register re_wchar_t c, c1;
BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
/* Clear the whole map. */
- bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
+ memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
/* charset_not matches newline according to a syntax bit. */
if ((re_opcode_t) b[-2] == charset_not
don't need to handle them for multibyte.
They are distinguished by a negative wctype. */
+ /* Set up the gl_state object to its buffer-defined
+ value. This hardcodes the buffer-global
+ syntax-table for ASCII chars, while the other chars
+ will obey syntax-table properties. It's not ideal,
+ but it's the way it's been done until now. */
+ SETUP_BUFFER_SYNTAX_TABLE ();
+
for (ch = 0; ch < 256; ++ch)
{
c = RE_CHAR_TO_MULTIBYTE (ch);
if (c1 != c2 && (c1 = RE_CHAR_TO_UNIBYTE (c2)) >= 0)
c = c1;
- }
+ }
*b++ = c;
len = 1;
}
/* Store OP at LOC followed by two-byte integer parameter ARG. */
static void
-store_op1 (op, loc, arg)
- re_opcode_t op;
- unsigned char *loc;
- int arg;
+store_op1 (re_opcode_t op, unsigned char *loc, int arg)
{
*loc = (unsigned char) op;
STORE_NUMBER (loc + 1, arg);
/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */
static void
-store_op2 (op, loc, arg1, arg2)
- re_opcode_t op;
- unsigned char *loc;
- int arg1, arg2;
+store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2)
{
*loc = (unsigned char) op;
STORE_NUMBER (loc + 1, arg1);
for OP followed by two-byte integer parameter ARG. */
static void
-insert_op1 (op, loc, arg, end)
- re_opcode_t op;
- unsigned char *loc;
- int arg;
- unsigned char *end;
+insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end)
{
register unsigned char *pfrom = end;
register unsigned char *pto = end + 3;
/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */
static void
-insert_op2 (op, loc, arg1, arg2, end)
- re_opcode_t op;
- unsigned char *loc;
- int arg1, arg2;
- unsigned char *end;
+insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, unsigned char *end)
{
register unsigned char *pfrom = end;
register unsigned char *pto = end + 5;
least one character before the ^. */
static boolean
-at_begline_loc_p (pattern, p, syntax)
- re_char *pattern, *p;
- reg_syntax_t syntax;
+at_begline_loc_p (const re_char *pattern, const re_char *p, reg_syntax_t syntax)
{
re_char *prev = p - 2;
boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
at least one character after the $, i.e., `P < PEND'. */
static boolean
-at_endline_loc_p (p, pend, syntax)
- re_char *p, *pend;
- reg_syntax_t syntax;
+at_endline_loc_p (const re_char *p, const re_char *pend, reg_syntax_t syntax)
{
re_char *next = p;
boolean next_backslash = *next == '\\';
false if it's not. */
static boolean
-group_in_compile_stack (compile_stack, regnum)
- compile_stack_type compile_stack;
- regnum_t regnum;
+group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
{
int this_element;
Return -1 if fastmap was not updated accurately. */
static int
-analyse_first (p, pend, fastmap, multibyte)
- re_char *p, *pend;
- char *fastmap;
- const int multibyte;
+analyse_first (const re_char *p, const re_char *pend, char *fastmap, const int multibyte)
{
int j, k;
boolean not;
the corresponding multibyte character. */
int c = RE_CHAR_TO_MULTIBYTE (p[1]);
- if (! CHAR_BYTE8_P (c))
- fastmap[CHAR_LEADING_CODE (c)] = 1;
+ fastmap[CHAR_LEADING_CODE (c)] = 1;
}
}
break;
if (/* Any leading code can possibly start a character
which doesn't match the specified set of characters. */
not
- ||
+ ||
/* If we can match a character class, we can match any
multibyte characters. */
(CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
Returns 0 if we succeed, -2 if an internal error. */
int
-re_compile_fastmap (bufp)
- struct re_pattern_buffer *bufp;
+re_compile_fastmap (struct re_pattern_buffer *bufp)
{
char *fastmap = bufp->fastmap;
int analysis;
assert (fastmap && bufp->buffer);
- bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */
+ memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */
bufp->fastmap_accurate = 1; /* It will be when we're done. */
analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used,
freeing the old data. */
void
-re_set_registers (bufp, regs, num_regs, starts, ends)
- struct re_pattern_buffer *bufp;
- struct re_registers *regs;
- unsigned num_regs;
- regoff_t *starts, *ends;
+re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, unsigned int num_regs, regoff_t *starts, regoff_t *ends)
{
if (num_regs)
{
doesn't let you say where to stop matching. */
int
-re_search (bufp, string, size, startpos, range, regs)
- struct re_pattern_buffer *bufp;
- const char *string;
- int size, startpos, range;
- struct re_registers *regs;
+re_search (struct re_pattern_buffer *bufp, const char *string, int size, int startpos, int range, struct re_registers *regs)
{
return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
regs, size);
stack overflow). */
int
-re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
- struct re_pattern_buffer *bufp;
- const char *str1, *str2;
- int size1, size2;
- int startpos;
- int range;
- struct re_registers *regs;
- int stop;
+re_search_2 (struct re_pattern_buffer *bufp, const char *str1, int size1, const char *str2, int size2, int startpos, int range, struct re_registers *regs, int stop)
{
int val;
re_char *string1 = (re_char*) str1;
anchored_start = (bufp->buffer[0] == begline);
#ifdef emacs
- gl_state.object = re_match_object;
+ gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */
{
int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos));
{
int buf_charlen;
- buf_ch = STRING_CHAR_AND_LENGTH (d, range - lim,
- buf_charlen);
+ buf_ch = STRING_CHAR_AND_LENGTH (d, buf_charlen);
buf_ch = RE_TRANSLATE (translate, buf_ch);
if (fastmap[CHAR_LEADING_CODE (buf_ch)])
break;
{
int buf_charlen;
- buf_ch = STRING_CHAR_AND_LENGTH (d, range - lim,
- buf_charlen);
+ buf_ch = STRING_CHAR_AND_LENGTH (d, buf_charlen);
if (fastmap[CHAR_LEADING_CODE (buf_ch)])
break;
range -= buf_charlen;
}
else /* Searching backwards. */
{
- int room = (startpos >= size1
- ? size2 + size1 - startpos
- : size1 - startpos);
if (multibyte)
{
- buf_ch = STRING_CHAR (d, room);
+ buf_ch = STRING_CHAR (d);
buf_ch = TRANSLATE (buf_ch);
if (! fastmap[CHAR_LEADING_CODE (buf_ch)])
goto advance;
{
re_char *p = POS_ADDR_VSTRING (startpos);
re_char *pend = STOP_ADDR_VSTRING (startpos);
- int len = MULTIBYTE_FORM_LENGTH (p, pend - p);
+ int len = BYTES_BY_CHAR_HEAD (*p);
range -= len;
if (range < 0)
/* If the operation is a match against one or more chars,
return a pointer to the next operation, else return NULL. */
static re_char *
-skip_one_char (p)
- re_char *p;
+skip_one_char (const re_char *p)
{
switch (SWITCH_ENUM_CAST (*p++))
{
/* Jump over non-matching operations. */
static re_char *
-skip_noops (p, pend)
- re_char *p, *pend;
+skip_noops (const re_char *p, const re_char *pend)
{
int mcnt;
while (p < pend)
/* Non-zero if "p1 matches something" implies "p2 fails". */
static int
-mutually_exclusive_p (bufp, p1, p2)
- struct re_pattern_buffer *bufp;
- re_char *p1, *p2;
+mutually_exclusive_p (struct re_pattern_buffer *bufp, const re_char *p1, const re_char *p2)
{
re_opcode_t op2;
const boolean multibyte = RE_MULTIBYTE_P (bufp);
{
register re_wchar_t c
= (re_opcode_t) *p2 == endline ? '\n'
- : RE_STRING_CHAR (p2 + 2, pend - p2 - 2, multibyte);
+ : RE_STRING_CHAR (p2 + 2, multibyte);
if ((re_opcode_t) *p1 == exactn)
{
- if (c != RE_STRING_CHAR (p1 + 2, pend - p1 - 2, multibyte))
+ if (c != RE_STRING_CHAR (p1 + 2, multibyte))
{
DEBUG_PRINT3 (" '%c' != '%c' => fast loop.\n", c, p1[2]);
return 1;
matched substring. */
int
-re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
- struct re_pattern_buffer *bufp;
- const char *string1, *string2;
- int size1, size2;
- int pos;
- struct re_registers *regs;
- int stop;
+re_match_2 (struct re_pattern_buffer *bufp, const char *string1, int size1, const char *string2, int size2, int pos, struct re_registers *regs, int stop)
{
int result;
#ifdef emacs
int charpos;
- gl_state.object = re_match_object;
+ gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */
charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos));
SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
#endif
/* This is a separate function so that we can force an alloca cleanup
afterwards. */
static int
-re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
- struct re_pattern_buffer *bufp;
- re_char *string1, *string2;
- int size1, size2;
- int pos;
- struct re_registers *regs;
- int stop;
+re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1, int size1, const re_char *string2, int size2, int pos, struct re_registers *regs, int stop)
{
/* General temporaries. */
int mcnt;
PREFETCH ();
if (multibyte)
- pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen);
+ pat_ch = STRING_CHAR_AND_LENGTH (p, pat_charlen);
else
{
pat_ch = RE_CHAR_TO_MULTIBYTE (*p);
pat_charlen = 1;
}
- buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
+ buf_ch = STRING_CHAR_AND_LENGTH (d, buf_charlen);
if (TRANSLATE (buf_ch) != pat_ch)
{
PREFETCH ();
if (multibyte)
{
- pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen);
+ pat_ch = STRING_CHAR_AND_LENGTH (p, pat_charlen);
pat_ch = RE_CHAR_TO_UNIBYTE (pat_ch);
}
else
DEBUG_PRINT1 ("EXECUTING anychar.\n");
PREFETCH ();
- buf_ch = RE_STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen,
+ buf_ch = RE_STRING_CHAR_AND_LENGTH (d, buf_charlen,
target_multibyte);
buf_ch = TRANSLATE (buf_ch);
}
PREFETCH ();
- c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len, target_multibyte);
+ c = RE_STRING_CHAR_AND_LENGTH (d, len, target_multibyte);
if (target_multibyte)
{
int c1;
UPDATE_SYNTAX_TABLE (charpos);
#endif
PREFETCH ();
- c2 = RE_STRING_CHAR (d, dend - d, target_multibyte);
+ c2 = RE_STRING_CHAR (d, target_multibyte);
s2 = SYNTAX (c2);
/* Case 2: S2 is neither Sword nor Ssymbol. */
if (!AT_STRINGS_END (d))
{
PREFETCH_NOLIMIT ();
- c2 = RE_STRING_CHAR (d, dend - d, target_multibyte);
+ c2 = RE_STRING_CHAR (d, target_multibyte);
#ifdef emacs
UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
#endif
bytes; nonzero otherwise. */
static int
-bcmp_translate (s1, s2, len, translate, target_multibyte)
- re_char *s1, *s2;
- register int len;
- RE_TRANSLATE_TYPE translate;
- const int target_multibyte;
+bcmp_translate (const re_char *s1, const re_char *s2, register int len,
+ RE_TRANSLATE_TYPE translate, const int target_multibyte)
{
register re_char *p1 = s1, *p2 = s2;
re_char *p1_end = s1 + len;
We call regex_compile to do the actual compilation. */
const char *
-re_compile_pattern (pattern, length, bufp)
- const char *pattern;
- size_t length;
- struct re_pattern_buffer *bufp;
+re_compile_pattern (const char *pattern, size_t length, struct re_pattern_buffer *bufp)
{
reg_errcode_t ret;
-#ifdef emacs
- gl_state.current_syntax_table = current_buffer->syntax_table;
-#endif
-
/* GNU code is written to assume at least RE_NREGS registers will be set
(and at least one extra will be -1). */
bufp->regs_allocated = REGS_UNALLOCATED;