HCoop
/
bpt
/
emacs.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
remove sigio blocking
[bpt/emacs.git]
/
src
/
regex.c
diff --git
a/src/regex.c
b/src/regex.c
index
79fb28b
..
ac71b79
100644
(file)
--- a/
src/regex.c
+++ b/
src/regex.c
@@
-2,7
+2,7
@@
0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the
internationalization features.)
0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the
internationalization features.)
- Copyright (C) 1993-201
3
Free Software Foundation, Inc.
+ Copyright (C) 1993-201
4
Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@
-43,7
+43,7
@@
# endif
#endif
# endif
#endif
-#if 4 < __GNUC__ + (
5
<= __GNUC_MINOR__) && ! defined __clang__
+#if 4 < __GNUC__ + (
6
<= __GNUC_MINOR__) && ! defined __clang__
# pragma GCC diagnostic ignored "-Wunused-but-set-variable"
#endif
# pragma GCC diagnostic ignored "-Wunused-but-set-variable"
#endif
@@
-131,12
+131,12
@@
# include "character.h"
# include "buffer.h"
# include "character.h"
# include "buffer.h"
-/* Make syntax table lookup grant data in gl_state. */
-# define SYNTAX_ENTRY_VIA_PROPERTY
-
# include "syntax.h"
# include "category.h"
# include "syntax.h"
# include "category.h"
+/* Make syntax table lookup grant data in gl_state. */
+# define SYNTAX(c) syntax_property (c, 1)
+
# ifdef malloc
# undef malloc
# endif
# ifdef malloc
# undef malloc
# endif
@@
-257,15
+257,10
@@
xrealloc (void *block, size_t size)
enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
/* Dummy macros for non-Emacs environments. */
enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
/* Dummy macros for non-Emacs environments. */
-# define CHAR_CHARSET(c) 0
-# define CHARSET_LEADING_CODE_BASE(c) 0
# define MAX_MULTIBYTE_LENGTH 1
# define RE_MULTIBYTE_P(x) 0
# define RE_TARGET_MULTIBYTE_P(x) 0
# define WORD_BOUNDARY_P(c1, c2) (0)
# define MAX_MULTIBYTE_LENGTH 1
# define RE_MULTIBYTE_P(x) 0
# define RE_TARGET_MULTIBYTE_P(x) 0
# define WORD_BOUNDARY_P(c1, c2) (0)
-# define CHAR_HEAD_P(p) (1)
-# define SINGLE_BYTE_CHAR_P(c) (1)
-# define SAME_CHARSET_P(c1, c2) (1)
# define BYTES_BY_CHAR_HEAD(p) (1)
# define PREV_CHAR_BOUNDARY(p, limit) ((p)--)
# define STRING_CHAR(p) (*(p))
# define BYTES_BY_CHAR_HEAD(p) (1)
# define PREV_CHAR_BOUNDARY(p, limit) ((p)--)
# define STRING_CHAR(p) (*(p))
@@
-279,8
+274,6
@@
enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
(c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
# define GET_CHAR_AFTER(c, p, len) \
(c = *p, len = 1)
(c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
# define GET_CHAR_AFTER(c, p, len) \
(c = *p, len = 1)
-# define MAKE_CHAR(charset, c1, c2) (c1)
-# define BYTE8_TO_CHAR(c) (c)
# define CHAR_BYTE8_P(c) (0)
# define CHAR_LEADING_CODE(c) (c)
# define CHAR_BYTE8_P(c) (0)
# define CHAR_LEADING_CODE(c) (c)
@@
-468,7
+461,7
@@
init_syntax_once (void)
/* Assumes a `char *destination' variable. */
# define REGEX_REALLOCATE(source, osize, nsize) \
/* Assumes a `char *destination' variable. */
# define REGEX_REALLOCATE(source, osize, nsize) \
- (destination =
(char *) alloca (nsize),
\
+ (destination =
alloca (nsize),
\
memcpy (destination, source, osize))
/* No need to do anything to free, after alloca. */
memcpy (destination, source, osize))
/* No need to do anything to free, after alloca. */
@@
-531,8
+524,10
@@
init_syntax_once (void)
/* Type of source-pattern and string chars. */
#ifdef _MSC_VER
typedef unsigned char re_char;
/* Type of source-pattern and string chars. */
#ifdef _MSC_VER
typedef unsigned char re_char;
+typedef const re_char const_re_char;
#else
typedef const unsigned char re_char;
#else
typedef const unsigned char re_char;
+typedef re_char const_re_char;
#endif
typedef char boolean;
#endif
typedef char boolean;
@@
-718,7
+713,8
@@
typedef enum
static int
extract_number (re_char *source)
{
static int
extract_number (re_char *source)
{
- return (SIGN_EXTEND_CHAR (source[1]) << 8) + source[0];
+ unsigned leading_byte = SIGN_EXTEND_CHAR (source[1]);
+ return (leading_byte << 8) + source[0];
}
/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
}
/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
@@
-773,10
+769,12
@@
extract_number_and_incr (re_char **source)
and the 2 bytes of flags at the start of the range table. */
#define CHARSET_RANGE_TABLE(p) (&(p)[4 + CHARSET_BITMAP_SIZE (p)])
and the 2 bytes of flags at the start of the range table. */
#define CHARSET_RANGE_TABLE(p) (&(p)[4 + CHARSET_BITMAP_SIZE (p)])
+#ifdef emacs
/* Extract the bit flags that start a range table. */
#define CHARSET_RANGE_TABLE_BITS(p) \
((p)[2 + CHARSET_BITMAP_SIZE (p)] \
+ (p)[3 + CHARSET_BITMAP_SIZE (p)] * 0x100)
/* Extract the bit flags that start a range table. */
#define CHARSET_RANGE_TABLE_BITS(p) \
((p)[2 + CHARSET_BITMAP_SIZE (p)] \
+ (p)[3 + CHARSET_BITMAP_SIZE (p)] * 0x100)
+#endif
/* Return the address of end of RANGE_TABLE. COUNT is number of
ranges (which is a pair of (start, end)) in the RANGE_TABLE. `* 2'
/* Return the address of end of RANGE_TABLE. COUNT is number of
ranges (which is a pair of (start, end)) in the RANGE_TABLE. `* 2'
@@
-1192,12
+1190,7
@@
print_double_string (re_char *where, re_char *string1, ssize_t size1,
# define assert(e)
# define DEBUG_STATEMENT(e)
# define assert(e)
# define DEBUG_STATEMENT(e)
-# if __STDC_VERSION__ < 199901L
-# define DEBUG_COMPILES_ARGUMENTS
-# define DEBUG_PRINT /* 'DEBUG_PRINT (x, y)' discards X and Y. */ (void)
-# else
-# define DEBUG_PRINT(...)
-# endif
+# define DEBUG_PRINT(...)
# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
@@
-1236,12
+1229,12
@@
re_set_syntax (reg_syntax_t syntax)
WEAK_ALIAS (__re_set_syntax, re_set_syntax)
/* Regexp to use to replace spaces, or NULL meaning don't. */
WEAK_ALIAS (__re_set_syntax, re_set_syntax)
/* Regexp to use to replace spaces, or NULL meaning don't. */
-static re_char *whitespace_regexp;
+static
const_
re_char *whitespace_regexp;
void
re_set_whitespace_regexp (const char *regexp)
{
void
re_set_whitespace_regexp (const char *regexp)
{
- whitespace_regexp = (re_char *) regexp;
+ whitespace_regexp = (
const_
re_char *) regexp;
}
WEAK_ALIAS (__re_set_syntax, re_set_syntax)
\f
}
WEAK_ALIAS (__re_set_syntax, re_set_syntax)
\f
@@
-1828,6
+1821,8
@@
struct range_table_work_area
int bits; /* flag to record character classes */
};
int bits; /* flag to record character classes */
};
+#ifdef emacs
+
/* Make sure that WORK_AREA can hold more N multibyte characters.
This is used only in set_image_of_range and set_image_of_range_1.
It expects WORK_AREA to be a pointer.
/* Make sure that WORK_AREA can hold more N multibyte characters.
This is used only in set_image_of_range and set_image_of_range_1.
It expects WORK_AREA to be a pointer.
@@
-1846,15
+1841,6
@@
struct range_table_work_area
#define SET_RANGE_TABLE_WORK_AREA_BIT(work_area, bit) \
(work_area).bits |= (bit)
#define SET_RANGE_TABLE_WORK_AREA_BIT(work_area, bit) \
(work_area).bits |= (bit)
-/* Bits used to implement the multibyte-part of the various character classes
- such as [:alnum:] in a charset's range table. */
-#define BIT_WORD 0x1
-#define BIT_LOWER 0x2
-#define BIT_PUNCT 0x4
-#define BIT_SPACE 0x8
-#define BIT_UPPER 0x10
-#define BIT_MULTIBYTE 0x20
-
/* Set a range (RANGE_START, RANGE_END) to WORK_AREA. */
#define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end) \
do { \
/* Set a range (RANGE_START, RANGE_END) to WORK_AREA. */
#define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end) \
do { \
@@
-1863,6
+1849,8
@@
struct range_table_work_area
(work_area).table[(work_area).used++] = (range_end); \
} while (0)
(work_area).table[(work_area).used++] = (range_end); \
} while (0)
+#endif /* emacs */
+
/* Free allocated memory for WORK_AREA. */
#define FREE_RANGE_TABLE_WORK_AREA(work_area) \
do { \
/* Free allocated memory for WORK_AREA. */
#define FREE_RANGE_TABLE_WORK_AREA(work_area) \
do { \
@@
-1874,6
+1862,15
@@
struct range_table_work_area
#define RANGE_TABLE_WORK_USED(work_area) ((work_area).used)
#define RANGE_TABLE_WORK_BITS(work_area) ((work_area).bits)
#define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i])
#define RANGE_TABLE_WORK_USED(work_area) ((work_area).used)
#define RANGE_TABLE_WORK_BITS(work_area) ((work_area).bits)
#define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i])
+
+/* Bits used to implement the multibyte-part of the various character classes
+ such as [:alnum:] in a charset's range table. */
+#define BIT_WORD 0x1
+#define BIT_LOWER 0x2
+#define BIT_PUNCT 0x4
+#define BIT_SPACE 0x8
+#define BIT_UPPER 0x10
+#define BIT_MULTIBYTE 0x20
\f
/* Set the bit for character C in a list. */
\f
/* Set the bit for character C in a list. */
@@
-1988,7
+1985,7
@@
struct range_table_work_area
#endif /* emacs */
/* Get the next unsigned number in the uncompiled pattern. */
#endif /* emacs */
/* Get the next unsigned number in the uncompiled pattern. */
-#define GET_
UNSIGNED_NUMBER(num)
\
+#define GET_
INTERVAL_COUNT(num)
\
do { \
if (p == pend) \
FREE_STACK_RETURN (REG_EBRACE); \
do { \
if (p == pend) \
FREE_STACK_RETURN (REG_EBRACE); \
@@
-1997,13
+1994,11
@@
struct range_table_work_area
PATFETCH (c); \
while ('0' <= c && c <= '9') \
{ \
PATFETCH (c); \
while ('0' <= c && c <= '9') \
{ \
- int prev; \
if (num < 0) \
num = 0; \
if (num < 0) \
num = 0; \
- prev = num; \
- num = num * 10 + c - '0'; \
- if (num / 10 != prev) \
+ if (RE_DUP_MAX / 10 - (RE_DUP_MAX % 10 < c - '0') < num) \
FREE_STACK_RETURN (REG_BADBR); \
FREE_STACK_RETURN (REG_BADBR); \
+ num = num * 10 + c - '0'; \
if (p == pend) \
FREE_STACK_RETURN (REG_EBRACE); \
PATFETCH (c); \
if (p == pend) \
FREE_STACK_RETURN (REG_EBRACE); \
PATFETCH (c); \
@@
-2015,7
+2010,7
@@
struct range_table_work_area
/* Map a string to the char class it names (if any). */
re_wctype_t
/* Map a string to the char class it names (if any). */
re_wctype_t
-re_wctype (const
re_char *str)
+re_wctype (const
_
re_char *str)
{
const char *string = (const char *) str;
if (STREQ (string, "alnum")) return RECC_ALNUM;
{
const char *string = (const char *) str;
if (STREQ (string, "alnum")) return RECC_ALNUM;
@@
-2409,7
+2404,8
@@
do { \
} while (0)
static reg_errcode_t
} while (0)
static reg_errcode_t
-regex_compile (const re_char *pattern, size_t size, reg_syntax_t syntax, struct re_pattern_buffer *bufp)
+regex_compile (const_re_char *pattern, size_t size, reg_syntax_t syntax,
+ struct re_pattern_buffer *bufp)
{
/* We fetch characters from PATTERN here. */
register re_wchar_t c, c1;
{
/* We fetch characters from PATTERN here. */
register re_wchar_t c, c1;
@@
-3308,16
+3304,16
@@
regex_compile (const re_char *pattern, size_t size, reg_syntax_t syntax, struct
beg_interval = p;
beg_interval = p;
- GET_
UNSIGNED_NUMBER
(lower_bound);
+ GET_
INTERVAL_COUNT
(lower_bound);
if (c == ',')
if (c == ',')
- GET_
UNSIGNED_NUMBER
(upper_bound);
+ GET_
INTERVAL_COUNT
(upper_bound);
else
/* Interval such as `{1}' => match exactly once. */
upper_bound = lower_bound;
else
/* Interval such as `{1}' => match exactly once. */
upper_bound = lower_bound;
- if (lower_bound < 0
|| upper_bound > RE_DUP_MAX
- || (
upper_bound >= 0 && lower_bound > upp
er_bound))
+ if (lower_bound < 0
+ || (
0 <= upper_bound && upper_bound < low
er_bound))
FREE_STACK_RETURN (REG_BADBR);
if (!(syntax & RE_NO_BK_BRACES))
FREE_STACK_RETURN (REG_BADBR);
if (!(syntax & RE_NO_BK_BRACES))
@@
-3765,7
+3761,7
@@
insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, unsigned cha
least one character before the ^. */
static boolean
least one character before the ^. */
static boolean
-at_begline_loc_p (const
re_char *pattern, const
re_char *p, reg_syntax_t syntax)
+at_begline_loc_p (const
_re_char *pattern, const_
re_char *p, reg_syntax_t syntax)
{
re_char *prev = p - 2;
boolean odd_backslashes;
{
re_char *prev = p - 2;
boolean odd_backslashes;
@@
-3806,7
+3802,7
@@
at_begline_loc_p (const re_char *pattern, const re_char *p, reg_syntax_t syntax)
at least one character after the $, i.e., `P < PEND'. */
static boolean
at least one character after the $, i.e., `P < PEND'. */
static boolean
-at_endline_loc_p (const
re_char *p, const
re_char *pend, reg_syntax_t syntax)
+at_endline_loc_p (const
_re_char *p, const_
re_char *pend, reg_syntax_t syntax)
{
re_char *next = p;
boolean next_backslash = *next == '\\';
{
re_char *next = p;
boolean next_backslash = *next == '\\';
@@
-3850,7
+3846,8
@@
group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
Return -1 if fastmap was not updated accurately. */
static int
Return -1 if fastmap was not updated accurately. */
static int
-analyse_first (const re_char *p, const re_char *pend, char *fastmap, const int multibyte)
+analyse_first (const_re_char *p, const_re_char *pend, char *fastmap,
+ const int multibyte)
{
int j, k;
boolean not;
{
int j, k;
boolean not;
@@
-4204,7
+4201,7
@@
re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, uns
{
bufp->regs_allocated = REGS_UNALLOCATED;
regs->num_regs = 0;
{
bufp->regs_allocated = REGS_UNALLOCATED;
regs->num_regs = 0;
- regs->start = regs->end =
(regoff_t *)
0;
+ regs->start = regs->end = 0;
}
}
WEAK_ALIAS (__re_set_registers, re_set_registers)
}
}
WEAK_ALIAS (__re_set_registers, re_set_registers)
@@
-4594,7
+4591,7
@@
static int bcmp_translate (re_char *s1, re_char *s2,
/* If the operation is a match against one or more chars,
return a pointer to the next operation, else return NULL. */
static re_char *
/* If the operation is a match against one or more chars,
return a pointer to the next operation, else return NULL. */
static re_char *
-skip_one_char (const
re_char *p)
+skip_one_char (const
_
re_char *p)
{
switch (*p++)
{
{
switch (*p++)
{
@@
-4636,7
+4633,7
@@
skip_one_char (const re_char *p)
/* Jump over non-matching operations. */
static re_char *
/* Jump over non-matching operations. */
static re_char *
-skip_noops (const
re_char *p, const
re_char *pend)
+skip_noops (const
_re_char *p, const_
re_char *pend)
{
int mcnt;
while (p < pend)
{
int mcnt;
while (p < pend)
@@
-4663,7
+4660,8
@@
skip_noops (const re_char *p, const re_char *pend)
/* Non-zero if "p1 matches something" implies "p2 fails". */
static int
/* Non-zero if "p1 matches something" implies "p2 fails". */
static int
-mutually_exclusive_p (struct re_pattern_buffer *bufp, const re_char *p1, const re_char *p2)
+mutually_exclusive_p (struct re_pattern_buffer *bufp, const_re_char *p1,
+ const_re_char *p2)
{
re_opcode_t op2;
const boolean multibyte = RE_MULTIBYTE_P (bufp);
{
re_opcode_t op2;
const boolean multibyte = RE_MULTIBYTE_P (bufp);
@@
-4922,8
+4920,8
@@
WEAK_ALIAS (__re_match_2, re_match_2)
/* This is a separate function so that we can force an alloca cleanup
afterwards. */
static regoff_t
/* This is a separate function so that we can force an alloca cleanup
afterwards. */
static regoff_t
-re_match_2_internal (struct re_pattern_buffer *bufp, const
re_char *string1,
- size_t size1, const
re_char *string2, size_t size2,
+re_match_2_internal (struct re_pattern_buffer *bufp, const
_
re_char *string1,
+ size_t size1, const
_
re_char *string2, size_t size2,
ssize_t pos, struct re_registers *regs, ssize_t stop)
{
/* General temporaries. */
ssize_t pos, struct re_registers *regs, ssize_t stop)
{
/* General temporaries. */
@@
-5903,7
+5901,7
@@
re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1,
EXTRACT_NUMBER_AND_INCR (mcnt, p);
/* Here, we discard `const', making re_match non-reentrant. */
p2 = (unsigned char*) p + mcnt;
EXTRACT_NUMBER_AND_INCR (mcnt, p);
/* Here, we discard `const', making re_match non-reentrant. */
p2 = (unsigned char*) p + mcnt;
- /* Signedness doesn't matter since we only copy MCNT's bits
. */
+ /* Signedness doesn't matter since we only copy MCNT's bits. */
EXTRACT_NUMBER_AND_INCR (mcnt, p);
DEBUG_PRINT (" Setting %p to %d.\n", p2, mcnt);
PUSH_NUMBER (p2, mcnt);
EXTRACT_NUMBER_AND_INCR (mcnt, p);
DEBUG_PRINT (" Setting %p to %d.\n", p2, mcnt);
PUSH_NUMBER (p2, mcnt);
@@
-6265,7
+6263,7
@@
re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1,
bytes; nonzero otherwise. */
static int
bytes; nonzero otherwise. */
static int
-bcmp_translate (const
re_char *s1, const
re_char *s2, register ssize_t len,
+bcmp_translate (const
_re_char *s1, const_
re_char *s2, register ssize_t len,
RE_TRANSLATE_TYPE translate, const int target_multibyte)
{
register re_char *p1 = s1, *p2 = s2;
RE_TRANSLATE_TYPE translate, const int target_multibyte)
{
register re_char *p1 = s1, *p2 = s2;
@@
-6390,8
+6388,7
@@
weak_function
re_exec (const char *s)
{
const size_t len = strlen (s);
re_exec (const char *s)
{
const size_t len = strlen (s);
- return (re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0)
- >= 0);
+ return re_search (&re_comp_buf, s, len, 0, len, 0) >= 0;
}
#endif /* _REGEX_RE_COMP */
\f
}
#endif /* _REGEX_RE_COMP */
\f
@@
-6434,7
+6431,7
@@
re_exec (const char *s)
the return codes and their meanings.) */
reg_errcode_t
the return codes and their meanings.) */
reg_errcode_t
-regcomp (regex_t *_
_restrict preg, const char *__restrict
pattern,
+regcomp (regex_t *_
Restrict_ preg, const char *_Restrict_
pattern,
int cflags)
{
reg_errcode_t ret;
int cflags)
{
reg_errcode_t ret;
@@
-6515,8
+6512,8
@@
WEAK_ALIAS (__regcomp, regcomp)
We return 0 if we find a match and REG_NOMATCH if not. */
reg_errcode_t
We return 0 if we find a match and REG_NOMATCH if not. */
reg_errcode_t
-regexec (const regex_t *_
_restrict preg, const char *__restrict
string,
- size_t nmatch, regmatch_t pmatch[_
_restrict_arr
], int eflags)
+regexec (const regex_t *_
Restrict_ preg, const char *_Restrict_
string,
+ size_t nmatch, regmatch_t pmatch[_
Restrict_arr_
], int eflags)
{
regoff_t ret;
struct re_registers regs;
{
regoff_t ret;
struct re_registers regs;
@@
-6555,7
+6552,7
@@
regexec (const regex_t *__restrict preg, const char *__restrict string,
/* Perform the searching operation. */
ret = re_search (&private_preg, string, len,
/* start: */ 0, /* range: */ len,
/* Perform the searching operation. */
ret = re_search (&private_preg, string, len,
/* start: */ 0, /* range: */ len,
- want_reg_info ? &regs :
(struct re_registers *)
0);
+ want_reg_info ? &regs : 0);
/* Copy the register information to the POSIX structure. */
if (want_reg_info)
/* Copy the register information to the POSIX structure. */
if (want_reg_info)