0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the
internationalization features.)
- Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
- 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
- Free Software Foundation, Inc.
+ Copyright (C) 1993-2012 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#pragma alloca
#endif
+/* Ignore some GCC warnings for now. This section should go away
+ once the Emacs and Gnulib regex code is merged. */
+#if (__GNUC__ == 4 && 3 <= __GNUC_MINOR__) || 4 < __GNUC__
+# pragma GCC diagnostic ignored "-Wstrict-overflow"
+# ifndef emacs
+# pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+# pragma GCC diagnostic ignored "-Wunused-function"
+# pragma GCC diagnostic ignored "-Wunused-macros"
+# pragma GCC diagnostic ignored "-Wunused-result"
+# pragma GCC diagnostic ignored "-Wunused-variable"
+# endif
+#endif
+
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
-#if defined STDC_HEADERS && !defined emacs
-# include <stddef.h>
-#else
+#include <stddef.h>
+
+#ifdef emacs
/* We need this for `regex.h', and perhaps for the Emacs include files. */
# include <sys/types.h>
#endif
(HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC && !emacs)
#endif
-/* For platform which support the ISO C amendement 1 functionality we
+/* For platform which support the ISO C amendment 1 functionality we
support user defined character classes. */
#if WIDE_CHAR_SUPPORT
/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
# define regerror(err_code, preg, errbuf, errbuf_size) \
- __regerror(err_code, preg, errbuf, errbuf_size)
+ __regerror (err_code, preg, errbuf, errbuf_size)
# define re_set_registers(bu, re, nu, st, en) \
__re_set_registers (bu, re, nu, st, en)
# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
# include <setjmp.h>
# include "lisp.h"
+# include "character.h"
# include "buffer.h"
/* Make syntax table lookup grant data in gl_state. */
# define SYNTAX_ENTRY_VIA_PROPERTY
# include "syntax.h"
-# include "character.h"
# include "category.h"
# ifdef malloc
/* When used in Emacs's lib-src, we need xmalloc and xrealloc. */
-void *
+static void *
xmalloc (size_t size)
{
register void *val;
return val;
}
-void *
+static void *
xrealloc (void *block, size_t size)
{
register void *val;
# endif
# define realloc xrealloc
-/* This is the normal way of making sure we have memcpy, memcmp and memset. */
-# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
-# include <string.h>
-# else
-# include <strings.h>
-# ifndef memcmp
-# define memcmp(s1, s2, n) bcmp (s1, s2, n)
-# endif
-# ifndef memcpy
-# define memcpy(d, s, n) (bcopy (s, d, n), (d))
-# endif
-# endif
+# include <string.h>
/* Define the syntax stuff for \<, \>, etc. */
|| ((c) >= 'A' && (c) <= 'Z')) \
: SYNTAX (c) == Sword)
-# define ISLOWER(c) (LOWERCASEP (c))
+# define ISLOWER(c) lowercasep (c)
# define ISPUNCT(c) (IS_REAL_ASCII (c) \
? ((c) > ' ' && (c) < 0177 \
# define ISSPACE(c) (SYNTAX (c) == Swhitespace)
-# define ISUPPER(c) (UPPERCASEP (c))
+# define ISUPPER(c) uppercasep (c)
# define ISWORD(c) (SYNTAX (c) == Sword)
#else /* not emacs */
-/* Jim Meyering writes:
-
- "... Some ctype macros are valid only for character codes that
- isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
- using /bin/cc or gcc but without giving an ansi option). So, all
- ctype uses should be through macros like ISPRINT... If
- STDC_HEADERS is defined, then autoconf has verified that the ctype
- macros don't need to be guarded with references to isascii. ...
- Defining isascii to 1 should let any compiler worth its salt
- eliminate the && through constant folding."
- Solaris defines some of these symbols so we must undefine them first. */
-
-# undef ISASCII
-# if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
-# define ISASCII(c) 1
-# else
-# define ISASCII(c) isascii(c)
-# endif
-
/* 1 if C is an ASCII character. */
# define IS_REAL_ASCII(c) ((c) < 0200)
# define ISUNIBYTE(c) 1
# ifdef isblank
-# define ISBLANK(c) (ISASCII (c) && isblank (c))
+# define ISBLANK(c) isblank (c)
# else
# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
# endif
# ifdef isgraph
-# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
+# define ISGRAPH(c) isgraph (c)
# else
-# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
+# define ISGRAPH(c) (isprint (c) && !isspace (c))
# endif
+/* Solaris defines ISPRINT so we must undefine it first. */
# undef ISPRINT
-# define ISPRINT(c) (ISASCII (c) && isprint (c))
-# define ISDIGIT(c) (ISASCII (c) && isdigit (c))
-# define ISALNUM(c) (ISASCII (c) && isalnum (c))
-# define ISALPHA(c) (ISASCII (c) && isalpha (c))
-# define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
-# define ISLOWER(c) (ISASCII (c) && islower (c))
-# define ISPUNCT(c) (ISASCII (c) && ispunct (c))
-# define ISSPACE(c) (ISASCII (c) && isspace (c))
-# define ISUPPER(c) (ISASCII (c) && isupper (c))
-# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
-
-# define ISWORD(c) ISALPHA(c)
+# define ISPRINT(c) isprint (c)
+# define ISDIGIT(c) isdigit (c)
+# define ISALNUM(c) isalnum (c)
+# define ISALPHA(c) isalpha (c)
+# define ISCNTRL(c) iscntrl (c)
+# define ISLOWER(c) islower (c)
+# define ISPUNCT(c) ispunct (c)
+# define ISSPACE(c) isspace (c)
+# define ISUPPER(c) isupper (c)
+# define ISXDIGIT(c) isxdigit (c)
+
+# define ISWORD(c) ISALPHA (c)
# ifdef _tolower
-# define TOLOWER(c) _tolower(c)
+# define TOLOWER(c) _tolower (c)
# else
-# define TOLOWER(c) tolower(c)
+# define TOLOWER(c) tolower (c)
# endif
/* How many characters in the character set. */
#endif /* not emacs */
\f
-#ifndef NULL
-# define NULL (void *)0
-#endif
-
-/* We remove any previous definition of `SIGN_EXTEND_CHAR',
- since ours (we hope) works properly with all combinations of
- machines, compilers, `char' and `unsigned char' argument types.
- (Per Bothner suggested the basic approach.) */
-#undef SIGN_EXTEND_CHAR
-#if __STDC__
-# define SIGN_EXTEND_CHAR(c) ((signed char) (c))
-#else /* not __STDC__ */
-/* As in Harbison and Steele. */
-# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
-#endif
+#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
\f
/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
use `alloca' instead of `malloc'. This is because using malloc in
/* (Re)Allocate N items of type T using malloc, or fail. */
#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
-#define RETALLOC_IF(addr, n, t) \
- if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
#define BYTEWIDTH 8 /* In bits. */
#define MIN(a, b) ((a) < (b) ? (a) : (b))
/* Type of source-pattern and string chars. */
+#ifdef _MSC_VER
+typedef unsigned char re_char;
+#else
typedef const unsigned char re_char;
+#endif
typedef char boolean;
#define false 0
#define true 1
-static int re_match_2_internal _RE_ARGS ((struct re_pattern_buffer *bufp,
- re_char *string1, int size1,
- re_char *string2, int size2,
- int pos,
- struct re_registers *regs,
- int stop));
+static regoff_t re_match_2_internal (struct re_pattern_buffer *bufp,
+ re_char *string1, size_t size1,
+ re_char *string2, size_t size2,
+ ssize_t pos,
+ struct re_registers *regs,
+ ssize_t stop);
\f
/* These are the command codes that appear in compiled regular
expressions. Some opcodes are followed by argument bytes. A
on_failure_jump_nastyloop,
/* A smart `on_failure_jump' used for greedy * and + operators.
- It analyses the loop before which it is put and if the
+ It analyzes the loop before which it is put and if the
loop does not require backtracking, it changes itself to
`on_failure_keep_string_jump' and short-circuits the loop,
else it just defaults to changing itself into `on_failure_jump'.
} while (0)
#ifdef DEBUG
-static void extract_number _RE_ARGS ((int *dest, re_char *source));
static void
-extract_number (dest, source)
- int *dest;
- re_char *source;
+extract_number (int *dest, re_char *source)
{
int temp = SIGN_EXTEND_CHAR (*(source + 1));
*dest = *source & 0377;
} while (0)
#ifdef DEBUG
-static void extract_number_and_incr _RE_ARGS ((int *destination,
- re_char **source));
static void
-extract_number_and_incr (destination, source)
- int *destination;
- re_char **source;
+extract_number_and_incr (int *destination, re_char **source)
{
extract_number (destination, *source);
*source += 2;
((p)[2 + CHARSET_BITMAP_SIZE (p)] \
+ (p)[3 + CHARSET_BITMAP_SIZE (p)] * 0x100)
-/* Test if C is listed in the bitmap of charset P. */
-#define CHARSET_LOOKUP_BITMAP(p, c) \
- ((c) < CHARSET_BITMAP_SIZE (p) * BYTEWIDTH \
- && (p)[2 + (c) / BYTEWIDTH] & (1 << ((c) % BYTEWIDTH)))
-
/* Return the address of end of RANGE_TABLE. COUNT is number of
ranges (which is a pair of (start, end)) in the RANGE_TABLE. `* 2'
is start of range and end of range. `* 3' is size of each start
do \
{ \
re_wchar_t range_start, range_end; \
- re_char *p; \
+ re_char *rtp; \
re_char *range_table_end \
= CHARSET_RANGE_TABLE_END ((range_table), (count)); \
\
- for (p = (range_table); p < range_table_end; p += 2 * 3) \
+ for (rtp = (range_table); rtp < range_table_end; rtp += 2 * 3) \
{ \
- EXTRACT_CHARACTER (range_start, p); \
- EXTRACT_CHARACTER (range_end, p + 3); \
+ EXTRACT_CHARACTER (range_start, rtp); \
+ EXTRACT_CHARACTER (range_end, rtp + 3); \
\
if (range_start <= (c) && (c) <= range_end) \
{ \
re_char *where;
re_char *string1;
re_char *string2;
- int size1;
- int size2;
+ ssize_t size1;
+ ssize_t size2;
{
- int this_char;
+ ssize_t this_char;
if (where == NULL)
printf ("(null)");
#endif /* not DEBUG */
\f
+/* Use this to suppress gcc's `...may be used before initialized' warnings. */
+#ifdef lint
+# define IF_LINT(Code) Code
+#else
+# define IF_LINT(Code) /* empty */
+#endif
+\f
/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
also be assigned to arbitrarily: each pattern buffer stores its own
syntax, so it can be changed between regex compilations. */
} fail_stack_type;
#define FAIL_STACK_EMPTY() (fail_stack.frame == 0)
-#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
/* Define macros to initialize and free the failure stack.
fail_stack.avail = 0; \
fail_stack.frame = 0; \
} while (0)
-
-# define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
#else
# define INIT_FAIL_STACK() \
do { \
fail_stack.frame = 0; \
} while (0)
-# define RESET_FAIL_STACK() ((void)0)
+# define RETALLOC_IF(addr, n, t) \
+ if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
#endif
#define PUSH_FAILURE_INT(item) \
fail_stack.stack[fail_stack.avail++].integer = (item)
-/* Push a fail_stack_elt_t value onto the failure stack.
- Assumes the variable `fail_stack'. Probably should only
- be called from within `PUSH_FAILURE_POINT'. */
-#define PUSH_FAILURE_ELT(item) \
- fail_stack.stack[fail_stack.avail++] = (item)
-
-/* These three POP... operations complement the three PUSH... operations.
+/* These POP... operations complement the PUSH... operations.
All assume that `fail_stack' is nonempty. */
#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
-#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
/* Individual items aside from the registers. */
#define NUM_NONREG_ITEMS 3
/* Pop a saved register off the stack. */
#define POP_FAILURE_REG_OR_COUNT() \
do { \
- int reg = POP_FAILURE_INT (); \
- if (reg == -1) \
+ long pfreg = POP_FAILURE_INT (); \
+ if (pfreg == -1) \
{ \
/* It's a counter. */ \
/* Here, we discard `const', making re_match non-reentrant. */ \
unsigned char *ptr = (unsigned char*) POP_FAILURE_POINTER (); \
- reg = POP_FAILURE_INT (); \
- STORE_NUMBER (ptr, reg); \
- DEBUG_PRINT3 (" Pop counter %p = %d\n", ptr, reg); \
+ pfreg = POP_FAILURE_INT (); \
+ STORE_NUMBER (ptr, pfreg); \
+ DEBUG_PRINT3 (" Pop counter %p = %d\n", ptr, pfreg); \
} \
else \
{ \
- regend[reg] = POP_FAILURE_POINTER (); \
- regstart[reg] = POP_FAILURE_POINTER (); \
+ regend[pfreg] = POP_FAILURE_POINTER (); \
+ regstart[pfreg] = POP_FAILURE_POINTER (); \
DEBUG_PRINT4 (" Pop reg %d (spanning %p -> %p)\n", \
- reg, regstart[reg], regend[reg]); \
+ pfreg, regstart[pfreg], regend[pfreg]); \
} \
} while (0)
/* Check that we are not stuck in an infinite loop. */
#define CHECK_INFINITE_LOOP(pat_cur, string_place) \
do { \
- int failure = TOP_FAILURE_HANDLE (); \
+ ssize_t failure = TOP_FAILURE_HANDLE (); \
/* Check for infinite matching loops */ \
while (failure > 0 \
&& (FAILURE_STR (failure) == string_place \
\f
/* Subroutine declarations and macros for regex_compile. */
-static reg_errcode_t regex_compile _RE_ARGS ((re_char *pattern, size_t size,
- reg_syntax_t syntax,
- struct re_pattern_buffer *bufp));
-static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
-static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
- int arg1, int arg2));
-static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
- int arg, unsigned char *end));
-static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
- int arg1, int arg2, unsigned char *end));
-static boolean at_begline_loc_p _RE_ARGS ((re_char *pattern,
- re_char *p,
- reg_syntax_t syntax));
-static boolean at_endline_loc_p _RE_ARGS ((re_char *p,
- re_char *pend,
- reg_syntax_t syntax));
-static re_char *skip_one_char _RE_ARGS ((re_char *p));
-static int analyse_first _RE_ARGS ((re_char *p, re_char *pend,
- char *fastmap, const int multibyte));
+static reg_errcode_t regex_compile (re_char *pattern, size_t size,
+ reg_syntax_t syntax,
+ struct re_pattern_buffer *bufp);
+static void store_op1 (re_opcode_t op, unsigned char *loc, int arg);
+static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2);
+static void insert_op1 (re_opcode_t op, unsigned char *loc,
+ int arg, unsigned char *end);
+static void insert_op2 (re_opcode_t op, unsigned char *loc,
+ int arg1, int arg2, unsigned char *end);
+static boolean at_begline_loc_p (re_char *pattern, re_char *p,
+ reg_syntax_t syntax);
+static boolean at_endline_loc_p (re_char *p, re_char *pend,
+ reg_syntax_t syntax);
+static re_char *skip_one_char (re_char *p);
+static int analyse_first (re_char *p, re_char *pend,
+ char *fastmap, const int multibyte);
/* Fetch the next character in the uncompiled pattern, with no
translation. */
} while (0)
-/* As with BUF_PUSH_2, except for three bytes. */
-#define BUF_PUSH_3(c1, c2, c3) \
- do { \
- GET_BUFFER_SPACE (3); \
- *b++ = (unsigned char) (c1); \
- *b++ = (unsigned char) (c2); \
- *b++ = (unsigned char) (c3); \
- } while (0)
-
-
/* Store a jump with opcode OP at LOC to location TO. We store a
relative address offset by the three bytes the jump itself occupies. */
#define STORE_JUMP(op, loc, to) \
typedef struct
{
compile_stack_elt_t *stack;
- unsigned size;
- unsigned avail; /* Offset of next open position. */
+ size_t size;
+ size_t avail; /* Offset of next open position. */
} compile_stack_type;
re_wctype_t
re_wctype (const re_char *str)
{
- const char *string = str;
+ const char *string = (const char *) str;
if (STREQ (string, "alnum")) return RECC_ALNUM;
else if (STREQ (string, "alpha")) return RECC_ALPHA;
else if (STREQ (string, "word")) return RECC_WORD;
{
switch (cc)
{
- case RECC_ALNUM: return ISALNUM (ch);
- case RECC_ALPHA: return ISALPHA (ch);
- case RECC_BLANK: return ISBLANK (ch);
- case RECC_CNTRL: return ISCNTRL (ch);
- case RECC_DIGIT: return ISDIGIT (ch);
- case RECC_GRAPH: return ISGRAPH (ch);
- case RECC_LOWER: return ISLOWER (ch);
- case RECC_PRINT: return ISPRINT (ch);
- case RECC_PUNCT: return ISPUNCT (ch);
- case RECC_SPACE: return ISSPACE (ch);
- case RECC_UPPER: return ISUPPER (ch);
- case RECC_XDIGIT: return ISXDIGIT (ch);
- case RECC_ASCII: return IS_REAL_ASCII (ch);
+ case RECC_ALNUM: return ISALNUM (ch) != 0;
+ case RECC_ALPHA: return ISALPHA (ch) != 0;
+ case RECC_BLANK: return ISBLANK (ch) != 0;
+ case RECC_CNTRL: return ISCNTRL (ch) != 0;
+ case RECC_DIGIT: return ISDIGIT (ch) != 0;
+ case RECC_GRAPH: return ISGRAPH (ch) != 0;
+ case RECC_LOWER: return ISLOWER (ch) != 0;
+ case RECC_PRINT: return ISPRINT (ch) != 0;
+ case RECC_PUNCT: return ISPUNCT (ch) != 0;
+ case RECC_SPACE: return ISSPACE (ch) != 0;
+ case RECC_UPPER: return ISUPPER (ch) != 0;
+ case RECC_XDIGIT: return ISXDIGIT (ch) != 0;
+ case RECC_ASCII: return IS_REAL_ASCII (ch) != 0;
case RECC_NONASCII: return !IS_REAL_ASCII (ch);
- case RECC_UNIBYTE: return ISUNIBYTE (ch);
+ case RECC_UNIBYTE: return ISUNIBYTE (ch) != 0;
case RECC_MULTIBYTE: return !ISUNIBYTE (ch);
- case RECC_WORD: return ISWORD (ch);
+ case RECC_WORD: return ISWORD (ch) != 0;
case RECC_ERROR: return false;
default:
- abort();
+ abort ();
}
}
case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0;
default:
- abort();
+ abort ();
}
}
#endif
Returns -1 if successful, REG_ESPACE if ran out of space. */
static int
-set_image_of_range_1 (work_area, start, end, translate)
- RE_TRANSLATE_TYPE translate;
- struct range_table_work_area *work_area;
- re_wchar_t start, end;
+set_image_of_range_1 (struct range_table_work_area *work_area,
+ re_wchar_t start, re_wchar_t end,
+ RE_TRANSLATE_TYPE translate)
{
/* `one_case' indicates a character, or a run of characters,
each of which is an isolate (no case-equivalents).
Returns -1 if successful, REG_ESPACE if ran out of space. */
static int
-set_image_of_range (work_area, start, end, translate)
- RE_TRANSLATE_TYPE translate;
- struct range_table_work_area *work_area;
- re_wchar_t start, end;
+set_image_of_range (struct range_table_work_area *work_area,
+ re_wchar_t start, re_wchar_t end,
+ RE_TRANSLATE_TYPE translate)
{
re_wchar_t cmin, cmax;
but don't make them smaller. */
static
-regex_grow_registers (num_regs)
- int num_regs;
+regex_grow_registers (int num_regs)
{
if (num_regs > regs_allocated_size)
{
#endif /* not MATCH_MAY_ALLOCATE */
\f
-static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
- compile_stack,
- regnum_t regnum));
+static boolean group_in_compile_stack (compile_stack_type compile_stack,
+ regnum_t regnum);
/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
Returns one of error codes defined in `regex.h', or zero for success.
/* We fetch characters from PATTERN here. */
register re_wchar_t c, c1;
- /* A random temporary spot in PATTERN. */
- re_char *p1;
-
/* Points to the end of the buffer, where we should append. */
register unsigned char *b;
/* If the object matched can contain multibyte characters. */
const boolean multibyte = RE_MULTIBYTE_P (bufp);
- /* If a target of matching can contain multibyte characters. */
- const boolean target_multibyte = RE_TARGET_MULTIBYTE_P (bufp);
-
/* Nonzero if we have pushed down into a subpattern. */
int in_subpattern = 0;
/* These hold the values of p, pattern, and pend from the main
pattern when we have pushed into a subpattern. */
- re_char *main_p;
- re_char *main_pattern;
- re_char *main_pend;
+ re_char *main_p IF_LINT (= NULL);
+ re_char *main_pattern IF_LINT (= NULL);
+ re_char *main_pend IF_LINT (= NULL);
#ifdef DEBUG
debug++;
main_pend = pend;
main_pattern = pattern;
p = pattern = whitespace_regexp;
- pend = p + strlen (p);
+ pend = p + strlen ((const char *) p);
break;
}
if (many_times_ok)
{
boolean simple = skip_one_char (laststart) == b;
- unsigned int startoffset = 0;
+ size_t startoffset = 0;
re_opcode_t ofj =
/* Check if the loop can match the empty string. */
(simple || !analyse_first (laststart, b, NULL, 0))
case '[':
{
+ re_char *p1;
+
CLEAR_RANGE_TABLE_WORK_USED (range_table_work);
if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
{
boolean escaped_char = false;
const unsigned char *p2 = p;
- re_wchar_t ch, c2;
+ re_wchar_t ch;
if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
them). */
if (c == ':' && *p == ']')
{
- re_wctype_t cc;
- int limit;
-
- cc = re_wctype (str);
+ re_wctype_t cc = re_wctype (str);
if (cc == 0)
FREE_STACK_RETURN (REG_ECTYPE);
_____ _____
| | | |
| v | v
- a | b | c
+ a | b | c
If we are at `b', then fixup_alt_jump right now points to a
three-byte space after `a'. We'll put in the jump, set
at_begline_loc_p (const re_char *pattern, const re_char *p, reg_syntax_t syntax)
{
re_char *prev = p - 2;
- boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
+ boolean odd_backslashes;
- return
- /* After a subexpression? */
- (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
- /* After an alternative? */
- || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash))
- /* After a shy subexpression? */
- || ((syntax & RE_SHY_GROUPS) && prev - 2 >= pattern
- && prev[-1] == '?' && prev[-2] == '('
- && (syntax & RE_NO_BK_PARENS
- || (prev - 3 >= pattern && prev[-3] == '\\')));
+ /* After a subexpression? */
+ if (*prev == '(')
+ odd_backslashes = (syntax & RE_NO_BK_PARENS) == 0;
+
+ /* After an alternative? */
+ else if (*prev == '|')
+ odd_backslashes = (syntax & RE_NO_BK_VBAR) == 0;
+
+ /* After a shy subexpression? */
+ else if (*prev == ':' && (syntax & RE_SHY_GROUPS))
+ {
+ /* Skip over optional regnum. */
+ while (prev - 1 >= pattern && prev[-1] >= '0' && prev[-1] <= '9')
+ --prev;
+
+ if (!(prev - 2 >= pattern
+ && prev[-1] == '?' && prev[-2] == '('))
+ return false;
+ prev -= 2;
+ odd_backslashes = (syntax & RE_NO_BK_PARENS) == 0;
+ }
+ else
+ return false;
+
+ /* Count the number of preceding backslashes. */
+ p = prev;
+ while (prev - 1 >= pattern && prev[-1] == '\\')
+ --prev;
+ return (p - prev) & odd_backslashes;
}
static boolean
group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
{
- int this_element;
+ ssize_t this_element;
for (this_element = compile_stack.avail - 1;
this_element >= 0;
{
case succeed:
return 1;
- continue;
case duplicate:
/* If the first character has to match a backreference, that means
/* Like re_search_2, below, but only one string is specified, and
doesn't let you say where to stop matching. */
-int
-re_search (struct re_pattern_buffer *bufp, const char *string, int size, int startpos, int range, struct re_registers *regs)
+regoff_t
+re_search (struct re_pattern_buffer *bufp, const char *string, size_t size,
+ ssize_t startpos, ssize_t range, struct re_registers *regs)
{
return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
regs, size);
#define HEAD_ADDR_VSTRING(P) \
(((P) >= size1 ? string2 : string1))
-/* End address of virtual concatenation of string. */
-#define STOP_ADDR_VSTRING(P) \
- (((P) >= size1 ? string2 + size2 : string1 + size1))
-
/* Address of POS in the concatenation of virtual string. */
#define POS_ADDR_VSTRING(POS) \
(((POS) >= size1 ? string2 - size1 : string1) + (POS))
found, -1 if no match, or -2 if error (such as failure
stack overflow). */
-int
-re_search_2 (struct re_pattern_buffer *bufp, const char *str1, int size1, const char *str2, int size2, int startpos, int range, struct re_registers *regs, int stop)
+regoff_t
+re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
+ const char *str2, size_t size2, ssize_t startpos, ssize_t range,
+ struct re_registers *regs, ssize_t stop)
{
- int val;
+ regoff_t val;
re_char *string1 = (re_char*) str1;
re_char *string2 = (re_char*) str2;
register char *fastmap = bufp->fastmap;
register RE_TRANSLATE_TYPE translate = bufp->translate;
- int total_size = size1 + size2;
- int endpos = startpos + range;
+ size_t total_size = size1 + size2;
+ ssize_t endpos = startpos + range;
boolean anchored_start;
/* Nonzero if we are searching multibyte string. */
const boolean multibyte = RE_TARGET_MULTIBYTE_P (bufp);
#ifdef emacs
gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */
{
- int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos));
+ ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos));
SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
}
if (range > 0) /* Searching forwards. */
{
register int lim = 0;
- int irange = range;
+ ssize_t irange = range;
if (startpos < size1 && startpos + range >= size1)
lim = range - (size1 - startpos);
if (multibyte)
{
re_char *p = POS_ADDR_VSTRING (startpos);
- re_char *pend = STOP_ADDR_VSTRING (startpos);
int len = BYTES_BY_CHAR_HEAD (*p);
range -= len;
\f
/* Declarations and macros for re_match_2. */
-static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2,
- register int len,
- RE_TRANSLATE_TYPE translate,
- const int multibyte));
+static int bcmp_translate (re_char *s1, re_char *s2,
+ register ssize_t len,
+ RE_TRANSLATE_TYPE translate,
+ const int multibyte);
/* This converts PTR, a pointer into one of the search strings `string1'
and `string2' into an offset from the beginning of that string. */
#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
#define AT_STRINGS_END(d) ((d) == end2)
-
-/* Test if D points to a character which is word-constituent. We have
- two special cases to check for: if past the end of string1, look at
- the first character in string2; and if before the beginning of
- string2, look at the last character in string1. */
-#define WORDCHAR_P(d) \
- (SYNTAX ((d) == end1 ? *string2 \
- : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
- == Sword)
-
/* Disabled due to a compiler bug -- see comment at case wordbound */
/* The comment at case wordbound is following one, but we don't use
macro and introducing temporary variables works around the bug. */
#if 0
+/* Test if D points to a character which is word-constituent. We have
+ two special cases to check for: if past the end of string1, look at
+ the first character in string2; and if before the beginning of
+ string2, look at the last character in string1. */
+#define WORDCHAR_P(d) \
+ (SYNTAX ((d) == end1 ? *string2 \
+ : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
+ == Sword)
+
/* Test if the character before D and the one at D differ with respect
to being word-constituent. */
#define AT_WORD_BOUNDARY(d) \
/* Free everything we malloc. */
#ifdef MATCH_MAY_ALLOCATE
-# define FREE_VAR(var) if (var) { REGEX_FREE (var); var = NULL; } else
+# define FREE_VAR(var) \
+ do { \
+ if (var) \
+ { \
+ REGEX_FREE (var); \
+ var = NULL; \
+ } \
+ } while (0)
# define FREE_VARIABLES() \
do { \
REGEX_FREE_STACK (fail_stack.stack); \
&& ((p2[2 + idx] & ~ p1[2 + idx]) == 0))))
break;
- if (idx == p2[1])
- {
- DEBUG_PRINT1 (" No match => fast loop.\n");
- return 1;
- }
+ if (idx == p2[1])
+ {
+ DEBUG_PRINT1 (" No match => fast loop.\n");
+ return 1;
+ }
}
}
}
#ifndef emacs /* Emacs never uses this. */
/* re_match is like re_match_2 except it takes only a single string. */
-int
+regoff_t
re_match (struct re_pattern_buffer *bufp, const char *string,
- int size, int pos, struct re_registers *regs)
+ size_t size, ssize_t pos, struct re_registers *regs)
{
- int result = re_match_2_internal (bufp, NULL, 0, (re_char*) string, size,
- pos, regs, size);
+ regoff_t result = re_match_2_internal (bufp, NULL, 0, (re_char*) string,
+ size, pos, regs, size);
return result;
}
WEAK_ALIAS (__re_match, re_match)
failure stack overflowing). Otherwise, we return the length of the
matched substring. */
-int
-re_match_2 (struct re_pattern_buffer *bufp, const char *string1, int size1, const char *string2, int size2, int pos, struct re_registers *regs, int stop)
+regoff_t
+re_match_2 (struct re_pattern_buffer *bufp, const char *string1,
+ size_t size1, const char *string2, size_t size2, ssize_t pos,
+ struct re_registers *regs, ssize_t stop)
{
- int result;
+ regoff_t result;
#ifdef emacs
- int charpos;
+ ssize_t charpos;
gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */
charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos));
SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
/* This is a separate function so that we can force an alloca cleanup
afterwards. */
-static int
-re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1, int size1, const re_char *string2, int size2, int pos, struct re_registers *regs, int stop)
+static regoff_t
+re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1,
+ size_t size1, const re_char *string2, size_t size2,
+ ssize_t pos, struct re_registers *regs, ssize_t stop)
{
/* General temporaries. */
- int mcnt;
+ ssize_t mcnt;
size_t reg;
- boolean not;
/* Just past the end of the corresponding string. */
re_char *end1, *end2;
else
do
{
- int pat_charlen, buf_charlen;
+ int pat_charlen;
int pat_ch, buf_ch;
PREFETCH ();
/* Start of actual range_table, or end of bitmap if there is no
range table. */
- re_char *range_table;
+ re_char *range_table IF_LINT (= NULL);
/* Nonzero if there is a range table. */
int range_table_exists;
if (!not) goto fail;
d += len;
- break;
}
+ break;
/* The beginning of a group is represented by start_memory.
case wordbound:
case notwordbound:
- not = (re_opcode_t) *(p - 1) == notwordbound;
- DEBUG_PRINT2 ("EXECUTING %swordbound.\n", not?"not":"");
+ {
+ boolean not = (re_opcode_t) *(p - 1) == notwordbound;
+ DEBUG_PRINT2 ("EXECUTING %swordbound.\n", not?"not":"");
- /* We SUCCEED (or FAIL) in one of the following cases: */
+ /* We SUCCEED (or FAIL) in one of the following cases: */
- /* Case 1: D is at the beginning or the end of string. */
- if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
- not = !not;
- else
- {
- /* C1 is the character before D, S1 is the syntax of C1, C2
- is the character at D, and S2 is the syntax of C2. */
- re_wchar_t c1, c2;
- int s1, s2;
- int dummy;
+ /* Case 1: D is at the beginning or the end of string. */
+ if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
+ not = !not;
+ else
+ {
+ /* C1 is the character before D, S1 is the syntax of C1, C2
+ is the character at D, and S2 is the syntax of C2. */
+ re_wchar_t c1, c2;
+ int s1, s2;
+ int dummy;
#ifdef emacs
- int offset = PTR_TO_OFFSET (d - 1);
- int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
- UPDATE_SYNTAX_TABLE (charpos);
+ ssize_t offset = PTR_TO_OFFSET (d - 1);
+ ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
+ UPDATE_SYNTAX_TABLE (charpos);
#endif
- GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
- s1 = SYNTAX (c1);
+ GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+ s1 = SYNTAX (c1);
#ifdef emacs
- UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
+ UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
#endif
- PREFETCH_NOLIMIT ();
- GET_CHAR_AFTER (c2, d, dummy);
- s2 = SYNTAX (c2);
-
- if (/* Case 2: Only one of S1 and S2 is Sword. */
- ((s1 == Sword) != (s2 == Sword))
- /* Case 3: Both of S1 and S2 are Sword, and macro
- WORD_BOUNDARY_P (C1, C2) returns nonzero. */
- || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
- not = !not;
- }
- if (not)
- break;
- else
- goto fail;
+ PREFETCH_NOLIMIT ();
+ GET_CHAR_AFTER (c2, d, dummy);
+ s2 = SYNTAX (c2);
+
+ if (/* Case 2: Only one of S1 and S2 is Sword. */
+ ((s1 == Sword) != (s2 == Sword))
+ /* Case 3: Both of S1 and S2 are Sword, and macro
+ WORD_BOUNDARY_P (C1, C2) returns nonzero. */
+ || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
+ not = !not;
+ }
+ if (not)
+ break;
+ else
+ goto fail;
+ }
case wordbeg:
DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
int s1, s2;
int dummy;
#ifdef emacs
- int offset = PTR_TO_OFFSET (d);
- int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
+ ssize_t offset = PTR_TO_OFFSET (d);
+ ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
UPDATE_SYNTAX_TABLE (charpos);
#endif
PREFETCH ();
int s1, s2;
int dummy;
#ifdef emacs
- int offset = PTR_TO_OFFSET (d) - 1;
- int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
+ ssize_t offset = PTR_TO_OFFSET (d) - 1;
+ ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
UPDATE_SYNTAX_TABLE (charpos);
#endif
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
re_wchar_t c1, c2;
int s1, s2;
#ifdef emacs
- int offset = PTR_TO_OFFSET (d);
- int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
+ ssize_t offset = PTR_TO_OFFSET (d);
+ ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
UPDATE_SYNTAX_TABLE (charpos);
#endif
PREFETCH ();
re_wchar_t c1, c2;
int s1, s2;
#ifdef emacs
- int offset = PTR_TO_OFFSET (d) - 1;
- int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
+ ssize_t offset = PTR_TO_OFFSET (d) - 1;
+ ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
UPDATE_SYNTAX_TABLE (charpos);
#endif
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
case syntaxspec:
case notsyntaxspec:
- not = (re_opcode_t) *(p - 1) == notsyntaxspec;
- mcnt = *p++;
- DEBUG_PRINT3 ("EXECUTING %ssyntaxspec %d.\n", not?"not":"", mcnt);
- PREFETCH ();
-#ifdef emacs
{
- int offset = PTR_TO_OFFSET (d);
- int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
- UPDATE_SYNTAX_TABLE (pos1);
- }
+ boolean not = (re_opcode_t) *(p - 1) == notsyntaxspec;
+ mcnt = *p++;
+ DEBUG_PRINT3 ("EXECUTING %ssyntaxspec %d.\n", not?"not":"", mcnt);
+ PREFETCH ();
+#ifdef emacs
+ {
+ ssize_t offset = PTR_TO_OFFSET (d);
+ ssize_t pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
+ UPDATE_SYNTAX_TABLE (pos1);
+ }
#endif
- {
- int len;
- re_wchar_t c;
+ {
+ int len;
+ re_wchar_t c;
- GET_CHAR_AFTER (c, d, len);
- if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not)
- goto fail;
- d += len;
+ GET_CHAR_AFTER (c, d, len);
+ if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not)
+ goto fail;
+ d += len;
+ }
}
break;
case categoryspec:
case notcategoryspec:
- not = (re_opcode_t) *(p - 1) == notcategoryspec;
- mcnt = *p++;
- DEBUG_PRINT3 ("EXECUTING %scategoryspec %d.\n", not?"not":"", mcnt);
- PREFETCH ();
{
- int len;
- re_wchar_t c;
+ boolean not = (re_opcode_t) *(p - 1) == notcategoryspec;
+ mcnt = *p++;
+ DEBUG_PRINT3 ("EXECUTING %scategoryspec %d.\n",
+ not?"not":"", mcnt);
+ PREFETCH ();
- GET_CHAR_AFTER (c, d, len);
- if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not)
- goto fail;
- d += len;
+ {
+ int len;
+ re_wchar_t c;
+ GET_CHAR_AFTER (c, d, len);
+ if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not)
+ goto fail;
+ d += len;
+ }
}
break;
goto fail;
default:
- abort();
+ abort ();
}
assert (p >= bufp->buffer && p <= pend);
bytes; nonzero otherwise. */
static int
-bcmp_translate (const re_char *s1, const re_char *s2, register int len,
+bcmp_translate (const re_char *s1, const re_char *s2, register ssize_t len,
RE_TRANSLATE_TYPE translate, const int target_multibyte)
{
register re_char *p1 = s1, *p2 = s2;
We call regex_compile to do the actual compilation. */
const char *
-re_compile_pattern (const char *pattern, size_t length, struct re_pattern_buffer *bufp)
+re_compile_pattern (const char *pattern, size_t length,
+ struct re_pattern_buffer *bufp)
{
reg_errcode_t ret;
regcomp/regexec below without link errors. */
weak_function
# endif
-re_comp (s)
- const char *s;
+re_comp (const char *s)
{
reg_errcode_t ret;
# ifdef _LIBC
weak_function
# endif
-re_exec (s)
- const char *s;
+re_exec (const char *s)
{
- const int len = strlen (s);
+ const size_t len = strlen (s);
return
0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
}
It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
the return codes and their meanings.) */
-int
+reg_errcode_t
regcomp (regex_t *__restrict preg, const char *__restrict pattern,
int cflags)
{
preg->fastmap = NULL;
}
}
- return (int) ret;
+ return ret;
}
WEAK_ALIAS (__regcomp, regcomp)
We return 0 if we find a match and REG_NOMATCH if not. */
-int
+reg_errcode_t
regexec (const regex_t *__restrict preg, const char *__restrict string,
size_t nmatch, regmatch_t pmatch[__restrict_arr], int eflags)
{
- int ret;
+ regoff_t ret;
struct re_registers regs;
regex_t private_preg;
- int len = strlen (string);
+ size_t len = strlen (string);
boolean want_reg_info = !preg->no_sub && nmatch > 0 && pmatch;
private_preg = *preg;
regs.num_regs = nmatch;
regs.start = TALLOC (nmatch * 2, regoff_t);
if (regs.start == NULL)
- return (int) REG_NOMATCH;
+ return REG_NOMATCH;
regs.end = regs.start + nmatch;
}
}
/* We want zero return to mean success, unlike `re_search'. */
- return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
+ return ret >= 0 ? REG_NOERROR : REG_NOMATCH;
}
WEAK_ALIAS (__regexec, regexec)
WEAK_ALIAS (__regfree, regfree)
#endif /* not emacs */
-