0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the
internationalization features.)
- Copyright (C) 1993-2011 Free Software Foundation, Inc.
+ Copyright (C) 1993-2012 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#pragma alloca
#endif
+/* Ignore some GCC warnings for now. This section should go away
+ once the Emacs and Gnulib regex code is merged. */
+#if (__GNUC__ == 4 && 3 <= __GNUC_MINOR__) || 4 < __GNUC__
+# pragma GCC diagnostic ignored "-Wstrict-overflow"
+# ifndef emacs
+# pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+# pragma GCC diagnostic ignored "-Wunused-function"
+# pragma GCC diagnostic ignored "-Wunused-macros"
+# pragma GCC diagnostic ignored "-Wunused-result"
+# pragma GCC diagnostic ignored "-Wunused-variable"
+# endif
+#endif
+
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
-#if defined STDC_HEADERS && !defined emacs
-# include <stddef.h>
-#else
+#include <stddef.h>
+
+#ifdef emacs
/* We need this for `regex.h', and perhaps for the Emacs include files. */
# include <sys/types.h>
#endif
(HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC && !emacs)
#endif
-/* For platform which support the ISO C amendement 1 functionality we
+/* For platforms which support the ISO C amendment 1 functionality we
support user defined character classes. */
#if WIDE_CHAR_SUPPORT
/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
# define regerror(err_code, preg, errbuf, errbuf_size) \
- __regerror(err_code, preg, errbuf, errbuf_size)
+ __regerror (err_code, preg, errbuf, errbuf_size)
# define re_set_registers(bu, re, nu, st, en) \
__re_set_registers (bu, re, nu, st, en)
# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
# include <setjmp.h>
# include "lisp.h"
+# include "character.h"
# include "buffer.h"
/* Make syntax table lookup grant data in gl_state. */
# define SYNTAX_ENTRY_VIA_PROPERTY
# include "syntax.h"
-# include "character.h"
# include "category.h"
# ifdef malloc
/* When used in Emacs's lib-src, we need xmalloc and xrealloc. */
-void *
+static void *
xmalloc (size_t size)
{
register void *val;
return val;
}
-void *
+static void *
xrealloc (void *block, size_t size)
{
register void *val;
# endif
# define realloc xrealloc
-/* This is the normal way of making sure we have memcpy, memcmp and memset. */
-# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
-# include <string.h>
-# else
-# include <strings.h>
-# ifndef memcmp
-# define memcmp(s1, s2, n) bcmp (s1, s2, n)
-# endif
-# ifndef memcpy
-# define memcpy(d, s, n) (bcopy (s, d, n), (d))
-# endif
-# endif
+# include <string.h>
/* Define the syntax stuff for \<, \>, etc. */
#else /* not emacs */
-/* Jim Meyering writes:
-
- "... Some ctype macros are valid only for character codes that
- isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
- using /bin/cc or gcc but without giving an ansi option). So, all
- ctype uses should be through macros like ISPRINT... If
- STDC_HEADERS is defined, then autoconf has verified that the ctype
- macros don't need to be guarded with references to isascii. ...
- Defining isascii to 1 should let any compiler worth its salt
- eliminate the && through constant folding."
- Solaris defines some of these symbols so we must undefine them first. */
-
-# undef ISASCII
-# if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
-# define ISASCII(c) 1
-# else
-# define ISASCII(c) isascii(c)
-# endif
-
/* 1 if C is an ASCII character. */
# define IS_REAL_ASCII(c) ((c) < 0200)
# define ISUNIBYTE(c) 1
# ifdef isblank
-# define ISBLANK(c) (ISASCII (c) && isblank (c))
+# define ISBLANK(c) isblank (c)
# else
# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
# endif
# ifdef isgraph
-# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
+# define ISGRAPH(c) isgraph (c)
# else
-# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
+# define ISGRAPH(c) (isprint (c) && !isspace (c))
# endif
+/* Solaris defines ISPRINT so we must undefine it first. */
# undef ISPRINT
-# define ISPRINT(c) (ISASCII (c) && isprint (c))
-# define ISDIGIT(c) (ISASCII (c) && isdigit (c))
-# define ISALNUM(c) (ISASCII (c) && isalnum (c))
-# define ISALPHA(c) (ISASCII (c) && isalpha (c))
-# define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
-# define ISLOWER(c) (ISASCII (c) && islower (c))
-# define ISPUNCT(c) (ISASCII (c) && ispunct (c))
-# define ISSPACE(c) (ISASCII (c) && isspace (c))
-# define ISUPPER(c) (ISASCII (c) && isupper (c))
-# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
-
-# define ISWORD(c) ISALPHA(c)
+# define ISPRINT(c) isprint (c)
+# define ISDIGIT(c) isdigit (c)
+# define ISALNUM(c) isalnum (c)
+# define ISALPHA(c) isalpha (c)
+# define ISCNTRL(c) iscntrl (c)
+# define ISLOWER(c) islower (c)
+# define ISPUNCT(c) ispunct (c)
+# define ISSPACE(c) isspace (c)
+# define ISUPPER(c) isupper (c)
+# define ISXDIGIT(c) isxdigit (c)
+
+# define ISWORD(c) ISALPHA (c)
# ifdef _tolower
-# define TOLOWER(c) _tolower(c)
+# define TOLOWER(c) _tolower (c)
# else
-# define TOLOWER(c) tolower(c)
+# define TOLOWER(c) tolower (c)
# endif
/* How many characters in the character set. */
#endif /* not emacs */
\f
-#ifndef NULL
-# define NULL (void *)0
-#endif
-
-/* We remove any previous definition of `SIGN_EXTEND_CHAR',
- since ours (we hope) works properly with all combinations of
- machines, compilers, `char' and `unsigned char' argument types.
- (Per Bothner suggested the basic approach.) */
-#undef SIGN_EXTEND_CHAR
-#if __STDC__
-# define SIGN_EXTEND_CHAR(c) ((signed char) (c))
-#else /* not __STDC__ */
-/* As in Harbison and Steele. */
-# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
-#endif
+#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
\f
/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
use `alloca' instead of `malloc'. This is because using malloc in
#define MIN(a, b) ((a) < (b) ? (a) : (b))
/* Type of source-pattern and string chars. */
+#ifdef _MSC_VER
+typedef unsigned char re_char;
+#else
typedef const unsigned char re_char;
+#endif
typedef char boolean;
#define false 0
#define true 1
-static regoff_t re_match_2_internal _RE_ARGS ((struct re_pattern_buffer *bufp,
- re_char *string1, size_t size1,
- re_char *string2, size_t size2,
- ssize_t pos,
- struct re_registers *regs,
- ssize_t stop));
+static regoff_t re_match_2_internal (struct re_pattern_buffer *bufp,
+ re_char *string1, size_t size1,
+ re_char *string2, size_t size2,
+ ssize_t pos,
+ struct re_registers *regs,
+ ssize_t stop);
\f
/* These are the command codes that appear in compiled regular
expressions. Some opcodes are followed by argument bytes. A
on_failure_jump_nastyloop,
/* A smart `on_failure_jump' used for greedy * and + operators.
- It analyses the loop before which it is put and if the
+ It analyzes the loop before which it is put and if the
loop does not require backtracking, it changes itself to
`on_failure_keep_string_jump' and short-circuits the loop,
else it just defaults to changing itself into `on_failure_jump'.
} while (0)
#ifdef DEBUG
-static void extract_number _RE_ARGS ((int *dest, re_char *source));
static void
-extract_number (dest, source)
- int *dest;
- re_char *source;
+extract_number (int *dest, re_char *source)
{
int temp = SIGN_EXTEND_CHAR (*(source + 1));
*dest = *source & 0377;
} while (0)
#ifdef DEBUG
-static void extract_number_and_incr _RE_ARGS ((int *destination,
- re_char **source));
static void
-extract_number_and_incr (destination, source)
- int *destination;
- re_char **source;
+extract_number_and_incr (int *destination, re_char **source)
{
extract_number (destination, *source);
*source += 2;
\f
/* Subroutine declarations and macros for regex_compile. */
-static reg_errcode_t regex_compile _RE_ARGS ((re_char *pattern, size_t size,
- reg_syntax_t syntax,
- struct re_pattern_buffer *bufp));
-static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
-static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
- int arg1, int arg2));
-static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
- int arg, unsigned char *end));
-static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
- int arg1, int arg2, unsigned char *end));
-static boolean at_begline_loc_p _RE_ARGS ((re_char *pattern,
- re_char *p,
- reg_syntax_t syntax));
-static boolean at_endline_loc_p _RE_ARGS ((re_char *p,
- re_char *pend,
- reg_syntax_t syntax));
-static re_char *skip_one_char _RE_ARGS ((re_char *p));
-static int analyse_first _RE_ARGS ((re_char *p, re_char *pend,
- char *fastmap, const int multibyte));
+static reg_errcode_t regex_compile (re_char *pattern, size_t size,
+ reg_syntax_t syntax,
+ struct re_pattern_buffer *bufp);
+static void store_op1 (re_opcode_t op, unsigned char *loc, int arg);
+static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2);
+static void insert_op1 (re_opcode_t op, unsigned char *loc,
+ int arg, unsigned char *end);
+static void insert_op2 (re_opcode_t op, unsigned char *loc,
+ int arg1, int arg2, unsigned char *end);
+static boolean at_begline_loc_p (re_char *pattern, re_char *p,
+ reg_syntax_t syntax);
+static boolean at_endline_loc_p (re_char *p, re_char *pend,
+ reg_syntax_t syntax);
+static re_char *skip_one_char (re_char *p);
+static int analyse_first (re_char *p, re_char *pend,
+ char *fastmap, const int multibyte);
/* Fetch the next character in the uncompiled pattern, with no
translation. */
{
switch (cc)
{
- case RECC_ALNUM: return ISALNUM (ch);
- case RECC_ALPHA: return ISALPHA (ch);
- case RECC_BLANK: return ISBLANK (ch);
- case RECC_CNTRL: return ISCNTRL (ch);
- case RECC_DIGIT: return ISDIGIT (ch);
- case RECC_GRAPH: return ISGRAPH (ch);
- case RECC_LOWER: return ISLOWER (ch);
- case RECC_PRINT: return ISPRINT (ch);
- case RECC_PUNCT: return ISPUNCT (ch);
- case RECC_SPACE: return ISSPACE (ch);
- case RECC_UPPER: return ISUPPER (ch);
- case RECC_XDIGIT: return ISXDIGIT (ch);
- case RECC_ASCII: return IS_REAL_ASCII (ch);
+ case RECC_ALNUM: return ISALNUM (ch) != 0;
+ case RECC_ALPHA: return ISALPHA (ch) != 0;
+ case RECC_BLANK: return ISBLANK (ch) != 0;
+ case RECC_CNTRL: return ISCNTRL (ch) != 0;
+ case RECC_DIGIT: return ISDIGIT (ch) != 0;
+ case RECC_GRAPH: return ISGRAPH (ch) != 0;
+ case RECC_LOWER: return ISLOWER (ch) != 0;
+ case RECC_PRINT: return ISPRINT (ch) != 0;
+ case RECC_PUNCT: return ISPUNCT (ch) != 0;
+ case RECC_SPACE: return ISSPACE (ch) != 0;
+ case RECC_UPPER: return ISUPPER (ch) != 0;
+ case RECC_XDIGIT: return ISXDIGIT (ch) != 0;
+ case RECC_ASCII: return IS_REAL_ASCII (ch) != 0;
case RECC_NONASCII: return !IS_REAL_ASCII (ch);
- case RECC_UNIBYTE: return ISUNIBYTE (ch);
+ case RECC_UNIBYTE: return ISUNIBYTE (ch) != 0;
case RECC_MULTIBYTE: return !ISUNIBYTE (ch);
- case RECC_WORD: return ISWORD (ch);
+ case RECC_WORD: return ISWORD (ch) != 0;
case RECC_ERROR: return false;
default:
- abort();
+ abort ();
}
}
case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0;
default:
- abort();
+ abort ();
}
}
#endif
Returns -1 if successful, REG_ESPACE if ran out of space. */
static int
-set_image_of_range_1 (work_area, start, end, translate)
- RE_TRANSLATE_TYPE translate;
- struct range_table_work_area *work_area;
- re_wchar_t start, end;
+set_image_of_range_1 (struct range_table_work_area *work_area,
+ re_wchar_t start, re_wchar_t end,
+ RE_TRANSLATE_TYPE translate)
{
/* `one_case' indicates a character, or a run of characters,
each of which is an isolate (no case-equivalents).
Returns -1 if successful, REG_ESPACE if ran out of space. */
static int
-set_image_of_range (work_area, start, end, translate)
- RE_TRANSLATE_TYPE translate;
- struct range_table_work_area *work_area;
- re_wchar_t start, end;
+set_image_of_range (struct range_table_work_area *work_area,
+ re_wchar_t start, re_wchar_t end,
+ RE_TRANSLATE_TYPE translate)
{
re_wchar_t cmin, cmax;
but don't make them smaller. */
static
-regex_grow_registers (num_regs)
- int num_regs;
+regex_grow_registers (int num_regs)
{
if (num_regs > regs_allocated_size)
{
#endif /* not MATCH_MAY_ALLOCATE */
\f
-static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
- compile_stack,
- regnum_t regnum));
+static boolean group_in_compile_stack (compile_stack_type compile_stack,
+ regnum_t regnum);
/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
Returns one of error codes defined in `regex.h', or zero for success.
at_begline_loc_p (const re_char *pattern, const re_char *p, reg_syntax_t syntax)
{
re_char *prev = p - 2;
- boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
+ boolean odd_backslashes;
- return
- /* After a subexpression? */
- (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
- /* After an alternative? */
- || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash))
- /* After a shy subexpression? */
- || ((syntax & RE_SHY_GROUPS) && prev - 2 >= pattern
- && prev[-1] == '?' && prev[-2] == '('
- && (syntax & RE_NO_BK_PARENS
- || (prev - 3 >= pattern && prev[-3] == '\\')));
+ /* After a subexpression? */
+ if (*prev == '(')
+ odd_backslashes = (syntax & RE_NO_BK_PARENS) == 0;
+
+ /* After an alternative? */
+ else if (*prev == '|')
+ odd_backslashes = (syntax & RE_NO_BK_VBAR) == 0;
+
+ /* After a shy subexpression? */
+ else if (*prev == ':' && (syntax & RE_SHY_GROUPS))
+ {
+ /* Skip over optional regnum. */
+ while (prev - 1 >= pattern && prev[-1] >= '0' && prev[-1] <= '9')
+ --prev;
+
+ if (!(prev - 2 >= pattern
+ && prev[-1] == '?' && prev[-2] == '('))
+ return false;
+ prev -= 2;
+ odd_backslashes = (syntax & RE_NO_BK_PARENS) == 0;
+ }
+ else
+ return false;
+
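+ /* Count the number of preceding backslashes.
+ NOTE(review): when odd_backslashes is 0 the `&' below always yields 0,
+ so an even backslash count is never accepted -- confirm intended. */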
+ p = prev;
+ while (prev - 1 >= pattern && prev[-1] == '\\')
+ --prev;
+ return (p - prev) & odd_backslashes;
}
\f
/* Declarations and macros for re_match_2. */
-static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2,
- register ssize_t len,
- RE_TRANSLATE_TYPE translate,
- const int multibyte));
+static int bcmp_translate (re_char *s1, re_char *s2,
+ register ssize_t len,
+ RE_TRANSLATE_TYPE translate,
+ const int multibyte);
/* This converts PTR, a pointer into one of the search strings `string1'
and `string2' into an offset from the beginning of that string. */
goto fail;
default:
- abort();
+ abort ();
}
assert (p >= bufp->buffer && p <= pend);
regcomp/regexec below without link errors. */
weak_function
# endif
-re_comp (s)
- const char *s;
+re_comp (const char *s)
{
reg_errcode_t ret;
}
-regoff_t
+int
# ifdef _LIBC
weak_function
# endif
regexec (const regex_t *__restrict preg, const char *__restrict string,
size_t nmatch, regmatch_t pmatch[__restrict_arr], int eflags)
{
- reg_errcode_t ret;
+ regoff_t ret;
struct re_registers regs;
regex_t private_preg;
size_t len = strlen (string);