0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the
internationalization features.)
- Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
- 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
- Free Software Foundation, Inc.
+ Copyright (C) 1993-2011 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
even if config.h says that we can. */
# undef REL_ALLOC
-# if defined STDC_HEADERS || defined _LIBC
-# include <stdlib.h>
-# else
-char *malloc ();
-char *realloc ();
-# endif
+# include <unistd.h>
/* When used in Emacs's lib-src, we need xmalloc and xrealloc. */
void *
-xmalloc (size)
- size_t size;
+xmalloc (size_t size)
{
register void *val;
val = (void *) malloc (size);
}
void *
-xrealloc (block, size)
- void *block;
- size_t size;
+xrealloc (void *block, size_t size)
{
register void *val;
/* We must call malloc explicitly when BLOCK is 0, since some
# endif
# define realloc xrealloc
-/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
- If nothing else has been done, use the method below. */
-# ifdef INHIBIT_STRING_HEADER
-# if !(defined HAVE_BZERO && defined HAVE_BCOPY)
-# if !defined bzero && !defined bcopy
-# undef INHIBIT_STRING_HEADER
-# endif
+/* This is the normal way of making sure we have memcpy, memcmp and memset. */
+# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
+# include <string.h>
+# else
+# include <strings.h>
+# ifndef memcmp
+# define memcmp(s1, s2, n) bcmp (s1, s2, n)
# endif
-# endif
-
-/* This is the normal way of making sure we have memcpy, memcmp and bzero.
- This is used in most programs--a few other programs avoid this
- by defining INHIBIT_STRING_HEADER. */
-# ifndef INHIBIT_STRING_HEADER
-# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
-# include <string.h>
-# ifndef bzero
-# ifndef _LIBC
-# define bzero(s, n) (memset (s, '\0', n), (s))
-# else
-# define bzero(s, n) __bzero (s, n)
-# endif
-# endif
-# else
-# include <strings.h>
-# ifndef memcmp
-# define memcmp(s1, s2, n) bcmp (s1, s2, n)
-# endif
-# ifndef memcpy
-# define memcpy(d, s, n) (bcopy (s, d, n), (d))
-# endif
+# ifndef memcpy
+# define memcpy(d, s, n) (bcopy (s, d, n), (d))
# endif
# endif
# define SWITCH_ENUM_CAST(x) (x)
/* Dummy macros for non-Emacs environments. */
-# define BASE_LEADING_CODE_P(c) (0)
# define CHAR_CHARSET(c) 0
# define CHARSET_LEADING_CODE_BASE(c) 0
# define MAX_MULTIBYTE_LENGTH 1
# define CHAR_HEAD_P(p) (1)
# define SINGLE_BYTE_CHAR_P(c) (1)
# define SAME_CHARSET_P(c1, c2) (1)
-# define MULTIBYTE_FORM_LENGTH(p, s) (1)
+# define BYTES_BY_CHAR_HEAD(p) (1)
# define PREV_CHAR_BOUNDARY(p, limit) ((p)--)
# define STRING_CHAR(p) (*(p))
# define RE_STRING_CHAR(p, multibyte) STRING_CHAR (p)
static char re_syntax_table[CHAR_SET_SIZE];
static void
-init_syntax_once ()
+init_syntax_once (void)
{
register int c;
static int done = 0;
if (done)
return;
- bzero (re_syntax_table, sizeof re_syntax_table);
+ memset (re_syntax_table, 0, sizeof re_syntax_table);
for (c = 0; c < CHAR_SET_SIZE; ++c)
if (ISALNUM (c))
defined in regex.h. We return the old syntax. */
reg_syntax_t
-re_set_syntax (syntax)
- reg_syntax_t syntax;
+re_set_syntax (reg_syntax_t syntax)
{
reg_syntax_t ret = re_syntax_options;
static re_char *whitespace_regexp;
void
-re_set_whitespace_regexp (regexp)
- const char *regexp;
+re_set_whitespace_regexp (const char *regexp)
{
whitespace_regexp = (re_char *) regexp;
}
} while (0)
-/* Both FROM and TO are mulitbyte characters. */
+/* Both FROM and TO are multibyte characters. */
#define SETUP_MULTIBYTE_RANGE(work_area, FROM, TO) \
do { \
/* Map a string to the char class it names (if any). */
re_wctype_t
-re_wctype (str)
- re_char *str;
+re_wctype (const re_char *str)
{
const char *string = str;
if (STREQ (string, "alnum")) return RECC_ALNUM;
/* True if CH is in the char class CC. */
boolean
-re_iswctype (ch, cc)
- int ch;
- re_wctype_t cc;
+re_iswctype (int ch, re_wctype_t cc)
{
switch (cc)
{
/* Return a bit-pattern to use in the range-table bits to match multibyte
chars of class CC. */
static int
-re_wctype_to_bit (cc)
- re_wctype_t cc;
+re_wctype_to_bit (re_wctype_t cc)
{
switch (cc)
{
/* Actually extend the space in WORK_AREA. */
static void
-extend_range_table_work_area (work_area)
- struct range_table_work_area *work_area;
+extend_range_table_work_area (struct range_table_work_area *work_area)
{
work_area->allocated += 16 * sizeof (int);
if (work_area->table)
} while (0)
static reg_errcode_t
-regex_compile (pattern, size, syntax, bufp)
- re_char *pattern;
- size_t size;
- reg_syntax_t syntax;
- struct re_pattern_buffer *bufp;
+regex_compile (const re_char *pattern, size_t size, reg_syntax_t syntax, struct re_pattern_buffer *bufp)
{
/* We fetch characters from PATTERN here. */
register re_wchar_t c, c1;
BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
/* Clear the whole map. */
- bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
+ memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
/* charset_not matches newline according to a syntax bit. */
if ((re_opcode_t) b[-2] == charset_not
don't need to handle them for multibyte.
They are distinguished by a negative wctype. */
+ /* Set up the gl_state object to its buffer-defined
+ value. This hardcodes the buffer-global
+ syntax-table for ASCII chars, while the other chars
+ will obey syntax-table properties. It's not ideal,
+ but it's the way it's been done until now. */
+ SETUP_BUFFER_SYNTAX_TABLE ();
+
for (ch = 0; ch < 256; ++ch)
{
c = RE_CHAR_TO_MULTIBYTE (ch);
if (c1 != c2 && (c1 = RE_CHAR_TO_UNIBYTE (c2)) >= 0)
c = c1;
- }
+ }
*b++ = c;
len = 1;
}
/* Store OP at LOC followed by two-byte integer parameter ARG. */
static void
-store_op1 (op, loc, arg)
- re_opcode_t op;
- unsigned char *loc;
- int arg;
+store_op1 (re_opcode_t op, unsigned char *loc, int arg)
{
*loc = (unsigned char) op;
STORE_NUMBER (loc + 1, arg);
/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */
static void
-store_op2 (op, loc, arg1, arg2)
- re_opcode_t op;
- unsigned char *loc;
- int arg1, arg2;
+store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2)
{
*loc = (unsigned char) op;
STORE_NUMBER (loc + 1, arg1);
for OP followed by two-byte integer parameter ARG. */
static void
-insert_op1 (op, loc, arg, end)
- re_opcode_t op;
- unsigned char *loc;
- int arg;
- unsigned char *end;
+insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end)
{
register unsigned char *pfrom = end;
register unsigned char *pto = end + 3;
/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */
static void
-insert_op2 (op, loc, arg1, arg2, end)
- re_opcode_t op;
- unsigned char *loc;
- int arg1, arg2;
- unsigned char *end;
+insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, unsigned char *end)
{
register unsigned char *pfrom = end;
register unsigned char *pto = end + 5;
least one character before the ^. */
static boolean
-at_begline_loc_p (pattern, p, syntax)
- re_char *pattern, *p;
- reg_syntax_t syntax;
+at_begline_loc_p (const re_char *pattern, const re_char *p, reg_syntax_t syntax)
{
re_char *prev = p - 2;
boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
at least one character after the $, i.e., `P < PEND'. */
static boolean
-at_endline_loc_p (p, pend, syntax)
- re_char *p, *pend;
- reg_syntax_t syntax;
+at_endline_loc_p (const re_char *p, const re_char *pend, reg_syntax_t syntax)
{
re_char *next = p;
boolean next_backslash = *next == '\\';
false if it's not. */
static boolean
-group_in_compile_stack (compile_stack, regnum)
- compile_stack_type compile_stack;
- regnum_t regnum;
+group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
{
int this_element;
Return -1 if fastmap was not updated accurately. */
static int
-analyse_first (p, pend, fastmap, multibyte)
- re_char *p, *pend;
- char *fastmap;
- const int multibyte;
+analyse_first (const re_char *p, const re_char *pend, char *fastmap, const int multibyte)
{
int j, k;
boolean not;
{
case succeed:
return 1;
- continue;
case duplicate:
/* If the first character has to match a backreference, that means
if (/* Any leading code can possibly start a character
which doesn't match the specified set of characters. */
not
- ||
+ ||
/* If we can match a character class, we can match any
multibyte characters. */
(CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
&& match_any_multibyte_characters == false)
{
/* Set fastmap[I] to 1 where I is a leading code of each
- multibyte characer in the range table. */
+ multibyte character in the range table. */
int c, count;
unsigned char lc1, lc2;
Returns 0 if we succeed, -2 if an internal error. */
int
-re_compile_fastmap (bufp)
- struct re_pattern_buffer *bufp;
+re_compile_fastmap (struct re_pattern_buffer *bufp)
{
char *fastmap = bufp->fastmap;
int analysis;
assert (fastmap && bufp->buffer);
- bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */
+ memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */
bufp->fastmap_accurate = 1; /* It will be when we're done. */
analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used,
freeing the old data. */
void
-re_set_registers (bufp, regs, num_regs, starts, ends)
- struct re_pattern_buffer *bufp;
- struct re_registers *regs;
- unsigned num_regs;
- regoff_t *starts, *ends;
+re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, unsigned int num_regs, regoff_t *starts, regoff_t *ends)
{
if (num_regs)
{
doesn't let you say where to stop matching. */
int
-re_search (bufp, string, size, startpos, range, regs)
- struct re_pattern_buffer *bufp;
- const char *string;
- int size, startpos, range;
- struct re_registers *regs;
+re_search (struct re_pattern_buffer *bufp, const char *string, int size, int startpos, int range, struct re_registers *regs)
{
return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
regs, size);
stack overflow). */
int
-re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
- struct re_pattern_buffer *bufp;
- const char *str1, *str2;
- int size1, size2;
- int startpos;
- int range;
- struct re_registers *regs;
- int stop;
+re_search_2 (struct re_pattern_buffer *bufp, const char *str1, int size1, const char *str2, int size2, int startpos, int range, struct re_registers *regs, int stop)
{
int val;
re_char *string1 = (re_char*) str1;
anchored_start = (bufp->buffer[0] == begline);
#ifdef emacs
- gl_state.object = re_match_object;
+ gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */
{
int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos));
{
re_char *p = POS_ADDR_VSTRING (startpos);
re_char *pend = STOP_ADDR_VSTRING (startpos);
- int len = MULTIBYTE_FORM_LENGTH (p, pend - p);
+ int len = BYTES_BY_CHAR_HEAD (*p);
range -= len;
if (range < 0)
/* If the operation is a match against one or more chars,
return a pointer to the next operation, else return NULL. */
static re_char *
-skip_one_char (p)
- re_char *p;
+skip_one_char (const re_char *p)
{
switch (SWITCH_ENUM_CAST (*p++))
{
/* Jump over non-matching operations. */
static re_char *
-skip_noops (p, pend)
- re_char *p, *pend;
+skip_noops (const re_char *p, const re_char *pend)
{
int mcnt;
while (p < pend)
/* Non-zero if "p1 matches something" implies "p2 fails". */
static int
-mutually_exclusive_p (bufp, p1, p2)
- struct re_pattern_buffer *bufp;
- re_char *p1, *p2;
+mutually_exclusive_p (struct re_pattern_buffer *bufp, const re_char *p1, const re_char *p2)
{
re_opcode_t op2;
const boolean multibyte = RE_MULTIBYTE_P (bufp);
/* re_match is like re_match_2 except it takes only a single string. */
int
-re_match (bufp, string, size, pos, regs)
- struct re_pattern_buffer *bufp;
- const char *string;
- int size, pos;
- struct re_registers *regs;
+re_match (struct re_pattern_buffer *bufp, const char *string,
+ int size, int pos, struct re_registers *regs)
{
int result = re_match_2_internal (bufp, NULL, 0, (re_char*) string, size,
pos, regs, size);
matched substring. */
int
-re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
- struct re_pattern_buffer *bufp;
- const char *string1, *string2;
- int size1, size2;
- int pos;
- struct re_registers *regs;
- int stop;
+re_match_2 (struct re_pattern_buffer *bufp, const char *string1, int size1, const char *string2, int size2, int pos, struct re_registers *regs, int stop)
{
int result;
#ifdef emacs
int charpos;
- gl_state.object = re_match_object;
+ gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */
charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos));
SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
#endif
/* This is a separate function so that we can force an alloca cleanup
afterwards. */
static int
-re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
- struct re_pattern_buffer *bufp;
- re_char *string1, *string2;
- int size1, size2;
- int pos;
- struct re_registers *regs;
- int stop;
+re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1, int size1, const re_char *string2, int size2, int pos, struct re_registers *regs, int stop)
{
/* General temporaries. */
int mcnt;
bytes; nonzero otherwise. */
static int
-bcmp_translate (s1, s2, len, translate, target_multibyte)
- re_char *s1, *s2;
- register int len;
- RE_TRANSLATE_TYPE translate;
- const int target_multibyte;
+bcmp_translate (const re_char *s1, const re_char *s2, register int len,
+ RE_TRANSLATE_TYPE translate, const int target_multibyte)
{
register re_char *p1 = s1, *p2 = s2;
re_char *p1_end = s1 + len;
We call regex_compile to do the actual compilation. */
const char *
-re_compile_pattern (pattern, length, bufp)
- const char *pattern;
- size_t length;
- struct re_pattern_buffer *bufp;
+re_compile_pattern (const char *pattern, size_t length, struct re_pattern_buffer *bufp)
{
reg_errcode_t ret;
-#ifdef emacs
- gl_state.current_syntax_table = current_buffer->syntax_table;
-#endif
-
/* GNU code is written to assume at least RE_NREGS registers will be set
(and at least one extra will be -1). */
bufp->regs_allocated = REGS_UNALLOCATED;
the return codes and their meanings.) */
int
-regcomp (preg, pattern, cflags)
- regex_t *__restrict preg;
- const char *__restrict pattern;
- int cflags;
+regcomp (regex_t *__restrict preg, const char *__restrict pattern,
+ int cflags)
{
reg_errcode_t ret;
reg_syntax_t syntax
We return 0 if we find a match and REG_NOMATCH if not. */
int
-regexec (preg, string, nmatch, pmatch, eflags)
- const regex_t *__restrict preg;
- const char *__restrict string;
- size_t nmatch;
- regmatch_t pmatch[__restrict_arr];
- int eflags;
+regexec (const regex_t *__restrict preg, const char *__restrict string,
+ size_t nmatch, regmatch_t pmatch[__restrict_arr], int eflags)
{
int ret;
struct re_registers regs;
error with msvc8 compiler. */
size_t
-regerror (err_code, preg, errbuf, errbuf_size)
- int err_code;
- const regex_t *preg;
- char *errbuf;
- size_t errbuf_size;
+regerror (int err_code, const regex_t *preg, char *errbuf, size_t errbuf_size)
{
const char *msg;
size_t msg_size;
/* Free dynamically allocated space used by PREG. */
void
-regfree (preg)
- regex_t *preg;
+regfree (regex_t *preg)
{
free (preg->buffer);
preg->buffer = NULL;
WEAK_ALIAS (__regfree, regfree)
#endif /* not emacs */
-
-/* arch-tag: 4ffd68ba-2a9e-435b-a21a-018990f9eeb2
- (do not change this comment) */