X-Git-Url: https://git.hcoop.net/bpt/emacs.git/blobdiff_plain/2db38a6f98c2abb42b746064ce97417cccc27e68..36941b32f3a3cd332b3adaf0cc8178177c2e9166:/src/regex.c diff --git a/src/regex.c b/src/regex.c index 0dbfa5971b..cb6edc2e35 100644 --- a/src/regex.c +++ b/src/regex.c @@ -2,9 +2,7 @@ 0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the internationalization features.) - Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, - 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 - Free Software Foundation, Inc. + Copyright (C) 1993-2011 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -196,18 +194,12 @@ even if config.h says that we can. */ # undef REL_ALLOC -# if defined STDC_HEADERS || defined _LIBC -# include <stdlib.h> -# else -char *malloc (); -char *realloc (); -# endif +# include <stdlib.h> /* When used in Emacs's lib-src, we need xmalloc and xrealloc. */ void * -xmalloc (size) - size_t size; +xmalloc (size_t size) { register void *val; val = (void *) malloc (size); @@ -220,9 +212,7 @@ xmalloc (size) } void * -xrealloc (block, size) - void *block; - size_t size; +xrealloc (void *block, size_t size) { register void *val; /* We must call malloc explicitly when BLOCK is 0, since some @@ -248,37 +238,16 @@ xrealloc (block, size) # endif # define realloc xrealloc -/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow. - If nothing else has been done, use the method below. */ -# ifdef INHIBIT_STRING_HEADER -# if !(defined HAVE_BZERO && defined HAVE_BCOPY) -# if !defined bzero && !defined bcopy -# undef INHIBIT_STRING_HEADER -# endif +/* This is the normal way of making sure we have memcpy, memcmp and memset. 
*/ +# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC +# include <string.h> +# else +# include <strings.h> +# ifndef memcmp +# define memcmp(s1, s2, n) bcmp (s1, s2, n) # endif -# endif - -/* This is the normal way of making sure we have memcpy, memcmp and bzero. - This is used in most programs--a few other programs avoid this - by defining INHIBIT_STRING_HEADER. */ -# ifndef INHIBIT_STRING_HEADER -# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC -# include <string.h> -# ifndef bzero -# ifndef _LIBC -# define bzero(s, n) (memset (s, '\0', n), (s)) -# else -# define bzero(s, n) __bzero (s, n) -# endif -# endif -# else -# include <strings.h> -# ifndef memcmp -# define memcmp(s1, s2, n) bcmp (s1, s2, n) -# endif -# ifndef memcpy -# define memcpy(d, s, n) (bcopy (s, d, n), (d)) -# endif +# ifndef memcpy +# define memcpy(d, s, n) (bcopy (s, d, n), (d)) # endif # endif @@ -290,7 +259,6 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 }; # define SWITCH_ENUM_CAST(x) (x) /* Dummy macros for non-Emacs environments. 
*/ -# define BASE_LEADING_CODE_P(c) (0) # define CHAR_CHARSET(c) 0 # define CHARSET_LEADING_CODE_BASE(c) 0 # define MAX_MULTIBYTE_LENGTH 1 @@ -300,7 +268,7 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 }; # define CHAR_HEAD_P(p) (1) # define SINGLE_BYTE_CHAR_P(c) (1) # define SAME_CHARSET_P(c1, c2) (1) -# define MULTIBYTE_FORM_LENGTH(p, s) (1) +# define BYTES_BY_CHAR_HEAD(p) (1) # define PREV_CHAR_BOUNDARY(p, limit) ((p)--) # define STRING_CHAR(p) (*(p)) # define RE_STRING_CHAR(p, multibyte) STRING_CHAR (p) @@ -457,7 +425,7 @@ extern char *re_syntax_table; static char re_syntax_table[CHAR_SET_SIZE]; static void -init_syntax_once () +init_syntax_once (void) { register int c; static int done = 0; @@ -465,7 +433,7 @@ init_syntax_once () if (done) return; - bzero (re_syntax_table, sizeof re_syntax_table); + memset (re_syntax_table, 0, sizeof re_syntax_table); for (c = 0; c < CHAR_SET_SIZE; ++c) if (ISALNUM (c)) @@ -1322,8 +1290,7 @@ reg_syntax_t re_syntax_options; defined in regex.h. We return the old syntax. */ reg_syntax_t -re_set_syntax (syntax) - reg_syntax_t syntax; +re_set_syntax (reg_syntax_t syntax) { reg_syntax_t ret = re_syntax_options; @@ -1336,8 +1303,7 @@ WEAK_ALIAS (__re_set_syntax, re_set_syntax) static re_char *whitespace_regexp; void -re_set_whitespace_regexp (regexp) - const char *regexp; +re_set_whitespace_regexp (const char *regexp) { whitespace_regexp = (re_char *) regexp; } @@ -2086,7 +2052,7 @@ struct range_table_work_area } while (0) -/* Both FROM and TO are mulitbyte characters. */ +/* Both FROM and TO are multibyte characters. */ #define SETUP_MULTIBYTE_RANGE(work_area, FROM, TO) \ do { \ @@ -2150,8 +2116,7 @@ struct range_table_work_area /* Map a string to the char class it names (if any). */ re_wctype_t -re_wctype (str) - re_char *str; +re_wctype (const re_char *str) { const char *string = str; if (STREQ (string, "alnum")) return RECC_ALNUM; @@ -2176,9 +2141,7 @@ re_wctype (str) /* True if CH is in the char class CC. 
*/ boolean -re_iswctype (ch, cc) - int ch; - re_wctype_t cc; +re_iswctype (int ch, re_wctype_t cc) { switch (cc) { @@ -2208,8 +2171,7 @@ re_iswctype (ch, cc) /* Return a bit-pattern to use in the range-table bits to match multibyte chars of class CC. */ static int -re_wctype_to_bit (cc) - re_wctype_t cc; +re_wctype_to_bit (re_wctype_t cc) { switch (cc) { @@ -2233,8 +2195,7 @@ re_wctype_to_bit (cc) /* Actually extend the space in WORK_AREA. */ static void -extend_range_table_work_area (work_area) - struct range_table_work_area *work_area; +extend_range_table_work_area (struct range_table_work_area *work_area) { work_area->allocated += 16 * sizeof (int); if (work_area->table) @@ -2558,11 +2519,7 @@ do { \ } while (0) static reg_errcode_t -regex_compile (pattern, size, syntax, bufp) - re_char *pattern; - size_t size; - reg_syntax_t syntax; - struct re_pattern_buffer *bufp; +regex_compile (const re_char *pattern, size_t size, reg_syntax_t syntax, struct re_pattern_buffer *bufp) { /* We fetch characters from PATTERN here. */ register re_wchar_t c, c1; @@ -2960,7 +2917,7 @@ regex_compile (pattern, size, syntax, bufp) BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); /* Clear the whole map. */ - bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); + memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH); /* charset_not matches newline according to a syntax bit. */ if ((re_opcode_t) b[-2] == charset_not @@ -3065,6 +3022,13 @@ regex_compile (pattern, size, syntax, bufp) don't need to handle them for multibyte. They are distinguished by a negative wctype. */ + /* Setup the gl_state object to its buffer-defined + value. This hardcodes the buffer-global + syntax-table for ASCII chars, while the other chars + will obey syntax-table properties. It's not ideal, + but it's the way it's been done until now. 
*/ + SETUP_BUFFER_SYNTAX_TABLE (); + for (ch = 0; ch < 256; ++ch) { c = RE_CHAR_TO_MULTIBYTE (ch); @@ -3798,7 +3762,7 @@ regex_compile (pattern, size, syntax, bufp) if (c1 != c2 && (c1 = RE_CHAR_TO_UNIBYTE (c2)) >= 0) c = c1; - } + } *b++ = c; len = 1; } @@ -3869,10 +3833,7 @@ regex_compile (pattern, size, syntax, bufp) /* Store OP at LOC followed by two-byte integer parameter ARG. */ static void -store_op1 (op, loc, arg) - re_opcode_t op; - unsigned char *loc; - int arg; +store_op1 (re_opcode_t op, unsigned char *loc, int arg) { *loc = (unsigned char) op; STORE_NUMBER (loc + 1, arg); @@ -3882,10 +3843,7 @@ store_op1 (op, loc, arg) /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ static void -store_op2 (op, loc, arg1, arg2) - re_opcode_t op; - unsigned char *loc; - int arg1, arg2; +store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2) { *loc = (unsigned char) op; STORE_NUMBER (loc + 1, arg1); @@ -3897,11 +3855,7 @@ store_op2 (op, loc, arg1, arg2) for OP followed by two-byte integer parameter ARG. */ static void -insert_op1 (op, loc, arg, end) - re_opcode_t op; - unsigned char *loc; - int arg; - unsigned char *end; +insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end) { register unsigned char *pfrom = end; register unsigned char *pto = end + 3; @@ -3916,11 +3870,7 @@ insert_op1 (op, loc, arg, end) /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ static void -insert_op2 (op, loc, arg1, arg2, end) - re_opcode_t op; - unsigned char *loc; - int arg1, arg2; - unsigned char *end; +insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, unsigned char *end) { register unsigned char *pfrom = end; register unsigned char *pto = end + 5; @@ -3937,9 +3887,7 @@ insert_op2 (op, loc, arg1, arg2, end) least one character before the ^. 
*/ static boolean -at_begline_loc_p (pattern, p, syntax) - re_char *pattern, *p; - reg_syntax_t syntax; +at_begline_loc_p (const re_char *pattern, const re_char *p, reg_syntax_t syntax) { re_char *prev = p - 2; boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; @@ -3961,9 +3909,7 @@ at_begline_loc_p (pattern, p, syntax) at least one character after the $, i.e., `P < PEND'. */ static boolean -at_endline_loc_p (p, pend, syntax) - re_char *p, *pend; - reg_syntax_t syntax; +at_endline_loc_p (const re_char *p, const re_char *pend, reg_syntax_t syntax) { re_char *next = p; boolean next_backslash = *next == '\\'; @@ -3983,9 +3929,7 @@ at_endline_loc_p (p, pend, syntax) false if it's not. */ static boolean -group_in_compile_stack (compile_stack, regnum) - compile_stack_type compile_stack; - regnum_t regnum; +group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) { int this_element; @@ -4009,10 +3953,7 @@ group_in_compile_stack (compile_stack, regnum) Return -1 if fastmap was not updated accurately. */ static int -analyse_first (p, pend, fastmap, multibyte) - re_char *p, *pend; - char *fastmap; - const int multibyte; +analyse_first (const re_char *p, const re_char *pend, char *fastmap, const int multibyte) { int j, k; boolean not; @@ -4054,7 +3995,6 @@ analyse_first (p, pend, fastmap, multibyte) { case succeed: return 1; - continue; case duplicate: /* If the first character has to match a backreference, that means @@ -4083,8 +4023,7 @@ analyse_first (p, pend, fastmap, multibyte) the corresponding multibyte character. */ int c = RE_CHAR_TO_MULTIBYTE (p[1]); - if (! CHAR_BYTE8_P (c)) - fastmap[CHAR_LEADING_CODE (c)] = 1; + fastmap[CHAR_LEADING_CODE (c)] = 1; } } break; @@ -4119,7 +4058,7 @@ analyse_first (p, pend, fastmap, multibyte) if (/* Any leading code can possibly start a character which doesn't match the specified set of characters. */ not - || + || /* If we can match a character class, we can match any multibyte characters. 
*/ (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2]) @@ -4139,7 +4078,7 @@ analyse_first (p, pend, fastmap, multibyte) && match_any_multibyte_characters == false) { /* Set fastmap[I] to 1 where I is a leading code of each - multibyte characer in the range table. */ + multibyte character in the range table. */ int c, count; unsigned char lc1, lc2; @@ -4325,15 +4264,14 @@ analyse_first (p, pend, fastmap, multibyte) Returns 0 if we succeed, -2 if an internal error. */ int -re_compile_fastmap (bufp) - struct re_pattern_buffer *bufp; +re_compile_fastmap (struct re_pattern_buffer *bufp) { char *fastmap = bufp->fastmap; int analysis; assert (fastmap && bufp->buffer); - bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */ + memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */ bufp->fastmap_accurate = 1; /* It will be when we're done. */ analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used, @@ -4356,11 +4294,7 @@ re_compile_fastmap (bufp) freeing the old data. */ void -re_set_registers (bufp, regs, num_regs, starts, ends) - struct re_pattern_buffer *bufp; - struct re_registers *regs; - unsigned num_regs; - regoff_t *starts, *ends; +re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, unsigned int num_regs, regoff_t *starts, regoff_t *ends) { if (num_regs) { @@ -4384,11 +4318,7 @@ WEAK_ALIAS (__re_set_registers, re_set_registers) doesn't let you say where to stop matching. */ int -re_search (bufp, string, size, startpos, range, regs) - struct re_pattern_buffer *bufp; - const char *string; - int size, startpos, range; - struct re_registers *regs; +re_search (struct re_pattern_buffer *bufp, const char *string, int size, int startpos, int range, struct re_registers *regs) { return re_search_2 (bufp, NULL, 0, string, size, startpos, range, regs, size); @@ -4429,14 +4359,7 @@ WEAK_ALIAS (__re_search, re_search) stack overflow). 
*/ int -re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop) - struct re_pattern_buffer *bufp; - const char *str1, *str2; - int size1, size2; - int startpos; - int range; - struct re_registers *regs; - int stop; +re_search_2 (struct re_pattern_buffer *bufp, const char *str1, int size1, const char *str2, int size2, int startpos, int range, struct re_registers *regs, int stop) { int val; re_char *string1 = (re_char*) str1; @@ -4490,7 +4413,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop) anchored_start = (bufp->buffer[0] == begline); #ifdef emacs - gl_state.object = re_match_object; + gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */ { int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos)); @@ -4637,7 +4560,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop) { re_char *p = POS_ADDR_VSTRING (startpos); re_char *pend = STOP_ADDR_VSTRING (startpos); - int len = MULTIBYTE_FORM_LENGTH (p, pend - p); + int len = BYTES_BY_CHAR_HEAD (*p); range -= len; if (range < 0) @@ -4770,8 +4693,7 @@ static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2, /* If the operation is a match against one or more chars, return a pointer to the next operation, else return NULL. */ static re_char * -skip_one_char (p) - re_char *p; +skip_one_char (const re_char *p) { switch (SWITCH_ENUM_CAST (*p++)) { @@ -4813,8 +4735,7 @@ skip_one_char (p) /* Jump over non-matching operations. */ static re_char * -skip_noops (p, pend) - re_char *p, *pend; +skip_noops (const re_char *p, const re_char *pend) { int mcnt; while (p < pend) @@ -4841,9 +4762,7 @@ skip_noops (p, pend) /* Non-zero if "p1 matches something" implies "p2 fails". 
*/ static int -mutually_exclusive_p (bufp, p1, p2) - struct re_pattern_buffer *bufp; - re_char *p1, *p2; +mutually_exclusive_p (struct re_pattern_buffer *bufp, const re_char *p1, const re_char *p2) { re_opcode_t op2; const boolean multibyte = RE_MULTIBYTE_P (bufp); @@ -5048,11 +4967,8 @@ mutually_exclusive_p (bufp, p1, p2) /* re_match is like re_match_2 except it takes only a single string. */ int -re_match (bufp, string, size, pos, regs) - struct re_pattern_buffer *bufp; - const char *string; - int size, pos; - struct re_registers *regs; +re_match (struct re_pattern_buffer *bufp, const char *string, + int size, int pos, struct re_registers *regs) { int result = re_match_2_internal (bufp, NULL, 0, (re_char*) string, size, pos, regs, size); @@ -5081,19 +4997,13 @@ Lisp_Object re_match_object; matched substring. */ int -re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) - struct re_pattern_buffer *bufp; - const char *string1, *string2; - int size1, size2; - int pos; - struct re_registers *regs; - int stop; +re_match_2 (struct re_pattern_buffer *bufp, const char *string1, int size1, const char *string2, int size2, int pos, struct re_registers *regs, int stop) { int result; #ifdef emacs int charpos; - gl_state.object = re_match_object; + gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */ charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos)); SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); #endif @@ -5109,13 +5019,7 @@ WEAK_ALIAS (__re_match_2, re_match_2) /* This is a separate function so that we can force an alloca cleanup afterwards. 
*/ static int -re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) - struct re_pattern_buffer *bufp; - re_char *string1, *string2; - int size1, size2; - int pos; - struct re_registers *regs; - int stop; +re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1, int size1, const re_char *string2, int size2, int pos, struct re_registers *regs, int stop) { /* General temporaries. */ int mcnt; @@ -6446,11 +6350,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) bytes; nonzero otherwise. */ static int -bcmp_translate (s1, s2, len, translate, target_multibyte) - re_char *s1, *s2; - register int len; - RE_TRANSLATE_TYPE translate; - const int target_multibyte; +bcmp_translate (const re_char *s1, const re_char *s2, register int len, + RE_TRANSLATE_TYPE translate, const int target_multibyte) { register re_char *p1 = s1, *p2 = s2; re_char *p1_end = s1 + len; @@ -6491,17 +6392,10 @@ bcmp_translate (s1, s2, len, translate, target_multibyte) We call regex_compile to do the actual compilation. */ const char * -re_compile_pattern (pattern, length, bufp) - const char *pattern; - size_t length; - struct re_pattern_buffer *bufp; +re_compile_pattern (const char *pattern, size_t length, struct re_pattern_buffer *bufp) { reg_errcode_t ret; -#ifdef emacs - gl_state.current_syntax_table = current_buffer->syntax_table; -#endif - /* GNU code is written to assume at least RE_NREGS registers will be set (and at least one extra will be -1). */ bufp->regs_allocated = REGS_UNALLOCATED; @@ -6626,10 +6520,8 @@ re_exec (s) the return codes and their meanings.) */ int -regcomp (preg, pattern, cflags) - regex_t *__restrict preg; - const char *__restrict pattern; - int cflags; +regcomp (regex_t *__restrict preg, const char *__restrict pattern, + int cflags) { reg_errcode_t ret; reg_syntax_t syntax @@ -6711,12 +6603,8 @@ WEAK_ALIAS (__regcomp, regcomp) We return 0 if we find a match and REG_NOMATCH if not. 
*/ int -regexec (preg, string, nmatch, pmatch, eflags) - const regex_t *__restrict preg; - const char *__restrict string; - size_t nmatch; - regmatch_t pmatch[__restrict_arr]; - int eflags; +regexec (const regex_t *__restrict preg, const char *__restrict string, + size_t nmatch, regmatch_t pmatch[__restrict_arr], int eflags) { int ret; struct re_registers regs; @@ -6788,11 +6676,7 @@ WEAK_ALIAS (__regexec, regexec) error with msvc8 compiler. */ size_t -regerror (err_code, preg, errbuf, errbuf_size) - int err_code; - const regex_t *preg; - char *errbuf; - size_t errbuf_size; +regerror (int err_code, const regex_t *preg, char *errbuf, size_t errbuf_size) { const char *msg; size_t msg_size; @@ -6828,8 +6712,7 @@ WEAK_ALIAS (__regerror, regerror) /* Free dynamically allocated space used by PREG. */ void -regfree (preg) - regex_t *preg; +regfree (regex_t *preg) { free (preg->buffer); preg->buffer = NULL; @@ -6847,6 +6730,3 @@ regfree (preg) WEAK_ALIAS (__regfree, regfree) #endif /* not emacs */ - -/* arch-tag: 4ffd68ba-2a9e-435b-a21a-018990f9eeb2 - (do not change this comment) */