Fix minor problems found by static checking.
[bpt/emacs.git] / src / regex.c
CommitLineData
e318085a 1/* Extended regular expression matching and search library, version
0b32bf0e 2 0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the
bc78d348
KB
3 internationalization features.)
4
acaf905b 5 Copyright (C) 1993-2012 Free Software Foundation, Inc.
bc78d348 6
fa9a63c5
RM
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
e468b87f 9 the Free Software Foundation; either version 3, or (at your option)
fa9a63c5
RM
10 any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
7814e705 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
fa9a63c5
RM
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
4fc5845f 19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
7814e705 20 USA. */
fa9a63c5 21
6df42991 22/* TODO:
505bde11 23 - structure the opcode space into opcode+flag.
dc1e502d 24 - merge with glibc's regex.[ch].
01618498 25 - replace (succeed_n + jump_n + set_number_at) with something that doesn't
6dcf2d0e
SM
26 need to modify the compiled regexp so that re_match can be reentrant.
27 - get rid of on_failure_jump_smart by doing the optimization in re_comp
28 rather than at run-time, so that re_match can be reentrant.
01618498 29*/
505bde11 30
fa9a63c5 31/* AIX requires this to be the first thing in the file. */
0b32bf0e 32#if defined _AIX && !defined REGEX_MALLOC
fa9a63c5
RM
33 #pragma alloca
34#endif
35
b8df54ff
PE
36/* Ignore some GCC warnings for now. This section should go away
37 once the Emacs and Gnulib regex code is merged. */
63807d47 38#if (__GNUC__ == 4 && 5 <= __GNUC_MINOR__) || 4 < __GNUC__
b8df54ff
PE
39# pragma GCC diagnostic ignored "-Wstrict-overflow"
40# ifndef emacs
41# pragma GCC diagnostic ignored "-Wunused-but-set-variable"
42# pragma GCC diagnostic ignored "-Wunused-function"
43# pragma GCC diagnostic ignored "-Wunused-macros"
44# pragma GCC diagnostic ignored "-Wunused-result"
45# pragma GCC diagnostic ignored "-Wunused-variable"
46# endif
47#endif
48
cf38a720 49#include <config.h>
fa9a63c5 50
0e926e56
PE
51#include <stddef.h>
52
53#ifdef emacs
4bb91c68
SM
54/* We need this for `regex.h', and perhaps for the Emacs include files. */
55# include <sys/types.h>
56#endif
fa9a63c5 57
14473664
SM
58/* Whether to use ISO C Amendment 1 wide char functions.
59 Those should not be used for Emacs since it uses its own. */
5e5388f6
GM
60#if defined _LIBC
61#define WIDE_CHAR_SUPPORT 1
62#else
14473664 63#define WIDE_CHAR_SUPPORT \
5e5388f6
GM
64 (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC && !emacs)
65#endif
14473664 66
fa463103 67/* For platform which support the ISO C amendment 1 functionality we
14473664 68 support user defined character classes. */
a0ad02f7 69#if WIDE_CHAR_SUPPORT
14473664
SM
70/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
71# include <wchar.h>
72# include <wctype.h>
73#endif
74
c0f9ea08
SM
75#ifdef _LIBC
76/* We have to keep the namespace clean. */
77# define regfree(preg) __regfree (preg)
78# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
79# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
ec869672 80# define regerror(err_code, preg, errbuf, errbuf_size) \
5e617bc2 81 __regerror (err_code, preg, errbuf, errbuf_size)
c0f9ea08
SM
82# define re_set_registers(bu, re, nu, st, en) \
83 __re_set_registers (bu, re, nu, st, en)
84# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
85 __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
86# define re_match(bufp, string, size, pos, regs) \
87 __re_match (bufp, string, size, pos, regs)
88# define re_search(bufp, string, size, startpos, range, regs) \
89 __re_search (bufp, string, size, startpos, range, regs)
90# define re_compile_pattern(pattern, length, bufp) \
91 __re_compile_pattern (pattern, length, bufp)
92# define re_set_syntax(syntax) __re_set_syntax (syntax)
93# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
94 __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
95# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
96
14473664
SM
97/* Make sure we call libc's function even if the user overrides them. */
98# define btowc __btowc
99# define iswctype __iswctype
100# define wctype __wctype
101
c0f9ea08
SM
102# define WEAK_ALIAS(a,b) weak_alias (a, b)
103
104/* We are also using some library internals. */
105# include <locale/localeinfo.h>
106# include <locale/elem-hash.h>
107# include <langinfo.h>
108#else
109# define WEAK_ALIAS(a,b)
110#endif
111
4bb91c68 112/* This is for other GNU distributions with internationalized messages. */
0b32bf0e 113#if HAVE_LIBINTL_H || defined _LIBC
fa9a63c5
RM
114# include <libintl.h>
115#else
116# define gettext(msgid) (msgid)
117#endif
118
5e69f11e
RM
119#ifndef gettext_noop
120/* This define is so xgettext can find the internationalizable
121 strings. */
0b32bf0e 122# define gettext_noop(String) String
5e69f11e
RM
123#endif
124
fa9a63c5
RM
125/* The `emacs' switch turns on certain matching commands
126 that make sense only in Emacs. */
127#ifdef emacs
128
d7306fe6 129# include <setjmp.h>
0b32bf0e 130# include "lisp.h"
e5560ff7 131# include "character.h"
0b32bf0e 132# include "buffer.h"
b18215fc
RS
133
134/* Make syntax table lookup grant data in gl_state. */
0b32bf0e 135# define SYNTAX_ENTRY_VIA_PROPERTY
b18215fc 136
0b32bf0e 137# include "syntax.h"
0b32bf0e 138# include "category.h"
fa9a63c5 139
7689ef0b
EZ
140# ifdef malloc
141# undef malloc
142# endif
0b32bf0e 143# define malloc xmalloc
7689ef0b
EZ
144# ifdef realloc
145# undef realloc
146# endif
0b32bf0e 147# define realloc xrealloc
7689ef0b
EZ
148# ifdef free
149# undef free
150# endif
0b32bf0e 151# define free xfree
9abbd165 152
7814e705 153/* Converts the pointer to the char to BEG-based offset from the start. */
0b32bf0e
SM
154# define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
155# define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
156
157# define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
bf216479 158# define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte)
62a6e103
AS
159# define RE_STRING_CHAR(p, multibyte) \
160 (multibyte ? (STRING_CHAR (p)) : (*(p)))
161# define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) \
162 (multibyte ? (STRING_CHAR_AND_LENGTH (p, len)) : ((len) = 1, *(p)))
2d1675e4 163
4c0354d7 164# define RE_CHAR_TO_MULTIBYTE(c) UNIBYTE_TO_CHAR (c)
cf9c99bc 165
2afc21f5 166# define RE_CHAR_TO_UNIBYTE(c) CHAR_TO_BYTE_SAFE (c)
cf9c99bc 167
6fdd04b0
KH
168/* Set C a (possibly converted to multibyte) character before P. P
169 points into a string which is the virtual concatenation of STR1
170 (which ends at END1) or STR2 (which ends at END2). */
bf216479
KH
171# define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
172 do { \
02cb78b5 173 if (target_multibyte) \
bf216479
KH
174 { \
175 re_char *dtemp = (p) == (str2) ? (end1) : (p); \
176 re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \
177 while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp)); \
62a6e103 178 c = STRING_CHAR (dtemp); \
bf216479
KH
179 } \
180 else \
181 { \
182 (c = ((p) == (str2) ? (end1) : (p))[-1]); \
cf9c99bc 183 (c) = RE_CHAR_TO_MULTIBYTE (c); \
bf216479 184 } \
2d1675e4
SM
185 } while (0)
186
6fdd04b0
KH
187/* Set C a (possibly converted to multibyte) character at P, and set
188 LEN to the byte length of that character. */
189# define GET_CHAR_AFTER(c, p, len) \
190 do { \
02cb78b5 191 if (target_multibyte) \
62a6e103 192 (c) = STRING_CHAR_AND_LENGTH (p, len); \
6fdd04b0
KH
193 else \
194 { \
cf9c99bc 195 (c) = *p; \
6fdd04b0 196 len = 1; \
cf9c99bc 197 (c) = RE_CHAR_TO_MULTIBYTE (c); \
6fdd04b0 198 } \
8f924df7 199 } while (0)
4e8a9132 200
fa9a63c5
RM
201#else /* not emacs */
202
203/* If we are not linking with Emacs proper,
204 we can't use the relocating allocator
205 even if config.h says that we can. */
0b32bf0e 206# undef REL_ALLOC
fa9a63c5 207
4004364e 208# include <unistd.h>
fa9a63c5 209
a77f947b
CY
210/* When used in Emacs's lib-src, we need xmalloc and xrealloc. */
211
b8df54ff 212static void *
d2762c86 213xmalloc (size_t size)
a77f947b 214{
38182d90 215 void *val = malloc (size);
a77f947b
CY
216 if (!val && size)
217 {
218 write (2, "virtual memory exhausted\n", 25);
219 exit (1);
220 }
221 return val;
222}
223
b8df54ff 224static void *
d2762c86 225xrealloc (void *block, size_t size)
a77f947b 226{
38182d90 227 void *val;
a77f947b
CY
228 /* We must call malloc explicitly when BLOCK is 0, since some
229 reallocs don't do this. */
230 if (! block)
38182d90 231 val = malloc (size);
a77f947b 232 else
38182d90 233 val = realloc (block, size);
a77f947b
CY
234 if (!val && size)
235 {
236 write (2, "virtual memory exhausted\n", 25);
237 exit (1);
238 }
239 return val;
240}
241
a073faa6
CY
242# ifdef malloc
243# undef malloc
244# endif
245# define malloc xmalloc
246# ifdef realloc
247# undef realloc
248# endif
249# define realloc xrealloc
250
f5d9e83a 251# include <stdbool.h>
9cfdb3ec 252# include <string.h>
fa9a63c5
RM
253
254/* Define the syntax stuff for \<, \>, etc. */
255
990b2375 256/* Sword must be nonzero for the wordchar pattern commands in re_match_2. */
669fa600 257enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
fa9a63c5 258
e934739e 259/* Dummy macros for non-Emacs environments. */
0b32bf0e
SM
260# define CHAR_CHARSET(c) 0
261# define CHARSET_LEADING_CODE_BASE(c) 0
262# define MAX_MULTIBYTE_LENGTH 1
263# define RE_MULTIBYTE_P(x) 0
bf216479 264# define RE_TARGET_MULTIBYTE_P(x) 0
0b32bf0e
SM
265# define WORD_BOUNDARY_P(c1, c2) (0)
266# define CHAR_HEAD_P(p) (1)
267# define SINGLE_BYTE_CHAR_P(c) (1)
268# define SAME_CHARSET_P(c1, c2) (1)
aa3830c4 269# define BYTES_BY_CHAR_HEAD(p) (1)
70806df6 270# define PREV_CHAR_BOUNDARY(p, limit) ((p)--)
62a6e103
AS
271# define STRING_CHAR(p) (*(p))
272# define RE_STRING_CHAR(p, multibyte) STRING_CHAR (p)
0b32bf0e 273# define CHAR_STRING(c, s) (*(s) = (c), 1)
62a6e103
AS
274# define STRING_CHAR_AND_LENGTH(p, actual_len) ((actual_len) = 1, *(p))
275# define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) STRING_CHAR_AND_LENGTH (p, len)
cf9c99bc
KH
276# define RE_CHAR_TO_MULTIBYTE(c) (c)
277# define RE_CHAR_TO_UNIBYTE(c) (c)
0b32bf0e 278# define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
b18215fc 279 (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
6fdd04b0
KH
280# define GET_CHAR_AFTER(c, p, len) \
281 (c = *p, len = 1)
0b32bf0e 282# define MAKE_CHAR(charset, c1, c2) (c1)
9117d724
KH
283# define BYTE8_TO_CHAR(c) (c)
284# define CHAR_BYTE8_P(c) (0)
bf216479 285# define CHAR_LEADING_CODE(c) (c)
8f924df7 286
fa9a63c5 287#endif /* not emacs */
4e8a9132
SM
288
289#ifndef RE_TRANSLATE
0b32bf0e
SM
290# define RE_TRANSLATE(TBL, C) ((unsigned char)(TBL)[C])
291# define RE_TRANSLATE_P(TBL) (TBL)
4e8a9132 292#endif
fa9a63c5
RM
293\f
294/* Get the interface, including the syntax bits. */
295#include "regex.h"
296
f71b19b6
DL
297/* isalpha etc. are used for the character classes. */
298#include <ctype.h>
fa9a63c5 299
f71b19b6 300#ifdef emacs
fa9a63c5 301
f71b19b6 302/* 1 if C is an ASCII character. */
0b32bf0e 303# define IS_REAL_ASCII(c) ((c) < 0200)
fa9a63c5 304
f71b19b6 305/* 1 if C is a unibyte character. */
0b32bf0e 306# define ISUNIBYTE(c) (SINGLE_BYTE_CHAR_P ((c)))
96cc36cc 307
f71b19b6 308/* The Emacs definitions should not be directly affected by locales. */
96cc36cc 309
f71b19b6 310/* In Emacs, these are only used for single-byte characters. */
0b32bf0e
SM
311# define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
312# define ISCNTRL(c) ((c) < ' ')
313# define ISXDIGIT(c) (((c) >= '0' && (c) <= '9') \
f71b19b6
DL
314 || ((c) >= 'a' && (c) <= 'f') \
315 || ((c) >= 'A' && (c) <= 'F'))
96cc36cc
RS
316
317/* This is only used for single-byte characters. */
0b32bf0e 318# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
96cc36cc
RS
319
320/* The rest must handle multibyte characters. */
321
0b32bf0e 322# define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \
f71b19b6 323 ? (c) > ' ' && !((c) >= 0177 && (c) <= 0237) \
96cc36cc
RS
324 : 1)
325
14473664 326# define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \
f71b19b6 327 ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237) \
96cc36cc
RS
328 : 1)
329
0b32bf0e 330# define ISALNUM(c) (IS_REAL_ASCII (c) \
f71b19b6
DL
331 ? (((c) >= 'a' && (c) <= 'z') \
332 || ((c) >= 'A' && (c) <= 'Z') \
333 || ((c) >= '0' && (c) <= '9')) \
96cc36cc
RS
334 : SYNTAX (c) == Sword)
335
0b32bf0e 336# define ISALPHA(c) (IS_REAL_ASCII (c) \
f71b19b6
DL
337 ? (((c) >= 'a' && (c) <= 'z') \
338 || ((c) >= 'A' && (c) <= 'Z')) \
96cc36cc
RS
339 : SYNTAX (c) == Sword)
340
5da9919f 341# define ISLOWER(c) lowercasep (c)
96cc36cc 342
0b32bf0e 343# define ISPUNCT(c) (IS_REAL_ASCII (c) \
f71b19b6
DL
344 ? ((c) > ' ' && (c) < 0177 \
345 && !(((c) >= 'a' && (c) <= 'z') \
4bb91c68
SM
346 || ((c) >= 'A' && (c) <= 'Z') \
347 || ((c) >= '0' && (c) <= '9'))) \
96cc36cc
RS
348 : SYNTAX (c) != Sword)
349
0b32bf0e 350# define ISSPACE(c) (SYNTAX (c) == Swhitespace)
96cc36cc 351
5da9919f 352# define ISUPPER(c) uppercasep (c)
96cc36cc 353
0b32bf0e 354# define ISWORD(c) (SYNTAX (c) == Sword)
96cc36cc
RS
355
356#else /* not emacs */
357
f71b19b6 358/* 1 if C is an ASCII character. */
0b32bf0e 359# define IS_REAL_ASCII(c) ((c) < 0200)
f71b19b6
DL
360
361/* This distinction is not meaningful, except in Emacs. */
0b32bf0e
SM
362# define ISUNIBYTE(c) 1
363
364# ifdef isblank
0e926e56 365# define ISBLANK(c) isblank (c)
0b32bf0e
SM
366# else
367# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
368# endif
369# ifdef isgraph
0e926e56 370# define ISGRAPH(c) isgraph (c)
0b32bf0e 371# else
0e926e56 372# define ISGRAPH(c) (isprint (c) && !isspace (c))
0b32bf0e
SM
373# endif
374
0e926e56 375/* Solaris defines ISPRINT so we must undefine it first. */
4bb91c68 376# undef ISPRINT
0e926e56
PE
377# define ISPRINT(c) isprint (c)
378# define ISDIGIT(c) isdigit (c)
379# define ISALNUM(c) isalnum (c)
380# define ISALPHA(c) isalpha (c)
381# define ISCNTRL(c) iscntrl (c)
382# define ISLOWER(c) islower (c)
383# define ISPUNCT(c) ispunct (c)
384# define ISSPACE(c) isspace (c)
385# define ISUPPER(c) isupper (c)
386# define ISXDIGIT(c) isxdigit (c)
0b32bf0e 387
5e617bc2 388# define ISWORD(c) ISALPHA (c)
0b32bf0e 389
4bb91c68 390# ifdef _tolower
5e617bc2 391# define TOLOWER(c) _tolower (c)
4bb91c68 392# else
5e617bc2 393# define TOLOWER(c) tolower (c)
4bb91c68
SM
394# endif
395
396/* How many characters in the character set. */
397# define CHAR_SET_SIZE 256
398
0b32bf0e 399# ifdef SYNTAX_TABLE
f71b19b6 400
0b32bf0e 401extern char *re_syntax_table;
f71b19b6 402
0b32bf0e
SM
403# else /* not SYNTAX_TABLE */
404
0b32bf0e
SM
405static char re_syntax_table[CHAR_SET_SIZE];
406
407static void
d2762c86 408init_syntax_once (void)
0b32bf0e
SM
409{
410 register int c;
411 static int done = 0;
412
413 if (done)
414 return;
415
72af86bd 416 memset (re_syntax_table, 0, sizeof re_syntax_table);
0b32bf0e 417
4bb91c68
SM
418 for (c = 0; c < CHAR_SET_SIZE; ++c)
419 if (ISALNUM (c))
420 re_syntax_table[c] = Sword;
fa9a63c5 421
669fa600 422 re_syntax_table['_'] = Ssymbol;
fa9a63c5 423
0b32bf0e
SM
424 done = 1;
425}
426
427# endif /* not SYNTAX_TABLE */
96cc36cc 428
4bb91c68
SM
429# define SYNTAX(c) re_syntax_table[(c)]
430
96cc36cc
RS
431#endif /* not emacs */
432\f
261cb4bb 433#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
fa9a63c5
RM
434\f
435/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
436 use `alloca' instead of `malloc'. This is because using malloc in
437 re_search* or re_match* could cause memory leaks when C-g is used in
438 Emacs; also, malloc is slower and causes storage fragmentation. On
5e69f11e
RM
439 the other hand, malloc is more portable, and easier to debug.
440
fa9a63c5
RM
441 Because we sometimes use alloca, some routines have to be macros,
442 not functions -- `alloca'-allocated space disappears at the end of the
443 function it is called in. */
444
445#ifdef REGEX_MALLOC
446
0b32bf0e
SM
447# define REGEX_ALLOCATE malloc
448# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
449# define REGEX_FREE free
fa9a63c5
RM
450
451#else /* not REGEX_MALLOC */
452
453/* Emacs already defines alloca, sometimes. */
0b32bf0e 454# ifndef alloca
fa9a63c5
RM
455
456/* Make alloca work the best possible way. */
0b32bf0e
SM
457# ifdef __GNUC__
458# define alloca __builtin_alloca
459# else /* not __GNUC__ */
7f585e7a 460# ifdef HAVE_ALLOCA_H
0b32bf0e
SM
461# include <alloca.h>
462# endif /* HAVE_ALLOCA_H */
463# endif /* not __GNUC__ */
fa9a63c5 464
0b32bf0e 465# endif /* not alloca */
fa9a63c5 466
0b32bf0e 467# define REGEX_ALLOCATE alloca
fa9a63c5
RM
468
469/* Assumes a `char *destination' variable. */
0b32bf0e 470# define REGEX_REALLOCATE(source, osize, nsize) \
fa9a63c5 471 (destination = (char *) alloca (nsize), \
4bb91c68 472 memcpy (destination, source, osize))
fa9a63c5
RM
473
474/* No need to do anything to free, after alloca. */
0b32bf0e 475# define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
fa9a63c5
RM
476
477#endif /* not REGEX_MALLOC */
478
479/* Define how to allocate the failure stack. */
480
0b32bf0e 481#if defined REL_ALLOC && defined REGEX_MALLOC
4297555e 482
0b32bf0e 483# define REGEX_ALLOCATE_STACK(size) \
fa9a63c5 484 r_alloc (&failure_stack_ptr, (size))
0b32bf0e 485# define REGEX_REALLOCATE_STACK(source, osize, nsize) \
fa9a63c5 486 r_re_alloc (&failure_stack_ptr, (nsize))
0b32bf0e 487# define REGEX_FREE_STACK(ptr) \
fa9a63c5
RM
488 r_alloc_free (&failure_stack_ptr)
489
4297555e 490#else /* not using relocating allocator */
fa9a63c5 491
0b32bf0e 492# ifdef REGEX_MALLOC
fa9a63c5 493
0b32bf0e
SM
494# define REGEX_ALLOCATE_STACK malloc
495# define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
496# define REGEX_FREE_STACK free
fa9a63c5 497
0b32bf0e 498# else /* not REGEX_MALLOC */
fa9a63c5 499
0b32bf0e 500# define REGEX_ALLOCATE_STACK alloca
fa9a63c5 501
0b32bf0e 502# define REGEX_REALLOCATE_STACK(source, osize, nsize) \
fa9a63c5 503 REGEX_REALLOCATE (source, osize, nsize)
7814e705 504/* No need to explicitly free anything. */
0b32bf0e 505# define REGEX_FREE_STACK(arg) ((void)0)
fa9a63c5 506
0b32bf0e 507# endif /* not REGEX_MALLOC */
4297555e 508#endif /* not using relocating allocator */
fa9a63c5
RM
509
510
511/* True if `size1' is non-NULL and PTR is pointing anywhere inside
512 `string1' or just past its end. This works if PTR is NULL, which is
513 a good thing. */
25fe55af 514#define FIRST_STRING_P(ptr) \
fa9a63c5
RM
515 (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
516
517/* (Re)Allocate N items of type T using malloc, or fail. */
518#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
519#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
fa9a63c5
RM
520#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
521
4bb91c68 522#define BYTEWIDTH 8 /* In bits. */
fa9a63c5
RM
523
524#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
525
526#undef MAX
527#undef MIN
528#define MAX(a, b) ((a) > (b) ? (a) : (b))
529#define MIN(a, b) ((a) < (b) ? (a) : (b))
530
66f0296e 531/* Type of source-pattern and string chars. */
a6fc3b5c
EZ
532#ifdef _MSC_VER
533typedef unsigned char re_char;
534#else
66f0296e 535typedef const unsigned char re_char;
a6fc3b5c 536#endif
66f0296e 537
fa9a63c5 538typedef char boolean;
fa9a63c5 539
261cb4bb
PE
540static regoff_t re_match_2_internal (struct re_pattern_buffer *bufp,
541 re_char *string1, size_t size1,
542 re_char *string2, size_t size2,
543 ssize_t pos,
544 struct re_registers *regs,
545 ssize_t stop);
fa9a63c5
RM
546\f
547/* These are the command codes that appear in compiled regular
4bb91c68 548 expressions. Some opcodes are followed by argument bytes. A
fa9a63c5
RM
549 command code can specify any interpretation whatsoever for its
550 arguments. Zero bytes may appear in the compiled regular expression. */
551
552typedef enum
553{
554 no_op = 0,
555
4bb91c68 556 /* Succeed right away--no more backtracking. */
fa9a63c5
RM
557 succeed,
558
25fe55af 559 /* Followed by one byte giving n, then by n literal bytes. */
fa9a63c5
RM
560 exactn,
561
25fe55af 562 /* Matches any (more or less) character. */
fa9a63c5
RM
563 anychar,
564
25fe55af
RS
565 /* Matches any one char belonging to specified set. First
566 following byte is number of bitmap bytes. Then come bytes
567 for a bitmap saying which chars are in. Bits in each byte
568 are ordered low-bit-first. A character is in the set if its
569 bit is 1. A character too large to have a bit in the map is
96cc36cc
RS
570 automatically not in the set.
571
572 If the length byte has the 0x80 bit set, then that stuff
573 is followed by a range table:
574 2 bytes of flags for character sets (low 8 bits, high 8 bits)
0b32bf0e 575 See RANGE_TABLE_WORK_BITS below.
01618498 576 2 bytes, the number of pairs that follow (upto 32767)
96cc36cc 577 pairs, each 2 multibyte characters,
0b32bf0e 578 each multibyte character represented as 3 bytes. */
fa9a63c5
RM
579 charset,
580
25fe55af 581 /* Same parameters as charset, but match any character that is
4bb91c68 582 not one of those specified. */
fa9a63c5
RM
583 charset_not,
584
25fe55af
RS
585 /* Start remembering the text that is matched, for storing in a
586 register. Followed by one byte with the register number, in
587 the range 0 to one less than the pattern buffer's re_nsub
505bde11 588 field. */
fa9a63c5
RM
589 start_memory,
590
25fe55af
RS
591 /* Stop remembering the text that is matched and store it in a
592 memory register. Followed by one byte with the register
593 number, in the range 0 to one less than `re_nsub' in the
505bde11 594 pattern buffer. */
fa9a63c5
RM
595 stop_memory,
596
25fe55af 597 /* Match a duplicate of something remembered. Followed by one
4bb91c68 598 byte containing the register number. */
fa9a63c5
RM
599 duplicate,
600
25fe55af 601 /* Fail unless at beginning of line. */
fa9a63c5
RM
602 begline,
603
4bb91c68 604 /* Fail unless at end of line. */
fa9a63c5
RM
605 endline,
606
25fe55af
RS
607 /* Succeeds if at beginning of buffer (if emacs) or at beginning
608 of string to be matched (if not). */
fa9a63c5
RM
609 begbuf,
610
25fe55af 611 /* Analogously, for end of buffer/string. */
fa9a63c5 612 endbuf,
5e69f11e 613
25fe55af 614 /* Followed by two byte relative address to which to jump. */
5e69f11e 615 jump,
fa9a63c5 616
25fe55af 617 /* Followed by two-byte relative address of place to resume at
7814e705 618 in case of failure. */
fa9a63c5 619 on_failure_jump,
5e69f11e 620
25fe55af
RS
621 /* Like on_failure_jump, but pushes a placeholder instead of the
622 current string position when executed. */
fa9a63c5 623 on_failure_keep_string_jump,
5e69f11e 624
505bde11
SM
625 /* Just like `on_failure_jump', except that it checks that we
626 don't get stuck in an infinite loop (matching an empty string
627 indefinitely). */
628 on_failure_jump_loop,
629
0683b6fa
SM
630 /* Just like `on_failure_jump_loop', except that it checks for
631 a different kind of loop (the kind that shows up with non-greedy
632 operators). This operation has to be immediately preceded
633 by a `no_op'. */
634 on_failure_jump_nastyloop,
635
0b32bf0e 636 /* A smart `on_failure_jump' used for greedy * and + operators.
c7015153 637 It analyzes the loop before which it is put and if the
505bde11 638 loop does not require backtracking, it changes itself to
4e8a9132
SM
639 `on_failure_keep_string_jump' and short-circuits the loop,
640 else it just defaults to changing itself into `on_failure_jump'.
641 It assumes that it is pointing to just past a `jump'. */
505bde11 642 on_failure_jump_smart,
fa9a63c5 643
25fe55af 644 /* Followed by two-byte relative address and two-byte number n.
ed0767d8
SM
645 After matching N times, jump to the address upon failure.
646 Does not work if N starts at 0: use on_failure_jump_loop
647 instead. */
fa9a63c5
RM
648 succeed_n,
649
25fe55af
RS
650 /* Followed by two-byte relative address, and two-byte number n.
651 Jump to the address N times, then fail. */
fa9a63c5
RM
652 jump_n,
653
25fe55af 654 /* Set the following two-byte relative address to the
7814e705 655 subsequent two-byte number. The address *includes* the two
25fe55af 656 bytes of number. */
fa9a63c5
RM
657 set_number_at,
658
fa9a63c5
RM
659 wordbeg, /* Succeeds if at word beginning. */
660 wordend, /* Succeeds if at word end. */
661
662 wordbound, /* Succeeds if at a word boundary. */
7814e705 663 notwordbound, /* Succeeds if not at a word boundary. */
fa9a63c5 664
669fa600
SM
665 symbeg, /* Succeeds if at symbol beginning. */
666 symend, /* Succeeds if at symbol end. */
667
fa9a63c5 668 /* Matches any character whose syntax is specified. Followed by
25fe55af 669 a byte which contains a syntax code, e.g., Sword. */
fa9a63c5
RM
670 syntaxspec,
671
672 /* Matches any character whose syntax is not that specified. */
1fb352e0
SM
673 notsyntaxspec
674
675#ifdef emacs
676 ,before_dot, /* Succeeds if before point. */
677 at_dot, /* Succeeds if at point. */
678 after_dot, /* Succeeds if after point. */
b18215fc
RS
679
680 /* Matches any character whose category-set contains the specified
7814e705
JB
681 category. The operator is followed by a byte which contains a
682 category code (mnemonic ASCII character). */
b18215fc
RS
683 categoryspec,
684
685 /* Matches any character whose category-set does not contain the
686 specified category. The operator is followed by a byte which
687 contains the category code (mnemonic ASCII character). */
688 notcategoryspec
fa9a63c5
RM
689#endif /* emacs */
690} re_opcode_t;
691\f
692/* Common operations on the compiled pattern. */
693
694/* Store NUMBER in two contiguous bytes starting at DESTINATION. */
695
696#define STORE_NUMBER(destination, number) \
697 do { \
698 (destination)[0] = (number) & 0377; \
699 (destination)[1] = (number) >> 8; \
700 } while (0)
701
702/* Same as STORE_NUMBER, except increment DESTINATION to
703 the byte after where the number is stored. Therefore, DESTINATION
704 must be an lvalue. */
705
706#define STORE_NUMBER_AND_INCR(destination, number) \
707 do { \
708 STORE_NUMBER (destination, number); \
709 (destination) += 2; \
710 } while (0)
711
712/* Put into DESTINATION a number stored in two contiguous bytes starting
713 at SOURCE. */
714
715#define EXTRACT_NUMBER(destination, source) \
716 do { \
717 (destination) = *(source) & 0377; \
718 (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \
719 } while (0)
720
721#ifdef DEBUG
722static void
261cb4bb 723extract_number (int *dest, re_char *source)
fa9a63c5 724{
5e69f11e 725 int temp = SIGN_EXTEND_CHAR (*(source + 1));
fa9a63c5
RM
726 *dest = *source & 0377;
727 *dest += temp << 8;
728}
729
4bb91c68 730# ifndef EXTRACT_MACROS /* To debug the macros. */
0b32bf0e
SM
731# undef EXTRACT_NUMBER
732# define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
733# endif /* not EXTRACT_MACROS */
fa9a63c5
RM
734
735#endif /* DEBUG */
736
737/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
738 SOURCE must be an lvalue. */
739
740#define EXTRACT_NUMBER_AND_INCR(destination, source) \
741 do { \
742 EXTRACT_NUMBER (destination, source); \
25fe55af 743 (source) += 2; \
fa9a63c5
RM
744 } while (0)
745
746#ifdef DEBUG
747static void
261cb4bb 748extract_number_and_incr (int *destination, re_char **source)
5e69f11e 749{
fa9a63c5
RM
750 extract_number (destination, *source);
751 *source += 2;
752}
753
0b32bf0e
SM
754# ifndef EXTRACT_MACROS
755# undef EXTRACT_NUMBER_AND_INCR
756# define EXTRACT_NUMBER_AND_INCR(dest, src) \
fa9a63c5 757 extract_number_and_incr (&dest, &src)
0b32bf0e 758# endif /* not EXTRACT_MACROS */
fa9a63c5
RM
759
760#endif /* DEBUG */
761\f
b18215fc
RS
762/* Store a multibyte character in three contiguous bytes starting
763 DESTINATION, and increment DESTINATION to the byte after where the
7814e705 764 character is stored. Therefore, DESTINATION must be an lvalue. */
b18215fc
RS
765
766#define STORE_CHARACTER_AND_INCR(destination, character) \
767 do { \
768 (destination)[0] = (character) & 0377; \
769 (destination)[1] = ((character) >> 8) & 0377; \
770 (destination)[2] = (character) >> 16; \
771 (destination) += 3; \
772 } while (0)
773
774/* Put into DESTINATION a character stored in three contiguous bytes
7814e705 775 starting at SOURCE. */
b18215fc
RS
776
777#define EXTRACT_CHARACTER(destination, source) \
778 do { \
779 (destination) = ((source)[0] \
780 | ((source)[1] << 8) \
781 | ((source)[2] << 16)); \
782 } while (0)
783
784
785/* Macros for charset. */
786
787/* Size of bitmap of charset P in bytes. P is a start of charset,
788 i.e. *P is (re_opcode_t) charset or (re_opcode_t) charset_not. */
789#define CHARSET_BITMAP_SIZE(p) ((p)[1] & 0x7F)
790
791/* Nonzero if charset P has range table. */
25fe55af 792#define CHARSET_RANGE_TABLE_EXISTS_P(p) ((p)[1] & 0x80)
b18215fc
RS
793
794/* Return the address of range table of charset P. But not the start
795 of table itself, but the before where the number of ranges is
96cc36cc
RS
796 stored. `2 +' means to skip re_opcode_t and size of bitmap,
797 and the 2 bytes of flags at the start of the range table. */
798#define CHARSET_RANGE_TABLE(p) (&(p)[4 + CHARSET_BITMAP_SIZE (p)])
799
800/* Extract the bit flags that start a range table. */
801#define CHARSET_RANGE_TABLE_BITS(p) \
802 ((p)[2 + CHARSET_BITMAP_SIZE (p)] \
803 + (p)[3 + CHARSET_BITMAP_SIZE (p)] * 0x100)
b18215fc 804
b18215fc 805/* Return the address of end of RANGE_TABLE. COUNT is number of
7814e705
JB
806 ranges (which is a pair of (start, end)) in the RANGE_TABLE. `* 2'
807 is start of range and end of range. `* 3' is size of each start
b18215fc
RS
808 and end. */
809#define CHARSET_RANGE_TABLE_END(range_table, count) \
810 ((range_table) + (count) * 2 * 3)
811
7814e705 812/* Test if C is in RANGE_TABLE. A flag NOT is negated if C is in.
b18215fc
RS
813 COUNT is number of ranges in RANGE_TABLE. */
814#define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count) \
815 do \
816 { \
01618498 817 re_wchar_t range_start, range_end; \
19ed5445 818 re_char *rtp; \
01618498 819 re_char *range_table_end \
b18215fc
RS
820 = CHARSET_RANGE_TABLE_END ((range_table), (count)); \
821 \
19ed5445 822 for (rtp = (range_table); rtp < range_table_end; rtp += 2 * 3) \
b18215fc 823 { \
19ed5445
PE
824 EXTRACT_CHARACTER (range_start, rtp); \
825 EXTRACT_CHARACTER (range_end, rtp + 3); \
b18215fc
RS
826 \
827 if (range_start <= (c) && (c) <= range_end) \
828 { \
829 (not) = !(not); \
830 break; \
831 } \
832 } \
833 } \
834 while (0)
835
836/* Test if C is in range table of CHARSET. The flag NOT is negated if
837 C is listed in it. */
838#define CHARSET_LOOKUP_RANGE_TABLE(not, c, charset) \
839 do \
840 { \
841 /* Number of ranges in range table. */ \
842 int count; \
01618498
SM
843 re_char *range_table = CHARSET_RANGE_TABLE (charset); \
844 \
b18215fc
RS
845 EXTRACT_NUMBER_AND_INCR (count, range_table); \
846 CHARSET_LOOKUP_RANGE_TABLE_RAW ((not), (c), range_table, count); \
847 } \
848 while (0)
849\f
fa9a63c5
RM
850/* If DEBUG is defined, Regex prints many voluminous messages about what
851 it is doing (if the variable `debug' is nonzero). If linked with the
852 main program in `iregex.c', you can enter patterns and strings
853 interactively. And if linked with the main program in `main.c' and
4bb91c68 854 the other test files, you can run the already-written tests. */
fa9a63c5
RM
855
856#ifdef DEBUG
857
858/* We use standard I/O for debugging. */
0b32bf0e 859# include <stdio.h>
fa9a63c5
RM
860
861/* It is useful to test things that ``must'' be true when debugging. */
0b32bf0e 862# include <assert.h>
fa9a63c5 863
99633e97 864static int debug = -100000;
fa9a63c5 865
0b32bf0e
SM
866# define DEBUG_STATEMENT(e) e
867# define DEBUG_PRINT1(x) if (debug > 0) printf (x)
868# define DEBUG_PRINT2(x1, x2) if (debug > 0) printf (x1, x2)
869# define DEBUG_PRINT3(x1, x2, x3) if (debug > 0) printf (x1, x2, x3)
870# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug > 0) printf (x1, x2, x3, x4)
871# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
99633e97 872 if (debug > 0) print_partial_compiled_pattern (s, e)
0b32bf0e 873# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
99633e97 874 if (debug > 0) print_double_string (w, s1, sz1, s2, sz2)
fa9a63c5
RM
875
876
877/* Print the fastmap in human-readable form. */
878
879void
880print_fastmap (fastmap)
881 char *fastmap;
882{
883 unsigned was_a_range = 0;
5e69f11e
RM
884 unsigned i = 0;
885
fa9a63c5
RM
886 while (i < (1 << BYTEWIDTH))
887 {
888 if (fastmap[i++])
889 {
890 was_a_range = 0;
25fe55af
RS
891 putchar (i - 1);
892 while (i < (1 << BYTEWIDTH) && fastmap[i])
893 {
894 was_a_range = 1;
895 i++;
896 }
fa9a63c5 897 if (was_a_range)
25fe55af
RS
898 {
899 printf ("-");
900 putchar (i - 1);
901 }
902 }
fa9a63c5 903 }
5e69f11e 904 putchar ('\n');
fa9a63c5
RM
905}
906
907
908/* Print a compiled pattern string in human-readable form, starting at
909 the START pointer into it and ending just before the pointer END. */
910
911void
912print_partial_compiled_pattern (start, end)
01618498
SM
913 re_char *start;
914 re_char *end;
fa9a63c5
RM
915{
916 int mcnt, mcnt2;
01618498
SM
917 re_char *p = start;
918 re_char *pend = end;
fa9a63c5
RM
919
920 if (start == NULL)
921 {
a1a052df 922 fprintf (stderr, "(null)\n");
fa9a63c5
RM
923 return;
924 }
5e69f11e 925
fa9a63c5
RM
926 /* Loop over pattern commands. */
927 while (p < pend)
928 {
a1a052df 929 fprintf (stderr, "%d:\t", p - start);
fa9a63c5
RM
930
931 switch ((re_opcode_t) *p++)
932 {
25fe55af 933 case no_op:
a1a052df 934 fprintf (stderr, "/no_op");
25fe55af 935 break;
fa9a63c5 936
99633e97 937 case succeed:
a1a052df 938 fprintf (stderr, "/succeed");
99633e97
SM
939 break;
940
fa9a63c5
RM
941 case exactn:
942 mcnt = *p++;
a1a052df 943 fprintf (stderr, "/exactn/%d", mcnt);
25fe55af 944 do
fa9a63c5 945 {
a1a052df 946 fprintf (stderr, "/%c", *p++);
25fe55af
RS
947 }
948 while (--mcnt);
949 break;
fa9a63c5
RM
950
951 case start_memory:
a1a052df 952 fprintf (stderr, "/start_memory/%d", *p++);
25fe55af 953 break;
fa9a63c5
RM
954
955 case stop_memory:
a1a052df 956 fprintf (stderr, "/stop_memory/%d", *p++);
25fe55af 957 break;
fa9a63c5
RM
958
959 case duplicate:
a1a052df 960 fprintf (stderr, "/duplicate/%d", *p++);
fa9a63c5
RM
961 break;
962
963 case anychar:
a1a052df 964 fprintf (stderr, "/anychar");
fa9a63c5
RM
965 break;
966
967 case charset:
25fe55af
RS
968 case charset_not:
969 {
970 register int c, last = -100;
fa9a63c5 971 register int in_range = 0;
99633e97
SM
972 int length = CHARSET_BITMAP_SIZE (p - 1);
973 int has_range_table = CHARSET_RANGE_TABLE_EXISTS_P (p - 1);
fa9a63c5 974
a1a052df 975 fprintf (stderr, "/charset [%s",
839966f3 976 (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
5e69f11e 977
839966f3
KH
978 if (p + *p >= pend)
979 fprintf (stderr, " !extends past end of pattern! ");
fa9a63c5 980
25fe55af 981 for (c = 0; c < 256; c++)
96cc36cc 982 if (c / 8 < length
fa9a63c5
RM
983 && (p[1 + (c/8)] & (1 << (c % 8))))
984 {
985 /* Are we starting a range? */
986 if (last + 1 == c && ! in_range)
987 {
a1a052df 988 fprintf (stderr, "-");
fa9a63c5
RM
989 in_range = 1;
990 }
991 /* Have we broken a range? */
992 else if (last + 1 != c && in_range)
96cc36cc 993 {
a1a052df 994 fprintf (stderr, "%c", last);
fa9a63c5
RM
995 in_range = 0;
996 }
5e69f11e 997
fa9a63c5 998 if (! in_range)
a1a052df 999 fprintf (stderr, "%c", c);
fa9a63c5
RM
1000
1001 last = c;
25fe55af 1002 }
fa9a63c5
RM
1003
1004 if (in_range)
a1a052df 1005 fprintf (stderr, "%c", last);
fa9a63c5 1006
a1a052df 1007 fprintf (stderr, "]");
fa9a63c5 1008
99633e97 1009 p += 1 + length;
96cc36cc 1010
96cc36cc 1011 if (has_range_table)
99633e97
SM
1012 {
1013 int count;
a1a052df 1014 fprintf (stderr, "has-range-table");
99633e97
SM
1015
1016 /* ??? Should print the range table; for now, just skip it. */
1017 p += 2; /* skip range table bits */
1018 EXTRACT_NUMBER_AND_INCR (count, p);
1019 p = CHARSET_RANGE_TABLE_END (p, count);
1020 }
fa9a63c5
RM
1021 }
1022 break;
1023
1024 case begline:
a1a052df 1025 fprintf (stderr, "/begline");
25fe55af 1026 break;
fa9a63c5
RM
1027
1028 case endline:
a1a052df 1029 fprintf (stderr, "/endline");
25fe55af 1030 break;
fa9a63c5
RM
1031
1032 case on_failure_jump:
25fe55af 1033 extract_number_and_incr (&mcnt, &p);
a1a052df 1034 fprintf (stderr, "/on_failure_jump to %d", p + mcnt - start);
25fe55af 1035 break;
fa9a63c5
RM
1036
1037 case on_failure_keep_string_jump:
25fe55af 1038 extract_number_and_incr (&mcnt, &p);
a1a052df 1039 fprintf (stderr, "/on_failure_keep_string_jump to %d", p + mcnt - start);
25fe55af 1040 break;
fa9a63c5 1041
0683b6fa
SM
1042 case on_failure_jump_nastyloop:
1043 extract_number_and_incr (&mcnt, &p);
a1a052df 1044 fprintf (stderr, "/on_failure_jump_nastyloop to %d", p + mcnt - start);
0683b6fa
SM
1045 break;
1046
505bde11 1047 case on_failure_jump_loop:
fa9a63c5 1048 extract_number_and_incr (&mcnt, &p);
a1a052df 1049 fprintf (stderr, "/on_failure_jump_loop to %d", p + mcnt - start);
5e69f11e
RM
1050 break;
1051
505bde11 1052 case on_failure_jump_smart:
fa9a63c5 1053 extract_number_and_incr (&mcnt, &p);
a1a052df 1054 fprintf (stderr, "/on_failure_jump_smart to %d", p + mcnt - start);
5e69f11e
RM
1055 break;
1056
25fe55af 1057 case jump:
fa9a63c5 1058 extract_number_and_incr (&mcnt, &p);
a1a052df 1059 fprintf (stderr, "/jump to %d", p + mcnt - start);
fa9a63c5
RM
1060 break;
1061
25fe55af
RS
1062 case succeed_n:
1063 extract_number_and_incr (&mcnt, &p);
1064 extract_number_and_incr (&mcnt2, &p);
a1a052df 1065 fprintf (stderr, "/succeed_n to %d, %d times", p - 2 + mcnt - start, mcnt2);
25fe55af 1066 break;
5e69f11e 1067
25fe55af
RS
1068 case jump_n:
1069 extract_number_and_incr (&mcnt, &p);
1070 extract_number_and_incr (&mcnt2, &p);
a1a052df 1071 fprintf (stderr, "/jump_n to %d, %d times", p - 2 + mcnt - start, mcnt2);
25fe55af 1072 break;
5e69f11e 1073
25fe55af
RS
1074 case set_number_at:
1075 extract_number_and_incr (&mcnt, &p);
1076 extract_number_and_incr (&mcnt2, &p);
a1a052df 1077 fprintf (stderr, "/set_number_at location %d to %d", p - 2 + mcnt - start, mcnt2);
25fe55af 1078 break;
5e69f11e 1079
25fe55af 1080 case wordbound:
a1a052df 1081 fprintf (stderr, "/wordbound");
fa9a63c5
RM
1082 break;
1083
1084 case notwordbound:
a1a052df 1085 fprintf (stderr, "/notwordbound");
25fe55af 1086 break;
fa9a63c5
RM
1087
1088 case wordbeg:
a1a052df 1089 fprintf (stderr, "/wordbeg");
fa9a63c5 1090 break;
5e69f11e 1091
fa9a63c5 1092 case wordend:
a1a052df 1093 fprintf (stderr, "/wordend");
e2543b02 1094 break;
5e69f11e 1095
669fa600 1096 case symbeg:
e2543b02 1097 fprintf (stderr, "/symbeg");
669fa600
SM
1098 break;
1099
1100 case symend:
e2543b02 1101 fprintf (stderr, "/symend");
669fa600 1102 break;
5e69f11e 1103
1fb352e0 1104 case syntaxspec:
a1a052df 1105 fprintf (stderr, "/syntaxspec");
1fb352e0 1106 mcnt = *p++;
a1a052df 1107 fprintf (stderr, "/%d", mcnt);
1fb352e0
SM
1108 break;
1109
1110 case notsyntaxspec:
a1a052df 1111 fprintf (stderr, "/notsyntaxspec");
1fb352e0 1112 mcnt = *p++;
a1a052df 1113 fprintf (stderr, "/%d", mcnt);
1fb352e0
SM
1114 break;
1115
0b32bf0e 1116# ifdef emacs
fa9a63c5 1117 case before_dot:
a1a052df 1118 fprintf (stderr, "/before_dot");
25fe55af 1119 break;
fa9a63c5
RM
1120
1121 case at_dot:
a1a052df 1122 fprintf (stderr, "/at_dot");
25fe55af 1123 break;
fa9a63c5
RM
1124
1125 case after_dot:
a1a052df 1126 fprintf (stderr, "/after_dot");
25fe55af 1127 break;
fa9a63c5 1128
1fb352e0 1129 case categoryspec:
a1a052df 1130 fprintf (stderr, "/categoryspec");
fa9a63c5 1131 mcnt = *p++;
a1a052df 1132 fprintf (stderr, "/%d", mcnt);
25fe55af 1133 break;
5e69f11e 1134
1fb352e0 1135 case notcategoryspec:
a1a052df 1136 fprintf (stderr, "/notcategoryspec");
fa9a63c5 1137 mcnt = *p++;
a1a052df 1138 fprintf (stderr, "/%d", mcnt);
fa9a63c5 1139 break;
0b32bf0e 1140# endif /* emacs */
fa9a63c5 1141
fa9a63c5 1142 case begbuf:
a1a052df 1143 fprintf (stderr, "/begbuf");
25fe55af 1144 break;
fa9a63c5
RM
1145
1146 case endbuf:
a1a052df 1147 fprintf (stderr, "/endbuf");
25fe55af 1148 break;
fa9a63c5 1149
25fe55af 1150 default:
a1a052df 1151 fprintf (stderr, "?%d", *(p-1));
fa9a63c5
RM
1152 }
1153
a1a052df 1154 fprintf (stderr, "\n");
fa9a63c5
RM
1155 }
1156
a1a052df 1157 fprintf (stderr, "%d:\tend of pattern.\n", p - start);
fa9a63c5
RM
1158}
1159
1160
1161void
1162print_compiled_pattern (bufp)
1163 struct re_pattern_buffer *bufp;
1164{
01618498 1165 re_char *buffer = bufp->buffer;
fa9a63c5
RM
1166
1167 print_partial_compiled_pattern (buffer, buffer + bufp->used);
4bb91c68
SM
1168 printf ("%ld bytes used/%ld bytes allocated.\n",
1169 bufp->used, bufp->allocated);
fa9a63c5
RM
1170
1171 if (bufp->fastmap_accurate && bufp->fastmap)
1172 {
1173 printf ("fastmap: ");
1174 print_fastmap (bufp->fastmap);
1175 }
1176
1177 printf ("re_nsub: %d\t", bufp->re_nsub);
1178 printf ("regs_alloc: %d\t", bufp->regs_allocated);
1179 printf ("can_be_null: %d\t", bufp->can_be_null);
fa9a63c5
RM
1180 printf ("no_sub: %d\t", bufp->no_sub);
1181 printf ("not_bol: %d\t", bufp->not_bol);
1182 printf ("not_eol: %d\t", bufp->not_eol);
4bb91c68 1183 printf ("syntax: %lx\n", bufp->syntax);
505bde11 1184 fflush (stdout);
fa9a63c5
RM
1185 /* Perhaps we should print the translate table? */
1186}
1187
1188
1189void
1190print_double_string (where, string1, size1, string2, size2)
66f0296e
SM
1191 re_char *where;
1192 re_char *string1;
1193 re_char *string2;
d1dfb56c
EZ
1194 ssize_t size1;
1195 ssize_t size2;
fa9a63c5 1196{
d1dfb56c 1197 ssize_t this_char;
5e69f11e 1198
fa9a63c5
RM
1199 if (where == NULL)
1200 printf ("(null)");
1201 else
1202 {
1203 if (FIRST_STRING_P (where))
25fe55af
RS
1204 {
1205 for (this_char = where - string1; this_char < size1; this_char++)
1206 putchar (string1[this_char]);
fa9a63c5 1207
25fe55af
RS
1208 where = string2;
1209 }
fa9a63c5
RM
1210
1211 for (this_char = where - string2; this_char < size2; this_char++)
25fe55af 1212 putchar (string2[this_char]);
fa9a63c5
RM
1213 }
1214}
1215
1216#else /* not DEBUG */
1217
0b32bf0e
SM
1218# undef assert
1219# define assert(e)
fa9a63c5 1220
0b32bf0e
SM
1221# define DEBUG_STATEMENT(e)
1222# define DEBUG_PRINT1(x)
1223# define DEBUG_PRINT2(x1, x2)
1224# define DEBUG_PRINT3(x1, x2, x3)
1225# define DEBUG_PRINT4(x1, x2, x3, x4)
1226# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
1227# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
fa9a63c5
RM
1228
1229#endif /* not DEBUG */
1230\f
4da60324
PE
1231/* Use this to suppress gcc's `...may be used before initialized' warnings. */
1232#ifdef lint
1233# define IF_LINT(Code) Code
1234#else
1235# define IF_LINT(Code) /* empty */
1236#endif
1237\f
fa9a63c5
RM
1238/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
1239 also be assigned to arbitrarily: each pattern buffer stores its own
1240 syntax, so it can be changed between regex compilations. */
1241/* This has no initializer because initialized variables in Emacs
1242 become read-only after dumping. */
1243reg_syntax_t re_syntax_options;
1244
1245
1246/* Specify the precise syntax of regexps for compilation. This provides
1247 for compatibility for various utilities which historically have
1248 different, incompatible syntaxes.
1249
1250 The argument SYNTAX is a bit mask comprised of the various bits
4bb91c68 1251 defined in regex.h. We return the old syntax. */
fa9a63c5
RM
1252
1253reg_syntax_t
971de7fb 1254re_set_syntax (reg_syntax_t syntax)
fa9a63c5
RM
1255{
1256 reg_syntax_t ret = re_syntax_options;
5e69f11e 1257
fa9a63c5
RM
1258 re_syntax_options = syntax;
1259 return ret;
1260}
c0f9ea08 1261WEAK_ALIAS (__re_set_syntax, re_set_syntax)
f9b0fd99
RS
1262
1263/* Regexp to use to replace spaces, or NULL meaning don't. */
1264static re_char *whitespace_regexp;
1265
1266void
971de7fb 1267re_set_whitespace_regexp (const char *regexp)
f9b0fd99 1268{
6470ea05 1269 whitespace_regexp = (re_char *) regexp;
f9b0fd99
RS
1270}
1271WEAK_ALIAS (__re_set_syntax, re_set_syntax)
fa9a63c5
RM
1272\f
1273/* This table gives an error message for each of the error codes listed
4bb91c68 1274 in regex.h. Obviously the order here has to be same as there.
fa9a63c5 1275 POSIX doesn't require that we do anything for REG_NOERROR,
4bb91c68 1276 but why not be nice? */
fa9a63c5
RM
1277
1278static const char *re_error_msgid[] =
5e69f11e
RM
1279 {
1280 gettext_noop ("Success"), /* REG_NOERROR */
1281 gettext_noop ("No match"), /* REG_NOMATCH */
1282 gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
1283 gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
1284 gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
1285 gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
1286 gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
1287 gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */
1288 gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
1289 gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
1290 gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
1291 gettext_noop ("Invalid range end"), /* REG_ERANGE */
1292 gettext_noop ("Memory exhausted"), /* REG_ESPACE */
1293 gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
1294 gettext_noop ("Premature end of regular expression"), /* REG_EEND */
1295 gettext_noop ("Regular expression too big"), /* REG_ESIZE */
1296 gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
b3e4c897 1297 gettext_noop ("Range striding over charsets") /* REG_ERANGEX */
fa9a63c5
RM
1298 };
1299\f
4bb91c68 1300/* Avoiding alloca during matching, to placate r_alloc. */
fa9a63c5
RM
1301
1302/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1303 searching and matching functions should not call alloca. On some
1304 systems, alloca is implemented in terms of malloc, and if we're
1305 using the relocating allocator routines, then malloc could cause a
1306 relocation, which might (if the strings being searched are in the
1307 ralloc heap) shift the data out from underneath the regexp
1308 routines.
1309
5e69f11e 1310 Here's another reason to avoid allocation: Emacs
fa9a63c5
RM
1311 processes input from X in a signal handler; processing X input may
1312 call malloc; if input arrives while a matching routine is calling
1313 malloc, then we're scrod. But Emacs can't just block input while
1314 calling matching routines; then we don't notice interrupts when
1315 they come in. So, Emacs blocks input around all regexp calls
1316 except the matching calls, which it leaves unprotected, in the
1317 faith that they will not malloc. */
1318
1319/* Normally, this is fine. */
1320#define MATCH_MAY_ALLOCATE
1321
fa9a63c5
RM
1322/* The match routines may not allocate if (1) they would do it with malloc
1323 and (2) it's not safe for them to use malloc.
1324 Note that if REL_ALLOC is defined, matching would not use malloc for the
1325 failure stack, but we would still use it for the register vectors;
4bb91c68 1326 so REL_ALLOC should not affect this. */
b588157e 1327#if defined REGEX_MALLOC && defined emacs
0b32bf0e 1328# undef MATCH_MAY_ALLOCATE
fa9a63c5
RM
1329#endif
1330
1331\f
1332/* Failure stack declarations and macros; both re_compile_fastmap and
1333 re_match_2 use a failure stack. These have to be macros because of
1334 REGEX_ALLOCATE_STACK. */
5e69f11e 1335
fa9a63c5 1336
320a2a73 1337/* Approximate number of failure points for which to initially allocate space
fa9a63c5
RM
1338 when matching. If this number is exceeded, we allocate more
1339 space, so it is not a hard limit. */
1340#ifndef INIT_FAILURE_ALLOC
0b32bf0e 1341# define INIT_FAILURE_ALLOC 20
fa9a63c5
RM
1342#endif
1343
1344/* Roughly the maximum number of failure points on the stack. Would be
320a2a73 1345 exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed.
fa9a63c5 1346 This is a variable only so users of regex can assign to it; we never
ada30c0e
SM
1347 change it ourselves. We always multiply it by TYPICAL_FAILURE_SIZE
1348 before using it, so it should probably be a byte-count instead. */
c0f9ea08
SM
1349# if defined MATCH_MAY_ALLOCATE
1350/* Note that 4400 was enough to cause a crash on Alpha OSF/1,
320a2a73
KH
1351 whose default stack limit is 2mb. In order for a larger
1352 value to work reliably, you have to try to make it accord
1353 with the process stack limit. */
c0f9ea08
SM
1354size_t re_max_failures = 40000;
1355# else
1356size_t re_max_failures = 4000;
1357# endif
fa9a63c5
RM
1358
1359union fail_stack_elt
1360{
01618498 1361 re_char *pointer;
c0f9ea08
SM
1362 /* This should be the biggest `int' that's no bigger than a pointer. */
1363 long integer;
fa9a63c5
RM
1364};
1365
1366typedef union fail_stack_elt fail_stack_elt_t;
1367
1368typedef struct
1369{
1370 fail_stack_elt_t *stack;
c0f9ea08
SM
1371 size_t size;
1372 size_t avail; /* Offset of next open position. */
1373 size_t frame; /* Offset of the cur constructed frame. */
fa9a63c5
RM
1374} fail_stack_type;
1375
505bde11 1376#define FAIL_STACK_EMPTY() (fail_stack.frame == 0)
fa9a63c5
RM
1377
1378
1379/* Define macros to initialize and free the failure stack.
1380 Do `return -2' if the alloc fails. */
1381
1382#ifdef MATCH_MAY_ALLOCATE
0b32bf0e 1383# define INIT_FAIL_STACK() \
fa9a63c5 1384 do { \
38182d90 1385 fail_stack.stack = \
320a2a73
KH
1386 REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE \
1387 * sizeof (fail_stack_elt_t)); \
fa9a63c5
RM
1388 \
1389 if (fail_stack.stack == NULL) \
1390 return -2; \
1391 \
1392 fail_stack.size = INIT_FAILURE_ALLOC; \
1393 fail_stack.avail = 0; \
505bde11 1394 fail_stack.frame = 0; \
fa9a63c5 1395 } while (0)
fa9a63c5 1396#else
0b32bf0e 1397# define INIT_FAIL_STACK() \
fa9a63c5
RM
1398 do { \
1399 fail_stack.avail = 0; \
505bde11 1400 fail_stack.frame = 0; \
fa9a63c5
RM
1401 } while (0)
1402
b313f9d8
PE
1403# define RETALLOC_IF(addr, n, t) \
1404 if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
fa9a63c5
RM
1405#endif
1406
1407
320a2a73
KH
1408/* Double the size of FAIL_STACK, up to a limit
1409 which allows approximately `re_max_failures' items.
fa9a63c5
RM
1410
1411 Return 1 if succeeds, and 0 if either ran out of memory
5e69f11e
RM
1412 allocating space for it or it was already too large.
1413
4bb91c68 1414 REGEX_REALLOCATE_STACK requires `destination' be declared. */
fa9a63c5 1415
320a2a73
KH
1416/* Factor to increase the failure stack size by
1417 when we increase it.
1418 This used to be 2, but 2 was too wasteful
1419 because the old discarded stacks added up to as much space
1420 were as ultimate, maximum-size stack. */
1421#define FAIL_STACK_GROWTH_FACTOR 4
1422
1423#define GROW_FAIL_STACK(fail_stack) \
eead07d6
KH
1424 (((fail_stack).size * sizeof (fail_stack_elt_t) \
1425 >= re_max_failures * TYPICAL_FAILURE_SIZE) \
fa9a63c5 1426 ? 0 \
320a2a73 1427 : ((fail_stack).stack \
38182d90 1428 = REGEX_REALLOCATE_STACK ((fail_stack).stack, \
25fe55af 1429 (fail_stack).size * sizeof (fail_stack_elt_t), \
320a2a73
KH
1430 MIN (re_max_failures * TYPICAL_FAILURE_SIZE, \
1431 ((fail_stack).size * sizeof (fail_stack_elt_t) \
1432 * FAIL_STACK_GROWTH_FACTOR))), \
fa9a63c5
RM
1433 \
1434 (fail_stack).stack == NULL \
1435 ? 0 \
6453db45
KH
1436 : ((fail_stack).size \
1437 = (MIN (re_max_failures * TYPICAL_FAILURE_SIZE, \
1438 ((fail_stack).size * sizeof (fail_stack_elt_t) \
1439 * FAIL_STACK_GROWTH_FACTOR)) \
1440 / sizeof (fail_stack_elt_t)), \
25fe55af 1441 1)))
fa9a63c5
RM
1442
1443
fa9a63c5
RM
1444/* Push a pointer value onto the failure stack.
1445 Assumes the variable `fail_stack'. Probably should only
4bb91c68 1446 be called from within `PUSH_FAILURE_POINT'. */
fa9a63c5 1447#define PUSH_FAILURE_POINTER(item) \
01618498 1448 fail_stack.stack[fail_stack.avail++].pointer = (item)
fa9a63c5
RM
1449
1450/* This pushes an integer-valued item onto the failure stack.
1451 Assumes the variable `fail_stack'. Probably should only
4bb91c68 1452 be called from within `PUSH_FAILURE_POINT'. */
fa9a63c5
RM
1453#define PUSH_FAILURE_INT(item) \
1454 fail_stack.stack[fail_stack.avail++].integer = (item)
1455
b313f9d8 1456/* These POP... operations complement the PUSH... operations.
fa9a63c5
RM
1457 All assume that `fail_stack' is nonempty. */
1458#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1459#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
fa9a63c5 1460
505bde11
SM
1461/* Individual items aside from the registers. */
1462#define NUM_NONREG_ITEMS 3
1463
1464/* Used to examine the stack (to detect infinite loops). */
1465#define FAILURE_PAT(h) fail_stack.stack[(h) - 1].pointer
66f0296e 1466#define FAILURE_STR(h) (fail_stack.stack[(h) - 2].pointer)
505bde11
SM
1467#define NEXT_FAILURE_HANDLE(h) fail_stack.stack[(h) - 3].integer
1468#define TOP_FAILURE_HANDLE() fail_stack.frame
fa9a63c5
RM
1469
1470
505bde11
SM
1471#define ENSURE_FAIL_STACK(space) \
1472while (REMAINING_AVAIL_SLOTS <= space) { \
1473 if (!GROW_FAIL_STACK (fail_stack)) \
1474 return -2; \
1475 DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", (fail_stack).size);\
1476 DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1477}
1478
1479/* Push register NUM onto the stack. */
1480#define PUSH_FAILURE_REG(num) \
1481do { \
1482 char *destination; \
1483 ENSURE_FAIL_STACK(3); \
1484 DEBUG_PRINT4 (" Push reg %d (spanning %p -> %p)\n", \
1485 num, regstart[num], regend[num]); \
1486 PUSH_FAILURE_POINTER (regstart[num]); \
1487 PUSH_FAILURE_POINTER (regend[num]); \
1488 PUSH_FAILURE_INT (num); \
1489} while (0)
1490
01618498
SM
1491/* Change the counter's value to VAL, but make sure that it will
1492 be reset when backtracking. */
1493#define PUSH_NUMBER(ptr,val) \
dc1e502d
SM
1494do { \
1495 char *destination; \
1496 int c; \
1497 ENSURE_FAIL_STACK(3); \
1498 EXTRACT_NUMBER (c, ptr); \
01618498 1499 DEBUG_PRINT4 (" Push number %p = %d -> %d\n", ptr, c, val); \
dc1e502d
SM
1500 PUSH_FAILURE_INT (c); \
1501 PUSH_FAILURE_POINTER (ptr); \
1502 PUSH_FAILURE_INT (-1); \
01618498 1503 STORE_NUMBER (ptr, val); \
dc1e502d
SM
1504} while (0)
1505
505bde11 1506/* Pop a saved register off the stack. */
dc1e502d 1507#define POP_FAILURE_REG_OR_COUNT() \
505bde11 1508do { \
d1dfb56c 1509 long pfreg = POP_FAILURE_INT (); \
19ed5445 1510 if (pfreg == -1) \
dc1e502d
SM
1511 { \
1512 /* It's a counter. */ \
6dcf2d0e
SM
1513 /* Here, we discard `const', making re_match non-reentrant. */ \
1514 unsigned char *ptr = (unsigned char*) POP_FAILURE_POINTER (); \
19ed5445
PE
1515 pfreg = POP_FAILURE_INT (); \
1516 STORE_NUMBER (ptr, pfreg); \
1517 DEBUG_PRINT3 (" Pop counter %p = %d\n", ptr, pfreg); \
dc1e502d
SM
1518 } \
1519 else \
1520 { \
19ed5445
PE
1521 regend[pfreg] = POP_FAILURE_POINTER (); \
1522 regstart[pfreg] = POP_FAILURE_POINTER (); \
dc1e502d 1523 DEBUG_PRINT4 (" Pop reg %d (spanning %p -> %p)\n", \
19ed5445 1524 pfreg, regstart[pfreg], regend[pfreg]); \
dc1e502d 1525 } \
505bde11
SM
1526} while (0)
1527
1528/* Check that we are not stuck in an infinite loop. */
1529#define CHECK_INFINITE_LOOP(pat_cur, string_place) \
1530do { \
d1dfb56c 1531 ssize_t failure = TOP_FAILURE_HANDLE (); \
505bde11 1532 /* Check for infinite matching loops */ \
f6df485f
RS
1533 while (failure > 0 \
1534 && (FAILURE_STR (failure) == string_place \
1535 || FAILURE_STR (failure) == NULL)) \
505bde11
SM
1536 { \
1537 assert (FAILURE_PAT (failure) >= bufp->buffer \
66f0296e 1538 && FAILURE_PAT (failure) <= bufp->buffer + bufp->used); \
505bde11 1539 if (FAILURE_PAT (failure) == pat_cur) \
f6df485f 1540 { \
6df42991
SM
1541 cycle = 1; \
1542 break; \
f6df485f 1543 } \
66f0296e 1544 DEBUG_PRINT2 (" Other pattern: %p\n", FAILURE_PAT (failure)); \
505bde11
SM
1545 failure = NEXT_FAILURE_HANDLE(failure); \
1546 } \
1547 DEBUG_PRINT2 (" Other string: %p\n", FAILURE_STR (failure)); \
1548} while (0)
6df42991 1549
fa9a63c5 1550/* Push the information about the state we will need
5e69f11e
RM
1551 if we ever fail back to it.
1552
505bde11 1553 Requires variables fail_stack, regstart, regend and
320a2a73 1554 num_regs be declared. GROW_FAIL_STACK requires `destination' be
fa9a63c5 1555 declared.
5e69f11e 1556
fa9a63c5
RM
1557 Does `return FAILURE_CODE' if runs out of memory. */
1558
505bde11
SM
1559#define PUSH_FAILURE_POINT(pattern, string_place) \
1560do { \
1561 char *destination; \
1562 /* Must be int, so when we don't save any registers, the arithmetic \
1563 of 0 + -1 isn't done as unsigned. */ \
1564 \
505bde11 1565 DEBUG_STATEMENT (nfailure_points_pushed++); \
4bb91c68 1566 DEBUG_PRINT1 ("\nPUSH_FAILURE_POINT:\n"); \
505bde11
SM
1567 DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail); \
1568 DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
1569 \
1570 ENSURE_FAIL_STACK (NUM_NONREG_ITEMS); \
1571 \
1572 DEBUG_PRINT1 ("\n"); \
1573 \
1574 DEBUG_PRINT2 (" Push frame index: %d\n", fail_stack.frame); \
1575 PUSH_FAILURE_INT (fail_stack.frame); \
1576 \
1577 DEBUG_PRINT2 (" Push string %p: `", string_place); \
1578 DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, size2);\
1579 DEBUG_PRINT1 ("'\n"); \
1580 PUSH_FAILURE_POINTER (string_place); \
1581 \
1582 DEBUG_PRINT2 (" Push pattern %p: ", pattern); \
1583 DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern, pend); \
1584 PUSH_FAILURE_POINTER (pattern); \
1585 \
1586 /* Close the frame by moving the frame pointer past it. */ \
1587 fail_stack.frame = fail_stack.avail; \
1588} while (0)
fa9a63c5 1589
320a2a73
KH
1590/* Estimate the size of data pushed by a typical failure stack entry.
1591 An estimate is all we need, because all we use this for
1592 is to choose a limit for how big to make the failure stack. */
ada30c0e 1593/* BEWARE, the value `20' is hard-coded in emacs.c:main(). */
320a2a73 1594#define TYPICAL_FAILURE_SIZE 20
fa9a63c5 1595
fa9a63c5
RM
1596/* How many items can still be added to the stack without overflowing it. */
1597#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1598
1599
1600/* Pops what PUSH_FAIL_STACK pushes.
1601
1602 We restore into the parameters, all of which should be lvalues:
1603 STR -- the saved data position.
1604 PAT -- the saved pattern position.
fa9a63c5 1605 REGSTART, REGEND -- arrays of string positions.
5e69f11e 1606
fa9a63c5 1607 Also assumes the variables `fail_stack' and (if debugging), `bufp',
7814e705 1608 `pend', `string1', `size1', `string2', and `size2'. */
fa9a63c5 1609
505bde11
SM
1610#define POP_FAILURE_POINT(str, pat) \
1611do { \
fa9a63c5
RM
1612 assert (!FAIL_STACK_EMPTY ()); \
1613 \
1614 /* Remove failure points and point to how many regs pushed. */ \
1615 DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
1616 DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
25fe55af 1617 DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
fa9a63c5 1618 \
505bde11
SM
1619 /* Pop the saved registers. */ \
1620 while (fail_stack.frame < fail_stack.avail) \
dc1e502d 1621 POP_FAILURE_REG_OR_COUNT (); \
fa9a63c5 1622 \
01618498 1623 pat = POP_FAILURE_POINTER (); \
505bde11
SM
1624 DEBUG_PRINT2 (" Popping pattern %p: ", pat); \
1625 DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
fa9a63c5
RM
1626 \
1627 /* If the saved string location is NULL, it came from an \
1628 on_failure_keep_string_jump opcode, and we want to throw away the \
1629 saved NULL, thus retaining our current position in the string. */ \
01618498 1630 str = POP_FAILURE_POINTER (); \
505bde11 1631 DEBUG_PRINT2 (" Popping string %p: `", str); \
fa9a63c5
RM
1632 DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
1633 DEBUG_PRINT1 ("'\n"); \
1634 \
505bde11
SM
1635 fail_stack.frame = POP_FAILURE_INT (); \
1636 DEBUG_PRINT2 (" Popping frame index: %d\n", fail_stack.frame); \
fa9a63c5 1637 \
505bde11
SM
1638 assert (fail_stack.avail >= 0); \
1639 assert (fail_stack.frame <= fail_stack.avail); \
fa9a63c5 1640 \
fa9a63c5 1641 DEBUG_STATEMENT (nfailure_points_popped++); \
505bde11 1642} while (0) /* POP_FAILURE_POINT */
fa9a63c5
RM
1643
1644
1645\f
fa9a63c5 1646/* Registers are set to a sentinel when they haven't yet matched. */
4bb91c68 1647#define REG_UNSET(e) ((e) == NULL)
fa9a63c5
RM
1648\f
1649/* Subroutine declarations and macros for regex_compile. */
1650
261cb4bb
PE
1651static reg_errcode_t regex_compile (re_char *pattern, size_t size,
1652 reg_syntax_t syntax,
1653 struct re_pattern_buffer *bufp);
1654static void store_op1 (re_opcode_t op, unsigned char *loc, int arg);
1655static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2);
1656static void insert_op1 (re_opcode_t op, unsigned char *loc,
1657 int arg, unsigned char *end);
1658static void insert_op2 (re_opcode_t op, unsigned char *loc,
1659 int arg1, int arg2, unsigned char *end);
1660static boolean at_begline_loc_p (re_char *pattern, re_char *p,
1661 reg_syntax_t syntax);
1662static boolean at_endline_loc_p (re_char *p, re_char *pend,
1663 reg_syntax_t syntax);
1664static re_char *skip_one_char (re_char *p);
1665static int analyse_first (re_char *p, re_char *pend,
1666 char *fastmap, const int multibyte);
fa9a63c5 1667
fa9a63c5 1668/* Fetch the next character in the uncompiled pattern, with no
4bb91c68 1669 translation. */
36595814 1670#define PATFETCH(c) \
2d1675e4
SM
1671 do { \
1672 int len; \
1673 if (p == pend) return REG_EEND; \
62a6e103 1674 c = RE_STRING_CHAR_AND_LENGTH (p, len, multibyte); \
2d1675e4 1675 p += len; \
fa9a63c5
RM
1676 } while (0)
1677
fa9a63c5
RM
1678
1679/* If `translate' is non-null, return translate[D], else just D. We
1680 cast the subscript to translate because some data is declared as
1681 `char *', to avoid warnings when a string constant is passed. But
1682 when we use a character as a subscript we must make it unsigned. */
6676cb1c 1683#ifndef TRANSLATE
0b32bf0e 1684# define TRANSLATE(d) \
66f0296e 1685 (RE_TRANSLATE_P (translate) ? RE_TRANSLATE (translate, (d)) : (d))
6676cb1c 1686#endif
fa9a63c5
RM
1687
1688
1689/* Macros for outputting the compiled pattern into `buffer'. */
1690
1691/* If the buffer isn't allocated when it comes in, use this. */
1692#define INIT_BUF_SIZE 32
1693
4bb91c68 1694/* Make sure we have at least N more bytes of space in buffer. */
fa9a63c5 1695#define GET_BUFFER_SPACE(n) \
01618498 1696 while ((size_t) (b - bufp->buffer + (n)) > bufp->allocated) \
fa9a63c5
RM
1697 EXTEND_BUFFER ()
1698
1699/* Make sure we have one more byte of buffer space and then add C to it. */
1700#define BUF_PUSH(c) \
1701 do { \
1702 GET_BUFFER_SPACE (1); \
1703 *b++ = (unsigned char) (c); \
1704 } while (0)
1705
1706
1707/* Ensure we have two more bytes of buffer space and then append C1 and C2. */
1708#define BUF_PUSH_2(c1, c2) \
1709 do { \
1710 GET_BUFFER_SPACE (2); \
1711 *b++ = (unsigned char) (c1); \
1712 *b++ = (unsigned char) (c2); \
1713 } while (0)
1714
1715
fa9a63c5 1716/* Store a jump with opcode OP at LOC to location TO. We store a
4bb91c68 1717 relative address offset by the three bytes the jump itself occupies. */
fa9a63c5
RM
1718#define STORE_JUMP(op, loc, to) \
1719 store_op1 (op, loc, (to) - (loc) - 3)
1720
1721/* Likewise, for a two-argument jump. */
1722#define STORE_JUMP2(op, loc, to, arg) \
1723 store_op2 (op, loc, (to) - (loc) - 3, arg)
1724
4bb91c68 1725/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
fa9a63c5
RM
1726#define INSERT_JUMP(op, loc, to) \
1727 insert_op1 (op, loc, (to) - (loc) - 3, b)
1728
1729/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */
1730#define INSERT_JUMP2(op, loc, to, arg) \
1731 insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
1732
1733
1734/* This is not an arbitrary limit: the arguments which represent offsets
839966f3 1735 into the pattern are two bytes long. So if 2^15 bytes turns out to
fa9a63c5 1736 be too small, many things would have to change. */
839966f3
KH
1737# define MAX_BUF_SIZE (1L << 15)
1738
fa9a63c5
RM
1739/* Extend the buffer by twice its current size via realloc and
1740 reset the pointers that pointed into the old block to point to the
1741 correct places in the new one. If extending the buffer results in it
4bb91c68
SM
1742 being larger than MAX_BUF_SIZE, then flag memory exhausted. */
1743#if __BOUNDED_POINTERS__
1744# define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
381880b0
CY
1745# define MOVE_BUFFER_POINTER(P) \
1746 (__ptrlow (P) = new_buffer + (__ptrlow (P) - old_buffer), \
1747 SET_HIGH_BOUND (P), \
1748 __ptrvalue (P) = new_buffer + (__ptrvalue (P) - old_buffer))
4bb91c68
SM
1749# define ELSE_EXTEND_BUFFER_HIGH_BOUND \
1750 else \
1751 { \
1752 SET_HIGH_BOUND (b); \
1753 SET_HIGH_BOUND (begalt); \
1754 if (fixup_alt_jump) \
1755 SET_HIGH_BOUND (fixup_alt_jump); \
1756 if (laststart) \
1757 SET_HIGH_BOUND (laststart); \
1758 if (pending_exact) \
1759 SET_HIGH_BOUND (pending_exact); \
1760 }
1761#else
381880b0 1762# define MOVE_BUFFER_POINTER(P) ((P) = new_buffer + ((P) - old_buffer))
4bb91c68
SM
1763# define ELSE_EXTEND_BUFFER_HIGH_BOUND
1764#endif
fa9a63c5 1765#define EXTEND_BUFFER() \
25fe55af 1766 do { \
381880b0 1767 unsigned char *old_buffer = bufp->buffer; \
25fe55af 1768 if (bufp->allocated == MAX_BUF_SIZE) \
fa9a63c5
RM
1769 return REG_ESIZE; \
1770 bufp->allocated <<= 1; \
1771 if (bufp->allocated > MAX_BUF_SIZE) \
25fe55af 1772 bufp->allocated = MAX_BUF_SIZE; \
01618498 1773 RETALLOC (bufp->buffer, bufp->allocated, unsigned char); \
fa9a63c5
RM
1774 if (bufp->buffer == NULL) \
1775 return REG_ESPACE; \
1776 /* If the buffer moved, move all the pointers into it. */ \
1777 if (old_buffer != bufp->buffer) \
1778 { \
381880b0 1779 unsigned char *new_buffer = bufp->buffer; \
4bb91c68
SM
1780 MOVE_BUFFER_POINTER (b); \
1781 MOVE_BUFFER_POINTER (begalt); \
25fe55af 1782 if (fixup_alt_jump) \
4bb91c68 1783 MOVE_BUFFER_POINTER (fixup_alt_jump); \
25fe55af 1784 if (laststart) \
4bb91c68 1785 MOVE_BUFFER_POINTER (laststart); \
25fe55af 1786 if (pending_exact) \
4bb91c68 1787 MOVE_BUFFER_POINTER (pending_exact); \
fa9a63c5 1788 } \
4bb91c68 1789 ELSE_EXTEND_BUFFER_HIGH_BOUND \
fa9a63c5
RM
1790 } while (0)
1791
1792
1793/* Since we have one byte reserved for the register number argument to
1794 {start,stop}_memory, the maximum number of groups we can report
1795 things about is what fits in that byte. */
1796#define MAX_REGNUM 255
1797
1798/* But patterns can have more than `MAX_REGNUM' registers. We just
1799 ignore the excess. */
098d42af 1800typedef int regnum_t;
fa9a63c5
RM
1801
1802
1803/* Macros for the compile stack. */
1804
1805/* Since offsets can go either forwards or backwards, this type needs to
4bb91c68
SM
1806 be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
1807/* int may be not enough when sizeof(int) == 2. */
1808typedef long pattern_offset_t;
fa9a63c5
RM
1809
1810typedef struct
1811{
1812 pattern_offset_t begalt_offset;
1813 pattern_offset_t fixup_alt_jump;
5e69f11e 1814 pattern_offset_t laststart_offset;
fa9a63c5
RM
1815 regnum_t regnum;
1816} compile_stack_elt_t;
1817
1818
1819typedef struct
1820{
1821 compile_stack_elt_t *stack;
d1dfb56c
EZ
1822 size_t size;
1823 size_t avail; /* Offset of next open position. */
fa9a63c5
RM
1824} compile_stack_type;
1825
1826
1827#define INIT_COMPILE_STACK_SIZE 32
1828
1829#define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
1830#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
1831
4bb91c68 1832/* The next available element. */
fa9a63c5
RM
1833#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1834
1cee1e27
SM
1835/* Explicit quit checking is only used on NTemacs and whenever we
1836 use polling to process input events. */
1837#if defined emacs && (defined WINDOWSNT || defined SYNC_INPUT) && defined QUIT
77d11aec
RS
1838extern int immediate_quit;
1839# define IMMEDIATE_QUIT_CHECK \
1840 do { \
1841 if (immediate_quit) QUIT; \
1842 } while (0)
1843#else
1844# define IMMEDIATE_QUIT_CHECK ((void)0)
1845#endif
1846\f
b18215fc
RS
1847/* Structure to manage work area for range table. */
1848struct range_table_work_area
1849{
1850 int *table; /* actual work area. */
1851 int allocated; /* allocated size for work area in bytes. */
7814e705 1852 int used; /* actually used size in words. */
96cc36cc 1853 int bits; /* flag to record character classes */
b18215fc
RS
1854};
1855
77d11aec
RS
1856/* Make sure that WORK_AREA can hold more N multibyte characters.
1857 This is used only in set_image_of_range and set_image_of_range_1.
1858 It expects WORK_AREA to be a pointer.
1859 If it can't get the space, it returns from the surrounding function. */
1860
1861#define EXTEND_RANGE_TABLE(work_area, n) \
1862 do { \
8f924df7 1863 if (((work_area).used + (n)) * sizeof (int) > (work_area).allocated) \
77d11aec 1864 { \
8f924df7
KH
1865 extend_range_table_work_area (&work_area); \
1866 if ((work_area).table == 0) \
77d11aec
RS
1867 return (REG_ESPACE); \
1868 } \
b18215fc
RS
1869 } while (0)
1870
96cc36cc
RS
1871#define SET_RANGE_TABLE_WORK_AREA_BIT(work_area, bit) \
1872 (work_area).bits |= (bit)
1873
14473664
SM
1874/* Bits used to implement the multibyte-part of the various character classes
1875 such as [:alnum:] in a charset's range table. */
1876#define BIT_WORD 0x1
1877#define BIT_LOWER 0x2
1878#define BIT_PUNCT 0x4
1879#define BIT_SPACE 0x8
1880#define BIT_UPPER 0x10
1881#define BIT_MULTIBYTE 0x20
96cc36cc 1882
b18215fc
RS
1883/* Set a range (RANGE_START, RANGE_END) to WORK_AREA. */
1884#define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end) \
77d11aec 1885 do { \
8f924df7 1886 EXTEND_RANGE_TABLE ((work_area), 2); \
b18215fc
RS
1887 (work_area).table[(work_area).used++] = (range_start); \
1888 (work_area).table[(work_area).used++] = (range_end); \
1889 } while (0)
1890
7814e705 1891/* Free allocated memory for WORK_AREA. */
b18215fc
RS
1892#define FREE_RANGE_TABLE_WORK_AREA(work_area) \
1893 do { \
1894 if ((work_area).table) \
1895 free ((work_area).table); \
1896 } while (0)
1897
96cc36cc 1898#define CLEAR_RANGE_TABLE_WORK_USED(work_area) ((work_area).used = 0, (work_area).bits = 0)
b18215fc 1899#define RANGE_TABLE_WORK_USED(work_area) ((work_area).used)
96cc36cc 1900#define RANGE_TABLE_WORK_BITS(work_area) ((work_area).bits)
b18215fc 1901#define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i])
77d11aec 1902\f
b18215fc 1903
fa9a63c5 1904/* Set the bit for character C in a list. */
01618498 1905#define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
fa9a63c5
RM
1906
1907
bf216479
KH
1908#ifdef emacs
1909
cf9c99bc
KH
1910/* Store characters in the range FROM to TO in the bitmap at B (for
1911 ASCII and unibyte characters) and WORK_AREA (for multibyte
1912 characters) while translating them and paying attention to the
1913 continuity of translated characters.
8f924df7 1914
cf9c99bc
KH
1915 Implementation note: It is better to implement these fairly big
1916 macros by a function, but it's not that easy because macros called
8f924df7 1917 in this macro assume various local variables already declared. */
bf216479 1918
cf9c99bc
KH
1919/* Both FROM and TO are ASCII characters. */
1920
1921#define SETUP_ASCII_RANGE(work_area, FROM, TO) \
1922 do { \
1923 int C0, C1; \
1924 \
1925 for (C0 = (FROM); C0 <= (TO); C0++) \
1926 { \
1927 C1 = TRANSLATE (C0); \
1928 if (! ASCII_CHAR_P (C1)) \
1929 { \
1930 SET_RANGE_TABLE_WORK_AREA ((work_area), C1, C1); \
1931 if ((C1 = RE_CHAR_TO_UNIBYTE (C1)) < 0) \
1932 C1 = C0; \
1933 } \
1934 SET_LIST_BIT (C1); \
1935 } \
1936 } while (0)
1937
1938
1939/* Both FROM and TO are unibyte characters (0x80..0xFF). */
1940
1941#define SETUP_UNIBYTE_RANGE(work_area, FROM, TO) \
1942 do { \
1943 int C0, C1, C2, I; \
1944 int USED = RANGE_TABLE_WORK_USED (work_area); \
1945 \
1946 for (C0 = (FROM); C0 <= (TO); C0++) \
1947 { \
1948 C1 = RE_CHAR_TO_MULTIBYTE (C0); \
1949 if (CHAR_BYTE8_P (C1)) \
1950 SET_LIST_BIT (C0); \
1951 else \
1952 { \
1953 C2 = TRANSLATE (C1); \
1954 if (C2 == C1 \
1955 || (C1 = RE_CHAR_TO_UNIBYTE (C2)) < 0) \
1956 C1 = C0; \
1957 SET_LIST_BIT (C1); \
1958 for (I = RANGE_TABLE_WORK_USED (work_area) - 2; I >= USED; I -= 2) \
1959 { \
1960 int from = RANGE_TABLE_WORK_ELT (work_area, I); \
1961 int to = RANGE_TABLE_WORK_ELT (work_area, I + 1); \
1962 \
1963 if (C2 >= from - 1 && C2 <= to + 1) \
1964 { \
1965 if (C2 == from - 1) \
1966 RANGE_TABLE_WORK_ELT (work_area, I)--; \
1967 else if (C2 == to + 1) \
1968 RANGE_TABLE_WORK_ELT (work_area, I + 1)++; \
1969 break; \
1970 } \
1971 } \
1972 if (I < USED) \
1973 SET_RANGE_TABLE_WORK_AREA ((work_area), C2, C2); \
1974 } \
1975 } \
1976 } while (0)
1977
1978
78edd3b7 1979/* Both FROM and TO are multibyte characters. */
cf9c99bc
KH
1980
1981#define SETUP_MULTIBYTE_RANGE(work_area, FROM, TO) \
1982 do { \
1983 int C0, C1, C2, I, USED = RANGE_TABLE_WORK_USED (work_area); \
1984 \
1985 SET_RANGE_TABLE_WORK_AREA ((work_area), (FROM), (TO)); \
1986 for (C0 = (FROM); C0 <= (TO); C0++) \
1987 { \
1988 C1 = TRANSLATE (C0); \
1989 if ((C2 = RE_CHAR_TO_UNIBYTE (C1)) >= 0 \
1990 || (C1 != C0 && (C2 = RE_CHAR_TO_UNIBYTE (C0)) >= 0)) \
1991 SET_LIST_BIT (C2); \
1992 if (C1 >= (FROM) && C1 <= (TO)) \
1993 continue; \
1994 for (I = RANGE_TABLE_WORK_USED (work_area) - 2; I >= USED; I -= 2) \
1995 { \
1996 int from = RANGE_TABLE_WORK_ELT (work_area, I); \
1997 int to = RANGE_TABLE_WORK_ELT (work_area, I + 1); \
1998 \
1999 if (C1 >= from - 1 && C1 <= to + 1) \
2000 { \
2001 if (C1 == from - 1) \
2002 RANGE_TABLE_WORK_ELT (work_area, I)--; \
2003 else if (C1 == to + 1) \
2004 RANGE_TABLE_WORK_ELT (work_area, I + 1)++; \
2005 break; \
2006 } \
2007 } \
2008 if (I < USED) \
2009 SET_RANGE_TABLE_WORK_AREA ((work_area), C1, C1); \
2010 } \
bf216479
KH
2011 } while (0)
2012
2013#endif /* emacs */
2014
fa9a63c5 2015/* Get the next unsigned number in the uncompiled pattern. */
25fe55af 2016#define GET_UNSIGNED_NUMBER(num) \
c72b0edd
SM
2017 do { \
2018 if (p == pend) \
2019 FREE_STACK_RETURN (REG_EBRACE); \
2020 else \
2021 { \
2022 PATFETCH (c); \
2023 while ('0' <= c && c <= '9') \
2024 { \
2025 int prev; \
2026 if (num < 0) \
2027 num = 0; \
2028 prev = num; \
2029 num = num * 10 + c - '0'; \
2030 if (num / 10 != prev) \
2031 FREE_STACK_RETURN (REG_BADBR); \
2032 if (p == pend) \
2033 FREE_STACK_RETURN (REG_EBRACE); \
2034 PATFETCH (c); \
2035 } \
2036 } \
2037 } while (0)
77d11aec 2038\f
1fdab503 2039#if ! WIDE_CHAR_SUPPORT
01618498 2040
14473664 2041/* Map a string to the char class it names (if any). */
1fdab503 2042re_wctype_t
971de7fb 2043re_wctype (const re_char *str)
14473664 2044{
5b0534c8 2045 const char *string = (const char *) str;
14473664
SM
2046 if (STREQ (string, "alnum")) return RECC_ALNUM;
2047 else if (STREQ (string, "alpha")) return RECC_ALPHA;
2048 else if (STREQ (string, "word")) return RECC_WORD;
2049 else if (STREQ (string, "ascii")) return RECC_ASCII;
2050 else if (STREQ (string, "nonascii")) return RECC_NONASCII;
2051 else if (STREQ (string, "graph")) return RECC_GRAPH;
2052 else if (STREQ (string, "lower")) return RECC_LOWER;
2053 else if (STREQ (string, "print")) return RECC_PRINT;
2054 else if (STREQ (string, "punct")) return RECC_PUNCT;
2055 else if (STREQ (string, "space")) return RECC_SPACE;
2056 else if (STREQ (string, "upper")) return RECC_UPPER;
2057 else if (STREQ (string, "unibyte")) return RECC_UNIBYTE;
2058 else if (STREQ (string, "multibyte")) return RECC_MULTIBYTE;
2059 else if (STREQ (string, "digit")) return RECC_DIGIT;
2060 else if (STREQ (string, "xdigit")) return RECC_XDIGIT;
2061 else if (STREQ (string, "cntrl")) return RECC_CNTRL;
2062 else if (STREQ (string, "blank")) return RECC_BLANK;
2063 else return 0;
2064}
2065
e0f24100 2066/* True if CH is in the char class CC. */
1fdab503 2067boolean
971de7fb 2068re_iswctype (int ch, re_wctype_t cc)
14473664
SM
2069{
2070 switch (cc)
2071 {
f3fcc40d
AS
2072 case RECC_ALNUM: return ISALNUM (ch) != 0;
2073 case RECC_ALPHA: return ISALPHA (ch) != 0;
2074 case RECC_BLANK: return ISBLANK (ch) != 0;
2075 case RECC_CNTRL: return ISCNTRL (ch) != 0;
2076 case RECC_DIGIT: return ISDIGIT (ch) != 0;
2077 case RECC_GRAPH: return ISGRAPH (ch) != 0;
2078 case RECC_LOWER: return ISLOWER (ch) != 0;
2079 case RECC_PRINT: return ISPRINT (ch) != 0;
2080 case RECC_PUNCT: return ISPUNCT (ch) != 0;
2081 case RECC_SPACE: return ISSPACE (ch) != 0;
2082 case RECC_UPPER: return ISUPPER (ch) != 0;
2083 case RECC_XDIGIT: return ISXDIGIT (ch) != 0;
2084 case RECC_ASCII: return IS_REAL_ASCII (ch) != 0;
213bd7f2 2085 case RECC_NONASCII: return !IS_REAL_ASCII (ch);
f3fcc40d 2086 case RECC_UNIBYTE: return ISUNIBYTE (ch) != 0;
213bd7f2 2087 case RECC_MULTIBYTE: return !ISUNIBYTE (ch);
f3fcc40d 2088 case RECC_WORD: return ISWORD (ch) != 0;
0cdd06f8
SM
2089 case RECC_ERROR: return false;
2090 default:
5e617bc2 2091 abort ();
14473664
SM
2092 }
2093}
fa9a63c5 2094
14473664
SM
2095/* Return a bit-pattern to use in the range-table bits to match multibyte
2096 chars of class CC. */
2097static int
971de7fb 2098re_wctype_to_bit (re_wctype_t cc)
14473664
SM
2099{
2100 switch (cc)
2101 {
2102 case RECC_NONASCII: case RECC_PRINT: case RECC_GRAPH:
0cdd06f8
SM
2103 case RECC_MULTIBYTE: return BIT_MULTIBYTE;
2104 case RECC_ALPHA: case RECC_ALNUM: case RECC_WORD: return BIT_WORD;
2105 case RECC_LOWER: return BIT_LOWER;
2106 case RECC_UPPER: return BIT_UPPER;
2107 case RECC_PUNCT: return BIT_PUNCT;
2108 case RECC_SPACE: return BIT_SPACE;
14473664 2109 case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
0cdd06f8
SM
2110 case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0;
2111 default:
5e617bc2 2112 abort ();
14473664
SM
2113 }
2114}
2115#endif
77d11aec
RS
2116\f
2117/* Filling in the work area of a range. */
2118
2119/* Actually extend the space in WORK_AREA. */
2120
2121static void
971de7fb 2122extend_range_table_work_area (struct range_table_work_area *work_area)
177c0ea7 2123{
77d11aec 2124 work_area->allocated += 16 * sizeof (int);
38182d90 2125 work_area->table = realloc (work_area->table, work_area->allocated);
77d11aec
RS
2126}
2127
8f924df7 2128#if 0
77d11aec
RS
2129#ifdef emacs
2130
2131/* Carefully find the ranges of codes that are equivalent
2132 under case conversion to the range start..end when passed through
2133 TRANSLATE. Handle the case where non-letters can come in between
2134 two upper-case letters (which happens in Latin-1).
2135 Also handle the case of groups of more than 2 case-equivalent chars.
2136
2137 The basic method is to look at consecutive characters and see
2138 if they can form a run that can be handled as one.
2139
2140 Returns -1 if successful, REG_ESPACE if ran out of space. */
2141
2142static int
1dae0f0a
AS
2143set_image_of_range_1 (struct range_table_work_area *work_area,
2144 re_wchar_t start, re_wchar_t end,
2145 RE_TRANSLATE_TYPE translate)
77d11aec
RS
2146{
2147 /* `one_case' indicates a character, or a run of characters,
2148 each of which is an isolate (no case-equivalents).
2149 This includes all ASCII non-letters.
2150
2151 `two_case' indicates a character, or a run of characters,
2152 each of which has two case-equivalent forms.
2153 This includes all ASCII letters.
2154
2155 `strange' indicates a character that has more than one
2156 case-equivalent. */
177c0ea7 2157
77d11aec
RS
2158 enum case_type {one_case, two_case, strange};
2159
2160 /* Describe the run that is in progress,
2161 which the next character can try to extend.
2162 If run_type is strange, that means there really is no run.
2163 If run_type is one_case, then run_start...run_end is the run.
2164 If run_type is two_case, then the run is run_start...run_end,
2165 and the case-equivalents end at run_eqv_end. */
2166
2167 enum case_type run_type = strange;
2168 int run_start, run_end, run_eqv_end;
2169
2170 Lisp_Object eqv_table;
2171
2172 if (!RE_TRANSLATE_P (translate))
2173 {
b7c12565 2174 EXTEND_RANGE_TABLE (work_area, 2);
77d11aec
RS
2175 work_area->table[work_area->used++] = (start);
2176 work_area->table[work_area->used++] = (end);
b7c12565 2177 return -1;
77d11aec
RS
2178 }
2179
2180 eqv_table = XCHAR_TABLE (translate)->extras[2];
99633e97 2181
77d11aec
RS
2182 for (; start <= end; start++)
2183 {
2184 enum case_type this_type;
2185 int eqv = RE_TRANSLATE (eqv_table, start);
2186 int minchar, maxchar;
2187
2188 /* Classify this character */
2189 if (eqv == start)
2190 this_type = one_case;
2191 else if (RE_TRANSLATE (eqv_table, eqv) == start)
2192 this_type = two_case;
2193 else
2194 this_type = strange;
2195
2196 if (start < eqv)
2197 minchar = start, maxchar = eqv;
2198 else
2199 minchar = eqv, maxchar = start;
2200
2201 /* Can this character extend the run in progress? */
2202 if (this_type == strange || this_type != run_type
2203 || !(minchar == run_end + 1
2204 && (run_type == two_case
2205 ? maxchar == run_eqv_end + 1 : 1)))
2206 {
2207 /* No, end the run.
2208 Record each of its equivalent ranges. */
2209 if (run_type == one_case)
2210 {
2211 EXTEND_RANGE_TABLE (work_area, 2);
2212 work_area->table[work_area->used++] = run_start;
2213 work_area->table[work_area->used++] = run_end;
2214 }
2215 else if (run_type == two_case)
2216 {
2217 EXTEND_RANGE_TABLE (work_area, 4);
2218 work_area->table[work_area->used++] = run_start;
2219 work_area->table[work_area->used++] = run_end;
2220 work_area->table[work_area->used++]
2221 = RE_TRANSLATE (eqv_table, run_start);
2222 work_area->table[work_area->used++]
2223 = RE_TRANSLATE (eqv_table, run_end);
2224 }
2225 run_type = strange;
2226 }
177c0ea7 2227
77d11aec
RS
2228 if (this_type == strange)
2229 {
2230 /* For a strange character, add each of its equivalents, one
2231 by one. Don't start a range. */
2232 do
2233 {
2234 EXTEND_RANGE_TABLE (work_area, 2);
2235 work_area->table[work_area->used++] = eqv;
2236 work_area->table[work_area->used++] = eqv;
2237 eqv = RE_TRANSLATE (eqv_table, eqv);
2238 }
2239 while (eqv != start);
2240 }
2241
2242 /* Add this char to the run, or start a new run. */
2243 else if (run_type == strange)
2244 {
2245 /* Initialize a new range. */
2246 run_type = this_type;
2247 run_start = start;
2248 run_end = start;
2249 run_eqv_end = RE_TRANSLATE (eqv_table, run_end);
2250 }
2251 else
2252 {
2253 /* Extend a running range. */
2254 run_end = minchar;
2255 run_eqv_end = RE_TRANSLATE (eqv_table, run_end);
2256 }
2257 }
2258
2259 /* If a run is still in progress at the end, finish it now
2260 by recording its equivalent ranges. */
2261 if (run_type == one_case)
2262 {
2263 EXTEND_RANGE_TABLE (work_area, 2);
2264 work_area->table[work_area->used++] = run_start;
2265 work_area->table[work_area->used++] = run_end;
2266 }
2267 else if (run_type == two_case)
2268 {
2269 EXTEND_RANGE_TABLE (work_area, 4);
2270 work_area->table[work_area->used++] = run_start;
2271 work_area->table[work_area->used++] = run_end;
2272 work_area->table[work_area->used++]
2273 = RE_TRANSLATE (eqv_table, run_start);
2274 work_area->table[work_area->used++]
2275 = RE_TRANSLATE (eqv_table, run_end);
2276 }
2277
2278 return -1;
2279}
36595814 2280
77d11aec 2281#endif /* emacs */
36595814 2282
2b34df4e 2283/* Record the image of the range start..end when passed through
36595814
SM
2284 TRANSLATE. This is not necessarily TRANSLATE(start)..TRANSLATE(end)
2285 and is not even necessarily contiguous.
b7c12565
RS
2286 Normally we approximate it with the smallest contiguous range that contains
2287 all the chars we need. However, for Latin-1 we go to extra effort
2288 to do a better job.
2289
2290 This function is not called for ASCII ranges.
77d11aec
RS
2291
2292 Returns -1 if successful, REG_ESPACE if ran out of space. */
2293
2294static int
1dae0f0a
AS
2295set_image_of_range (struct range_table_work_area *work_area,
2296 re_wchar_t start, re_wchar_t end,
2297 RE_TRANSLATE_TYPE translate)
36595814 2298{
77d11aec
RS
2299 re_wchar_t cmin, cmax;
2300
2301#ifdef emacs
2302 /* For Latin-1 ranges, use set_image_of_range_1
2303 to get proper handling of ranges that include letters and nonletters.
b7c12565 2304 For a range that includes the whole of Latin-1, this is not necessary.
77d11aec 2305 For other character sets, we don't bother to get this right. */
b7c12565
RS
2306 if (RE_TRANSLATE_P (translate) && start < 04400
2307 && !(start < 04200 && end >= 04377))
77d11aec 2308 {
b7c12565 2309 int newend;
77d11aec 2310 int tem;
b7c12565
RS
2311 newend = end;
2312 if (newend > 04377)
2313 newend = 04377;
2314 tem = set_image_of_range_1 (work_area, start, newend, translate);
77d11aec
RS
2315 if (tem > 0)
2316 return tem;
2317
2318 start = 04400;
2319 if (end < 04400)
2320 return -1;
2321 }
2322#endif
2323
b7c12565
RS
2324 EXTEND_RANGE_TABLE (work_area, 2);
2325 work_area->table[work_area->used++] = (start);
2326 work_area->table[work_area->used++] = (end);
2327
2328 cmin = -1, cmax = -1;
77d11aec 2329
36595814 2330 if (RE_TRANSLATE_P (translate))
b7c12565
RS
2331 {
2332 int ch;
77d11aec 2333
b7c12565
RS
2334 for (ch = start; ch <= end; ch++)
2335 {
2336 re_wchar_t c = TRANSLATE (ch);
2337 if (! (start <= c && c <= end))
2338 {
2339 if (cmin == -1)
2340 cmin = c, cmax = c;
2341 else
2342 {
2343 cmin = MIN (cmin, c);
2344 cmax = MAX (cmax, c);
2345 }
2346 }
2347 }
2348
2349 if (cmin != -1)
2350 {
2351 EXTEND_RANGE_TABLE (work_area, 2);
2352 work_area->table[work_area->used++] = (cmin);
2353 work_area->table[work_area->used++] = (cmax);
2354 }
2355 }
36595814 2356
77d11aec
RS
2357 return -1;
2358}
8f924df7 2359#endif /* 0 */
fa9a63c5
RM
2360\f
2361#ifndef MATCH_MAY_ALLOCATE
2362
2363/* If we cannot allocate large objects within re_match_2_internal,
2364 we make the fail stack and register vectors global.
2365 The fail stack, we grow to the maximum size when a regexp
2366 is compiled.
2367 The register vectors, we adjust in size each time we
2368 compile a regexp, according to the number of registers it needs. */
2369
2370static fail_stack_type fail_stack;
2371
2372/* Size with which the following vectors are currently allocated.
2373 That is so we can make them bigger as needed,
4bb91c68 2374 but never make them smaller. */
fa9a63c5
RM
2375static int regs_allocated_size;
2376
66f0296e
SM
2377static re_char ** regstart, ** regend;
2378static re_char **best_regstart, **best_regend;
fa9a63c5
RM
2379
2380/* Make the register vectors big enough for NUM_REGS registers,
4bb91c68 2381 but don't make them smaller. */
fa9a63c5
RM
2382
2383static
1dae0f0a 2384regex_grow_registers (int num_regs)
fa9a63c5
RM
2385{
2386 if (num_regs > regs_allocated_size)
2387 {
66f0296e
SM
2388 RETALLOC_IF (regstart, num_regs, re_char *);
2389 RETALLOC_IF (regend, num_regs, re_char *);
2390 RETALLOC_IF (best_regstart, num_regs, re_char *);
2391 RETALLOC_IF (best_regend, num_regs, re_char *);
fa9a63c5
RM
2392
2393 regs_allocated_size = num_regs;
2394 }
2395}
2396
2397#endif /* not MATCH_MAY_ALLOCATE */
2398\f
261cb4bb
PE
2399static boolean group_in_compile_stack (compile_stack_type compile_stack,
2400 regnum_t regnum);
99633e97 2401
fa9a63c5
RM
2402/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
2403 Returns one of error codes defined in `regex.h', or zero for success.
2404
2405 Assumes the `allocated' (and perhaps `buffer') and `translate'
2406 fields are set in BUFP on entry.
2407
2408 If it succeeds, results are put in BUFP (if it returns an error, the
2409 contents of BUFP are undefined):
2410 `buffer' is the compiled pattern;
2411 `syntax' is set to SYNTAX;
2412 `used' is set to the length of the compiled pattern;
2413 `fastmap_accurate' is zero;
2414 `re_nsub' is the number of subexpressions in PATTERN;
2415 `not_bol' and `not_eol' are zero;
5e69f11e 2416
c0f9ea08 2417 The `fastmap' field is neither examined nor set. */
fa9a63c5 2418
505bde11
SM
2419/* Insert the `jump' from the end of last alternative to "here".
2420 The space for the jump has already been allocated. */
2421#define FIXUP_ALT_JUMP() \
2422do { \
2423 if (fixup_alt_jump) \
2424 STORE_JUMP (jump, fixup_alt_jump, b); \
2425} while (0)
2426
2427
fa9a63c5
RM
2428/* Return, freeing storage we allocated. */
2429#define FREE_STACK_RETURN(value) \
b18215fc
RS
2430 do { \
2431 FREE_RANGE_TABLE_WORK_AREA (range_table_work); \
2432 free (compile_stack.stack); \
2433 return value; \
2434 } while (0)
fa9a63c5
RM
2435
2436static reg_errcode_t
971de7fb 2437regex_compile (const re_char *pattern, size_t size, reg_syntax_t syntax, struct re_pattern_buffer *bufp)
fa9a63c5 2438{
01618498
SM
2439 /* We fetch characters from PATTERN here. */
2440 register re_wchar_t c, c1;
5e69f11e 2441
fa9a63c5
RM
2442 /* Points to the end of the buffer, where we should append. */
2443 register unsigned char *b;
5e69f11e 2444
fa9a63c5
RM
2445 /* Keeps track of unclosed groups. */
2446 compile_stack_type compile_stack;
2447
2448 /* Points to the current (ending) position in the pattern. */
22336245
RS
2449#ifdef AIX
2450 /* `const' makes AIX compiler fail. */
66f0296e 2451 unsigned char *p = pattern;
22336245 2452#else
66f0296e 2453 re_char *p = pattern;
22336245 2454#endif
66f0296e 2455 re_char *pend = pattern + size;
5e69f11e 2456
fa9a63c5 2457 /* How to translate the characters in the pattern. */
6676cb1c 2458 RE_TRANSLATE_TYPE translate = bufp->translate;
fa9a63c5
RM
2459
2460 /* Address of the count-byte of the most recently inserted `exactn'
2461 command. This makes it possible to tell if a new exact-match
2462 character can be added to that command or if the character requires
2463 a new `exactn' command. */
2464 unsigned char *pending_exact = 0;
2465
2466 /* Address of start of the most recently finished expression.
2467 This tells, e.g., postfix * where to find the start of its
2468 operand. Reset at the beginning of groups and alternatives. */
2469 unsigned char *laststart = 0;
2470
2471 /* Address of beginning of regexp, or inside of last group. */
2472 unsigned char *begalt;
2473
2474 /* Place in the uncompiled pattern (i.e., the {) to
2475 which to go back if the interval is invalid. */
66f0296e 2476 re_char *beg_interval;
5e69f11e 2477
fa9a63c5 2478 /* Address of the place where a forward jump should go to the end of
7814e705 2479 the containing expression. Each alternative of an `or' -- except the
fa9a63c5
RM
2480 last -- ends with a forward jump of this sort. */
2481 unsigned char *fixup_alt_jump = 0;
2482
b18215fc
RS
2483 /* Work area for range table of charset. */
2484 struct range_table_work_area range_table_work;
2485
2d1675e4
SM
2486 /* If the object matched can contain multibyte characters. */
2487 const boolean multibyte = RE_MULTIBYTE_P (bufp);
2488
f9b0fd99
RS
2489 /* Nonzero if we have pushed down into a subpattern. */
2490 int in_subpattern = 0;
2491
2492 /* These hold the values of p, pattern, and pend from the main
2493 pattern when we have pushed into a subpattern. */
da053e48
PE
2494 re_char *main_p IF_LINT (= NULL);
2495 re_char *main_pattern IF_LINT (= NULL);
2496 re_char *main_pend IF_LINT (= NULL);
f9b0fd99 2497
fa9a63c5 2498#ifdef DEBUG
99633e97 2499 debug++;
fa9a63c5 2500 DEBUG_PRINT1 ("\nCompiling pattern: ");
99633e97 2501 if (debug > 0)
fa9a63c5
RM
2502 {
2503 unsigned debug_count;
5e69f11e 2504
fa9a63c5 2505 for (debug_count = 0; debug_count < size; debug_count++)
25fe55af 2506 putchar (pattern[debug_count]);
fa9a63c5
RM
2507 putchar ('\n');
2508 }
2509#endif /* DEBUG */
2510
2511 /* Initialize the compile stack. */
2512 compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
2513 if (compile_stack.stack == NULL)
2514 return REG_ESPACE;
2515
2516 compile_stack.size = INIT_COMPILE_STACK_SIZE;
2517 compile_stack.avail = 0;
2518
b18215fc
RS
2519 range_table_work.table = 0;
2520 range_table_work.allocated = 0;
2521
fa9a63c5
RM
2522 /* Initialize the pattern buffer. */
2523 bufp->syntax = syntax;
2524 bufp->fastmap_accurate = 0;
2525 bufp->not_bol = bufp->not_eol = 0;
6224b623 2526 bufp->used_syntax = 0;
fa9a63c5
RM
2527
2528 /* Set `used' to zero, so that if we return an error, the pattern
2529 printer (for debugging) will think there's no pattern. We reset it
2530 at the end. */
2531 bufp->used = 0;
5e69f11e 2532
fa9a63c5 2533 /* Always count groups, whether or not bufp->no_sub is set. */
5e69f11e 2534 bufp->re_nsub = 0;
fa9a63c5 2535
0b32bf0e 2536#if !defined emacs && !defined SYNTAX_TABLE
fa9a63c5
RM
2537 /* Initialize the syntax table. */
2538 init_syntax_once ();
2539#endif
2540
2541 if (bufp->allocated == 0)
2542 {
2543 if (bufp->buffer)
2544 { /* If zero allocated, but buffer is non-null, try to realloc
25fe55af 2545 enough space. This loses if buffer's address is bogus, but
7814e705 2546 that is the user's responsibility. */
25fe55af
RS
2547 RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
2548 }
fa9a63c5 2549 else
7814e705 2550 { /* Caller did not allocate a buffer. Do it for them. */
25fe55af
RS
2551 bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
2552 }
fa9a63c5
RM
2553 if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
2554
2555 bufp->allocated = INIT_BUF_SIZE;
2556 }
2557
2558 begalt = b = bufp->buffer;
2559
2560 /* Loop through the uncompiled pattern until we're at the end. */
f9b0fd99 2561 while (1)
fa9a63c5 2562 {
f9b0fd99
RS
2563 if (p == pend)
2564 {
2565 /* If this is the end of an included regexp,
2566 pop back to the main regexp and try again. */
2567 if (in_subpattern)
2568 {
2569 in_subpattern = 0;
2570 pattern = main_pattern;
2571 p = main_p;
2572 pend = main_pend;
2573 continue;
2574 }
2575 /* If this is the end of the main regexp, we are done. */
2576 break;
2577 }
2578
fa9a63c5
RM
2579 PATFETCH (c);
2580
2581 switch (c)
25fe55af 2582 {
f9b0fd99
RS
2583 case ' ':
2584 {
2585 re_char *p1 = p;
2586
2587 /* If there's no special whitespace regexp, treat
4fb680cd
RS
2588 spaces normally. And don't try to do this recursively. */
2589 if (!whitespace_regexp || in_subpattern)
f9b0fd99
RS
2590 goto normal_char;
2591
2592 /* Peek past following spaces. */
2593 while (p1 != pend)
2594 {
2595 if (*p1 != ' ')
2596 break;
2597 p1++;
2598 }
2599 /* If the spaces are followed by a repetition op,
2600 treat them normally. */
c721eee5
RS
2601 if (p1 != pend
2602 && (*p1 == '*' || *p1 == '+' || *p1 == '?'
f9b0fd99
RS
2603 || (*p1 == '\\' && p1 + 1 != pend && p1[1] == '{')))
2604 goto normal_char;
2605
2606 /* Replace the spaces with the whitespace regexp. */
2607 in_subpattern = 1;
2608 main_p = p1;
2609 main_pend = pend;
2610 main_pattern = pattern;
2611 p = pattern = whitespace_regexp;
5b0534c8 2612 pend = p + strlen ((const char *) p);
f9b0fd99 2613 break;
7814e705 2614 }
f9b0fd99 2615
25fe55af
RS
2616 case '^':
2617 {
7814e705 2618 if ( /* If at start of pattern, it's an operator. */
25fe55af 2619 p == pattern + 1
7814e705 2620 /* If context independent, it's an operator. */
25fe55af 2621 || syntax & RE_CONTEXT_INDEP_ANCHORS
7814e705 2622 /* Otherwise, depends on what's come before. */
25fe55af 2623 || at_begline_loc_p (pattern, p, syntax))
c0f9ea08 2624 BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? begbuf : begline);
25fe55af
RS
2625 else
2626 goto normal_char;
2627 }
2628 break;
2629
2630
2631 case '$':
2632 {
2633 if ( /* If at end of pattern, it's an operator. */
2634 p == pend
7814e705 2635 /* If context independent, it's an operator. */
25fe55af
RS
2636 || syntax & RE_CONTEXT_INDEP_ANCHORS
2637 /* Otherwise, depends on what's next. */
2638 || at_endline_loc_p (p, pend, syntax))
c0f9ea08 2639 BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? endbuf : endline);
25fe55af
RS
2640 else
2641 goto normal_char;
2642 }
2643 break;
fa9a63c5
RM
2644
2645
2646 case '+':
25fe55af
RS
2647 case '?':
2648 if ((syntax & RE_BK_PLUS_QM)
2649 || (syntax & RE_LIMITED_OPS))
2650 goto normal_char;
2651 handle_plus:
2652 case '*':
2653 /* If there is no previous pattern... */
2654 if (!laststart)
2655 {
2656 if (syntax & RE_CONTEXT_INVALID_OPS)
2657 FREE_STACK_RETURN (REG_BADRPT);
2658 else if (!(syntax & RE_CONTEXT_INDEP_OPS))
2659 goto normal_char;
2660 }
2661
2662 {
7814e705 2663 /* 1 means zero (many) matches is allowed. */
66f0296e
SM
2664 boolean zero_times_ok = 0, many_times_ok = 0;
2665 boolean greedy = 1;
25fe55af
RS
2666
2667 /* If there is a sequence of repetition chars, collapse it
2668 down to just one (the right one). We can't combine
2669 interval operators with these because of, e.g., `a{2}*',
7814e705 2670 which should only match an even number of `a's. */
25fe55af
RS
2671
2672 for (;;)
2673 {
0b32bf0e 2674 if ((syntax & RE_FRUGAL)
1c8c6d39
DL
2675 && c == '?' && (zero_times_ok || many_times_ok))
2676 greedy = 0;
2677 else
2678 {
2679 zero_times_ok |= c != '+';
2680 many_times_ok |= c != '?';
2681 }
25fe55af
RS
2682
2683 if (p == pend)
2684 break;
ed0767d8
SM
2685 else if (*p == '*'
2686 || (!(syntax & RE_BK_PLUS_QM)
2687 && (*p == '+' || *p == '?')))
25fe55af 2688 ;
ed0767d8 2689 else if (syntax & RE_BK_PLUS_QM && *p == '\\')
25fe55af 2690 {
ed0767d8
SM
2691 if (p+1 == pend)
2692 FREE_STACK_RETURN (REG_EESCAPE);
2693 if (p[1] == '+' || p[1] == '?')
2694 PATFETCH (c); /* Gobble up the backslash. */
2695 else
2696 break;
25fe55af
RS
2697 }
2698 else
ed0767d8 2699 break;
25fe55af 2700 /* If we get here, we found another repeat character. */
ed0767d8
SM
2701 PATFETCH (c);
2702 }
25fe55af
RS
2703
2704 /* Star, etc. applied to an empty pattern is equivalent
2705 to an empty pattern. */
4e8a9132 2706 if (!laststart || laststart == b)
25fe55af
RS
2707 break;
2708
2709 /* Now we know whether or not zero matches is allowed
7814e705 2710 and also whether or not two or more matches is allowed. */
1c8c6d39
DL
2711 if (greedy)
2712 {
99633e97 2713 if (many_times_ok)
4e8a9132
SM
2714 {
2715 boolean simple = skip_one_char (laststart) == b;
d1dfb56c 2716 size_t startoffset = 0;
f6a3f532 2717 re_opcode_t ofj =
01618498 2718 /* Check if the loop can match the empty string. */
6df42991
SM
2719 (simple || !analyse_first (laststart, b, NULL, 0))
2720 ? on_failure_jump : on_failure_jump_loop;
4e8a9132 2721 assert (skip_one_char (laststart) <= b);
177c0ea7 2722
4e8a9132
SM
2723 if (!zero_times_ok && simple)
2724 { /* Since simple * loops can be made faster by using
2725 on_failure_keep_string_jump, we turn simple P+
2726 into PP* if P is simple. */
2727 unsigned char *p1, *p2;
2728 startoffset = b - laststart;
2729 GET_BUFFER_SPACE (startoffset);
2730 p1 = b; p2 = laststart;
2731 while (p2 < p1)
2732 *b++ = *p2++;
2733 zero_times_ok = 1;
99633e97 2734 }
4e8a9132
SM
2735
2736 GET_BUFFER_SPACE (6);
2737 if (!zero_times_ok)
2738 /* A + loop. */
f6a3f532 2739 STORE_JUMP (ofj, b, b + 6);
99633e97 2740 else
4e8a9132
SM
2741 /* Simple * loops can use on_failure_keep_string_jump
2742 depending on what follows. But since we don't know
2743 that yet, we leave the decision up to
2744 on_failure_jump_smart. */
f6a3f532 2745 INSERT_JUMP (simple ? on_failure_jump_smart : ofj,
4e8a9132 2746 laststart + startoffset, b + 6);
99633e97 2747 b += 3;
4e8a9132 2748 STORE_JUMP (jump, b, laststart + startoffset);
99633e97
SM
2749 b += 3;
2750 }
2751 else
2752 {
4e8a9132
SM
2753 /* A simple ? pattern. */
2754 assert (zero_times_ok);
2755 GET_BUFFER_SPACE (3);
2756 INSERT_JUMP (on_failure_jump, laststart, b + 3);
99633e97
SM
2757 b += 3;
2758 }
1c8c6d39
DL
2759 }
2760 else /* not greedy */
2761 { /* I wish the greedy and non-greedy cases could be merged. */
2762
0683b6fa 2763 GET_BUFFER_SPACE (7); /* We might use less. */
1c8c6d39
DL
2764 if (many_times_ok)
2765 {
f6a3f532
SM
2766 boolean emptyp = analyse_first (laststart, b, NULL, 0);
2767
6df42991
SM
2768 /* The non-greedy multiple match looks like
2769 a repeat..until: we only need a conditional jump
2770 at the end of the loop. */
f6a3f532
SM
2771 if (emptyp) BUF_PUSH (no_op);
2772 STORE_JUMP (emptyp ? on_failure_jump_nastyloop
2773 : on_failure_jump, b, laststart);
1c8c6d39
DL
2774 b += 3;
2775 if (zero_times_ok)
2776 {
2777 /* The repeat...until naturally matches one or more.
2778 To also match zero times, we need to first jump to
6df42991 2779 the end of the loop (its conditional jump). */
1c8c6d39
DL
2780 INSERT_JUMP (jump, laststart, b);
2781 b += 3;
2782 }
2783 }
2784 else
2785 {
2786 /* non-greedy a?? */
1c8c6d39
DL
2787 INSERT_JUMP (jump, laststart, b + 3);
2788 b += 3;
2789 INSERT_JUMP (on_failure_jump, laststart, laststart + 6);
2790 b += 3;
2791 }
2792 }
2793 }
4e8a9132 2794 pending_exact = 0;
fa9a63c5
RM
2795 break;
2796
2797
2798 case '.':
25fe55af
RS
2799 laststart = b;
2800 BUF_PUSH (anychar);
2801 break;
fa9a63c5
RM
2802
2803
25fe55af
RS
2804 case '[':
2805 {
19ed5445
PE
2806 re_char *p1;
2807
b18215fc 2808 CLEAR_RANGE_TABLE_WORK_USED (range_table_work);
fa9a63c5 2809
25fe55af 2810 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
fa9a63c5 2811
25fe55af
RS
2812 /* Ensure that we have enough space to push a charset: the
2813 opcode, the length count, and the bitset; 34 bytes in all. */
fa9a63c5
RM
2814 GET_BUFFER_SPACE (34);
2815
25fe55af 2816 laststart = b;
e318085a 2817
25fe55af 2818 /* We test `*p == '^' twice, instead of using an if
7814e705 2819 statement, so we only need one BUF_PUSH. */
25fe55af
RS
2820 BUF_PUSH (*p == '^' ? charset_not : charset);
2821 if (*p == '^')
2822 p++;
e318085a 2823
25fe55af
RS
2824 /* Remember the first position in the bracket expression. */
2825 p1 = p;
e318085a 2826
7814e705 2827 /* Push the number of bytes in the bitmap. */
25fe55af 2828 BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
e318085a 2829
25fe55af 2830 /* Clear the whole map. */
72af86bd 2831 memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
e318085a 2832
25fe55af
RS
2833 /* charset_not matches newline according to a syntax bit. */
2834 if ((re_opcode_t) b[-2] == charset_not
2835 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2836 SET_LIST_BIT ('\n');
fa9a63c5 2837
7814e705 2838 /* Read in characters and ranges, setting map bits. */
25fe55af
RS
2839 for (;;)
2840 {
b18215fc 2841 boolean escaped_char = false;
2d1675e4 2842 const unsigned char *p2 = p;
abbd1bcf 2843 re_wchar_t ch;
e318085a 2844
25fe55af 2845 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
e318085a 2846
36595814
SM
2847 /* Don't translate yet. The range TRANSLATE(X..Y) cannot
2848 always be determined from TRANSLATE(X) and TRANSLATE(Y)
2849 So the translation is done later in a loop. Example:
2850 (let ((case-fold-search t)) (string-match "[A-_]" "A")) */
25fe55af 2851 PATFETCH (c);
e318085a 2852
25fe55af
RS
2853 /* \ might escape characters inside [...] and [^...]. */
2854 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2855 {
2856 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
e318085a
RS
2857
2858 PATFETCH (c);
b18215fc 2859 escaped_char = true;
25fe55af 2860 }
b18215fc
RS
2861 else
2862 {
7814e705 2863 /* Could be the end of the bracket expression. If it's
657fcfbd
RS
2864 not (i.e., when the bracket expression is `[]' so
2865 far), the ']' character bit gets set way below. */
2d1675e4 2866 if (c == ']' && p2 != p1)
657fcfbd 2867 break;
25fe55af 2868 }
b18215fc 2869
25fe55af
RS
2870 /* See if we're at the beginning of a possible character
2871 class. */
b18215fc 2872
2d1675e4
SM
2873 if (!escaped_char &&
2874 syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
657fcfbd 2875 {
7814e705 2876 /* Leave room for the null. */
14473664 2877 unsigned char str[CHAR_CLASS_MAX_LENGTH + 1];
ed0767d8 2878 const unsigned char *class_beg;
b18215fc 2879
25fe55af
RS
2880 PATFETCH (c);
2881 c1 = 0;
ed0767d8 2882 class_beg = p;
b18215fc 2883
25fe55af
RS
2884 /* If pattern is `[[:'. */
2885 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
b18215fc 2886
25fe55af
RS
2887 for (;;)
2888 {
14473664
SM
2889 PATFETCH (c);
2890 if ((c == ':' && *p == ']') || p == pend)
2891 break;
2892 if (c1 < CHAR_CLASS_MAX_LENGTH)
2893 str[c1++] = c;
2894 else
2895 /* This is in any case an invalid class name. */
2896 str[0] = '\0';
25fe55af
RS
2897 }
2898 str[c1] = '\0';
b18215fc
RS
2899
2900 /* If isn't a word bracketed by `[:' and `:]':
2901 undo the ending character, the letters, and
2902 leave the leading `:' and `[' (but set bits for
2903 them). */
25fe55af
RS
2904 if (c == ':' && *p == ']')
2905 {
abbd1bcf 2906 re_wctype_t cc = re_wctype (str);
14473664
SM
2907
2908 if (cc == 0)
fa9a63c5
RM
2909 FREE_STACK_RETURN (REG_ECTYPE);
2910
14473664
SM
2911 /* Throw away the ] at the end of the character
2912 class. */
2913 PATFETCH (c);
fa9a63c5 2914
14473664 2915 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
fa9a63c5 2916
cf9c99bc
KH
2917#ifndef emacs
2918 for (ch = 0; ch < (1 << BYTEWIDTH); ++ch)
8f924df7
KH
2919 if (re_iswctype (btowc (ch), cc))
2920 {
2921 c = TRANSLATE (ch);
ed00c2ac
KH
2922 if (c < (1 << BYTEWIDTH))
2923 SET_LIST_BIT (c);
8f924df7 2924 }
cf9c99bc
KH
2925#else /* emacs */
2926 /* Most character classes in a multibyte match
2927 just set a flag. Exceptions are is_blank,
2928 is_digit, is_cntrl, and is_xdigit, since
2929 they can only match ASCII characters. We
2930 don't need to handle them for multibyte.
2931 They are distinguished by a negative wctype. */
96cc36cc 2932
254c06a8
SM
2933 /* Setup the gl_state object to its buffer-defined
2934 value. This hardcodes the buffer-global
2935 syntax-table for ASCII chars, while the other chars
2936 will obey syntax-table properties. It's not ideal,
2937 but it's the way it's been done until now. */
d48cd3f4 2938 SETUP_BUFFER_SYNTAX_TABLE ();
254c06a8 2939
cf9c99bc 2940 for (ch = 0; ch < 256; ++ch)
25fe55af 2941 {
cf9c99bc
KH
2942 c = RE_CHAR_TO_MULTIBYTE (ch);
2943 if (! CHAR_BYTE8_P (c)
2944 && re_iswctype (c, cc))
8f924df7 2945 {
cf9c99bc
KH
2946 SET_LIST_BIT (ch);
2947 c1 = TRANSLATE (c);
2948 if (c1 == c)
2949 continue;
2950 if (ASCII_CHAR_P (c1))
2951 SET_LIST_BIT (c1);
2952 else if ((c1 = RE_CHAR_TO_UNIBYTE (c1)) >= 0)
2953 SET_LIST_BIT (c1);
8f924df7 2954 }
25fe55af 2955 }
cf9c99bc
KH
2956 SET_RANGE_TABLE_WORK_AREA_BIT
2957 (range_table_work, re_wctype_to_bit (cc));
2958#endif /* emacs */
6224b623
SM
2959 /* In most cases the matching rule for char classes
2960 only uses the syntax table for multibyte chars,
2961 so that the content of the syntax-table it is not
2962 hardcoded in the range_table. SPACE and WORD are
2963 the two exceptions. */
2964 if ((1 << cc) & ((1 << RECC_SPACE) | (1 << RECC_WORD)))
2965 bufp->used_syntax = 1;
2966
b18215fc
RS
2967 /* Repeat the loop. */
2968 continue;
25fe55af
RS
2969 }
2970 else
2971 {
ed0767d8
SM
2972 /* Go back to right after the "[:". */
2973 p = class_beg;
25fe55af 2974 SET_LIST_BIT ('[');
b18215fc
RS
2975
2976 /* Because the `:' may starts the range, we
2977 can't simply set bit and repeat the loop.
7814e705 2978 Instead, just set it to C and handle below. */
b18215fc 2979 c = ':';
25fe55af
RS
2980 }
2981 }
b18215fc
RS
2982
2983 if (p < pend && p[0] == '-' && p[1] != ']')
2984 {
2985
2986 /* Discard the `-'. */
2987 PATFETCH (c1);
2988
2989 /* Fetch the character which ends the range. */
2990 PATFETCH (c1);
cf9c99bc
KH
2991#ifdef emacs
2992 if (CHAR_BYTE8_P (c1)
2993 && ! ASCII_CHAR_P (c) && ! CHAR_BYTE8_P (c))
2994 /* Treat the range from a multibyte character to
2995 raw-byte character as empty. */
2996 c = c1 + 1;
2997#endif /* emacs */
e318085a 2998 }
25fe55af 2999 else
b18215fc
RS
3000 /* Range from C to C. */
3001 c1 = c;
3002
cf9c99bc 3003 if (c > c1)
25fe55af 3004 {
cf9c99bc
KH
3005 if (syntax & RE_NO_EMPTY_RANGES)
3006 FREE_STACK_RETURN (REG_ERANGEX);
3007 /* Else, repeat the loop. */
bf216479 3008 }
6fdd04b0 3009 else
25fe55af 3010 {
cf9c99bc
KH
3011#ifndef emacs
3012 /* Set the range into bitmap */
8f924df7 3013 for (; c <= c1; c++)
b18215fc 3014 {
cf9c99bc
KH
3015 ch = TRANSLATE (c);
3016 if (ch < (1 << BYTEWIDTH))
3017 SET_LIST_BIT (ch);
3018 }
3019#else /* emacs */
3020 if (c < 128)
3021 {
3022 ch = MIN (127, c1);
3023 SETUP_ASCII_RANGE (range_table_work, c, ch);
3024 c = ch + 1;
3025 if (CHAR_BYTE8_P (c1))
3026 c = BYTE8_TO_CHAR (128);
3027 }
3028 if (c <= c1)
3029 {
3030 if (CHAR_BYTE8_P (c))
3031 {
3032 c = CHAR_TO_BYTE8 (c);
3033 c1 = CHAR_TO_BYTE8 (c1);
3034 for (; c <= c1; c++)
3035 SET_LIST_BIT (c);
3036 }
3037 else if (multibyte)
3038 {
3039 SETUP_MULTIBYTE_RANGE (range_table_work, c, c1);
3040 }
3041 else
3042 {
3043 SETUP_UNIBYTE_RANGE (range_table_work, c, c1);
3044 }
e934739e 3045 }
cf9c99bc 3046#endif /* emacs */
25fe55af 3047 }
e318085a
RS
3048 }
3049
25fe55af 3050 /* Discard any (non)matching list bytes that are all 0 at the
7814e705 3051 end of the map. Decrease the map-length byte too. */
25fe55af
RS
3052 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
3053 b[-1]--;
3054 b += b[-1];
fa9a63c5 3055
96cc36cc
RS
3056 /* Build real range table from work area. */
3057 if (RANGE_TABLE_WORK_USED (range_table_work)
3058 || RANGE_TABLE_WORK_BITS (range_table_work))
b18215fc
RS
3059 {
3060 int i;
3061 int used = RANGE_TABLE_WORK_USED (range_table_work);
fa9a63c5 3062
b18215fc 3063 /* Allocate space for COUNT + RANGE_TABLE. Needs two
96cc36cc
RS
3064 bytes for flags, two for COUNT, and three bytes for
3065 each character. */
3066 GET_BUFFER_SPACE (4 + used * 3);
fa9a63c5 3067
b18215fc
RS
3068 /* Indicate the existence of range table. */
3069 laststart[1] |= 0x80;
fa9a63c5 3070
96cc36cc
RS
3071 /* Store the character class flag bits into the range table.
3072 If not in emacs, these flag bits are always 0. */
3073 *b++ = RANGE_TABLE_WORK_BITS (range_table_work) & 0xff;
3074 *b++ = RANGE_TABLE_WORK_BITS (range_table_work) >> 8;
3075
b18215fc
RS
3076 STORE_NUMBER_AND_INCR (b, used / 2);
3077 for (i = 0; i < used; i++)
3078 STORE_CHARACTER_AND_INCR
3079 (b, RANGE_TABLE_WORK_ELT (range_table_work, i));
3080 }
25fe55af
RS
3081 }
3082 break;
fa9a63c5
RM
3083
3084
b18215fc 3085 case '(':
25fe55af
RS
3086 if (syntax & RE_NO_BK_PARENS)
3087 goto handle_open;
3088 else
3089 goto normal_char;
fa9a63c5
RM
3090
3091
25fe55af
RS
3092 case ')':
3093 if (syntax & RE_NO_BK_PARENS)
3094 goto handle_close;
3095 else
3096 goto normal_char;
e318085a
RS
3097
3098
25fe55af
RS
3099 case '\n':
3100 if (syntax & RE_NEWLINE_ALT)
3101 goto handle_alt;
3102 else
3103 goto normal_char;
e318085a
RS
3104
3105
b18215fc 3106 case '|':
25fe55af
RS
3107 if (syntax & RE_NO_BK_VBAR)
3108 goto handle_alt;
3109 else
3110 goto normal_char;
3111
3112
3113 case '{':
3114 if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
3115 goto handle_interval;
3116 else
3117 goto normal_char;
3118
3119
3120 case '\\':
3121 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
3122
3123 /* Do not translate the character after the \, so that we can
3124 distinguish, e.g., \B from \b, even if we normally would
3125 translate, e.g., B to b. */
36595814 3126 PATFETCH (c);
25fe55af
RS
3127
3128 switch (c)
3129 {
3130 case '(':
3131 if (syntax & RE_NO_BK_PARENS)
3132 goto normal_backslash;
3133
3134 handle_open:
505bde11
SM
3135 {
3136 int shy = 0;
c69b0314 3137 regnum_t regnum = 0;
505bde11
SM
3138 if (p+1 < pend)
3139 {
3140 /* Look for a special (?...) construct */
ed0767d8 3141 if ((syntax & RE_SHY_GROUPS) && *p == '?')
505bde11 3142 {
ed0767d8 3143 PATFETCH (c); /* Gobble up the '?'. */
c69b0314 3144 while (!shy)
505bde11 3145 {
c69b0314
SM
3146 PATFETCH (c);
3147 switch (c)
3148 {
3149 case ':': shy = 1; break;
3150 case '0':
3151 /* An explicitly specified regnum must start
3152 with non-0. */
3153 if (regnum == 0)
3154 FREE_STACK_RETURN (REG_BADPAT);
3155 case '1': case '2': case '3': case '4':
3156 case '5': case '6': case '7': case '8': case '9':
3157 regnum = 10*regnum + (c - '0'); break;
3158 default:
3159 /* Only (?:...) is supported right now. */
3160 FREE_STACK_RETURN (REG_BADPAT);
3161 }
505bde11
SM
3162 }
3163 }
505bde11
SM
3164 }
3165
3166 if (!shy)
c69b0314
SM
3167 regnum = ++bufp->re_nsub;
3168 else if (regnum)
3169 { /* It's actually not shy, but explicitly numbered. */
3170 shy = 0;
3171 if (regnum > bufp->re_nsub)
3172 bufp->re_nsub = regnum;
3173 else if (regnum > bufp->re_nsub
3174 /* Ideally, we'd want to check that the specified
3175 group can't have matched (i.e. all subgroups
3176 using the same regnum are in other branches of
3177 OR patterns), but we don't currently keep track
3178 of enough info to do that easily. */
3179 || group_in_compile_stack (compile_stack, regnum))
3180 FREE_STACK_RETURN (REG_BADPAT);
505bde11 3181 }
c69b0314
SM
3182 else
3183 /* It's really shy. */
3184 regnum = - bufp->re_nsub;
25fe55af 3185
99633e97
SM
3186 if (COMPILE_STACK_FULL)
3187 {
3188 RETALLOC (compile_stack.stack, compile_stack.size << 1,
3189 compile_stack_elt_t);
3190 if (compile_stack.stack == NULL) return REG_ESPACE;
25fe55af 3191
99633e97
SM
3192 compile_stack.size <<= 1;
3193 }
25fe55af 3194
99633e97 3195 /* These are the values to restore when we hit end of this
7814e705 3196 group. They are all relative offsets, so that if the
99633e97
SM
3197 whole pattern moves because of realloc, they will still
3198 be valid. */
3199 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
3200 COMPILE_STACK_TOP.fixup_alt_jump
3201 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
3202 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
c69b0314 3203 COMPILE_STACK_TOP.regnum = regnum;
99633e97 3204
c69b0314
SM
3205 /* Do not push a start_memory for groups beyond the last one
3206 we can represent in the compiled pattern. */
3207 if (regnum <= MAX_REGNUM && regnum > 0)
99633e97
SM
3208 BUF_PUSH_2 (start_memory, regnum);
3209
3210 compile_stack.avail++;
3211
3212 fixup_alt_jump = 0;
3213 laststart = 0;
3214 begalt = b;
3215 /* If we've reached MAX_REGNUM groups, then this open
3216 won't actually generate any code, so we'll have to
3217 clear pending_exact explicitly. */
3218 pending_exact = 0;
3219 break;
505bde11 3220 }
25fe55af
RS
3221
3222 case ')':
3223 if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
3224
3225 if (COMPILE_STACK_EMPTY)
505bde11
SM
3226 {
3227 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
3228 goto normal_backslash;
3229 else
3230 FREE_STACK_RETURN (REG_ERPAREN);
3231 }
25fe55af
RS
3232
3233 handle_close:
505bde11 3234 FIXUP_ALT_JUMP ();
25fe55af
RS
3235
3236 /* See similar code for backslashed left paren above. */
3237 if (COMPILE_STACK_EMPTY)
505bde11
SM
3238 {
3239 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
3240 goto normal_char;
3241 else
3242 FREE_STACK_RETURN (REG_ERPAREN);
3243 }
25fe55af
RS
3244
3245 /* Since we just checked for an empty stack above, this
3246 ``can't happen''. */
3247 assert (compile_stack.avail != 0);
3248 {
3249 /* We don't just want to restore into `regnum', because
3250 later groups should continue to be numbered higher,
7814e705 3251 as in `(ab)c(de)' -- the second group is #2. */
c69b0314 3252 regnum_t regnum;
25fe55af
RS
3253
3254 compile_stack.avail--;
3255 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
3256 fixup_alt_jump
3257 = COMPILE_STACK_TOP.fixup_alt_jump
3258 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
3259 : 0;
3260 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
c69b0314 3261 regnum = COMPILE_STACK_TOP.regnum;
b18215fc
RS
3262 /* If we've reached MAX_REGNUM groups, then this open
3263 won't actually generate any code, so we'll have to
3264 clear pending_exact explicitly. */
3265 pending_exact = 0;
e318085a 3266
25fe55af 3267 /* We're at the end of the group, so now we know how many
7814e705 3268 groups were inside this one. */
c69b0314
SM
3269 if (regnum <= MAX_REGNUM && regnum > 0)
3270 BUF_PUSH_2 (stop_memory, regnum);
25fe55af
RS
3271 }
3272 break;
3273
3274
3275 case '|': /* `\|'. */
3276 if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
3277 goto normal_backslash;
3278 handle_alt:
3279 if (syntax & RE_LIMITED_OPS)
3280 goto normal_char;
3281
3282 /* Insert before the previous alternative a jump which
7814e705 3283 jumps to this alternative if the former fails. */
25fe55af
RS
3284 GET_BUFFER_SPACE (3);
3285 INSERT_JUMP (on_failure_jump, begalt, b + 6);
3286 pending_exact = 0;
3287 b += 3;
3288
3289 /* The alternative before this one has a jump after it
3290 which gets executed if it gets matched. Adjust that
3291 jump so it will jump to this alternative's analogous
3292 jump (put in below, which in turn will jump to the next
3293 (if any) alternative's such jump, etc.). The last such
3294 jump jumps to the correct final destination. A picture:
3295 _____ _____
3296 | | | |
3297 | v | v
d1dfb56c 3298 a | b | c
25fe55af
RS
3299
3300 If we are at `b', then fixup_alt_jump right now points to a
3301 three-byte space after `a'. We'll put in the jump, set
3302 fixup_alt_jump to right after `b', and leave behind three
3303 bytes which we'll fill in when we get to after `c'. */
3304
505bde11 3305 FIXUP_ALT_JUMP ();
25fe55af
RS
3306
3307 /* Mark and leave space for a jump after this alternative,
3308 to be filled in later either by next alternative or
3309 when know we're at the end of a series of alternatives. */
3310 fixup_alt_jump = b;
3311 GET_BUFFER_SPACE (3);
3312 b += 3;
3313
3314 laststart = 0;
3315 begalt = b;
3316 break;
3317
3318
3319 case '{':
3320 /* If \{ is a literal. */
3321 if (!(syntax & RE_INTERVALS)
3322 /* If we're at `\{' and it's not the open-interval
3323 operator. */
4bb91c68 3324 || (syntax & RE_NO_BK_BRACES))
25fe55af
RS
3325 goto normal_backslash;
3326
3327 handle_interval:
3328 {
3329 /* If got here, then the syntax allows intervals. */
3330
3331 /* At least (most) this many matches must be made. */
99633e97 3332 int lower_bound = 0, upper_bound = -1;
25fe55af 3333
ed0767d8 3334 beg_interval = p;
25fe55af 3335
25fe55af
RS
3336 GET_UNSIGNED_NUMBER (lower_bound);
3337
3338 if (c == ',')
ed0767d8 3339 GET_UNSIGNED_NUMBER (upper_bound);
25fe55af
RS
3340 else
3341 /* Interval such as `{1}' => match exactly once. */
3342 upper_bound = lower_bound;
3343
3344 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
ed0767d8 3345 || (upper_bound >= 0 && lower_bound > upper_bound))
4bb91c68 3346 FREE_STACK_RETURN (REG_BADBR);
25fe55af
RS
3347
3348 if (!(syntax & RE_NO_BK_BRACES))
3349 {
4bb91c68
SM
3350 if (c != '\\')
3351 FREE_STACK_RETURN (REG_BADBR);
c72b0edd
SM
3352 if (p == pend)
3353 FREE_STACK_RETURN (REG_EESCAPE);
25fe55af
RS
3354 PATFETCH (c);
3355 }
3356
3357 if (c != '}')
4bb91c68 3358 FREE_STACK_RETURN (REG_BADBR);
25fe55af
RS
3359
3360 /* We just parsed a valid interval. */
3361
3362 /* If it's invalid to have no preceding re. */
3363 if (!laststart)
3364 {
3365 if (syntax & RE_CONTEXT_INVALID_OPS)
3366 FREE_STACK_RETURN (REG_BADRPT);
3367 else if (syntax & RE_CONTEXT_INDEP_OPS)
3368 laststart = b;
3369 else
3370 goto unfetch_interval;
3371 }
3372
6df42991
SM
3373 if (upper_bound == 0)
3374 /* If the upper bound is zero, just drop the sub pattern
3375 altogether. */
3376 b = laststart;
3377 else if (lower_bound == 1 && upper_bound == 1)
3378 /* Just match it once: nothing to do here. */
3379 ;
3380
3381 /* Otherwise, we have a nontrivial interval. When
3382 we're all done, the pattern will look like:
3383 set_number_at <jump count> <upper bound>
3384 set_number_at <succeed_n count> <lower bound>
3385 succeed_n <after jump addr> <succeed_n count>
3386 <body of loop>
3387 jump_n <succeed_n addr> <jump count>
3388 (The upper bound and `jump_n' are omitted if
3389 `upper_bound' is 1, though.) */
3390 else
3391 { /* If the upper bound is > 1, we need to insert
3392 more at the end of the loop. */
3393 unsigned int nbytes = (upper_bound < 0 ? 3
3394 : upper_bound > 1 ? 5 : 0);
3395 unsigned int startoffset = 0;
3396
3397 GET_BUFFER_SPACE (20); /* We might use less. */
3398
3399 if (lower_bound == 0)
3400 {
3401 /* A succeed_n that starts with 0 is really a
3402 a simple on_failure_jump_loop. */
3403 INSERT_JUMP (on_failure_jump_loop, laststart,
3404 b + 3 + nbytes);
3405 b += 3;
3406 }
3407 else
3408 {
3409 /* Initialize lower bound of the `succeed_n', even
3410 though it will be set during matching by its
3411 attendant `set_number_at' (inserted next),
3412 because `re_compile_fastmap' needs to know.
3413 Jump to the `jump_n' we might insert below. */
3414 INSERT_JUMP2 (succeed_n, laststart,
3415 b + 5 + nbytes,
3416 lower_bound);
3417 b += 5;
3418
3419 /* Code to initialize the lower bound. Insert
7814e705 3420 before the `succeed_n'. The `5' is the last two
6df42991
SM
3421 bytes of this `set_number_at', plus 3 bytes of
3422 the following `succeed_n'. */
3423 insert_op2 (set_number_at, laststart, 5, lower_bound, b);
3424 b += 5;
3425 startoffset += 5;
3426 }
3427
3428 if (upper_bound < 0)
3429 {
3430 /* A negative upper bound stands for infinity,
3431 in which case it degenerates to a plain jump. */
3432 STORE_JUMP (jump, b, laststart + startoffset);
3433 b += 3;
3434 }
3435 else if (upper_bound > 1)
3436 { /* More than one repetition is allowed, so
3437 append a backward jump to the `succeed_n'
3438 that starts this interval.
3439
3440 When we've reached this during matching,
3441 we'll have matched the interval once, so
3442 jump back only `upper_bound - 1' times. */
3443 STORE_JUMP2 (jump_n, b, laststart + startoffset,
3444 upper_bound - 1);
3445 b += 5;
3446
3447 /* The location we want to set is the second
3448 parameter of the `jump_n'; that is `b-2' as
3449 an absolute address. `laststart' will be
3450 the `set_number_at' we're about to insert;
3451 `laststart+3' the number to set, the source
3452 for the relative address. But we are
3453 inserting into the middle of the pattern --
3454 so everything is getting moved up by 5.
3455 Conclusion: (b - 2) - (laststart + 3) + 5,
3456 i.e., b - laststart.
3457
3458 We insert this at the beginning of the loop
3459 so that if we fail during matching, we'll
3460 reinitialize the bounds. */
3461 insert_op2 (set_number_at, laststart, b - laststart,
3462 upper_bound - 1, b);
3463 b += 5;
3464 }
3465 }
25fe55af
RS
3466 pending_exact = 0;
3467 beg_interval = NULL;
3468 }
3469 break;
3470
3471 unfetch_interval:
3472 /* If an invalid interval, match the characters as literals. */
3473 assert (beg_interval);
3474 p = beg_interval;
3475 beg_interval = NULL;
3476
3477 /* normal_char and normal_backslash need `c'. */
ed0767d8 3478 c = '{';
25fe55af
RS
3479
3480 if (!(syntax & RE_NO_BK_BRACES))
3481 {
ed0767d8
SM
3482 assert (p > pattern && p[-1] == '\\');
3483 goto normal_backslash;
25fe55af 3484 }
ed0767d8
SM
3485 else
3486 goto normal_char;
e318085a 3487
b18215fc 3488#ifdef emacs
25fe55af 3489 /* There is no way to specify the before_dot and after_dot
7814e705 3490 operators. rms says this is ok. --karl */
25fe55af
RS
3491 case '=':
3492 BUF_PUSH (at_dot);
3493 break;
3494
3495 case 's':
3496 laststart = b;
3497 PATFETCH (c);
3498 BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
3499 break;
3500
3501 case 'S':
3502 laststart = b;
3503 PATFETCH (c);
3504 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
3505 break;
b18215fc
RS
3506
3507 case 'c':
3508 laststart = b;
36595814 3509 PATFETCH (c);
b18215fc
RS
3510 BUF_PUSH_2 (categoryspec, c);
3511 break;
e318085a 3512
b18215fc
RS
3513 case 'C':
3514 laststart = b;
36595814 3515 PATFETCH (c);
b18215fc
RS
3516 BUF_PUSH_2 (notcategoryspec, c);
3517 break;
3518#endif /* emacs */
e318085a 3519
e318085a 3520
25fe55af 3521 case 'w':
4bb91c68
SM
3522 if (syntax & RE_NO_GNU_OPS)
3523 goto normal_char;
25fe55af 3524 laststart = b;
1fb352e0 3525 BUF_PUSH_2 (syntaxspec, Sword);
25fe55af 3526 break;
e318085a 3527
e318085a 3528
25fe55af 3529 case 'W':
4bb91c68
SM
3530 if (syntax & RE_NO_GNU_OPS)
3531 goto normal_char;
25fe55af 3532 laststart = b;
1fb352e0 3533 BUF_PUSH_2 (notsyntaxspec, Sword);
25fe55af 3534 break;
e318085a
RS
3535
3536
25fe55af 3537 case '<':
4bb91c68
SM
3538 if (syntax & RE_NO_GNU_OPS)
3539 goto normal_char;
25fe55af
RS
3540 BUF_PUSH (wordbeg);
3541 break;
e318085a 3542
25fe55af 3543 case '>':
4bb91c68
SM
3544 if (syntax & RE_NO_GNU_OPS)
3545 goto normal_char;
25fe55af
RS
3546 BUF_PUSH (wordend);
3547 break;
e318085a 3548
669fa600
SM
3549 case '_':
3550 if (syntax & RE_NO_GNU_OPS)
3551 goto normal_char;
3552 laststart = b;
3553 PATFETCH (c);
3554 if (c == '<')
3555 BUF_PUSH (symbeg);
3556 else if (c == '>')
3557 BUF_PUSH (symend);
3558 else
3559 FREE_STACK_RETURN (REG_BADPAT);
3560 break;
3561
25fe55af 3562 case 'b':
4bb91c68
SM
3563 if (syntax & RE_NO_GNU_OPS)
3564 goto normal_char;
25fe55af
RS
3565 BUF_PUSH (wordbound);
3566 break;
e318085a 3567
25fe55af 3568 case 'B':
4bb91c68
SM
3569 if (syntax & RE_NO_GNU_OPS)
3570 goto normal_char;
25fe55af
RS
3571 BUF_PUSH (notwordbound);
3572 break;
fa9a63c5 3573
25fe55af 3574 case '`':
4bb91c68
SM
3575 if (syntax & RE_NO_GNU_OPS)
3576 goto normal_char;
25fe55af
RS
3577 BUF_PUSH (begbuf);
3578 break;
e318085a 3579
25fe55af 3580 case '\'':
4bb91c68
SM
3581 if (syntax & RE_NO_GNU_OPS)
3582 goto normal_char;
25fe55af
RS
3583 BUF_PUSH (endbuf);
3584 break;
e318085a 3585
25fe55af
RS
3586 case '1': case '2': case '3': case '4': case '5':
3587 case '6': case '7': case '8': case '9':
0cdd06f8
SM
3588 {
3589 regnum_t reg;
e318085a 3590
0cdd06f8
SM
3591 if (syntax & RE_NO_BK_REFS)
3592 goto normal_backslash;
e318085a 3593
0cdd06f8 3594 reg = c - '0';
e318085a 3595
c69b0314
SM
3596 if (reg > bufp->re_nsub || reg < 1
3597 /* Can't back reference to a subexp before its end. */
3598 || group_in_compile_stack (compile_stack, reg))
0cdd06f8 3599 FREE_STACK_RETURN (REG_ESUBREG);
e318085a 3600
0cdd06f8
SM
3601 laststart = b;
3602 BUF_PUSH_2 (duplicate, reg);
3603 }
25fe55af 3604 break;
e318085a 3605
e318085a 3606
25fe55af
RS
3607 case '+':
3608 case '?':
3609 if (syntax & RE_BK_PLUS_QM)
3610 goto handle_plus;
3611 else
3612 goto normal_backslash;
3613
3614 default:
3615 normal_backslash:
3616 /* You might think it would be useful for \ to mean
3617 not to translate; but if we don't translate it
4bb91c68 3618 it will never match anything. */
25fe55af
RS
3619 goto normal_char;
3620 }
3621 break;
fa9a63c5
RM
3622
3623
3624 default:
25fe55af 3625 /* Expects the character in `c'. */
fa9a63c5 3626 normal_char:
36595814 3627 /* If no exactn currently being built. */
25fe55af 3628 if (!pending_exact
fa9a63c5 3629
25fe55af
RS
3630 /* If last exactn not at current position. */
3631 || pending_exact + *pending_exact + 1 != b
5e69f11e 3632
25fe55af 3633 /* We have only one byte following the exactn for the count. */
2d1675e4 3634 || *pending_exact >= (1 << BYTEWIDTH) - MAX_MULTIBYTE_LENGTH
fa9a63c5 3635
7814e705 3636 /* If followed by a repetition operator. */
9d99031f 3637 || (p != pend && (*p == '*' || *p == '^'))
fa9a63c5 3638 || ((syntax & RE_BK_PLUS_QM)
9d99031f
RS
3639 ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?')
3640 : p != pend && (*p == '+' || *p == '?'))
fa9a63c5 3641 || ((syntax & RE_INTERVALS)
25fe55af 3642 && ((syntax & RE_NO_BK_BRACES)
9d99031f
RS
3643 ? p != pend && *p == '{'
3644 : p + 1 < pend && p[0] == '\\' && p[1] == '{')))
fa9a63c5
RM
3645 {
3646 /* Start building a new exactn. */
5e69f11e 3647
25fe55af 3648 laststart = b;
fa9a63c5
RM
3649
3650 BUF_PUSH_2 (exactn, 0);
3651 pending_exact = b - 1;
25fe55af 3652 }
5e69f11e 3653
2d1675e4
SM
3654 GET_BUFFER_SPACE (MAX_MULTIBYTE_LENGTH);
3655 {
e0277a47
KH
3656 int len;
3657
cf9c99bc 3658 if (multibyte)
6fdd04b0 3659 {
cf9c99bc 3660 c = TRANSLATE (c);
6fdd04b0
KH
3661 len = CHAR_STRING (c, b);
3662 b += len;
3663 }
e0277a47 3664 else
6fdd04b0 3665 {
cf9c99bc
KH
3666 c1 = RE_CHAR_TO_MULTIBYTE (c);
3667 if (! CHAR_BYTE8_P (c1))
3668 {
3669 re_wchar_t c2 = TRANSLATE (c1);
3670
3671 if (c1 != c2 && (c1 = RE_CHAR_TO_UNIBYTE (c2)) >= 0)
3672 c = c1;
409f2919 3673 }
6fdd04b0
KH
3674 *b++ = c;
3675 len = 1;
3676 }
2d1675e4
SM
3677 (*pending_exact) += len;
3678 }
3679
fa9a63c5 3680 break;
25fe55af 3681 } /* switch (c) */
fa9a63c5
RM
3682 } /* while p != pend */
3683
5e69f11e 3684
fa9a63c5 3685 /* Through the pattern now. */
5e69f11e 3686
505bde11 3687 FIXUP_ALT_JUMP ();
fa9a63c5 3688
5e69f11e 3689 if (!COMPILE_STACK_EMPTY)
fa9a63c5
RM
3690 FREE_STACK_RETURN (REG_EPAREN);
3691
3692 /* If we don't want backtracking, force success
3693 the first time we reach the end of the compiled pattern. */
3694 if (syntax & RE_NO_POSIX_BACKTRACKING)
3695 BUF_PUSH (succeed);
3696
fa9a63c5
RM
3697 /* We have succeeded; set the length of the buffer. */
3698 bufp->used = b - bufp->buffer;
3699
3700#ifdef DEBUG
99633e97 3701 if (debug > 0)
fa9a63c5 3702 {
505bde11 3703 re_compile_fastmap (bufp);
fa9a63c5
RM
3704 DEBUG_PRINT1 ("\nCompiled pattern: \n");
3705 print_compiled_pattern (bufp);
3706 }
99633e97 3707 debug--;
fa9a63c5
RM
3708#endif /* DEBUG */
3709
3710#ifndef MATCH_MAY_ALLOCATE
3711 /* Initialize the failure stack to the largest possible stack. This
3712 isn't necessary unless we're trying to avoid calling alloca in
3713 the search and match routines. */
3714 {
3715 int num_regs = bufp->re_nsub + 1;
3716
320a2a73 3717 if (fail_stack.size < re_max_failures * TYPICAL_FAILURE_SIZE)
fa9a63c5 3718 {
a26f4ccd 3719 fail_stack.size = re_max_failures * TYPICAL_FAILURE_SIZE;
38182d90
PE
3720 falk_stack.stack = realloc (fail_stack.stack,
3721 fail_stack.size * sizeof *falk_stack.stack);
fa9a63c5
RM
3722 }
3723
3724 regex_grow_registers (num_regs);
3725 }
3726#endif /* not MATCH_MAY_ALLOCATE */
3727
839966f3 3728 FREE_STACK_RETURN (REG_NOERROR);
fa9a63c5
RM
3729} /* regex_compile */
3730\f
3731/* Subroutines for `regex_compile'. */
3732
7814e705 3733/* Store OP at LOC followed by two-byte integer parameter ARG. */
fa9a63c5
RM
3734
3735static void
971de7fb 3736store_op1 (re_opcode_t op, unsigned char *loc, int arg)
fa9a63c5
RM
3737{
3738 *loc = (unsigned char) op;
3739 STORE_NUMBER (loc + 1, arg);
3740}
3741
3742
3743/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */
3744
3745static void
971de7fb 3746store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2)
fa9a63c5
RM
3747{
3748 *loc = (unsigned char) op;
3749 STORE_NUMBER (loc + 1, arg1);
3750 STORE_NUMBER (loc + 3, arg2);
3751}
3752
3753
3754/* Copy the bytes from LOC to END to open up three bytes of space at LOC
3755 for OP followed by two-byte integer parameter ARG. */
3756
3757static void
971de7fb 3758insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end)
fa9a63c5
RM
3759{
3760 register unsigned char *pfrom = end;
3761 register unsigned char *pto = end + 3;
3762
3763 while (pfrom != loc)
3764 *--pto = *--pfrom;
5e69f11e 3765
fa9a63c5
RM
3766 store_op1 (op, loc, arg);
3767}
3768
3769
3770/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */
3771
3772static void
971de7fb 3773insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, unsigned char *end)
fa9a63c5
RM
3774{
3775 register unsigned char *pfrom = end;
3776 register unsigned char *pto = end + 5;
3777
3778 while (pfrom != loc)
3779 *--pto = *--pfrom;
5e69f11e 3780
fa9a63c5
RM
3781 store_op2 (op, loc, arg1, arg2);
3782}
3783
3784
3785/* P points to just after a ^ in PATTERN. Return true if that ^ comes
3786 after an alternative or a begin-subexpression. We assume there is at
3787 least one character before the ^. */
3788
3789static boolean
971de7fb 3790at_begline_loc_p (const re_char *pattern, const re_char *p, reg_syntax_t syntax)
fa9a63c5 3791{
01618498 3792 re_char *prev = p - 2;
95988fcf 3793 boolean odd_backslashes;
5e69f11e 3794
95988fcf
AS
3795 /* After a subexpression? */
3796 if (*prev == '(')
3797 odd_backslashes = (syntax & RE_NO_BK_PARENS) == 0;
3798
3799 /* After an alternative? */
3800 else if (*prev == '|')
3801 odd_backslashes = (syntax & RE_NO_BK_VBAR) == 0;
3802
3803 /* After a shy subexpression? */
3804 else if (*prev == ':' && (syntax & RE_SHY_GROUPS))
3805 {
3806 /* Skip over optional regnum. */
3807 while (prev - 1 >= pattern && prev[-1] >= '0' && prev[-1] <= '9')
3808 --prev;
3809
3810 if (!(prev - 2 >= pattern
3811 && prev[-1] == '?' && prev[-2] == '('))
3812 return false;
3813 prev -= 2;
3814 odd_backslashes = (syntax & RE_NO_BK_PARENS) == 0;
3815 }
3816 else
3817 return false;
3818
3819 /* Count the number of preceding backslashes. */
3820 p = prev;
3821 while (prev - 1 >= pattern && prev[-1] == '\\')
3822 --prev;
3823 return (p - prev) & odd_backslashes;
fa9a63c5
RM
3824}
3825
3826
3827/* The dual of at_begline_loc_p. This one is for $. We assume there is
3828 at least one character after the $, i.e., `P < PEND'. */
3829
3830static boolean
971de7fb 3831at_endline_loc_p (const re_char *p, const re_char *pend, reg_syntax_t syntax)
fa9a63c5 3832{
01618498 3833 re_char *next = p;
fa9a63c5 3834 boolean next_backslash = *next == '\\';
01618498 3835 re_char *next_next = p + 1 < pend ? p + 1 : 0;
5e69f11e 3836
fa9a63c5
RM
3837 return
3838 /* Before a subexpression? */
3839 (syntax & RE_NO_BK_PARENS ? *next == ')'
25fe55af 3840 : next_backslash && next_next && *next_next == ')')
fa9a63c5
RM
3841 /* Before an alternative? */
3842 || (syntax & RE_NO_BK_VBAR ? *next == '|'
25fe55af 3843 : next_backslash && next_next && *next_next == '|');
fa9a63c5
RM
3844}
3845
3846
5e69f11e 3847/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
fa9a63c5
RM
3848 false if it's not. */
3849
3850static boolean
971de7fb 3851group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
fa9a63c5 3852{
d1dfb56c 3853 ssize_t this_element;
fa9a63c5 3854
5e69f11e
RM
3855 for (this_element = compile_stack.avail - 1;
3856 this_element >= 0;
fa9a63c5
RM
3857 this_element--)
3858 if (compile_stack.stack[this_element].regnum == regnum)
3859 return true;
3860
3861 return false;
3862}
fa9a63c5 3863\f
f6a3f532
SM
3864/* analyse_first.
3865 If fastmap is non-NULL, go through the pattern and fill fastmap
3866 with all the possible leading chars. If fastmap is NULL, don't
3867 bother filling it up (obviously) and only return whether the
3868 pattern could potentially match the empty string.
3869
3870 Return 1 if p..pend might match the empty string.
3871 Return 0 if p..pend matches at least one char.
01618498 3872 Return -1 if fastmap was not updated accurately. */
f6a3f532
SM
3873
3874static int
438105ed 3875analyse_first (const re_char *p, const re_char *pend, char *fastmap, const int multibyte)
fa9a63c5 3876{
505bde11 3877 int j, k;
1fb352e0 3878 boolean not;
fa9a63c5 3879
b18215fc 3880 /* If all elements for base leading-codes in fastmap is set, this
7814e705 3881 flag is set true. */
b18215fc
RS
3882 boolean match_any_multibyte_characters = false;
3883
f6a3f532 3884 assert (p);
5e69f11e 3885
505bde11
SM
3886 /* The loop below works as follows:
3887 - It has a working-list kept in the PATTERN_STACK and which basically
3888 starts by only containing a pointer to the first operation.
3889 - If the opcode we're looking at is a match against some set of
3890 chars, then we add those chars to the fastmap and go on to the
3891 next work element from the worklist (done via `break').
3892 - If the opcode is a control operator on the other hand, we either
3893 ignore it (if it's meaningless at this point, such as `start_memory')
3894 or execute it (if it's a jump). If the jump has several destinations
3895 (i.e. `on_failure_jump'), then we push the other destination onto the
3896 worklist.
3897 We guarantee termination by ignoring backward jumps (more or less),
3898 so that `p' is monotonically increasing. More to the point, we
3899 never set `p' (or push) anything `<= p1'. */
3900
01618498 3901 while (p < pend)
fa9a63c5 3902 {
505bde11
SM
3903 /* `p1' is used as a marker of how far back a `on_failure_jump'
3904 can go without being ignored. It is normally equal to `p'
3905 (which prevents any backward `on_failure_jump') except right
3906 after a plain `jump', to allow patterns such as:
3907 0: jump 10
3908 3..9: <body>
3909 10: on_failure_jump 3
3910 as used for the *? operator. */
01618498 3911 re_char *p1 = p;
5e69f11e 3912
7393bcbb 3913 switch (*p++)
fa9a63c5 3914 {
f6a3f532 3915 case succeed:
01618498 3916 return 1;
fa9a63c5 3917
fa9a63c5 3918 case duplicate:
505bde11
SM
3919 /* If the first character has to match a backreference, that means
3920 that the group was empty (since it already matched). Since this
3921 is the only case that interests us here, we can assume that the
3922 backreference must match the empty string. */
3923 p++;
3924 continue;
fa9a63c5
RM
3925
3926
3927 /* Following are the cases which match a character. These end
7814e705 3928 with `break'. */
fa9a63c5
RM
3929
3930 case exactn:
e0277a47 3931 if (fastmap)
cf9c99bc
KH
3932 {
3933 /* If multibyte is nonzero, the first byte of each
3934 character is an ASCII or a leading code. Otherwise,
3935 each byte is a character. Thus, this works in both
3936 cases. */
3937 fastmap[p[1]] = 1;
3938 if (! multibyte)
3939 {
3940 /* For the case of matching this unibyte regex
3941 against multibyte, we must set a leading code of
3942 the corresponding multibyte character. */
3943 int c = RE_CHAR_TO_MULTIBYTE (p[1]);
3944
86e893e3 3945 fastmap[CHAR_LEADING_CODE (c)] = 1;
cf9c99bc
KH
3946 }
3947 }
fa9a63c5
RM
3948 break;
3949
3950
1fb352e0
SM
3951 case anychar:
3952 /* We could put all the chars except for \n (and maybe \0)
3953 but we don't bother since it is generally not worth it. */
f6a3f532 3954 if (!fastmap) break;
01618498 3955 return -1;
fa9a63c5
RM
3956
3957
b18215fc 3958 case charset_not:
1fb352e0 3959 if (!fastmap) break;
bf216479
KH
3960 {
3961 /* Chars beyond end of bitmap are possible matches. */
bf216479 3962 for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
cf9c99bc 3963 j < (1 << BYTEWIDTH); j++)
bf216479
KH
3964 fastmap[j] = 1;
3965 }
3966
1fb352e0
SM
3967 /* Fallthrough */
3968 case charset:
3969 if (!fastmap) break;
3970 not = (re_opcode_t) *(p - 1) == charset_not;
b18215fc
RS
3971 for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
3972 j >= 0; j--)
1fb352e0 3973 if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
49da453b 3974 fastmap[j] = 1;
b18215fc 3975
6482db2e
KH
3976#ifdef emacs
3977 if (/* Any leading code can possibly start a character
1fb352e0 3978 which doesn't match the specified set of characters. */
6482db2e 3979 not
409f2919 3980 ||
6482db2e
KH
3981 /* If we can match a character class, we can match any
3982 multibyte characters. */
3983 (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
3984 && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0))
3985
b18215fc 3986 {
b18215fc
RS
3987 if (match_any_multibyte_characters == false)
3988 {
6482db2e
KH
3989 for (j = MIN_MULTIBYTE_LEADING_CODE;
3990 j <= MAX_MULTIBYTE_LEADING_CODE; j++)
6fdd04b0 3991 fastmap[j] = 1;
b18215fc
RS
3992 match_any_multibyte_characters = true;
3993 }
3994 }
b18215fc 3995
1fb352e0
SM
3996 else if (!not && CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
3997 && match_any_multibyte_characters == false)
3998 {
bf216479 3999 /* Set fastmap[I] to 1 where I is a leading code of each
51e4f4a8 4000 multibyte character in the range table. */
1fb352e0 4001 int c, count;
bf216479 4002 unsigned char lc1, lc2;
b18215fc 4003
1fb352e0 4004 /* Make P points the range table. `+ 2' is to skip flag
0b32bf0e 4005 bits for a character class. */
1fb352e0 4006 p += CHARSET_BITMAP_SIZE (&p[-2]) + 2;
b18215fc 4007
1fb352e0
SM
4008 /* Extract the number of ranges in range table into COUNT. */
4009 EXTRACT_NUMBER_AND_INCR (count, p);
cf9c99bc 4010 for (; count > 0; count--, p += 3)
1fb352e0 4011 {
9117d724
KH
4012 /* Extract the start and end of each range. */
4013 EXTRACT_CHARACTER (c, p);
bf216479 4014 lc1 = CHAR_LEADING_CODE (c);
9117d724 4015 p += 3;
1fb352e0 4016 EXTRACT_CHARACTER (c, p);
bf216479
KH
4017 lc2 = CHAR_LEADING_CODE (c);
4018 for (j = lc1; j <= lc2; j++)
9117d724 4019 fastmap[j] = 1;
1fb352e0
SM
4020 }
4021 }
6482db2e 4022#endif
b18215fc
RS
4023 break;
4024
1fb352e0
SM
4025 case syntaxspec:
4026 case notsyntaxspec:
4027 if (!fastmap) break;
4028#ifndef emacs
4029 not = (re_opcode_t)p[-1] == notsyntaxspec;
4030 k = *p++;
4031 for (j = 0; j < (1 << BYTEWIDTH); j++)
990b2375 4032 if ((SYNTAX (j) == (enum syntaxcode) k) ^ not)
b18215fc 4033 fastmap[j] = 1;
b18215fc 4034 break;
1fb352e0 4035#else /* emacs */
b18215fc
RS
4036 /* This match depends on text properties. These end with
4037 aborting optimizations. */
01618498 4038 return -1;
b18215fc
RS
4039
4040 case categoryspec:
b18215fc 4041 case notcategoryspec:
1fb352e0
SM
4042 if (!fastmap) break;
4043 not = (re_opcode_t)p[-1] == notcategoryspec;
b18215fc 4044 k = *p++;
6482db2e 4045 for (j = (1 << BYTEWIDTH); j >= 0; j--)
1fb352e0 4046 if ((CHAR_HAS_CATEGORY (j, k)) ^ not)
b18215fc
RS
4047 fastmap[j] = 1;
4048
6482db2e
KH
4049 /* Any leading code can possibly start a character which
4050 has or doesn't has the specified category. */
4051 if (match_any_multibyte_characters == false)
6fdd04b0 4052 {
6482db2e
KH
4053 for (j = MIN_MULTIBYTE_LEADING_CODE;
4054 j <= MAX_MULTIBYTE_LEADING_CODE; j++)
4055 fastmap[j] = 1;
4056 match_any_multibyte_characters = true;
6fdd04b0 4057 }
b18215fc
RS
4058 break;
4059
fa9a63c5 4060 /* All cases after this match the empty string. These end with
25fe55af 4061 `continue'. */
fa9a63c5 4062
fa9a63c5
RM
4063 case before_dot:
4064 case at_dot:
4065 case after_dot:
1fb352e0 4066#endif /* !emacs */
25fe55af
RS
4067 case no_op:
4068 case begline:
4069 case endline:
fa9a63c5
RM
4070 case begbuf:
4071 case endbuf:
4072 case wordbound:
4073 case notwordbound:
4074 case wordbeg:
4075 case wordend:
669fa600
SM
4076 case symbeg:
4077 case symend:
25fe55af 4078 continue;
fa9a63c5
RM
4079
4080
fa9a63c5 4081 case jump:
25fe55af 4082 EXTRACT_NUMBER_AND_INCR (j, p);
505bde11
SM
4083 if (j < 0)
4084 /* Backward jumps can only go back to code that we've already
4085 visited. `re_compile' should make sure this is true. */
4086 break;
25fe55af 4087 p += j;
7393bcbb 4088 switch (*p)
505bde11
SM
4089 {
4090 case on_failure_jump:
4091 case on_failure_keep_string_jump:
505bde11 4092 case on_failure_jump_loop:
0683b6fa 4093 case on_failure_jump_nastyloop:
505bde11
SM
4094 case on_failure_jump_smart:
4095 p++;
4096 break;
4097 default:
4098 continue;
4099 };
4100 /* Keep `p1' to allow the `on_failure_jump' we are jumping to
4101 to jump back to "just after here". */
4102 /* Fallthrough */
fa9a63c5 4103
25fe55af
RS
4104 case on_failure_jump:
4105 case on_failure_keep_string_jump:
0683b6fa 4106 case on_failure_jump_nastyloop:
505bde11
SM
4107 case on_failure_jump_loop:
4108 case on_failure_jump_smart:
25fe55af 4109 EXTRACT_NUMBER_AND_INCR (j, p);
505bde11 4110 if (p + j <= p1)
ed0767d8 4111 ; /* Backward jump to be ignored. */
01618498
SM
4112 else
4113 { /* We have to look down both arms.
4114 We first go down the "straight" path so as to minimize
4115 stack usage when going through alternatives. */
4116 int r = analyse_first (p, pend, fastmap, multibyte);
4117 if (r) return r;
4118 p += j;
4119 }
25fe55af 4120 continue;
fa9a63c5
RM
4121
4122
ed0767d8
SM
4123 case jump_n:
4124 /* This code simply does not properly handle forward jump_n. */
4125 DEBUG_STATEMENT (EXTRACT_NUMBER (j, p); assert (j < 0));
4126 p += 4;
4127 /* jump_n can either jump or fall through. The (backward) jump
4128 case has already been handled, so we only need to look at the
4129 fallthrough case. */
4130 continue;
177c0ea7 4131
fa9a63c5 4132 case succeed_n:
ed0767d8
SM
4133 /* If N == 0, it should be an on_failure_jump_loop instead. */
4134 DEBUG_STATEMENT (EXTRACT_NUMBER (j, p + 2); assert (j > 0));
4135 p += 4;
4136 /* We only care about one iteration of the loop, so we don't
4137 need to consider the case where this behaves like an
4138 on_failure_jump. */
25fe55af 4139 continue;
fa9a63c5
RM
4140
4141
4142 case set_number_at:
25fe55af
RS
4143 p += 4;
4144 continue;
fa9a63c5
RM
4145
4146
4147 case start_memory:
25fe55af 4148 case stop_memory:
505bde11 4149 p += 1;
fa9a63c5
RM
4150 continue;
4151
4152
4153 default:
25fe55af
RS
4154 abort (); /* We have listed all the cases. */
4155 } /* switch *p++ */
fa9a63c5
RM
4156
4157 /* Getting here means we have found the possible starting
25fe55af 4158 characters for one path of the pattern -- and that the empty
7814e705 4159 string does not match. We need not follow this path further. */
01618498 4160 return 0;
fa9a63c5
RM
4161 } /* while p */
4162
01618498
SM
4163 /* We reached the end without matching anything. */
4164 return 1;
4165
f6a3f532
SM
4166} /* analyse_first */
4167\f
4168/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
4169 BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible
4170 characters can start a string that matches the pattern. This fastmap
4171 is used by re_search to skip quickly over impossible starting points.
4172
4173 Character codes above (1 << BYTEWIDTH) are not represented in the
4174 fastmap, but the leading codes are represented. Thus, the fastmap
4175 indicates which character sets could start a match.
4176
4177 The caller must supply the address of a (1 << BYTEWIDTH)-byte data
4178 area as BUFP->fastmap.
4179
4180 We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
4181 the pattern buffer.
4182
4183 Returns 0 if we succeed, -2 if an internal error. */
4184
4185int
971de7fb 4186re_compile_fastmap (struct re_pattern_buffer *bufp)
f6a3f532
SM
4187{
4188 char *fastmap = bufp->fastmap;
4189 int analysis;
4190
4191 assert (fastmap && bufp->buffer);
4192
72af86bd 4193 memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */
f6a3f532
SM
4194 bufp->fastmap_accurate = 1; /* It will be when we're done. */
4195
4196 analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used,
2d1675e4 4197 fastmap, RE_MULTIBYTE_P (bufp));
c0f9ea08 4198 bufp->can_be_null = (analysis != 0);
fa9a63c5
RM
4199 return 0;
4200} /* re_compile_fastmap */
4201\f
4202/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
4203 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
4204 this memory for recording register information. STARTS and ENDS
4205 must be allocated using the malloc library routine, and must each
4206 be at least NUM_REGS * sizeof (regoff_t) bytes long.
4207
4208 If NUM_REGS == 0, then subsequent matches should allocate their own
4209 register data.
4210
4211 Unless this function is called, the first search or match using
4212 PATTERN_BUFFER will allocate its own register data, without
4213 freeing the old data. */
4214
4215void
971de7fb 4216re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, unsigned int num_regs, regoff_t *starts, regoff_t *ends)
fa9a63c5
RM
4217{
4218 if (num_regs)
4219 {
4220 bufp->regs_allocated = REGS_REALLOCATE;
4221 regs->num_regs = num_regs;
4222 regs->start = starts;
4223 regs->end = ends;
4224 }
4225 else
4226 {
4227 bufp->regs_allocated = REGS_UNALLOCATED;
4228 regs->num_regs = 0;
4229 regs->start = regs->end = (regoff_t *) 0;
4230 }
4231}
c0f9ea08 4232WEAK_ALIAS (__re_set_registers, re_set_registers)
fa9a63c5 4233\f
7814e705 4234/* Searching routines. */
fa9a63c5
RM
4235
4236/* Like re_search_2, below, but only one string is specified, and
4237 doesn't let you say where to stop matching. */
4238
d1dfb56c
EZ
4239regoff_t
4240re_search (struct re_pattern_buffer *bufp, const char *string, size_t size,
4241 ssize_t startpos, ssize_t range, struct re_registers *regs)
fa9a63c5 4242{
5e69f11e 4243 return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
fa9a63c5
RM
4244 regs, size);
4245}
c0f9ea08 4246WEAK_ALIAS (__re_search, re_search)
fa9a63c5 4247
70806df6
KH
4248/* Head address of virtual concatenation of string. */
4249#define HEAD_ADDR_VSTRING(P) \
4250 (((P) >= size1 ? string2 : string1))
4251
b18215fc
RS
4252/* Address of POS in the concatenation of virtual string. */
4253#define POS_ADDR_VSTRING(POS) \
4254 (((POS) >= size1 ? string2 - size1 : string1) + (POS))
fa9a63c5
RM
4255
4256/* Using the compiled pattern in BUFP->buffer, first tries to match the
4257 virtual concatenation of STRING1 and STRING2, starting first at index
4258 STARTPOS, then at STARTPOS + 1, and so on.
5e69f11e 4259
fa9a63c5 4260 STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
5e69f11e 4261
fa9a63c5
RM
4262 RANGE is how far to scan while trying to match. RANGE = 0 means try
4263 only at STARTPOS; in general, the last start tried is STARTPOS +
4264 RANGE.
5e69f11e 4265
fa9a63c5
RM
4266 In REGS, return the indices of the virtual concatenation of STRING1
4267 and STRING2 that matched the entire BUFP->buffer and its contained
4268 subexpressions.
5e69f11e 4269
fa9a63c5
RM
4270 Do not consider matching one past the index STOP in the virtual
4271 concatenation of STRING1 and STRING2.
4272
4273 We return either the position in the strings at which the match was
4274 found, -1 if no match, or -2 if error (such as failure
4275 stack overflow). */
4276
d1dfb56c
EZ
4277regoff_t
4278re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
4279 const char *str2, size_t size2, ssize_t startpos, ssize_t range,
4280 struct re_registers *regs, ssize_t stop)
fa9a63c5 4281{
d1dfb56c 4282 regoff_t val;
66f0296e
SM
4283 re_char *string1 = (re_char*) str1;
4284 re_char *string2 = (re_char*) str2;
fa9a63c5 4285 register char *fastmap = bufp->fastmap;
6676cb1c 4286 register RE_TRANSLATE_TYPE translate = bufp->translate;
d1dfb56c
EZ
4287 size_t total_size = size1 + size2;
4288 ssize_t endpos = startpos + range;
c0f9ea08 4289 boolean anchored_start;
cf9c99bc
KH
4290 /* Nonzero if we are searching multibyte string. */
4291 const boolean multibyte = RE_TARGET_MULTIBYTE_P (bufp);
b18215fc 4292
fa9a63c5
RM
4293 /* Check for out-of-range STARTPOS. */
4294 if (startpos < 0 || startpos > total_size)
4295 return -1;
5e69f11e 4296
fa9a63c5 4297 /* Fix up RANGE if it might eventually take us outside
34597fa9 4298 the virtual concatenation of STRING1 and STRING2.
5e69f11e 4299 Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. */
34597fa9
RS
4300 if (endpos < 0)
4301 range = 0 - startpos;
fa9a63c5
RM
4302 else if (endpos > total_size)
4303 range = total_size - startpos;
4304
4305 /* If the search isn't to be a backwards one, don't waste time in a
7b140fd7 4306 search for a pattern anchored at beginning of buffer. */
fa9a63c5
RM
4307 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
4308 {
4309 if (startpos > 0)
4310 return -1;
4311 else
7b140fd7 4312 range = 0;
fa9a63c5
RM
4313 }
4314
ae4788a8
RS
4315#ifdef emacs
4316 /* In a forward search for something that starts with \=.
4317 don't keep searching past point. */
4318 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
4319 {
7b140fd7
RS
4320 range = PT_BYTE - BEGV_BYTE - startpos;
4321 if (range < 0)
ae4788a8
RS
4322 return -1;
4323 }
4324#endif /* emacs */
4325
fa9a63c5
RM
4326 /* Update the fastmap now if not correct already. */
4327 if (fastmap && !bufp->fastmap_accurate)
01618498 4328 re_compile_fastmap (bufp);
5e69f11e 4329
c8499ba5 4330 /* See whether the pattern is anchored. */
c0f9ea08 4331 anchored_start = (bufp->buffer[0] == begline);
c8499ba5 4332
b18215fc 4333#ifdef emacs
d48cd3f4 4334 gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */
cc9b4df2 4335 {
d1dfb56c 4336 ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos));
cc9b4df2
KH
4337
4338 SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
4339 }
b18215fc
RS
4340#endif
4341
fa9a63c5
RM
4342 /* Loop through the string, looking for a place to start matching. */
4343 for (;;)
5e69f11e 4344 {
c8499ba5
RS
4345 /* If the pattern is anchored,
4346 skip quickly past places we cannot match.
4347 We don't bother to treat startpos == 0 specially
4348 because that case doesn't repeat. */
4349 if (anchored_start && startpos > 0)
4350 {
c0f9ea08
SM
4351 if (! ((startpos <= size1 ? string1[startpos - 1]
4352 : string2[startpos - size1 - 1])
4353 == '\n'))
c8499ba5
RS
4354 goto advance;
4355 }
4356
fa9a63c5 4357 /* If a fastmap is supplied, skip quickly over characters that
25fe55af
RS
4358 cannot be the start of a match. If the pattern can match the
4359 null string, however, we don't need to skip characters; we want
7814e705 4360 the first null string. */
fa9a63c5
RM
4361 if (fastmap && startpos < total_size && !bufp->can_be_null)
4362 {
66f0296e 4363 register re_char *d;
01618498 4364 register re_wchar_t buf_ch;
e934739e
RS
4365
4366 d = POS_ADDR_VSTRING (startpos);
4367
7814e705 4368 if (range > 0) /* Searching forwards. */
fa9a63c5 4369 {
fa9a63c5 4370 register int lim = 0;
d1dfb56c 4371 ssize_t irange = range;
fa9a63c5 4372
25fe55af
RS
4373 if (startpos < size1 && startpos + range >= size1)
4374 lim = range - (size1 - startpos);
fa9a63c5 4375
25fe55af
RS
4376 /* Written out as an if-else to avoid testing `translate'
4377 inside the loop. */
28ae27ae
AS
4378 if (RE_TRANSLATE_P (translate))
4379 {
e934739e
RS
4380 if (multibyte)
4381 while (range > lim)
4382 {
4383 int buf_charlen;
4384
62a6e103 4385 buf_ch = STRING_CHAR_AND_LENGTH (d, buf_charlen);
e934739e 4386 buf_ch = RE_TRANSLATE (translate, buf_ch);
bf216479 4387 if (fastmap[CHAR_LEADING_CODE (buf_ch)])
e934739e
RS
4388 break;
4389
4390 range -= buf_charlen;
4391 d += buf_charlen;
4392 }
4393 else
bf216479 4394 while (range > lim)
33c46939 4395 {
cf9c99bc
KH
4396 register re_wchar_t ch, translated;
4397
bf216479 4398 buf_ch = *d;
cf9c99bc
KH
4399 ch = RE_CHAR_TO_MULTIBYTE (buf_ch);
4400 translated = RE_TRANSLATE (translate, ch);
4401 if (translated != ch
4402 && (ch = RE_CHAR_TO_UNIBYTE (translated)) >= 0)
4403 buf_ch = ch;
6fdd04b0 4404 if (fastmap[buf_ch])
bf216479 4405 break;
33c46939
RS
4406 d++;
4407 range--;
4408 }
e934739e 4409 }
fa9a63c5 4410 else
6fdd04b0
KH
4411 {
4412 if (multibyte)
4413 while (range > lim)
4414 {
4415 int buf_charlen;
fa9a63c5 4416
62a6e103 4417 buf_ch = STRING_CHAR_AND_LENGTH (d, buf_charlen);
6fdd04b0
KH
4418 if (fastmap[CHAR_LEADING_CODE (buf_ch)])
4419 break;
4420 range -= buf_charlen;
4421 d += buf_charlen;
4422 }
e934739e 4423 else
6fdd04b0 4424 while (range > lim && !fastmap[*d])
33c46939
RS
4425 {
4426 d++;
4427 range--;
4428 }
e934739e 4429 }
fa9a63c5
RM
4430 startpos += irange - range;
4431 }
7814e705 4432 else /* Searching backwards. */
fa9a63c5 4433 {
ba5e343c
KH
4434 if (multibyte)
4435 {
62a6e103 4436 buf_ch = STRING_CHAR (d);
ba5e343c
KH
4437 buf_ch = TRANSLATE (buf_ch);
4438 if (! fastmap[CHAR_LEADING_CODE (buf_ch)])
4439 goto advance;
4440 }
4441 else
4442 {
cf9c99bc
KH
4443 register re_wchar_t ch, translated;
4444
4445 buf_ch = *d;
4446 ch = RE_CHAR_TO_MULTIBYTE (buf_ch);
4447 translated = TRANSLATE (ch);
4448 if (translated != ch
4449 && (ch = RE_CHAR_TO_UNIBYTE (translated)) >= 0)
4450 buf_ch = ch;
4451 if (! fastmap[TRANSLATE (buf_ch)])
ba5e343c
KH
4452 goto advance;
4453 }
fa9a63c5
RM
4454 }
4455 }
4456
4457 /* If can't match the null string, and that's all we have left, fail. */
4458 if (range >= 0 && startpos == total_size && fastmap
25fe55af 4459 && !bufp->can_be_null)
fa9a63c5
RM
4460 return -1;
4461
4462 val = re_match_2_internal (bufp, string1, size1, string2, size2,
4463 startpos, regs, stop);
fa9a63c5
RM
4464
4465 if (val >= 0)
4466 return startpos;
5e69f11e 4467
fa9a63c5
RM
4468 if (val == -2)
4469 return -2;
4470
4471 advance:
5e69f11e 4472 if (!range)
25fe55af 4473 break;
5e69f11e 4474 else if (range > 0)
25fe55af 4475 {
b18215fc
RS
4476 /* Update STARTPOS to the next character boundary. */
4477 if (multibyte)
4478 {
66f0296e 4479 re_char *p = POS_ADDR_VSTRING (startpos);
aa3830c4 4480 int len = BYTES_BY_CHAR_HEAD (*p);
b18215fc
RS
4481
4482 range -= len;
4483 if (range < 0)
4484 break;
4485 startpos += len;
4486 }
4487 else
4488 {
b560c397
RS
4489 range--;
4490 startpos++;
4491 }
e318085a 4492 }
fa9a63c5 4493 else
25fe55af
RS
4494 {
4495 range++;
4496 startpos--;
b18215fc
RS
4497
4498 /* Update STARTPOS to the previous character boundary. */
4499 if (multibyte)
4500 {
70806df6
KH
4501 re_char *p = POS_ADDR_VSTRING (startpos) + 1;
4502 re_char *p0 = p;
4503 re_char *phead = HEAD_ADDR_VSTRING (startpos);
b18215fc
RS
4504
4505 /* Find the head of multibyte form. */
70806df6
KH
4506 PREV_CHAR_BOUNDARY (p, phead);
4507 range += p0 - 1 - p;
4508 if (range > 0)
4509 break;
b18215fc 4510
70806df6 4511 startpos -= p0 - 1 - p;
b18215fc 4512 }
25fe55af 4513 }
fa9a63c5
RM
4514 }
4515 return -1;
4516} /* re_search_2 */
c0f9ea08 4517WEAK_ALIAS (__re_search_2, re_search_2)
fa9a63c5
RM
4518\f
4519/* Declarations and macros for re_match_2. */
4520
261cb4bb
PE
4521static int bcmp_translate (re_char *s1, re_char *s2,
4522 register ssize_t len,
4523 RE_TRANSLATE_TYPE translate,
4524 const int multibyte);
fa9a63c5
RM
4525
4526/* This converts PTR, a pointer into one of the search strings `string1'
4527 and `string2' into an offset from the beginning of that string. */
4528#define POINTER_TO_OFFSET(ptr) \
4529 (FIRST_STRING_P (ptr) \
4530 ? ((regoff_t) ((ptr) - string1)) \
4531 : ((regoff_t) ((ptr) - string2 + size1)))
4532
fa9a63c5 4533/* Call before fetching a character with *d. This switches over to
419d1c74
SM
4534 string2 if necessary.
4535 Check re_match_2_internal for a discussion of why end_match_2 might
4536 not be within string2 (but be equal to end_match_1 instead). */
fa9a63c5 4537#define PREFETCH() \
25fe55af 4538 while (d == dend) \
fa9a63c5
RM
4539 { \
4540 /* End of string2 => fail. */ \
25fe55af
RS
4541 if (dend == end_match_2) \
4542 goto fail; \
4bb91c68 4543 /* End of string1 => advance to string2. */ \
25fe55af 4544 d = string2; \
fa9a63c5
RM
4545 dend = end_match_2; \
4546 }
4547
f1ad044f
SM
4548/* Call before fetching a char with *d if you already checked other limits.
4549 This is meant for use in lookahead operations like wordend, etc..
4550 where we might need to look at parts of the string that might be
4551 outside of the LIMITs (i.e past `stop'). */
4552#define PREFETCH_NOLIMIT() \
4553 if (d == end1) \
4554 { \
4555 d = string2; \
4556 dend = end_match_2; \
4557 } \
fa9a63c5
RM
4558
4559/* Test if at very beginning or at very end of the virtual concatenation
7814e705 4560 of `string1' and `string2'. If only one string, it's `string2'. */
fa9a63c5 4561#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
5e69f11e 4562#define AT_STRINGS_END(d) ((d) == end2)
fa9a63c5 4563
9121ca40 4564/* Disabled due to a compiler bug -- see comment at case wordbound */
b18215fc
RS
4565
4566/* The comment at case wordbound is following one, but we don't use
4567 AT_WORD_BOUNDARY anymore to support multibyte form.
4568
4569 The DEC Alpha C compiler 3.x generates incorrect code for the
25fe55af 4570 test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of
7814e705 4571 AT_WORD_BOUNDARY, so this code is disabled. Expanding the
b18215fc
RS
4572 macro and introducing temporary variables works around the bug. */
4573
9121ca40 4574#if 0
b313f9d8
PE
4575/* Test if D points to a character which is word-constituent. We have
4576 two special cases to check for: if past the end of string1, look at
4577 the first character in string2; and if before the beginning of
4578 string2, look at the last character in string1. */
4579#define WORDCHAR_P(d) \
4580 (SYNTAX ((d) == end1 ? *string2 \
4581 : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
4582 == Sword)
4583
fa9a63c5
RM
4584/* Test if the character before D and the one at D differ with respect
4585 to being word-constituent. */
4586#define AT_WORD_BOUNDARY(d) \
4587 (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
4588 || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
9121ca40 4589#endif
fa9a63c5
RM
4590
4591/* Free everything we malloc. */
4592#ifdef MATCH_MAY_ALLOCATE
952db0d7
PE
4593# define FREE_VAR(var) \
4594 do { \
4595 if (var) \
4596 { \
4597 REGEX_FREE (var); \
4598 var = NULL; \
4599 } \
4600 } while (0)
0b32bf0e 4601# define FREE_VARIABLES() \
fa9a63c5
RM
4602 do { \
4603 REGEX_FREE_STACK (fail_stack.stack); \
4604 FREE_VAR (regstart); \
4605 FREE_VAR (regend); \
fa9a63c5
RM
4606 FREE_VAR (best_regstart); \
4607 FREE_VAR (best_regend); \
fa9a63c5
RM
4608 } while (0)
4609#else
0b32bf0e 4610# define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */
fa9a63c5
RM
4611#endif /* not MATCH_MAY_ALLOCATE */
4612
505bde11
SM
4613\f
4614/* Optimization routines. */
4615
4e8a9132
SM
4616/* If the operation is a match against one or more chars,
4617 return a pointer to the next operation, else return NULL. */
01618498 4618static re_char *
971de7fb 4619skip_one_char (const re_char *p)
4e8a9132 4620{
7393bcbb 4621 switch (*p++)
4e8a9132
SM
4622 {
4623 case anychar:
4624 break;
177c0ea7 4625
4e8a9132
SM
4626 case exactn:
4627 p += *p + 1;
4628 break;
4629
4630 case charset_not:
4631 case charset:
4632 if (CHARSET_RANGE_TABLE_EXISTS_P (p - 1))
4633 {
4634 int mcnt;
4635 p = CHARSET_RANGE_TABLE (p - 1);
4636 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4637 p = CHARSET_RANGE_TABLE_END (p, mcnt);
4638 }
4639 else
4640 p += 1 + CHARSET_BITMAP_SIZE (p - 1);
4641 break;
177c0ea7 4642
4e8a9132
SM
4643 case syntaxspec:
4644 case notsyntaxspec:
1fb352e0 4645#ifdef emacs
4e8a9132
SM
4646 case categoryspec:
4647 case notcategoryspec:
4648#endif /* emacs */
4649 p++;
4650 break;
4651
4652 default:
4653 p = NULL;
4654 }
4655 return p;
4656}
4657
4658
505bde11 4659/* Jump over non-matching operations. */
839966f3 4660static re_char *
971de7fb 4661skip_noops (const re_char *p, const re_char *pend)
505bde11
SM
4662{
4663 int mcnt;
4664 while (p < pend)
4665 {
7393bcbb 4666 switch (*p)
505bde11
SM
4667 {
4668 case start_memory:
505bde11
SM
4669 case stop_memory:
4670 p += 2; break;
4671 case no_op:
4672 p += 1; break;
4673 case jump:
4674 p += 1;
4675 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4676 p += mcnt;
4677 break;
4678 default:
4679 return p;
4680 }
4681 }
4682 assert (p == pend);
4683 return p;
4684}
4685
4686/* Non-zero if "p1 matches something" implies "p2 fails". */
4687static int
971de7fb 4688mutually_exclusive_p (struct re_pattern_buffer *bufp, const re_char *p1, const re_char *p2)
505bde11 4689{
4e8a9132 4690 re_opcode_t op2;
2d1675e4 4691 const boolean multibyte = RE_MULTIBYTE_P (bufp);
505bde11
SM
4692 unsigned char *pend = bufp->buffer + bufp->used;
4693
4e8a9132 4694 assert (p1 >= bufp->buffer && p1 < pend
505bde11
SM
4695 && p2 >= bufp->buffer && p2 <= pend);
4696
4697 /* Skip over open/close-group commands.
4698 If what follows this loop is a ...+ construct,
4699 look at what begins its body, since we will have to
4700 match at least one of that. */
4e8a9132
SM
4701 p2 = skip_noops (p2, pend);
4702 /* The same skip can be done for p1, except that this function
4703 is only used in the case where p1 is a simple match operator. */
4704 /* p1 = skip_noops (p1, pend); */
4705
4706 assert (p1 >= bufp->buffer && p1 < pend
4707 && p2 >= bufp->buffer && p2 <= pend);
4708
4709 op2 = p2 == pend ? succeed : *p2;
4710
7393bcbb 4711 switch (op2)
505bde11 4712 {
4e8a9132
SM
4713 case succeed:
4714 case endbuf:
4715 /* If we're at the end of the pattern, we can change. */
4716 if (skip_one_char (p1))
505bde11 4717 {
505bde11
SM
4718 DEBUG_PRINT1 (" End of pattern: fast loop.\n");
4719 return 1;
505bde11 4720 }
4e8a9132 4721 break;
177c0ea7 4722
4e8a9132 4723 case endline:
4e8a9132
SM
4724 case exactn:
4725 {
01618498 4726 register re_wchar_t c
4e8a9132 4727 = (re_opcode_t) *p2 == endline ? '\n'
62a6e103 4728 : RE_STRING_CHAR (p2 + 2, multibyte);
505bde11 4729
4e8a9132
SM
4730 if ((re_opcode_t) *p1 == exactn)
4731 {
62a6e103 4732 if (c != RE_STRING_CHAR (p1 + 2, multibyte))
4e8a9132
SM
4733 {
4734 DEBUG_PRINT3 (" '%c' != '%c' => fast loop.\n", c, p1[2]);
4735 return 1;
4736 }
4737 }
505bde11 4738
4e8a9132
SM
4739 else if ((re_opcode_t) *p1 == charset
4740 || (re_opcode_t) *p1 == charset_not)
4741 {
4742 int not = (re_opcode_t) *p1 == charset_not;
505bde11 4743
4e8a9132
SM
4744 /* Test if C is listed in charset (or charset_not)
4745 at `p1'. */
6fdd04b0 4746 if (! multibyte || IS_REAL_ASCII (c))
4e8a9132
SM
4747 {
4748 if (c < CHARSET_BITMAP_SIZE (p1) * BYTEWIDTH
4749 && p1[2 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4750 not = !not;
4751 }
4752 else if (CHARSET_RANGE_TABLE_EXISTS_P (p1))
4753 CHARSET_LOOKUP_RANGE_TABLE (not, c, p1);
505bde11 4754
4e8a9132
SM
4755 /* `not' is equal to 1 if c would match, which means
4756 that we can't change to pop_failure_jump. */
4757 if (!not)
4758 {
4759 DEBUG_PRINT1 (" No match => fast loop.\n");
4760 return 1;
4761 }
4762 }
4763 else if ((re_opcode_t) *p1 == anychar
4764 && c == '\n')
4765 {
4766 DEBUG_PRINT1 (" . != \\n => fast loop.\n");
4767 return 1;
4768 }
4769 }
4770 break;
505bde11 4771
4e8a9132 4772 case charset:
4e8a9132
SM
4773 {
4774 if ((re_opcode_t) *p1 == exactn)
4775 /* Reuse the code above. */
4776 return mutually_exclusive_p (bufp, p2, p1);
505bde11 4777
505bde11
SM
4778 /* It is hard to list up all the character in charset
4779 P2 if it includes multibyte character. Give up in
4780 such case. */
4781 else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2))
4782 {
4783 /* Now, we are sure that P2 has no range table.
4784 So, for the size of bitmap in P2, `p2[1]' is
7814e705 4785 enough. But P1 may have range table, so the
505bde11
SM
4786 size of bitmap table of P1 is extracted by
4787 using macro `CHARSET_BITMAP_SIZE'.
4788
6fdd04b0
KH
4789 In a multibyte case, we know that all the character
4790 listed in P2 is ASCII. In a unibyte case, P1 has only a
4791 bitmap table. So, in both cases, it is enough to test
4792 only the bitmap table of P1. */
505bde11 4793
411e4203 4794 if ((re_opcode_t) *p1 == charset)
505bde11
SM
4795 {
4796 int idx;
4797 /* We win if the charset inside the loop
4798 has no overlap with the one after the loop. */
4799 for (idx = 0;
4800 (idx < (int) p2[1]
4801 && idx < CHARSET_BITMAP_SIZE (p1));
4802 idx++)
4803 if ((p2[2 + idx] & p1[2 + idx]) != 0)
4804 break;
4805
4806 if (idx == p2[1]
4807 || idx == CHARSET_BITMAP_SIZE (p1))
4808 {
4809 DEBUG_PRINT1 (" No match => fast loop.\n");
4810 return 1;
4811 }
4812 }
411e4203 4813 else if ((re_opcode_t) *p1 == charset_not)
505bde11
SM
4814 {
4815 int idx;
4816 /* We win if the charset_not inside the loop lists
7814e705 4817 every character listed in the charset after. */
505bde11
SM
4818 for (idx = 0; idx < (int) p2[1]; idx++)
4819 if (! (p2[2 + idx] == 0
4820 || (idx < CHARSET_BITMAP_SIZE (p1)
4821 && ((p2[2 + idx] & ~ p1[2 + idx]) == 0))))
4822 break;
4823
d1dfb56c
EZ
4824 if (idx == p2[1])
4825 {
4826 DEBUG_PRINT1 (" No match => fast loop.\n");
4827 return 1;
4828 }
4e8a9132
SM
4829 }
4830 }
4831 }
609b757a 4832 break;
177c0ea7 4833
411e4203 4834 case charset_not:
7393bcbb 4835 switch (*p1)
411e4203
SM
4836 {
4837 case exactn:
4838 case charset:
4839 /* Reuse the code above. */
4840 return mutually_exclusive_p (bufp, p2, p1);
4841 case charset_not:
4842 /* When we have two charset_not, it's very unlikely that
4843 they don't overlap. The union of the two sets of excluded
4844 chars should cover all possible chars, which, as a matter of
4845 fact, is virtually impossible in multibyte buffers. */
36595814 4846 break;
411e4203
SM
4847 }
4848 break;
4849
4e8a9132 4850 case wordend:
669fa600
SM
4851 return ((re_opcode_t) *p1 == syntaxspec && p1[1] == Sword);
4852 case symend:
4e8a9132 4853 return ((re_opcode_t) *p1 == syntaxspec
669fa600
SM
4854 && (p1[1] == Ssymbol || p1[1] == Sword));
4855 case notsyntaxspec:
4856 return ((re_opcode_t) *p1 == syntaxspec && p1[1] == p2[1]);
4e8a9132
SM
4857
4858 case wordbeg:
669fa600
SM
4859 return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == Sword);
4860 case symbeg:
4e8a9132 4861 return ((re_opcode_t) *p1 == notsyntaxspec
669fa600
SM
4862 && (p1[1] == Ssymbol || p1[1] == Sword));
4863 case syntaxspec:
4864 return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == p2[1]);
4e8a9132
SM
4865
4866 case wordbound:
4867 return (((re_opcode_t) *p1 == notsyntaxspec
4868 || (re_opcode_t) *p1 == syntaxspec)
4869 && p1[1] == Sword);
4870
1fb352e0 4871#ifdef emacs
4e8a9132
SM
4872 case categoryspec:
4873 return ((re_opcode_t) *p1 == notcategoryspec && p1[1] == p2[1]);
4874 case notcategoryspec:
4875 return ((re_opcode_t) *p1 == categoryspec && p1[1] == p2[1]);
4876#endif /* emacs */
4877
4878 default:
4879 ;
505bde11
SM
4880 }
4881
4882 /* Safe default. */
4883 return 0;
4884}
4885
fa9a63c5
RM
4886\f
4887/* Matching routines. */
4888
25fe55af 4889#ifndef emacs /* Emacs never uses this. */
fa9a63c5
RM
4890/* re_match is like re_match_2 except it takes only a single string. */
4891
d1dfb56c 4892regoff_t
d2762c86 4893re_match (struct re_pattern_buffer *bufp, const char *string,
d1dfb56c 4894 size_t size, ssize_t pos, struct re_registers *regs)
fa9a63c5 4895{
d1dfb56c
EZ
4896 regoff_t result = re_match_2_internal (bufp, NULL, 0, (re_char*) string,
4897 size, pos, regs, size);
fa9a63c5
RM
4898 return result;
4899}
c0f9ea08 4900WEAK_ALIAS (__re_match, re_match)
fa9a63c5
RM
4901#endif /* not emacs */
4902
b18215fc
RS
4903#ifdef emacs
4904/* In Emacs, this is the string or buffer in which we
7814e705 4905 are matching. It is used for looking up syntax properties. */
b18215fc
RS
4906Lisp_Object re_match_object;
4907#endif
fa9a63c5
RM
4908
4909/* re_match_2 matches the compiled pattern in BUFP against the
4910 the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
4911 and SIZE2, respectively). We start matching at POS, and stop
4912 matching at STOP.
5e69f11e 4913
fa9a63c5 4914 If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
7814e705 4915 store offsets for the substring each group matched in REGS. See the
fa9a63c5
RM
4916 documentation for exactly how many groups we fill.
4917
4918 We return -1 if no match, -2 if an internal error (such as the
7814e705 4919 failure stack overflowing). Otherwise, we return the length of the
fa9a63c5
RM
4920 matched substring. */
4921
d1dfb56c
EZ
4922regoff_t
4923re_match_2 (struct re_pattern_buffer *bufp, const char *string1,
4924 size_t size1, const char *string2, size_t size2, ssize_t pos,
4925 struct re_registers *regs, ssize_t stop)
fa9a63c5 4926{
d1dfb56c 4927 regoff_t result;
25fe55af 4928
b18215fc 4929#ifdef emacs
d1dfb56c 4930 ssize_t charpos;
d48cd3f4 4931 gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */
99633e97 4932 charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos));
cc9b4df2 4933 SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
b18215fc
RS
4934#endif
4935
4bb91c68
SM
4936 result = re_match_2_internal (bufp, (re_char*) string1, size1,
4937 (re_char*) string2, size2,
cc9b4df2 4938 pos, regs, stop);
fa9a63c5
RM
4939 return result;
4940}
c0f9ea08 4941WEAK_ALIAS (__re_match_2, re_match_2)
fa9a63c5 4942
bf216479 4943
fa9a63c5 4944/* This is a separate function so that we can force an alloca cleanup
7814e705 4945 afterwards. */
d1dfb56c
EZ
4946static regoff_t
4947re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1,
4948 size_t size1, const re_char *string2, size_t size2,
4949 ssize_t pos, struct re_registers *regs, ssize_t stop)
fa9a63c5
RM
4950{
4951 /* General temporaries. */
d1dfb56c 4952 ssize_t mcnt;
01618498 4953 size_t reg;
fa9a63c5
RM
4954
4955 /* Just past the end of the corresponding string. */
66f0296e 4956 re_char *end1, *end2;
fa9a63c5
RM
4957
4958 /* Pointers into string1 and string2, just past the last characters in
7814e705 4959 each to consider matching. */
66f0296e 4960 re_char *end_match_1, *end_match_2;
fa9a63c5
RM
4961
4962 /* Where we are in the data, and the end of the current string. */
66f0296e 4963 re_char *d, *dend;
5e69f11e 4964
99633e97
SM
4965 /* Used sometimes to remember where we were before starting matching
4966 an operator so that we can go back in case of failure. This "atomic"
4967 behavior of matching opcodes is indispensable to the correctness
4968 of the on_failure_keep_string_jump optimization. */
4969 re_char *dfail;
4970
fa9a63c5 4971 /* Where we are in the pattern, and the end of the pattern. */
01618498
SM
4972 re_char *p = bufp->buffer;
4973 re_char *pend = p + bufp->used;
fa9a63c5 4974
25fe55af 4975 /* We use this to map every character in the string. */
6676cb1c 4976 RE_TRANSLATE_TYPE translate = bufp->translate;
fa9a63c5 4977
cf9c99bc 4978 /* Nonzero if BUFP is setup from a multibyte regex. */
2d1675e4 4979 const boolean multibyte = RE_MULTIBYTE_P (bufp);
b18215fc 4980
cf9c99bc
KH
4981 /* Nonzero if STRING1/STRING2 are multibyte. */
4982 const boolean target_multibyte = RE_TARGET_MULTIBYTE_P (bufp);
4983
fa9a63c5
RM
4984 /* Failure point stack. Each place that can handle a failure further
4985 down the line pushes a failure point on this stack. It consists of
505bde11 4986 regstart, and regend for all registers corresponding to
fa9a63c5
RM
4987 the subexpressions we're currently inside, plus the number of such
4988 registers, and, finally, two char *'s. The first char * is where
4989 to resume scanning the pattern; the second one is where to resume
7814e705
JB
4990 scanning the strings. */
4991#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
fa9a63c5
RM
4992 fail_stack_type fail_stack;
4993#endif
4994#ifdef DEBUG
fa9a63c5
RM
4995 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
4996#endif
4997
0b32bf0e 4998#if defined REL_ALLOC && defined REGEX_MALLOC
fa9a63c5
RM
4999 /* This holds the pointer to the failure stack, when
5000 it is allocated relocatably. */
5001 fail_stack_elt_t *failure_stack_ptr;
99633e97 5002#endif
fa9a63c5
RM
5003
5004 /* We fill all the registers internally, independent of what we
7814e705 5005 return, for use in backreferences. The number here includes
fa9a63c5 5006 an element for register zero. */
4bb91c68 5007 size_t num_regs = bufp->re_nsub + 1;
5e69f11e 5008
fa9a63c5
RM
5009 /* Information on the contents of registers. These are pointers into
5010 the input strings; they record just what was matched (on this
5011 attempt) by a subexpression part of the pattern, that is, the
5012 regnum-th regstart pointer points to where in the pattern we began
5013 matching and the regnum-th regend points to right after where we
5014 stopped matching the regnum-th subexpression. (The zeroth register
5015 keeps track of what the whole pattern matches.) */
5016#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
66f0296e 5017 re_char **regstart, **regend;
fa9a63c5
RM
5018#endif
5019
fa9a63c5 5020 /* The following record the register info as found in the above
5e69f11e 5021 variables when we find a match better than any we've seen before.
fa9a63c5
RM
5022 This happens as we backtrack through the failure points, which in
5023 turn happens only if we have not yet matched the entire string. */
5024 unsigned best_regs_set = false;
5025#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
66f0296e 5026 re_char **best_regstart, **best_regend;
fa9a63c5 5027#endif
5e69f11e 5028
fa9a63c5
RM
5029 /* Logically, this is `best_regend[0]'. But we don't want to have to
5030 allocate space for that if we're not allocating space for anything
7814e705 5031 else (see below). Also, we never need info about register 0 for
fa9a63c5
RM
5032 any of the other register vectors, and it seems rather a kludge to
5033 treat `best_regend' differently than the rest. So we keep track of
5034 the end of the best match so far in a separate variable. We
5035 initialize this to NULL so that when we backtrack the first time
5036 and need to test it, it's not garbage. */
66f0296e 5037 re_char *match_end = NULL;
fa9a63c5 5038
fa9a63c5
RM
5039#ifdef DEBUG
5040 /* Counts the total number of registers pushed. */
5e69f11e 5041 unsigned num_regs_pushed = 0;
fa9a63c5
RM
5042#endif
5043
5044 DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
5e69f11e 5045
fa9a63c5 5046 INIT_FAIL_STACK ();
5e69f11e 5047
fa9a63c5
RM
5048#ifdef MATCH_MAY_ALLOCATE
5049 /* Do not bother to initialize all the register variables if there are
5050 no groups in the pattern, as it takes a fair amount of time. If
5051 there are groups, we include space for register 0 (the whole
5052 pattern), even though we never use it, since it simplifies the
5053 array indexing. We should fix this. */
5054 if (bufp->re_nsub)
5055 {
66f0296e
SM
5056 regstart = REGEX_TALLOC (num_regs, re_char *);
5057 regend = REGEX_TALLOC (num_regs, re_char *);
5058 best_regstart = REGEX_TALLOC (num_regs, re_char *);
5059 best_regend = REGEX_TALLOC (num_regs, re_char *);
fa9a63c5 5060
505bde11 5061 if (!(regstart && regend && best_regstart && best_regend))
25fe55af
RS
5062 {
5063 FREE_VARIABLES ();
5064 return -2;
5065 }
fa9a63c5
RM
5066 }
5067 else
5068 {
5069 /* We must initialize all our variables to NULL, so that
25fe55af 5070 `FREE_VARIABLES' doesn't try to free them. */
505bde11 5071 regstart = regend = best_regstart = best_regend = NULL;
fa9a63c5
RM
5072 }
5073#endif /* MATCH_MAY_ALLOCATE */
5074
5075 /* The starting position is bogus. */
5076 if (pos < 0 || pos > size1 + size2)
5077 {
5078 FREE_VARIABLES ();
5079 return -1;
5080 }
5e69f11e 5081
fa9a63c5
RM
5082 /* Initialize subexpression text positions to -1 to mark ones that no
5083 start_memory/stop_memory has been seen for. Also initialize the
5084 register information struct. */
01618498
SM
5085 for (reg = 1; reg < num_regs; reg++)
5086 regstart[reg] = regend[reg] = NULL;
99633e97 5087
fa9a63c5 5088 /* We move `string1' into `string2' if the latter's empty -- but not if
7814e705 5089 `string1' is null. */
fa9a63c5
RM
5090 if (size2 == 0 && string1 != NULL)
5091 {
5092 string2 = string1;
5093 size2 = size1;
5094 string1 = 0;
5095 size1 = 0;
5096 }
5097 end1 = string1 + size1;
5098 end2 = string2 + size2;
5099
5e69f11e 5100 /* `p' scans through the pattern as `d' scans through the data.
fa9a63c5
RM
5101 `dend' is the end of the input string that `d' points within. `d'
5102 is advanced into the following input string whenever necessary, but
5103 this happens before fetching; therefore, at the beginning of the
5104 loop, `d' can be pointing at the end of a string, but it cannot
5105 equal `string2'. */
419d1c74 5106 if (pos >= size1)
fa9a63c5 5107 {
419d1c74
SM
5108 /* Only match within string2. */
5109 d = string2 + pos - size1;
5110 dend = end_match_2 = string2 + stop - size1;
5111 end_match_1 = end1; /* Just to give it a value. */
fa9a63c5
RM
5112 }
5113 else
5114 {
f1ad044f 5115 if (stop < size1)
419d1c74
SM
5116 {
5117 /* Only match within string1. */
5118 end_match_1 = string1 + stop;
5119 /* BEWARE!
5120 When we reach end_match_1, PREFETCH normally switches to string2.
5121 But in the present case, this means that just doing a PREFETCH
5122 makes us jump from `stop' to `gap' within the string.
5123 What we really want here is for the search to stop as
5124 soon as we hit end_match_1. That's why we set end_match_2
5125 to end_match_1 (since PREFETCH fails as soon as we hit
5126 end_match_2). */
5127 end_match_2 = end_match_1;
5128 }
5129 else
f1ad044f
SM
5130 { /* It's important to use this code when stop == size so that
5131 moving `d' from end1 to string2 will not prevent the d == dend
5132 check from catching the end of string. */
419d1c74
SM
5133 end_match_1 = end1;
5134 end_match_2 = string2 + stop - size1;
5135 }
5136 d = string1 + pos;
5137 dend = end_match_1;
fa9a63c5
RM
5138 }
5139
5140 DEBUG_PRINT1 ("The compiled pattern is: ");
5141 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
5142 DEBUG_PRINT1 ("The string to match is: `");
5143 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
5144 DEBUG_PRINT1 ("'\n");
5e69f11e 5145
7814e705 5146 /* This loops over pattern commands. It exits by returning from the
fa9a63c5
RM
5147 function if the match is complete, or it drops through if the match
5148 fails at this starting point in the input data. */
5149 for (;;)
5150 {
505bde11 5151 DEBUG_PRINT2 ("\n%p: ", p);
fa9a63c5
RM
5152
5153 if (p == pend)
5154 { /* End of pattern means we might have succeeded. */
25fe55af 5155 DEBUG_PRINT1 ("end of pattern ... ");
5e69f11e 5156
fa9a63c5 5157 /* If we haven't matched the entire string, and we want the
25fe55af
RS
5158 longest match, try backtracking. */
5159 if (d != end_match_2)
fa9a63c5
RM
5160 {
5161 /* 1 if this match ends in the same string (string1 or string2)
5162 as the best previous match. */
26d4541d 5163 boolean same_str_p;
fa9a63c5
RM
5164 /* 1 if this match is the best seen so far. */
5165 boolean best_match_p;
5166
26d4541d
PE
5167 same_str_p = (FIRST_STRING_P (match_end)
5168 == FIRST_STRING_P (d));
5169
fa9a63c5 5170 /* AIX compiler got confused when this was combined
7814e705 5171 with the previous declaration. */
fa9a63c5
RM
5172 if (same_str_p)
5173 best_match_p = d > match_end;
5174 else
99633e97 5175 best_match_p = !FIRST_STRING_P (d);
fa9a63c5 5176
25fe55af
RS
5177 DEBUG_PRINT1 ("backtracking.\n");
5178
5179 if (!FAIL_STACK_EMPTY ())
5180 { /* More failure points to try. */
5181
5182 /* If exceeds best match so far, save it. */
5183 if (!best_regs_set || best_match_p)
5184 {
5185 best_regs_set = true;
5186 match_end = d;
5187
5188 DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
5189
01618498 5190 for (reg = 1; reg < num_regs; reg++)
25fe55af 5191 {
01618498
SM
5192 best_regstart[reg] = regstart[reg];
5193 best_regend[reg] = regend[reg];
25fe55af
RS
5194 }
5195 }
5196 goto fail;
5197 }
5198
5199 /* If no failure points, don't restore garbage. And if
5200 last match is real best match, don't restore second
5201 best one. */
5202 else if (best_regs_set && !best_match_p)
5203 {
5204 restore_best_regs:
5205 /* Restore best match. It may happen that `dend ==
5206 end_match_1' while the restored d is in string2.
5207 For example, the pattern `x.*y.*z' against the
5208 strings `x-' and `y-z-', if the two strings are
7814e705 5209 not consecutive in memory. */
25fe55af
RS
5210 DEBUG_PRINT1 ("Restoring best registers.\n");
5211
5212 d = match_end;
5213 dend = ((d >= string1 && d <= end1)
5214 ? end_match_1 : end_match_2);
fa9a63c5 5215
01618498 5216 for (reg = 1; reg < num_regs; reg++)
fa9a63c5 5217 {
01618498
SM
5218 regstart[reg] = best_regstart[reg];
5219 regend[reg] = best_regend[reg];
fa9a63c5 5220 }
25fe55af
RS
5221 }
5222 } /* d != end_match_2 */
fa9a63c5
RM
5223
5224 succeed_label:
25fe55af 5225 DEBUG_PRINT1 ("Accepting match.\n");
fa9a63c5 5226
25fe55af
RS
5227 /* If caller wants register contents data back, do it. */
5228 if (regs && !bufp->no_sub)
fa9a63c5 5229 {
25fe55af
RS
5230 /* Have the register data arrays been allocated? */
5231 if (bufp->regs_allocated == REGS_UNALLOCATED)
7814e705 5232 { /* No. So allocate them with malloc. We need one
25fe55af
RS
5233 extra element beyond `num_regs' for the `-1' marker
5234 GNU code uses. */
5235 regs->num_regs = MAX (RE_NREGS, num_regs + 1);
5236 regs->start = TALLOC (regs->num_regs, regoff_t);
5237 regs->end = TALLOC (regs->num_regs, regoff_t);
5238 if (regs->start == NULL || regs->end == NULL)
fa9a63c5
RM
5239 {
5240 FREE_VARIABLES ();
5241 return -2;
5242 }
25fe55af
RS
5243 bufp->regs_allocated = REGS_REALLOCATE;
5244 }
5245 else if (bufp->regs_allocated == REGS_REALLOCATE)
5246 { /* Yes. If we need more elements than were already
5247 allocated, reallocate them. If we need fewer, just
5248 leave it alone. */
5249 if (regs->num_regs < num_regs + 1)
5250 {
5251 regs->num_regs = num_regs + 1;
5252 RETALLOC (regs->start, regs->num_regs, regoff_t);
5253 RETALLOC (regs->end, regs->num_regs, regoff_t);
5254 if (regs->start == NULL || regs->end == NULL)
fa9a63c5
RM
5255 {
5256 FREE_VARIABLES ();
5257 return -2;
5258 }
25fe55af
RS
5259 }
5260 }
5261 else
fa9a63c5
RM
5262 {
5263 /* These braces fend off a "empty body in an else-statement"
7814e705 5264 warning under GCC when assert expands to nothing. */
fa9a63c5
RM
5265 assert (bufp->regs_allocated == REGS_FIXED);
5266 }
5267
25fe55af
RS
5268 /* Convert the pointer data in `regstart' and `regend' to
5269 indices. Register zero has to be set differently,
5270 since we haven't kept track of any info for it. */
5271 if (regs->num_regs > 0)
5272 {
5273 regs->start[0] = pos;
99633e97 5274 regs->end[0] = POINTER_TO_OFFSET (d);
25fe55af 5275 }
5e69f11e 5276
25fe55af
RS
5277 /* Go through the first `min (num_regs, regs->num_regs)'
5278 registers, since that is all we initialized. */
01618498 5279 for (reg = 1; reg < MIN (num_regs, regs->num_regs); reg++)
fa9a63c5 5280 {
01618498
SM
5281 if (REG_UNSET (regstart[reg]) || REG_UNSET (regend[reg]))
5282 regs->start[reg] = regs->end[reg] = -1;
25fe55af
RS
5283 else
5284 {
01618498
SM
5285 regs->start[reg]
5286 = (regoff_t) POINTER_TO_OFFSET (regstart[reg]);
5287 regs->end[reg]
5288 = (regoff_t) POINTER_TO_OFFSET (regend[reg]);
25fe55af 5289 }
fa9a63c5 5290 }
5e69f11e 5291
25fe55af
RS
5292 /* If the regs structure we return has more elements than
5293 were in the pattern, set the extra elements to -1. If
5294 we (re)allocated the registers, this is the case,
5295 because we always allocate enough to have at least one
7814e705 5296 -1 at the end. */
01618498
SM
5297 for (reg = num_regs; reg < regs->num_regs; reg++)
5298 regs->start[reg] = regs->end[reg] = -1;
fa9a63c5
RM
5299 } /* regs && !bufp->no_sub */
5300
25fe55af
RS
5301 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
5302 nfailure_points_pushed, nfailure_points_popped,
5303 nfailure_points_pushed - nfailure_points_popped);
5304 DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
fa9a63c5 5305
99633e97 5306 mcnt = POINTER_TO_OFFSET (d) - pos;
fa9a63c5 5307
25fe55af 5308 DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
fa9a63c5 5309
25fe55af
RS
5310 FREE_VARIABLES ();
5311 return mcnt;
5312 }
fa9a63c5 5313
7814e705 5314 /* Otherwise match next pattern command. */
7393bcbb 5315 switch (*p++)
fa9a63c5 5316 {
25fe55af
RS
5317 /* Ignore these. Used to ignore the n of succeed_n's which
5318 currently have n == 0. */
5319 case no_op:
5320 DEBUG_PRINT1 ("EXECUTING no_op.\n");
5321 break;
fa9a63c5
RM
5322
5323 case succeed:
25fe55af 5324 DEBUG_PRINT1 ("EXECUTING succeed.\n");
fa9a63c5
RM
5325 goto succeed_label;
5326
7814e705 5327 /* Match the next n pattern characters exactly. The following
25fe55af 5328 byte in the pattern defines n, and the n bytes after that
7814e705 5329 are the characters to match. */
fa9a63c5
RM
5330 case exactn:
5331 mcnt = *p++;
25fe55af 5332 DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
fa9a63c5 5333
99633e97
SM
5334 /* Remember the start point to rollback upon failure. */
5335 dfail = d;
5336
6fdd04b0 5337#ifndef emacs
25fe55af
RS
5338 /* This is written out as an if-else so we don't waste time
5339 testing `translate' inside the loop. */
28703c16 5340 if (RE_TRANSLATE_P (translate))
6fdd04b0
KH
5341 do
5342 {
5343 PREFETCH ();
5344 if (RE_TRANSLATE (translate, *d) != *p++)
e934739e 5345 {
6fdd04b0
KH
5346 d = dfail;
5347 goto fail;
e934739e 5348 }
6fdd04b0
KH
5349 d++;
5350 }
5351 while (--mcnt);
fa9a63c5 5352 else
6fdd04b0
KH
5353 do
5354 {
5355 PREFETCH ();
5356 if (*d++ != *p++)
bf216479 5357 {
6fdd04b0
KH
5358 d = dfail;
5359 goto fail;
bf216479 5360 }
6fdd04b0
KH
5361 }
5362 while (--mcnt);
5363#else /* emacs */
5364 /* The cost of testing `translate' is comparatively small. */
cf9c99bc 5365 if (target_multibyte)
6fdd04b0
KH
5366 do
5367 {
5368 int pat_charlen, buf_charlen;
cf9c99bc 5369 int pat_ch, buf_ch;
e934739e 5370
6fdd04b0 5371 PREFETCH ();
cf9c99bc 5372 if (multibyte)
62a6e103 5373 pat_ch = STRING_CHAR_AND_LENGTH (p, pat_charlen);
cf9c99bc
KH
5374 else
5375 {
5376 pat_ch = RE_CHAR_TO_MULTIBYTE (*p);
5377 pat_charlen = 1;
5378 }
62a6e103 5379 buf_ch = STRING_CHAR_AND_LENGTH (d, buf_charlen);
e934739e 5380
6fdd04b0 5381 if (TRANSLATE (buf_ch) != pat_ch)
e934739e 5382 {
6fdd04b0
KH
5383 d = dfail;
5384 goto fail;
e934739e 5385 }
bf216479 5386
6fdd04b0
KH
5387 p += pat_charlen;
5388 d += buf_charlen;
5389 mcnt -= pat_charlen;
5390 }
5391 while (mcnt > 0);
fa9a63c5 5392 else
6fdd04b0
KH
5393 do
5394 {
abbd1bcf 5395 int pat_charlen;
cf9c99bc 5396 int pat_ch, buf_ch;
bf216479 5397
6fdd04b0 5398 PREFETCH ();
cf9c99bc
KH
5399 if (multibyte)
5400 {
62a6e103 5401 pat_ch = STRING_CHAR_AND_LENGTH (p, pat_charlen);
2afc21f5 5402 pat_ch = RE_CHAR_TO_UNIBYTE (pat_ch);
cf9c99bc
KH
5403 }
5404 else
5405 {
5406 pat_ch = *p;
5407 pat_charlen = 1;
5408 }
5409 buf_ch = RE_CHAR_TO_MULTIBYTE (*d);
5410 if (! CHAR_BYTE8_P (buf_ch))
5411 {
5412 buf_ch = TRANSLATE (buf_ch);
5413 buf_ch = RE_CHAR_TO_UNIBYTE (buf_ch);
5414 if (buf_ch < 0)
5415 buf_ch = *d;
5416 }
0e2501ed
AS
5417 else
5418 buf_ch = *d;
cf9c99bc 5419 if (buf_ch != pat_ch)
6fdd04b0
KH
5420 {
5421 d = dfail;
5422 goto fail;
bf216479 5423 }
cf9c99bc
KH
5424 p += pat_charlen;
5425 d++;
6fdd04b0
KH
5426 }
5427 while (--mcnt);
5428#endif
25fe55af 5429 break;
fa9a63c5
RM
5430
5431
25fe55af 5432 /* Match any character except possibly a newline or a null. */
fa9a63c5 5433 case anychar:
e934739e
RS
5434 {
5435 int buf_charlen;
01618498 5436 re_wchar_t buf_ch;
fa9a63c5 5437
e934739e 5438 DEBUG_PRINT1 ("EXECUTING anychar.\n");
fa9a63c5 5439
e934739e 5440 PREFETCH ();
62a6e103 5441 buf_ch = RE_STRING_CHAR_AND_LENGTH (d, buf_charlen,
cf9c99bc 5442 target_multibyte);
e934739e
RS
5443 buf_ch = TRANSLATE (buf_ch);
5444
5445 if ((!(bufp->syntax & RE_DOT_NEWLINE)
5446 && buf_ch == '\n')
5447 || ((bufp->syntax & RE_DOT_NOT_NULL)
5448 && buf_ch == '\000'))
5449 goto fail;
5450
e934739e
RS
5451 DEBUG_PRINT2 (" Matched `%d'.\n", *d);
5452 d += buf_charlen;
5453 }
fa9a63c5
RM
5454 break;
5455
5456
5457 case charset:
5458 case charset_not:
5459 {
b18215fc 5460 register unsigned int c;
fa9a63c5 5461 boolean not = (re_opcode_t) *(p - 1) == charset_not;
b18215fc
RS
5462 int len;
5463
5464 /* Start of actual range_table, or end of bitmap if there is no
5465 range table. */
da053e48 5466 re_char *range_table IF_LINT (= NULL);
b18215fc 5467
96cc36cc 5468 /* Nonzero if there is a range table. */
b18215fc
RS
5469 int range_table_exists;
5470
96cc36cc
RS
5471 /* Number of ranges of range table. This is not included
5472 in the initial byte-length of the command. */
5473 int count = 0;
fa9a63c5 5474
f5020181
AS
5475 /* Whether matching against a unibyte character. */
5476 boolean unibyte_char = false;
5477
25fe55af 5478 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
fa9a63c5 5479
b18215fc 5480 range_table_exists = CHARSET_RANGE_TABLE_EXISTS_P (&p[-1]);
96cc36cc 5481
b18215fc 5482 if (range_table_exists)
96cc36cc
RS
5483 {
5484 range_table = CHARSET_RANGE_TABLE (&p[-1]); /* Past the bitmap. */
5485 EXTRACT_NUMBER_AND_INCR (count, range_table);
5486 }
b18215fc 5487
2d1675e4 5488 PREFETCH ();
62a6e103 5489 c = RE_STRING_CHAR_AND_LENGTH (d, len, target_multibyte);
cf9c99bc
KH
5490 if (target_multibyte)
5491 {
5492 int c1;
b18215fc 5493
cf9c99bc
KH
5494 c = TRANSLATE (c);
5495 c1 = RE_CHAR_TO_UNIBYTE (c);
5496 if (c1 >= 0)
f5020181
AS
5497 {
5498 unibyte_char = true;
5499 c = c1;
5500 }
cf9c99bc
KH
5501 }
5502 else
5503 {
5504 int c1 = RE_CHAR_TO_MULTIBYTE (c);
5505
5506 if (! CHAR_BYTE8_P (c1))
5507 {
5508 c1 = TRANSLATE (c1);
5509 c1 = RE_CHAR_TO_UNIBYTE (c1);
5510 if (c1 >= 0)
f5020181
AS
5511 {
5512 unibyte_char = true;
5513 c = c1;
5514 }
cf9c99bc 5515 }
0b8be006
AS
5516 else
5517 unibyte_char = true;
cf9c99bc
KH
5518 }
5519
f5020181 5520 if (unibyte_char && c < (1 << BYTEWIDTH))
b18215fc 5521 { /* Lookup bitmap. */
b18215fc
RS
5522 /* Cast to `unsigned' instead of `unsigned char' in
5523 case the bit list is a full 32 bytes long. */
5524 if (c < (unsigned) (CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH)
96cc36cc
RS
5525 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
5526 not = !not;
b18215fc 5527 }
96cc36cc 5528#ifdef emacs
b18215fc 5529 else if (range_table_exists)
96cc36cc
RS
5530 {
5531 int class_bits = CHARSET_RANGE_TABLE_BITS (&p[-1]);
5532
14473664
SM
5533 if ( (class_bits & BIT_LOWER && ISLOWER (c))
5534 | (class_bits & BIT_MULTIBYTE)
96cc36cc
RS
5535 | (class_bits & BIT_PUNCT && ISPUNCT (c))
5536 | (class_bits & BIT_SPACE && ISSPACE (c))
5537 | (class_bits & BIT_UPPER && ISUPPER (c))
5538 | (class_bits & BIT_WORD && ISWORD (c)))
5539 not = !not;
5540 else
5541 CHARSET_LOOKUP_RANGE_TABLE_RAW (not, c, range_table, count);
5542 }
5543#endif /* emacs */
fa9a63c5 5544
96cc36cc
RS
5545 if (range_table_exists)
5546 p = CHARSET_RANGE_TABLE_END (range_table, count);
5547 else
5548 p += CHARSET_BITMAP_SIZE (&p[-1]) + 1;
fa9a63c5
RM
5549
5550 if (!not) goto fail;
5e69f11e 5551
b18215fc 5552 d += len;
fa9a63c5 5553 }
8fb31792 5554 break;
fa9a63c5
RM
5555
5556
25fe55af 5557 /* The beginning of a group is represented by start_memory.
505bde11 5558 The argument is the register number. The text
25fe55af 5559 matched within the group is recorded (in the internal
7814e705 5560 registers data structure) under the register number. */
25fe55af 5561 case start_memory:
505bde11
SM
5562 DEBUG_PRINT2 ("EXECUTING start_memory %d:\n", *p);
5563
5564 /* In case we need to undo this operation (via backtracking). */
5565 PUSH_FAILURE_REG ((unsigned int)*p);
fa9a63c5 5566
25fe55af 5567 regstart[*p] = d;
4bb91c68 5568 regend[*p] = NULL; /* probably unnecessary. -sm */
fa9a63c5
RM
5569 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
5570
25fe55af 5571 /* Move past the register number and inner group count. */
505bde11 5572 p += 1;
25fe55af 5573 break;
fa9a63c5
RM
5574
5575
25fe55af 5576 /* The stop_memory opcode represents the end of a group. Its
505bde11 5577 argument is the same as start_memory's: the register number. */
fa9a63c5 5578 case stop_memory:
505bde11
SM
5579 DEBUG_PRINT2 ("EXECUTING stop_memory %d:\n", *p);
5580
5581 assert (!REG_UNSET (regstart[*p]));
5582 /* Strictly speaking, there should be code such as:
177c0ea7 5583
0b32bf0e 5584 assert (REG_UNSET (regend[*p]));
505bde11
SM
5585 PUSH_FAILURE_REGSTOP ((unsigned int)*p);
5586
5587 But the only info to be pushed is regend[*p] and it is known to
5588 be UNSET, so there really isn't anything to push.
5589 Not pushing anything, on the other hand deprives us from the
5590 guarantee that regend[*p] is UNSET since undoing this operation
5591 will not reset its value properly. This is not important since
5592 the value will only be read on the next start_memory or at
5593 the very end and both events can only happen if this stop_memory
5594 is *not* undone. */
fa9a63c5 5595
25fe55af 5596 regend[*p] = d;
fa9a63c5
RM
5597 DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
5598
25fe55af 5599 /* Move past the register number and the inner group count. */
505bde11 5600 p += 1;
25fe55af 5601 break;
fa9a63c5
RM
5602
5603
5604 /* \<digit> has been turned into a `duplicate' command which is
25fe55af
RS
5605 followed by the numeric value of <digit> as the register number. */
5606 case duplicate:
fa9a63c5 5607 {
66f0296e 5608 register re_char *d2, *dend2;
7814e705 5609 int regno = *p++; /* Get which register to match against. */
fa9a63c5
RM
5610 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
5611
7814e705 5612 /* Can't back reference a group which we've never matched. */
25fe55af
RS
5613 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
5614 goto fail;
5e69f11e 5615
7814e705 5616 /* Where in input to try to start matching. */
25fe55af 5617 d2 = regstart[regno];
5e69f11e 5618
99633e97
SM
5619 /* Remember the start point to rollback upon failure. */
5620 dfail = d;
5621
25fe55af
RS
5622 /* Where to stop matching; if both the place to start and
5623 the place to stop matching are in the same string, then
5624 set to the place to stop, otherwise, for now have to use
5625 the end of the first string. */
fa9a63c5 5626
25fe55af 5627 dend2 = ((FIRST_STRING_P (regstart[regno])
fa9a63c5
RM
5628 == FIRST_STRING_P (regend[regno]))
5629 ? regend[regno] : end_match_1);
5630 for (;;)
5631 {
5632 /* If necessary, advance to next segment in register
25fe55af 5633 contents. */
fa9a63c5
RM
5634 while (d2 == dend2)
5635 {
5636 if (dend2 == end_match_2) break;
5637 if (dend2 == regend[regno]) break;
5638
25fe55af
RS
5639 /* End of string1 => advance to string2. */
5640 d2 = string2;
5641 dend2 = regend[regno];
fa9a63c5
RM
5642 }
5643 /* At end of register contents => success */
5644 if (d2 == dend2) break;
5645
5646 /* If necessary, advance to next segment in data. */
5647 PREFETCH ();
5648
5649 /* How many characters left in this segment to match. */
5650 mcnt = dend - d;
5e69f11e 5651
fa9a63c5 5652 /* Want how many consecutive characters we can match in
25fe55af
RS
5653 one shot, so, if necessary, adjust the count. */
5654 if (mcnt > dend2 - d2)
fa9a63c5 5655 mcnt = dend2 - d2;
5e69f11e 5656
fa9a63c5 5657 /* Compare that many; failure if mismatch, else move
25fe55af 5658 past them. */
28703c16 5659 if (RE_TRANSLATE_P (translate)
02cb78b5 5660 ? bcmp_translate (d, d2, mcnt, translate, target_multibyte)
4bb91c68 5661 : memcmp (d, d2, mcnt))
99633e97
SM
5662 {
5663 d = dfail;
5664 goto fail;
5665 }
fa9a63c5 5666 d += mcnt, d2 += mcnt;
fa9a63c5
RM
5667 }
5668 }
5669 break;
5670
5671
25fe55af 5672 /* begline matches the empty string at the beginning of the string
c0f9ea08 5673 (unless `not_bol' is set in `bufp'), and after newlines. */
fa9a63c5 5674 case begline:
25fe55af 5675 DEBUG_PRINT1 ("EXECUTING begline.\n");
5e69f11e 5676
25fe55af
RS
5677 if (AT_STRINGS_BEG (d))
5678 {
5679 if (!bufp->not_bol) break;
5680 }
419d1c74 5681 else
25fe55af 5682 {
bf216479 5683 unsigned c;
419d1c74 5684 GET_CHAR_BEFORE_2 (c, d, string1, end1, string2, end2);
c0f9ea08 5685 if (c == '\n')
419d1c74 5686 break;
25fe55af
RS
5687 }
5688 /* In all other cases, we fail. */
5689 goto fail;
fa9a63c5
RM
5690
5691
25fe55af 5692 /* endline is the dual of begline. */
fa9a63c5 5693 case endline:
25fe55af 5694 DEBUG_PRINT1 ("EXECUTING endline.\n");
fa9a63c5 5695
25fe55af
RS
5696 if (AT_STRINGS_END (d))
5697 {
5698 if (!bufp->not_eol) break;
5699 }
f1ad044f 5700 else
25fe55af 5701 {
f1ad044f 5702 PREFETCH_NOLIMIT ();
c0f9ea08 5703 if (*d == '\n')
f1ad044f 5704 break;
25fe55af
RS
5705 }
5706 goto fail;
fa9a63c5
RM
5707
5708
5709 /* Match at the very beginning of the data. */
25fe55af
RS
5710 case begbuf:
5711 DEBUG_PRINT1 ("EXECUTING begbuf.\n");
5712 if (AT_STRINGS_BEG (d))
5713 break;
5714 goto fail;
fa9a63c5
RM
5715
5716
5717 /* Match at the very end of the data. */
25fe55af
RS
5718 case endbuf:
5719 DEBUG_PRINT1 ("EXECUTING endbuf.\n");
fa9a63c5
RM
5720 if (AT_STRINGS_END (d))
5721 break;
25fe55af 5722 goto fail;
5e69f11e 5723
5e69f11e 5724
25fe55af
RS
5725 /* on_failure_keep_string_jump is used to optimize `.*\n'. It
5726 pushes NULL as the value for the string on the stack. Then
505bde11 5727 `POP_FAILURE_POINT' will keep the current value for the
25fe55af 5728 string, instead of restoring it. To see why, consider
7814e705 5729 matching `foo\nbar' against `.*\n'. The .* matches the foo;
25fe55af
RS
5730 then the . fails against the \n. But the next thing we want
5731 to do is match the \n against the \n; if we restored the
5732 string value, we would be back at the foo.
5733
5734 Because this is used only in specific cases, we don't need to
5735 check all the things that `on_failure_jump' does, to make
5736 sure the right things get saved on the stack. Hence we don't
5737 share its code. The only reason to push anything on the
5738 stack at all is that otherwise we would have to change
5739 `anychar's code to do something besides goto fail in this
5740 case; that seems worse than this. */
5741 case on_failure_keep_string_jump:
505bde11
SM
5742 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5743 DEBUG_PRINT3 ("EXECUTING on_failure_keep_string_jump %d (to %p):\n",
5744 mcnt, p + mcnt);
fa9a63c5 5745
505bde11
SM
5746 PUSH_FAILURE_POINT (p - 3, NULL);
5747 break;
5748
0683b6fa
SM
5749 /* A nasty loop is introduced by the non-greedy *? and +?.
5750 With such loops, the stack only ever contains one failure point
5751 at a time, so that a plain on_failure_jump_loop kind of
5752 cycle detection cannot work. Worse yet, such a detection
5753 can not only fail to detect a cycle, but it can also wrongly
5754 detect a cycle (between different instantiations of the same
6df42991 5755 loop).
0683b6fa
SM
5756 So the method used for those nasty loops is a little different:
5757 We use a special cycle-detection-stack-frame which is pushed
5758 when the on_failure_jump_nastyloop failure-point is *popped*.
5759 This special frame thus marks the beginning of one iteration
5760 through the loop and we can hence easily check right here
5761 whether something matched between the beginning and the end of
5762 the loop. */
5763 case on_failure_jump_nastyloop:
5764 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5765 DEBUG_PRINT3 ("EXECUTING on_failure_jump_nastyloop %d (to %p):\n",
5766 mcnt, p + mcnt);
5767
5768 assert ((re_opcode_t)p[-4] == no_op);
6df42991
SM
5769 {
5770 int cycle = 0;
5771 CHECK_INFINITE_LOOP (p - 4, d);
5772 if (!cycle)
5773 /* If there's a cycle, just continue without pushing
5774 this failure point. The failure point is the "try again"
5775 option, which shouldn't be tried.
5776 We want (x?)*?y\1z to match both xxyz and xxyxz. */
5777 PUSH_FAILURE_POINT (p - 3, d);
5778 }
0683b6fa
SM
5779 break;
5780
4e8a9132
SM
5781 /* Simple loop detecting on_failure_jump: just check on the
5782 failure stack if the same spot was already hit earlier. */
505bde11
SM
5783 case on_failure_jump_loop:
5784 on_failure:
5785 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5786 DEBUG_PRINT3 ("EXECUTING on_failure_jump_loop %d (to %p):\n",
5787 mcnt, p + mcnt);
6df42991
SM
5788 {
5789 int cycle = 0;
5790 CHECK_INFINITE_LOOP (p - 3, d);
5791 if (cycle)
5792 /* If there's a cycle, get out of the loop, as if the matching
5793 had failed. We used to just `goto fail' here, but that was
5794 aborting the search a bit too early: we want to keep the
5795 empty-loop-match and keep matching after the loop.
5796 We want (x?)*y\1z to match both xxyz and xxyxz. */
5797 p += mcnt;
5798 else
5799 PUSH_FAILURE_POINT (p - 3, d);
5800 }
25fe55af 5801 break;
fa9a63c5
RM
5802
5803
5804 /* Uses of on_failure_jump:
5e69f11e 5805
25fe55af
RS
5806 Each alternative starts with an on_failure_jump that points
5807 to the beginning of the next alternative. Each alternative
5808 except the last ends with a jump that in effect jumps past
5809 the rest of the alternatives. (They really jump to the
5810 ending jump of the following alternative, because tensioning
5811 these jumps is a hassle.)
fa9a63c5 5812
25fe55af
RS
5813 Repeats start with an on_failure_jump that points past both
5814 the repetition text and either the following jump or
5815 pop_failure_jump back to this on_failure_jump. */
fa9a63c5 5816 case on_failure_jump:
25fe55af 5817 EXTRACT_NUMBER_AND_INCR (mcnt, p);
505bde11
SM
5818 DEBUG_PRINT3 ("EXECUTING on_failure_jump %d (to %p):\n",
5819 mcnt, p + mcnt);
25fe55af 5820
505bde11 5821 PUSH_FAILURE_POINT (p -3, d);
25fe55af
RS
5822 break;
5823
4e8a9132 5824 /* This operation is used for greedy *.
505bde11
SM
5825 Compare the beginning of the repeat with what in the
5826 pattern follows its end. If we can establish that there
5827 is nothing that they would both match, i.e., that we
5828 would have to backtrack because of (as in, e.g., `a*a')
5829 then we can use a non-backtracking loop based on
4e8a9132 5830 on_failure_keep_string_jump instead of on_failure_jump. */
505bde11 5831 case on_failure_jump_smart:
25fe55af 5832 EXTRACT_NUMBER_AND_INCR (mcnt, p);
505bde11
SM
5833 DEBUG_PRINT3 ("EXECUTING on_failure_jump_smart %d (to %p).\n",
5834 mcnt, p + mcnt);
25fe55af 5835 {
01618498 5836 re_char *p1 = p; /* Next operation. */
6dcf2d0e
SM
5837 /* Here, we discard `const', making re_match non-reentrant. */
5838 unsigned char *p2 = (unsigned char*) p + mcnt; /* Jump dest. */
5839 unsigned char *p3 = (unsigned char*) p - 3; /* opcode location. */
fa9a63c5 5840
505bde11
SM
5841 p -= 3; /* Reset so that we will re-execute the
5842 instruction once it's been changed. */
fa9a63c5 5843
4e8a9132
SM
5844 EXTRACT_NUMBER (mcnt, p2 - 2);
5845
5846 /* Ensure this is a indeed the trivial kind of loop
5847 we are expecting. */
5848 assert (skip_one_char (p1) == p2 - 3);
5849 assert ((re_opcode_t) p2[-3] == jump && p2 + mcnt == p);
99633e97 5850 DEBUG_STATEMENT (debug += 2);
505bde11 5851 if (mutually_exclusive_p (bufp, p1, p2))
fa9a63c5 5852 {
505bde11 5853 /* Use a fast `on_failure_keep_string_jump' loop. */
4e8a9132 5854 DEBUG_PRINT1 (" smart exclusive => fast loop.\n");
01618498 5855 *p3 = (unsigned char) on_failure_keep_string_jump;
4e8a9132 5856 STORE_NUMBER (p2 - 2, mcnt + 3);
25fe55af 5857 }
505bde11 5858 else
fa9a63c5 5859 {
505bde11
SM
5860 /* Default to a safe `on_failure_jump' loop. */
5861 DEBUG_PRINT1 (" smart default => slow loop.\n");
01618498 5862 *p3 = (unsigned char) on_failure_jump;
fa9a63c5 5863 }
99633e97 5864 DEBUG_STATEMENT (debug -= 2);
25fe55af 5865 }
505bde11 5866 break;
25fe55af
RS
5867
5868 /* Unconditionally jump (without popping any failure points). */
5869 case jump:
fa9a63c5 5870 unconditional_jump:
5b370c2b 5871 IMMEDIATE_QUIT_CHECK;
fa9a63c5 5872 EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */
25fe55af 5873 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
7814e705 5874 p += mcnt; /* Do the jump. */
505bde11 5875 DEBUG_PRINT2 ("(to %p).\n", p);
25fe55af
RS
5876 break;
5877
5878
25fe55af
RS
5879 /* Have to succeed matching what follows at least n times.
5880 After that, handle like `on_failure_jump'. */
5881 case succeed_n:
01618498 5882 /* Signedness doesn't matter since we only compare MCNT to 0. */
25fe55af
RS
5883 EXTRACT_NUMBER (mcnt, p + 2);
5884 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
5e69f11e 5885
dc1e502d
SM
5886 /* Originally, mcnt is how many times we HAVE to succeed. */
5887 if (mcnt != 0)
25fe55af 5888 {
6dcf2d0e
SM
5889 /* Here, we discard `const', making re_match non-reentrant. */
5890 unsigned char *p2 = (unsigned char*) p + 2; /* counter loc. */
dc1e502d 5891 mcnt--;
01618498
SM
5892 p += 4;
5893 PUSH_NUMBER (p2, mcnt);
25fe55af 5894 }
dc1e502d
SM
5895 else
5896 /* The two bytes encoding mcnt == 0 are two no_op opcodes. */
5897 goto on_failure;
25fe55af
RS
5898 break;
5899
5900 case jump_n:
01618498 5901 /* Signedness doesn't matter since we only compare MCNT to 0. */
25fe55af
RS
5902 EXTRACT_NUMBER (mcnt, p + 2);
5903 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
5904
5905 /* Originally, this is how many times we CAN jump. */
dc1e502d 5906 if (mcnt != 0)
25fe55af 5907 {
6dcf2d0e
SM
5908 /* Here, we discard `const', making re_match non-reentrant. */
5909 unsigned char *p2 = (unsigned char*) p + 2; /* counter loc. */
dc1e502d 5910 mcnt--;
01618498 5911 PUSH_NUMBER (p2, mcnt);
dc1e502d 5912 goto unconditional_jump;
25fe55af
RS
5913 }
5914 /* If don't have to jump any more, skip over the rest of command. */
5e69f11e
RM
5915 else
5916 p += 4;
25fe55af 5917 break;
5e69f11e 5918
fa9a63c5
RM
5919 case set_number_at:
5920 {
01618498 5921 unsigned char *p2; /* Location of the counter. */
25fe55af 5922 DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
fa9a63c5 5923
25fe55af 5924 EXTRACT_NUMBER_AND_INCR (mcnt, p);
6dcf2d0e
SM
5925 /* Here, we discard `const', making re_match non-reentrant. */
5926 p2 = (unsigned char*) p + mcnt;
01618498 5927 /* Signedness doesn't matter since we only copy MCNT's bits . */
25fe55af 5928 EXTRACT_NUMBER_AND_INCR (mcnt, p);
01618498
SM
5929 DEBUG_PRINT3 (" Setting %p to %d.\n", p2, mcnt);
5930 PUSH_NUMBER (p2, mcnt);
25fe55af
RS
5931 break;
5932 }
9121ca40
KH
5933
5934 case wordbound:
66f0296e 5935 case notwordbound:
19ed5445
PE
5936 {
5937 boolean not = (re_opcode_t) *(p - 1) == notwordbound;
5938 DEBUG_PRINT2 ("EXECUTING %swordbound.\n", not?"not":"");
fa9a63c5 5939
19ed5445 5940 /* We SUCCEED (or FAIL) in one of the following cases: */
9121ca40 5941
19ed5445
PE
5942 /* Case 1: D is at the beginning or the end of string. */
5943 if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5944 not = !not;
5945 else
5946 {
5947 /* C1 is the character before D, S1 is the syntax of C1, C2
5948 is the character at D, and S2 is the syntax of C2. */
5949 re_wchar_t c1, c2;
5950 int s1, s2;
5951 int dummy;
b18215fc 5952#ifdef emacs
d1dfb56c
EZ
5953 ssize_t offset = PTR_TO_OFFSET (d - 1);
5954 ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
19ed5445 5955 UPDATE_SYNTAX_TABLE (charpos);
25fe55af 5956#endif
19ed5445
PE
5957 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5958 s1 = SYNTAX (c1);
b18215fc 5959#ifdef emacs
19ed5445 5960 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
25fe55af 5961#endif
19ed5445
PE
5962 PREFETCH_NOLIMIT ();
5963 GET_CHAR_AFTER (c2, d, dummy);
5964 s2 = SYNTAX (c2);
5965
5966 if (/* Case 2: Only one of S1 and S2 is Sword. */
5967 ((s1 == Sword) != (s2 == Sword))
5968 /* Case 3: Both of S1 and S2 are Sword, and macro
5969 WORD_BOUNDARY_P (C1, C2) returns nonzero. */
5970 || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
5971 not = !not;
5972 }
5973 if (not)
5974 break;
5975 else
5976 goto fail;
5977 }
fa9a63c5
RM
5978
5979 case wordbeg:
25fe55af 5980 DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
fa9a63c5 5981
b18215fc
RS
5982 /* We FAIL in one of the following cases: */
5983
7814e705 5984 /* Case 1: D is at the end of string. */
b18215fc 5985 if (AT_STRINGS_END (d))
99633e97 5986 goto fail;
b18215fc
RS
5987 else
5988 {
5989 /* C1 is the character before D, S1 is the syntax of C1, C2
5990 is the character at D, and S2 is the syntax of C2. */
01618498
SM
5991 re_wchar_t c1, c2;
5992 int s1, s2;
bf216479 5993 int dummy;
fa9a63c5 5994#ifdef emacs
d1dfb56c
EZ
5995 ssize_t offset = PTR_TO_OFFSET (d);
5996 ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
92432794 5997 UPDATE_SYNTAX_TABLE (charpos);
25fe55af 5998#endif
99633e97 5999 PREFETCH ();
6fdd04b0 6000 GET_CHAR_AFTER (c2, d, dummy);
b18215fc 6001 s2 = SYNTAX (c2);
177c0ea7 6002
b18215fc
RS
6003 /* Case 2: S2 is not Sword. */
6004 if (s2 != Sword)
6005 goto fail;
6006
6007 /* Case 3: D is not at the beginning of string ... */
6008 if (!AT_STRINGS_BEG (d))
6009 {
6010 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
6011#ifdef emacs
5d967c7a 6012 UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
25fe55af 6013#endif
b18215fc
RS
6014 s1 = SYNTAX (c1);
6015
6016 /* ... and S1 is Sword, and WORD_BOUNDARY_P (C1, C2)
7814e705 6017 returns 0. */
b18215fc
RS
6018 if ((s1 == Sword) && !WORD_BOUNDARY_P (c1, c2))
6019 goto fail;
6020 }
6021 }
e318085a
RS
6022 break;
6023
b18215fc 6024 case wordend:
25fe55af 6025 DEBUG_PRINT1 ("EXECUTING wordend.\n");
b18215fc
RS
6026
6027 /* We FAIL in one of the following cases: */
6028
6029 /* Case 1: D is at the beginning of string. */
6030 if (AT_STRINGS_BEG (d))
e318085a 6031 goto fail;
b18215fc
RS
6032 else
6033 {
6034 /* C1 is the character before D, S1 is the syntax of C1, C2
6035 is the character at D, and S2 is the syntax of C2. */
01618498
SM
6036 re_wchar_t c1, c2;
6037 int s1, s2;
bf216479 6038 int dummy;
5d967c7a 6039#ifdef emacs
d1dfb56c
EZ
6040 ssize_t offset = PTR_TO_OFFSET (d) - 1;
6041 ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
92432794 6042 UPDATE_SYNTAX_TABLE (charpos);
5d967c7a 6043#endif
99633e97 6044 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
b18215fc
RS
6045 s1 = SYNTAX (c1);
6046
6047 /* Case 2: S1 is not Sword. */
6048 if (s1 != Sword)
6049 goto fail;
6050
6051 /* Case 3: D is not at the end of string ... */
6052 if (!AT_STRINGS_END (d))
6053 {
f1ad044f 6054 PREFETCH_NOLIMIT ();
6fdd04b0 6055 GET_CHAR_AFTER (c2, d, dummy);
5d967c7a
RS
6056#ifdef emacs
6057 UPDATE_SYNTAX_TABLE_FORWARD (charpos);
6058#endif
b18215fc
RS
6059 s2 = SYNTAX (c2);
6060
6061 /* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2)
7814e705 6062 returns 0. */
b18215fc 6063 if ((s2 == Sword) && !WORD_BOUNDARY_P (c1, c2))
25fe55af 6064 goto fail;
b18215fc
RS
6065 }
6066 }
e318085a
RS
6067 break;
6068
669fa600
SM
6069 case symbeg:
6070 DEBUG_PRINT1 ("EXECUTING symbeg.\n");
6071
6072 /* We FAIL in one of the following cases: */
6073
7814e705 6074 /* Case 1: D is at the end of string. */
669fa600
SM
6075 if (AT_STRINGS_END (d))
6076 goto fail;
6077 else
6078 {
6079 /* C1 is the character before D, S1 is the syntax of C1, C2
6080 is the character at D, and S2 is the syntax of C2. */
6081 re_wchar_t c1, c2;
6082 int s1, s2;
6083#ifdef emacs
d1dfb56c
EZ
6084 ssize_t offset = PTR_TO_OFFSET (d);
6085 ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
669fa600
SM
6086 UPDATE_SYNTAX_TABLE (charpos);
6087#endif
6088 PREFETCH ();
62a6e103 6089 c2 = RE_STRING_CHAR (d, target_multibyte);
669fa600 6090 s2 = SYNTAX (c2);
7814e705 6091
669fa600
SM
6092 /* Case 2: S2 is neither Sword nor Ssymbol. */
6093 if (s2 != Sword && s2 != Ssymbol)
6094 goto fail;
6095
6096 /* Case 3: D is not at the beginning of string ... */
6097 if (!AT_STRINGS_BEG (d))
6098 {
6099 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
6100#ifdef emacs
6101 UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
6102#endif
6103 s1 = SYNTAX (c1);
6104
6105 /* ... and S1 is Sword or Ssymbol. */
6106 if (s1 == Sword || s1 == Ssymbol)
6107 goto fail;
6108 }
6109 }
6110 break;
6111
6112 case symend:
6113 DEBUG_PRINT1 ("EXECUTING symend.\n");
6114
6115 /* We FAIL in one of the following cases: */
6116
6117 /* Case 1: D is at the beginning of string. */
6118 if (AT_STRINGS_BEG (d))
6119 goto fail;
6120 else
6121 {
6122 /* C1 is the character before D, S1 is the syntax of C1, C2
6123 is the character at D, and S2 is the syntax of C2. */
6124 re_wchar_t c1, c2;
6125 int s1, s2;
6126#ifdef emacs
d1dfb56c
EZ
6127 ssize_t offset = PTR_TO_OFFSET (d) - 1;
6128 ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
669fa600
SM
6129 UPDATE_SYNTAX_TABLE (charpos);
6130#endif
6131 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
6132 s1 = SYNTAX (c1);
6133
6134 /* Case 2: S1 is neither Ssymbol nor Sword. */
6135 if (s1 != Sword && s1 != Ssymbol)
6136 goto fail;
6137
6138 /* Case 3: D is not at the end of string ... */
6139 if (!AT_STRINGS_END (d))
6140 {
6141 PREFETCH_NOLIMIT ();
62a6e103 6142 c2 = RE_STRING_CHAR (d, target_multibyte);
669fa600 6143#ifdef emacs
134579f2 6144 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
669fa600
SM
6145#endif
6146 s2 = SYNTAX (c2);
6147
6148 /* ... and S2 is Sword or Ssymbol. */
6149 if (s2 == Sword || s2 == Ssymbol)
6150 goto fail;
b18215fc
RS
6151 }
6152 }
e318085a
RS
6153 break;
6154
fa9a63c5 6155 case syntaxspec:
1fb352e0 6156 case notsyntaxspec:
b18215fc 6157 {
19ed5445
PE
6158 boolean not = (re_opcode_t) *(p - 1) == notsyntaxspec;
6159 mcnt = *p++;
6160 DEBUG_PRINT3 ("EXECUTING %ssyntaxspec %d.\n", not?"not":"", mcnt);
6161 PREFETCH ();
6162#ifdef emacs
6163 {
d1dfb56c
EZ
6164 ssize_t offset = PTR_TO_OFFSET (d);
6165 ssize_t pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
19ed5445
PE
6166 UPDATE_SYNTAX_TABLE (pos1);
6167 }
25fe55af 6168#endif
19ed5445
PE
6169 {
6170 int len;
6171 re_wchar_t c;
b18215fc 6172
19ed5445
PE
6173 GET_CHAR_AFTER (c, d, len);
6174 if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not)
6175 goto fail;
6176 d += len;
6177 }
b18215fc 6178 }
8fb31792 6179 break;
fa9a63c5 6180
b18215fc 6181#ifdef emacs
1fb352e0
SM
6182 case before_dot:
6183 DEBUG_PRINT1 ("EXECUTING before_dot.\n");
6184 if (PTR_BYTE_POS (d) >= PT_BYTE)
fa9a63c5 6185 goto fail;
b18215fc
RS
6186 break;
6187
1fb352e0
SM
6188 case at_dot:
6189 DEBUG_PRINT1 ("EXECUTING at_dot.\n");
6190 if (PTR_BYTE_POS (d) != PT_BYTE)
6191 goto fail;
6192 break;
b18215fc 6193
1fb352e0
SM
6194 case after_dot:
6195 DEBUG_PRINT1 ("EXECUTING after_dot.\n");
6196 if (PTR_BYTE_POS (d) <= PT_BYTE)
6197 goto fail;
e318085a 6198 break;
fa9a63c5 6199
1fb352e0 6200 case categoryspec:
b18215fc 6201 case notcategoryspec:
b18215fc 6202 {
8fb31792
PE
6203 boolean not = (re_opcode_t) *(p - 1) == notcategoryspec;
6204 mcnt = *p++;
6205 DEBUG_PRINT3 ("EXECUTING %scategoryspec %d.\n",
6206 not?"not":"", mcnt);
6207 PREFETCH ();
01618498 6208
8fb31792
PE
6209 {
6210 int len;
6211 re_wchar_t c;
6212 GET_CHAR_AFTER (c, d, len);
6213 if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not)
6214 goto fail;
6215 d += len;
6216 }
b18215fc 6217 }
fa9a63c5 6218 break;
5e69f11e 6219
1fb352e0 6220#endif /* emacs */
5e69f11e 6221
0b32bf0e
SM
6222 default:
6223 abort ();
fa9a63c5 6224 }
b18215fc 6225 continue; /* Successfully executed one pattern command; keep going. */
fa9a63c5
RM
6226
6227
6228 /* We goto here if a matching operation fails. */
6229 fail:
5b370c2b 6230 IMMEDIATE_QUIT_CHECK;
fa9a63c5 6231 if (!FAIL_STACK_EMPTY ())
505bde11 6232 {
01618498 6233 re_char *str, *pat;
505bde11 6234 /* A restart point is known. Restore to that state. */
0b32bf0e
SM
6235 DEBUG_PRINT1 ("\nFAIL:\n");
6236 POP_FAILURE_POINT (str, pat);
7393bcbb 6237 switch (*pat++)
505bde11
SM
6238 {
6239 case on_failure_keep_string_jump:
6240 assert (str == NULL);
6241 goto continue_failure_jump;
6242
0683b6fa
SM
6243 case on_failure_jump_nastyloop:
6244 assert ((re_opcode_t)pat[-2] == no_op);
6245 PUSH_FAILURE_POINT (pat - 2, str);
6246 /* Fallthrough */
6247
505bde11
SM
6248 case on_failure_jump_loop:
6249 case on_failure_jump:
6250 case succeed_n:
6251 d = str;
6252 continue_failure_jump:
6253 EXTRACT_NUMBER_AND_INCR (mcnt, pat);
6254 p = pat + mcnt;
6255 break;
b18215fc 6256
0683b6fa
SM
6257 case no_op:
6258 /* A special frame used for nastyloops. */
6259 goto fail;
6260
505bde11 6261 default:
5e617bc2 6262 abort ();
505bde11 6263 }
fa9a63c5 6264
505bde11 6265 assert (p >= bufp->buffer && p <= pend);
b18215fc 6266
0b32bf0e 6267 if (d >= string1 && d <= end1)
fa9a63c5 6268 dend = end_match_1;
0b32bf0e 6269 }
fa9a63c5 6270 else
0b32bf0e 6271 break; /* Matching at this starting point really fails. */
fa9a63c5
RM
6272 } /* for (;;) */
6273
6274 if (best_regs_set)
6275 goto restore_best_regs;
6276
6277 FREE_VARIABLES ();
6278
b18215fc 6279 return -1; /* Failure to match. */
fa9a63c5
RM
6280} /* re_match_2 */
6281\f
6282/* Subroutine definitions for re_match_2. */
6283
fa9a63c5
RM
6284/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
6285 bytes; nonzero otherwise. */
5e69f11e 6286
fa9a63c5 6287static int
d1dfb56c 6288bcmp_translate (const re_char *s1, const re_char *s2, register ssize_t len,
438105ed 6289 RE_TRANSLATE_TYPE translate, const int target_multibyte)
fa9a63c5 6290{
2d1675e4
SM
6291 register re_char *p1 = s1, *p2 = s2;
6292 re_char *p1_end = s1 + len;
6293 re_char *p2_end = s2 + len;
e934739e 6294
4bb91c68
SM
6295 /* FIXME: Checking both p1 and p2 presumes that the two strings might have
6296 different lengths, but relying on a single `len' would break this. -sm */
6297 while (p1 < p1_end && p2 < p2_end)
fa9a63c5 6298 {
e934739e 6299 int p1_charlen, p2_charlen;
01618498 6300 re_wchar_t p1_ch, p2_ch;
e934739e 6301
6fdd04b0
KH
6302 GET_CHAR_AFTER (p1_ch, p1, p1_charlen);
6303 GET_CHAR_AFTER (p2_ch, p2, p2_charlen);
e934739e
RS
6304
6305 if (RE_TRANSLATE (translate, p1_ch)
6306 != RE_TRANSLATE (translate, p2_ch))
bc192b5b 6307 return 1;
e934739e
RS
6308
6309 p1 += p1_charlen, p2 += p2_charlen;
fa9a63c5 6310 }
e934739e
RS
6311
6312 if (p1 != p1_end || p2 != p2_end)
6313 return 1;
6314
fa9a63c5
RM
6315 return 0;
6316}
6317\f
6318/* Entry points for GNU code. */
6319
6320/* re_compile_pattern is the GNU regular expression compiler: it
6321 compiles PATTERN (of length SIZE) and puts the result in BUFP.
6322 Returns 0 if the pattern was valid, otherwise an error string.
5e69f11e 6323
fa9a63c5
RM
6324 Assumes the `allocated' (and perhaps `buffer') and `translate' fields
6325 are set in BUFP on entry.
5e69f11e 6326
b18215fc 6327 We call regex_compile to do the actual compilation. */
fa9a63c5
RM
6328
6329const char *
d1dfb56c
EZ
6330re_compile_pattern (const char *pattern, size_t length,
6331 struct re_pattern_buffer *bufp)
fa9a63c5
RM
6332{
6333 reg_errcode_t ret;
5e69f11e 6334
fa9a63c5
RM
6335 /* GNU code is written to assume at least RE_NREGS registers will be set
6336 (and at least one extra will be -1). */
6337 bufp->regs_allocated = REGS_UNALLOCATED;
5e69f11e 6338
fa9a63c5
RM
6339 /* And GNU code determines whether or not to get register information
6340 by passing null for the REGS argument to re_match, etc., not by
6341 setting no_sub. */
6342 bufp->no_sub = 0;
5e69f11e 6343
4bb91c68 6344 ret = regex_compile ((re_char*) pattern, length, re_syntax_options, bufp);
fa9a63c5
RM
6345
6346 if (!ret)
6347 return NULL;
6348 return gettext (re_error_msgid[(int) ret]);
5e69f11e 6349}
c0f9ea08 6350WEAK_ALIAS (__re_compile_pattern, re_compile_pattern)
fa9a63c5 6351\f
b18215fc
RS
6352/* Entry points compatible with 4.2 BSD regex library. We don't define
6353 them unless specifically requested. */
fa9a63c5 6354
0b32bf0e 6355#if defined _REGEX_RE_COMP || defined _LIBC
fa9a63c5
RM
6356
6357/* BSD has one and only one pattern buffer. */
6358static struct re_pattern_buffer re_comp_buf;
6359
6360char *
0b32bf0e 6361# ifdef _LIBC
48afdd44
RM
6362/* Make these definitions weak in libc, so POSIX programs can redefine
6363 these names if they don't use our functions, and still use
6364 regcomp/regexec below without link errors. */
6365weak_function
0b32bf0e 6366# endif
31011111 6367re_comp (const char *s)
fa9a63c5
RM
6368{
6369 reg_errcode_t ret;
5e69f11e 6370
fa9a63c5
RM
6371 if (!s)
6372 {
6373 if (!re_comp_buf.buffer)
0b32bf0e 6374 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
a60198e5 6375 return (char *) gettext ("No previous regular expression");
fa9a63c5
RM
6376 return 0;
6377 }
6378
6379 if (!re_comp_buf.buffer)
6380 {
38182d90 6381 re_comp_buf.buffer = malloc (200);
fa9a63c5 6382 if (re_comp_buf.buffer == NULL)
0b32bf0e
SM
6383 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
6384 return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
fa9a63c5
RM
6385 re_comp_buf.allocated = 200;
6386
38182d90 6387 re_comp_buf.fastmap = malloc (1 << BYTEWIDTH);
fa9a63c5 6388 if (re_comp_buf.fastmap == NULL)
a60198e5
SM
6389 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
6390 return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
fa9a63c5
RM
6391 }
6392
6393 /* Since `re_exec' always passes NULL for the `regs' argument, we
6394 don't need to initialize the pattern buffer fields which affect it. */
6395
fa9a63c5 6396 ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
5e69f11e 6397
fa9a63c5
RM
6398 if (!ret)
6399 return NULL;
6400
6401 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
6402 return (char *) gettext (re_error_msgid[(int) ret]);
6403}
6404
6405
31011111 6406int
0b32bf0e 6407# ifdef _LIBC
48afdd44 6408weak_function
0b32bf0e 6409# endif
d1dfb56c 6410re_exec (const char *s)
fa9a63c5 6411{
d1dfb56c 6412 const size_t len = strlen (s);
fa9a63c5
RM
6413 return
6414 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
6415}
6416#endif /* _REGEX_RE_COMP */
6417\f
6418/* POSIX.2 functions. Don't define these for Emacs. */
6419
6420#ifndef emacs
6421
6422/* regcomp takes a regular expression as a string and compiles it.
6423
b18215fc 6424 PREG is a regex_t *. We do not expect any fields to be initialized,
fa9a63c5
RM
6425 since POSIX says we shouldn't. Thus, we set
6426
6427 `buffer' to the compiled pattern;
6428 `used' to the length of the compiled pattern;
6429 `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
6430 REG_EXTENDED bit in CFLAGS is set; otherwise, to
6431 RE_SYNTAX_POSIX_BASIC;
c0f9ea08
SM
6432 `fastmap' to an allocated space for the fastmap;
6433 `fastmap_accurate' to zero;
fa9a63c5
RM
6434 `re_nsub' to the number of subexpressions in PATTERN.
6435
6436 PATTERN is the address of the pattern string.
6437
6438 CFLAGS is a series of bits which affect compilation.
6439
6440 If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
6441 use POSIX basic syntax.
6442
6443 If REG_NEWLINE is set, then . and [^...] don't match newline.
6444 Also, regexec will try a match beginning after every newline.
6445
6446 If REG_ICASE is set, then we considers upper- and lowercase
6447 versions of letters to be equivalent when matching.
6448
6449 If REG_NOSUB is set, then when PREG is passed to regexec, that
6450 routine will report only success or failure, and nothing about the
6451 registers.
6452
b18215fc 6453 It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
fa9a63c5
RM
6454 the return codes and their meanings.) */
6455
d1dfb56c 6456reg_errcode_t
d2762c86
DN
6457regcomp (regex_t *__restrict preg, const char *__restrict pattern,
6458 int cflags)
fa9a63c5
RM
6459{
6460 reg_errcode_t ret;
4bb91c68 6461 reg_syntax_t syntax
fa9a63c5
RM
6462 = (cflags & REG_EXTENDED) ?
6463 RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
6464
6465 /* regex_compile will allocate the space for the compiled pattern. */
6466 preg->buffer = 0;
6467 preg->allocated = 0;
6468 preg->used = 0;
5e69f11e 6469
c0f9ea08 6470 /* Try to allocate space for the fastmap. */
38182d90 6471 preg->fastmap = malloc (1 << BYTEWIDTH);
5e69f11e 6472
fa9a63c5
RM
6473 if (cflags & REG_ICASE)
6474 {
6475 unsigned i;
5e69f11e 6476
38182d90 6477 preg->translate = malloc (CHAR_SET_SIZE * sizeof *preg->translate);
fa9a63c5 6478 if (preg->translate == NULL)
0b32bf0e 6479 return (int) REG_ESPACE;
fa9a63c5
RM
6480
6481 /* Map uppercase characters to corresponding lowercase ones. */
6482 for (i = 0; i < CHAR_SET_SIZE; i++)
4bb91c68 6483 preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
fa9a63c5
RM
6484 }
6485 else
6486 preg->translate = NULL;
6487
6488 /* If REG_NEWLINE is set, newlines are treated differently. */
6489 if (cflags & REG_NEWLINE)
6490 { /* REG_NEWLINE implies neither . nor [^...] match newline. */
6491 syntax &= ~RE_DOT_NEWLINE;
6492 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
fa9a63c5
RM
6493 }
6494 else
c0f9ea08 6495 syntax |= RE_NO_NEWLINE_ANCHOR;
fa9a63c5
RM
6496
6497 preg->no_sub = !!(cflags & REG_NOSUB);
6498
5e69f11e 6499 /* POSIX says a null character in the pattern terminates it, so we
fa9a63c5 6500 can use strlen here in compiling the pattern. */
4bb91c68 6501 ret = regex_compile ((re_char*) pattern, strlen (pattern), syntax, preg);
5e69f11e 6502
fa9a63c5
RM
6503 /* POSIX doesn't distinguish between an unmatched open-group and an
6504 unmatched close-group: both are REG_EPAREN. */
c0f9ea08
SM
6505 if (ret == REG_ERPAREN)
6506 ret = REG_EPAREN;
6507
6508 if (ret == REG_NOERROR && preg->fastmap)
6509 { /* Compute the fastmap now, since regexec cannot modify the pattern
6510 buffer. */
6511 re_compile_fastmap (preg);
6512 if (preg->can_be_null)
6513 { /* The fastmap can't be used anyway. */
6514 free (preg->fastmap);
6515 preg->fastmap = NULL;
6516 }
6517 }
d1dfb56c 6518 return ret;
fa9a63c5 6519}
c0f9ea08 6520WEAK_ALIAS (__regcomp, regcomp)
fa9a63c5
RM
6521
6522
6523/* regexec searches for a given pattern, specified by PREG, in the
6524 string STRING.
5e69f11e 6525
fa9a63c5 6526 If NMATCH is zero or REG_NOSUB was set in the cflags argument to
b18215fc 6527 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
fa9a63c5
RM
6528 least NMATCH elements, and we set them to the offsets of the
6529 corresponding matched substrings.
5e69f11e 6530
fa9a63c5
RM
6531 EFLAGS specifies `execution flags' which affect matching: if
6532 REG_NOTBOL is set, then ^ does not match at the beginning of the
6533 string; if REG_NOTEOL is set, then $ does not match at the end.
5e69f11e 6534
fa9a63c5
RM
6535 We return 0 if we find a match and REG_NOMATCH if not. */
6536
d1dfb56c 6537reg_errcode_t
d2762c86
DN
6538regexec (const regex_t *__restrict preg, const char *__restrict string,
6539 size_t nmatch, regmatch_t pmatch[__restrict_arr], int eflags)
fa9a63c5 6540{
31011111 6541 regoff_t ret;
fa9a63c5
RM
6542 struct re_registers regs;
6543 regex_t private_preg;
d1dfb56c 6544 size_t len = strlen (string);
c0f9ea08 6545 boolean want_reg_info = !preg->no_sub && nmatch > 0 && pmatch;
fa9a63c5
RM
6546
6547 private_preg = *preg;
5e69f11e 6548
fa9a63c5
RM
6549 private_preg.not_bol = !!(eflags & REG_NOTBOL);
6550 private_preg.not_eol = !!(eflags & REG_NOTEOL);
5e69f11e 6551
fa9a63c5
RM
6552 /* The user has told us exactly how many registers to return
6553 information about, via `nmatch'. We have to pass that on to the
b18215fc 6554 matching routines. */
fa9a63c5 6555 private_preg.regs_allocated = REGS_FIXED;
5e69f11e 6556
fa9a63c5
RM
6557 if (want_reg_info)
6558 {
6559 regs.num_regs = nmatch;
4bb91c68
SM
6560 regs.start = TALLOC (nmatch * 2, regoff_t);
6561 if (regs.start == NULL)
d1dfb56c 6562 return REG_NOMATCH;
4bb91c68 6563 regs.end = regs.start + nmatch;
fa9a63c5
RM
6564 }
6565
c0f9ea08
SM
6566 /* Instead of using not_eol to implement REG_NOTEOL, we could simply
6567 pass (&private_preg, string, len + 1, 0, len, ...) pretending the string
6568 was a little bit longer but still only matching the real part.
6569 This works because the `endline' will check for a '\n' and will find a
6570 '\0', correctly deciding that this is not the end of a line.
6571 But it doesn't work out so nicely for REG_NOTBOL, since we don't have
6572 a convenient '\0' there. For all we know, the string could be preceded
6573 by '\n' which would throw things off. */
6574
fa9a63c5
RM
6575 /* Perform the searching operation. */
6576 ret = re_search (&private_preg, string, len,
0b32bf0e
SM
6577 /* start: */ 0, /* range: */ len,
6578 want_reg_info ? &regs : (struct re_registers *) 0);
5e69f11e 6579
fa9a63c5
RM
6580 /* Copy the register information to the POSIX structure. */
6581 if (want_reg_info)
6582 {
6583 if (ret >= 0)
0b32bf0e
SM
6584 {
6585 unsigned r;
fa9a63c5 6586
0b32bf0e
SM
6587 for (r = 0; r < nmatch; r++)
6588 {
6589 pmatch[r].rm_so = regs.start[r];
6590 pmatch[r].rm_eo = regs.end[r];
6591 }
6592 }
fa9a63c5 6593
b18215fc 6594 /* If we needed the temporary register info, free the space now. */
fa9a63c5 6595 free (regs.start);
fa9a63c5
RM
6596 }
6597
6598 /* We want zero return to mean success, unlike `re_search'. */
d1dfb56c 6599 return ret >= 0 ? REG_NOERROR : REG_NOMATCH;
fa9a63c5 6600}
c0f9ea08 6601WEAK_ALIAS (__regexec, regexec)
fa9a63c5
RM
6602
6603
ec869672
JR
6604/* Returns a message corresponding to an error code, ERR_CODE, returned
6605 from either regcomp or regexec. We don't use PREG here.
6606
6607 ERR_CODE was previously called ERRCODE, but that name causes an
6608 error with msvc8 compiler. */
fa9a63c5
RM
6609
6610size_t
d2762c86 6611regerror (int err_code, const regex_t *preg, char *errbuf, size_t errbuf_size)
fa9a63c5
RM
6612{
6613 const char *msg;
6614 size_t msg_size;
6615
ec869672
JR
6616 if (err_code < 0
6617 || err_code >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0])))
5e69f11e 6618 /* Only error codes returned by the rest of the code should be passed
b18215fc 6619 to this routine. If we are given anything else, or if other regex
fa9a63c5
RM
6620 code generates an invalid error code, then the program has a bug.
6621 Dump core so we can fix it. */
6622 abort ();
6623
ec869672 6624 msg = gettext (re_error_msgid[err_code]);
fa9a63c5
RM
6625
6626 msg_size = strlen (msg) + 1; /* Includes the null. */
5e69f11e 6627
fa9a63c5
RM
6628 if (errbuf_size != 0)
6629 {
6630 if (msg_size > errbuf_size)
0b32bf0e 6631 {
e99a530f 6632 memcpy (errbuf, msg, errbuf_size - 1);
0b32bf0e
SM
6633 errbuf[errbuf_size - 1] = 0;
6634 }
fa9a63c5 6635 else
0b32bf0e 6636 strcpy (errbuf, msg);
fa9a63c5
RM
6637 }
6638
6639 return msg_size;
6640}
c0f9ea08 6641WEAK_ALIAS (__regerror, regerror)
fa9a63c5
RM
6642
6643
6644/* Free dynamically allocated space used by PREG. */
6645
6646void
d2762c86 6647regfree (regex_t *preg)
fa9a63c5 6648{
c2cd06e6 6649 free (preg->buffer);
fa9a63c5 6650 preg->buffer = NULL;
5e69f11e 6651
fa9a63c5
RM
6652 preg->allocated = 0;
6653 preg->used = 0;
6654
c2cd06e6 6655 free (preg->fastmap);
fa9a63c5
RM
6656 preg->fastmap = NULL;
6657 preg->fastmap_accurate = 0;
6658
c2cd06e6 6659 free (preg->translate);
fa9a63c5
RM
6660 preg->translate = NULL;
6661}
c0f9ea08 6662WEAK_ALIAS (__regfree, regfree)
fa9a63c5
RM
6663
6664#endif /* not emacs */