(makefile-warn-suspicious-lines): clean
[bpt/emacs.git] / src / regex.c
CommitLineData
e318085a
RS
1/* Extended regular expression matching and search library, version
2 0.12. (Implements POSIX draft P10003.2/D11.2, except for
bc78d348
KB
3 internationalization features.)
4
505bde11 5 Copyright (C) 1993,94,95,96,97,98,2000 Free Software Foundation, Inc.
bc78d348 6
fa9a63c5
RM
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
25fe55af 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
fa9a63c5
RM
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
ba4a8e51 19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
25fe55af 20 USA. */
fa9a63c5 21
505bde11
SM
22/* TODO:
23 - detect nasty infinite loops like "\\(\\)+?ab" when matching "ac".
1fb352e0
SM
24 - use analyze_first to optimize non-empty loops
25 - reduce code duplication
26 - optimize succeed_n and jump_n away when possible
27 - clean up multibyte issues
505bde11
SM
28 - structure the opcode space into opcode+flag.
29 - merge with glic's regex.[ch]
30
31 That's it for now -sm */
32
fa9a63c5
RM
33/* AIX requires this to be the first thing in the file. */
34#if defined (_AIX) && !defined (REGEX_MALLOC)
35 #pragma alloca
36#endif
37
68d96f02 38#undef _GNU_SOURCE
fa9a63c5
RM
39#define _GNU_SOURCE
40
51352796 41#ifdef emacs
25fe55af 42/* Converts the pointer to the char to BEG-based offset from the start. */
99633e97 43#define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
7e95234e 44#define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
51352796
RS
45#else
46#define PTR_TO_OFFSET(d) 0
47#endif
b18215fc 48
fa9a63c5
RM
49#ifdef HAVE_CONFIG_H
50#include <config.h>
51#endif
52
25fe55af 53/* We need this for `regex.h', and perhaps for the Emacs include files. */
fa9a63c5
RM
54#include <sys/types.h>
55
25fe55af 56/* This is for other GNU distributions with internationalized messages. */
fa9a63c5
RM
57#if HAVE_LIBINTL_H || defined (_LIBC)
58# include <libintl.h>
59#else
60# define gettext(msgid) (msgid)
61#endif
62
5e69f11e
RM
63#ifndef gettext_noop
64/* This define is so xgettext can find the internationalizable
65 strings. */
66#define gettext_noop(String) String
67#endif
68
fa9a63c5
RM
69/* The `emacs' switch turns on certain matching commands
70 that make sense only in Emacs. */
71#ifdef emacs
72
73#include "lisp.h"
74#include "buffer.h"
b18215fc
RS
75
76/* Make syntax table lookup grant data in gl_state. */
77#define SYNTAX_ENTRY_VIA_PROPERTY
78
fa9a63c5 79#include "syntax.h"
b18215fc
RS
80#include "charset.h"
81#include "category.h"
fa9a63c5 82
9abbd165 83#define malloc xmalloc
64e3c718 84#define realloc xrealloc
9abbd165
RS
85#define free xfree
86
4e8a9132
SM
87#define RE_STRING_CHAR(p, s) \
88 (multibyte ? (STRING_CHAR (p, s)) : (*(p)))
89
fa9a63c5
RM
90#else /* not emacs */
91
92/* If we are not linking with Emacs proper,
93 we can't use the relocating allocator
94 even if config.h says that we can. */
95#undef REL_ALLOC
96
97#if defined (STDC_HEADERS) || defined (_LIBC)
98#include <stdlib.h>
99#else
100char *malloc ();
101char *realloc ();
102#endif
103
9e4ecb26 104/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
25fe55af 105 If nothing else has been done, use the method below. */
9e4ecb26
KH
106#ifdef INHIBIT_STRING_HEADER
107#if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY))
108#if !defined (bzero) && !defined (bcopy)
109#undef INHIBIT_STRING_HEADER
110#endif
111#endif
112#endif
113
114/* This is the normal way of making sure we have a bcopy and a bzero.
115 This is used in most programs--a few other programs avoid this
116 by defining INHIBIT_STRING_HEADER. */
fa9a63c5 117#ifndef INHIBIT_STRING_HEADER
7f998252 118#if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC)
fa9a63c5
RM
119#include <string.h>
120#ifndef bcmp
121#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
122#endif
123#ifndef bcopy
124#define bcopy(s, d, n) memcpy ((d), (s), (n))
125#endif
126#ifndef bzero
127#define bzero(s, n) memset ((s), 0, (n))
128#endif
129#else
130#include <strings.h>
131#endif
132#endif
133
134/* Define the syntax stuff for \<, \>, etc. */
135
1fb352e0 136/* This must be nonzero for the wordchar pattern
fa9a63c5 137 commands in re_match_2. */
5e69f11e 138#ifndef Sword
fa9a63c5
RM
139#define Sword 1
140#endif
141
142#ifdef SWITCH_ENUM_BUG
143#define SWITCH_ENUM_CAST(x) ((int)(x))
144#else
145#define SWITCH_ENUM_CAST(x) (x)
146#endif
147
148#ifdef SYNTAX_TABLE
149
150extern char *re_syntax_table;
151
152#else /* not SYNTAX_TABLE */
153
154/* How many characters in the character set. */
155#define CHAR_SET_SIZE 256
156
157static char re_syntax_table[CHAR_SET_SIZE];
158
159static void
160init_syntax_once ()
161{
162 register int c;
163 static int done = 0;
164
165 if (done)
166 return;
167
168 bzero (re_syntax_table, sizeof re_syntax_table);
169
170 for (c = 'a'; c <= 'z'; c++)
171 re_syntax_table[c] = Sword;
172
173 for (c = 'A'; c <= 'Z'; c++)
174 re_syntax_table[c] = Sword;
175
176 for (c = '0'; c <= '9'; c++)
177 re_syntax_table[c] = Sword;
178
179 re_syntax_table['_'] = Sword;
180
181 done = 1;
182}
183
184#endif /* not SYNTAX_TABLE */
185
186#define SYNTAX(c) re_syntax_table[c]
187
e934739e 188/* Dummy macros for non-Emacs environments. */
b18215fc 189#define BASE_LEADING_CODE_P(c) (0)
1fb352e0
SM
190#define CHAR_CHARSET(c) 0
191#define CHARSET_LEADING_CODE_BASE(c) 0
b18215fc
RS
192#define WORD_BOUNDARY_P(c1, c2) (0)
193#define CHAR_HEAD_P(p) (1)
194#define SINGLE_BYTE_CHAR_P(c) (1)
195#define SAME_CHARSET_P(c1, c2) (1)
196#define MULTIBYTE_FORM_LENGTH(p, s) (1)
197#define STRING_CHAR(p, s) (*(p))
4e8a9132 198#define RE_STRING_CHAR STRING_CHAR
b18215fc 199#define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p))
b18215fc
RS
200#define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
201 (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
fa9a63c5 202#endif /* not emacs */
4e8a9132
SM
203
204#ifndef RE_TRANSLATE
205#define RE_TRANSLATE(TBL, C) ((unsigned char)(TBL)[C])
206#define RE_TRANSLATE_P(TBL) (TBL)
207#endif
fa9a63c5
RM
208\f
209/* Get the interface, including the syntax bits. */
210#include "regex.h"
211
f71b19b6
DL
212/* isalpha etc. are used for the character classes. */
213#include <ctype.h>
fa9a63c5 214
f71b19b6 215#ifdef emacs
fa9a63c5 216
f71b19b6
DL
217/* 1 if C is an ASCII character. */
218#define IS_REAL_ASCII(c) ((c) < 0200)
fa9a63c5 219
f71b19b6
DL
220/* 1 if C is a unibyte character. */
221#define ISUNIBYTE(c) (SINGLE_BYTE_CHAR_P ((c)))
96cc36cc 222
f71b19b6 223/* The Emacs definitions should not be directly affected by locales. */
96cc36cc 224
f71b19b6
DL
225/* In Emacs, these are only used for single-byte characters. */
226#define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
227#define ISCNTRL(c) ((c) < ' ')
228#define ISXDIGIT(c) (((c) >= '0' && (c) <= '9') \
229 || ((c) >= 'a' && (c) <= 'f') \
230 || ((c) >= 'A' && (c) <= 'F'))
96cc36cc
RS
231
232/* This is only used for single-byte characters. */
233#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
234
235/* The rest must handle multibyte characters. */
236
237#define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \
f71b19b6 238 ? (c) > ' ' && !((c) >= 0177 && (c) <= 0237) \
96cc36cc
RS
239 : 1)
240
241#define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \
f71b19b6 242 ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237) \
96cc36cc
RS
243 : 1)
244
f71b19b6
DL
245#define ISALNUM(c) (IS_REAL_ASCII (c) \
246 ? (((c) >= 'a' && (c) <= 'z') \
247 || ((c) >= 'A' && (c) <= 'Z') \
248 || ((c) >= '0' && (c) <= '9')) \
96cc36cc
RS
249 : SYNTAX (c) == Sword)
250
f71b19b6
DL
251#define ISALPHA(c) (IS_REAL_ASCII (c) \
252 ? (((c) >= 'a' && (c) <= 'z') \
253 || ((c) >= 'A' && (c) <= 'Z')) \
96cc36cc
RS
254 : SYNTAX (c) == Sword)
255
256#define ISLOWER(c) (LOWERCASEP (c))
257
f71b19b6
DL
258#define ISPUNCT(c) (IS_REAL_ASCII (c) \
259 ? ((c) > ' ' && (c) < 0177 \
260 && !(((c) >= 'a' && (c) <= 'z') \
261 || ((c) >= 'A' && (c) <= 'Z') \
262 || ((c) >= '0' && (c) <= '9'))) \
96cc36cc
RS
263 : SYNTAX (c) != Sword)
264
265#define ISSPACE(c) (SYNTAX (c) == Swhitespace)
266
267#define ISUPPER(c) (UPPERCASEP (c))
268
269#define ISWORD(c) (SYNTAX (c) == Sword)
270
271#else /* not emacs */
272
f71b19b6
DL
273/* Jim Meyering writes:
274
275 "... Some ctype macros are valid only for character codes that
276 isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
277 using /bin/cc or gcc but without giving an ansi option). So, all
278 ctype uses should be through macros like ISPRINT... If
279 STDC_HEADERS is defined, then autoconf has verified that the ctype
280 macros don't need to be guarded with references to isascii. ...
281 Defining isascii to 1 should let any compiler worth its salt
282 eliminate the && through constant folding." */
283
284#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
285#define ISASCII(c) 1
286#else
287#define ISASCII(c) isascii(c)
288#endif
289
290/* 1 if C is an ASCII character. */
291#define IS_REAL_ASCII(c) ((c) < 0200)
292
293/* This distinction is not meaningful, except in Emacs. */
294#define ISUNIBYTE(c) 1
295
296#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
297#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
298#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
299
fa9a63c5
RM
300#ifdef isblank
301#define ISBLANK(c) (ISASCII (c) && isblank (c))
302#else
303#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
304#endif
305#ifdef isgraph
306#define ISGRAPH(c) (ISASCII (c) && isgraph (c))
307#else
308#define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
309#endif
310
311#define ISPRINT(c) (ISASCII (c) && isprint (c))
312#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
313#define ISALNUM(c) (ISASCII (c) && isalnum (c))
314#define ISALPHA(c) (ISASCII (c) && isalpha (c))
315#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
316#define ISLOWER(c) (ISASCII (c) && islower (c))
317#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
318#define ISSPACE(c) (ISASCII (c) && isspace (c))
319#define ISUPPER(c) (ISASCII (c) && isupper (c))
320#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
321
96cc36cc
RS
322#define ISWORD(c) ISALPHA(c)
323
324#endif /* not emacs */
325\f
fa9a63c5 326#ifndef NULL
075f06ec 327#define NULL (void *)0
fa9a63c5
RM
328#endif
329
330/* We remove any previous definition of `SIGN_EXTEND_CHAR',
331 since ours (we hope) works properly with all combinations of
332 machines, compilers, `char' and `unsigned char' argument types.
25fe55af 333 (Per Bothner suggested the basic approach.) */
fa9a63c5
RM
334#undef SIGN_EXTEND_CHAR
335#if __STDC__
336#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
337#else /* not __STDC__ */
338/* As in Harbison and Steele. */
339#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
340#endif
341\f
342/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
343 use `alloca' instead of `malloc'. This is because using malloc in
344 re_search* or re_match* could cause memory leaks when C-g is used in
345 Emacs; also, malloc is slower and causes storage fragmentation. On
5e69f11e
RM
346 the other hand, malloc is more portable, and easier to debug.
347
fa9a63c5
RM
348 Because we sometimes use alloca, some routines have to be macros,
349 not functions -- `alloca'-allocated space disappears at the end of the
350 function it is called in. */
351
352#ifdef REGEX_MALLOC
353
354#define REGEX_ALLOCATE malloc
355#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
356#define REGEX_FREE free
357
358#else /* not REGEX_MALLOC */
359
360/* Emacs already defines alloca, sometimes. */
361#ifndef alloca
362
363/* Make alloca work the best possible way. */
364#ifdef __GNUC__
365#define alloca __builtin_alloca
366#else /* not __GNUC__ */
367#if HAVE_ALLOCA_H
368#include <alloca.h>
369#else /* not __GNUC__ or HAVE_ALLOCA_H */
f3c4387f 370#if 0 /* It is a bad idea to declare alloca. We always cast the result. */
25fe55af 371#ifndef _AIX /* Already did AIX, up at the top. */
fa9a63c5
RM
372char *alloca ();
373#endif /* not _AIX */
f3c4387f 374#endif
5e69f11e 375#endif /* not HAVE_ALLOCA_H */
fa9a63c5
RM
376#endif /* not __GNUC__ */
377
378#endif /* not alloca */
379
380#define REGEX_ALLOCATE alloca
381
382/* Assumes a `char *destination' variable. */
383#define REGEX_REALLOCATE(source, osize, nsize) \
384 (destination = (char *) alloca (nsize), \
385 bcopy (source, destination, osize), \
386 destination)
387
388/* No need to do anything to free, after alloca. */
c2e1680a 389#define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
fa9a63c5
RM
390
391#endif /* not REGEX_MALLOC */
392
393/* Define how to allocate the failure stack. */
394
33487cc8 395#if defined (REL_ALLOC) && defined (REGEX_MALLOC)
4297555e 396
fa9a63c5
RM
397#define REGEX_ALLOCATE_STACK(size) \
398 r_alloc (&failure_stack_ptr, (size))
399#define REGEX_REALLOCATE_STACK(source, osize, nsize) \
400 r_re_alloc (&failure_stack_ptr, (nsize))
401#define REGEX_FREE_STACK(ptr) \
402 r_alloc_free (&failure_stack_ptr)
403
4297555e 404#else /* not using relocating allocator */
fa9a63c5
RM
405
406#ifdef REGEX_MALLOC
407
408#define REGEX_ALLOCATE_STACK malloc
409#define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
410#define REGEX_FREE_STACK free
411
412#else /* not REGEX_MALLOC */
413
414#define REGEX_ALLOCATE_STACK alloca
415
416#define REGEX_REALLOCATE_STACK(source, osize, nsize) \
417 REGEX_REALLOCATE (source, osize, nsize)
25fe55af 418/* No need to explicitly free anything. */
fa9a63c5
RM
419#define REGEX_FREE_STACK(arg)
420
421#endif /* not REGEX_MALLOC */
4297555e 422#endif /* not using relocating allocator */
fa9a63c5
RM
423
424
425/* True if `size1' is non-NULL and PTR is pointing anywhere inside
426 `string1' or just past its end. This works if PTR is NULL, which is
427 a good thing. */
25fe55af 428#define FIRST_STRING_P(ptr) \
fa9a63c5
RM
429 (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
430
431/* (Re)Allocate N items of type T using malloc, or fail. */
432#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
433#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
434#define RETALLOC_IF(addr, n, t) \
435 if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
436#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
437
25fe55af 438#define BYTEWIDTH 8 /* In bits. */
fa9a63c5
RM
439
440#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
441
442#undef MAX
443#undef MIN
444#define MAX(a, b) ((a) > (b) ? (a) : (b))
445#define MIN(a, b) ((a) < (b) ? (a) : (b))
446
66f0296e
SM
447/* Type of source-pattern and string chars. */
448typedef const unsigned char re_char;
449
fa9a63c5
RM
450typedef char boolean;
451#define false 0
452#define true 1
453
454static int re_match_2_internal ();
455\f
456/* These are the command codes that appear in compiled regular
25fe55af 457 expressions. Some opcodes are followed by argument bytes. A
fa9a63c5
RM
458 command code can specify any interpretation whatsoever for its
459 arguments. Zero bytes may appear in the compiled regular expression. */
460
461typedef enum
462{
463 no_op = 0,
464
25fe55af 465 /* Succeed right away--no more backtracking. */
fa9a63c5
RM
466 succeed,
467
25fe55af 468 /* Followed by one byte giving n, then by n literal bytes. */
fa9a63c5
RM
469 exactn,
470
25fe55af 471 /* Matches any (more or less) character. */
fa9a63c5
RM
472 anychar,
473
25fe55af
RS
474 /* Matches any one char belonging to specified set. First
475 following byte is number of bitmap bytes. Then come bytes
476 for a bitmap saying which chars are in. Bits in each byte
477 are ordered low-bit-first. A character is in the set if its
478 bit is 1. A character too large to have a bit in the map is
96cc36cc
RS
479 automatically not in the set.
480
481 If the length byte has the 0x80 bit set, then that stuff
482 is followed by a range table:
483 2 bytes of flags for character sets (low 8 bits, high 8 bits)
484 See RANGE_TABLE_WORK_BITS below.
485 2 bytes, the number of pairs that follow
486 pairs, each 2 multibyte characters,
487 each multibyte character represented as 3 bytes. */
fa9a63c5
RM
488 charset,
489
25fe55af
RS
490 /* Same parameters as charset, but match any character that is
491 not one of those specified. */
fa9a63c5
RM
492 charset_not,
493
25fe55af
RS
494 /* Start remembering the text that is matched, for storing in a
495 register. Followed by one byte with the register number, in
496 the range 0 to one less than the pattern buffer's re_nsub
505bde11 497 field. */
fa9a63c5
RM
498 start_memory,
499
25fe55af
RS
500 /* Stop remembering the text that is matched and store it in a
501 memory register. Followed by one byte with the register
502 number, in the range 0 to one less than `re_nsub' in the
505bde11 503 pattern buffer. */
fa9a63c5
RM
504 stop_memory,
505
25fe55af
RS
506 /* Match a duplicate of something remembered. Followed by one
507 byte containing the register number. */
fa9a63c5
RM
508 duplicate,
509
25fe55af 510 /* Fail unless at beginning of line. */
fa9a63c5
RM
511 begline,
512
25fe55af 513 /* Fail unless at end of line. */
fa9a63c5
RM
514 endline,
515
25fe55af
RS
516 /* Succeeds if at beginning of buffer (if emacs) or at beginning
517 of string to be matched (if not). */
fa9a63c5
RM
518 begbuf,
519
25fe55af 520 /* Analogously, for end of buffer/string. */
fa9a63c5 521 endbuf,
5e69f11e 522
25fe55af 523 /* Followed by two byte relative address to which to jump. */
5e69f11e 524 jump,
fa9a63c5 525
25fe55af
RS
526 /* Followed by two-byte relative address of place to resume at
527 in case of failure. */
fa9a63c5 528 on_failure_jump,
5e69f11e 529
25fe55af
RS
530 /* Like on_failure_jump, but pushes a placeholder instead of the
531 current string position when executed. */
fa9a63c5 532 on_failure_keep_string_jump,
5e69f11e 533
505bde11
SM
534 /* Just like `on_failure_jump', except that it checks that we
535 don't get stuck in an infinite loop (matching an empty string
536 indefinitely). */
537 on_failure_jump_loop,
538
539 /* A smart `on_failure_jump' used for greedy * and + operators.
540 It analyses the loop before which it is put and if the
541 loop does not require backtracking, it changes itself to
4e8a9132
SM
542 `on_failure_keep_string_jump' and short-circuits the loop,
543 else it just defaults to changing itself into `on_failure_jump'.
544 It assumes that it is pointing to just past a `jump'. */
505bde11 545 on_failure_jump_smart,
fa9a63c5 546
25fe55af
RS
547 /* Followed by two-byte relative address and two-byte number n.
548 After matching N times, jump to the address upon failure. */
fa9a63c5
RM
549 succeed_n,
550
25fe55af
RS
551 /* Followed by two-byte relative address, and two-byte number n.
552 Jump to the address N times, then fail. */
fa9a63c5
RM
553 jump_n,
554
25fe55af
RS
555 /* Set the following two-byte relative address to the
556 subsequent two-byte number. The address *includes* the two
557 bytes of number. */
fa9a63c5
RM
558 set_number_at,
559
fa9a63c5
RM
560 wordbeg, /* Succeeds if at word beginning. */
561 wordend, /* Succeeds if at word end. */
562
563 wordbound, /* Succeeds if at a word boundary. */
1fb352e0 564 notwordbound, /* Succeeds if not at a word boundary. */
fa9a63c5
RM
565
566 /* Matches any character whose syntax is specified. Followed by
25fe55af 567 a byte which contains a syntax code, e.g., Sword. */
fa9a63c5
RM
568 syntaxspec,
569
570 /* Matches any character whose syntax is not that specified. */
1fb352e0
SM
571 notsyntaxspec
572
573#ifdef emacs
574 ,before_dot, /* Succeeds if before point. */
575 at_dot, /* Succeeds if at point. */
576 after_dot, /* Succeeds if after point. */
b18215fc
RS
577
578 /* Matches any character whose category-set contains the specified
25fe55af
RS
579 category. The operator is followed by a byte which contains a
580 category code (mnemonic ASCII character). */
b18215fc
RS
581 categoryspec,
582
583 /* Matches any character whose category-set does not contain the
584 specified category. The operator is followed by a byte which
585 contains the category code (mnemonic ASCII character). */
586 notcategoryspec
fa9a63c5
RM
587#endif /* emacs */
588} re_opcode_t;
589\f
590/* Common operations on the compiled pattern. */
591
592/* Store NUMBER in two contiguous bytes starting at DESTINATION. */
593
594#define STORE_NUMBER(destination, number) \
595 do { \
596 (destination)[0] = (number) & 0377; \
597 (destination)[1] = (number) >> 8; \
598 } while (0)
599
600/* Same as STORE_NUMBER, except increment DESTINATION to
601 the byte after where the number is stored. Therefore, DESTINATION
602 must be an lvalue. */
603
604#define STORE_NUMBER_AND_INCR(destination, number) \
605 do { \
606 STORE_NUMBER (destination, number); \
607 (destination) += 2; \
608 } while (0)
609
610/* Put into DESTINATION a number stored in two contiguous bytes starting
611 at SOURCE. */
612
613#define EXTRACT_NUMBER(destination, source) \
614 do { \
615 (destination) = *(source) & 0377; \
616 (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \
617 } while (0)
618
619#ifdef DEBUG
620static void
621extract_number (dest, source)
622 int *dest;
623 unsigned char *source;
624{
5e69f11e 625 int temp = SIGN_EXTEND_CHAR (*(source + 1));
fa9a63c5
RM
626 *dest = *source & 0377;
627 *dest += temp << 8;
628}
629
25fe55af 630#ifndef EXTRACT_MACROS /* To debug the macros. */
fa9a63c5
RM
631#undef EXTRACT_NUMBER
632#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
633#endif /* not EXTRACT_MACROS */
634
635#endif /* DEBUG */
636
637/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
638 SOURCE must be an lvalue. */
639
640#define EXTRACT_NUMBER_AND_INCR(destination, source) \
641 do { \
642 EXTRACT_NUMBER (destination, source); \
25fe55af 643 (source) += 2; \
fa9a63c5
RM
644 } while (0)
645
646#ifdef DEBUG
647static void
648extract_number_and_incr (destination, source)
649 int *destination;
650 unsigned char **source;
5e69f11e 651{
fa9a63c5
RM
652 extract_number (destination, *source);
653 *source += 2;
654}
655
656#ifndef EXTRACT_MACROS
657#undef EXTRACT_NUMBER_AND_INCR
658#define EXTRACT_NUMBER_AND_INCR(dest, src) \
659 extract_number_and_incr (&dest, &src)
660#endif /* not EXTRACT_MACROS */
661
662#endif /* DEBUG */
663\f
b18215fc
RS
664/* Store a multibyte character in three contiguous bytes starting
665 DESTINATION, and increment DESTINATION to the byte after where the
25fe55af 666 character is stored. Therefore, DESTINATION must be an lvalue. */
b18215fc
RS
667
668#define STORE_CHARACTER_AND_INCR(destination, character) \
669 do { \
670 (destination)[0] = (character) & 0377; \
671 (destination)[1] = ((character) >> 8) & 0377; \
672 (destination)[2] = (character) >> 16; \
673 (destination) += 3; \
674 } while (0)
675
676/* Put into DESTINATION a character stored in three contiguous bytes
25fe55af 677 starting at SOURCE. */
b18215fc
RS
678
679#define EXTRACT_CHARACTER(destination, source) \
680 do { \
681 (destination) = ((source)[0] \
682 | ((source)[1] << 8) \
683 | ((source)[2] << 16)); \
684 } while (0)
685
686
687/* Macros for charset. */
688
689/* Size of bitmap of charset P in bytes. P is a start of charset,
690 i.e. *P is (re_opcode_t) charset or (re_opcode_t) charset_not. */
691#define CHARSET_BITMAP_SIZE(p) ((p)[1] & 0x7F)
692
693/* Nonzero if charset P has range table. */
25fe55af 694#define CHARSET_RANGE_TABLE_EXISTS_P(p) ((p)[1] & 0x80)
b18215fc
RS
695
696/* Return the address of range table of charset P. But not the start
697 of table itself, but the before where the number of ranges is
96cc36cc
RS
698 stored. `2 +' means to skip re_opcode_t and size of bitmap,
699 and the 2 bytes of flags at the start of the range table. */
700#define CHARSET_RANGE_TABLE(p) (&(p)[4 + CHARSET_BITMAP_SIZE (p)])
701
702/* Extract the bit flags that start a range table. */
703#define CHARSET_RANGE_TABLE_BITS(p) \
704 ((p)[2 + CHARSET_BITMAP_SIZE (p)] \
705 + (p)[3 + CHARSET_BITMAP_SIZE (p)] * 0x100)
b18215fc
RS
706
707/* Test if C is listed in the bitmap of charset P. */
708#define CHARSET_LOOKUP_BITMAP(p, c) \
709 ((c) < CHARSET_BITMAP_SIZE (p) * BYTEWIDTH \
710 && (p)[2 + (c) / BYTEWIDTH] & (1 << ((c) % BYTEWIDTH)))
711
712/* Return the address of end of RANGE_TABLE. COUNT is number of
25fe55af
RS
713 ranges (which is a pair of (start, end)) in the RANGE_TABLE. `* 2'
714 is start of range and end of range. `* 3' is size of each start
b18215fc
RS
715 and end. */
716#define CHARSET_RANGE_TABLE_END(range_table, count) \
717 ((range_table) + (count) * 2 * 3)
718
25fe55af 719/* Test if C is in RANGE_TABLE. A flag NOT is negated if C is in.
b18215fc
RS
720 COUNT is number of ranges in RANGE_TABLE. */
721#define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count) \
722 do \
723 { \
724 int range_start, range_end; \
725 unsigned char *p; \
726 unsigned char *range_table_end \
727 = CHARSET_RANGE_TABLE_END ((range_table), (count)); \
728 \
729 for (p = (range_table); p < range_table_end; p += 2 * 3) \
730 { \
731 EXTRACT_CHARACTER (range_start, p); \
732 EXTRACT_CHARACTER (range_end, p + 3); \
733 \
734 if (range_start <= (c) && (c) <= range_end) \
735 { \
736 (not) = !(not); \
737 break; \
738 } \
739 } \
740 } \
741 while (0)
742
743/* Test if C is in range table of CHARSET. The flag NOT is negated if
744 C is listed in it. */
745#define CHARSET_LOOKUP_RANGE_TABLE(not, c, charset) \
746 do \
747 { \
748 /* Number of ranges in range table. */ \
749 int count; \
750 unsigned char *range_table = CHARSET_RANGE_TABLE (charset); \
751 \
752 EXTRACT_NUMBER_AND_INCR (count, range_table); \
753 CHARSET_LOOKUP_RANGE_TABLE_RAW ((not), (c), range_table, count); \
754 } \
755 while (0)
756\f
fa9a63c5
RM
757/* If DEBUG is defined, Regex prints many voluminous messages about what
758 it is doing (if the variable `debug' is nonzero). If linked with the
759 main program in `iregex.c', you can enter patterns and strings
760 interactively. And if linked with the main program in `main.c' and
25fe55af 761 the other test files, you can run the already-written tests. */
fa9a63c5
RM
762
763#ifdef DEBUG
764
765/* We use standard I/O for debugging. */
766#include <stdio.h>
767
768/* It is useful to test things that ``must'' be true when debugging. */
769#include <assert.h>
770
99633e97 771static int debug = -100000;
fa9a63c5
RM
772
773#define DEBUG_STATEMENT(e) e
99633e97
SM
774#define DEBUG_PRINT1(x) if (debug > 0) printf (x)
775#define DEBUG_PRINT2(x1, x2) if (debug > 0) printf (x1, x2)
776#define DEBUG_PRINT3(x1, x2, x3) if (debug > 0) printf (x1, x2, x3)
777#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug > 0) printf (x1, x2, x3, x4)
25fe55af 778#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
99633e97 779 if (debug > 0) print_partial_compiled_pattern (s, e)
fa9a63c5 780#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
99633e97 781 if (debug > 0) print_double_string (w, s1, sz1, s2, sz2)
fa9a63c5
RM
782
783
784/* Print the fastmap in human-readable form. */
785
786void
787print_fastmap (fastmap)
788 char *fastmap;
789{
790 unsigned was_a_range = 0;
5e69f11e
RM
791 unsigned i = 0;
792
fa9a63c5
RM
793 while (i < (1 << BYTEWIDTH))
794 {
795 if (fastmap[i++])
796 {
797 was_a_range = 0;
25fe55af
RS
798 putchar (i - 1);
799 while (i < (1 << BYTEWIDTH) && fastmap[i])
800 {
801 was_a_range = 1;
802 i++;
803 }
fa9a63c5 804 if (was_a_range)
25fe55af
RS
805 {
806 printf ("-");
807 putchar (i - 1);
808 }
809 }
fa9a63c5 810 }
5e69f11e 811 putchar ('\n');
fa9a63c5
RM
812}
813
814
815/* Print a compiled pattern string in human-readable form, starting at
816 the START pointer into it and ending just before the pointer END. */
817
818void
819print_partial_compiled_pattern (start, end)
820 unsigned char *start;
821 unsigned char *end;
822{
823 int mcnt, mcnt2;
824 unsigned char *p = start;
825 unsigned char *pend = end;
826
827 if (start == NULL)
828 {
829 printf ("(null)\n");
830 return;
831 }
5e69f11e 832
fa9a63c5
RM
833 /* Loop over pattern commands. */
834 while (p < pend)
835 {
836 printf ("%d:\t", p - start);
837
838 switch ((re_opcode_t) *p++)
839 {
25fe55af
RS
840 case no_op:
841 printf ("/no_op");
842 break;
fa9a63c5 843
99633e97
SM
844 case succeed:
845 printf ("/succeed");
846 break;
847
fa9a63c5
RM
848 case exactn:
849 mcnt = *p++;
25fe55af
RS
850 printf ("/exactn/%d", mcnt);
851 do
fa9a63c5 852 {
25fe55af 853 putchar ('/');
fa9a63c5 854 putchar (*p++);
25fe55af
RS
855 }
856 while (--mcnt);
857 break;
fa9a63c5
RM
858
859 case start_memory:
505bde11 860 printf ("/start_memory/%d", *p++);
25fe55af 861 break;
fa9a63c5
RM
862
863 case stop_memory:
505bde11 864 printf ("/stop_memory/%d", *p++);
25fe55af 865 break;
fa9a63c5
RM
866
867 case duplicate:
868 printf ("/duplicate/%d", *p++);
869 break;
870
871 case anychar:
872 printf ("/anychar");
873 break;
874
875 case charset:
25fe55af
RS
876 case charset_not:
877 {
878 register int c, last = -100;
fa9a63c5 879 register int in_range = 0;
99633e97
SM
880 int length = CHARSET_BITMAP_SIZE (p - 1);
881 int has_range_table = CHARSET_RANGE_TABLE_EXISTS_P (p - 1);
fa9a63c5
RM
882
883 printf ("/charset [%s",
25fe55af 884 (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
5e69f11e 885
25fe55af 886 assert (p + *p < pend);
fa9a63c5 887
25fe55af 888 for (c = 0; c < 256; c++)
96cc36cc 889 if (c / 8 < length
fa9a63c5
RM
890 && (p[1 + (c/8)] & (1 << (c % 8))))
891 {
892 /* Are we starting a range? */
893 if (last + 1 == c && ! in_range)
894 {
895 putchar ('-');
896 in_range = 1;
897 }
898 /* Have we broken a range? */
899 else if (last + 1 != c && in_range)
96cc36cc 900 {
fa9a63c5
RM
901 putchar (last);
902 in_range = 0;
903 }
5e69f11e 904
fa9a63c5
RM
905 if (! in_range)
906 putchar (c);
907
908 last = c;
25fe55af 909 }
fa9a63c5
RM
910
911 if (in_range)
912 putchar (last);
913
914 putchar (']');
915
99633e97 916 p += 1 + length;
96cc36cc 917
96cc36cc 918 if (has_range_table)
99633e97
SM
919 {
920 int count;
921 printf ("has-range-table");
922
923 /* ??? Should print the range table; for now, just skip it. */
924 p += 2; /* skip range table bits */
925 EXTRACT_NUMBER_AND_INCR (count, p);
926 p = CHARSET_RANGE_TABLE_END (p, count);
927 }
fa9a63c5
RM
928 }
929 break;
930
931 case begline:
932 printf ("/begline");
25fe55af 933 break;
fa9a63c5
RM
934
935 case endline:
25fe55af
RS
936 printf ("/endline");
937 break;
fa9a63c5
RM
938
939 case on_failure_jump:
25fe55af
RS
940 extract_number_and_incr (&mcnt, &p);
941 printf ("/on_failure_jump to %d", p + mcnt - start);
942 break;
fa9a63c5
RM
943
944 case on_failure_keep_string_jump:
25fe55af
RS
945 extract_number_and_incr (&mcnt, &p);
946 printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
947 break;
fa9a63c5 948
505bde11 949 case on_failure_jump_loop:
fa9a63c5 950 extract_number_and_incr (&mcnt, &p);
505bde11 951 printf ("/on_failure_jump_loop to %d", p + mcnt - start);
5e69f11e
RM
952 break;
953
505bde11 954 case on_failure_jump_smart:
fa9a63c5 955 extract_number_and_incr (&mcnt, &p);
505bde11 956 printf ("/on_failure_jump_smart to %d", p + mcnt - start);
5e69f11e
RM
957 break;
958
25fe55af 959 case jump:
fa9a63c5 960 extract_number_and_incr (&mcnt, &p);
25fe55af 961 printf ("/jump to %d", p + mcnt - start);
fa9a63c5
RM
962 break;
963
25fe55af
RS
964 case succeed_n:
965 extract_number_and_incr (&mcnt, &p);
966 extract_number_and_incr (&mcnt2, &p);
99633e97 967 printf ("/succeed_n to %d, %d times", p - 2 + mcnt - start, mcnt2);
25fe55af 968 break;
5e69f11e 969
25fe55af
RS
970 case jump_n:
971 extract_number_and_incr (&mcnt, &p);
972 extract_number_and_incr (&mcnt2, &p);
99633e97 973 printf ("/jump_n to %d, %d times", p - 2 + mcnt - start, mcnt2);
25fe55af 974 break;
5e69f11e 975
25fe55af
RS
976 case set_number_at:
977 extract_number_and_incr (&mcnt, &p);
978 extract_number_and_incr (&mcnt2, &p);
99633e97 979 printf ("/set_number_at location %d to %d", p - 2 + mcnt - start, mcnt2);
25fe55af 980 break;
5e69f11e 981
25fe55af 982 case wordbound:
fa9a63c5
RM
983 printf ("/wordbound");
984 break;
985
986 case notwordbound:
987 printf ("/notwordbound");
25fe55af 988 break;
fa9a63c5
RM
989
990 case wordbeg:
991 printf ("/wordbeg");
992 break;
5e69f11e 993
fa9a63c5
RM
994 case wordend:
995 printf ("/wordend");
5e69f11e 996
1fb352e0
SM
997 case syntaxspec:
998 printf ("/syntaxspec");
999 mcnt = *p++;
1000 printf ("/%d", mcnt);
1001 break;
1002
1003 case notsyntaxspec:
1004 printf ("/notsyntaxspec");
1005 mcnt = *p++;
1006 printf ("/%d", mcnt);
1007 break;
1008
fa9a63c5
RM
1009#ifdef emacs
1010 case before_dot:
1011 printf ("/before_dot");
25fe55af 1012 break;
fa9a63c5
RM
1013
1014 case at_dot:
1015 printf ("/at_dot");
25fe55af 1016 break;
fa9a63c5
RM
1017
1018 case after_dot:
1019 printf ("/after_dot");
25fe55af 1020 break;
fa9a63c5 1021
1fb352e0
SM
1022 case categoryspec:
1023 printf ("/categoryspec");
fa9a63c5
RM
1024 mcnt = *p++;
1025 printf ("/%d", mcnt);
25fe55af 1026 break;
5e69f11e 1027
1fb352e0
SM
1028 case notcategoryspec:
1029 printf ("/notcategoryspec");
fa9a63c5
RM
1030 mcnt = *p++;
1031 printf ("/%d", mcnt);
1032 break;
1033#endif /* emacs */
1034
fa9a63c5
RM
1035 case begbuf:
1036 printf ("/begbuf");
25fe55af 1037 break;
fa9a63c5
RM
1038
1039 case endbuf:
1040 printf ("/endbuf");
25fe55af 1041 break;
fa9a63c5 1042
25fe55af
RS
1043 default:
1044 printf ("?%d", *(p-1));
fa9a63c5
RM
1045 }
1046
1047 putchar ('\n');
1048 }
1049
1050 printf ("%d:\tend of pattern.\n", p - start);
1051}
1052
1053
1054void
1055print_compiled_pattern (bufp)
1056 struct re_pattern_buffer *bufp;
1057{
1058 unsigned char *buffer = bufp->buffer;
1059
1060 print_partial_compiled_pattern (buffer, buffer + bufp->used);
505bde11 1061 printf ("%ld bytes used/%ld bytes allocated.\n", bufp->used, bufp->allocated);
fa9a63c5
RM
1062
1063 if (bufp->fastmap_accurate && bufp->fastmap)
1064 {
1065 printf ("fastmap: ");
1066 print_fastmap (bufp->fastmap);
1067 }
1068
1069 printf ("re_nsub: %d\t", bufp->re_nsub);
1070 printf ("regs_alloc: %d\t", bufp->regs_allocated);
1071 printf ("can_be_null: %d\t", bufp->can_be_null);
1072 printf ("newline_anchor: %d\n", bufp->newline_anchor);
1073 printf ("no_sub: %d\t", bufp->no_sub);
1074 printf ("not_bol: %d\t", bufp->not_bol);
1075 printf ("not_eol: %d\t", bufp->not_eol);
1076 printf ("syntax: %d\n", bufp->syntax);
505bde11 1077 fflush (stdout);
fa9a63c5
RM
1078 /* Perhaps we should print the translate table? */
1079}
1080
1081
1082void
1083print_double_string (where, string1, size1, string2, size2)
66f0296e
SM
1084 re_char *where;
1085 re_char *string1;
1086 re_char *string2;
fa9a63c5
RM
1087 int size1;
1088 int size2;
1089{
1090 unsigned this_char;
5e69f11e 1091
fa9a63c5
RM
1092 if (where == NULL)
1093 printf ("(null)");
1094 else
1095 {
1096 if (FIRST_STRING_P (where))
25fe55af
RS
1097 {
1098 for (this_char = where - string1; this_char < size1; this_char++)
1099 putchar (string1[this_char]);
fa9a63c5 1100
25fe55af
RS
1101 where = string2;
1102 }
fa9a63c5
RM
1103
1104 for (this_char = where - string2; this_char < size2; this_char++)
25fe55af 1105 putchar (string2[this_char]);
fa9a63c5
RM
1106 }
1107}
1108
1109#else /* not DEBUG */
1110
1111#undef assert
1112#define assert(e)
1113
1114#define DEBUG_STATEMENT(e)
1115#define DEBUG_PRINT1(x)
1116#define DEBUG_PRINT2(x1, x2)
1117#define DEBUG_PRINT3(x1, x2, x3)
1118#define DEBUG_PRINT4(x1, x2, x3, x4)
1119#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
1120#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
1121
1122#endif /* not DEBUG */
1123\f
1124/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
1125 also be assigned to arbitrarily: each pattern buffer stores its own
1126 syntax, so it can be changed between regex compilations. */
1127/* This has no initializer because initialized variables in Emacs
1128 become read-only after dumping. */
1129reg_syntax_t re_syntax_options;
1130
1131
1132/* Specify the precise syntax of regexps for compilation. This provides
1133 for compatibility for various utilities which historically have
1134 different, incompatible syntaxes.
1135
1136 The argument SYNTAX is a bit mask comprised of the various bits
25fe55af 1137 defined in regex.h. We return the old syntax. */
fa9a63c5
RM
1138
1139reg_syntax_t
1140re_set_syntax (syntax)
1141 reg_syntax_t syntax;
1142{
1143 reg_syntax_t ret = re_syntax_options;
5e69f11e 1144
fa9a63c5
RM
1145 re_syntax_options = syntax;
1146 return ret;
1147}
1148\f
1149/* This table gives an error message for each of the error codes listed
25fe55af 1150 in regex.h. Obviously the order here has to be same as there.
fa9a63c5 1151 POSIX doesn't require that we do anything for REG_NOERROR,
25fe55af 1152 but why not be nice? */
fa9a63c5
RM
1153
1154static const char *re_error_msgid[] =
5e69f11e
RM
1155 {
1156 gettext_noop ("Success"), /* REG_NOERROR */
1157 gettext_noop ("No match"), /* REG_NOMATCH */
1158 gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
1159 gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
1160 gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
1161 gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
1162 gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
1163 gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */
1164 gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
1165 gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
1166 gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
1167 gettext_noop ("Invalid range end"), /* REG_ERANGE */
1168 gettext_noop ("Memory exhausted"), /* REG_ESPACE */
1169 gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
1170 gettext_noop ("Premature end of regular expression"), /* REG_EEND */
1171 gettext_noop ("Regular expression too big"), /* REG_ESIZE */
1172 gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
fa9a63c5
RM
1173 };
1174\f
25fe55af 1175/* Avoiding alloca during matching, to placate r_alloc. */
fa9a63c5
RM
1176
1177/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1178 searching and matching functions should not call alloca. On some
1179 systems, alloca is implemented in terms of malloc, and if we're
1180 using the relocating allocator routines, then malloc could cause a
1181 relocation, which might (if the strings being searched are in the
1182 ralloc heap) shift the data out from underneath the regexp
1183 routines.
1184
5e69f11e 1185 Here's another reason to avoid allocation: Emacs
fa9a63c5
RM
1186 processes input from X in a signal handler; processing X input may
1187 call malloc; if input arrives while a matching routine is calling
1188 malloc, then we're scrod. But Emacs can't just block input while
1189 calling matching routines; then we don't notice interrupts when
1190 they come in. So, Emacs blocks input around all regexp calls
1191 except the matching calls, which it leaves unprotected, in the
1192 faith that they will not malloc. */
1193
1194/* Normally, this is fine. */
1195#define MATCH_MAY_ALLOCATE
1196
1197/* When using GNU C, we are not REALLY using the C alloca, no matter
1198 what config.h may say. So don't take precautions for it. */
1199#ifdef __GNUC__
1200#undef C_ALLOCA
1201#endif
1202
1203/* The match routines may not allocate if (1) they would do it with malloc
1204 and (2) it's not safe for them to use malloc.
1205 Note that if REL_ALLOC is defined, matching would not use malloc for the
1206 failure stack, but we would still use it for the register vectors;
25fe55af 1207 so REL_ALLOC should not affect this. */
fa9a63c5
RM
1208#if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
1209#undef MATCH_MAY_ALLOCATE
1210#endif
1211
1212\f
1213/* Failure stack declarations and macros; both re_compile_fastmap and
1214 re_match_2 use a failure stack. These have to be macros because of
1215 REGEX_ALLOCATE_STACK. */
5e69f11e 1216
fa9a63c5 1217
320a2a73 1218/* Approximate number of failure points for which to initially allocate space
fa9a63c5
RM
1219 when matching. If this number is exceeded, we allocate more
1220 space, so it is not a hard limit. */
1221#ifndef INIT_FAILURE_ALLOC
320a2a73 1222#define INIT_FAILURE_ALLOC 20
fa9a63c5
RM
1223#endif
1224
1225/* Roughly the maximum number of failure points on the stack. Would be
320a2a73 1226 exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed.
fa9a63c5 1227 This is a variable only so users of regex can assign to it; we never
25fe55af 1228 change it ourselves. */
fa9a63c5 1229#if defined (MATCH_MAY_ALLOCATE)
320a2a73
KH
1230/* Note that 4400 is enough to cause a crash on Alpha OSF/1,
1231 whose default stack limit is 2mb. In order for a larger
1232 value to work reliably, you have to try to make it accord
1233 with the process stack limit. */
1234int re_max_failures = 40000;
fa9a63c5 1235#else
320a2a73 1236int re_max_failures = 4000;
fa9a63c5
RM
1237#endif
1238
1239union fail_stack_elt
1240{
66f0296e 1241 const unsigned char *pointer;
505bde11 1242 unsigned int integer;
fa9a63c5
RM
1243};
1244
1245typedef union fail_stack_elt fail_stack_elt_t;
1246
1247typedef struct
1248{
1249 fail_stack_elt_t *stack;
1250 unsigned size;
505bde11
SM
1251 unsigned avail; /* Offset of next open position. */
1252 unsigned frame; /* Offset of the cur constructed frame. */
fa9a63c5
RM
1253} fail_stack_type;
1254
505bde11
SM
1255#define PATTERN_STACK_EMPTY() (fail_stack.avail == 0)
1256#define FAIL_STACK_EMPTY() (fail_stack.frame == 0)
fa9a63c5
RM
1257#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
1258
1259
1260/* Define macros to initialize and free the failure stack.
1261 Do `return -2' if the alloc fails. */
1262
1263#ifdef MATCH_MAY_ALLOCATE
1264#define INIT_FAIL_STACK() \
1265 do { \
1266 fail_stack.stack = (fail_stack_elt_t *) \
320a2a73
KH
1267 REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE \
1268 * sizeof (fail_stack_elt_t)); \
fa9a63c5
RM
1269 \
1270 if (fail_stack.stack == NULL) \
1271 return -2; \
1272 \
1273 fail_stack.size = INIT_FAILURE_ALLOC; \
1274 fail_stack.avail = 0; \
505bde11 1275 fail_stack.frame = 0; \
fa9a63c5
RM
1276 } while (0)
1277
1278#define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
1279#else
1280#define INIT_FAIL_STACK() \
1281 do { \
1282 fail_stack.avail = 0; \
505bde11 1283 fail_stack.frame = 0; \
fa9a63c5
RM
1284 } while (0)
1285
1286#define RESET_FAIL_STACK()
1287#endif
1288
1289
320a2a73
KH
1290/* Double the size of FAIL_STACK, up to a limit
1291 which allows approximately `re_max_failures' items.
fa9a63c5
RM
1292
1293 Return 1 if succeeds, and 0 if either ran out of memory
5e69f11e
RM
1294 allocating space for it or it was already too large.
1295
25fe55af 1296 REGEX_REALLOCATE_STACK requires `destination' be declared. */
fa9a63c5 1297
320a2a73
KH
1298/* Factor to increase the failure stack size by
1299 when we increase it.
1300 This used to be 2, but 2 was too wasteful
1301 because the old discarded stacks added up to as much space
1302 were as ultimate, maximum-size stack. */
1303#define FAIL_STACK_GROWTH_FACTOR 4
1304
1305#define GROW_FAIL_STACK(fail_stack) \
eead07d6
KH
1306 (((fail_stack).size * sizeof (fail_stack_elt_t) \
1307 >= re_max_failures * TYPICAL_FAILURE_SIZE) \
fa9a63c5 1308 ? 0 \
320a2a73
KH
1309 : ((fail_stack).stack \
1310 = (fail_stack_elt_t *) \
25fe55af
RS
1311 REGEX_REALLOCATE_STACK ((fail_stack).stack, \
1312 (fail_stack).size * sizeof (fail_stack_elt_t), \
320a2a73
KH
1313 MIN (re_max_failures * TYPICAL_FAILURE_SIZE, \
1314 ((fail_stack).size * sizeof (fail_stack_elt_t) \
1315 * FAIL_STACK_GROWTH_FACTOR))), \
fa9a63c5
RM
1316 \
1317 (fail_stack).stack == NULL \
1318 ? 0 \
6453db45
KH
1319 : ((fail_stack).size \
1320 = (MIN (re_max_failures * TYPICAL_FAILURE_SIZE, \
1321 ((fail_stack).size * sizeof (fail_stack_elt_t) \
1322 * FAIL_STACK_GROWTH_FACTOR)) \
1323 / sizeof (fail_stack_elt_t)), \
25fe55af 1324 1)))
fa9a63c5
RM
1325
1326
5e69f11e 1327/* Push pointer POINTER on FAIL_STACK.
fa9a63c5
RM
1328 Return 1 if was able to do so and 0 if ran out of memory allocating
1329 space to do so. */
1330#define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \
1331 ((FAIL_STACK_FULL () \
320a2a73 1332 && !GROW_FAIL_STACK (FAIL_STACK)) \
fa9a63c5
RM
1333 ? 0 \
1334 : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \
1335 1))
505bde11 1336#define POP_PATTERN_OP() POP_FAILURE_POINTER ()
fa9a63c5
RM
1337
1338/* Push a pointer value onto the failure stack.
1339 Assumes the variable `fail_stack'. Probably should only
25fe55af 1340 be called from within `PUSH_FAILURE_POINT'. */
fa9a63c5
RM
1341#define PUSH_FAILURE_POINTER(item) \
1342 fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
1343
1344/* This pushes an integer-valued item onto the failure stack.
1345 Assumes the variable `fail_stack'. Probably should only
25fe55af 1346 be called from within `PUSH_FAILURE_POINT'. */
fa9a63c5
RM
1347#define PUSH_FAILURE_INT(item) \
1348 fail_stack.stack[fail_stack.avail++].integer = (item)
1349
1350/* Push a fail_stack_elt_t value onto the failure stack.
1351 Assumes the variable `fail_stack'. Probably should only
25fe55af 1352 be called from within `PUSH_FAILURE_POINT'. */
fa9a63c5
RM
1353#define PUSH_FAILURE_ELT(item) \
1354 fail_stack.stack[fail_stack.avail++] = (item)
1355
1356/* These three POP... operations complement the three PUSH... operations.
1357 All assume that `fail_stack' is nonempty. */
1358#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1359#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1360#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1361
505bde11
SM
1362/* Individual items aside from the registers. */
1363#define NUM_NONREG_ITEMS 3
1364
1365/* Used to examine the stack (to detect infinite loops). */
1366#define FAILURE_PAT(h) fail_stack.stack[(h) - 1].pointer
66f0296e 1367#define FAILURE_STR(h) (fail_stack.stack[(h) - 2].pointer)
505bde11
SM
1368#define NEXT_FAILURE_HANDLE(h) fail_stack.stack[(h) - 3].integer
1369#define TOP_FAILURE_HANDLE() fail_stack.frame
fa9a63c5
RM
1370
1371
505bde11
SM
1372#define ENSURE_FAIL_STACK(space) \
1373while (REMAINING_AVAIL_SLOTS <= space) { \
1374 if (!GROW_FAIL_STACK (fail_stack)) \
1375 return -2; \
1376 DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", (fail_stack).size);\
1377 DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1378}
1379
1380/* Push register NUM onto the stack. */
1381#define PUSH_FAILURE_REG(num) \
1382do { \
1383 char *destination; \
1384 ENSURE_FAIL_STACK(3); \
1385 DEBUG_PRINT4 (" Push reg %d (spanning %p -> %p)\n", \
1386 num, regstart[num], regend[num]); \
1387 PUSH_FAILURE_POINTER (regstart[num]); \
1388 PUSH_FAILURE_POINTER (regend[num]); \
1389 PUSH_FAILURE_INT (num); \
1390} while (0)
1391
1392/* Pop a saved register off the stack. */
1393#define POP_FAILURE_REG() \
1394do { \
1395 int reg = POP_FAILURE_INT (); \
1396 regend[reg] = POP_FAILURE_POINTER (); \
1397 regstart[reg] = POP_FAILURE_POINTER (); \
1398 DEBUG_PRINT4 (" Pop reg %d (spanning %p -> %p)\n", \
1399 reg, regstart[reg], regend[reg]); \
1400} while (0)
1401
1402/* Check that we are not stuck in an infinite loop. */
1403#define CHECK_INFINITE_LOOP(pat_cur, string_place) \
1404do { \
1405 int failure = TOP_FAILURE_HANDLE(); \
1406 /* Check for infinite matching loops */ \
1407 while (failure > 0 && \
1408 (FAILURE_STR (failure) == string_place \
1409 || FAILURE_STR (failure) == NULL)) \
1410 { \
1411 assert (FAILURE_PAT (failure) >= bufp->buffer \
66f0296e 1412 && FAILURE_PAT (failure) <= bufp->buffer + bufp->used); \
505bde11
SM
1413 if (FAILURE_PAT (failure) == pat_cur) \
1414 goto fail; \
66f0296e 1415 DEBUG_PRINT2 (" Other pattern: %p\n", FAILURE_PAT (failure)); \
505bde11
SM
1416 failure = NEXT_FAILURE_HANDLE(failure); \
1417 } \
1418 DEBUG_PRINT2 (" Other string: %p\n", FAILURE_STR (failure)); \
1419} while (0)
1420
fa9a63c5 1421/* Push the information about the state we will need
5e69f11e
RM
1422 if we ever fail back to it.
1423
505bde11 1424 Requires variables fail_stack, regstart, regend and
320a2a73 1425 num_regs be declared. GROW_FAIL_STACK requires `destination' be
fa9a63c5 1426 declared.
5e69f11e 1427
fa9a63c5
RM
1428 Does `return FAILURE_CODE' if runs out of memory. */
1429
505bde11
SM
1430#define PUSH_FAILURE_POINT(pattern, string_place) \
1431do { \
1432 char *destination; \
1433 /* Must be int, so when we don't save any registers, the arithmetic \
1434 of 0 + -1 isn't done as unsigned. */ \
1435 \
1436 DEBUG_STATEMENT (failure_id++); \
1437 DEBUG_STATEMENT (nfailure_points_pushed++); \
1438 DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
1439 DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail); \
1440 DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
1441 \
1442 ENSURE_FAIL_STACK (NUM_NONREG_ITEMS); \
1443 \
1444 DEBUG_PRINT1 ("\n"); \
1445 \
1446 DEBUG_PRINT2 (" Push frame index: %d\n", fail_stack.frame); \
1447 PUSH_FAILURE_INT (fail_stack.frame); \
1448 \
1449 DEBUG_PRINT2 (" Push string %p: `", string_place); \
1450 DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, size2);\
1451 DEBUG_PRINT1 ("'\n"); \
1452 PUSH_FAILURE_POINTER (string_place); \
1453 \
1454 DEBUG_PRINT2 (" Push pattern %p: ", pattern); \
1455 DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern, pend); \
1456 PUSH_FAILURE_POINTER (pattern); \
1457 \
1458 /* Close the frame by moving the frame pointer past it. */ \
1459 fail_stack.frame = fail_stack.avail; \
1460} while (0)
fa9a63c5 1461
320a2a73
KH
1462/* Estimate the size of data pushed by a typical failure stack entry.
1463 An estimate is all we need, because all we use this for
1464 is to choose a limit for how big to make the failure stack. */
1465
1466#define TYPICAL_FAILURE_SIZE 20
fa9a63c5 1467
fa9a63c5
RM
1468/* How many items can still be added to the stack without overflowing it. */
1469#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1470
1471
1472/* Pops what PUSH_FAIL_STACK pushes.
1473
1474 We restore into the parameters, all of which should be lvalues:
1475 STR -- the saved data position.
1476 PAT -- the saved pattern position.
fa9a63c5 1477 REGSTART, REGEND -- arrays of string positions.
5e69f11e 1478
fa9a63c5 1479 Also assumes the variables `fail_stack' and (if debugging), `bufp',
25fe55af 1480 `pend', `string1', `size1', `string2', and `size2'. */
fa9a63c5 1481
505bde11
SM
1482#define POP_FAILURE_POINT(str, pat) \
1483do { \
fa9a63c5
RM
1484 assert (!FAIL_STACK_EMPTY ()); \
1485 \
1486 /* Remove failure points and point to how many regs pushed. */ \
1487 DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
1488 DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
25fe55af 1489 DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
fa9a63c5 1490 \
505bde11
SM
1491 /* Pop the saved registers. */ \
1492 while (fail_stack.frame < fail_stack.avail) \
1493 POP_FAILURE_REG (); \
fa9a63c5 1494 \
505bde11
SM
1495 pat = (unsigned char *) POP_FAILURE_POINTER (); \
1496 DEBUG_PRINT2 (" Popping pattern %p: ", pat); \
1497 DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
fa9a63c5
RM
1498 \
1499 /* If the saved string location is NULL, it came from an \
1500 on_failure_keep_string_jump opcode, and we want to throw away the \
1501 saved NULL, thus retaining our current position in the string. */ \
66f0296e 1502 str = (re_char *) POP_FAILURE_POINTER (); \
505bde11 1503 DEBUG_PRINT2 (" Popping string %p: `", str); \
fa9a63c5
RM
1504 DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
1505 DEBUG_PRINT1 ("'\n"); \
1506 \
505bde11
SM
1507 fail_stack.frame = POP_FAILURE_INT (); \
1508 DEBUG_PRINT2 (" Popping frame index: %d\n", fail_stack.frame); \
fa9a63c5 1509 \
505bde11
SM
1510 assert (fail_stack.avail >= 0); \
1511 assert (fail_stack.frame <= fail_stack.avail); \
fa9a63c5 1512 \
fa9a63c5 1513 DEBUG_STATEMENT (nfailure_points_popped++); \
505bde11 1514} while (0) /* POP_FAILURE_POINT */
fa9a63c5
RM
1515
1516
1517\f
fa9a63c5 1518/* Registers are set to a sentinel when they haven't yet matched. */
505bde11 1519#define REG_UNSET_VALUE NULL
fa9a63c5
RM
1520#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1521\f
1522/* Subroutine declarations and macros for regex_compile. */
1523
99633e97
SM
1524static void store_op1 _RE_ARGS((re_opcode_t op, unsigned char *loc, int arg));
1525static void store_op2 _RE_ARGS((re_opcode_t op, unsigned char *loc,
1526 int arg1, int arg2));
1527static void insert_op1 _RE_ARGS((re_opcode_t op, unsigned char *loc,
1528 int arg, unsigned char *end));
1529static void insert_op2 _RE_ARGS((re_opcode_t op, unsigned char *loc,
1530 int arg1, int arg2, unsigned char *end));
1531static boolean at_begline_loc_p _RE_ARGS((const unsigned char *pattern,
1532 const unsigned char *p,
1533 reg_syntax_t syntax));
1534static boolean at_endline_loc_p _RE_ARGS((const unsigned char *p,
1535 const unsigned char *pend,
1536 reg_syntax_t syntax));
4e8a9132 1537static unsigned char *skip_one_char _RE_ARGS((unsigned char *p));
fa9a63c5 1538
5e69f11e 1539/* Fetch the next character in the uncompiled pattern---translating it
fa9a63c5
RM
1540 if necessary. Also cast from a signed character in the constant
1541 string passed to us by the user to an unsigned char that we can use
1542 as an array index (in, e.g., `translate'). */
6676cb1c 1543#ifndef PATFETCH
fa9a63c5 1544#define PATFETCH(c) \
99633e97
SM
1545 do { \
1546 PATFETCH_RAW (c); \
28703c16 1547 if (RE_TRANSLATE_P (translate)) c = RE_TRANSLATE (translate, c); \
fa9a63c5 1548 } while (0)
6676cb1c 1549#endif
fa9a63c5
RM
1550
1551/* Fetch the next character in the uncompiled pattern, with no
25fe55af 1552 translation. */
fa9a63c5
RM
1553#define PATFETCH_RAW(c) \
1554 do {if (p == pend) return REG_EEND; \
66f0296e 1555 c = *p++; \
fa9a63c5
RM
1556 } while (0)
1557
1558/* Go backwards one character in the pattern. */
1559#define PATUNFETCH p--
1560
1561
1562/* If `translate' is non-null, return translate[D], else just D. We
1563 cast the subscript to translate because some data is declared as
1564 `char *', to avoid warnings when a string constant is passed. But
1565 when we use a character as a subscript we must make it unsigned. */
6676cb1c
RS
1566#ifndef TRANSLATE
1567#define TRANSLATE(d) \
66f0296e 1568 (RE_TRANSLATE_P (translate) ? RE_TRANSLATE (translate, (d)) : (d))
6676cb1c 1569#endif
fa9a63c5
RM
1570
1571
1572/* Macros for outputting the compiled pattern into `buffer'. */
1573
1574/* If the buffer isn't allocated when it comes in, use this. */
1575#define INIT_BUF_SIZE 32
1576
25fe55af 1577/* Make sure we have at least N more bytes of space in buffer. */
fa9a63c5
RM
1578#define GET_BUFFER_SPACE(n) \
1579 while (b - bufp->buffer + (n) > bufp->allocated) \
1580 EXTEND_BUFFER ()
1581
1582/* Make sure we have one more byte of buffer space and then add C to it. */
1583#define BUF_PUSH(c) \
1584 do { \
1585 GET_BUFFER_SPACE (1); \
1586 *b++ = (unsigned char) (c); \
1587 } while (0)
1588
1589
1590/* Ensure we have two more bytes of buffer space and then append C1 and C2. */
1591#define BUF_PUSH_2(c1, c2) \
1592 do { \
1593 GET_BUFFER_SPACE (2); \
1594 *b++ = (unsigned char) (c1); \
1595 *b++ = (unsigned char) (c2); \
1596 } while (0)
1597
1598
25fe55af 1599/* As with BUF_PUSH_2, except for three bytes. */
fa9a63c5
RM
1600#define BUF_PUSH_3(c1, c2, c3) \
1601 do { \
1602 GET_BUFFER_SPACE (3); \
1603 *b++ = (unsigned char) (c1); \
1604 *b++ = (unsigned char) (c2); \
1605 *b++ = (unsigned char) (c3); \
1606 } while (0)
1607
1608
1609/* Store a jump with opcode OP at LOC to location TO. We store a
25fe55af 1610 relative address offset by the three bytes the jump itself occupies. */
fa9a63c5
RM
1611#define STORE_JUMP(op, loc, to) \
1612 store_op1 (op, loc, (to) - (loc) - 3)
1613
1614/* Likewise, for a two-argument jump. */
1615#define STORE_JUMP2(op, loc, to, arg) \
1616 store_op2 (op, loc, (to) - (loc) - 3, arg)
1617
25fe55af 1618/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
fa9a63c5
RM
1619#define INSERT_JUMP(op, loc, to) \
1620 insert_op1 (op, loc, (to) - (loc) - 3, b)
1621
1622/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */
1623#define INSERT_JUMP2(op, loc, to, arg) \
1624 insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
1625
1626
1627/* This is not an arbitrary limit: the arguments which represent offsets
25fe55af 1628 into the pattern are two bytes long. So if 2^16 bytes turns out to
fa9a63c5
RM
1629 be too small, many things would have to change. */
1630#define MAX_BUF_SIZE (1L << 16)
1631
1632
1633/* Extend the buffer by twice its current size via realloc and
1634 reset the pointers that pointed into the old block to point to the
1635 correct places in the new one. If extending the buffer results in it
25fe55af 1636 being larger than MAX_BUF_SIZE, then flag memory exhausted. */
fa9a63c5 1637#define EXTEND_BUFFER() \
25fe55af 1638 do { \
fa9a63c5 1639 unsigned char *old_buffer = bufp->buffer; \
25fe55af 1640 if (bufp->allocated == MAX_BUF_SIZE) \
fa9a63c5
RM
1641 return REG_ESIZE; \
1642 bufp->allocated <<= 1; \
1643 if (bufp->allocated > MAX_BUF_SIZE) \
25fe55af 1644 bufp->allocated = MAX_BUF_SIZE; \
fa9a63c5
RM
1645 bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
1646 if (bufp->buffer == NULL) \
1647 return REG_ESPACE; \
1648 /* If the buffer moved, move all the pointers into it. */ \
1649 if (old_buffer != bufp->buffer) \
1650 { \
25fe55af
RS
1651 b = (b - old_buffer) + bufp->buffer; \
1652 begalt = (begalt - old_buffer) + bufp->buffer; \
1653 if (fixup_alt_jump) \
1654 fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
1655 if (laststart) \
1656 laststart = (laststart - old_buffer) + bufp->buffer; \
1657 if (pending_exact) \
1658 pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
fa9a63c5
RM
1659 } \
1660 } while (0)
1661
1662
1663/* Since we have one byte reserved for the register number argument to
1664 {start,stop}_memory, the maximum number of groups we can report
1665 things about is what fits in that byte. */
1666#define MAX_REGNUM 255
1667
1668/* But patterns can have more than `MAX_REGNUM' registers. We just
1669 ignore the excess. */
1670typedef unsigned regnum_t;
1671
1672
1673/* Macros for the compile stack. */
1674
1675/* Since offsets can go either forwards or backwards, this type needs to
25fe55af 1676 be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
fa9a63c5
RM
1677typedef int pattern_offset_t;
1678
1679typedef struct
1680{
1681 pattern_offset_t begalt_offset;
1682 pattern_offset_t fixup_alt_jump;
5e69f11e 1683 pattern_offset_t laststart_offset;
fa9a63c5
RM
1684 regnum_t regnum;
1685} compile_stack_elt_t;
1686
1687
1688typedef struct
1689{
1690 compile_stack_elt_t *stack;
1691 unsigned size;
1692 unsigned avail; /* Offset of next open position. */
1693} compile_stack_type;
1694
1695
1696#define INIT_COMPILE_STACK_SIZE 32
1697
1698#define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
1699#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
1700
25fe55af 1701/* The next available element. */
fa9a63c5
RM
1702#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1703
1704
b18215fc
RS
1705/* Structure to manage work area for range table. */
1706struct range_table_work_area
1707{
1708 int *table; /* actual work area. */
1709 int allocated; /* allocated size for work area in bytes. */
25fe55af 1710 int used; /* actually used size in words. */
96cc36cc 1711 int bits; /* flag to record character classes */
b18215fc
RS
1712};
1713
1714/* Make sure that WORK_AREA can hold more N multibyte characters. */
1715#define EXTEND_RANGE_TABLE_WORK_AREA(work_area, n) \
1716 do { \
1717 if (((work_area).used + (n)) * sizeof (int) > (work_area).allocated) \
1718 { \
1719 (work_area).allocated += 16 * sizeof (int); \
1720 if ((work_area).table) \
1721 (work_area).table \
1722 = (int *) realloc ((work_area).table, (work_area).allocated); \
1723 else \
1724 (work_area).table \
1725 = (int *) malloc ((work_area).allocated); \
1726 if ((work_area).table == 0) \
1727 FREE_STACK_RETURN (REG_ESPACE); \
1728 } \
1729 } while (0)
1730
96cc36cc
RS
1731#define SET_RANGE_TABLE_WORK_AREA_BIT(work_area, bit) \
1732 (work_area).bits |= (bit)
1733
1734/* These bits represent the various character classes such as [:alnum:]
1735 in a charset's range table. */
1736#define BIT_ALNUM 0x1
1737#define BIT_ALPHA 0x2
1738#define BIT_WORD 0x4
f71b19b6
DL
1739#define BIT_ASCII 0x8
1740#define BIT_NONASCII 0x10
96cc36cc
RS
1741#define BIT_GRAPH 0x20
1742#define BIT_LOWER 0x40
1743#define BIT_PRINT 0x80
1744#define BIT_PUNCT 0x100
1745#define BIT_SPACE 0x200
1746#define BIT_UPPER 0x400
f71b19b6
DL
1747#define BIT_UNIBYTE 0x800
1748#define BIT_MULTIBYTE 0x1000
96cc36cc 1749
b18215fc
RS
1750/* Set a range (RANGE_START, RANGE_END) to WORK_AREA. */
1751#define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end) \
1752 do { \
1753 EXTEND_RANGE_TABLE_WORK_AREA ((work_area), 2); \
1754 (work_area).table[(work_area).used++] = (range_start); \
1755 (work_area).table[(work_area).used++] = (range_end); \
1756 } while (0)
1757
25fe55af 1758/* Free allocated memory for WORK_AREA. */
b18215fc
RS
1759#define FREE_RANGE_TABLE_WORK_AREA(work_area) \
1760 do { \
1761 if ((work_area).table) \
1762 free ((work_area).table); \
1763 } while (0)
1764
96cc36cc 1765#define CLEAR_RANGE_TABLE_WORK_USED(work_area) ((work_area).used = 0, (work_area).bits = 0)
b18215fc 1766#define RANGE_TABLE_WORK_USED(work_area) ((work_area).used)
96cc36cc 1767#define RANGE_TABLE_WORK_BITS(work_area) ((work_area).bits)
b18215fc
RS
1768#define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i])
1769
1770
fa9a63c5 1771/* Set the bit for character C in a list. */
25fe55af
RS
1772#define SET_LIST_BIT(c) \
1773 (b[((unsigned char) (c)) / BYTEWIDTH] \
fa9a63c5
RM
1774 |= 1 << (((unsigned char) c) % BYTEWIDTH))
1775
1776
1777/* Get the next unsigned number in the uncompiled pattern. */
25fe55af 1778#define GET_UNSIGNED_NUMBER(num) \
99633e97 1779 do { if (p != pend) \
fa9a63c5 1780 { \
25fe55af
RS
1781 PATFETCH (c); \
1782 while (ISDIGIT (c)) \
1783 { \
1784 if (num < 0) \
1785 num = 0; \
1786 num = num * 10 + c - '0'; \
1787 if (p == pend) \
1788 break; \
1789 PATFETCH (c); \
1790 } \
1791 } \
99633e97 1792 } while (0)
fa9a63c5
RM
1793
1794#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
1795
1796#define IS_CHAR_CLASS(string) \
1797 (STREQ (string, "alpha") || STREQ (string, "upper") \
1798 || STREQ (string, "lower") || STREQ (string, "digit") \
1799 || STREQ (string, "alnum") || STREQ (string, "xdigit") \
1800 || STREQ (string, "space") || STREQ (string, "print") \
1801 || STREQ (string, "punct") || STREQ (string, "graph") \
96cc36cc 1802 || STREQ (string, "cntrl") || STREQ (string, "blank") \
f71b19b6
DL
1803 || STREQ (string, "word") \
1804 || STREQ (string, "ascii") || STREQ (string, "nonascii") \
1805 || STREQ (string, "unibyte") || STREQ (string, "multibyte"))
99633e97
SM
1806
1807/* QUIT is only used on NTemacs. */
1808#if !defined (WINDOWSNT) || !defined (emacs)
1809#undef QUIT
1810#define QUIT
1811#endif
fa9a63c5
RM
1812\f
1813#ifndef MATCH_MAY_ALLOCATE
1814
1815/* If we cannot allocate large objects within re_match_2_internal,
1816 we make the fail stack and register vectors global.
1817 The fail stack, we grow to the maximum size when a regexp
1818 is compiled.
1819 The register vectors, we adjust in size each time we
1820 compile a regexp, according to the number of registers it needs. */
1821
1822static fail_stack_type fail_stack;
1823
1824/* Size with which the following vectors are currently allocated.
1825 That is so we can make them bigger as needed,
25fe55af 1826 but never make them smaller. */
fa9a63c5
RM
1827static int regs_allocated_size;
1828
66f0296e
SM
1829static re_char ** regstart, ** regend;
1830static re_char **best_regstart, **best_regend;
fa9a63c5
RM
1831
1832/* Make the register vectors big enough for NUM_REGS registers,
25fe55af 1833 but don't make them smaller. */
fa9a63c5
RM
1834
1835static
1836regex_grow_registers (num_regs)
1837 int num_regs;
1838{
1839 if (num_regs > regs_allocated_size)
1840 {
66f0296e
SM
1841 RETALLOC_IF (regstart, num_regs, re_char *);
1842 RETALLOC_IF (regend, num_regs, re_char *);
1843 RETALLOC_IF (best_regstart, num_regs, re_char *);
1844 RETALLOC_IF (best_regend, num_regs, re_char *);
fa9a63c5
RM
1845
1846 regs_allocated_size = num_regs;
1847 }
1848}
1849
1850#endif /* not MATCH_MAY_ALLOCATE */
1851\f
99633e97
SM
1852static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
1853 compile_stack,
1854 regnum_t regnum));
1855
fa9a63c5
RM
1856/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1857 Returns one of error codes defined in `regex.h', or zero for success.
1858
1859 Assumes the `allocated' (and perhaps `buffer') and `translate'
1860 fields are set in BUFP on entry.
1861
1862 If it succeeds, results are put in BUFP (if it returns an error, the
1863 contents of BUFP are undefined):
1864 `buffer' is the compiled pattern;
1865 `syntax' is set to SYNTAX;
1866 `used' is set to the length of the compiled pattern;
1867 `fastmap_accurate' is zero;
1868 `re_nsub' is the number of subexpressions in PATTERN;
1869 `not_bol' and `not_eol' are zero;
5e69f11e 1870
fa9a63c5
RM
1871 The `fastmap' and `newline_anchor' fields are neither
1872 examined nor set. */
1873
505bde11
SM
1874/* Insert the `jump' from the end of last alternative to "here".
1875 The space for the jump has already been allocated. */
1876#define FIXUP_ALT_JUMP() \
1877do { \
1878 if (fixup_alt_jump) \
1879 STORE_JUMP (jump, fixup_alt_jump, b); \
1880} while (0)
1881
1882
fa9a63c5
RM
1883/* Return, freeing storage we allocated. */
1884#define FREE_STACK_RETURN(value) \
b18215fc
RS
1885 do { \
1886 FREE_RANGE_TABLE_WORK_AREA (range_table_work); \
1887 free (compile_stack.stack); \
1888 return value; \
1889 } while (0)
fa9a63c5
RM
1890
1891static reg_errcode_t
1892regex_compile (pattern, size, syntax, bufp)
66f0296e 1893 re_char *pattern;
fa9a63c5
RM
1894 int size;
1895 reg_syntax_t syntax;
1896 struct re_pattern_buffer *bufp;
1897{
1898 /* We fetch characters from PATTERN here. Even though PATTERN is
1899 `char *' (i.e., signed), we declare these variables as unsigned, so
1900 they can be reliably used as array indices. */
b18215fc 1901 register unsigned int c, c1;
5e69f11e 1902
fa9a63c5 1903 /* A random temporary spot in PATTERN. */
66f0296e 1904 re_char *p1;
fa9a63c5
RM
1905
1906 /* Points to the end of the buffer, where we should append. */
1907 register unsigned char *b;
5e69f11e 1908
fa9a63c5
RM
1909 /* Keeps track of unclosed groups. */
1910 compile_stack_type compile_stack;
1911
1912 /* Points to the current (ending) position in the pattern. */
22336245
RS
1913#ifdef AIX
1914 /* `const' makes AIX compiler fail. */
66f0296e 1915 unsigned char *p = pattern;
22336245 1916#else
66f0296e 1917 re_char *p = pattern;
22336245 1918#endif
66f0296e 1919 re_char *pend = pattern + size;
5e69f11e 1920
fa9a63c5 1921 /* How to translate the characters in the pattern. */
6676cb1c 1922 RE_TRANSLATE_TYPE translate = bufp->translate;
fa9a63c5
RM
1923
1924 /* Address of the count-byte of the most recently inserted `exactn'
1925 command. This makes it possible to tell if a new exact-match
1926 character can be added to that command or if the character requires
1927 a new `exactn' command. */
1928 unsigned char *pending_exact = 0;
1929
1930 /* Address of start of the most recently finished expression.
1931 This tells, e.g., postfix * where to find the start of its
1932 operand. Reset at the beginning of groups and alternatives. */
1933 unsigned char *laststart = 0;
1934
1935 /* Address of beginning of regexp, or inside of last group. */
1936 unsigned char *begalt;
1937
1938 /* Place in the uncompiled pattern (i.e., the {) to
1939 which to go back if the interval is invalid. */
66f0296e 1940 re_char *beg_interval;
5e69f11e 1941
fa9a63c5 1942 /* Address of the place where a forward jump should go to the end of
25fe55af 1943 the containing expression. Each alternative of an `or' -- except the
fa9a63c5
RM
1944 last -- ends with a forward jump of this sort. */
1945 unsigned char *fixup_alt_jump = 0;
1946
1947 /* Counts open-groups as they are encountered. Remembered for the
1948 matching close-group on the compile stack, so the same register
1949 number is put in the stop_memory as the start_memory. */
1950 regnum_t regnum = 0;
1951
b18215fc
RS
1952 /* Work area for range table of charset. */
1953 struct range_table_work_area range_table_work;
1954
fa9a63c5 1955#ifdef DEBUG
99633e97 1956 debug++;
fa9a63c5 1957 DEBUG_PRINT1 ("\nCompiling pattern: ");
99633e97 1958 if (debug > 0)
fa9a63c5
RM
1959 {
1960 unsigned debug_count;
5e69f11e 1961
fa9a63c5 1962 for (debug_count = 0; debug_count < size; debug_count++)
25fe55af 1963 putchar (pattern[debug_count]);
fa9a63c5
RM
1964 putchar ('\n');
1965 }
1966#endif /* DEBUG */
1967
1968 /* Initialize the compile stack. */
1969 compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1970 if (compile_stack.stack == NULL)
1971 return REG_ESPACE;
1972
1973 compile_stack.size = INIT_COMPILE_STACK_SIZE;
1974 compile_stack.avail = 0;
1975
b18215fc
RS
1976 range_table_work.table = 0;
1977 range_table_work.allocated = 0;
1978
fa9a63c5
RM
1979 /* Initialize the pattern buffer. */
1980 bufp->syntax = syntax;
1981 bufp->fastmap_accurate = 0;
1982 bufp->not_bol = bufp->not_eol = 0;
1983
1984 /* Set `used' to zero, so that if we return an error, the pattern
1985 printer (for debugging) will think there's no pattern. We reset it
1986 at the end. */
1987 bufp->used = 0;
5e69f11e 1988
fa9a63c5 1989 /* Always count groups, whether or not bufp->no_sub is set. */
5e69f11e 1990 bufp->re_nsub = 0;
fa9a63c5 1991
b18215fc
RS
1992#ifdef emacs
1993 /* bufp->multibyte is set before regex_compile is called, so don't alter
1994 it. */
1995#else /* not emacs */
1996 /* Nothing is recognized as a multibyte character. */
1997 bufp->multibyte = 0;
1998#endif
1999
fa9a63c5
RM
2000#if !defined (emacs) && !defined (SYNTAX_TABLE)
2001 /* Initialize the syntax table. */
2002 init_syntax_once ();
2003#endif
2004
2005 if (bufp->allocated == 0)
2006 {
2007 if (bufp->buffer)
2008 { /* If zero allocated, but buffer is non-null, try to realloc
25fe55af
RS
2009 enough space. This loses if buffer's address is bogus, but
2010 that is the user's responsibility. */
2011 RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
2012 }
fa9a63c5 2013 else
25fe55af
RS
2014 { /* Caller did not allocate a buffer. Do it for them. */
2015 bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
2016 }
fa9a63c5
RM
2017 if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
2018
2019 bufp->allocated = INIT_BUF_SIZE;
2020 }
2021
2022 begalt = b = bufp->buffer;
2023
2024 /* Loop through the uncompiled pattern until we're at the end. */
2025 while (p != pend)
2026 {
2027 PATFETCH (c);
2028
2029 switch (c)
25fe55af
RS
2030 {
2031 case '^':
2032 {
2033 if ( /* If at start of pattern, it's an operator. */
2034 p == pattern + 1
2035 /* If context independent, it's an operator. */
2036 || syntax & RE_CONTEXT_INDEP_ANCHORS
2037 /* Otherwise, depends on what's come before. */
2038 || at_begline_loc_p (pattern, p, syntax))
2039 BUF_PUSH (begline);
2040 else
2041 goto normal_char;
2042 }
2043 break;
2044
2045
2046 case '$':
2047 {
2048 if ( /* If at end of pattern, it's an operator. */
2049 p == pend
2050 /* If context independent, it's an operator. */
2051 || syntax & RE_CONTEXT_INDEP_ANCHORS
2052 /* Otherwise, depends on what's next. */
2053 || at_endline_loc_p (p, pend, syntax))
2054 BUF_PUSH (endline);
2055 else
2056 goto normal_char;
2057 }
2058 break;
fa9a63c5
RM
2059
2060
2061 case '+':
25fe55af
RS
2062 case '?':
2063 if ((syntax & RE_BK_PLUS_QM)
2064 || (syntax & RE_LIMITED_OPS))
2065 goto normal_char;
2066 handle_plus:
2067 case '*':
2068 /* If there is no previous pattern... */
2069 if (!laststart)
2070 {
2071 if (syntax & RE_CONTEXT_INVALID_OPS)
2072 FREE_STACK_RETURN (REG_BADRPT);
2073 else if (!(syntax & RE_CONTEXT_INDEP_OPS))
2074 goto normal_char;
2075 }
2076
2077 {
25fe55af 2078 /* 1 means zero (many) matches is allowed. */
66f0296e
SM
2079 boolean zero_times_ok = 0, many_times_ok = 0;
2080 boolean greedy = 1;
25fe55af
RS
2081
2082 /* If there is a sequence of repetition chars, collapse it
2083 down to just one (the right one). We can't combine
2084 interval operators with these because of, e.g., `a{2}*',
2085 which should only match an even number of `a's. */
2086
2087 for (;;)
2088 {
1c8c6d39
DL
2089 if (!(syntax & RE_ALL_GREEDY)
2090 && c == '?' && (zero_times_ok || many_times_ok))
2091 greedy = 0;
2092 else
2093 {
2094 zero_times_ok |= c != '+';
2095 many_times_ok |= c != '?';
2096 }
25fe55af
RS
2097
2098 if (p == pend)
2099 break;
2100
2101 PATFETCH (c);
2102
2103 if (c == '*'
2104 || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
2105 ;
2106
2107 else if (syntax & RE_BK_PLUS_QM && c == '\\')
2108 {
2109 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2110
2111 PATFETCH (c1);
2112 if (!(c1 == '+' || c1 == '?'))
2113 {
2114 PATUNFETCH;
2115 PATUNFETCH;
2116 break;
2117 }
2118
2119 c = c1;
2120 }
2121 else
2122 {
2123 PATUNFETCH;
2124 break;
2125 }
2126
2127 /* If we get here, we found another repeat character. */
99633e97 2128 }
25fe55af
RS
2129
2130 /* Star, etc. applied to an empty pattern is equivalent
2131 to an empty pattern. */
4e8a9132 2132 if (!laststart || laststart == b)
25fe55af
RS
2133 break;
2134
2135 /* Now we know whether or not zero matches is allowed
2136 and also whether or not two or more matches is allowed. */
1c8c6d39
DL
2137 if (greedy)
2138 {
99633e97 2139 if (many_times_ok)
4e8a9132
SM
2140 {
2141 boolean simple = skip_one_char (laststart) == b;
2142 unsigned int startoffset = 0;
2143 assert (skip_one_char (laststart) <= b);
2144
2145 if (!zero_times_ok && simple)
2146 { /* Since simple * loops can be made faster by using
2147 on_failure_keep_string_jump, we turn simple P+
2148 into PP* if P is simple. */
2149 unsigned char *p1, *p2;
2150 startoffset = b - laststart;
2151 GET_BUFFER_SPACE (startoffset);
2152 p1 = b; p2 = laststart;
2153 while (p2 < p1)
2154 *b++ = *p2++;
2155 zero_times_ok = 1;
99633e97 2156 }
4e8a9132
SM
2157
2158 GET_BUFFER_SPACE (6);
2159 if (!zero_times_ok)
2160 /* A + loop. */
2161 STORE_JUMP (on_failure_jump_loop, b, b + 6);
99633e97 2162 else
4e8a9132
SM
2163 /* Simple * loops can use on_failure_keep_string_jump
2164 depending on what follows. But since we don't know
2165 that yet, we leave the decision up to
2166 on_failure_jump_smart. */
2167 INSERT_JUMP (simple ? on_failure_jump_smart
2168 : on_failure_jump_loop,
2169 laststart + startoffset, b + 6);
99633e97 2170 b += 3;
4e8a9132 2171 STORE_JUMP (jump, b, laststart + startoffset);
99633e97
SM
2172 b += 3;
2173 }
2174 else
2175 {
4e8a9132
SM
2176 /* A simple ? pattern. */
2177 assert (zero_times_ok);
2178 GET_BUFFER_SPACE (3);
2179 INSERT_JUMP (on_failure_jump, laststart, b + 3);
99633e97
SM
2180 b += 3;
2181 }
1c8c6d39
DL
2182 }
2183 else /* not greedy */
2184 { /* I wish the greedy and non-greedy cases could be merged. */
2185
2186 if (many_times_ok)
2187 {
505bde11 2188 /* The non-greedy multiple match looks like a repeat..until:
1c8c6d39
DL
2189 we only need a conditional jump at the end of the loop */
2190 GET_BUFFER_SPACE (3);
2191 STORE_JUMP (on_failure_jump, b, laststart);
2192 b += 3;
2193 if (zero_times_ok)
2194 {
2195 /* The repeat...until naturally matches one or more.
2196 To also match zero times, we need to first jump to
2197 the end of the loop (its conditional jump). */
2198 GET_BUFFER_SPACE (3);
2199 INSERT_JUMP (jump, laststart, b);
2200 b += 3;
2201 }
2202 }
2203 else
2204 {
2205 /* non-greedy a?? */
2206 GET_BUFFER_SPACE (6);
2207 INSERT_JUMP (jump, laststart, b + 3);
2208 b += 3;
2209 INSERT_JUMP (on_failure_jump, laststart, laststart + 6);
2210 b += 3;
2211 }
2212 }
2213 }
4e8a9132 2214 pending_exact = 0;
fa9a63c5
RM
2215 break;
2216
2217
2218 case '.':
25fe55af
RS
2219 laststart = b;
2220 BUF_PUSH (anychar);
2221 break;
fa9a63c5
RM
2222
2223
25fe55af
RS
2224 case '[':
2225 {
b18215fc 2226 CLEAR_RANGE_TABLE_WORK_USED (range_table_work);
fa9a63c5 2227
25fe55af 2228 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
fa9a63c5 2229
25fe55af
RS
2230 /* Ensure that we have enough space to push a charset: the
2231 opcode, the length count, and the bitset; 34 bytes in all. */
fa9a63c5
RM
2232 GET_BUFFER_SPACE (34);
2233
25fe55af 2234 laststart = b;
e318085a 2235
25fe55af
RS
2236 /* We test `*p == '^' twice, instead of using an if
2237 statement, so we only need one BUF_PUSH. */
2238 BUF_PUSH (*p == '^' ? charset_not : charset);
2239 if (*p == '^')
2240 p++;
e318085a 2241
25fe55af
RS
2242 /* Remember the first position in the bracket expression. */
2243 p1 = p;
e318085a 2244
25fe55af
RS
2245 /* Push the number of bytes in the bitmap. */
2246 BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
e318085a 2247
25fe55af
RS
2248 /* Clear the whole map. */
2249 bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
e318085a 2250
25fe55af
RS
2251 /* charset_not matches newline according to a syntax bit. */
2252 if ((re_opcode_t) b[-2] == charset_not
2253 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2254 SET_LIST_BIT ('\n');
fa9a63c5 2255
25fe55af
RS
2256 /* Read in characters and ranges, setting map bits. */
2257 for (;;)
2258 {
b18215fc
RS
2259 int len;
2260 boolean escaped_char = false;
e318085a 2261
25fe55af 2262 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
e318085a 2263
25fe55af 2264 PATFETCH (c);
e318085a 2265
25fe55af
RS
2266 /* \ might escape characters inside [...] and [^...]. */
2267 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2268 {
2269 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
e318085a
RS
2270
2271 PATFETCH (c);
b18215fc 2272 escaped_char = true;
25fe55af 2273 }
b18215fc
RS
2274 else
2275 {
657fcfbd
RS
2276 /* Could be the end of the bracket expression. If it's
2277 not (i.e., when the bracket expression is `[]' so
2278 far), the ']' character bit gets set way below. */
2279 if (c == ']' && p != p1 + 1)
2280 break;
25fe55af 2281 }
b18215fc
RS
2282
2283 /* If C indicates start of multibyte char, get the
2284 actual character code in C, and set the pattern
2285 pointer P to the next character boundary. */
2286 if (bufp->multibyte && BASE_LEADING_CODE_P (c))
2287 {
2288 PATUNFETCH;
2289 c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
2290 p += len;
25fe55af 2291 }
b18215fc
RS
2292 /* What should we do for the character which is
2293 greater than 0x7F, but not BASE_LEADING_CODE_P?
2294 XXX */
2295
25fe55af
RS
2296 /* See if we're at the beginning of a possible character
2297 class. */
b18215fc
RS
2298
2299 else if (!escaped_char &&
2300 syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
657fcfbd
RS
2301 {
2302 /* Leave room for the null. */
25fe55af 2303 char str[CHAR_CLASS_MAX_LENGTH + 1];
b18215fc 2304
25fe55af
RS
2305 PATFETCH (c);
2306 c1 = 0;
b18215fc 2307
25fe55af
RS
2308 /* If pattern is `[[:'. */
2309 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
b18215fc 2310
25fe55af
RS
2311 for (;;)
2312 {
2313 PATFETCH (c);
2314 if (c == ':' || c == ']' || p == pend
2315 || c1 == CHAR_CLASS_MAX_LENGTH)
2316 break;
2317 str[c1++] = c;
2318 }
2319 str[c1] = '\0';
b18215fc
RS
2320
2321 /* If isn't a word bracketed by `[:' and `:]':
2322 undo the ending character, the letters, and
2323 leave the leading `:' and `[' (but set bits for
2324 them). */
25fe55af
RS
2325 if (c == ':' && *p == ']')
2326 {
2327 int ch;
2328 boolean is_alnum = STREQ (str, "alnum");
2329 boolean is_alpha = STREQ (str, "alpha");
f71b19b6 2330 boolean is_ascii = STREQ (str, "ascii");
25fe55af
RS
2331 boolean is_blank = STREQ (str, "blank");
2332 boolean is_cntrl = STREQ (str, "cntrl");
2333 boolean is_digit = STREQ (str, "digit");
2334 boolean is_graph = STREQ (str, "graph");
2335 boolean is_lower = STREQ (str, "lower");
f71b19b6
DL
2336 boolean is_multibyte = STREQ (str, "multibyte");
2337 boolean is_nonascii = STREQ (str, "nonascii");
25fe55af
RS
2338 boolean is_print = STREQ (str, "print");
2339 boolean is_punct = STREQ (str, "punct");
2340 boolean is_space = STREQ (str, "space");
f71b19b6 2341 boolean is_unibyte = STREQ (str, "unibyte");
25fe55af 2342 boolean is_upper = STREQ (str, "upper");
96cc36cc 2343 boolean is_word = STREQ (str, "word");
f71b19b6 2344 boolean is_xdigit = STREQ (str, "xdigit");
25fe55af
RS
2345
2346 if (!IS_CHAR_CLASS (str))
fa9a63c5
RM
2347 FREE_STACK_RETURN (REG_ECTYPE);
2348
25fe55af
RS
2349 /* Throw away the ] at the end of the character
2350 class. */
2351 PATFETCH (c);
fa9a63c5 2352
25fe55af 2353 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
fa9a63c5 2354
96cc36cc
RS
2355 /* Most character classes in a multibyte match
2356 just set a flag. Exceptions are is_blank,
2357 is_digit, is_cntrl, and is_xdigit, since
2358 they can only match ASCII characters. We
2359 don't need to handle them for multibyte. */
2360
2361 if (bufp->multibyte)
2362 {
2363 int bit = 0;
2364
2365 if (is_alnum) bit = BIT_ALNUM;
2366 if (is_alpha) bit = BIT_ALPHA;
f71b19b6 2367 if (is_ascii) bit = BIT_ASCII;
96cc36cc
RS
2368 if (is_graph) bit = BIT_GRAPH;
2369 if (is_lower) bit = BIT_LOWER;
f71b19b6
DL
2370 if (is_multibyte) bit = BIT_MULTIBYTE;
2371 if (is_nonascii) bit = BIT_NONASCII;
96cc36cc
RS
2372 if (is_print) bit = BIT_PRINT;
2373 if (is_punct) bit = BIT_PUNCT;
2374 if (is_space) bit = BIT_SPACE;
f71b19b6 2375 if (is_unibyte) bit = BIT_UNIBYTE;
96cc36cc
RS
2376 if (is_upper) bit = BIT_UPPER;
2377 if (is_word) bit = BIT_WORD;
2378 if (bit)
2379 SET_RANGE_TABLE_WORK_AREA_BIT (range_table_work,
2380 bit);
2381 }
2382
2383 /* Handle character classes for ASCII characters. */
25fe55af
RS
2384 for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
2385 {
7ae68633 2386 int translated = TRANSLATE (ch);
fa9a63c5
RM
2387 /* This was split into 3 if's to
2388 avoid an arbitrary limit in some compiler. */
25fe55af
RS
2389 if ( (is_alnum && ISALNUM (ch))
2390 || (is_alpha && ISALPHA (ch))
2391 || (is_blank && ISBLANK (ch))
2392 || (is_cntrl && ISCNTRL (ch)))
7ae68633 2393 SET_LIST_BIT (translated);
fa9a63c5 2394 if ( (is_digit && ISDIGIT (ch))
25fe55af
RS
2395 || (is_graph && ISGRAPH (ch))
2396 || (is_lower && ISLOWER (ch))
2397 || (is_print && ISPRINT (ch)))
7ae68633 2398 SET_LIST_BIT (translated);
fa9a63c5 2399 if ( (is_punct && ISPUNCT (ch))
25fe55af
RS
2400 || (is_space && ISSPACE (ch))
2401 || (is_upper && ISUPPER (ch))
2402 || (is_xdigit && ISXDIGIT (ch)))
7ae68633 2403 SET_LIST_BIT (translated);
f71b19b6
DL
2404 if ( (is_ascii && IS_REAL_ASCII (ch))
2405 || (is_nonascii && !IS_REAL_ASCII (ch))
2406 || (is_unibyte && ISUNIBYTE (ch))
2407 || (is_multibyte && !ISUNIBYTE (ch)))
2408 SET_LIST_BIT (translated);
2409
96cc36cc
RS
2410 if ( (is_word && ISWORD (ch)))
2411 SET_LIST_BIT (translated);
25fe55af 2412 }
b18215fc
RS
2413
2414 /* Repeat the loop. */
2415 continue;
25fe55af
RS
2416 }
2417 else
2418 {
2419 c1++;
2420 while (c1--)
2421 PATUNFETCH;
2422 SET_LIST_BIT ('[');
b18215fc
RS
2423
2424 /* Because the `:' may starts the range, we
2425 can't simply set bit and repeat the loop.
25fe55af 2426 Instead, just set it to C and handle below. */
b18215fc 2427 c = ':';
25fe55af
RS
2428 }
2429 }
b18215fc
RS
2430
2431 if (p < pend && p[0] == '-' && p[1] != ']')
2432 {
2433
2434 /* Discard the `-'. */
2435 PATFETCH (c1);
2436
2437 /* Fetch the character which ends the range. */
2438 PATFETCH (c1);
2439 if (bufp->multibyte && BASE_LEADING_CODE_P (c1))
e318085a 2440 {
b18215fc
RS
2441 PATUNFETCH;
2442 c1 = STRING_CHAR_AND_LENGTH (p, pend - p, len);
2443 p += len;
e318085a 2444 }
b18215fc 2445
e934739e
RS
2446 if (SINGLE_BYTE_CHAR_P (c)
2447 && ! SINGLE_BYTE_CHAR_P (c1))
2448 {
2449 /* Handle a range such as \177-\377 in multibyte mode.
2450 Split that into two ranges,,
2451 the low one ending at 0237, and the high one
2452 starting at ...040. */
505bde11
SM
2453 /* Unless I'm missing something,
2454 this line is useless. -sm
2455 int c1_base = (c1 & ~0177) | 040; */
e934739e
RS
2456 SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
2457 c1 = 0237;
2458 }
2459 else if (!SAME_CHARSET_P (c, c1))
b18215fc 2460 FREE_STACK_RETURN (REG_ERANGE);
e318085a 2461 }
25fe55af 2462 else
b18215fc
RS
2463 /* Range from C to C. */
2464 c1 = c;
2465
2466 /* Set the range ... */
2467 if (SINGLE_BYTE_CHAR_P (c))
2468 /* ... into bitmap. */
25fe55af 2469 {
b18215fc
RS
2470 unsigned this_char;
2471 int range_start = c, range_end = c1;
2472
2473 /* If the start is after the end, the range is empty. */
2474 if (range_start > range_end)
2475 {
2476 if (syntax & RE_NO_EMPTY_RANGES)
2477 FREE_STACK_RETURN (REG_ERANGE);
2478 /* Else, repeat the loop. */
2479 }
2480 else
2481 {
2482 for (this_char = range_start; this_char <= range_end;
2483 this_char++)
2484 SET_LIST_BIT (TRANSLATE (this_char));
e934739e 2485 }
25fe55af 2486 }
e318085a 2487 else
b18215fc
RS
2488 /* ... into range table. */
2489 SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
e318085a
RS
2490 }
2491
25fe55af
RS
2492 /* Discard any (non)matching list bytes that are all 0 at the
2493 end of the map. Decrease the map-length byte too. */
2494 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
2495 b[-1]--;
2496 b += b[-1];
fa9a63c5 2497
96cc36cc
RS
2498 /* Build real range table from work area. */
2499 if (RANGE_TABLE_WORK_USED (range_table_work)
2500 || RANGE_TABLE_WORK_BITS (range_table_work))
b18215fc
RS
2501 {
2502 int i;
2503 int used = RANGE_TABLE_WORK_USED (range_table_work);
fa9a63c5 2504
b18215fc 2505 /* Allocate space for COUNT + RANGE_TABLE. Needs two
96cc36cc
RS
2506 bytes for flags, two for COUNT, and three bytes for
2507 each character. */
2508 GET_BUFFER_SPACE (4 + used * 3);
fa9a63c5 2509
b18215fc
RS
2510 /* Indicate the existence of range table. */
2511 laststart[1] |= 0x80;
fa9a63c5 2512
96cc36cc
RS
2513 /* Store the character class flag bits into the range table.
2514 If not in emacs, these flag bits are always 0. */
2515 *b++ = RANGE_TABLE_WORK_BITS (range_table_work) & 0xff;
2516 *b++ = RANGE_TABLE_WORK_BITS (range_table_work) >> 8;
2517
b18215fc
RS
2518 STORE_NUMBER_AND_INCR (b, used / 2);
2519 for (i = 0; i < used; i++)
2520 STORE_CHARACTER_AND_INCR
2521 (b, RANGE_TABLE_WORK_ELT (range_table_work, i));
2522 }
25fe55af
RS
2523 }
2524 break;
fa9a63c5
RM
2525
2526
b18215fc 2527 case '(':
25fe55af
RS
2528 if (syntax & RE_NO_BK_PARENS)
2529 goto handle_open;
2530 else
2531 goto normal_char;
fa9a63c5
RM
2532
2533
25fe55af
RS
2534 case ')':
2535 if (syntax & RE_NO_BK_PARENS)
2536 goto handle_close;
2537 else
2538 goto normal_char;
e318085a
RS
2539
2540
25fe55af
RS
2541 case '\n':
2542 if (syntax & RE_NEWLINE_ALT)
2543 goto handle_alt;
2544 else
2545 goto normal_char;
e318085a
RS
2546
2547
b18215fc 2548 case '|':
25fe55af
RS
2549 if (syntax & RE_NO_BK_VBAR)
2550 goto handle_alt;
2551 else
2552 goto normal_char;
2553
2554
2555 case '{':
2556 if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
2557 goto handle_interval;
2558 else
2559 goto normal_char;
2560
2561
2562 case '\\':
2563 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2564
2565 /* Do not translate the character after the \, so that we can
2566 distinguish, e.g., \B from \b, even if we normally would
2567 translate, e.g., B to b. */
2568 PATFETCH_RAW (c);
2569
2570 switch (c)
2571 {
2572 case '(':
2573 if (syntax & RE_NO_BK_PARENS)
2574 goto normal_backslash;
2575
2576 handle_open:
505bde11
SM
2577 {
2578 int shy = 0;
2579 if (p+1 < pend)
2580 {
2581 /* Look for a special (?...) construct */
2582 PATFETCH (c);
2583 if ((syntax & RE_SHY_GROUPS) && c == '?')
2584 {
2585 PATFETCH (c);
2586 switch (c)
2587 {
2588 case ':': shy = 1; break;
2589 default:
2590 /* Only (?:...) is supported right now. */
2591 FREE_STACK_RETURN (REG_BADPAT);
2592 }
2593 }
2594 else PATUNFETCH;
2595 }
2596
2597 if (!shy)
2598 {
2599 bufp->re_nsub++;
2600 regnum++;
2601 }
25fe55af 2602
99633e97
SM
2603 if (COMPILE_STACK_FULL)
2604 {
2605 RETALLOC (compile_stack.stack, compile_stack.size << 1,
2606 compile_stack_elt_t);
2607 if (compile_stack.stack == NULL) return REG_ESPACE;
25fe55af 2608
99633e97
SM
2609 compile_stack.size <<= 1;
2610 }
25fe55af 2611
99633e97
SM
2612 /* These are the values to restore when we hit end of this
2613 group. They are all relative offsets, so that if the
2614 whole pattern moves because of realloc, they will still
2615 be valid. */
2616 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
2617 COMPILE_STACK_TOP.fixup_alt_jump
2618 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
2619 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
2620 COMPILE_STACK_TOP.regnum = shy ? -regnum : regnum;
2621
2622 /* Do not push a
2623 start_memory for groups beyond the last one we can
2624 represent in the compiled pattern. */
2625 if (regnum <= MAX_REGNUM && !shy)
2626 BUF_PUSH_2 (start_memory, regnum);
2627
2628 compile_stack.avail++;
2629
2630 fixup_alt_jump = 0;
2631 laststart = 0;
2632 begalt = b;
2633 /* If we've reached MAX_REGNUM groups, then this open
2634 won't actually generate any code, so we'll have to
2635 clear pending_exact explicitly. */
2636 pending_exact = 0;
2637 break;
505bde11 2638 }
25fe55af
RS
2639
2640 case ')':
2641 if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
2642
2643 if (COMPILE_STACK_EMPTY)
505bde11
SM
2644 {
2645 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2646 goto normal_backslash;
2647 else
2648 FREE_STACK_RETURN (REG_ERPAREN);
2649 }
25fe55af
RS
2650
2651 handle_close:
505bde11 2652 FIXUP_ALT_JUMP ();
25fe55af
RS
2653
2654 /* See similar code for backslashed left paren above. */
2655 if (COMPILE_STACK_EMPTY)
505bde11
SM
2656 {
2657 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2658 goto normal_char;
2659 else
2660 FREE_STACK_RETURN (REG_ERPAREN);
2661 }
25fe55af
RS
2662
2663 /* Since we just checked for an empty stack above, this
2664 ``can't happen''. */
2665 assert (compile_stack.avail != 0);
2666 {
2667 /* We don't just want to restore into `regnum', because
2668 later groups should continue to be numbered higher,
2669 as in `(ab)c(de)' -- the second group is #2. */
2670 regnum_t this_group_regnum;
2671
2672 compile_stack.avail--;
2673 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
2674 fixup_alt_jump
2675 = COMPILE_STACK_TOP.fixup_alt_jump
2676 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
2677 : 0;
2678 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
2679 this_group_regnum = COMPILE_STACK_TOP.regnum;
b18215fc
RS
2680 /* If we've reached MAX_REGNUM groups, then this open
2681 won't actually generate any code, so we'll have to
2682 clear pending_exact explicitly. */
2683 pending_exact = 0;
e318085a 2684
25fe55af
RS
2685 /* We're at the end of the group, so now we know how many
2686 groups were inside this one. */
505bde11
SM
2687 if (this_group_regnum <= MAX_REGNUM && this_group_regnum > 0)
2688 BUF_PUSH_2 (stop_memory, this_group_regnum);
25fe55af
RS
2689 }
2690 break;
2691
2692
2693 case '|': /* `\|'. */
2694 if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
2695 goto normal_backslash;
2696 handle_alt:
2697 if (syntax & RE_LIMITED_OPS)
2698 goto normal_char;
2699
2700 /* Insert before the previous alternative a jump which
2701 jumps to this alternative if the former fails. */
2702 GET_BUFFER_SPACE (3);
2703 INSERT_JUMP (on_failure_jump, begalt, b + 6);
2704 pending_exact = 0;
2705 b += 3;
2706
2707 /* The alternative before this one has a jump after it
2708 which gets executed if it gets matched. Adjust that
2709 jump so it will jump to this alternative's analogous
2710 jump (put in below, which in turn will jump to the next
2711 (if any) alternative's such jump, etc.). The last such
2712 jump jumps to the correct final destination. A picture:
2713 _____ _____
2714 | | | |
2715 | v | v
2716 a | b | c
2717
2718 If we are at `b', then fixup_alt_jump right now points to a
2719 three-byte space after `a'. We'll put in the jump, set
2720 fixup_alt_jump to right after `b', and leave behind three
2721 bytes which we'll fill in when we get to after `c'. */
2722
505bde11 2723 FIXUP_ALT_JUMP ();
25fe55af
RS
2724
2725 /* Mark and leave space for a jump after this alternative,
2726 to be filled in later either by next alternative or
2727 when know we're at the end of a series of alternatives. */
2728 fixup_alt_jump = b;
2729 GET_BUFFER_SPACE (3);
2730 b += 3;
2731
2732 laststart = 0;
2733 begalt = b;
2734 break;
2735
2736
2737 case '{':
2738 /* If \{ is a literal. */
2739 if (!(syntax & RE_INTERVALS)
2740 /* If we're at `\{' and it's not the open-interval
2741 operator. */
2742 || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
2743 || (p - 2 == pattern && p == pend))
2744 goto normal_backslash;
2745
2746 handle_interval:
2747 {
2748 /* If got here, then the syntax allows intervals. */
2749
2750 /* At least (most) this many matches must be made. */
99633e97 2751 int lower_bound = 0, upper_bound = -1;
25fe55af
RS
2752
2753 beg_interval = p - 1;
2754
2755 if (p == pend)
2756 {
2757 if (syntax & RE_NO_BK_BRACES)
2758 goto unfetch_interval;
2759 else
2760 FREE_STACK_RETURN (REG_EBRACE);
2761 }
2762
2763 GET_UNSIGNED_NUMBER (lower_bound);
2764
2765 if (c == ',')
2766 {
2767 GET_UNSIGNED_NUMBER (upper_bound);
2768 if (upper_bound < 0) upper_bound = RE_DUP_MAX;
2769 }
2770 else
2771 /* Interval such as `{1}' => match exactly once. */
2772 upper_bound = lower_bound;
2773
2774 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
2775 || lower_bound > upper_bound)
2776 {
2777 if (syntax & RE_NO_BK_BRACES)
2778 goto unfetch_interval;
2779 else
2780 FREE_STACK_RETURN (REG_BADBR);
2781 }
2782
2783 if (!(syntax & RE_NO_BK_BRACES))
2784 {
2785 if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
2786
2787 PATFETCH (c);
2788 }
2789
2790 if (c != '}')
2791 {
2792 if (syntax & RE_NO_BK_BRACES)
2793 goto unfetch_interval;
2794 else
2795 FREE_STACK_RETURN (REG_BADBR);
2796 }
2797
2798 /* We just parsed a valid interval. */
2799
2800 /* If it's invalid to have no preceding re. */
2801 if (!laststart)
2802 {
2803 if (syntax & RE_CONTEXT_INVALID_OPS)
2804 FREE_STACK_RETURN (REG_BADRPT);
2805 else if (syntax & RE_CONTEXT_INDEP_OPS)
2806 laststart = b;
2807 else
2808 goto unfetch_interval;
2809 }
2810
2811 /* If the upper bound is zero, don't want to succeed at
2812 all; jump from `laststart' to `b + 3', which will be
2813 the end of the buffer after we insert the jump. */
2814 if (upper_bound == 0)
2815 {
2816 GET_BUFFER_SPACE (3);
2817 INSERT_JUMP (jump, laststart, b + 3);
2818 b += 3;
2819 }
2820
2821 /* Otherwise, we have a nontrivial interval. When
2822 we're all done, the pattern will look like:
2823 set_number_at <jump count> <upper bound>
2824 set_number_at <succeed_n count> <lower bound>
2825 succeed_n <after jump addr> <succeed_n count>
2826 <body of loop>
2827 jump_n <succeed_n addr> <jump count>
2828 (The upper bound and `jump_n' are omitted if
2829 `upper_bound' is 1, though.) */
2830 else
2831 { /* If the upper bound is > 1, we need to insert
2832 more at the end of the loop. */
2833 unsigned nbytes = 10 + (upper_bound > 1) * 10;
2834
2835 GET_BUFFER_SPACE (nbytes);
2836
2837 /* Initialize lower bound of the `succeed_n', even
2838 though it will be set during matching by its
2839 attendant `set_number_at' (inserted next),
2840 because `re_compile_fastmap' needs to know.
2841 Jump to the `jump_n' we might insert below. */
2842 INSERT_JUMP2 (succeed_n, laststart,
2843 b + 5 + (upper_bound > 1) * 5,
2844 lower_bound);
2845 b += 5;
2846
2847 /* Code to initialize the lower bound. Insert
2848 before the `succeed_n'. The `5' is the last two
2849 bytes of this `set_number_at', plus 3 bytes of
2850 the following `succeed_n'. */
2851 insert_op2 (set_number_at, laststart, 5, lower_bound, b);
2852 b += 5;
2853
2854 if (upper_bound > 1)
2855 { /* More than one repetition is allowed, so
2856 append a backward jump to the `succeed_n'
2857 that starts this interval.
2858
2859 When we've reached this during matching,
2860 we'll have matched the interval once, so
2861 jump back only `upper_bound - 1' times. */
2862 STORE_JUMP2 (jump_n, b, laststart + 5,
2863 upper_bound - 1);
2864 b += 5;
2865
2866 /* The location we want to set is the second
2867 parameter of the `jump_n'; that is `b-2' as
2868 an absolute address. `laststart' will be
2869 the `set_number_at' we're about to insert;
2870 `laststart+3' the number to set, the source
2871 for the relative address. But we are
2872 inserting into the middle of the pattern --
2873 so everything is getting moved up by 5.
2874 Conclusion: (b - 2) - (laststart + 3) + 5,
2875 i.e., b - laststart.
2876
2877 We insert this at the beginning of the loop
2878 so that if we fail during matching, we'll
2879 reinitialize the bounds. */
2880 insert_op2 (set_number_at, laststart, b - laststart,
2881 upper_bound - 1, b);
2882 b += 5;
2883 }
2884 }
2885 pending_exact = 0;
2886 beg_interval = NULL;
2887 }
2888 break;
2889
2890 unfetch_interval:
2891 /* If an invalid interval, match the characters as literals. */
2892 assert (beg_interval);
2893 p = beg_interval;
2894 beg_interval = NULL;
2895
2896 /* normal_char and normal_backslash need `c'. */
2897 PATFETCH (c);
2898
2899 if (!(syntax & RE_NO_BK_BRACES))
2900 {
2901 if (p > pattern && p[-1] == '\\')
2902 goto normal_backslash;
2903 }
2904 goto normal_char;
e318085a 2905
b18215fc 2906#ifdef emacs
25fe55af
RS
2907 /* There is no way to specify the before_dot and after_dot
2908 operators. rms says this is ok. --karl */
2909 case '=':
2910 BUF_PUSH (at_dot);
2911 break;
2912
2913 case 's':
2914 laststart = b;
2915 PATFETCH (c);
2916 BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
2917 break;
2918
2919 case 'S':
2920 laststart = b;
2921 PATFETCH (c);
2922 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
2923 break;
b18215fc
RS
2924
2925 case 'c':
2926 laststart = b;
2927 PATFETCH_RAW (c);
2928 BUF_PUSH_2 (categoryspec, c);
2929 break;
e318085a 2930
b18215fc
RS
2931 case 'C':
2932 laststart = b;
2933 PATFETCH_RAW (c);
2934 BUF_PUSH_2 (notcategoryspec, c);
2935 break;
2936#endif /* emacs */
e318085a 2937
e318085a 2938
25fe55af
RS
2939 case 'w':
2940 laststart = b;
1fb352e0 2941 BUF_PUSH_2 (syntaxspec, Sword);
25fe55af 2942 break;
e318085a 2943
e318085a 2944
25fe55af
RS
2945 case 'W':
2946 laststart = b;
1fb352e0 2947 BUF_PUSH_2 (notsyntaxspec, Sword);
25fe55af 2948 break;
e318085a
RS
2949
2950
25fe55af
RS
2951 case '<':
2952 BUF_PUSH (wordbeg);
2953 break;
e318085a 2954
25fe55af
RS
2955 case '>':
2956 BUF_PUSH (wordend);
2957 break;
e318085a 2958
25fe55af
RS
2959 case 'b':
2960 BUF_PUSH (wordbound);
2961 break;
e318085a 2962
25fe55af
RS
2963 case 'B':
2964 BUF_PUSH (notwordbound);
2965 break;
fa9a63c5 2966
25fe55af
RS
2967 case '`':
2968 BUF_PUSH (begbuf);
2969 break;
e318085a 2970
25fe55af
RS
2971 case '\'':
2972 BUF_PUSH (endbuf);
2973 break;
e318085a 2974
25fe55af
RS
2975 case '1': case '2': case '3': case '4': case '5':
2976 case '6': case '7': case '8': case '9':
2977 if (syntax & RE_NO_BK_REFS)
2978 goto normal_char;
e318085a 2979
25fe55af 2980 c1 = c - '0';
e318085a 2981
25fe55af
RS
2982 if (c1 > regnum)
2983 FREE_STACK_RETURN (REG_ESUBREG);
e318085a 2984
25fe55af
RS
2985 /* Can't back reference to a subexpression if inside of it. */
2986 if (group_in_compile_stack (compile_stack, c1))
2987 goto normal_char;
e318085a 2988
25fe55af
RS
2989 laststart = b;
2990 BUF_PUSH_2 (duplicate, c1);
2991 break;
e318085a 2992
e318085a 2993
25fe55af
RS
2994 case '+':
2995 case '?':
2996 if (syntax & RE_BK_PLUS_QM)
2997 goto handle_plus;
2998 else
2999 goto normal_backslash;
3000
3001 default:
3002 normal_backslash:
3003 /* You might think it would be useful for \ to mean
3004 not to translate; but if we don't translate it
3005 it will never match anything. */
3006 c = TRANSLATE (c);
3007 goto normal_char;
3008 }
3009 break;
fa9a63c5
RM
3010
3011
3012 default:
25fe55af 3013 /* Expects the character in `c'. */
fa9a63c5 3014 normal_char:
b18215fc
RS
3015 p1 = p - 1; /* P1 points the head of C. */
3016#ifdef emacs
3017 if (bufp->multibyte)
3583e969
KH
3018 {
3019 c = STRING_CHAR (p1, pend - p1);
3020 c = TRANSLATE (c);
3021 /* Set P to the next character boundary. */
3022 p += MULTIBYTE_FORM_LENGTH (p1, pend - p1) - 1;
3023 }
b18215fc 3024#endif
fa9a63c5 3025 /* If no exactn currently being built. */
25fe55af 3026 if (!pending_exact
fa9a63c5 3027
25fe55af
RS
3028 /* If last exactn not at current position. */
3029 || pending_exact + *pending_exact + 1 != b
5e69f11e 3030
25fe55af 3031 /* We have only one byte following the exactn for the count. */
b18215fc 3032 || *pending_exact >= (1 << BYTEWIDTH) - (p - p1)
fa9a63c5 3033
25fe55af 3034 /* If followed by a repetition operator. */
9d99031f 3035 || (p != pend && (*p == '*' || *p == '^'))
fa9a63c5 3036 || ((syntax & RE_BK_PLUS_QM)
9d99031f
RS
3037 ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?')
3038 : p != pend && (*p == '+' || *p == '?'))
fa9a63c5 3039 || ((syntax & RE_INTERVALS)
25fe55af 3040 && ((syntax & RE_NO_BK_BRACES)
9d99031f
RS
3041 ? p != pend && *p == '{'
3042 : p + 1 < pend && p[0] == '\\' && p[1] == '{')))
fa9a63c5
RM
3043 {
3044 /* Start building a new exactn. */
5e69f11e 3045
25fe55af 3046 laststart = b;
fa9a63c5
RM
3047
3048 BUF_PUSH_2 (exactn, 0);
3049 pending_exact = b - 1;
25fe55af 3050 }
5e69f11e 3051
3583e969
KH
3052#ifdef emacs
3053 if (! SINGLE_BYTE_CHAR_P (c))
3054 {
1c8c6d39
DL
3055 unsigned char str[MAX_MULTIBYTE_LENGTH];
3056 int i = CHAR_STRING (c, str);
3583e969
KH
3057 int j;
3058 for (j = 0; j < i; j++)
3059 {
3060 BUF_PUSH (str[j]);
3061 (*pending_exact)++;
3062 }
3063 }
3064 else
3065#endif
b18215fc 3066 {
e934739e
RS
3067 BUF_PUSH (c);
3068 (*pending_exact)++;
b18215fc 3069 }
fa9a63c5 3070 break;
25fe55af 3071 } /* switch (c) */
fa9a63c5
RM
3072 } /* while p != pend */
3073
5e69f11e 3074
fa9a63c5 3075 /* Through the pattern now. */
5e69f11e 3076
505bde11 3077 FIXUP_ALT_JUMP ();
fa9a63c5 3078
5e69f11e 3079 if (!COMPILE_STACK_EMPTY)
fa9a63c5
RM
3080 FREE_STACK_RETURN (REG_EPAREN);
3081
3082 /* If we don't want backtracking, force success
3083 the first time we reach the end of the compiled pattern. */
3084 if (syntax & RE_NO_POSIX_BACKTRACKING)
3085 BUF_PUSH (succeed);
3086
3087 free (compile_stack.stack);
3088
3089 /* We have succeeded; set the length of the buffer. */
3090 bufp->used = b - bufp->buffer;
3091
3092#ifdef DEBUG
99633e97 3093 if (debug > 0)
fa9a63c5 3094 {
505bde11 3095 re_compile_fastmap (bufp);
fa9a63c5
RM
3096 DEBUG_PRINT1 ("\nCompiled pattern: \n");
3097 print_compiled_pattern (bufp);
3098 }
99633e97 3099 debug--;
fa9a63c5
RM
3100#endif /* DEBUG */
3101
3102#ifndef MATCH_MAY_ALLOCATE
3103 /* Initialize the failure stack to the largest possible stack. This
3104 isn't necessary unless we're trying to avoid calling alloca in
3105 the search and match routines. */
3106 {
3107 int num_regs = bufp->re_nsub + 1;
3108
320a2a73 3109 if (fail_stack.size < re_max_failures * TYPICAL_FAILURE_SIZE)
fa9a63c5 3110 {
a26f4ccd 3111 fail_stack.size = re_max_failures * TYPICAL_FAILURE_SIZE;
fa9a63c5 3112
fa9a63c5
RM
3113 if (! fail_stack.stack)
3114 fail_stack.stack
5e69f11e 3115 = (fail_stack_elt_t *) malloc (fail_stack.size
fa9a63c5
RM
3116 * sizeof (fail_stack_elt_t));
3117 else
3118 fail_stack.stack
3119 = (fail_stack_elt_t *) realloc (fail_stack.stack,
3120 (fail_stack.size
3121 * sizeof (fail_stack_elt_t)));
fa9a63c5
RM
3122 }
3123
3124 regex_grow_registers (num_regs);
3125 }
3126#endif /* not MATCH_MAY_ALLOCATE */
3127
3128 return REG_NOERROR;
3129} /* regex_compile */
3130\f
3131/* Subroutines for `regex_compile'. */
3132
25fe55af 3133/* Store OP at LOC followed by two-byte integer parameter ARG. */
fa9a63c5
RM
3134
3135static void
3136store_op1 (op, loc, arg)
3137 re_opcode_t op;
3138 unsigned char *loc;
3139 int arg;
3140{
3141 *loc = (unsigned char) op;
3142 STORE_NUMBER (loc + 1, arg);
3143}
3144
3145
3146/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */
3147
3148static void
3149store_op2 (op, loc, arg1, arg2)
3150 re_opcode_t op;
3151 unsigned char *loc;
3152 int arg1, arg2;
3153{
3154 *loc = (unsigned char) op;
3155 STORE_NUMBER (loc + 1, arg1);
3156 STORE_NUMBER (loc + 3, arg2);
3157}
3158
3159
3160/* Copy the bytes from LOC to END to open up three bytes of space at LOC
3161 for OP followed by two-byte integer parameter ARG. */
3162
3163static void
3164insert_op1 (op, loc, arg, end)
3165 re_opcode_t op;
3166 unsigned char *loc;
3167 int arg;
5e69f11e 3168 unsigned char *end;
fa9a63c5
RM
3169{
3170 register unsigned char *pfrom = end;
3171 register unsigned char *pto = end + 3;
3172
3173 while (pfrom != loc)
3174 *--pto = *--pfrom;
5e69f11e 3175
fa9a63c5
RM
3176 store_op1 (op, loc, arg);
3177}
3178
3179
3180/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */
3181
3182static void
3183insert_op2 (op, loc, arg1, arg2, end)
3184 re_opcode_t op;
3185 unsigned char *loc;
3186 int arg1, arg2;
5e69f11e 3187 unsigned char *end;
fa9a63c5
RM
3188{
3189 register unsigned char *pfrom = end;
3190 register unsigned char *pto = end + 5;
3191
3192 while (pfrom != loc)
3193 *--pto = *--pfrom;
5e69f11e 3194
fa9a63c5
RM
3195 store_op2 (op, loc, arg1, arg2);
3196}
3197
3198
3199/* P points to just after a ^ in PATTERN. Return true if that ^ comes
3200 after an alternative or a begin-subexpression. We assume there is at
3201 least one character before the ^. */
3202
3203static boolean
3204at_begline_loc_p (pattern, p, syntax)
99633e97 3205 const unsigned char *pattern, *p;
fa9a63c5
RM
3206 reg_syntax_t syntax;
3207{
4e8a9132 3208 const unsigned char *prev = p - 2;
fa9a63c5 3209 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
5e69f11e 3210
fa9a63c5
RM
3211 return
3212 /* After a subexpression? */
3213 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
25fe55af 3214 /* After an alternative? */
fa9a63c5
RM
3215 || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
3216}
3217
3218
3219/* The dual of at_begline_loc_p. This one is for $. We assume there is
3220 at least one character after the $, i.e., `P < PEND'. */
3221
3222static boolean
3223at_endline_loc_p (p, pend, syntax)
99633e97
SM
3224 const unsigned char *p, *pend;
3225 reg_syntax_t syntax;
fa9a63c5 3226{
4e8a9132 3227 const unsigned char *next = p;
fa9a63c5 3228 boolean next_backslash = *next == '\\';
4e8a9132 3229 const unsigned char *next_next = p + 1 < pend ? p + 1 : 0;
5e69f11e 3230
fa9a63c5
RM
3231 return
3232 /* Before a subexpression? */
3233 (syntax & RE_NO_BK_PARENS ? *next == ')'
25fe55af 3234 : next_backslash && next_next && *next_next == ')')
fa9a63c5
RM
3235 /* Before an alternative? */
3236 || (syntax & RE_NO_BK_VBAR ? *next == '|'
25fe55af 3237 : next_backslash && next_next && *next_next == '|');
fa9a63c5
RM
3238}
3239
3240
5e69f11e 3241/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
fa9a63c5
RM
3242 false if it's not. */
3243
3244static boolean
3245group_in_compile_stack (compile_stack, regnum)
3246 compile_stack_type compile_stack;
3247 regnum_t regnum;
3248{
3249 int this_element;
3250
5e69f11e
RM
3251 for (this_element = compile_stack.avail - 1;
3252 this_element >= 0;
fa9a63c5
RM
3253 this_element--)
3254 if (compile_stack.stack[this_element].regnum == regnum)
3255 return true;
3256
3257 return false;
3258}
fa9a63c5
RM
3259\f
3260/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
3261 BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible
3262 characters can start a string that matches the pattern. This fastmap
3263 is used by re_search to skip quickly over impossible starting points.
3264
96cc36cc
RS
3265 Character codes above (1 << BYTEWIDTH) are not represented in the
3266 fastmap, but the leading codes are represented. Thus, the fastmap
3267 indicates which character sets could start a match.
3268
fa9a63c5
RM
3269 The caller must supply the address of a (1 << BYTEWIDTH)-byte data
3270 area as BUFP->fastmap.
5e69f11e 3271
fa9a63c5
RM
3272 We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
3273 the pattern buffer.
3274
3275 Returns 0 if we succeed, -2 if an internal error. */
3276
3277int
3278re_compile_fastmap (bufp)
3279 struct re_pattern_buffer *bufp;
3280{
505bde11 3281 int j, k;
1fb352e0 3282 boolean not;
fa9a63c5
RM
3283#ifdef MATCH_MAY_ALLOCATE
3284 fail_stack_type fail_stack;
3285#endif
3286#ifndef REGEX_MALLOC
3287 char *destination;
3288#endif
5e69f11e 3289
fa9a63c5
RM
3290 register char *fastmap = bufp->fastmap;
3291 unsigned char *pattern = bufp->buffer;
3292 unsigned long size = bufp->used;
3293 unsigned char *p = pattern;
3294 register unsigned char *pend = pattern + size;
1fb352e0 3295 const boolean multibyte = bufp->multibyte;
fa9a63c5 3296
99633e97 3297#if defined (REL_ALLOC) && defined (REGEX_MALLOC)
fa9a63c5
RM
3298 /* This holds the pointer to the failure stack, when
3299 it is allocated relocatably. */
3300 fail_stack_elt_t *failure_stack_ptr;
99633e97 3301#endif
fa9a63c5
RM
3302
3303 /* Assume that each path through the pattern can be null until
25fe55af 3304 proven otherwise. We set this false at the bottom of switch
fa9a63c5
RM
3305 statement, to which we get only if a particular path doesn't
3306 match the empty string. */
3307 boolean path_can_be_null = true;
3308
3309 /* We aren't doing a `succeed_n' to begin with. */
3310 boolean succeed_n_p = false;
3311
b18215fc 3312 /* If all elements for base leading-codes in fastmap is set, this
25fe55af 3313 flag is set true. */
b18215fc
RS
3314 boolean match_any_multibyte_characters = false;
3315
fa9a63c5 3316 assert (fastmap != NULL && p != NULL);
5e69f11e 3317
fa9a63c5 3318 INIT_FAIL_STACK ();
25fe55af 3319 bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */
fa9a63c5
RM
3320 bufp->fastmap_accurate = 1; /* It will be when we're done. */
3321 bufp->can_be_null = 0;
5e69f11e 3322
505bde11
SM
3323 /* The loop below works as follows:
3324 - It has a working-list kept in the PATTERN_STACK and which basically
3325 starts by only containing a pointer to the first operation.
3326 - If the opcode we're looking at is a match against some set of
3327 chars, then we add those chars to the fastmap and go on to the
3328 next work element from the worklist (done via `break').
3329 - If the opcode is a control operator on the other hand, we either
3330 ignore it (if it's meaningless at this point, such as `start_memory')
3331 or execute it (if it's a jump). If the jump has several destinations
3332 (i.e. `on_failure_jump'), then we push the other destination onto the
3333 worklist.
3334 We guarantee termination by ignoring backward jumps (more or less),
3335 so that `p' is monotonically increasing. More to the point, we
3336 never set `p' (or push) anything `<= p1'. */
3337
3338 /* If can_be_null is set, then the fastmap will not be used anyway. */
3339 while (!bufp->can_be_null)
fa9a63c5 3340 {
505bde11
SM
3341 /* `p1' is used as a marker of how far back a `on_failure_jump'
3342 can go without being ignored. It is normally equal to `p'
3343 (which prevents any backward `on_failure_jump') except right
3344 after a plain `jump', to allow patterns such as:
3345 0: jump 10
3346 3..9: <body>
3347 10: on_failure_jump 3
3348 as used for the *? operator. */
3349 unsigned char *p1 = p;
3350
fa9a63c5
RM
3351 if (p == pend || *p == succeed)
3352 {
3353 /* We have reached the (effective) end of pattern. */
505bde11 3354 if (!PATTERN_STACK_EMPTY ())
fa9a63c5
RM
3355 {
3356 bufp->can_be_null |= path_can_be_null;
3357
3358 /* Reset for next path. */
3359 path_can_be_null = true;
3360
66f0296e 3361 p = (unsigned char*) POP_PATTERN_OP ();
fa9a63c5
RM
3362
3363 continue;
3364 }
3365 else
3366 break;
3367 }
3368
25fe55af 3369 /* We should never be about to go beyond the end of the pattern. */
fa9a63c5 3370 assert (p < pend);
5e69f11e 3371
fa9a63c5
RM
3372 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
3373 {
3374
fa9a63c5 3375 case duplicate:
505bde11
SM
3376 /* If the first character has to match a backreference, that means
3377 that the group was empty (since it already matched). Since this
3378 is the only case that interests us here, we can assume that the
3379 backreference must match the empty string. */
3380 p++;
3381 continue;
fa9a63c5
RM
3382
3383
3384 /* Following are the cases which match a character. These end
25fe55af 3385 with `break'. */
fa9a63c5
RM
3386
3387 case exactn:
25fe55af 3388 fastmap[p[1]] = 1;
fa9a63c5
RM
3389 break;
3390
3391
1fb352e0
SM
3392 case anychar:
3393 /* We could put all the chars except for \n (and maybe \0)
3394 but we don't bother since it is generally not worth it. */
3395 bufp->can_be_null = 1;
3396 continue;
fa9a63c5
RM
3397
3398
b18215fc 3399 case charset_not:
ba5c004d
RS
3400 /* Chars beyond end of bitmap are possible matches.
3401 All the single-byte codes can occur in multibyte buffers.
3402 So any that are not listed in the charset
3403 are possible matches, even in multibyte buffers. */
1fb352e0 3404 if (!fastmap) break;
b18215fc 3405 for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
1fb352e0 3406 j < (1 << BYTEWIDTH); j++)
b18215fc 3407 fastmap[j] = 1;
1fb352e0
SM
3408 /* Fallthrough */
3409 case charset:
3410 if (!fastmap) break;
3411 not = (re_opcode_t) *(p - 1) == charset_not;
b18215fc
RS
3412 for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
3413 j >= 0; j--)
1fb352e0 3414 if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
b18215fc
RS
3415 fastmap[j] = 1;
3416
1fb352e0
SM
3417 if ((not && multibyte)
3418 /* Any character set can possibly contain a character
3419 which doesn't match the specified set of characters. */
3420 || (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
3421 && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0))
3422 /* If we can match a character class, we can match
3423 any character set. */
b18215fc
RS
3424 {
3425 set_fastmap_for_multibyte_characters:
3426 if (match_any_multibyte_characters == false)
3427 {
3428 for (j = 0x80; j < 0xA0; j++) /* XXX */
3429 if (BASE_LEADING_CODE_P (j))
3430 fastmap[j] = 1;
3431 match_any_multibyte_characters = true;
3432 }
3433 }
b18215fc 3434
1fb352e0
SM
3435 else if (!not && CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
3436 && match_any_multibyte_characters == false)
3437 {
3438 /* Set fastmap[I] 1 where I is a base leading code of each
3439 multibyte character in the range table. */
3440 int c, count;
b18215fc 3441
1fb352e0
SM
3442 /* Make P points the range table. `+ 2' is to skip flag
3443 bits for a character class. */
3444 p += CHARSET_BITMAP_SIZE (&p[-2]) + 2;
b18215fc 3445
1fb352e0
SM
3446 /* Extract the number of ranges in range table into COUNT. */
3447 EXTRACT_NUMBER_AND_INCR (count, p);
3448 for (; count > 0; count--, p += 2 * 3) /* XXX */
3449 {
3450 /* Extract the start of each range. */
3451 EXTRACT_CHARACTER (c, p);
3452 j = CHAR_CHARSET (c);
3453 fastmap[CHARSET_LEADING_CODE_BASE (j)] = 1;
3454 }
3455 }
b18215fc
RS
3456 break;
3457
1fb352e0
SM
3458 case syntaxspec:
3459 case notsyntaxspec:
3460 if (!fastmap) break;
3461#ifndef emacs
3462 not = (re_opcode_t)p[-1] == notsyntaxspec;
3463 k = *p++;
3464 for (j = 0; j < (1 << BYTEWIDTH); j++)
89084293 3465 if ((SYNTAX (j) == (re_opcode_t) k) ^ not)
b18215fc 3466 fastmap[j] = 1;
b18215fc 3467 break;
1fb352e0 3468#else /* emacs */
b18215fc
RS
3469 /* This match depends on text properties. These end with
3470 aborting optimizations. */
3471 bufp->can_be_null = 1;
505bde11 3472 continue;
b18215fc
RS
3473
3474 case categoryspec:
b18215fc 3475 case notcategoryspec:
1fb352e0
SM
3476 if (!fastmap) break;
3477 not = (re_opcode_t)p[-1] == notcategoryspec;
b18215fc 3478 k = *p++;
1fb352e0
SM
3479 for (j = 0; j < (1 << BYTEWIDTH); j++)
3480 if ((CHAR_HAS_CATEGORY (j, k)) ^ not)
b18215fc
RS
3481 fastmap[j] = 1;
3482
1fb352e0 3483 if (multibyte)
b18215fc 3484 /* Any character set can possibly contain a character
1fb352e0 3485 whose category is K (or not). */
b18215fc
RS
3486 goto set_fastmap_for_multibyte_characters;
3487 break;
3488
fa9a63c5 3489 /* All cases after this match the empty string. These end with
25fe55af 3490 `continue'. */
fa9a63c5 3491
fa9a63c5
RM
3492 case before_dot:
3493 case at_dot:
3494 case after_dot:
1fb352e0 3495#endif /* !emacs */
25fe55af
RS
3496 case no_op:
3497 case begline:
3498 case endline:
fa9a63c5
RM
3499 case begbuf:
3500 case endbuf:
3501 case wordbound:
3502 case notwordbound:
3503 case wordbeg:
3504 case wordend:
25fe55af 3505 continue;
fa9a63c5
RM
3506
3507
3508 case jump_n:
fa9a63c5 3509 case jump:
25fe55af 3510 EXTRACT_NUMBER_AND_INCR (j, p);
505bde11
SM
3511 if (j < 0)
3512 /* Backward jumps can only go back to code that we've already
3513 visited. `re_compile' should make sure this is true. */
3514 break;
25fe55af 3515 p += j;
505bde11
SM
3516 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p))
3517 {
3518 case on_failure_jump:
3519 case on_failure_keep_string_jump:
505bde11
SM
3520 case on_failure_jump_loop:
3521 case on_failure_jump_smart:
3522 p++;
3523 break;
3524 default:
3525 continue;
3526 };
3527 /* Keep `p1' to allow the `on_failure_jump' we are jumping to
3528 to jump back to "just after here". */
3529 /* Fallthrough */
fa9a63c5 3530
25fe55af
RS
3531 case on_failure_jump:
3532 case on_failure_keep_string_jump:
505bde11
SM
3533 case on_failure_jump_loop:
3534 case on_failure_jump_smart:
fa9a63c5 3535 handle_on_failure_jump:
25fe55af
RS
3536 EXTRACT_NUMBER_AND_INCR (j, p);
3537
3538 /* For some patterns, e.g., `(a?)?', `p+j' here points to the
3539 end of the pattern. We don't want to push such a point,
3540 since when we restore it above, entering the switch will
3541 increment `p' past the end of the pattern. We don't need
3542 to push such a point since we obviously won't find any more
3543 fastmap entries beyond `pend'. Such a pattern can match
3544 the null string, though. */
505bde11
SM
3545 if (p + j <= p1)
3546 /* Backward jump to be ignored. */
3547 ;
3548 else if (p + j < pend)
25fe55af
RS
3549 {
3550 if (!PUSH_PATTERN_OP (p + j, fail_stack))
fa9a63c5
RM
3551 {
3552 RESET_FAIL_STACK ();
3553 return -2;
3554 }
fa9a63c5 3555 }
25fe55af
RS
3556 else
3557 bufp->can_be_null = 1;
fa9a63c5 3558
25fe55af
RS
3559 if (succeed_n_p)
3560 {
3561 EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
3562 succeed_n_p = false;
3563 }
3564
3565 continue;
fa9a63c5
RM
3566
3567
3568 case succeed_n:
25fe55af
RS
3569 /* Get to the number of times to succeed. */
3570 p += 2;
fa9a63c5 3571
25fe55af
RS
3572 /* Increment p past the n for when k != 0. */
3573 EXTRACT_NUMBER_AND_INCR (k, p);
3574 if (k == 0)
fa9a63c5 3575 {
25fe55af
RS
3576 p -= 4;
3577 succeed_n_p = true; /* Spaghetti code alert. */
3578 goto handle_on_failure_jump;
3579 }
3580 continue;
fa9a63c5
RM
3581
3582
3583 case set_number_at:
25fe55af
RS
3584 p += 4;
3585 continue;
fa9a63c5
RM
3586
3587
3588 case start_memory:
25fe55af 3589 case stop_memory:
505bde11 3590 p += 1;
fa9a63c5
RM
3591 continue;
3592
3593
3594 default:
25fe55af
RS
3595 abort (); /* We have listed all the cases. */
3596 } /* switch *p++ */
fa9a63c5
RM
3597
3598 /* Getting here means we have found the possible starting
25fe55af
RS
3599 characters for one path of the pattern -- and that the empty
3600 string does not match. We need not follow this path further.
3601 Instead, look at the next alternative (remembered on the
3602 stack), or quit if no more. The test at the top of the loop
3603 does these things. */
fa9a63c5
RM
3604 path_can_be_null = false;
3605 p = pend;
3606 } /* while p */
3607
3608 /* Set `can_be_null' for the last path (also the first path, if the
25fe55af 3609 pattern is empty). */
fa9a63c5 3610 bufp->can_be_null |= path_can_be_null;
fa9a63c5
RM
3611 RESET_FAIL_STACK ();
3612 return 0;
3613} /* re_compile_fastmap */
3614\f
3615/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
3616 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
3617 this memory for recording register information. STARTS and ENDS
3618 must be allocated using the malloc library routine, and must each
3619 be at least NUM_REGS * sizeof (regoff_t) bytes long.
3620
3621 If NUM_REGS == 0, then subsequent matches should allocate their own
3622 register data.
3623
3624 Unless this function is called, the first search or match using
3625 PATTERN_BUFFER will allocate its own register data, without
3626 freeing the old data. */
3627
3628void
3629re_set_registers (bufp, regs, num_regs, starts, ends)
3630 struct re_pattern_buffer *bufp;
3631 struct re_registers *regs;
3632 unsigned num_regs;
3633 regoff_t *starts, *ends;
3634{
3635 if (num_regs)
3636 {
3637 bufp->regs_allocated = REGS_REALLOCATE;
3638 regs->num_regs = num_regs;
3639 regs->start = starts;
3640 regs->end = ends;
3641 }
3642 else
3643 {
3644 bufp->regs_allocated = REGS_UNALLOCATED;
3645 regs->num_regs = 0;
3646 regs->start = regs->end = (regoff_t *) 0;
3647 }
3648}
3649\f
25fe55af 3650/* Searching routines. */
fa9a63c5
RM
3651
3652/* Like re_search_2, below, but only one string is specified, and
3653 doesn't let you say where to stop matching. */
3654
3655int
3656re_search (bufp, string, size, startpos, range, regs)
3657 struct re_pattern_buffer *bufp;
3658 const char *string;
3659 int size, startpos, range;
3660 struct re_registers *regs;
3661{
5e69f11e 3662 return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
fa9a63c5
RM
3663 regs, size);
3664}
3665
b18215fc
RS
3666/* End address of virtual concatenation of string. */
3667#define STOP_ADDR_VSTRING(P) \
3668 (((P) >= size1 ? string2 + size2 : string1 + size1))
3669
3670/* Address of POS in the concatenation of virtual string. */
3671#define POS_ADDR_VSTRING(POS) \
3672 (((POS) >= size1 ? string2 - size1 : string1) + (POS))
fa9a63c5
RM
3673
3674/* Using the compiled pattern in BUFP->buffer, first tries to match the
3675 virtual concatenation of STRING1 and STRING2, starting first at index
3676 STARTPOS, then at STARTPOS + 1, and so on.
5e69f11e 3677
fa9a63c5 3678 STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
5e69f11e 3679
fa9a63c5
RM
3680 RANGE is how far to scan while trying to match. RANGE = 0 means try
3681 only at STARTPOS; in general, the last start tried is STARTPOS +
3682 RANGE.
5e69f11e 3683
fa9a63c5
RM
3684 In REGS, return the indices of the virtual concatenation of STRING1
3685 and STRING2 that matched the entire BUFP->buffer and its contained
3686 subexpressions.
5e69f11e 3687
fa9a63c5
RM
3688 Do not consider matching one past the index STOP in the virtual
3689 concatenation of STRING1 and STRING2.
3690
3691 We return either the position in the strings at which the match was
3692 found, -1 if no match, or -2 if error (such as failure
3693 stack overflow). */
3694
3695int
66f0296e 3696re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
fa9a63c5 3697 struct re_pattern_buffer *bufp;
66f0296e 3698 const char *str1, *str2;
fa9a63c5
RM
3699 int size1, size2;
3700 int startpos;
3701 int range;
3702 struct re_registers *regs;
3703 int stop;
3704{
3705 int val;
66f0296e
SM
3706 re_char *string1 = (re_char*) str1;
3707 re_char *string2 = (re_char*) str2;
fa9a63c5 3708 register char *fastmap = bufp->fastmap;
6676cb1c 3709 register RE_TRANSLATE_TYPE translate = bufp->translate;
fa9a63c5
RM
3710 int total_size = size1 + size2;
3711 int endpos = startpos + range;
c8499ba5 3712 int anchored_start = 0;
fa9a63c5 3713
25fe55af 3714 /* Nonzero if we have to concern multibyte character. */
b18215fc
RS
3715 int multibyte = bufp->multibyte;
3716
fa9a63c5
RM
3717 /* Check for out-of-range STARTPOS. */
3718 if (startpos < 0 || startpos > total_size)
3719 return -1;
5e69f11e 3720
fa9a63c5 3721 /* Fix up RANGE if it might eventually take us outside
34597fa9 3722 the virtual concatenation of STRING1 and STRING2.
5e69f11e 3723 Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. */
34597fa9
RS
3724 if (endpos < 0)
3725 range = 0 - startpos;
fa9a63c5
RM
3726 else if (endpos > total_size)
3727 range = total_size - startpos;
3728
3729 /* If the search isn't to be a backwards one, don't waste time in a
7b140fd7 3730 search for a pattern anchored at beginning of buffer. */
fa9a63c5
RM
3731 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
3732 {
3733 if (startpos > 0)
3734 return -1;
3735 else
7b140fd7 3736 range = 0;
fa9a63c5
RM
3737 }
3738
ae4788a8
RS
3739#ifdef emacs
3740 /* In a forward search for something that starts with \=.
3741 don't keep searching past point. */
3742 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
3743 {
7b140fd7
RS
3744 range = PT_BYTE - BEGV_BYTE - startpos;
3745 if (range < 0)
ae4788a8
RS
3746 return -1;
3747 }
3748#endif /* emacs */
3749
fa9a63c5
RM
3750 /* Update the fastmap now if not correct already. */
3751 if (fastmap && !bufp->fastmap_accurate)
3752 if (re_compile_fastmap (bufp) == -2)
3753 return -2;
5e69f11e 3754
c8499ba5
RS
3755 /* See whether the pattern is anchored. */
3756 if (bufp->buffer[0] == begline)
3757 anchored_start = 1;
3758
b18215fc 3759#ifdef emacs
cc9b4df2
KH
3760 gl_state.object = re_match_object;
3761 {
99633e97 3762 int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos));
cc9b4df2
KH
3763
3764 SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
3765 }
b18215fc
RS
3766#endif
3767
fa9a63c5
RM
3768 /* Loop through the string, looking for a place to start matching. */
3769 for (;;)
5e69f11e 3770 {
c8499ba5
RS
3771 /* If the pattern is anchored,
3772 skip quickly past places we cannot match.
3773 We don't bother to treat startpos == 0 specially
3774 because that case doesn't repeat. */
3775 if (anchored_start && startpos > 0)
3776 {
3777 if (! (bufp->newline_anchor
3778 && ((startpos <= size1 ? string1[startpos - 1]
3779 : string2[startpos - size1 - 1])
3780 == '\n')))
3781 goto advance;
3782 }
3783
fa9a63c5 3784 /* If a fastmap is supplied, skip quickly over characters that
25fe55af
RS
3785 cannot be the start of a match. If the pattern can match the
3786 null string, however, we don't need to skip characters; we want
3787 the first null string. */
fa9a63c5
RM
3788 if (fastmap && startpos < total_size && !bufp->can_be_null)
3789 {
66f0296e 3790 register re_char *d;
e934739e
RS
3791 register unsigned int buf_ch;
3792
3793 d = POS_ADDR_VSTRING (startpos);
3794
25fe55af 3795 if (range > 0) /* Searching forwards. */
fa9a63c5 3796 {
fa9a63c5
RM
3797 register int lim = 0;
3798 int irange = range;
3799
25fe55af
RS
3800 if (startpos < size1 && startpos + range >= size1)
3801 lim = range - (size1 - startpos);
fa9a63c5 3802
25fe55af
RS
3803 /* Written out as an if-else to avoid testing `translate'
3804 inside the loop. */
28ae27ae
AS
3805 if (RE_TRANSLATE_P (translate))
3806 {
e934739e
RS
3807 if (multibyte)
3808 while (range > lim)
3809 {
3810 int buf_charlen;
3811
3812 buf_ch = STRING_CHAR_AND_LENGTH (d, range - lim,
3813 buf_charlen);
3814
3815 buf_ch = RE_TRANSLATE (translate, buf_ch);
3816 if (buf_ch >= 0400
3817 || fastmap[buf_ch])
3818 break;
3819
3820 range -= buf_charlen;
3821 d += buf_charlen;
3822 }
3823 else
3824 while (range > lim
66f0296e 3825 && !fastmap[RE_TRANSLATE (translate, *d)])
33c46939
RS
3826 {
3827 d++;
3828 range--;
3829 }
e934739e 3830 }
fa9a63c5 3831 else
66f0296e 3832 while (range > lim && !fastmap[*d])
33c46939
RS
3833 {
3834 d++;
3835 range--;
3836 }
fa9a63c5
RM
3837
3838 startpos += irange - range;
3839 }
25fe55af 3840 else /* Searching backwards. */
fa9a63c5 3841 {
4e8a9132
SM
3842 buf_ch = STRING_CHAR (d, (startpos >= size1
3843 ? size2 + size1 - startpos
3844 : size1 - startpos));
28703c16 3845 if (RE_TRANSLATE_P (translate))
e934739e 3846 buf_ch = RE_TRANSLATE (translate, buf_ch);
fa9a63c5 3847
e934739e
RS
3848 if (! (buf_ch >= 0400
3849 || fastmap[buf_ch]))
fa9a63c5
RM
3850 goto advance;
3851 }
3852 }
3853
3854 /* If can't match the null string, and that's all we have left, fail. */
3855 if (range >= 0 && startpos == total_size && fastmap
25fe55af 3856 && !bufp->can_be_null)
fa9a63c5
RM
3857 return -1;
3858
3859 val = re_match_2_internal (bufp, string1, size1, string2, size2,
3860 startpos, regs, stop);
3861#ifndef REGEX_MALLOC
3862#ifdef C_ALLOCA
3863 alloca (0);
3864#endif
3865#endif
3866
3867 if (val >= 0)
3868 return startpos;
5e69f11e 3869
fa9a63c5
RM
3870 if (val == -2)
3871 return -2;
3872
3873 advance:
5e69f11e 3874 if (!range)
25fe55af 3875 break;
5e69f11e 3876 else if (range > 0)
25fe55af 3877 {
b18215fc
RS
3878 /* Update STARTPOS to the next character boundary. */
3879 if (multibyte)
3880 {
66f0296e
SM
3881 re_char *p = POS_ADDR_VSTRING (startpos);
3882 re_char *pend = STOP_ADDR_VSTRING (startpos);
b18215fc
RS
3883 int len = MULTIBYTE_FORM_LENGTH (p, pend - p);
3884
3885 range -= len;
3886 if (range < 0)
3887 break;
3888 startpos += len;
3889 }
3890 else
3891 {
b560c397
RS
3892 range--;
3893 startpos++;
3894 }
e318085a 3895 }
fa9a63c5 3896 else
25fe55af
RS
3897 {
3898 range++;
3899 startpos--;
b18215fc
RS
3900
3901 /* Update STARTPOS to the previous character boundary. */
3902 if (multibyte)
3903 {
66f0296e 3904 re_char *p = POS_ADDR_VSTRING (startpos);
b18215fc
RS
3905 int len = 0;
3906
3907 /* Find the head of multibyte form. */
5d967c7a 3908 while (!CHAR_HEAD_P (*p))
b18215fc
RS
3909 p--, len++;
3910
3911 /* Adjust it. */
3912#if 0 /* XXX */
3913 if (MULTIBYTE_FORM_LENGTH (p, len + 1) != (len + 1))
3914 ;
3915 else
3916#endif
3917 {
3918 range += len;
3919 if (range > 0)
3920 break;
3921
3922 startpos -= len;
3923 }
3924 }
25fe55af 3925 }
fa9a63c5
RM
3926 }
3927 return -1;
3928} /* re_search_2 */
3929\f
3930/* Declarations and macros for re_match_2. */
3931
3932static int bcmp_translate ();
fa9a63c5
RM
3933
3934/* This converts PTR, a pointer into one of the search strings `string1'
3935 and `string2' into an offset from the beginning of that string. */
3936#define POINTER_TO_OFFSET(ptr) \
3937 (FIRST_STRING_P (ptr) \
3938 ? ((regoff_t) ((ptr) - string1)) \
3939 : ((regoff_t) ((ptr) - string2 + size1)))
3940
fa9a63c5
RM
3941/* Call before fetching a character with *d. This switches over to
3942 string2 if necessary. */
3943#define PREFETCH() \
25fe55af 3944 while (d == dend) \
fa9a63c5
RM
3945 { \
3946 /* End of string2 => fail. */ \
25fe55af
RS
3947 if (dend == end_match_2) \
3948 goto fail; \
3949 /* End of string1 => advance to string2. */ \
3950 d = string2; \
fa9a63c5
RM
3951 dend = end_match_2; \
3952 }
3953
3954
3955/* Test if at very beginning or at very end of the virtual concatenation
25fe55af 3956 of `string1' and `string2'. If only one string, it's `string2'. */
fa9a63c5 3957#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
5e69f11e 3958#define AT_STRINGS_END(d) ((d) == end2)
fa9a63c5
RM
3959
3960
3961/* Test if D points to a character which is word-constituent. We have
3962 two special cases to check for: if past the end of string1, look at
3963 the first character in string2; and if before the beginning of
3964 string2, look at the last character in string1. */
3965#define WORDCHAR_P(d) \
3966 (SYNTAX ((d) == end1 ? *string2 \
25fe55af 3967 : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
fa9a63c5
RM
3968 == Sword)
3969
9121ca40 3970/* Disabled due to a compiler bug -- see comment at case wordbound */
b18215fc
RS
3971
3972/* The comment at case wordbound is following one, but we don't use
3973 AT_WORD_BOUNDARY anymore to support multibyte form.
3974
3975 The DEC Alpha C compiler 3.x generates incorrect code for the
25fe55af
RS
3976 test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of
3977 AT_WORD_BOUNDARY, so this code is disabled. Expanding the
b18215fc
RS
3978 macro and introducing temporary variables works around the bug. */
3979
9121ca40 3980#if 0
fa9a63c5
RM
3981/* Test if the character before D and the one at D differ with respect
3982 to being word-constituent. */
3983#define AT_WORD_BOUNDARY(d) \
3984 (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
3985 || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
9121ca40 3986#endif
fa9a63c5
RM
3987
3988/* Free everything we malloc. */
3989#ifdef MATCH_MAY_ALLOCATE
00049484 3990#define FREE_VAR(var) if (var) { REGEX_FREE (var); var = NULL; } else
fa9a63c5
RM
3991#define FREE_VARIABLES() \
3992 do { \
3993 REGEX_FREE_STACK (fail_stack.stack); \
3994 FREE_VAR (regstart); \
3995 FREE_VAR (regend); \
fa9a63c5
RM
3996 FREE_VAR (best_regstart); \
3997 FREE_VAR (best_regend); \
fa9a63c5
RM
3998 } while (0)
3999#else
4000#define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */
4001#endif /* not MATCH_MAY_ALLOCATE */
4002
505bde11
SM
4003\f
4004/* Optimization routines. */
4005
4e8a9132
SM
4006/* If the operation is a match against one or more chars,
4007 return a pointer to the next operation, else return NULL. */
4008static unsigned char *
4009skip_one_char (p)
4010 unsigned char *p;
4011{
4012 switch (SWITCH_ENUM_CAST (*p++))
4013 {
4014 case anychar:
4015 break;
4016
4017 case exactn:
4018 p += *p + 1;
4019 break;
4020
4021 case charset_not:
4022 case charset:
4023 if (CHARSET_RANGE_TABLE_EXISTS_P (p - 1))
4024 {
4025 int mcnt;
4026 p = CHARSET_RANGE_TABLE (p - 1);
4027 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4028 p = CHARSET_RANGE_TABLE_END (p, mcnt);
4029 }
4030 else
4031 p += 1 + CHARSET_BITMAP_SIZE (p - 1);
4032 break;
4033
4e8a9132
SM
4034 case syntaxspec:
4035 case notsyntaxspec:
1fb352e0 4036#ifdef emacs
4e8a9132
SM
4037 case categoryspec:
4038 case notcategoryspec:
4039#endif /* emacs */
4040 p++;
4041 break;
4042
4043 default:
4044 p = NULL;
4045 }
4046 return p;
4047}
4048
4049
505bde11
SM
4050/* Jump over non-matching operations. */
4051static unsigned char *
4e8a9132 4052skip_noops (p, pend)
505bde11 4053 unsigned char *p, *pend;
505bde11
SM
4054{
4055 int mcnt;
4056 while (p < pend)
4057 {
4058 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p))
4059 {
4060 case start_memory:
505bde11
SM
4061 case stop_memory:
4062 p += 2; break;
4063 case no_op:
4064 p += 1; break;
4065 case jump:
4066 p += 1;
4067 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4068 p += mcnt;
4069 break;
4070 default:
4071 return p;
4072 }
4073 }
4074 assert (p == pend);
4075 return p;
4076}
4077
4078/* Non-zero if "p1 matches something" implies "p2 fails". */
4079static int
4080mutually_exclusive_p (bufp, p1, p2)
4081 struct re_pattern_buffer *bufp;
4082 unsigned char *p1, *p2;
4083{
4e8a9132
SM
4084 re_opcode_t op2;
4085 const boolean multibyte = bufp->multibyte;
505bde11
SM
4086 unsigned char *pend = bufp->buffer + bufp->used;
4087
4e8a9132 4088 assert (p1 >= bufp->buffer && p1 < pend
505bde11
SM
4089 && p2 >= bufp->buffer && p2 <= pend);
4090
4091 /* Skip over open/close-group commands.
4092 If what follows this loop is a ...+ construct,
4093 look at what begins its body, since we will have to
4094 match at least one of that. */
4e8a9132
SM
4095 p2 = skip_noops (p2, pend);
4096 /* The same skip can be done for p1, except that this function
4097 is only used in the case where p1 is a simple match operator. */
4098 /* p1 = skip_noops (p1, pend); */
4099
4100 assert (p1 >= bufp->buffer && p1 < pend
4101 && p2 >= bufp->buffer && p2 <= pend);
4102
4103 op2 = p2 == pend ? succeed : *p2;
4104
4105 switch (SWITCH_ENUM_CAST (op2))
505bde11 4106 {
4e8a9132
SM
4107 case succeed:
4108 case endbuf:
4109 /* If we're at the end of the pattern, we can change. */
4110 if (skip_one_char (p1))
505bde11 4111 {
505bde11
SM
4112 DEBUG_PRINT1 (" End of pattern: fast loop.\n");
4113 return 1;
505bde11 4114 }
4e8a9132
SM
4115 break;
4116
4117 case endline:
4118 if (!bufp->newline_anchor)
4119 break;
4120 /* Fallthrough */
4121 case exactn:
4122 {
4123 register unsigned int c
4124 = (re_opcode_t) *p2 == endline ? '\n'
4125 : RE_STRING_CHAR(p2 + 2, pend - p2 - 2);
505bde11 4126
4e8a9132
SM
4127 if ((re_opcode_t) *p1 == exactn)
4128 {
4129 if (c != RE_STRING_CHAR (p1 + 2, pend - p1 - 2))
4130 {
4131 DEBUG_PRINT3 (" '%c' != '%c' => fast loop.\n", c, p1[2]);
4132 return 1;
4133 }
4134 }
505bde11 4135
4e8a9132
SM
4136 else if ((re_opcode_t) *p1 == charset
4137 || (re_opcode_t) *p1 == charset_not)
4138 {
4139 int not = (re_opcode_t) *p1 == charset_not;
505bde11 4140
4e8a9132
SM
4141 /* Test if C is listed in charset (or charset_not)
4142 at `p1'. */
4143 if (SINGLE_BYTE_CHAR_P (c))
4144 {
4145 if (c < CHARSET_BITMAP_SIZE (p1) * BYTEWIDTH
4146 && p1[2 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4147 not = !not;
4148 }
4149 else if (CHARSET_RANGE_TABLE_EXISTS_P (p1))
4150 CHARSET_LOOKUP_RANGE_TABLE (not, c, p1);
505bde11 4151
4e8a9132
SM
4152 /* `not' is equal to 1 if c would match, which means
4153 that we can't change to pop_failure_jump. */
4154 if (!not)
4155 {
4156 DEBUG_PRINT1 (" No match => fast loop.\n");
4157 return 1;
4158 }
4159 }
4160 else if ((re_opcode_t) *p1 == anychar
4161 && c == '\n')
4162 {
4163 DEBUG_PRINT1 (" . != \\n => fast loop.\n");
4164 return 1;
4165 }
4166 }
4167 break;
505bde11 4168
4e8a9132
SM
4169 case charset:
4170 case charset_not:
4171 {
4172 if ((re_opcode_t) *p1 == exactn)
4173 /* Reuse the code above. */
4174 return mutually_exclusive_p (bufp, p2, p1);
505bde11
SM
4175
4176
4177 /* It is hard to list up all the character in charset
4178 P2 if it includes multibyte character. Give up in
4179 such case. */
4180 else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2))
4181 {
4182 /* Now, we are sure that P2 has no range table.
4183 So, for the size of bitmap in P2, `p2[1]' is
4184 enough. But P1 may have range table, so the
4185 size of bitmap table of P1 is extracted by
4186 using macro `CHARSET_BITMAP_SIZE'.
4187
4188 Since we know that all the character listed in
4189 P2 is ASCII, it is enough to test only bitmap
4190 table of P1. */
4191
4192 if (*p1 == *p2)
4193 {
4194 int idx;
4195 /* We win if the charset inside the loop
4196 has no overlap with the one after the loop. */
4197 for (idx = 0;
4198 (idx < (int) p2[1]
4199 && idx < CHARSET_BITMAP_SIZE (p1));
4200 idx++)
4201 if ((p2[2 + idx] & p1[2 + idx]) != 0)
4202 break;
4203
4204 if (idx == p2[1]
4205 || idx == CHARSET_BITMAP_SIZE (p1))
4206 {
4207 DEBUG_PRINT1 (" No match => fast loop.\n");
4208 return 1;
4209 }
4210 }
4211 else if ((re_opcode_t) *p1 == charset
4212 || (re_opcode_t) *p1 == charset_not)
4213 {
4214 int idx;
4215 /* We win if the charset_not inside the loop lists
4216 every character listed in the charset after. */
4217 for (idx = 0; idx < (int) p2[1]; idx++)
4218 if (! (p2[2 + idx] == 0
4219 || (idx < CHARSET_BITMAP_SIZE (p1)
4220 && ((p2[2 + idx] & ~ p1[2 + idx]) == 0))))
4221 break;
4222
4e8a9132
SM
4223 if (idx == p2[1])
4224 {
4225 DEBUG_PRINT1 (" No match => fast loop.\n");
4226 return 1;
4227 }
4228 }
4229 }
4230 }
4231
4e8a9132
SM
4232 case wordend:
4233 case notsyntaxspec:
4234 return ((re_opcode_t) *p1 == syntaxspec
4235 && p1[1] == (op2 == wordend ? Sword : p2[1]));
4236
4237 case wordbeg:
4238 case syntaxspec:
4239 return ((re_opcode_t) *p1 == notsyntaxspec
4240 && p1[1] == (op2 == wordend ? Sword : p2[1]));
4241
4242 case wordbound:
4243 return (((re_opcode_t) *p1 == notsyntaxspec
4244 || (re_opcode_t) *p1 == syntaxspec)
4245 && p1[1] == Sword);
4246
1fb352e0 4247#ifdef emacs
4e8a9132
SM
4248 case categoryspec:
4249 return ((re_opcode_t) *p1 == notcategoryspec && p1[1] == p2[1]);
4250 case notcategoryspec:
4251 return ((re_opcode_t) *p1 == categoryspec && p1[1] == p2[1]);
4252#endif /* emacs */
4253
4254 default:
4255 ;
505bde11
SM
4256 }
4257
4258 /* Safe default. */
4259 return 0;
4260}
4261
fa9a63c5
RM
4262\f
4263/* Matching routines. */
4264
25fe55af 4265#ifndef emacs /* Emacs never uses this. */
fa9a63c5
RM
4266/* re_match is like re_match_2 except it takes only a single string. */
4267
4268int
4269re_match (bufp, string, size, pos, regs)
4270 struct re_pattern_buffer *bufp;
4271 const char *string;
4272 int size, pos;
4273 struct re_registers *regs;
4274{
4275 int result = re_match_2_internal (bufp, NULL, 0, string, size,
4276 pos, regs, size);
4277 alloca (0);
4278 return result;
4279}
4280#endif /* not emacs */
4281
b18215fc
RS
4282#ifdef emacs
4283/* In Emacs, this is the string or buffer in which we
25fe55af 4284 are matching. It is used for looking up syntax properties. */
b18215fc
RS
4285Lisp_Object re_match_object;
4286#endif
fa9a63c5
RM
4287
4288/* re_match_2 matches the compiled pattern in BUFP against the
4289 the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
4290 and SIZE2, respectively). We start matching at POS, and stop
4291 matching at STOP.
5e69f11e 4292
fa9a63c5 4293 If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
25fe55af 4294 store offsets for the substring each group matched in REGS. See the
fa9a63c5
RM
4295 documentation for exactly how many groups we fill.
4296
4297 We return -1 if no match, -2 if an internal error (such as the
25fe55af 4298 failure stack overflowing). Otherwise, we return the length of the
fa9a63c5
RM
4299 matched substring. */
4300
4301int
4302re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
4303 struct re_pattern_buffer *bufp;
4304 const char *string1, *string2;
4305 int size1, size2;
4306 int pos;
4307 struct re_registers *regs;
4308 int stop;
4309{
b18215fc 4310 int result;
25fe55af 4311
b18215fc 4312#ifdef emacs
cc9b4df2
KH
4313 int charpos;
4314 gl_state.object = re_match_object;
99633e97 4315 charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos));
cc9b4df2 4316 SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
b18215fc
RS
4317#endif
4318
4319 result = re_match_2_internal (bufp, string1, size1, string2, size2,
cc9b4df2 4320 pos, regs, stop);
fa9a63c5
RM
4321 alloca (0);
4322 return result;
4323}
4324
4325/* This is a separate function so that we can force an alloca cleanup
25fe55af 4326 afterwards. */
fa9a63c5
RM
4327static int
4328re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4329 struct re_pattern_buffer *bufp;
66f0296e 4330 re_char *string1, *string2;
fa9a63c5
RM
4331 int size1, size2;
4332 int pos;
4333 struct re_registers *regs;
4334 int stop;
4335{
4336 /* General temporaries. */
4337 int mcnt;
66f0296e 4338 boolean not;
fa9a63c5
RM
4339 unsigned char *p1;
4340
4341 /* Just past the end of the corresponding string. */
66f0296e 4342 re_char *end1, *end2;
fa9a63c5
RM
4343
4344 /* Pointers into string1 and string2, just past the last characters in
25fe55af 4345 each to consider matching. */
66f0296e 4346 re_char *end_match_1, *end_match_2;
fa9a63c5
RM
4347
4348 /* Where we are in the data, and the end of the current string. */
66f0296e 4349 re_char *d, *dend;
5e69f11e 4350
99633e97
SM
4351 /* Used sometimes to remember where we were before starting matching
4352 an operator so that we can go back in case of failure. This "atomic"
4353 behavior of matching opcodes is indispensable to the correctness
4354 of the on_failure_keep_string_jump optimization. */
4355 re_char *dfail;
4356
fa9a63c5
RM
4357 /* Where we are in the pattern, and the end of the pattern. */
4358 unsigned char *p = bufp->buffer;
4359 register unsigned char *pend = p + bufp->used;
4360
25fe55af 4361 /* We use this to map every character in the string. */
6676cb1c 4362 RE_TRANSLATE_TYPE translate = bufp->translate;
fa9a63c5 4363
25fe55af 4364 /* Nonzero if we have to concern multibyte character. */
1fb352e0 4365 const boolean multibyte = bufp->multibyte;
b18215fc 4366
fa9a63c5
RM
4367 /* Failure point stack. Each place that can handle a failure further
4368 down the line pushes a failure point on this stack. It consists of
505bde11 4369 regstart, and regend for all registers corresponding to
fa9a63c5
RM
4370 the subexpressions we're currently inside, plus the number of such
4371 registers, and, finally, two char *'s. The first char * is where
4372 to resume scanning the pattern; the second one is where to resume
505bde11 4373 scanning the strings. */
25fe55af 4374#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
fa9a63c5
RM
4375 fail_stack_type fail_stack;
4376#endif
4377#ifdef DEBUG
4378 static unsigned failure_id = 0;
4379 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
4380#endif
4381
99633e97 4382#if defined (REL_ALLOC) && defined (REGEX_MALLOC)
fa9a63c5
RM
4383 /* This holds the pointer to the failure stack, when
4384 it is allocated relocatably. */
4385 fail_stack_elt_t *failure_stack_ptr;
99633e97 4386#endif
fa9a63c5
RM
4387
4388 /* We fill all the registers internally, independent of what we
25fe55af 4389 return, for use in backreferences. The number here includes
fa9a63c5
RM
4390 an element for register zero. */
4391 unsigned num_regs = bufp->re_nsub + 1;
5e69f11e 4392
fa9a63c5
RM
4393 /* Information on the contents of registers. These are pointers into
4394 the input strings; they record just what was matched (on this
4395 attempt) by a subexpression part of the pattern, that is, the
4396 regnum-th regstart pointer points to where in the pattern we began
4397 matching and the regnum-th regend points to right after where we
4398 stopped matching the regnum-th subexpression. (The zeroth register
4399 keeps track of what the whole pattern matches.) */
4400#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
66f0296e 4401 re_char **regstart, **regend;
fa9a63c5
RM
4402#endif
4403
fa9a63c5 4404 /* The following record the register info as found in the above
5e69f11e 4405 variables when we find a match better than any we've seen before.
fa9a63c5
RM
4406 This happens as we backtrack through the failure points, which in
4407 turn happens only if we have not yet matched the entire string. */
4408 unsigned best_regs_set = false;
4409#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
66f0296e 4410 re_char **best_regstart, **best_regend;
fa9a63c5 4411#endif
5e69f11e 4412
fa9a63c5
RM
4413 /* Logically, this is `best_regend[0]'. But we don't want to have to
4414 allocate space for that if we're not allocating space for anything
25fe55af 4415 else (see below). Also, we never need info about register 0 for
fa9a63c5
RM
4416 any of the other register vectors, and it seems rather a kludge to
4417 treat `best_regend' differently than the rest. So we keep track of
4418 the end of the best match so far in a separate variable. We
4419 initialize this to NULL so that when we backtrack the first time
4420 and need to test it, it's not garbage. */
66f0296e 4421 re_char *match_end = NULL;
fa9a63c5 4422
fa9a63c5
RM
4423#ifdef DEBUG
4424 /* Counts the total number of registers pushed. */
5e69f11e 4425 unsigned num_regs_pushed = 0;
fa9a63c5
RM
4426#endif
4427
4428 DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
5e69f11e 4429
fa9a63c5 4430 INIT_FAIL_STACK ();
5e69f11e 4431
fa9a63c5
RM
4432#ifdef MATCH_MAY_ALLOCATE
4433 /* Do not bother to initialize all the register variables if there are
4434 no groups in the pattern, as it takes a fair amount of time. If
4435 there are groups, we include space for register 0 (the whole
4436 pattern), even though we never use it, since it simplifies the
4437 array indexing. We should fix this. */
4438 if (bufp->re_nsub)
4439 {
66f0296e
SM
4440 regstart = REGEX_TALLOC (num_regs, re_char *);
4441 regend = REGEX_TALLOC (num_regs, re_char *);
4442 best_regstart = REGEX_TALLOC (num_regs, re_char *);
4443 best_regend = REGEX_TALLOC (num_regs, re_char *);
fa9a63c5 4444
505bde11 4445 if (!(regstart && regend && best_regstart && best_regend))
25fe55af
RS
4446 {
4447 FREE_VARIABLES ();
4448 return -2;
4449 }
fa9a63c5
RM
4450 }
4451 else
4452 {
4453 /* We must initialize all our variables to NULL, so that
25fe55af 4454 `FREE_VARIABLES' doesn't try to free them. */
505bde11 4455 regstart = regend = best_regstart = best_regend = NULL;
fa9a63c5
RM
4456 }
4457#endif /* MATCH_MAY_ALLOCATE */
4458
4459 /* The starting position is bogus. */
4460 if (pos < 0 || pos > size1 + size2)
4461 {
4462 FREE_VARIABLES ();
4463 return -1;
4464 }
5e69f11e 4465
fa9a63c5
RM
4466 /* Initialize subexpression text positions to -1 to mark ones that no
4467 start_memory/stop_memory has been seen for. Also initialize the
4468 register information struct. */
4469 for (mcnt = 1; mcnt < num_regs; mcnt++)
99633e97
SM
4470 regstart[mcnt] = regend[mcnt] = REG_UNSET_VALUE;
4471
4472 /* Shorten strings to `stop'. */
4473 if (stop <= size1)
fa9a63c5 4474 {
99633e97
SM
4475 size1 = stop;
4476 size2 = 0;
fa9a63c5 4477 }
99633e97
SM
4478 else if (stop <= size1 + size2)
4479 size2 = stop - size1;
5e69f11e 4480
fa9a63c5 4481 /* We move `string1' into `string2' if the latter's empty -- but not if
25fe55af 4482 `string1' is null. */
fa9a63c5
RM
4483 if (size2 == 0 && string1 != NULL)
4484 {
4485 string2 = string1;
4486 size2 = size1;
4487 string1 = 0;
4488 size1 = 0;
4489 }
4490 end1 = string1 + size1;
4491 end2 = string2 + size2;
4492
4493 /* Compute where to stop matching, within the two strings. */
99633e97
SM
4494 end_match_1 = end1;
4495 end_match_2 = end2;
fa9a63c5 4496
5e69f11e 4497 /* `p' scans through the pattern as `d' scans through the data.
fa9a63c5
RM
4498 `dend' is the end of the input string that `d' points within. `d'
4499 is advanced into the following input string whenever necessary, but
4500 this happens before fetching; therefore, at the beginning of the
4501 loop, `d' can be pointing at the end of a string, but it cannot
4502 equal `string2'. */
4503 if (size1 > 0 && pos <= size1)
4504 {
4505 d = string1 + pos;
4506 dend = end_match_1;
4507 }
4508 else
4509 {
4510 d = string2 + pos - size1;
4511 dend = end_match_2;
4512 }
4513
4514 DEBUG_PRINT1 ("The compiled pattern is: ");
4515 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
4516 DEBUG_PRINT1 ("The string to match is: `");
4517 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
4518 DEBUG_PRINT1 ("'\n");
5e69f11e 4519
25fe55af 4520 /* This loops over pattern commands. It exits by returning from the
fa9a63c5
RM
4521 function if the match is complete, or it drops through if the match
4522 fails at this starting point in the input data. */
4523 for (;;)
4524 {
505bde11 4525 DEBUG_PRINT2 ("\n%p: ", p);
fa9a63c5
RM
4526
4527 if (p == pend)
4528 { /* End of pattern means we might have succeeded. */
25fe55af 4529 DEBUG_PRINT1 ("end of pattern ... ");
5e69f11e 4530
fa9a63c5 4531 /* If we haven't matched the entire string, and we want the
25fe55af
RS
4532 longest match, try backtracking. */
4533 if (d != end_match_2)
fa9a63c5
RM
4534 {
4535 /* 1 if this match ends in the same string (string1 or string2)
4536 as the best previous match. */
5e69f11e 4537 boolean same_str_p = (FIRST_STRING_P (match_end)
99633e97 4538 == FIRST_STRING_P (d));
fa9a63c5
RM
4539 /* 1 if this match is the best seen so far. */
4540 boolean best_match_p;
4541
4542 /* AIX compiler got confused when this was combined
25fe55af 4543 with the previous declaration. */
fa9a63c5
RM
4544 if (same_str_p)
4545 best_match_p = d > match_end;
4546 else
99633e97 4547 best_match_p = !FIRST_STRING_P (d);
fa9a63c5 4548
25fe55af
RS
4549 DEBUG_PRINT1 ("backtracking.\n");
4550
4551 if (!FAIL_STACK_EMPTY ())
4552 { /* More failure points to try. */
4553
4554 /* If exceeds best match so far, save it. */
4555 if (!best_regs_set || best_match_p)
4556 {
4557 best_regs_set = true;
4558 match_end = d;
4559
4560 DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
4561
4562 for (mcnt = 1; mcnt < num_regs; mcnt++)
4563 {
4564 best_regstart[mcnt] = regstart[mcnt];
4565 best_regend[mcnt] = regend[mcnt];
4566 }
4567 }
4568 goto fail;
4569 }
4570
4571 /* If no failure points, don't restore garbage. And if
4572 last match is real best match, don't restore second
4573 best one. */
4574 else if (best_regs_set && !best_match_p)
4575 {
4576 restore_best_regs:
4577 /* Restore best match. It may happen that `dend ==
4578 end_match_1' while the restored d is in string2.
4579 For example, the pattern `x.*y.*z' against the
4580 strings `x-' and `y-z-', if the two strings are
4581 not consecutive in memory. */
4582 DEBUG_PRINT1 ("Restoring best registers.\n");
4583
4584 d = match_end;
4585 dend = ((d >= string1 && d <= end1)
4586 ? end_match_1 : end_match_2);
fa9a63c5
RM
4587
4588 for (mcnt = 1; mcnt < num_regs; mcnt++)
4589 {
4590 regstart[mcnt] = best_regstart[mcnt];
4591 regend[mcnt] = best_regend[mcnt];
4592 }
25fe55af
RS
4593 }
4594 } /* d != end_match_2 */
fa9a63c5
RM
4595
4596 succeed_label:
25fe55af 4597 DEBUG_PRINT1 ("Accepting match.\n");
fa9a63c5 4598
25fe55af
RS
4599 /* If caller wants register contents data back, do it. */
4600 if (regs && !bufp->no_sub)
fa9a63c5 4601 {
25fe55af
RS
4602 /* Have the register data arrays been allocated? */
4603 if (bufp->regs_allocated == REGS_UNALLOCATED)
4604 { /* No. So allocate them with malloc. We need one
4605 extra element beyond `num_regs' for the `-1' marker
4606 GNU code uses. */
4607 regs->num_regs = MAX (RE_NREGS, num_regs + 1);
4608 regs->start = TALLOC (regs->num_regs, regoff_t);
4609 regs->end = TALLOC (regs->num_regs, regoff_t);
4610 if (regs->start == NULL || regs->end == NULL)
fa9a63c5
RM
4611 {
4612 FREE_VARIABLES ();
4613 return -2;
4614 }
25fe55af
RS
4615 bufp->regs_allocated = REGS_REALLOCATE;
4616 }
4617 else if (bufp->regs_allocated == REGS_REALLOCATE)
4618 { /* Yes. If we need more elements than were already
4619 allocated, reallocate them. If we need fewer, just
4620 leave it alone. */
4621 if (regs->num_regs < num_regs + 1)
4622 {
4623 regs->num_regs = num_regs + 1;
4624 RETALLOC (regs->start, regs->num_regs, regoff_t);
4625 RETALLOC (regs->end, regs->num_regs, regoff_t);
4626 if (regs->start == NULL || regs->end == NULL)
fa9a63c5
RM
4627 {
4628 FREE_VARIABLES ();
4629 return -2;
4630 }
25fe55af
RS
4631 }
4632 }
4633 else
fa9a63c5
RM
4634 {
4635 /* These braces fend off a "empty body in an else-statement"
25fe55af 4636 warning under GCC when assert expands to nothing. */
fa9a63c5
RM
4637 assert (bufp->regs_allocated == REGS_FIXED);
4638 }
4639
25fe55af
RS
4640 /* Convert the pointer data in `regstart' and `regend' to
4641 indices. Register zero has to be set differently,
4642 since we haven't kept track of any info for it. */
4643 if (regs->num_regs > 0)
4644 {
4645 regs->start[0] = pos;
99633e97 4646 regs->end[0] = POINTER_TO_OFFSET (d);
25fe55af 4647 }
5e69f11e 4648
25fe55af
RS
4649 /* Go through the first `min (num_regs, regs->num_regs)'
4650 registers, since that is all we initialized. */
fa9a63c5
RM
4651 for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
4652 {
25fe55af
RS
4653 if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
4654 regs->start[mcnt] = regs->end[mcnt] = -1;
4655 else
4656 {
fa9a63c5
RM
4657 regs->start[mcnt]
4658 = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
25fe55af 4659 regs->end[mcnt]
fa9a63c5 4660 = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
25fe55af 4661 }
fa9a63c5 4662 }
5e69f11e 4663
25fe55af
RS
4664 /* If the regs structure we return has more elements than
4665 were in the pattern, set the extra elements to -1. If
4666 we (re)allocated the registers, this is the case,
4667 because we always allocate enough to have at least one
4668 -1 at the end. */
4669 for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
4670 regs->start[mcnt] = regs->end[mcnt] = -1;
fa9a63c5
RM
4671 } /* regs && !bufp->no_sub */
4672
25fe55af
RS
4673 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
4674 nfailure_points_pushed, nfailure_points_popped,
4675 nfailure_points_pushed - nfailure_points_popped);
4676 DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
fa9a63c5 4677
99633e97 4678 mcnt = POINTER_TO_OFFSET (d) - pos;
fa9a63c5 4679
25fe55af 4680 DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
fa9a63c5 4681
25fe55af
RS
4682 FREE_VARIABLES ();
4683 return mcnt;
4684 }
fa9a63c5 4685
25fe55af 4686 /* Otherwise match next pattern command. */
fa9a63c5
RM
4687 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4688 {
25fe55af
RS
4689 /* Ignore these. Used to ignore the n of succeed_n's which
4690 currently have n == 0. */
4691 case no_op:
4692 DEBUG_PRINT1 ("EXECUTING no_op.\n");
4693 break;
fa9a63c5
RM
4694
4695 case succeed:
25fe55af 4696 DEBUG_PRINT1 ("EXECUTING succeed.\n");
fa9a63c5
RM
4697 goto succeed_label;
4698
25fe55af
RS
4699 /* Match the next n pattern characters exactly. The following
4700 byte in the pattern defines n, and the n bytes after that
4701 are the characters to match. */
fa9a63c5
RM
4702 case exactn:
4703 mcnt = *p++;
25fe55af 4704 DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
fa9a63c5 4705
99633e97
SM
4706 /* Remember the start point to rollback upon failure. */
4707 dfail = d;
4708
25fe55af
RS
4709 /* This is written out as an if-else so we don't waste time
4710 testing `translate' inside the loop. */
28703c16 4711 if (RE_TRANSLATE_P (translate))
fa9a63c5 4712 {
e934739e
RS
4713#ifdef emacs
4714 if (multibyte)
4715 do
4716 {
4717 int pat_charlen, buf_charlen;
e71b1971 4718 unsigned int pat_ch, buf_ch;
e934739e
RS
4719
4720 PREFETCH ();
4721 pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen);
4722 buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
4723
4724 if (RE_TRANSLATE (translate, buf_ch)
4725 != pat_ch)
99633e97
SM
4726 {
4727 d = dfail;
4728 goto fail;
4729 }
e934739e
RS
4730
4731 p += pat_charlen;
4732 d += buf_charlen;
4733 mcnt -= pat_charlen;
4734 }
4735 while (mcnt > 0);
4736 else
4737#endif /* not emacs */
4738 do
4739 {
4740 PREFETCH ();
66f0296e 4741 if (RE_TRANSLATE (translate, *d) != *p++)
99633e97
SM
4742 {
4743 d = dfail;
4744 goto fail;
4745 }
33c46939 4746 d++;
e934739e
RS
4747 }
4748 while (--mcnt);
fa9a63c5
RM
4749 }
4750 else
4751 {
4752 do
4753 {
4754 PREFETCH ();
99633e97
SM
4755 if (*d++ != *p++)
4756 {
4757 d = dfail;
4758 goto fail;
4759 }
fa9a63c5
RM
4760 }
4761 while (--mcnt);
4762 }
25fe55af 4763 break;
fa9a63c5
RM
4764
4765
25fe55af 4766 /* Match any character except possibly a newline or a null. */
fa9a63c5 4767 case anychar:
e934739e
RS
4768 {
4769 int buf_charlen;
e71b1971 4770 unsigned int buf_ch;
fa9a63c5 4771
e934739e 4772 DEBUG_PRINT1 ("EXECUTING anychar.\n");
fa9a63c5 4773
e934739e 4774 PREFETCH ();
fa9a63c5 4775
e934739e
RS
4776#ifdef emacs
4777 if (multibyte)
4778 buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
4779 else
4780#endif /* not emacs */
4781 {
66f0296e 4782 buf_ch = *d;
e934739e
RS
4783 buf_charlen = 1;
4784 }
4785
4786 buf_ch = TRANSLATE (buf_ch);
4787
4788 if ((!(bufp->syntax & RE_DOT_NEWLINE)
4789 && buf_ch == '\n')
4790 || ((bufp->syntax & RE_DOT_NOT_NULL)
4791 && buf_ch == '\000'))
4792 goto fail;
4793
e934739e
RS
4794 DEBUG_PRINT2 (" Matched `%d'.\n", *d);
4795 d += buf_charlen;
4796 }
fa9a63c5
RM
4797 break;
4798
4799
4800 case charset:
4801 case charset_not:
4802 {
b18215fc 4803 register unsigned int c;
fa9a63c5 4804 boolean not = (re_opcode_t) *(p - 1) == charset_not;
b18215fc
RS
4805 int len;
4806
4807 /* Start of actual range_table, or end of bitmap if there is no
4808 range table. */
4809 unsigned char *range_table;
4810
96cc36cc 4811 /* Nonzero if there is a range table. */
b18215fc
RS
4812 int range_table_exists;
4813
96cc36cc
RS
4814 /* Number of ranges of range table. This is not included
4815 in the initial byte-length of the command. */
4816 int count = 0;
fa9a63c5 4817
25fe55af 4818 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
fa9a63c5
RM
4819
4820 PREFETCH ();
66f0296e 4821 c = *d;
fa9a63c5 4822
b18215fc 4823 range_table_exists = CHARSET_RANGE_TABLE_EXISTS_P (&p[-1]);
96cc36cc
RS
4824
4825#ifdef emacs
b18215fc 4826 if (range_table_exists)
96cc36cc
RS
4827 {
4828 range_table = CHARSET_RANGE_TABLE (&p[-1]); /* Past the bitmap. */
4829 EXTRACT_NUMBER_AND_INCR (count, range_table);
4830 }
b18215fc
RS
4831
4832 if (multibyte && BASE_LEADING_CODE_P (c))
4833 c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
96cc36cc 4834#endif /* emacs */
b18215fc
RS
4835
4836 if (SINGLE_BYTE_CHAR_P (c))
4837 { /* Lookup bitmap. */
4838 c = TRANSLATE (c); /* The character to match. */
4839 len = 1;
4840
4841 /* Cast to `unsigned' instead of `unsigned char' in
4842 case the bit list is a full 32 bytes long. */
4843 if (c < (unsigned) (CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH)
96cc36cc
RS
4844 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4845 not = !not;
b18215fc 4846 }
96cc36cc 4847#ifdef emacs
b18215fc 4848 else if (range_table_exists)
96cc36cc
RS
4849 {
4850 int class_bits = CHARSET_RANGE_TABLE_BITS (&p[-1]);
4851
4852 if ( (class_bits & BIT_ALNUM && ISALNUM (c))
4853 | (class_bits & BIT_ALPHA && ISALPHA (c))
f71b19b6 4854 | (class_bits & BIT_ASCII && IS_REAL_ASCII (c))
96cc36cc
RS
4855 | (class_bits & BIT_GRAPH && ISGRAPH (c))
4856 | (class_bits & BIT_LOWER && ISLOWER (c))
f71b19b6
DL
4857 | (class_bits & BIT_MULTIBYTE && !ISUNIBYTE (c))
4858 | (class_bits & BIT_NONASCII && !IS_REAL_ASCII (c))
96cc36cc
RS
4859 | (class_bits & BIT_PRINT && ISPRINT (c))
4860 | (class_bits & BIT_PUNCT && ISPUNCT (c))
4861 | (class_bits & BIT_SPACE && ISSPACE (c))
f71b19b6 4862 | (class_bits & BIT_UNIBYTE && ISUNIBYTE (c))
96cc36cc
RS
4863 | (class_bits & BIT_UPPER && ISUPPER (c))
4864 | (class_bits & BIT_WORD && ISWORD (c)))
4865 not = !not;
4866 else
4867 CHARSET_LOOKUP_RANGE_TABLE_RAW (not, c, range_table, count);
4868 }
4869#endif /* emacs */
fa9a63c5 4870
96cc36cc
RS
4871 if (range_table_exists)
4872 p = CHARSET_RANGE_TABLE_END (range_table, count);
4873 else
4874 p += CHARSET_BITMAP_SIZE (&p[-1]) + 1;
fa9a63c5
RM
4875
4876 if (!not) goto fail;
5e69f11e 4877
b18215fc 4878 d += len;
fa9a63c5
RM
4879 break;
4880 }
4881
4882
25fe55af 4883 /* The beginning of a group is represented by start_memory.
505bde11 4884 The argument is the register number. The text
25fe55af
RS
4885 matched within the group is recorded (in the internal
4886 registers data structure) under the register number. */
4887 case start_memory:
505bde11
SM
4888 DEBUG_PRINT2 ("EXECUTING start_memory %d:\n", *p);
4889
4890 /* In case we need to undo this operation (via backtracking). */
4891 PUSH_FAILURE_REG ((unsigned int)*p);
fa9a63c5 4892
25fe55af 4893 regstart[*p] = d;
505bde11 4894 regend[*p] = REG_UNSET_VALUE; /* probably unnecessary. -sm */
fa9a63c5
RM
4895 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
4896
25fe55af 4897 /* Move past the register number and inner group count. */
505bde11 4898 p += 1;
25fe55af 4899 break;
fa9a63c5
RM
4900
4901
25fe55af 4902 /* The stop_memory opcode represents the end of a group. Its
505bde11 4903 argument is the same as start_memory's: the register number. */
fa9a63c5 4904 case stop_memory:
505bde11
SM
4905 DEBUG_PRINT2 ("EXECUTING stop_memory %d:\n", *p);
4906
4907 assert (!REG_UNSET (regstart[*p]));
4908 /* Strictly speaking, there should be code such as:
4909
4910 assert (REG_UNSET (regend[*p]));
4911 PUSH_FAILURE_REGSTOP ((unsigned int)*p);
4912
4913 But the only info to be pushed is regend[*p] and it is known to
4914 be UNSET, so there really isn't anything to push.
4915 Not pushing anything, on the other hand deprives us from the
4916 guarantee that regend[*p] is UNSET since undoing this operation
4917 will not reset its value properly. This is not important since
4918 the value will only be read on the next start_memory or at
4919 the very end and both events can only happen if this stop_memory
4920 is *not* undone. */
fa9a63c5 4921
25fe55af 4922 regend[*p] = d;
fa9a63c5
RM
4923 DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
4924
25fe55af 4925 /* Move past the register number and the inner group count. */
505bde11 4926 p += 1;
25fe55af 4927 break;
fa9a63c5
RM
4928
4929
4930 /* \<digit> has been turned into a `duplicate' command which is
25fe55af
RS
4931 followed by the numeric value of <digit> as the register number. */
4932 case duplicate:
fa9a63c5 4933 {
66f0296e 4934 register re_char *d2, *dend2;
25fe55af 4935 int regno = *p++; /* Get which register to match against. */
fa9a63c5
RM
4936 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
4937
25fe55af
RS
4938 /* Can't back reference a group which we've never matched. */
4939 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
4940 goto fail;
5e69f11e 4941
25fe55af
RS
4942 /* Where in input to try to start matching. */
4943 d2 = regstart[regno];
5e69f11e 4944
99633e97
SM
4945 /* Remember the start point to rollback upon failure. */
4946 dfail = d;
4947
25fe55af
RS
4948 /* Where to stop matching; if both the place to start and
4949 the place to stop matching are in the same string, then
4950 set to the place to stop, otherwise, for now have to use
4951 the end of the first string. */
fa9a63c5 4952
25fe55af 4953 dend2 = ((FIRST_STRING_P (regstart[regno])
fa9a63c5
RM
4954 == FIRST_STRING_P (regend[regno]))
4955 ? regend[regno] : end_match_1);
4956 for (;;)
4957 {
4958 /* If necessary, advance to next segment in register
25fe55af 4959 contents. */
fa9a63c5
RM
4960 while (d2 == dend2)
4961 {
4962 if (dend2 == end_match_2) break;
4963 if (dend2 == regend[regno]) break;
4964
25fe55af
RS
4965 /* End of string1 => advance to string2. */
4966 d2 = string2;
4967 dend2 = regend[regno];
fa9a63c5
RM
4968 }
4969 /* At end of register contents => success */
4970 if (d2 == dend2) break;
4971
4972 /* If necessary, advance to next segment in data. */
4973 PREFETCH ();
4974
4975 /* How many characters left in this segment to match. */
4976 mcnt = dend - d;
5e69f11e 4977
fa9a63c5 4978 /* Want how many consecutive characters we can match in
25fe55af
RS
4979 one shot, so, if necessary, adjust the count. */
4980 if (mcnt > dend2 - d2)
fa9a63c5 4981 mcnt = dend2 - d2;
5e69f11e 4982
fa9a63c5 4983 /* Compare that many; failure if mismatch, else move
25fe55af 4984 past them. */
28703c16 4985 if (RE_TRANSLATE_P (translate)
25fe55af
RS
4986 ? bcmp_translate (d, d2, mcnt, translate)
4987 : bcmp (d, d2, mcnt))
99633e97
SM
4988 {
4989 d = dfail;
4990 goto fail;
4991 }
fa9a63c5 4992 d += mcnt, d2 += mcnt;
fa9a63c5
RM
4993 }
4994 }
4995 break;
4996
4997
25fe55af
RS
4998 /* begline matches the empty string at the beginning of the string
4999 (unless `not_bol' is set in `bufp'), and, if
5000 `newline_anchor' is set, after newlines. */
fa9a63c5 5001 case begline:
25fe55af 5002 DEBUG_PRINT1 ("EXECUTING begline.\n");
5e69f11e 5003
25fe55af
RS
5004 if (AT_STRINGS_BEG (d))
5005 {
5006 if (!bufp->not_bol) break;
5007 }
5008 else if (d[-1] == '\n' && bufp->newline_anchor)
5009 {
5010 break;
5011 }
5012 /* In all other cases, we fail. */
5013 goto fail;
fa9a63c5
RM
5014
5015
25fe55af 5016 /* endline is the dual of begline. */
fa9a63c5 5017 case endline:
25fe55af 5018 DEBUG_PRINT1 ("EXECUTING endline.\n");
fa9a63c5 5019
25fe55af
RS
5020 if (AT_STRINGS_END (d))
5021 {
5022 if (!bufp->not_eol) break;
5023 }
5e69f11e 5024
25fe55af
RS
5025 /* We have to ``prefetch'' the next character. */
5026 else if ((d == end1 ? *string2 : *d) == '\n'
5027 && bufp->newline_anchor)
5028 {
5029 break;
5030 }
5031 goto fail;
fa9a63c5
RM
5032
5033
5034 /* Match at the very beginning of the data. */
25fe55af
RS
5035 case begbuf:
5036 DEBUG_PRINT1 ("EXECUTING begbuf.\n");
5037 if (AT_STRINGS_BEG (d))
5038 break;
5039 goto fail;
fa9a63c5
RM
5040
5041
5042 /* Match at the very end of the data. */
25fe55af
RS
5043 case endbuf:
5044 DEBUG_PRINT1 ("EXECUTING endbuf.\n");
fa9a63c5
RM
5045 if (AT_STRINGS_END (d))
5046 break;
25fe55af 5047 goto fail;
5e69f11e 5048
5e69f11e 5049
25fe55af
RS
5050 /* on_failure_keep_string_jump is used to optimize `.*\n'. It
5051 pushes NULL as the value for the string on the stack. Then
505bde11 5052 `POP_FAILURE_POINT' will keep the current value for the
25fe55af
RS
5053 string, instead of restoring it. To see why, consider
5054 matching `foo\nbar' against `.*\n'. The .* matches the foo;
5055 then the . fails against the \n. But the next thing we want
5056 to do is match the \n against the \n; if we restored the
5057 string value, we would be back at the foo.
5058
5059 Because this is used only in specific cases, we don't need to
5060 check all the things that `on_failure_jump' does, to make
5061 sure the right things get saved on the stack. Hence we don't
5062 share its code. The only reason to push anything on the
5063 stack at all is that otherwise we would have to change
5064 `anychar's code to do something besides goto fail in this
5065 case; that seems worse than this. */
5066 case on_failure_keep_string_jump:
505bde11
SM
5067 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5068 DEBUG_PRINT3 ("EXECUTING on_failure_keep_string_jump %d (to %p):\n",
5069 mcnt, p + mcnt);
fa9a63c5 5070
505bde11
SM
5071 PUSH_FAILURE_POINT (p - 3, NULL);
5072 break;
5073
505bde11 5074
4e8a9132
SM
5075 /* Simple loop detecting on_failure_jump: just check on the
5076 failure stack if the same spot was already hit earlier. */
505bde11
SM
5077 case on_failure_jump_loop:
5078 on_failure:
5079 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5080 DEBUG_PRINT3 ("EXECUTING on_failure_jump_loop %d (to %p):\n",
5081 mcnt, p + mcnt);
5082
5083 CHECK_INFINITE_LOOP (p - 3, d);
5084 PUSH_FAILURE_POINT (p - 3, d);
25fe55af 5085 break;
fa9a63c5
RM
5086
5087
5088 /* Uses of on_failure_jump:
5e69f11e 5089
25fe55af
RS
5090 Each alternative starts with an on_failure_jump that points
5091 to the beginning of the next alternative. Each alternative
5092 except the last ends with a jump that in effect jumps past
5093 the rest of the alternatives. (They really jump to the
5094 ending jump of the following alternative, because tensioning
5095 these jumps is a hassle.)
fa9a63c5 5096
25fe55af
RS
5097 Repeats start with an on_failure_jump that points past both
5098 the repetition text and either the following jump or
5099 pop_failure_jump back to this on_failure_jump. */
fa9a63c5 5100 case on_failure_jump:
27c3b45d 5101 QUIT;
25fe55af 5102 EXTRACT_NUMBER_AND_INCR (mcnt, p);
505bde11
SM
5103 DEBUG_PRINT3 ("EXECUTING on_failure_jump %d (to %p):\n",
5104 mcnt, p + mcnt);
25fe55af 5105
505bde11 5106 PUSH_FAILURE_POINT (p -3, d);
25fe55af
RS
5107 break;
5108
4e8a9132 5109 /* This operation is used for greedy *.
505bde11
SM
5110 Compare the beginning of the repeat with what in the
5111 pattern follows its end. If we can establish that there
5112 is nothing that they would both match, i.e., that we
5113 would have to backtrack because of (as in, e.g., `a*a')
5114 then we can use a non-backtracking loop based on
4e8a9132 5115 on_failure_keep_string_jump instead of on_failure_jump. */
505bde11 5116 case on_failure_jump_smart:
27c3b45d 5117 QUIT;
25fe55af 5118 EXTRACT_NUMBER_AND_INCR (mcnt, p);
505bde11
SM
5119 DEBUG_PRINT3 ("EXECUTING on_failure_jump_smart %d (to %p).\n",
5120 mcnt, p + mcnt);
25fe55af 5121 {
505bde11
SM
5122 unsigned char *p1 = p; /* Next operation. */
5123 unsigned char *p2 = p + mcnt; /* Destination of the jump. */
fa9a63c5 5124
505bde11
SM
5125 p -= 3; /* Reset so that we will re-execute the
5126 instruction once it's been changed. */
fa9a63c5 5127
4e8a9132
SM
5128 EXTRACT_NUMBER (mcnt, p2 - 2);
5129
5130 /* Ensure this is a indeed the trivial kind of loop
5131 we are expecting. */
5132 assert (skip_one_char (p1) == p2 - 3);
5133 assert ((re_opcode_t) p2[-3] == jump && p2 + mcnt == p);
99633e97 5134 DEBUG_STATEMENT (debug += 2);
505bde11 5135 if (mutually_exclusive_p (bufp, p1, p2))
fa9a63c5 5136 {
505bde11 5137 /* Use a fast `on_failure_keep_string_jump' loop. */
4e8a9132
SM
5138 DEBUG_PRINT1 (" smart exclusive => fast loop.\n");
5139 *p = (unsigned char) on_failure_keep_string_jump;
5140 STORE_NUMBER (p2 - 2, mcnt + 3);
25fe55af 5141 }
505bde11 5142 else
fa9a63c5 5143 {
505bde11
SM
5144 /* Default to a safe `on_failure_jump' loop. */
5145 DEBUG_PRINT1 (" smart default => slow loop.\n");
4e8a9132 5146 *p = (unsigned char) on_failure_jump;
fa9a63c5 5147 }
99633e97 5148 DEBUG_STATEMENT (debug -= 2);
25fe55af 5149 }
505bde11 5150 break;
25fe55af
RS
5151
5152 /* Unconditionally jump (without popping any failure points). */
5153 case jump:
fa9a63c5 5154 unconditional_jump:
27c3b45d 5155 QUIT;
fa9a63c5 5156 EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */
25fe55af
RS
5157 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
5158 p += mcnt; /* Do the jump. */
505bde11 5159 DEBUG_PRINT2 ("(to %p).\n", p);
25fe55af
RS
5160 break;
5161
5162
25fe55af
RS
5163 /* Have to succeed matching what follows at least n times.
5164 After that, handle like `on_failure_jump'. */
5165 case succeed_n:
5166 EXTRACT_NUMBER (mcnt, p + 2);
5167 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
5e69f11e 5168
25fe55af
RS
5169 assert (mcnt >= 0);
5170 /* Originally, this is how many times we HAVE to succeed. */
5171 if (mcnt > 0)
5172 {
5173 mcnt--;
fa9a63c5 5174 p += 2;
25fe55af 5175 STORE_NUMBER_AND_INCR (p, mcnt);
505bde11 5176 DEBUG_PRINT3 (" Setting %p to %d.\n", p, mcnt);
25fe55af 5177 }
fa9a63c5 5178 else if (mcnt == 0)
25fe55af 5179 {
505bde11 5180 DEBUG_PRINT2 (" Setting two bytes from %p to no_op.\n", p+2);
fa9a63c5 5181 p[2] = (unsigned char) no_op;
25fe55af
RS
5182 p[3] = (unsigned char) no_op;
5183 goto on_failure;
5184 }
5185 break;
5186
5187 case jump_n:
5188 EXTRACT_NUMBER (mcnt, p + 2);
5189 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
5190
5191 /* Originally, this is how many times we CAN jump. */
5192 if (mcnt)
5193 {
5194 mcnt--;
5195 STORE_NUMBER (p + 2, mcnt);
5e69f11e 5196 goto unconditional_jump;
25fe55af
RS
5197 }
5198 /* If don't have to jump any more, skip over the rest of command. */
5e69f11e
RM
5199 else
5200 p += 4;
25fe55af 5201 break;
5e69f11e 5202
fa9a63c5
RM
5203 case set_number_at:
5204 {
25fe55af 5205 DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
fa9a63c5 5206
25fe55af
RS
5207 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5208 p1 = p + mcnt;
5209 EXTRACT_NUMBER_AND_INCR (mcnt, p);
505bde11 5210 DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt);
fa9a63c5 5211 STORE_NUMBER (p1, mcnt);
25fe55af
RS
5212 break;
5213 }
9121ca40
KH
5214
5215 case wordbound:
66f0296e
SM
5216 case notwordbound:
5217 not = (re_opcode_t) *(p - 1) == notwordbound;
5218 DEBUG_PRINT2 ("EXECUTING %swordbound.\n", not?"not":"");
fa9a63c5 5219
99633e97 5220 /* We SUCCEED (or FAIL) in one of the following cases: */
9121ca40 5221
b18215fc 5222 /* Case 1: D is at the beginning or the end of string. */
9121ca40 5223 if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
66f0296e 5224 not = !not;
b18215fc
RS
5225 else
5226 {
5227 /* C1 is the character before D, S1 is the syntax of C1, C2
5228 is the character at D, and S2 is the syntax of C2. */
5229 int c1, c2, s1, s2;
b18215fc 5230#ifdef emacs
66f0296e 5231 int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d - 1));
5d967c7a 5232 UPDATE_SYNTAX_TABLE (charpos);
25fe55af 5233#endif
66f0296e
SM
5234 /* FIXME: This does a STRING_CHAR even for unibyte buffers. */
5235 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
b18215fc
RS
5236 s1 = SYNTAX (c1);
5237#ifdef emacs
5d967c7a 5238 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
25fe55af 5239#endif
66f0296e
SM
5240 PREFETCH ();
5241 /* FIXME: This does a STRING_CHAR even for unibyte buffers. */
5242 c2 = STRING_CHAR (d, dend - d);
b18215fc
RS
5243 s2 = SYNTAX (c2);
5244
5245 if (/* Case 2: Only one of S1 and S2 is Sword. */
5246 ((s1 == Sword) != (s2 == Sword))
5247 /* Case 3: Both of S1 and S2 are Sword, and macro
25fe55af 5248 WORD_BOUNDARY_P (C1, C2) returns nonzero. */
b18215fc 5249 || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
66f0296e
SM
5250 not = !not;
5251 }
5252 if (not)
9121ca40 5253 break;
b18215fc 5254 else
9121ca40 5255 goto fail;
fa9a63c5
RM
5256
5257 case wordbeg:
25fe55af 5258 DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
fa9a63c5 5259
b18215fc
RS
5260 /* We FAIL in one of the following cases: */
5261
25fe55af 5262 /* Case 1: D is at the end of string. */
b18215fc 5263 if (AT_STRINGS_END (d))
99633e97 5264 goto fail;
b18215fc
RS
5265 else
5266 {
5267 /* C1 is the character before D, S1 is the syntax of C1, C2
5268 is the character at D, and S2 is the syntax of C2. */
5269 int c1, c2, s1, s2;
fa9a63c5 5270#ifdef emacs
99633e97 5271 int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
92432794 5272 UPDATE_SYNTAX_TABLE (charpos);
25fe55af 5273#endif
99633e97
SM
5274 PREFETCH ();
5275 /* FIXME: This does a STRING_CHAR even for unibyte buffers. */
5276 c2 = STRING_CHAR (d, dend - d);
b18215fc 5277 s2 = SYNTAX (c2);
25fe55af 5278
b18215fc
RS
5279 /* Case 2: S2 is not Sword. */
5280 if (s2 != Sword)
5281 goto fail;
5282
5283 /* Case 3: D is not at the beginning of string ... */
5284 if (!AT_STRINGS_BEG (d))
5285 {
5286 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5287#ifdef emacs
5d967c7a 5288 UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
25fe55af 5289#endif
b18215fc
RS
5290 s1 = SYNTAX (c1);
5291
5292 /* ... and S1 is Sword, and WORD_BOUNDARY_P (C1, C2)
25fe55af 5293 returns 0. */
b18215fc
RS
5294 if ((s1 == Sword) && !WORD_BOUNDARY_P (c1, c2))
5295 goto fail;
5296 }
5297 }
e318085a
RS
5298 break;
5299
b18215fc 5300 case wordend:
25fe55af 5301 DEBUG_PRINT1 ("EXECUTING wordend.\n");
b18215fc
RS
5302
5303 /* We FAIL in one of the following cases: */
5304
5305 /* Case 1: D is at the beginning of string. */
5306 if (AT_STRINGS_BEG (d))
e318085a 5307 goto fail;
b18215fc
RS
5308 else
5309 {
5310 /* C1 is the character before D, S1 is the syntax of C1, C2
5311 is the character at D, and S2 is the syntax of C2. */
5312 int c1, c2, s1, s2;
5d967c7a 5313#ifdef emacs
99633e97 5314 int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d) - 1);
92432794 5315 UPDATE_SYNTAX_TABLE (charpos);
5d967c7a 5316#endif
99633e97 5317 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
b18215fc
RS
5318 s1 = SYNTAX (c1);
5319
5320 /* Case 2: S1 is not Sword. */
5321 if (s1 != Sword)
5322 goto fail;
5323
5324 /* Case 3: D is not at the end of string ... */
5325 if (!AT_STRINGS_END (d))
5326 {
66f0296e 5327 PREFETCH ();
99633e97 5328 /* FIXME: This does a STRING_CHAR even for unibyte buffers. */
66f0296e 5329 c2 = STRING_CHAR (d, dend - d);
5d967c7a
RS
5330#ifdef emacs
5331 UPDATE_SYNTAX_TABLE_FORWARD (charpos);
5332#endif
b18215fc
RS
5333 s2 = SYNTAX (c2);
5334
5335 /* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2)
25fe55af 5336 returns 0. */
b18215fc 5337 if ((s2 == Sword) && !WORD_BOUNDARY_P (c1, c2))
25fe55af 5338 goto fail;
b18215fc
RS
5339 }
5340 }
e318085a
RS
5341 break;
5342
fa9a63c5 5343 case syntaxspec:
1fb352e0
SM
5344 case notsyntaxspec:
5345 not = (re_opcode_t) *(p - 1) == notsyntaxspec;
fa9a63c5 5346 mcnt = *p++;
1fb352e0 5347 DEBUG_PRINT3 ("EXECUTING %ssyntaxspec %d.\n", not?"not":"", mcnt);
fa9a63c5 5348 PREFETCH ();
b18215fc
RS
5349#ifdef emacs
5350 {
92432794 5351 int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
b18215fc
RS
5352 UPDATE_SYNTAX_TABLE (pos1);
5353 }
25fe55af 5354#endif
b18215fc
RS
5355 {
5356 int c, len;
5357
5358 if (multibyte)
5359 /* we must concern about multibyte form, ... */
5360 c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
5361 else
5362 /* everything should be handled as ASCII, even though it
5363 looks like multibyte form. */
5364 c = *d, len = 1;
5365
89084293 5366 if ((SYNTAX (c) != (re_opcode_t) mcnt) ^ not)
1fb352e0 5367 goto fail;
b18215fc
RS
5368 d += len;
5369 }
fa9a63c5
RM
5370 break;
5371
b18215fc 5372#ifdef emacs
1fb352e0
SM
5373 case before_dot:
5374 DEBUG_PRINT1 ("EXECUTING before_dot.\n");
5375 if (PTR_BYTE_POS (d) >= PT_BYTE)
fa9a63c5 5376 goto fail;
b18215fc
RS
5377 break;
5378
1fb352e0
SM
5379 case at_dot:
5380 DEBUG_PRINT1 ("EXECUTING at_dot.\n");
5381 if (PTR_BYTE_POS (d) != PT_BYTE)
5382 goto fail;
5383 break;
b18215fc 5384
1fb352e0
SM
5385 case after_dot:
5386 DEBUG_PRINT1 ("EXECUTING after_dot.\n");
5387 if (PTR_BYTE_POS (d) <= PT_BYTE)
5388 goto fail;
e318085a 5389 break;
fa9a63c5 5390
1fb352e0 5391 case categoryspec:
b18215fc 5392 case notcategoryspec:
1fb352e0 5393 not = (re_opcode_t) *(p - 1) == notcategoryspec;
b18215fc 5394 mcnt = *p++;
1fb352e0 5395 DEBUG_PRINT3 ("EXECUTING %scategoryspec %d.\n", not?"not":"", mcnt);
b18215fc
RS
5396 PREFETCH ();
5397 {
5398 int c, len;
5399
5400 if (multibyte)
5401 c = STRING_CHAR_AND_LENGTH (d, dend - d, len);
5402 else
5403 c = *d, len = 1;
5404
1fb352e0 5405 if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not)
b18215fc
RS
5406 goto fail;
5407 d += len;
5408 }
fa9a63c5 5409 break;
5e69f11e 5410
1fb352e0 5411#endif /* emacs */
5e69f11e 5412
b18215fc
RS
5413 default:
5414 abort ();
fa9a63c5 5415 }
b18215fc 5416 continue; /* Successfully executed one pattern command; keep going. */
fa9a63c5
RM
5417
5418
5419 /* We goto here if a matching operation fails. */
5420 fail:
27c3b45d 5421 QUIT;
fa9a63c5 5422 if (!FAIL_STACK_EMPTY ())
505bde11 5423 {
66f0296e 5424 re_char *str;
505bde11
SM
5425 unsigned char *pat;
5426 /* A restart point is known. Restore to that state. */
b18215fc 5427 DEBUG_PRINT1 ("\nFAIL:\n");
505bde11
SM
5428 POP_FAILURE_POINT (str, pat);
5429 switch (SWITCH_ENUM_CAST ((re_opcode_t) *pat++))
5430 {
5431 case on_failure_keep_string_jump:
5432 assert (str == NULL);
5433 goto continue_failure_jump;
5434
505bde11
SM
5435 case on_failure_jump_loop:
5436 case on_failure_jump:
5437 case succeed_n:
5438 d = str;
5439 continue_failure_jump:
5440 EXTRACT_NUMBER_AND_INCR (mcnt, pat);
5441 p = pat + mcnt;
5442 break;
b18215fc 5443
505bde11
SM
5444 default:
5445 abort();
5446 }
fa9a63c5 5447
505bde11 5448 assert (p >= bufp->buffer && p <= pend);
b18215fc
RS
5449
5450 if (d >= string1 && d <= end1)
fa9a63c5 5451 dend = end_match_1;
b18215fc 5452 }
fa9a63c5 5453 else
b18215fc 5454 break; /* Matching at this starting point really fails. */
fa9a63c5
RM
5455 } /* for (;;) */
5456
5457 if (best_regs_set)
5458 goto restore_best_regs;
5459
5460 FREE_VARIABLES ();
5461
b18215fc 5462 return -1; /* Failure to match. */
fa9a63c5
RM
5463} /* re_match_2 */
5464\f
5465/* Subroutine definitions for re_match_2. */
5466
fa9a63c5
RM
5467/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
5468 bytes; nonzero otherwise. */
5e69f11e 5469
fa9a63c5
RM
5470static int
5471bcmp_translate (s1, s2, len, translate)
5472 unsigned char *s1, *s2;
5473 register int len;
6676cb1c 5474 RE_TRANSLATE_TYPE translate;
fa9a63c5
RM
5475{
5476 register unsigned char *p1 = s1, *p2 = s2;
e934739e
RS
5477 unsigned char *p1_end = s1 + len;
5478 unsigned char *p2_end = s2 + len;
5479
5480 while (p1 != p1_end && p2 != p2_end)
fa9a63c5 5481 {
e934739e
RS
5482 int p1_charlen, p2_charlen;
5483 int p1_ch, p2_ch;
5484
99633e97 5485 /* FIXME: This assumes `multibyte = true'. */
e934739e
RS
5486 p1_ch = STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen);
5487 p2_ch = STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen);
5488
5489 if (RE_TRANSLATE (translate, p1_ch)
5490 != RE_TRANSLATE (translate, p2_ch))
bc192b5b 5491 return 1;
e934739e
RS
5492
5493 p1 += p1_charlen, p2 += p2_charlen;
fa9a63c5 5494 }
e934739e
RS
5495
5496 if (p1 != p1_end || p2 != p2_end)
5497 return 1;
5498
fa9a63c5
RM
5499 return 0;
5500}
5501\f
5502/* Entry points for GNU code. */
5503
5504/* re_compile_pattern is the GNU regular expression compiler: it
5505 compiles PATTERN (of length SIZE) and puts the result in BUFP.
5506 Returns 0 if the pattern was valid, otherwise an error string.
5e69f11e 5507
fa9a63c5
RM
5508 Assumes the `allocated' (and perhaps `buffer') and `translate' fields
5509 are set in BUFP on entry.
5e69f11e 5510
b18215fc 5511 We call regex_compile to do the actual compilation. */
fa9a63c5
RM
5512
5513const char *
5514re_compile_pattern (pattern, length, bufp)
5515 const char *pattern;
5516 int length;
5517 struct re_pattern_buffer *bufp;
5518{
5519 reg_errcode_t ret;
5e69f11e 5520
fa9a63c5
RM
5521 /* GNU code is written to assume at least RE_NREGS registers will be set
5522 (and at least one extra will be -1). */
5523 bufp->regs_allocated = REGS_UNALLOCATED;
5e69f11e 5524
fa9a63c5
RM
5525 /* And GNU code determines whether or not to get register information
5526 by passing null for the REGS argument to re_match, etc., not by
5527 setting no_sub. */
5528 bufp->no_sub = 0;
5e69f11e 5529
b18215fc 5530 /* Match anchors at newline. */
fa9a63c5 5531 bufp->newline_anchor = 1;
5e69f11e 5532
fa9a63c5
RM
5533 ret = regex_compile (pattern, length, re_syntax_options, bufp);
5534
5535 if (!ret)
5536 return NULL;
5537 return gettext (re_error_msgid[(int) ret]);
5e69f11e 5538}
fa9a63c5 5539\f
b18215fc
RS
5540/* Entry points compatible with 4.2 BSD regex library. We don't define
5541 them unless specifically requested. */
fa9a63c5 5542
0c085854 5543#if defined (_REGEX_RE_COMP) || defined (_LIBC)
fa9a63c5
RM
5544
5545/* BSD has one and only one pattern buffer. */
5546static struct re_pattern_buffer re_comp_buf;
5547
5548char *
48afdd44
RM
5549#ifdef _LIBC
5550/* Make these definitions weak in libc, so POSIX programs can redefine
5551 these names if they don't use our functions, and still use
5552 regcomp/regexec below without link errors. */
5553weak_function
5554#endif
fa9a63c5
RM
5555re_comp (s)
5556 const char *s;
5557{
5558 reg_errcode_t ret;
5e69f11e 5559
fa9a63c5
RM
5560 if (!s)
5561 {
5562 if (!re_comp_buf.buffer)
5563 return gettext ("No previous regular expression");
5564 return 0;
5565 }
5566
5567 if (!re_comp_buf.buffer)
5568 {
5569 re_comp_buf.buffer = (unsigned char *) malloc (200);
5570 if (re_comp_buf.buffer == NULL)
b18215fc 5571 return gettext (re_error_msgid[(int) REG_ESPACE]);
fa9a63c5
RM
5572 re_comp_buf.allocated = 200;
5573
5574 re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
5575 if (re_comp_buf.fastmap == NULL)
5576 return gettext (re_error_msgid[(int) REG_ESPACE]);
5577 }
5578
5579 /* Since `re_exec' always passes NULL for the `regs' argument, we
5580 don't need to initialize the pattern buffer fields which affect it. */
5581
b18215fc 5582 /* Match anchors at newlines. */
fa9a63c5
RM
5583 re_comp_buf.newline_anchor = 1;
5584
5585 ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
5e69f11e 5586
fa9a63c5
RM
5587 if (!ret)
5588 return NULL;
5589
5590 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
5591 return (char *) gettext (re_error_msgid[(int) ret]);
5592}
5593
5594
5595int
48afdd44
RM
5596#ifdef _LIBC
5597weak_function
5598#endif
fa9a63c5
RM
5599re_exec (s)
5600 const char *s;
5601{
5602 const int len = strlen (s);
5603 return
5604 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
5605}
5606#endif /* _REGEX_RE_COMP */
5607\f
5608/* POSIX.2 functions. Don't define these for Emacs. */
5609
5610#ifndef emacs
5611
5612/* regcomp takes a regular expression as a string and compiles it.
5613
b18215fc 5614 PREG is a regex_t *. We do not expect any fields to be initialized,
fa9a63c5
RM
5615 since POSIX says we shouldn't. Thus, we set
5616
5617 `buffer' to the compiled pattern;
5618 `used' to the length of the compiled pattern;
5619 `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
5620 REG_EXTENDED bit in CFLAGS is set; otherwise, to
5621 RE_SYNTAX_POSIX_BASIC;
5622 `newline_anchor' to REG_NEWLINE being set in CFLAGS;
5623 `fastmap' and `fastmap_accurate' to zero;
5624 `re_nsub' to the number of subexpressions in PATTERN.
5625
5626 PATTERN is the address of the pattern string.
5627
5628 CFLAGS is a series of bits which affect compilation.
5629
5630 If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
5631 use POSIX basic syntax.
5632
5633 If REG_NEWLINE is set, then . and [^...] don't match newline.
5634 Also, regexec will try a match beginning after every newline.
5635
5636 If REG_ICASE is set, then we considers upper- and lowercase
5637 versions of letters to be equivalent when matching.
5638
5639 If REG_NOSUB is set, then when PREG is passed to regexec, that
5640 routine will report only success or failure, and nothing about the
5641 registers.
5642
b18215fc 5643 It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
fa9a63c5
RM
5644 the return codes and their meanings.) */
5645
5646int
5647regcomp (preg, pattern, cflags)
5648 regex_t *preg;
5e69f11e 5649 const char *pattern;
fa9a63c5
RM
5650 int cflags;
5651{
5652 reg_errcode_t ret;
5653 unsigned syntax
5654 = (cflags & REG_EXTENDED) ?
5655 RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
5656
5657 /* regex_compile will allocate the space for the compiled pattern. */
5658 preg->buffer = 0;
5659 preg->allocated = 0;
5660 preg->used = 0;
5e69f11e 5661
fa9a63c5
RM
5662 /* Don't bother to use a fastmap when searching. This simplifies the
5663 REG_NEWLINE case: if we used a fastmap, we'd have to put all the
5664 characters after newlines into the fastmap. This way, we just try
5665 every character. */
5666 preg->fastmap = 0;
5e69f11e 5667
fa9a63c5
RM
5668 if (cflags & REG_ICASE)
5669 {
5670 unsigned i;
5e69f11e 5671
6676cb1c
RS
5672 preg->translate
5673 = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
5674 * sizeof (*(RE_TRANSLATE_TYPE)0));
fa9a63c5 5675 if (preg->translate == NULL)
b18215fc 5676 return (int) REG_ESPACE;
fa9a63c5
RM
5677
5678 /* Map uppercase characters to corresponding lowercase ones. */
5679 for (i = 0; i < CHAR_SET_SIZE; i++)
b18215fc 5680 preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
fa9a63c5
RM
5681 }
5682 else
5683 preg->translate = NULL;
5684
5685 /* If REG_NEWLINE is set, newlines are treated differently. */
5686 if (cflags & REG_NEWLINE)
5687 { /* REG_NEWLINE implies neither . nor [^...] match newline. */
5688 syntax &= ~RE_DOT_NEWLINE;
5689 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
b18215fc 5690 /* It also changes the matching behavior. */
fa9a63c5
RM
5691 preg->newline_anchor = 1;
5692 }
5693 else
5694 preg->newline_anchor = 0;
5695
5696 preg->no_sub = !!(cflags & REG_NOSUB);
5697
5e69f11e 5698 /* POSIX says a null character in the pattern terminates it, so we
fa9a63c5
RM
5699 can use strlen here in compiling the pattern. */
5700 ret = regex_compile (pattern, strlen (pattern), syntax, preg);
5e69f11e 5701
fa9a63c5
RM
5702 /* POSIX doesn't distinguish between an unmatched open-group and an
5703 unmatched close-group: both are REG_EPAREN. */
5704 if (ret == REG_ERPAREN) ret = REG_EPAREN;
5e69f11e 5705
fa9a63c5
RM
5706 return (int) ret;
5707}
5708
5709
5710/* regexec searches for a given pattern, specified by PREG, in the
5711 string STRING.
5e69f11e 5712
fa9a63c5 5713 If NMATCH is zero or REG_NOSUB was set in the cflags argument to
b18215fc 5714 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
fa9a63c5
RM
5715 least NMATCH elements, and we set them to the offsets of the
5716 corresponding matched substrings.
5e69f11e 5717
fa9a63c5
RM
5718 EFLAGS specifies `execution flags' which affect matching: if
5719 REG_NOTBOL is set, then ^ does not match at the beginning of the
5720 string; if REG_NOTEOL is set, then $ does not match at the end.
5e69f11e 5721
fa9a63c5
RM
5722 We return 0 if we find a match and REG_NOMATCH if not. */
5723
5724int
5725regexec (preg, string, nmatch, pmatch, eflags)
5726 const regex_t *preg;
5e69f11e
RM
5727 const char *string;
5728 size_t nmatch;
5729 regmatch_t pmatch[];
fa9a63c5
RM
5730 int eflags;
5731{
5732 int ret;
5733 struct re_registers regs;
5734 regex_t private_preg;
5735 int len = strlen (string);
5736 boolean want_reg_info = !preg->no_sub && nmatch > 0;
5737
5738 private_preg = *preg;
5e69f11e 5739
fa9a63c5
RM
5740 private_preg.not_bol = !!(eflags & REG_NOTBOL);
5741 private_preg.not_eol = !!(eflags & REG_NOTEOL);
5e69f11e 5742
fa9a63c5
RM
5743 /* The user has told us exactly how many registers to return
5744 information about, via `nmatch'. We have to pass that on to the
b18215fc 5745 matching routines. */
fa9a63c5 5746 private_preg.regs_allocated = REGS_FIXED;
5e69f11e 5747
fa9a63c5
RM
5748 if (want_reg_info)
5749 {
5750 regs.num_regs = nmatch;
5751 regs.start = TALLOC (nmatch, regoff_t);
5752 regs.end = TALLOC (nmatch, regoff_t);
5753 if (regs.start == NULL || regs.end == NULL)
b18215fc 5754 return (int) REG_NOMATCH;
fa9a63c5
RM
5755 }
5756
5757 /* Perform the searching operation. */
5758 ret = re_search (&private_preg, string, len,
b18215fc
RS
5759 /* start: */ 0, /* range: */ len,
5760 want_reg_info ? &regs : (struct re_registers *) 0);
5e69f11e 5761
fa9a63c5
RM
5762 /* Copy the register information to the POSIX structure. */
5763 if (want_reg_info)
5764 {
5765 if (ret >= 0)
b18215fc
RS
5766 {
5767 unsigned r;
fa9a63c5 5768
b18215fc
RS
5769 for (r = 0; r < nmatch; r++)
5770 {
5771 pmatch[r].rm_so = regs.start[r];
5772 pmatch[r].rm_eo = regs.end[r];
5773 }
5774 }
fa9a63c5 5775
b18215fc 5776 /* If we needed the temporary register info, free the space now. */
fa9a63c5
RM
5777 free (regs.start);
5778 free (regs.end);
5779 }
5780
5781 /* We want zero return to mean success, unlike `re_search'. */
5782 return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
5783}
5784
5785
5786/* Returns a message corresponding to an error code, ERRCODE, returned
5787 from either regcomp or regexec. We don't use PREG here. */
5788
5789size_t
5790regerror (errcode, preg, errbuf, errbuf_size)
5791 int errcode;
5792 const regex_t *preg;
5793 char *errbuf;
5794 size_t errbuf_size;
5795{
5796 const char *msg;
5797 size_t msg_size;
5798
5799 if (errcode < 0
5800 || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0])))
5e69f11e 5801 /* Only error codes returned by the rest of the code should be passed
b18215fc 5802 to this routine. If we are given anything else, or if other regex
fa9a63c5
RM
5803 code generates an invalid error code, then the program has a bug.
5804 Dump core so we can fix it. */
5805 abort ();
5806
5807 msg = gettext (re_error_msgid[errcode]);
5808
5809 msg_size = strlen (msg) + 1; /* Includes the null. */
5e69f11e 5810
fa9a63c5
RM
5811 if (errbuf_size != 0)
5812 {
5813 if (msg_size > errbuf_size)
b18215fc
RS
5814 {
5815 strncpy (errbuf, msg, errbuf_size - 1);
5816 errbuf[errbuf_size - 1] = 0;
5817 }
fa9a63c5 5818 else
b18215fc 5819 strcpy (errbuf, msg);
fa9a63c5
RM
5820 }
5821
5822 return msg_size;
5823}
5824
5825
5826/* Free dynamically allocated space used by PREG. */
5827
5828void
5829regfree (preg)
5830 regex_t *preg;
5831{
5832 if (preg->buffer != NULL)
5833 free (preg->buffer);
5834 preg->buffer = NULL;
5e69f11e 5835
fa9a63c5
RM
5836 preg->allocated = 0;
5837 preg->used = 0;
5838
5839 if (preg->fastmap != NULL)
5840 free (preg->fastmap);
5841 preg->fastmap = NULL;
5842 preg->fastmap_accurate = 0;
5843
5844 if (preg->translate != NULL)
5845 free (preg->translate);
5846 preg->translate = NULL;
5847}
5848
5849#endif /* not emacs */