src/regex.c

   1 /* Extended regular expression matching and search library, version
   2    0.12.  (Implements POSIX draft P1003.2/D11.2, except for some of the
   3    internationalization features.)
   4
   5    Copyright (C) 1993-2013 Free Software Foundation, Inc.
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3, or (at your option)
  10    any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 /* TODO:
  21    - structure the opcode space into opcode+flag.
  22    - merge with glibc's regex.[ch].
  23    - replace (succeed_n + jump_n + set_number_at) with something that doesn't
  24      need to modify the compiled regexp so that re_match can be reentrant.
  25    - get rid of on_failure_jump_smart by doing the optimization in re_comp
  26      rather than at run-time, so that re_match can be reentrant.
  27 */
  28
  29 /* AIX requires this to be the first thing in the file.  */
  30 #if defined _AIX && !defined REGEX_MALLOC
  31   #pragma alloca
  32 #endif
  33
  34 /* Ignore some GCC warnings for now.  This section should go away
  35    once the Emacs and Gnulib regex code is merged.  */
  36 #if 4 < __GNUC__ + (5 <= __GNUC_MINOR__) || defined __clang__
  37 # pragma GCC diagnostic ignored "-Wstrict-overflow"
  38 # ifndef emacs
  39 #  pragma GCC diagnostic ignored "-Wunused-function"
  40 #  pragma GCC diagnostic ignored "-Wunused-macros"
  41 #  pragma GCC diagnostic ignored "-Wunused-result"
  42 #  pragma GCC diagnostic ignored "-Wunused-variable"
  43 # endif
  44 #endif
  45
  46 #if 4 < __GNUC__ + (5 <= __GNUC_MINOR__) && ! defined __clang__
  47 # pragma GCC diagnostic ignored "-Wunused-but-set-variable"
  48 #endif
  49
  50 #include <config.h>
  51
  52 #include <stddef.h>
  53
  54 #ifdef emacs
  55 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
  56 # include <sys/types.h>
  57 #endif
  58
  59 /* Whether to use ISO C Amendment 1 wide char functions.
  60    Those should not be used for Emacs since it uses its own.  */
  61 #if defined _LIBC
  62 #define WIDE_CHAR_SUPPORT 1
  63 #else
  64 #define WIDE_CHAR_SUPPORT \
  65         (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC && !emacs)
  66 #endif
  67
/* For platforms which support the ISO C Amendment 1 functionality we
   support user-defined character classes.  */
  70 #if WIDE_CHAR_SUPPORT
  71 /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
  72 # include <wchar.h>
  73 # include <wctype.h>
  74 #endif
  75
  76 #ifdef _LIBC
  77 /* We have to keep the namespace clean.  */
  78 # define regfree(preg) __regfree (preg)
  79 # define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
  80 # define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
  81 # define regerror(err_code, preg, errbuf, errbuf_size) \
  82         __regerror (err_code, preg, errbuf, errbuf_size)
  83 # define re_set_registers(bu, re, nu, st, en) \
  84         __re_set_registers (bu, re, nu, st, en)
  85 # define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
  86         __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
  87 # define re_match(bufp, string, size, pos, regs) \
  88         __re_match (bufp, string, size, pos, regs)
  89 # define re_search(bufp, string, size, startpos, range, regs) \
  90         __re_search (bufp, string, size, startpos, range, regs)
  91 # define re_compile_pattern(pattern, length, bufp) \
  92         __re_compile_pattern (pattern, length, bufp)
  93 # define re_set_syntax(syntax) __re_set_syntax (syntax)
  94 # define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
  95         __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
  96 # define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
  97
  98 /* Make sure we call libc's function even if the user overrides them.  */
  99 # define btowc __btowc
 100 # define iswctype __iswctype
 101 # define wctype __wctype
 102
 103 # define WEAK_ALIAS(a,b) weak_alias (a, b)
 104
 105 /* We are also using some library internals.  */
 106 # include <locale/localeinfo.h>
 107 # include <locale/elem-hash.h>
 108 # include <langinfo.h>
 109 #else
 110 # define WEAK_ALIAS(a,b)
 111 #endif
 112
 113 /* This is for other GNU distributions with internationalized messages.  */
 114 #if HAVE_LIBINTL_H || defined _LIBC
 115 # include <libintl.h>
 116 #else
 117 # define gettext(msgid) (msgid)
 118 #endif
 119
 120 #ifndef gettext_noop
 121 /* This define is so xgettext can find the internationalizable
 122    strings.  */
 123 # define gettext_noop(String) String
 124 #endif
 125
 126 /* The `emacs' switch turns on certain matching commands
 127    that make sense only in Emacs. */
 128 #ifdef emacs
 129
 130 # include "lisp.h"
 131 # include "character.h"
 132 # include "buffer.h"
 133
 134 /* Make syntax table lookup grant data in gl_state.  */
 135 # define SYNTAX_ENTRY_VIA_PROPERTY
 136
 137 # include "syntax.h"
 138 # include "category.h"
 139
 140 # ifdef malloc
 141 #  undef malloc
 142 # endif
 143 # define malloc xmalloc
 144 # ifdef realloc
 145 #  undef realloc
 146 # endif
 147 # define realloc xrealloc
 148 # ifdef free
 149 #  undef free
 150 # endif
 151 # define free xfree
 152
 153 /* Converts the pointer to the char to BEG-based offset from the start.  */
 154 # define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
 155 # define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
 156
 157 # define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
 158 # define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte)
 159 # define RE_STRING_CHAR(p, multibyte) \
 160   (multibyte ? (STRING_CHAR (p)) : (*(p)))
 161 # define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) \
 162   (multibyte ? (STRING_CHAR_AND_LENGTH (p, len)) : ((len) = 1, *(p)))
 163
 164 # define RE_CHAR_TO_MULTIBYTE(c) UNIBYTE_TO_CHAR (c)
 165
 166 # define RE_CHAR_TO_UNIBYTE(c) CHAR_TO_BYTE_SAFE (c)
 167
/* Set C to the (possibly converted to multibyte) character before P.  P
   points into a string which is the virtual concatenation of STR1
   (which ends at END1) and STR2 (which ends at END2).  */
 171 # define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2)                     \
 172   do {                                                                       \
 173     if (target_multibyte)                                                    \
 174       {                                                                      \
 175         re_char *dtemp = (p) == (str2) ? (end1) : (p);                       \
 176         re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \
 177         while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp));                   \
 178         c = STRING_CHAR (dtemp);                                             \
 179       }                                                                      \
 180     else                                                                     \
 181       {                                                                      \
 182         (c = ((p) == (str2) ? (end1) : (p))[-1]);                            \
 183         (c) = RE_CHAR_TO_MULTIBYTE (c);                                      \
 184       }                                                                      \
 185   } while (0)
 186
 187 /* Set C a (possibly converted to multibyte) character at P, and set
 188    LEN to the byte length of that character.  */
 189 # define GET_CHAR_AFTER(c, p, len)              \
 190   do {                                          \
 191     if (target_multibyte)                       \
 192       (c) = STRING_CHAR_AND_LENGTH (p, len);    \
 193     else                                        \
 194       {                                         \
 195         (c) = *p;                               \
 196         len = 1;                                \
 197         (c) = RE_CHAR_TO_MULTIBYTE (c);         \
 198       }                                         \
 199    } while (0)
 200
 201 #else  /* not emacs */
 202
 203 /* If we are not linking with Emacs proper,
 204    we can't use the relocating allocator
 205    even if config.h says that we can.  */
 206 # undef REL_ALLOC
 207
 208 # include <unistd.h>
 209
 210 /* When used in Emacs's lib-src, we need xmalloc and xrealloc. */
 211
 212 static void *
 213 xmalloc (size_t size)
 214 {
 215   void *val = malloc (size);
 216   if (!val && size)
 217     {
 218       write (2, "virtual memory exhausted\n", 25);
 219       exit (1);
 220     }
 221   return val;
 222 }
 223
 224 static void *
 225 xrealloc (void *block, size_t size)
 226 {
 227   void *val;
 228   /* We must call malloc explicitly when BLOCK is 0, since some
 229      reallocs don't do this.  */
 230   if (! block)
 231     val = malloc (size);
 232   else
 233     val = realloc (block, size);
 234   if (!val && size)
 235     {
 236       write (2, "virtual memory exhausted\n", 25);
 237       exit (1);
 238     }
 239   return val;
 240 }
 241
 242 # ifdef malloc
 243 #  undef malloc
 244 # endif
 245 # define malloc xmalloc
 246 # ifdef realloc
 247 #  undef realloc
 248 # endif
 249 # define realloc xrealloc
 250
 251 # include <stdbool.h>
 252 # include <string.h>
 253
 254 /* Define the syntax stuff for \<, \>, etc.  */
 255
 256 /* Sword must be nonzero for the wordchar pattern commands in re_match_2.  */
 257 enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
 258
 259 /* Dummy macros for non-Emacs environments.  */
 260 # define CHAR_CHARSET(c) 0
 261 # define CHARSET_LEADING_CODE_BASE(c) 0
 262 # define MAX_MULTIBYTE_LENGTH 1
 263 # define RE_MULTIBYTE_P(x) 0
 264 # define RE_TARGET_MULTIBYTE_P(x) 0
 265 # define WORD_BOUNDARY_P(c1, c2) (0)
 266 # define CHAR_HEAD_P(p) (1)
 267 # define SINGLE_BYTE_CHAR_P(c) (1)
 268 # define SAME_CHARSET_P(c1, c2) (1)
 269 # define BYTES_BY_CHAR_HEAD(p) (1)
 270 # define PREV_CHAR_BOUNDARY(p, limit) ((p)--)
 271 # define STRING_CHAR(p) (*(p))
 272 # define RE_STRING_CHAR(p, multibyte) STRING_CHAR (p)
 273 # define CHAR_STRING(c, s) (*(s) = (c), 1)
 274 # define STRING_CHAR_AND_LENGTH(p, actual_len) ((actual_len) = 1, *(p))
 275 # define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) STRING_CHAR_AND_LENGTH (p, len)
 276 # define RE_CHAR_TO_MULTIBYTE(c) (c)
 277 # define RE_CHAR_TO_UNIBYTE(c) (c)
 278 # define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
 279   (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
 280 # define GET_CHAR_AFTER(c, p, len)      \
 281   (c = *p, len = 1)
 282 # define MAKE_CHAR(charset, c1, c2) (c1)
 283 # define BYTE8_TO_CHAR(c) (c)
 284 # define CHAR_BYTE8_P(c) (0)
 285 # define CHAR_LEADING_CODE(c) (c)
 286
 287 #endif /* not emacs */
 288
 289 #ifndef RE_TRANSLATE
 290 # define RE_TRANSLATE(TBL, C) ((unsigned char)(TBL)[C])
 291 # define RE_TRANSLATE_P(TBL) (TBL)
 292 #endif
 293 \f
 294 /* Get the interface, including the syntax bits.  */
 295 #include "regex.h"
 296
 297 /* isalpha etc. are used for the character classes.  */
 298 #include <ctype.h>
 299
 300 #ifdef emacs
 301
 302 /* 1 if C is an ASCII character.  */
 303 # define IS_REAL_ASCII(c) ((c) < 0200)
 304
 305 /* 1 if C is a unibyte character.  */
 306 # define ISUNIBYTE(c) (SINGLE_BYTE_CHAR_P ((c)))
 307
 308 /* The Emacs definitions should not be directly affected by locales.  */
 309
 310 /* In Emacs, these are only used for single-byte characters.  */
 311 # define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
 312 # define ISCNTRL(c) ((c) < ' ')
 313 # define ISXDIGIT(c) (((c) >= '0' && (c) <= '9')                \
 314                      || ((c) >= 'a' && (c) <= 'f')      \
 315                      || ((c) >= 'A' && (c) <= 'F'))
 316
 317 /* This is only used for single-byte characters.  */
 318 # define ISBLANK(c) ((c) == ' ' || (c) == '\t')
 319
 320 /* The rest must handle multibyte characters.  */
 321
 322 # define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c)                             \
 323                     ? (c) > ' ' && !((c) >= 0177 && (c) <= 0237)        \
 324                     : 1)
 325
 326 # define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c)                             \
 327                     ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237)       \
 328                     : 1)
 329
 330 # define ISALNUM(c) (IS_REAL_ASCII (c)                  \
 331                     ? (((c) >= 'a' && (c) <= 'z')       \
 332                        || ((c) >= 'A' && (c) <= 'Z')    \
 333                        || ((c) >= '0' && (c) <= '9'))   \
 334                     : SYNTAX (c) == Sword)
 335
 336 # define ISALPHA(c) (IS_REAL_ASCII (c)                  \
 337                     ? (((c) >= 'a' && (c) <= 'z')       \
 338                        || ((c) >= 'A' && (c) <= 'Z'))   \
 339                     : SYNTAX (c) == Sword)
 340
 341 # define ISLOWER(c) lowercasep (c)
 342
 343 # define ISPUNCT(c) (IS_REAL_ASCII (c)                          \
 344                     ? ((c) > ' ' && (c) < 0177                  \
 345                        && !(((c) >= 'a' && (c) <= 'z')          \
 346                             || ((c) >= 'A' && (c) <= 'Z')       \
 347                             || ((c) >= '0' && (c) <= '9')))     \
 348                     : SYNTAX (c) != Sword)
 349
 350 # define ISSPACE(c) (SYNTAX (c) == Swhitespace)
 351
 352 # define ISUPPER(c) uppercasep (c)
 353
 354 # define ISWORD(c) (SYNTAX (c) == Sword)
 355
 356 #else /* not emacs */
 357
 358 /* 1 if C is an ASCII character.  */
 359 # define IS_REAL_ASCII(c) ((c) < 0200)
 360
 361 /* This distinction is not meaningful, except in Emacs.  */
 362 # define ISUNIBYTE(c) 1
 363
 364 # ifdef isblank
 365 #  define ISBLANK(c) isblank (c)
 366 # else
 367 #  define ISBLANK(c) ((c) == ' ' || (c) == '\t')
 368 # endif
 369 # ifdef isgraph
 370 #  define ISGRAPH(c) isgraph (c)
 371 # else
 372 #  define ISGRAPH(c) (isprint (c) && !isspace (c))
 373 # endif
 374
 375 /* Solaris defines ISPRINT so we must undefine it first.  */
 376 # undef ISPRINT
 377 # define ISPRINT(c) isprint (c)
 378 # define ISDIGIT(c) isdigit (c)
 379 # define ISALNUM(c) isalnum (c)
 380 # define ISALPHA(c) isalpha (c)
 381 # define ISCNTRL(c) iscntrl (c)
 382 # define ISLOWER(c) islower (c)
 383 # define ISPUNCT(c) ispunct (c)
 384 # define ISSPACE(c) isspace (c)
 385 # define ISUPPER(c) isupper (c)
 386 # define ISXDIGIT(c) isxdigit (c)
 387
 388 # define ISWORD(c) ISALPHA (c)
 389
 390 # ifdef _tolower
 391 #  define TOLOWER(c) _tolower (c)
 392 # else
 393 #  define TOLOWER(c) tolower (c)
 394 # endif
 395
 396 /* How many characters in the character set.  */
 397 # define CHAR_SET_SIZE 256
 398
 399 # ifdef SYNTAX_TABLE
 400
 401 extern char *re_syntax_table;
 402
 403 # else /* not SYNTAX_TABLE */
 404
 405 static char re_syntax_table[CHAR_SET_SIZE];
 406
 407 static void
 408 init_syntax_once (void)
 409 {
 410    register int c;
 411    static int done = 0;
 412
 413    if (done)
 414      return;
 415
 416    memset (re_syntax_table, 0, sizeof re_syntax_table);
 417
 418    for (c = 0; c < CHAR_SET_SIZE; ++c)
 419      if (ISALNUM (c))
 420         re_syntax_table[c] = Sword;
 421
 422    re_syntax_table['_'] = Ssymbol;
 423
 424    done = 1;
 425 }
 426
 427 # endif /* not SYNTAX_TABLE */
 428
 429 # define SYNTAX(c) re_syntax_table[(c)]
 430
 431 #endif /* not emacs */
 432 \f
 433 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
 434 \f
 435 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
 436    use `alloca' instead of `malloc'.  This is because using malloc in
 437    re_search* or re_match* could cause memory leaks when C-g is used in
 438    Emacs; also, malloc is slower and causes storage fragmentation.  On
 439    the other hand, malloc is more portable, and easier to debug.
 440
 441    Because we sometimes use alloca, some routines have to be macros,
 442    not functions -- `alloca'-allocated space disappears at the end of the
 443    function it is called in.  */
 444
 445 #ifdef REGEX_MALLOC
 446
 447 # define REGEX_ALLOCATE malloc
 448 # define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
 449 # define REGEX_FREE free
 450
 451 #else /* not REGEX_MALLOC  */
 452
 453 /* Emacs already defines alloca, sometimes.  */
 454 # ifndef alloca
 455
 456 /* Make alloca work the best possible way.  */
 457 #  ifdef __GNUC__
 458 #   define alloca __builtin_alloca
 459 #  else /* not __GNUC__ */
 460 #   ifdef HAVE_ALLOCA_H
 461 #    include <alloca.h>
 462 #   endif /* HAVE_ALLOCA_H */
 463 #  endif /* not __GNUC__ */
 464
 465 # endif /* not alloca */
 466
 467 # define REGEX_ALLOCATE alloca
 468
 469 /* Assumes a `char *destination' variable.  */
 470 # define REGEX_REALLOCATE(source, osize, nsize)                         \
 471   (destination = (char *) alloca (nsize),                               \
 472    memcpy (destination, source, osize))
 473
 474 /* No need to do anything to free, after alloca.  */
 475 # define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
 476
 477 #endif /* not REGEX_MALLOC */
 478
 479 /* Define how to allocate the failure stack.  */
 480
 481 #if defined REL_ALLOC && defined REGEX_MALLOC
 482
 483 # define REGEX_ALLOCATE_STACK(size)                             \
 484   r_alloc (&failure_stack_ptr, (size))
 485 # define REGEX_REALLOCATE_STACK(source, osize, nsize)           \
 486   r_re_alloc (&failure_stack_ptr, (nsize))
 487 # define REGEX_FREE_STACK(ptr)                                  \
 488   r_alloc_free (&failure_stack_ptr)
 489
 490 #else /* not using relocating allocator */
 491
 492 # ifdef REGEX_MALLOC
 493
 494 #  define REGEX_ALLOCATE_STACK malloc
 495 #  define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
 496 #  define REGEX_FREE_STACK free
 497
 498 # else /* not REGEX_MALLOC */
 499
 500 #  define REGEX_ALLOCATE_STACK alloca
 501
 502 #  define REGEX_REALLOCATE_STACK(source, osize, nsize)                  \
 503    REGEX_REALLOCATE (source, osize, nsize)
 504 /* No need to explicitly free anything.  */
 505 #  define REGEX_FREE_STACK(arg) ((void)0)
 506
 507 # endif /* not REGEX_MALLOC */
 508 #endif /* not using relocating allocator */
 509
 510
/* True if `size1' is nonzero and PTR is pointing anywhere inside
   `string1' or just past its end.  This works if PTR is NULL, which is
   a good thing.

   The macro refers to variables named `size1' and `string1', which
   must be in scope wherever it is expanded.  PTR is evaluated twice.  */
#define FIRST_STRING_P(ptr)                                     \
  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
 516
 517 /* (Re)Allocate N items of type T using malloc, or fail.  */
 518 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
 519 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
 520 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
 521
 522 #define BYTEWIDTH 8 /* In bits.  */
 523
 524 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
 525
 526 #undef MAX
 527 #undef MIN
 528 #define MAX(a, b) ((a) > (b) ? (a) : (b))
 529 #define MIN(a, b) ((a) < (b) ? (a) : (b))
 530
 531 /* Type of source-pattern and string chars.  */
 532 #ifdef _MSC_VER
 533 typedef unsigned char re_char;
 534 #else
 535 typedef const unsigned char re_char;
 536 #endif
 537
 538 typedef char boolean;
 539
 540 static regoff_t re_match_2_internal (struct re_pattern_buffer *bufp,
 541                                      re_char *string1, size_t size1,
 542                                      re_char *string2, size_t size2,
 543                                      ssize_t pos,
 544                                      struct re_registers *regs,
 545                                      ssize_t stop);
 546 \f
 547 /* These are the command codes that appear in compiled regular
 548    expressions.  Some opcodes are followed by argument bytes.  A
 549    command code can specify any interpretation whatsoever for its
 550    arguments.  Zero bytes may appear in the compiled regular expression.  */
 551
/* Opcodes of the compiled pattern.  `no_op' is explicitly 0 and the
   remaining codes are numbered consecutively after it; the last five
   exist only when `emacs' is defined.  */
typedef enum
{
  no_op = 0,

  /* Succeed right away--no more backtracking.  */
  succeed,

  /* Followed by one byte giving n, then by n literal bytes.  */
  exactn,

  /* Matches any (more or less) character.  */
  anychar,

  /* Matches any one char belonging to specified set.  First
     following byte is number of bitmap bytes.  Then come bytes
     for a bitmap saying which chars are in.  Bits in each byte
     are ordered low-bit-first.  A character is in the set if its
     bit is 1.  A character too large to have a bit in the map is
     automatically not in the set.

     If the length byte has the 0x80 bit set, then that stuff
     is followed by a range table:
         2 bytes of flags for character sets (low 8 bits, high 8 bits)
             See RANGE_TABLE_WORK_BITS below.
         2 bytes, the number of pairs that follow (up to 32767)
         pairs, each 2 multibyte characters,
             each multibyte character represented as 3 bytes.  */
  charset,

  /* Same parameters as charset, but match any character that is
     not one of those specified.  */
  charset_not,

  /* Start remembering the text that is matched, for storing in a
     register.  Followed by one byte with the register number, in
     the range 0 to one less than the pattern buffer's re_nsub
     field.  */
  start_memory,

  /* Stop remembering the text that is matched and store it in a
     memory register.  Followed by one byte with the register
     number, in the range 0 to one less than `re_nsub' in the
     pattern buffer.  */
  stop_memory,

  /* Match a duplicate of something remembered.  Followed by one
     byte containing the register number.  */
  duplicate,

  /* Fail unless at beginning of line.  */
  begline,

  /* Fail unless at end of line.  */
  endline,

  /* Succeeds if at beginning of buffer (if emacs) or at beginning
     of string to be matched (if not).  */
  begbuf,

  /* Analogously, for end of buffer/string.  */
  endbuf,

  /* Followed by two-byte relative address to which to jump.  */
  jump,

  /* Followed by two-byte relative address of place to resume at
     in case of failure.  */
  on_failure_jump,

  /* Like on_failure_jump, but pushes a placeholder instead of the
     current string position when executed.  */
  on_failure_keep_string_jump,

  /* Just like `on_failure_jump', except that it checks that we
     don't get stuck in an infinite loop (matching an empty string
     indefinitely).  */
  on_failure_jump_loop,

  /* Just like `on_failure_jump_loop', except that it checks for
     a different kind of loop (the kind that shows up with non-greedy
     operators).  This operation has to be immediately preceded
     by a `no_op'.  */
  on_failure_jump_nastyloop,

  /* A smart `on_failure_jump' used for greedy * and + operators.
     It analyzes the loop before which it is put and if the
     loop does not require backtracking, it changes itself to
     `on_failure_keep_string_jump' and short-circuits the loop,
     else it just defaults to changing itself into `on_failure_jump'.
     It assumes that it is pointing to just past a `jump'.  */
  on_failure_jump_smart,

  /* Followed by two-byte relative address and two-byte number n.
     After matching N times, jump to the address upon failure.
     Does not work if N starts at 0: use on_failure_jump_loop
     instead.  */
  succeed_n,

  /* Followed by two-byte relative address, and two-byte number n.
     Jump to the address N times, then fail.  */
  jump_n,

  /* Set the following two-byte relative address to the
     subsequent two-byte number.  The address *includes* the two
     bytes of number.  */
  set_number_at,

  wordbeg,      /* Succeeds if at word beginning.  */
  wordend,      /* Succeeds if at word end.  */

  wordbound,    /* Succeeds if at a word boundary.  */
  notwordbound, /* Succeeds if not at a word boundary.  */

  symbeg,       /* Succeeds if at symbol beginning.  */
  symend,       /* Succeeds if at symbol end.  */

  /* Matches any character whose syntax is specified.  Followed by
     a byte which contains a syntax code, e.g., Sword.  */
  syntaxspec,

  /* Matches any character whose syntax is not that specified.  */
  notsyntaxspec

#ifdef emacs
  ,before_dot,  /* Succeeds if before point.  */
  at_dot,       /* Succeeds if at point.  */
  after_dot,    /* Succeeds if after point.  */

  /* Matches any character whose category-set contains the specified
     category.  The operator is followed by a byte which contains a
     category code (mnemonic ASCII character).  */
  categoryspec,

  /* Matches any character whose category-set does not contain the
     specified category.  The operator is followed by a byte which
     contains the category code (mnemonic ASCII character).  */
  notcategoryspec
#endif /* emacs */
} re_opcode_t;
 691 \f
 692 /* Common operations on the compiled pattern.  */
 693
/* Store NUMBER in two contiguous bytes starting at DESTINATION: the
   low-order eight bits go in DESTINATION[0] and NUMBER shifted right
   by eight bits goes in DESTINATION[1].  Both arguments are evaluated
   twice, so they should have no side effects.  */

#define STORE_NUMBER(destination, number)                               \
  do {                                                                  \
    (destination)[0] = (number) & 0377;                                 \
    (destination)[1] = (number) >> 8;                                   \
  } while (0)
 701
/* Same as STORE_NUMBER, except increment DESTINATION to
   the byte after where the number is stored (i.e. advance it by 2).
   Therefore, DESTINATION must be an lvalue.  */

#define STORE_NUMBER_AND_INCR(destination, number)                      \
  do {                                                                  \
    STORE_NUMBER (destination, number);                                 \
    (destination) += 2;                                                 \
  } while (0)
 711
 712 /* Put into DESTINATION a number stored in two contiguous bytes starting
 713    at SOURCE.  */
 714
 715 #define EXTRACT_NUMBER(destination, source)                             \
 716   ((destination) = extract_number (source))
 717
 718 static int
 719 extract_number (re_char *source)
 720 {
 721   return (SIGN_EXTEND_CHAR (source[1]) << 8) + source[0];
 722 }
 723
 724 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
 725    SOURCE must be an lvalue.  */
 726
 727 #define EXTRACT_NUMBER_AND_INCR(destination, source)                    \
 728   ((destination) = extract_number_and_incr (&source))
 729
 730 static int
 731 extract_number_and_incr (re_char **source)
 732 {
 733   int num = extract_number (*source);
 734   *source += 2;
 735   return num;
 736 }
 737 \f
/* Store a multibyte character in three contiguous bytes starting at
   DESTINATION, low-order byte first, and increment DESTINATION to the
   byte after where the character is stored.  Therefore, DESTINATION
   must be an lvalue.  CHARACTER is evaluated three times.  */

#define STORE_CHARACTER_AND_INCR(destination, character)        \
  do {                                                          \
    (destination)[0] = (character) & 0377;                      \
    (destination)[1] = ((character) >> 8) & 0377;               \
    (destination)[2] = (character) >> 16;                       \
    (destination) += 3;                                         \
  } while (0)
 749
/* Put into DESTINATION a character stored in three contiguous bytes
   starting at SOURCE.  SOURCE[0] supplies the low-order byte,
   SOURCE[1] the next byte and SOURCE[2] the high-order byte, matching
   the layout written by STORE_CHARACTER_AND_INCR.  SOURCE is not
   advanced.  */

#define EXTRACT_CHARACTER(destination, source)  \
  do {                                          \
    (destination) = ((source)[0]                \
                     | ((source)[1] << 8)       \
                     | ((source)[2] << 16));    \
  } while (0)
 759
 760
 761 /* Macros for charset. */
 762
 763 /* Size of bitmap of charset P in bytes.  P is a start of charset,
 764    i.e. *P is (re_opcode_t) charset or (re_opcode_t) charset_not.  */
 765 #define CHARSET_BITMAP_SIZE(p) ((p)[1] & 0x7F)
 766
 767 /* Nonzero if charset P has range table.  */
 768 #define CHARSET_RANGE_TABLE_EXISTS_P(p)  ((p)[1] & 0x80)
 769
/* Return the address of the range table of charset P.  This is not
   the address of the first range itself, but of the two-byte count of
   ranges that precedes the ranges.  `4 +' means to skip the
   re_opcode_t byte, the byte holding the size of the bitmap, and the
   2 bytes of flags at the start of the range table.  */
#define CHARSET_RANGE_TABLE(p) (&(p)[4 + CHARSET_BITMAP_SIZE (p)])
 775
 776 /* Extract the bit flags that start a range table.  */
 777 #define CHARSET_RANGE_TABLE_BITS(p)             \
 778   ((p)[2 + CHARSET_BITMAP_SIZE (p)]             \
 779    + (p)[3 + CHARSET_BITMAP_SIZE (p)] * 0x100)
 780
 781 /* Return the address of end of RANGE_TABLE.  COUNT is number of
 782    ranges (which is a pair of (start, end)) in the RANGE_TABLE.  `* 2'
 783    is start of range and end of range.  `* 3' is size of each start
 784    and end.  */
 785 #define CHARSET_RANGE_TABLE_END(range_table, count)     \
 786   ((range_table) + (count) * 2 * 3)
 787
/* Test if C is in RANGE_TABLE.  A flag NOT is negated if C is in.
   COUNT is number of ranges in RANGE_TABLE.

   The ranges are scanned in order, each one being a pair of
   three-byte characters (start, end) with both bounds inclusive.  At
   the first range that contains C, NOT is logically inverted and the
   scan stops; if no range contains C, NOT is left unchanged.  C is
   evaluated repeatedly, so it should have no side effects.  The macro
   declares its own locals (range_start, range_end, rtp and
   range_table_end) inside the block it opens.  */
#define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count)      \
  do                                                                    \
    {                                                                   \
      re_wchar_t range_start, range_end;                                \
      re_char *rtp;                                                     \
      re_char *range_table_end                                          \
        = CHARSET_RANGE_TABLE_END ((range_table), (count));             \
                                                                        \
      for (rtp = (range_table); rtp < range_table_end; rtp += 2 * 3)    \
        {                                                               \
          EXTRACT_CHARACTER (range_start, rtp);                         \
          EXTRACT_CHARACTER (range_end, rtp + 3);                       \
                                                                        \
          if (range_start <= (c) && (c) <= range_end)                   \
            {                                                           \
              (not) = !(not);                                           \
              break;                                                    \
            }                                                           \
        }                                                               \
    }                                                                   \
  while (0)
 811
/* Test if C is in the range table of CHARSET.  The lvalue NOT is
   negated if C is listed in it.

   The table is located with CHARSET_RANGE_TABLE; its first item, read
   with EXTRACT_NUMBER_AND_INCR, is the number of ranges, and the
   ranges themselves follow that count.  The actual scan is done by
   CHARSET_LOOKUP_RANGE_TABLE_RAW.  */
#define CHARSET_LOOKUP_RANGE_TABLE(not, c, charset)                     \
  do                                                                    \
    {                                                                   \
      /* Number of ranges in range table. */                            \
      int count;                                                        \
      re_char *range_table = CHARSET_RANGE_TABLE (charset);             \
                                                                        \
      EXTRACT_NUMBER_AND_INCR (count, range_table);                     \
      CHARSET_LOOKUP_RANGE_TABLE_RAW ((not), (c), range_table, count);  \
    }                                                                   \
  while (0)
 825 \f
 826 /* If DEBUG is defined, Regex prints many voluminous messages about what
 827    it is doing (if the variable `debug' is nonzero).  If linked with the
 828    main program in `iregex.c', you can enter patterns and strings
 829    interactively.  And if linked with the main program in `main.c' and
 830    the other test files, you can run the already-written tests.  */
 831
 832 #ifdef DEBUG
 833
 834 /* We use standard I/O for debugging.  */
 835 # include <stdio.h>
 836
 837 /* It is useful to test things that ``must'' be true when debugging.  */
 838 # include <assert.h>
 839
 840 static int debug = -100000;
 841
 842 # define DEBUG_STATEMENT(e) e
 843 # define DEBUG_PRINT(...) if (debug > 0) printf (__VA_ARGS__)
 844 # define DEBUG_COMPILES_ARGUMENTS
 845 # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)                          \
 846   if (debug > 0) print_partial_compiled_pattern (s, e)
 847 # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)                 \
 848   if (debug > 0) print_double_string (w, s1, sz1, s2, sz2)
 849
 850
 851 /* Print the fastmap in human-readable form.  */
 852
 853 static void
 854 print_fastmap (char *fastmap)
 855 {
 856   unsigned was_a_range = 0;
 857   unsigned i = 0;
 858
 859   while (i < (1 << BYTEWIDTH))
 860     {
 861       if (fastmap[i++])
 862         {
 863           was_a_range = 0;
 864           putchar (i - 1);
 865           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
 866             {
 867               was_a_range = 1;
 868               i++;
 869             }
 870           if (was_a_range)
 871             {
 872               printf ("-");
 873               putchar (i - 1);
 874             }
 875         }
 876     }
 877   putchar ('\n');
 878 }
 879
 880
 881 /* Print a compiled pattern string in human-readable form, starting at
 882    the START pointer into it and ending just before the pointer END.  */
 883
 884 static void
 885 print_partial_compiled_pattern (re_char *start, re_char *end)
 886 {
 887   int mcnt, mcnt2;
 888   re_char *p = start;
 889   re_char *pend = end;
 890
 891   if (start == NULL)
 892     {
 893       fprintf (stderr, "(null)\n");
 894       return;
 895     }
 896
 897   /* Loop over pattern commands.  */
 898   while (p < pend)
 899     {
 900       fprintf (stderr, "%td:\t", p - start);
 901
 902       switch ((re_opcode_t) *p++)
 903         {
 904         case no_op:
 905           fprintf (stderr, "/no_op");
 906           break;
 907
 908         case succeed:
 909           fprintf (stderr, "/succeed");
 910           break;
 911
 912         case exactn:
 913           mcnt = *p++;
 914           fprintf (stderr, "/exactn/%d", mcnt);
 915           do
 916             {
 917               fprintf (stderr, "/%c", *p++);
 918             }
 919           while (--mcnt);
 920           break;
 921
 922         case start_memory:
 923           fprintf (stderr, "/start_memory/%d", *p++);
 924           break;
 925
 926         case stop_memory:
 927           fprintf (stderr, "/stop_memory/%d", *p++);
 928           break;
 929
 930         case duplicate:
 931           fprintf (stderr, "/duplicate/%d", *p++);
 932           break;
 933
 934         case anychar:
 935           fprintf (stderr, "/anychar");
 936           break;
 937
 938         case charset:
 939         case charset_not:
 940           {
 941             register int c, last = -100;
 942             register int in_range = 0;
 943             int length = CHARSET_BITMAP_SIZE (p - 1);
 944             int has_range_table = CHARSET_RANGE_TABLE_EXISTS_P (p - 1);
 945
 946             fprintf (stderr, "/charset [%s",
 947                      (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
 948
 949             if (p + *p >= pend)
 950               fprintf (stderr, " !extends past end of pattern! ");
 951
 952             for (c = 0; c < 256; c++)
 953               if (c / 8 < length
 954                   && (p[1 + (c/8)] & (1 << (c % 8))))
 955                 {
 956                   /* Are we starting a range?  */
 957                   if (last + 1 == c && ! in_range)
 958                     {
 959                       fprintf (stderr, "-");
 960                       in_range = 1;
 961                     }
 962                   /* Have we broken a range?  */
 963                   else if (last + 1 != c && in_range)
 964                     {
 965                       fprintf (stderr, "%c", last);
 966                       in_range = 0;
 967                     }
 968
 969                   if (! in_range)
 970                     fprintf (stderr, "%c", c);
 971
 972                   last = c;
 973               }
 974
 975             if (in_range)
 976               fprintf (stderr, "%c", last);
 977
 978             fprintf (stderr, "]");
 979
 980             p += 1 + length;
 981
 982             if (has_range_table)
 983               {
 984                 int count;
 985                 fprintf (stderr, "has-range-table");
 986
 987                 /* ??? Should print the range table; for now, just skip it.  */
 988                 p += 2;         /* skip range table bits */
 989                 EXTRACT_NUMBER_AND_INCR (count, p);
 990                 p = CHARSET_RANGE_TABLE_END (p, count);
 991               }
 992           }
 993           break;
 994
 995         case begline:
 996           fprintf (stderr, "/begline");
 997           break;
 998
 999         case endline:
1000           fprintf (stderr, "/endline");
1001           break;
1002
1003         case on_failure_jump:
1004           EXTRACT_NUMBER_AND_INCR (mcnt, p);
1005           fprintf (stderr, "/on_failure_jump to %td", p + mcnt - start);
1006           break;
1007
1008         case on_failure_keep_string_jump:
1009           EXTRACT_NUMBER_AND_INCR (mcnt, p);
1010           fprintf (stderr, "/on_failure_keep_string_jump to %td",
1011                    p + mcnt - start);
1012           break;
1013
1014         case on_failure_jump_nastyloop:
1015           EXTRACT_NUMBER_AND_INCR (mcnt, p);
1016           fprintf (stderr, "/on_failure_jump_nastyloop to %td",
1017                    p + mcnt - start);
1018           break;
1019
1020         case on_failure_jump_loop:
1021           EXTRACT_NUMBER_AND_INCR (mcnt, p);
1022           fprintf (stderr, "/on_failure_jump_loop to %td",
1023                    p + mcnt - start);
1024           break;
1025
1026         case on_failure_jump_smart:
1027           EXTRACT_NUMBER_AND_INCR (mcnt, p);
1028           fprintf (stderr, "/on_failure_jump_smart to %td",
1029                    p + mcnt - start);
1030           break;
1031
1032         case jump:
1033           EXTRACT_NUMBER_AND_INCR (mcnt, p);
1034           fprintf (stderr, "/jump to %td", p + mcnt - start);
1035           break;
1036
1037         case succeed_n:
1038           EXTRACT_NUMBER_AND_INCR (mcnt, p);
1039           EXTRACT_NUMBER_AND_INCR (mcnt2, p);
1040           fprintf (stderr, "/succeed_n to %td, %d times",
1041                    p - 2 + mcnt - start, mcnt2);
1042           break;
1043
1044         case jump_n:
1045           EXTRACT_NUMBER_AND_INCR (mcnt, p);
1046           EXTRACT_NUMBER_AND_INCR (mcnt2, p);
1047           fprintf (stderr, "/jump_n to %td, %d times",
1048                    p - 2 + mcnt - start, mcnt2);
1049           break;
1050
1051         case set_number_at:
1052           EXTRACT_NUMBER_AND_INCR (mcnt, p);
1053           EXTRACT_NUMBER_AND_INCR (mcnt2, p);
1054           fprintf (stderr, "/set_number_at location %td to %d",
1055                    p - 2 + mcnt - start, mcnt2);
1056           break;
1057
1058         case wordbound:
1059           fprintf (stderr, "/wordbound");
1060           break;
1061
1062         case notwordbound:
1063           fprintf (stderr, "/notwordbound");
1064           break;
1065
1066         case wordbeg:
1067           fprintf (stderr, "/wordbeg");
1068           break;
1069
1070         case wordend:
1071           fprintf (stderr, "/wordend");
1072           break;
1073
1074         case symbeg:
1075           fprintf (stderr, "/symbeg");
1076           break;
1077
1078         case symend:
1079           fprintf (stderr, "/symend");
1080           break;
1081
1082         case syntaxspec:
1083           fprintf (stderr, "/syntaxspec");
1084           mcnt = *p++;
1085           fprintf (stderr, "/%d", mcnt);
1086           break;
1087
1088         case notsyntaxspec:
1089           fprintf (stderr, "/notsyntaxspec");
1090           mcnt = *p++;
1091           fprintf (stderr, "/%d", mcnt);
1092           break;
1093
1094 # ifdef emacs
1095         case before_dot:
1096           fprintf (stderr, "/before_dot");
1097           break;
1098
1099         case at_dot:
1100           fprintf (stderr, "/at_dot");
1101           break;
1102
1103         case after_dot:
1104           fprintf (stderr, "/after_dot");
1105           break;
1106
1107         case categoryspec:
1108           fprintf (stderr, "/categoryspec");
1109           mcnt = *p++;
1110           fprintf (stderr, "/%d", mcnt);
1111           break;
1112
1113         case notcategoryspec:
1114           fprintf (stderr, "/notcategoryspec");
1115           mcnt = *p++;
1116           fprintf (stderr, "/%d", mcnt);
1117           break;
1118 # endif /* emacs */
1119
1120         case begbuf:
1121           fprintf (stderr, "/begbuf");
1122           break;
1123
1124         case endbuf:
1125           fprintf (stderr, "/endbuf");
1126           break;
1127
1128         default:
1129           fprintf (stderr, "?%d", *(p-1));
1130         }
1131
1132       fprintf (stderr, "\n");
1133     }
1134
1135   fprintf (stderr, "%td:\tend of pattern.\n", p - start);
1136 }
1137
1138
1139 static void
1140 print_compiled_pattern (struct re_pattern_buffer *bufp)
1141 {
1142   re_char *buffer = bufp->buffer;
1143
1144   print_partial_compiled_pattern (buffer, buffer + bufp->used);
1145   printf ("%ld bytes used/%ld bytes allocated.\n",
1146           bufp->used, bufp->allocated);
1147
1148   if (bufp->fastmap_accurate && bufp->fastmap)
1149     {
1150       printf ("fastmap: ");
1151       print_fastmap (bufp->fastmap);
1152     }
1153
1154   printf ("re_nsub: %zu\t", bufp->re_nsub);
1155   printf ("regs_alloc: %d\t", bufp->regs_allocated);
1156   printf ("can_be_null: %d\t", bufp->can_be_null);
1157   printf ("no_sub: %d\t", bufp->no_sub);
1158   printf ("not_bol: %d\t", bufp->not_bol);
1159   printf ("not_eol: %d\t", bufp->not_eol);
1160   printf ("syntax: %lx\n", bufp->syntax);
1161   fflush (stdout);
1162   /* Perhaps we should print the translate table?  */
1163 }
1164
1165
1166 static void
1167 print_double_string (re_char *where, re_char *string1, ssize_t size1,
1168                      re_char *string2, ssize_t size2)
1169 {
1170   ssize_t this_char;
1171
1172   if (where == NULL)
1173     printf ("(null)");
1174   else
1175     {
1176       if (FIRST_STRING_P (where))
1177         {
1178           for (this_char = where - string1; this_char < size1; this_char++)
1179             putchar (string1[this_char]);
1180
1181           where = string2;
1182         }
1183
1184       for (this_char = where - string2; this_char < size2; this_char++)
1185         putchar (string2[this_char]);
1186     }
1187 }
1188
1189 #else /* not DEBUG */
1190
/* Without DEBUG, assertions and all tracing helpers expand to nothing
   (or, before C99, to something whose value is discarded).  */
# undef assert
# define assert(e)

# define DEBUG_STATEMENT(e)
/* Compilers that do not announce C99 get DEBUG_PRINT defined as an
   object-like macro that expands to a `(void)' cast.  The cast is then
   applied to the caller's parenthesized argument list, which is a
   comma expression.  In that configuration the arguments are still
   seen by the compiler, which is presumably what
   DEBUG_COMPILES_ARGUMENTS records.  */
# if __STDC_VERSION__ < 199901L
#  define DEBUG_COMPILES_ARGUMENTS
#  define DEBUG_PRINT /* 'DEBUG_PRINT (x, y)' discards X and Y.  */ (void)
# else
#  define DEBUG_PRINT(...)
# endif
# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
1203
1204 #endif /* not DEBUG */
1205 \f
/* Use this to suppress gcc's `...may be used before initialized' warnings.
   IF_LINT (Code) expands to Code when `lint' is defined and to nothing
   otherwise.  */
#ifdef lint
# define IF_LINT(Code) Code
#else
# define IF_LINT(Code) /* empty */
#endif
1212 \f
/* The regexp syntax currently recognized.  Set by `re_set_syntax'.  It
   can also be assigned to directly: each pattern buffer stores its own
   syntax, so the value can be changed between regex compilations.  */
/* This has no initializer because initialized variables in Emacs
   become read-only after dumping.  */
reg_syntax_t re_syntax_options;
1219
1220
1221 /* Specify the precise syntax of regexps for compilation.  This provides
1222    for compatibility for various utilities which historically have
1223    different, incompatible syntaxes.
1224
1225    The argument SYNTAX is a bit mask comprised of the various bits
1226    defined in regex.h.  We return the old syntax.  */
1227
1228 reg_syntax_t
1229 re_set_syntax (reg_syntax_t syntax)
1230 {
1231   reg_syntax_t ret = re_syntax_options;
1232
1233   re_syntax_options = syntax;
1234   return ret;
1235 }
1236 WEAK_ALIAS (__re_set_syntax, re_set_syntax)
1237
1238 /* Regexp to use to replace spaces, or NULL meaning don't.  */
1239 static re_char *whitespace_regexp;
1240
1241 void
1242 re_set_whitespace_regexp (const char *regexp)
1243 {
1244   whitespace_regexp = (re_char *) regexp;
1245 }
1246 WEAK_ALIAS (__re_set_syntax, re_set_syntax)
1247 \f
/* This table gives an error message for each of the error codes listed
   in regex.h.  The table is indexed by error code, so the entries here
   must appear in the same order as the codes are declared there.
   POSIX doesn't require that we do anything for REG_NOERROR,
   but why not be nice?  */

static const char *re_error_msgid[] =
  {
    gettext_noop ("Success"),   /* REG_NOERROR */
    gettext_noop ("No match"),  /* REG_NOMATCH */
    gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
    gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
    gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
    gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
    gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
    gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */
    gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
    gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
    gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
    gettext_noop ("Invalid range end"), /* REG_ERANGE */
    gettext_noop ("Memory exhausted"), /* REG_ESPACE */
    gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
    gettext_noop ("Premature end of regular expression"), /* REG_EEND */
    gettext_noop ("Regular expression too big"), /* REG_ESIZE */
    gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
    gettext_noop ("Range striding over charsets") /* REG_ERANGEX  */
  };
1274 \f
1275 /* Avoiding alloca during matching, to placate r_alloc.  */
1276
1277 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1278    searching and matching functions should not call alloca.  On some
1279    systems, alloca is implemented in terms of malloc, and if we're
1280    using the relocating allocator routines, then malloc could cause a
1281    relocation, which might (if the strings being searched are in the
1282    ralloc heap) shift the data out from underneath the regexp
1283    routines.
1284
1285    Here's another reason to avoid allocation: Emacs
1286    processes input from X in a signal handler; processing X input may
1287    call malloc; if input arrives while a matching routine is calling
1288    malloc, then we're scrod.  But Emacs can't just block input while
1289    calling matching routines; then we don't notice interrupts when
1290    they come in.  So, Emacs blocks input around all regexp calls
1291    except the matching calls, which it leaves unprotected, in the
1292    faith that they will not malloc.  */
1293
/* Normally, this is fine: by default the matching routines are allowed
   to allocate.  */
#define MATCH_MAY_ALLOCATE

/* The match routines may not allocate if (1) they would do it with malloc
   and (2) it's not safe for them to use malloc.
   Note that if REL_ALLOC is defined, matching would not use malloc for the
   failure stack, but we would still use it for the register vectors;
   so REL_ALLOC should not affect this.
   Concretely, the permission is withdrawn only when both REGEX_MALLOC
   and emacs are defined.  */
#if defined REGEX_MALLOC && defined emacs
# undef MATCH_MAY_ALLOCATE
#endif
1305
1306 \f
1307 /* Failure stack declarations and macros; both re_compile_fastmap and
1308    re_match_2 use a failure stack.  These have to be macros because of
1309    REGEX_ALLOCATE_STACK.  */
1310
1311
/* Approximate number of failure points for which to initially allocate space
   when matching.  If this number is exceeded, we allocate more
   space, so it is not a hard limit.  The default below applies only
   when INIT_FAILURE_ALLOC has not already been defined.  */
#ifndef INIT_FAILURE_ALLOC
# define INIT_FAILURE_ALLOC 20
#endif
1318
/* Roughly the maximum number of failure points on the stack.  It would
   be exactly that if we always used TYPICAL_FAILURE_SIZE items each
   time we failed.
   This is a variable only so users of regex can assign to it; we never
   change it ourselves.  We always multiply it by TYPICAL_FAILURE_SIZE
   before using it, so it should probably be a byte-count instead.
   The default is 40000 when MATCH_MAY_ALLOCATE is defined and 4000
   otherwise.  */
# if defined MATCH_MAY_ALLOCATE
/* Note that 4400 was enough to cause a crash on Alpha OSF/1,
   whose default stack limit is 2mb.  In order for a larger
   value to work reliably, you have to try to make it accord
   with the process stack limit.  */
/* NOTE(review): the figure 4400 quoted above is well below the 40000
   used here -- confirm which of the two is intended.  */
size_t re_max_failures = 40000;
# else
size_t re_max_failures = 4000;
# endif
1333
1334 union fail_stack_elt
1335 {
1336   re_char *pointer;
1337   /* This should be the biggest `int' that's no bigger than a pointer.  */
1338   long integer;
1339 };
1340
1341 typedef union fail_stack_elt fail_stack_elt_t;
1342
/* The failure stack: an array of slots plus bookkeeping offsets into
   that array.  */
typedef struct
{
  fail_stack_elt_t *stack;
  size_t size;  /* Capacity, counted in slots.  */
  size_t avail; /* Offset of next open position.  */
  size_t frame; /* Offset of the cur constructed frame.  */
} fail_stack_type;

/* True when no failure point is on the stack, i.e. the frame offset of
   the variable `fail_stack' is still 0.  */
#define FAIL_STACK_EMPTY()     (fail_stack.frame == 0)
1352
1353
/* Define macros to initialize and free the failure stack.
   Do `return -2' if the alloc fails.

   Both variants operate on the variable `fail_stack' and reset its
   `avail' and `frame' offsets to 0.  When MATCH_MAY_ALLOCATE is
   defined the slot array is obtained with REGEX_ALLOCATE_STACK;
   otherwise no allocation is done here.

   NOTE(review): the allocating variant requests room for
   INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE slots but records a `size'
   of only INIT_FAILURE_ALLOC slots -- confirm this is intended.  */

#ifdef MATCH_MAY_ALLOCATE
# define INIT_FAIL_STACK()                                              \
  do {                                                                  \
    fail_stack.stack =                                                  \
      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE   \
                            * sizeof (fail_stack_elt_t));               \
                                                                        \
    if (fail_stack.stack == NULL)                                       \
      return -2;                                                        \
                                                                        \
    fail_stack.size = INIT_FAILURE_ALLOC;                               \
    fail_stack.avail = 0;                                               \
    fail_stack.frame = 0;                                               \
  } while (0)
#else
# define INIT_FAIL_STACK()                                              \
  do {                                                                  \
    fail_stack.avail = 0;                                               \
    fail_stack.frame = 0;                                               \
  } while (0)

/* Make ADDR point to room for N objects of type T: resize it with
   RETALLOC when ADDR is already non-null, otherwise allocate it fresh
   with TALLOC.  The expansion is a complete if/else, so it is safe as
   the body of an unbraced `if'.  */
# define RETALLOC_IF(addr, n, t) \
  if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
#endif
1381
1382
/* Enlarge FAIL_STACK by FAIL_STACK_GROWTH_FACTOR, up to a limit
   which allows approximately `re_max_failures' items.

   Return 1 if succeeds, and 0 if either ran out of memory
   allocating space for it or it was already too large.

   The limit is re_max_failures * TYPICAL_FAILURE_SIZE and is compared
   against the stack's current size in bytes.  The new byte size is the
   smaller of that limit and the current byte size times the growth
   factor; `size' is then updated to that byte size divided by the size
   of one slot.  The result of REGEX_REALLOCATE_STACK is stored
   straight into `stack', so `stack' is NULL after a failed attempt.

   REGEX_REALLOCATE_STACK requires `destination' be declared.   */

/* Factor to increase the failure stack size by
   when we increase it.
   This used to be 2, but 2 was too wasteful
   because the old discarded stacks added up to as much space
   as the ultimate, maximum-size stack.  */
#define FAIL_STACK_GROWTH_FACTOR 4

#define GROW_FAIL_STACK(fail_stack)                                     \
  (((fail_stack).size * sizeof (fail_stack_elt_t)                       \
    >= re_max_failures * TYPICAL_FAILURE_SIZE)                          \
   ? 0                                                                  \
   : ((fail_stack).stack                                                \
      = REGEX_REALLOCATE_STACK ((fail_stack).stack,                     \
          (fail_stack).size * sizeof (fail_stack_elt_t),                \
          MIN (re_max_failures * TYPICAL_FAILURE_SIZE,                  \
               ((fail_stack).size * sizeof (fail_stack_elt_t)           \
                * FAIL_STACK_GROWTH_FACTOR))),                          \
                                                                        \
      (fail_stack).stack == NULL                                        \
      ? 0                                                               \
      : ((fail_stack).size                                              \
         = (MIN (re_max_failures * TYPICAL_FAILURE_SIZE,                \
                 ((fail_stack).size * sizeof (fail_stack_elt_t)         \
                  * FAIL_STACK_GROWTH_FACTOR))                          \
            / sizeof (fail_stack_elt_t)),                               \
         1)))
1417
1418
/* Push a pointer value onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.
   No capacity check is made here; the caller must have ensured that a
   slot is free.  */
#define PUSH_FAILURE_POINTER(item)                                      \
  fail_stack.stack[fail_stack.avail++].pointer = (item)

/* This pushes an integer-valued item onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.
   As with PUSH_FAILURE_POINTER, no capacity check is made.  */
#define PUSH_FAILURE_INT(item)                                  \
  fail_stack.stack[fail_stack.avail++].integer = (item)

/* These POP... operations complement the PUSH... operations.
   All assume that `fail_stack' is nonempty.  Each one decrements
   `avail' and yields the slot it then designates.  */
#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1435
/* Individual items aside from the registers.  PUSH_FAILURE_POINT
   pushes exactly this many slots per frame: the previous frame index,
   the string position and the pattern position.  */
#define NUM_NONREG_ITEMS 3

/* Used to examine the stack (to detect infinite loops).
   H is a frame handle, i.e. the offset just past a frame's last slot.
   Counting back from H, slot 1 is the saved pattern position, slot 2
   the saved string position and slot 3 the handle of the frame pushed
   before it.  */
#define FAILURE_PAT(h) fail_stack.stack[(h) - 1].pointer
#define FAILURE_STR(h) (fail_stack.stack[(h) - 2].pointer)
#define NEXT_FAILURE_HANDLE(h) fail_stack.stack[(h) - 3].integer
#define TOP_FAILURE_HANDLE() fail_stack.frame
1444
1445
/* Make sure strictly more than SPACE slots are free on `fail_stack',
   growing it with GROW_FAIL_STACK as many times as needed.  Does
   `return -2' from the enclosing function if the stack cannot grow.
   The expansion is a bare `while' block, not a do-while (0) wrapper;
   the semicolon written after a use is a separate empty statement.  */
#define ENSURE_FAIL_STACK(space)                                        \
while (REMAINING_AVAIL_SLOTS <= space) {                                \
  if (!GROW_FAIL_STACK (fail_stack))                                    \
    return -2;                                                          \
  DEBUG_PRINT ("\n  Doubled stack; size now: %zd\n", (fail_stack).size);\
  DEBUG_PRINT ("         slots available: %zd\n", REMAINING_AVAIL_SLOTS);\
}
1453
/* Push register NUM onto the stack.  Three slots are used, pushed in
   this order: regstart[NUM], regend[NUM], and NUM itself.  NUM ends up
   on top, where POP_FAILURE_REG_OR_COUNT reads it first.
   `destination' is declared for the stack-growing macros.  Does
   `return -2' if the stack cannot grow.  */
#define PUSH_FAILURE_REG(num)                                           \
do {                                                                    \
  char *destination;                                                    \
  long n = num;                                                         \
  ENSURE_FAIL_STACK(3);                                                 \
  DEBUG_PRINT ("    Push reg %ld (spanning %p -> %p)\n",                \
               n, regstart[n], regend[n]);                              \
  PUSH_FAILURE_POINTER (regstart[n]);                                   \
  PUSH_FAILURE_POINTER (regend[n]);                                     \
  PUSH_FAILURE_INT (n);                                                 \
} while (0)
1466
/* Change the counter's value to VAL, but make sure that it will
   be reset when backtracking.  The counter's current value (read from
   PTR with EXTRACT_NUMBER), then PTR, then the marker -1 are pushed
   before VAL is stored at PTR.  The -1 on top is what lets
   POP_FAILURE_REG_OR_COUNT tell a saved counter from a saved register.
   Does `return -2' if the stack cannot grow.  */
#define PUSH_NUMBER(ptr,val)                                            \
do {                                                                    \
  char *destination;                                                    \
  int c;                                                                \
  ENSURE_FAIL_STACK(3);                                                 \
  EXTRACT_NUMBER (c, ptr);                                              \
  DEBUG_PRINT ("    Push number %p = %d -> %d\n", ptr, c, val);         \
  PUSH_FAILURE_INT (c);                                                 \
  PUSH_FAILURE_POINTER (ptr);                                           \
  PUSH_FAILURE_INT (-1);                                                \
  STORE_NUMBER (ptr, val);                                              \
} while (0)
1481
/* Pop a saved register or a saved counter off the stack.  The integer
   on top decides which: -1 means a counter saved by PUSH_NUMBER, whose
   old value is written back into the pattern; any other value is a
   register number whose regend and regstart entries are restored.  */
#define POP_FAILURE_REG_OR_COUNT()                                      \
do {                                                                    \
  long pfreg = POP_FAILURE_INT ();                                      \
  if (pfreg == -1)                                                      \
    {                                                                   \
      /* It's a counter.  */                                            \
      /* Here, we discard `const', making re_match non-reentrant.  */   \
      unsigned char *ptr = (unsigned char*) POP_FAILURE_POINTER ();     \
      pfreg = POP_FAILURE_INT ();                                       \
      STORE_NUMBER (ptr, pfreg);                                        \
      DEBUG_PRINT ("     Pop counter %p = %ld\n", ptr, pfreg);          \
    }                                                                   \
  else                                                                  \
    {                                                                   \
      regend[pfreg] = POP_FAILURE_POINTER ();                           \
      regstart[pfreg] = POP_FAILURE_POINTER ();                         \
      DEBUG_PRINT ("     Pop reg %ld (spanning %p -> %p)\n",            \
                   pfreg, regstart[pfreg], regend[pfreg]);              \
    }                                                                   \
} while (0)
1503
/* Check that we are not stuck in an infinite loop.

   Walk the chain of failure frames from the top one, for as long as
   each frame's saved string position is STRING_PLACE or NULL.  If one
   of those frames saved the pattern position PAT_CUR, set the variable
   `cycle' to 1 and stop.

   Assumes the variables `fail_stack', `cycle' and (for the assertion)
   `bufp'.  */
#define CHECK_INFINITE_LOOP(pat_cur, string_place)                      \
do {                                                                    \
  ssize_t failure = TOP_FAILURE_HANDLE ();                              \
  /* Check for infinite matching loops */                               \
  while (failure > 0                                                    \
         && (FAILURE_STR (failure) == string_place                      \
             || FAILURE_STR (failure) == NULL))                         \
    {                                                                   \
      assert (FAILURE_PAT (failure) >= bufp->buffer                     \
              && FAILURE_PAT (failure) <= bufp->buffer + bufp->used);   \
      if (FAILURE_PAT (failure) == pat_cur)                             \
        {                                                               \
          cycle = 1;                                                    \
          break;                                                        \
        }                                                               \
      DEBUG_PRINT ("  Other pattern: %p\n", FAILURE_PAT (failure));     \
      failure = NEXT_FAILURE_HANDLE(failure);                           \
    }                                                                   \
  /* The walk may have run off the bottom of the stack (failure <= 0). \
     There is then no frame to report, and reading FAILURE_STR would   \
     index before the start of the slot array.  */                      \
  DEBUG_PRINT ("  Other string: %p\n",                                  \
               failure > 0 ? FAILURE_STR (failure) : NULL);             \
} while (0)
1525
/* Push the information about the state we will need
   if we ever fail back to it.

   A frame of NUM_NONREG_ITEMS slots is pushed: the previous frame
   index, STRING_PLACE and PATTERN, in that order.  The frame index is
   then moved to just past the new frame.

   Requires the variable `fail_stack' be declared; DEBUG builds also
   refer to nfailure_points_pushed, string1, size1, string2, size2 and
   pend.  GROW_FAIL_STACK requires `destination' be declared.

   Does `return -2' (through ENSURE_FAIL_STACK) if the stack cannot be
   grown.  */

#define PUSH_FAILURE_POINT(pattern, string_place)                       \
do {                                                                    \
  char *destination;                                                    \
  /* `destination' is needed by the stack-growing macros reached       \
     through ENSURE_FAIL_STACK.  */                                     \
                                                                        \
  DEBUG_STATEMENT (nfailure_points_pushed++);                           \
  DEBUG_PRINT ("\nPUSH_FAILURE_POINT:\n");                              \
  DEBUG_PRINT ("  Before push, next avail: %zd\n", (fail_stack).avail); \
  DEBUG_PRINT ("                        size: %zd\n", (fail_stack).size);\
                                                                        \
  ENSURE_FAIL_STACK (NUM_NONREG_ITEMS);                                 \
                                                                        \
  DEBUG_PRINT ("\n");                                                   \
                                                                        \
  DEBUG_PRINT ("  Push frame index: %zd\n", fail_stack.frame);          \
  PUSH_FAILURE_INT (fail_stack.frame);                                  \
                                                                        \
  DEBUG_PRINT ("  Push string %p: `", string_place);                    \
  DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, size2);\
  DEBUG_PRINT ("'\n");                                                  \
  PUSH_FAILURE_POINTER (string_place);                                  \
                                                                        \
  DEBUG_PRINT ("  Push pattern %p: ", pattern);                         \
  DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern, pend);                   \
  PUSH_FAILURE_POINTER (pattern);                                       \
                                                                        \
  /* Close the frame by moving the frame pointer past it.  */           \
  fail_stack.frame = fail_stack.avail;                                  \
} while (0)
1565
/* Rough amount of data a typical failure stack entry pushes.  It only
   serves to pick a limit for the size of the failure stack, so an
   estimate is good enough.  */
/* BEWARE, the value `20' is hard-coded in emacs.c:main().  */
#define TYPICAL_FAILURE_SIZE 20

/* Number of free slots left on the failure stack, i.e. how many items
   can still be pushed before it overflows.  */
#define REMAINING_AVAIL_SLOTS (fail_stack.size - fail_stack.avail)
1574
1575
/* Pops what PUSH_FAILURE_POINT pushes.

   We restore into the parameters, both of which should be lvalues:
     STR -- the saved data position.
     PAT -- the saved pattern position.

   Entries lying above the current frame are popped first with
   POP_FAILURE_REG_OR_COUNT, and the frame index saved by the push is
   restored into `fail_stack.frame'.

   Also assumes the variables `fail_stack' and (if debugging), `bufp',
   `pend', `string1', `size1', `string2', and `size2'.  */

#define POP_FAILURE_POINT(str, pat)                                     \
do {                                                                    \
  assert (!FAIL_STACK_EMPTY ());                                        \
                                                                        \
  /* Trace the state of the stack before anything is popped.  */        \
  DEBUG_PRINT ("POP_FAILURE_POINT:\n");                                 \
  DEBUG_PRINT ("  Before pop, next avail: %zd\n", fail_stack.avail);    \
  DEBUG_PRINT ("                     size: %zd\n", fail_stack.size);    \
                                                                        \
  /* Pop every entry that was pushed after the frame was closed,        \
     i.e. everything at or above fail_stack.frame.  */                  \
  while (fail_stack.frame < fail_stack.avail)                           \
    POP_FAILURE_REG_OR_COUNT ();                                        \
                                                                        \
  pat = POP_FAILURE_POINTER ();                                         \
  DEBUG_PRINT ("  Popping pattern %p: ", pat);                          \
  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);                       \
                                                                        \
  /* NOTE(review): the saved string location may be NULL (said to come  \
     from an on_failure_keep_string_jump opcode).  This macro stores    \
     whatever was saved into STR unchanged, so keeping the current      \
     string position in that case is up to the caller -- confirm.  */   \
  str = POP_FAILURE_POINTER ();                                         \
  DEBUG_PRINT ("  Popping string %p: `", str);                          \
  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);      \
  DEBUG_PRINT ("'\n");                                                  \
                                                                        \
  fail_stack.frame = POP_FAILURE_INT ();                                \
  DEBUG_PRINT ("  Popping  frame index: %zd\n", fail_stack.frame);      \
                                                                        \
  /* Sanity checks on the stack after the frame has been removed.  */   \
  assert (fail_stack.avail >= 0);                                       \
  assert (fail_stack.frame <= fail_stack.avail);                        \
                                                                        \
  DEBUG_STATEMENT (nfailure_points_popped++);                           \
} while (0) /* POP_FAILURE_POINT */
1619
1620
1621 \f
/* A register that has not matched yet holds the NULL sentinel.  */
#define REG_UNSET(e) (NULL == (e))
1624 \f
/* Subroutine declarations and macros for regex_compile.  */

/* Forward declarations of file-local helpers.  */
static reg_errcode_t regex_compile (re_char *pattern, size_t size,
                                    reg_syntax_t syntax,
                                    struct re_pattern_buffer *bufp);
/* store_op1/store_op2 and insert_op1/insert_op2 are what the
   STORE_JUMP, STORE_JUMP2, INSERT_JUMP and INSERT_JUMP2 macros expand
   to; the insert_ variants additionally receive the end of the buffer.  */
static void store_op1 (re_opcode_t op, unsigned char *loc, int arg);
static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2);
static void insert_op1 (re_opcode_t op, unsigned char *loc,
                        int arg, unsigned char *end);
static void insert_op2 (re_opcode_t op, unsigned char *loc,
                        int arg1, int arg2, unsigned char *end);
static boolean at_begline_loc_p (re_char *pattern, re_char *p,
                                 reg_syntax_t syntax);
static boolean at_endline_loc_p (re_char *p, re_char *pend,
                                 reg_syntax_t syntax);
static re_char *skip_one_char (re_char *p);
static int analyse_first (re_char *p, re_char *pend,
                          char *fastmap, const int multibyte);
1643
/* Read the character at `p' in the uncompiled pattern into C, without
   translating it, and advance `p' past it.  Returns REG_EEND from the
   enclosing function when the pattern is already exhausted.  */
#define PATFETCH(c)                                                     \
  do {                                                                  \
    int char_len;                                                       \
    if (pend == p)                                                      \
      return REG_EEND;                                                  \
    (c) = RE_STRING_CHAR_AND_LENGTH (p, char_len, multibyte);           \
    p += char_len;                                                      \
  } while (0)
1653
1654
/* Map D through the `translate' table when one is in effect; otherwise
   yield D unchanged.  A definition of TRANSLATE made before this point
   takes precedence.  */
#ifndef TRANSLATE
# define TRANSLATE(d) \
  (!RE_TRANSLATE_P (translate) ? (d) : RE_TRANSLATE (translate, (d)))
#endif
1663
1664
/* Macros for outputting the compiled pattern into `buffer'.  */

/* Size to use for a buffer that arrives unallocated.  */
#define INIT_BUF_SIZE  32

/* Keep extending the buffer until at least N more bytes fit after `b'.  */
#define GET_BUFFER_SPACE(n)                                             \
    while (bufp->allocated < (size_t) (b - bufp->buffer + (n)))         \
      EXTEND_BUFFER ()

/* Append the single byte C, making room for it first if necessary.  */
#define BUF_PUSH(c)                                                     \
  do {                                                                  \
    GET_BUFFER_SPACE (1);                                               \
    *b = (unsigned char) (c);                                           \
    b++;                                                                \
  } while (0)


/* Append the two bytes C1 and C2, in that order, making room for both
   up front.  */
#define BUF_PUSH_2(c1, c2)                                              \
  do {                                                                  \
    GET_BUFFER_SPACE (2);                                               \
    *b = (unsigned char) (c1);                                          \
    b++;                                                                \
    *b = (unsigned char) (c2);                                          \
    b++;                                                                \
  } while (0)
1690
1691
/* Write a jump with opcode OP at LOC whose target is TO.  The stored
   displacement is relative to the end of the jump, which itself takes
   up three bytes -- hence the `- 3'.  */
#define STORE_JUMP(op, loc, to) \
  store_op1 ((op), (loc), (int) ((to) - (loc) - 3))

/* Same, for a jump carrying the extra argument ARG.  */
#define STORE_JUMP2(op, loc, to, arg) \
  store_op2 ((op), (loc), (int) ((to) - (loc) - 3), (arg))

/* Inserting counterpart of `STORE_JUMP'; `b' is taken to be the end of
   the buffer.  */
#define INSERT_JUMP(op, loc, to) \
  insert_op1 ((op), (loc), (int) ((to) - (loc) - 3), b)

/* Inserting counterpart of `STORE_JUMP2'; `b' is taken to be the end of
   the buffer.  */
#define INSERT_JUMP2(op, loc, to, arg) \
  insert_op2 ((op), (loc), (int) ((to) - (loc) - 3), (arg), b)
1708
1709
/* Offsets into the pattern are stored as two-byte arguments, which is
   what bounds the compiled pattern at 2^15 bytes.  The limit is thus a
   property of the encoding, not an arbitrary choice: raising it would
   mean changing many things.  */
#define MAX_BUF_SIZE 0x8000L
1714
/* Grow the pattern buffer to twice its current size (but never beyond
   MAX_BUF_SIZE) via realloc, and reset the pointers that pointed into
   the old block to point to the corresponding places in the new one.

   Returns from the enclosing function with REG_ESIZE if the buffer
   already is MAX_BUF_SIZE long, or with REG_ESPACE if the reallocation
   fails.  In the latter case `bufp->buffer' and `bufp->allocated' are
   left exactly as they were, so the old block is neither leaked nor
   described with a size it does not have.

   Expects `bufp', `b', `begalt', `fixup_alt_jump', `laststart' and
   `pending_exact' to be in scope.  */
#if __BOUNDED_POINTERS__
# define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
# define MOVE_BUFFER_POINTER(P)                                 \
  (__ptrlow (P) = new_buffer + (__ptrlow (P) - old_buffer),     \
   SET_HIGH_BOUND (P),                                          \
   __ptrvalue (P) = new_buffer + (__ptrvalue (P) - old_buffer))
# define ELSE_EXTEND_BUFFER_HIGH_BOUND          \
  else                                          \
    {                                           \
      SET_HIGH_BOUND (b);                       \
      SET_HIGH_BOUND (begalt);                  \
      if (fixup_alt_jump)                       \
        SET_HIGH_BOUND (fixup_alt_jump);        \
      if (laststart)                            \
        SET_HIGH_BOUND (laststart);             \
      if (pending_exact)                        \
        SET_HIGH_BOUND (pending_exact);         \
    }
#else
/* Re-aim P, which points into the old block, at the same offset in the
   new block.  Relies on `old_buffer' and `new_buffer' being in scope.  */
# define MOVE_BUFFER_POINTER(P) ((P) = new_buffer + ((P) - old_buffer))
# define ELSE_EXTEND_BUFFER_HIGH_BOUND
#endif
#define EXTEND_BUFFER()                                                 \
  do {                                                                  \
    unsigned char *old_buffer = bufp->buffer;                           \
    unsigned char *new_buffer = old_buffer;                             \
    size_t new_allocated;                                               \
    if (bufp->allocated == MAX_BUF_SIZE)                                \
      return REG_ESIZE;                                                 \
    new_allocated = bufp->allocated << 1;                               \
    if (new_allocated > MAX_BUF_SIZE)                                   \
      new_allocated = MAX_BUF_SIZE;                                     \
    /* Reallocate through a temporary: a failed realloc leaves the old  \
       block untouched, so BUFP must keep pointing at it.  */           \
    RETALLOC (new_buffer, new_allocated, unsigned char);                \
    if (new_buffer == NULL)                                             \
      return REG_ESPACE;                                                \
    bufp->buffer = new_buffer;                                          \
    bufp->allocated = new_allocated;                                    \
    /* If the buffer moved, move all the pointers into it.  */          \
    if (old_buffer != new_buffer)                                       \
      {                                                                 \
        MOVE_BUFFER_POINTER (b);                                        \
        MOVE_BUFFER_POINTER (begalt);                                   \
        if (fixup_alt_jump)                                             \
          MOVE_BUFFER_POINTER (fixup_alt_jump);                         \
        if (laststart)                                                  \
          MOVE_BUFFER_POINTER (laststart);                              \
        if (pending_exact)                                              \
          MOVE_BUFFER_POINTER (pending_exact);                          \
      }                                                                 \
    ELSE_EXTEND_BUFFER_HIGH_BOUND                                       \
  } while (0)
1767
1768
/* Since we have one byte reserved for the register number argument to
   {start,stop}_memory, the maximum number of groups we can report
   things about is what fits in that byte, i.e. 255.  */
#define MAX_REGNUM 255

/* But patterns can have more than `MAX_REGNUM' registers.  We just
   ignore the excess.  The type is a plain int, so it can hold counts
   larger than MAX_REGNUM.  */
typedef int regnum_t;
1777
1778
1779 /* Macros for the compile stack.  */
1780
/* Since offsets can go either forwards or backwards, this type needs to
   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
/* int may be not enough when sizeof(int) == 2.  */
typedef long pattern_offset_t;

/* One entry of the compile stack.
   NOTE(review): judging by the member names, the three
   pattern_offset_t members are offsets into the pattern buffer and
   `regnum' is a group number; the code that fills them in is not part
   of this excerpt -- confirm there.  */
typedef struct
{
  pattern_offset_t begalt_offset;
  pattern_offset_t fixup_alt_jump;
  pattern_offset_t laststart_offset;
  regnum_t regnum;
} compile_stack_elt_t;


/* The compile stack itself: an array of entries together with its size
   and the index of the next free entry.  The COMPILE_STACK_* macros
   operate on a variable of this type named `compile_stack'.  */
typedef struct
{
  compile_stack_elt_t *stack;
  size_t size;
  size_t avail;                 /* Offset of next open position.  */
} compile_stack_type;
1801
1802
/* Initial size for the compile stack.  */
#define INIT_COMPILE_STACK_SIZE 32

/* Predicates on the `compile_stack' variable in scope.  */
#define COMPILE_STACK_EMPTY  (0 == compile_stack.avail)
#define COMPILE_STACK_FULL  (compile_stack.size == compile_stack.avail)

/* The element at index `avail', that is, the next available one.  */
#define COMPILE_STACK_TOP (*(compile_stack.stack + compile_stack.avail))
1810
/* Emacs processes input events by polling, so it needs explicit quit
   checks; in any other build the check expands to a no-op.  */
#ifndef emacs
# define IMMEDIATE_QUIT_CHECK    ((void) 0)
#else
# define IMMEDIATE_QUIT_CHECK                   \
    do {                                        \
      if (immediate_quit)                       \
        QUIT;                                   \
    } while (0)
#endif
1821 \f
/* Structure to manage work area for range table.

   Note the differing units: `allocated' is a byte count while `used'
   counts ints; EXTEND_RANGE_TABLE multiplies `used' by sizeof (int)
   before comparing the two.  `bits' accumulates the BIT_* flags OR-ed in
   by SET_RANGE_TABLE_WORK_AREA_BIT.  */
struct range_table_work_area
{
  int *table;                   /* actual work area.  */
  int allocated;                /* allocated size for work area in bytes.  */
  int used;                     /* actually used size in words.  */
  int bits;                     /* flag to record character classes */
};
1830
/* Make sure WORK_AREA has room for N more ints beyond those already in
   use, enlarging it with extend_range_table_work_area when it has not.
   WORK_AREA must be a `struct range_table_work_area' lvalue (its
   address is taken), not a pointer to one.
   If the space cannot be obtained, returns REG_ESPACE from the
   surrounding function.  */

#define EXTEND_RANGE_TABLE(work_area, n)                                \
  do {                                                                  \
    if ((work_area).allocated < ((work_area).used + (n)) * sizeof (int)) \
      {                                                                 \
        extend_range_table_work_area (&work_area);                      \
        if (!(work_area).table)                                         \
          return (REG_ESPACE);                                          \
      }                                                                 \
  } while (0)
1845
/* OR the flag BIT (one of the BIT_* values below) into the class bits
   of WORK_AREA.  The expansion is fully parenthesized so that it acts
   as a single expression wherever it appears.  */
#define SET_RANGE_TABLE_WORK_AREA_BIT(work_area, bit)           \
  ((work_area).bits |= (bit))

/* Bits used to implement the multibyte-part of the various character classes
   such as [:alnum:] in a charset's range table.  */
#define BIT_WORD        0x1
#define BIT_LOWER       0x2
#define BIT_PUNCT       0x4
#define BIT_SPACE       0x8
#define BIT_UPPER       0x10
#define BIT_MULTIBYTE   0x20
1857
/* Append the range (RANGE_START, RANGE_END) to WORK_AREA, making room
   for the two values first.  */
#define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end)    \
  do {                                                                  \
    EXTEND_RANGE_TABLE ((work_area), 2);                                \
    (work_area).table[(work_area).used] = (range_start);                \
    (work_area).table[(work_area).used + 1] = (range_end);              \
    (work_area).used += 2;                                              \
  } while (0)

/* Release the table owned by WORK_AREA, if it has one.  */
#define FREE_RANGE_TABLE_WORK_AREA(work_area)   \
  do {                                          \
    if ((work_area).table != NULL)              \
      free ((work_area).table);                 \
  } while (0)

/* Forget all ranges and class bits recorded in WORK_AREA; the table
   itself stays allocated.  */
#define CLEAR_RANGE_TABLE_WORK_USED(work_area) ((work_area).bits = (work_area).used = 0)
/* Accessors: number of ints in use, class bits, and the I-th int.  */
#define RANGE_TABLE_WORK_USED(work_area) ((work_area).used)
#define RANGE_TABLE_WORK_BITS(work_area) ((work_area).bits)
#define RANGE_TABLE_WORK_ELT(work_area, i) (*((work_area).table + (i)))
1877 \f
1878
/* Turn on the bit for character C in the bitmap at `b'.  */
#define SET_LIST_BIT(c) (b[(c) / BYTEWIDTH] |= (1 << ((c) % BYTEWIDTH)))
1881
1882
1883 #ifdef emacs
1884
1885 /* Store characters in the range FROM to TO in the bitmap at B (for
1886    ASCII and unibyte characters) and WORK_AREA (for multibyte
1887    characters) while translating them and paying attention to the
1888    continuity of translated characters.
1889
1890    Implementation note: It is better to implement these fairly big
1891    macros by a function, but it's not that easy because macros called
1892    in this macro assume various local variables already declared.  */
1893
/* Both FROM and TO are ASCII characters.

   Each character C0 in FROM..TO is passed through TRANSLATE.  When the
   translation is not an ASCII character, it is recorded in WORK_AREA
   as a one-character range and then converted with RE_CHAR_TO_UNIBYTE,
   falling back to C0 itself if that yields a negative value.  Finally
   the bit of the resulting character is set with SET_LIST_BIT.

   FROM and TO are evaluated more than once.  Because the macro uses
   SET_RANGE_TABLE_WORK_AREA, it can return REG_ESPACE from the
   enclosing function (see EXTEND_RANGE_TABLE).  */

#define SETUP_ASCII_RANGE(work_area, FROM, TO)                  \
  do {                                                          \
    int C0, C1;                                                 \
                                                                \
    for (C0 = (FROM); C0 <= (TO); C0++)                         \
      {                                                         \
        C1 = TRANSLATE (C0);                                    \
        if (! ASCII_CHAR_P (C1))                                \
          {                                                     \
            SET_RANGE_TABLE_WORK_AREA ((work_area), C1, C1);    \
            if ((C1 = RE_CHAR_TO_UNIBYTE (C1)) < 0)             \
              C1 = C0;                                          \
          }                                                     \
        SET_LIST_BIT (C1);                                      \
      }                                                         \
  } while (0)
1912
1913
/* Both FROM and TO are unibyte characters (0x80..0xFF).

   Each character C0 in FROM..TO is converted with RE_CHAR_TO_MULTIBYTE.
   If the result satisfies CHAR_BYTE8_P, only the bit of C0 is set.
   Otherwise the multibyte character is translated to C2, and:
     - the bit set in the bitmap is that of the unibyte form of C2, or
       that of C0 when the translation left the character unchanged or
       C2 has no unibyte form (RE_CHAR_TO_UNIBYTE < 0);
     - C2 is merged into the ranges this invocation has added to
       WORK_AREA (those at index USED and above): a range that already
       contains C2 is left alone, a range directly adjacent to C2 is
       widened by one, and if neither exists a new range (C2, C2) is
       appended.

   FROM and TO are evaluated more than once.  Because the macro uses
   SET_RANGE_TABLE_WORK_AREA, it can return REG_ESPACE from the
   enclosing function (see EXTEND_RANGE_TABLE).  */

#define SETUP_UNIBYTE_RANGE(work_area, FROM, TO)                               \
  do {                                                                         \
    int C0, C1, C2, I;                                                         \
    int USED = RANGE_TABLE_WORK_USED (work_area);                              \
                                                                               \
    for (C0 = (FROM); C0 <= (TO); C0++)                                        \
      {                                                                        \
        C1 = RE_CHAR_TO_MULTIBYTE (C0);                                        \
        if (CHAR_BYTE8_P (C1))                                                 \
          SET_LIST_BIT (C0);                                                   \
        else                                                                   \
          {                                                                    \
            C2 = TRANSLATE (C1);                                               \
            if (C2 == C1                                                       \
                || (C1 = RE_CHAR_TO_UNIBYTE (C2)) < 0)                         \
              C1 = C0;                                                         \
            SET_LIST_BIT (C1);                                                 \
            for (I = RANGE_TABLE_WORK_USED (work_area) - 2; I >= USED; I -= 2) \
              {                                                                \
                int from = RANGE_TABLE_WORK_ELT (work_area, I);                \
                int to = RANGE_TABLE_WORK_ELT (work_area, I + 1);              \
                                                                               \
                if (C2 >= from - 1 && C2 <= to + 1)                            \
                  {                                                            \
                    if (C2 == from - 1)                                        \
                      RANGE_TABLE_WORK_ELT (work_area, I)--;                   \
                    else if (C2 == to + 1)                                     \
                      RANGE_TABLE_WORK_ELT (work_area, I + 1)++;               \
                    break;                                                     \
                  }                                                            \
              }                                                                \
            if (I < USED)                                                      \
              SET_RANGE_TABLE_WORK_AREA ((work_area), C2, C2);                 \
          }                                                                    \
      }                                                                        \
  } while (0)
1952
1953
/* Both FROM and TO are multibyte characters.

   The range (FROM, TO) itself is appended to WORK_AREA first.  Then,
   for each character C0 in FROM..TO with translation C1:
     - if C1 has a unibyte form (RE_CHAR_TO_UNIBYTE >= 0), or failing
       that C1 differs from C0 and C0 has one, the bit of that unibyte
       form is set in the bitmap;
     - if C1 lies outside FROM..TO, it is merged into the ranges this
       invocation has added to WORK_AREA (those at index USED and
       above): a range that already contains C1 is left alone, a range
       directly adjacent to C1 is widened by one, and if neither exists
       a new range (C1, C1) is appended.

   FROM and TO are evaluated more than once.  Because the macro uses
   SET_RANGE_TABLE_WORK_AREA, it can return REG_ESPACE from the
   enclosing function (see EXTEND_RANGE_TABLE).  */

#define SETUP_MULTIBYTE_RANGE(work_area, FROM, TO)                         \
  do {                                                                     \
    int C0, C1, C2, I, USED = RANGE_TABLE_WORK_USED (work_area);           \
                                                                           \
    SET_RANGE_TABLE_WORK_AREA ((work_area), (FROM), (TO));                 \
    for (C0 = (FROM); C0 <= (TO); C0++)                                    \
      {                                                                    \
        C1 = TRANSLATE (C0);                                               \
        if ((C2 = RE_CHAR_TO_UNIBYTE (C1)) >= 0                            \
            || (C1 != C0 && (C2 = RE_CHAR_TO_UNIBYTE (C0)) >= 0))          \
          SET_LIST_BIT (C2);                                               \
        if (C1 >= (FROM) && C1 <= (TO))                                    \
          continue;                                                        \
        for (I = RANGE_TABLE_WORK_USED (work_area) - 2; I >= USED; I -= 2) \
          {                                                                \
            int from = RANGE_TABLE_WORK_ELT (work_area, I);                \
            int to = RANGE_TABLE_WORK_ELT (work_area, I + 1);              \
                                                                           \
            if (C1 >= from - 1 && C1 <= to + 1)                            \
              {                                                            \
                if (C1 == from - 1)                                        \
                  RANGE_TABLE_WORK_ELT (work_area, I)--;                   \
                else if (C1 == to + 1)                                     \
                  RANGE_TABLE_WORK_ELT (work_area, I + 1)++;               \
                break;                                                     \
              }                                                            \
          }                                                                \
        if (I < USED)                                                      \
          SET_RANGE_TABLE_WORK_AREA ((work_area), C1, C1);                 \
      }                                                                    \
  } while (0)
1987
1988 #endif /* emacs */
1989
/* Get the next unsigned number in the uncompiled pattern.

   Characters are read with PATFETCH for as long as they are decimal
   digits, and their value is accumulated in NUM, an int lvalue.  A NUM
   that is negative on entry is reset to 0 when the first digit
   arrives; if no digit is found at all, NUM is left untouched.  On
   normal exit `c' holds the first character that is not a digit.

   Leaves the enclosing function through FREE_STACK_RETURN with
   REG_EBRACE if the pattern ends first, or with REG_BADBR if the
   number does not fit in an int.  */
#define GET_UNSIGNED_NUMBER(num)                                        \
  do {                                                                  \
    if (p == pend)                                                      \
      FREE_STACK_RETURN (REG_EBRACE);                                   \
    else                                                                \
      {                                                                 \
        PATFETCH (c);                                                   \
        while ('0' <= c && c <= '9')                                    \
          {                                                             \
            unsigned int widened;                                       \
            if (num < 0)                                                \
              num = 0;                                                  \
            /* Accumulate in unsigned arithmetic, where wraparound is   \
               well defined; overflowing a signed int first and         \
               checking afterwards would be undefined behavior.  */     \
            widened = (unsigned int) num * 10u + (unsigned int) (c - '0'); \
            /* Reject the number if the multiplication wrapped around   \
               or the result exceeds the largest int (taken to be       \
               half the unsigned range).  */                            \
            if (widened / 10u != (unsigned int) num                     \
                || widened > ((unsigned int) -1 >> 1))                  \
              FREE_STACK_RETURN (REG_BADBR);                            \
            num = (int) widened;                                        \
            /* Checked by hand because PATFETCH would return without    \
               going through FREE_STACK_RETURN.  */                     \
            if (p == pend)                                              \
              FREE_STACK_RETURN (REG_EBRACE);                           \
            PATFETCH (c);                                               \
          }                                                             \
      }                                                                 \
  } while (0)
2013 \f
2014 #if ! WIDE_CHAR_SUPPORT
2015
2016 /* Map a string to the char class it names (if any).  */
2017 re_wctype_t
2018 re_wctype (const re_char *str)
2019 {
2020   const char *string = (const char *) str;
2021   if      (STREQ (string, "alnum"))     return RECC_ALNUM;
2022   else if (STREQ (string, "alpha"))     return RECC_ALPHA;
2023   else if (STREQ (string, "word"))      return RECC_WORD;
2024   else if (STREQ (string, "ascii"))     return RECC_ASCII;
2025   else if (STREQ (string, "nonascii"))  return RECC_NONASCII;
2026   else if (STREQ (string, "graph"))     return RECC_GRAPH;
2027   else if (STREQ (string, "lower"))     return RECC_LOWER;
2028   else if (STREQ (string, "print"))     return RECC_PRINT;
2029   else if (STREQ (string, "punct"))     return RECC_PUNCT;
2030   else if (STREQ (string, "space"))     return RECC_SPACE;
2031   else if (STREQ (string, "upper"))     return RECC_UPPER;
2032   else if (STREQ (string, "unibyte"))   return RECC_UNIBYTE;
2033   else if (STREQ (string, "multibyte")) return RECC_MULTIBYTE;
2034   else if (STREQ (string, "digit"))     return RECC_DIGIT;
2035   else if (STREQ (string, "xdigit"))    return RECC_XDIGIT;
2036   else if (STREQ (string, "cntrl"))     return RECC_CNTRL;
2037   else if (STREQ (string, "blank"))     return RECC_BLANK;
2038   else return 0;
2039 }
2040
2041 /* True if CH is in the char class CC.  */
2042 boolean
2043 re_iswctype (int ch, re_wctype_t cc)
2044 {
2045   switch (cc)
2046     {
2047     case RECC_ALNUM: return ISALNUM (ch) != 0;
2048     case RECC_ALPHA: return ISALPHA (ch) != 0;
2049     case RECC_BLANK: return ISBLANK (ch) != 0;
2050     case RECC_CNTRL: return ISCNTRL (ch) != 0;
2051     case RECC_DIGIT: return ISDIGIT (ch) != 0;
2052     case RECC_GRAPH: return ISGRAPH (ch) != 0;
2053     case RECC_LOWER: return ISLOWER (ch) != 0;
2054     case RECC_PRINT: return ISPRINT (ch) != 0;
2055     case RECC_PUNCT: return ISPUNCT (ch) != 0;
2056     case RECC_SPACE: return ISSPACE (ch) != 0;
2057     case RECC_UPPER: return ISUPPER (ch) != 0;
2058     case RECC_XDIGIT: return ISXDIGIT (ch) != 0;
2059     case RECC_ASCII: return IS_REAL_ASCII (ch) != 0;
2060     case RECC_NONASCII: return !IS_REAL_ASCII (ch);
2061     case RECC_UNIBYTE: return ISUNIBYTE (ch) != 0;
2062     case RECC_MULTIBYTE: return !ISUNIBYTE (ch);
2063     case RECC_WORD: return ISWORD (ch) != 0;
2064     case RECC_ERROR: return false;
2065     default:
2066       abort ();
2067     }
2068 }
2069
2070 /* Return a bit-pattern to use in the range-table bits to match multibyte
2071    chars of class CC.  */
2072 static int
2073 re_wctype_to_bit (re_wctype_t cc)
2074 {
2075   switch (cc)
2076     {
2077     case RECC_NONASCII: case RECC_PRINT: case RECC_GRAPH:
2078     case RECC_MULTIBYTE: return BIT_MULTIBYTE;
2079     case RECC_ALPHA: case RECC_ALNUM: case RECC_WORD: return BIT_WORD;
2080     case RECC_LOWER: return BIT_LOWER;
2081     case RECC_UPPER: return BIT_UPPER;
2082     case RECC_PUNCT: return BIT_PUNCT;
2083     case RECC_SPACE: return BIT_SPACE;
2084     case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
2085     case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0;
2086     default:
2087       abort ();
2088     }
2089 }
2090 #endif
2091 \f
2092 /* Filling in the work area of a range.  */
2093
2094 /* Actually extend the space in WORK_AREA.  */
2095
2096 static void
2097 extend_range_table_work_area (struct range_table_work_area *work_area)
2098 {
2099   work_area->allocated += 16 * sizeof (int);
2100   work_area->table = realloc (work_area->table, work_area->allocated);
2101 }
2102
2103 #if 0
2104 #ifdef emacs
2105
2106 /* Carefully find the ranges of codes that are equivalent
2107    under case conversion to the range start..end when passed through
2108    TRANSLATE.  Handle the case where non-letters can come in between
2109    two upper-case letters (which happens in Latin-1).
2110    Also handle the case of groups of more than 2 case-equivalent chars.
2111
2112    The basic method is to look at consecutive characters and see
2113    if they can form a run that can be handled as one.
2114
2115    Returns -1 if successful, REG_ESPACE if ran out of space.  */
2116
2117 static int
2118 set_image_of_range_1 (struct range_table_work_area *work_area,
2119                       re_wchar_t start, re_wchar_t end,
2120                       RE_TRANSLATE_TYPE translate)
2121 {
2122   /* `one_case' indicates a character, or a run of characters,
2123      each of which is an isolate (no case-equivalents).
2124      This includes all ASCII non-letters.
2125
2126      `two_case' indicates a character, or a run of characters,
2127      each of which has two case-equivalent forms.
2128      This includes all ASCII letters.
2129
2130      `strange' indicates a character that has more than one
2131      case-equivalent.  */
2132
2133   enum case_type {one_case, two_case, strange};
2134
2135   /* Describe the run that is in progress,
2136      which the next character can try to extend.
2137      If run_type is strange, that means there really is no run.
2138      If run_type is one_case, then run_start...run_end is the run.
2139      If run_type is two_case, then the run is run_start...run_end,
2140      and the case-equivalents end at run_eqv_end.  */
2141
2142   enum case_type run_type = strange;
2143   int run_start, run_end, run_eqv_end;
2144
2145   Lisp_Object eqv_table;
2146
2147   if (!RE_TRANSLATE_P (translate))
2148     {
2149       EXTEND_RANGE_TABLE (work_area, 2);
2150       work_area->table[work_area->used++] = (start);
2151       work_area->table[work_area->used++] = (end);
2152       return -1;
2153     }
2154
2155   eqv_table = XCHAR_TABLE (translate)->extras[2];
2156
2157   for (; start <= end; start++)
2158     {
2159       enum case_type this_type;
2160       int eqv = RE_TRANSLATE (eqv_table, start);
2161       int minchar, maxchar;
2162
2163       /* Classify this character */
2164       if (eqv == start)
2165         this_type = one_case;
2166       else if (RE_TRANSLATE (eqv_table, eqv) == start)
2167         this_type = two_case;
2168       else
2169         this_type = strange;
2170
2171       if (start < eqv)
2172         minchar = start, maxchar = eqv;
2173       else
2174         minchar = eqv, maxchar = start;
2175
2176       /* Can this character extend the run in progress?  */
2177       if (this_type == strange || this_type != run_type
2178           || !(minchar == run_end + 1
2179                && (run_type == two_case
2180                    ? maxchar == run_eqv_end + 1 : 1)))
2181         {
2182           /* No, end the run.
2183              Record each of its equivalent ranges.  */
2184           if (run_type == one_case)
2185             {
2186               EXTEND_RANGE_TABLE (work_area, 2);
2187               work_area->table[work_area->used++] = run_start;
2188               work_area->table[work_area->used++] = run_end;
2189             }
2190           else if (run_type == two_case)
2191             {
2192               EXTEND_RANGE_TABLE (work_area, 4);
2193               work_area->table[work_area->used++] = run_start;
2194               work_area->table[work_area->used++] = run_end;
2195               work_area->table[work_area->used++]
2196                 = RE_TRANSLATE (eqv_table, run_start);
2197               work_area->table[work_area->used++]
2198                 = RE_TRANSLATE (eqv_table, run_end);
2199             }
2200           run_type = strange;
2201         }
2202
2203       if (this_type == strange)
2204         {
2205           /* For a strange character, add each of its equivalents, one
2206              by one.  Don't start a range.  */
2207           do
2208             {
2209               EXTEND_RANGE_TABLE (work_area, 2);
2210               work_area->table[work_area->used++] = eqv;
2211               work_area->table[work_area->used++] = eqv;
2212               eqv = RE_TRANSLATE (eqv_table, eqv);
2213             }
2214           while (eqv != start);
2215         }
2216
2217       /* Add this char to the run, or start a new run.  */
2218       else if (run_type == strange)
2219         {
2220           /* Initialize a new range.  */
2221           run_type = this_type;
2222           run_start = start;
2223           run_end = start;
2224           run_eqv_end = RE_TRANSLATE (eqv_table, run_end);
2225         }
2226       else
2227         {
2228           /* Extend a running range.  */
2229           run_end = minchar;
2230           run_eqv_end = RE_TRANSLATE (eqv_table, run_end);
2231         }
2232     }
2233
2234   /* If a run is still in progress at the end, finish it now
2235      by recording its equivalent ranges.  */
2236   if (run_type == one_case)
2237     {
2238       EXTEND_RANGE_TABLE (work_area, 2);
2239       work_area->table[work_area->used++] = run_start;
2240       work_area->table[work_area->used++] = run_end;
2241     }
2242   else if (run_type == two_case)
2243     {
2244       EXTEND_RANGE_TABLE (work_area, 4);
2245       work_area->table[work_area->used++] = run_start;
2246       work_area->table[work_area->used++] = run_end;
2247       work_area->table[work_area->used++]
2248         = RE_TRANSLATE (eqv_table, run_start);
2249       work_area->table[work_area->used++]
2250         = RE_TRANSLATE (eqv_table, run_end);
2251     }
2252
2253   return -1;
2254 }
2255
2256 #endif /* emacs */
2257
2258 /* Record the image of the range start..end when passed through
2259    TRANSLATE.  This is not necessarily TRANSLATE(start)..TRANSLATE(end)
2260    and is not even necessarily contiguous.
2261    Normally we approximate it with the smallest contiguous range that contains
2262    all the chars we need.  However, for Latin-1 we go to extra effort
2263    to do a better job.
2264
2265    This function is not called for ASCII ranges.
2266
2267    Returns -1 if successful, REG_ESPACE if ran out of space.  */
2268
2269 static int
2270 set_image_of_range (struct range_table_work_area *work_area,
2271                     re_wchar_t start, re_wchar_t end,
2272                     RE_TRANSLATE_TYPE translate)
2273 {
2274   re_wchar_t cmin, cmax;
2275
2276 #ifdef emacs
2277   /* For Latin-1 ranges, use set_image_of_range_1
2278      to get proper handling of ranges that include letters and nonletters.
2279      For a range that includes the whole of Latin-1, this is not necessary.
2280      For other character sets, we don't bother to get this right.  */
2281   if (RE_TRANSLATE_P (translate) && start < 04400
2282       && !(start < 04200 && end >= 04377))
2283     {
2284       int newend;
2285       int tem;
2286       newend = end;
2287       if (newend > 04377)
2288         newend = 04377;
2289       tem = set_image_of_range_1 (work_area, start, newend, translate);
2290       if (tem > 0)
2291         return tem;
2292
2293       start = 04400;
2294       if (end < 04400)
2295         return -1;
2296     }
2297 #endif
2298
2299   EXTEND_RANGE_TABLE (work_area, 2);
2300   work_area->table[work_area->used++] = (start);
2301   work_area->table[work_area->used++] = (end);
2302
2303   cmin = -1, cmax = -1;
2304
2305   if (RE_TRANSLATE_P (translate))
2306     {
2307       int ch;
2308
2309       for (ch = start; ch <= end; ch++)
2310         {
2311           re_wchar_t c = TRANSLATE (ch);
2312           if (! (start <= c && c <= end))
2313             {
2314               if (cmin == -1)
2315                 cmin = c, cmax = c;
2316               else
2317                 {
2318                   cmin = MIN (cmin, c);
2319                   cmax = MAX (cmax, c);
2320                 }
2321             }
2322         }
2323
2324       if (cmin != -1)
2325         {
2326           EXTEND_RANGE_TABLE (work_area, 2);
2327           work_area->table[work_area->used++] = (cmin);
2328           work_area->table[work_area->used++] = (cmax);
2329         }
2330     }
2331
2332   return -1;
2333 }
2334 #endif  /* 0 */
2335 \f
2336 #ifndef MATCH_MAY_ALLOCATE
2337
2338 /* If we cannot allocate large objects within re_match_2_internal,
2339    we make the fail stack and register vectors global.
2340    The fail stack, we grow to the maximum size when a regexp
2341    is compiled.
2342    The register vectors, we adjust in size each time we
2343    compile a regexp, according to the number of registers it needs.  */
2344
2345 static fail_stack_type fail_stack;
2346
2347 /* Size with which the following vectors are currently allocated.
2348    That is so we can make them bigger as needed,
2349    but never make them smaller.  */
2350 static int regs_allocated_size;
2351
2352 static re_char **     regstart, **     regend;
2353 static re_char **best_regstart, **best_regend;
2354
2355 /* Make the register vectors big enough for NUM_REGS registers,
2356    but don't make them smaller.  */
2357
2358 static
2359 regex_grow_registers (int num_regs)
2360 {
2361   if (num_regs > regs_allocated_size)
2362     {
2363       RETALLOC_IF (regstart,     num_regs, re_char *);
2364       RETALLOC_IF (regend,       num_regs, re_char *);
2365       RETALLOC_IF (best_regstart, num_regs, re_char *);
2366       RETALLOC_IF (best_regend,  num_regs, re_char *);
2367
2368       regs_allocated_size = num_regs;
2369     }
2370 }
2371
2372 #endif /* not MATCH_MAY_ALLOCATE */
2373 \f
2374 static boolean group_in_compile_stack (compile_stack_type compile_stack,
2375                                        regnum_t regnum);
2376
/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
   Returns one of the error codes defined in `regex.h', or zero for success.

   Assumes the `allocated' (and perhaps `buffer') and `translate'
   fields are set in BUFP on entry.

   If it succeeds, results are put in BUFP (if it returns an error, the
   contents of BUFP are undefined):
     `buffer' is the compiled pattern;
     `syntax' is set to SYNTAX;
     `used' is set to the length of the compiled pattern;
     `fastmap_accurate' is zero;
     `re_nsub' is the number of subexpressions in PATTERN;
     `not_bol' and `not_eol' are zero.

   The `fastmap' field is neither examined nor set.  */
2393
/* If the previous alternative left a jump to be filled in
   (fixup_alt_jump is non-null), store a `jump' there that targets the
   current position B.  The bytes for that jump were already reserved.  */
#define FIXUP_ALT_JUMP()                                                \
  do                                                                    \
    {                                                                   \
      if (fixup_alt_jump != 0)                                          \
        {                                                               \
          STORE_JUMP (jump, fixup_alt_jump, b);                         \
        }                                                               \
    }                                                                   \
  while (0)
2401
2402
/* Release the storage we allocated (the range-table work area and the
   compile stack), then return VALUE from the enclosing function.  */
#define FREE_STACK_RETURN(value)                                        \
  do                                                                    \
    {                                                                   \
      FREE_RANGE_TABLE_WORK_AREA (range_table_work);                    \
      free (compile_stack.stack);                                       \
      return value;                                                     \
    }                                                                   \
  while (0)
2410
2411 static reg_errcode_t
2412 regex_compile (const re_char *pattern, size_t size, reg_syntax_t syntax, struct re_pattern_buffer *bufp)
2413 {
2414   /* We fetch characters from PATTERN here.  */
2415   register re_wchar_t c, c1;
2416
2417   /* Points to the end of the buffer, where we should append.  */
2418   register unsigned char *b;
2419
2420   /* Keeps track of unclosed groups.  */
2421   compile_stack_type compile_stack;
2422
2423   /* Points to the current (ending) position in the pattern.  */
2424 #ifdef AIX
2425   /* `const' makes AIX compiler fail.  */
2426   unsigned char *p = pattern;
2427 #else
2428   re_char *p = pattern;
2429 #endif
2430   re_char *pend = pattern + size;
2431
2432   /* How to translate the characters in the pattern.  */
2433   RE_TRANSLATE_TYPE translate = bufp->translate;
2434
2435   /* Address of the count-byte of the most recently inserted `exactn'
2436      command.  This makes it possible to tell if a new exact-match
2437      character can be added to that command or if the character requires
2438      a new `exactn' command.  */
2439   unsigned char *pending_exact = 0;
2440
2441   /* Address of start of the most recently finished expression.
2442      This tells, e.g., postfix * where to find the start of its
2443      operand.  Reset at the beginning of groups and alternatives.  */
2444   unsigned char *laststart = 0;
2445
2446   /* Address of beginning of regexp, or inside of last group.  */
2447   unsigned char *begalt;
2448
2449   /* Place in the uncompiled pattern (i.e., the {) to
2450      which to go back if the interval is invalid.  */
2451   re_char *beg_interval;
2452
2453   /* Address of the place where a forward jump should go to the end of
2454      the containing expression.  Each alternative of an `or' -- except the
2455      last -- ends with a forward jump of this sort.  */
2456   unsigned char *fixup_alt_jump = 0;
2457
2458   /* Work area for range table of charset.  */
2459   struct range_table_work_area range_table_work;
2460
2461   /* If the object matched can contain multibyte characters.  */
2462   const boolean multibyte = RE_MULTIBYTE_P (bufp);
2463
2464   /* Nonzero if we have pushed down into a subpattern.  */
2465   int in_subpattern = 0;
2466
2467   /* These hold the values of p, pattern, and pend from the main
2468      pattern when we have pushed into a subpattern.  */
2469   re_char *main_p IF_LINT (= NULL);
2470   re_char *main_pattern IF_LINT (= NULL);
2471   re_char *main_pend IF_LINT (= NULL);
2472
2473 #ifdef DEBUG
2474   debug++;
2475   DEBUG_PRINT ("\nCompiling pattern: ");
2476   if (debug > 0)
2477     {
2478       unsigned debug_count;
2479
2480       for (debug_count = 0; debug_count < size; debug_count++)
2481         putchar (pattern[debug_count]);
2482       putchar ('\n');
2483     }
2484 #endif /* DEBUG */
2485
2486   /* Initialize the compile stack.  */
2487   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
2488   if (compile_stack.stack == NULL)
2489     return REG_ESPACE;
2490
2491   compile_stack.size = INIT_COMPILE_STACK_SIZE;
2492   compile_stack.avail = 0;
2493
2494   range_table_work.table = 0;
2495   range_table_work.allocated = 0;
2496
2497   /* Initialize the pattern buffer.  */
2498   bufp->syntax = syntax;
2499   bufp->fastmap_accurate = 0;
2500   bufp->not_bol = bufp->not_eol = 0;
2501   bufp->used_syntax = 0;
2502
2503   /* Set `used' to zero, so that if we return an error, the pattern
2504      printer (for debugging) will think there's no pattern.  We reset it
2505      at the end.  */
2506   bufp->used = 0;
2507
2508   /* Always count groups, whether or not bufp->no_sub is set.  */
2509   bufp->re_nsub = 0;
2510
2511 #if !defined emacs && !defined SYNTAX_TABLE
2512   /* Initialize the syntax table.  */
2513    init_syntax_once ();
2514 #endif
2515
2516   if (bufp->allocated == 0)
2517     {
2518       if (bufp->buffer)
2519         { /* If zero allocated, but buffer is non-null, try to realloc
2520              enough space.  This loses if buffer's address is bogus, but
2521              that is the user's responsibility.  */
2522           RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
2523         }
2524       else
2525         { /* Caller did not allocate a buffer.  Do it for them.  */
2526           bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
2527         }
2528       if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
2529
2530       bufp->allocated = INIT_BUF_SIZE;
2531     }
2532
2533   begalt = b = bufp->buffer;
2534
2535   /* Loop through the uncompiled pattern until we're at the end.  */
2536   while (1)
2537     {
2538       if (p == pend)
2539         {
2540           /* If this is the end of an included regexp,
2541              pop back to the main regexp and try again.  */
2542           if (in_subpattern)
2543             {
2544               in_subpattern = 0;
2545               pattern = main_pattern;
2546               p = main_p;
2547               pend = main_pend;
2548               continue;
2549             }
2550           /* If this is the end of the main regexp, we are done.  */
2551           break;
2552         }
2553
2554       PATFETCH (c);
2555
2556       switch (c)
2557         {
2558         case ' ':
2559           {
2560             re_char *p1 = p;
2561
2562             /* If there's no special whitespace regexp, treat
2563                spaces normally.  And don't try to do this recursively.  */
2564             if (!whitespace_regexp || in_subpattern)
2565               goto normal_char;
2566
2567             /* Peek past following spaces.  */
2568             while (p1 != pend)
2569               {
2570                 if (*p1 != ' ')
2571                   break;
2572                 p1++;
2573               }
2574             /* If the spaces are followed by a repetition op,
2575                treat them normally.  */
2576             if (p1 != pend
2577                 && (*p1 == '*' || *p1 == '+' || *p1 == '?'
2578                     || (*p1 == '\\' && p1 + 1 != pend && p1[1] == '{')))
2579               goto normal_char;
2580
2581             /* Replace the spaces with the whitespace regexp.  */
2582             in_subpattern = 1;
2583             main_p = p1;
2584             main_pend = pend;
2585             main_pattern = pattern;
2586             p = pattern = whitespace_regexp;
2587             pend = p + strlen ((const char *) p);
2588             break;
2589           }
2590
2591         case '^':
2592           {
2593             if (   /* If at start of pattern, it's an operator.  */
2594                    p == pattern + 1
2595                    /* If context independent, it's an operator.  */
2596                 || syntax & RE_CONTEXT_INDEP_ANCHORS
2597                    /* Otherwise, depends on what's come before.  */
2598                 || at_begline_loc_p (pattern, p, syntax))
2599               BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? begbuf : begline);
2600             else
2601               goto normal_char;
2602           }
2603           break;
2604
2605
2606         case '$':
2607           {
2608             if (   /* If at end of pattern, it's an operator.  */
2609                    p == pend
2610                    /* If context independent, it's an operator.  */
2611                 || syntax & RE_CONTEXT_INDEP_ANCHORS
2612                    /* Otherwise, depends on what's next.  */
2613                 || at_endline_loc_p (p, pend, syntax))
2614                BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? endbuf : endline);
2615              else
2616                goto normal_char;
2617            }
2618            break;
2619
2620
2621         case '+':
2622         case '?':
2623           if ((syntax & RE_BK_PLUS_QM)
2624               || (syntax & RE_LIMITED_OPS))
2625             goto normal_char;
2626         handle_plus:
2627         case '*':
2628           /* If there is no previous pattern...  */
2629           if (!laststart)
2630             {
2631               if (syntax & RE_CONTEXT_INVALID_OPS)
2632                 FREE_STACK_RETURN (REG_BADRPT);
2633               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
2634                 goto normal_char;
2635             }
2636
2637           {
2638             /* 1 means zero (many) matches is allowed.  */
2639             boolean zero_times_ok = 0, many_times_ok = 0;
2640             boolean greedy = 1;
2641
2642             /* If there is a sequence of repetition chars, collapse it
2643                down to just one (the right one).  We can't combine
2644                interval operators with these because of, e.g., `a{2}*',
2645                which should only match an even number of `a's.  */
2646
2647             for (;;)
2648               {
2649                 if ((syntax & RE_FRUGAL)
2650                     && c == '?' && (zero_times_ok || many_times_ok))
2651                   greedy = 0;
2652                 else
2653                   {
2654                     zero_times_ok |= c != '+';
2655                     many_times_ok |= c != '?';
2656                   }
2657
2658                 if (p == pend)
2659                   break;
2660                 else if (*p == '*'
2661                          || (!(syntax & RE_BK_PLUS_QM)
2662                              && (*p == '+' || *p == '?')))
2663                   ;
2664                 else if (syntax & RE_BK_PLUS_QM  && *p == '\\')
2665                   {
2666                     if (p+1 == pend)
2667                       FREE_STACK_RETURN (REG_EESCAPE);
2668                     if (p[1] == '+' || p[1] == '?')
2669                       PATFETCH (c); /* Gobble up the backslash.  */
2670                     else
2671                       break;
2672                   }
2673                 else
2674                   break;
2675                 /* If we get here, we found another repeat character.  */
2676                 PATFETCH (c);
2677                }
2678
2679             /* Star, etc. applied to an empty pattern is equivalent
2680                to an empty pattern.  */
2681             if (!laststart || laststart == b)
2682               break;
2683
2684             /* Now we know whether or not zero matches is allowed
2685                and also whether or not two or more matches is allowed.  */
2686             if (greedy)
2687               {
2688                 if (many_times_ok)
2689                   {
2690                     boolean simple = skip_one_char (laststart) == b;
2691                     size_t startoffset = 0;
2692                     re_opcode_t ofj =
2693                       /* Check if the loop can match the empty string.  */
2694                       (simple || !analyse_first (laststart, b, NULL, 0))
2695                       ? on_failure_jump : on_failure_jump_loop;
2696                     assert (skip_one_char (laststart) <= b);
2697
2698                     if (!zero_times_ok && simple)
2699                       { /* Since simple * loops can be made faster by using
2700                            on_failure_keep_string_jump, we turn simple P+
2701                            into PP* if P is simple.  */
2702                         unsigned char *p1, *p2;
2703                         startoffset = b - laststart;
2704                         GET_BUFFER_SPACE (startoffset);
2705                         p1 = b; p2 = laststart;
2706                         while (p2 < p1)
2707                           *b++ = *p2++;
2708                         zero_times_ok = 1;
2709                       }
2710
2711                     GET_BUFFER_SPACE (6);
2712                     if (!zero_times_ok)
2713                       /* A + loop.  */
2714                       STORE_JUMP (ofj, b, b + 6);
2715                     else
2716                       /* Simple * loops can use on_failure_keep_string_jump
2717                          depending on what follows.  But since we don't know
2718                          that yet, we leave the decision up to
2719                          on_failure_jump_smart.  */
2720                       INSERT_JUMP (simple ? on_failure_jump_smart : ofj,
2721                                    laststart + startoffset, b + 6);
2722                     b += 3;
2723                     STORE_JUMP (jump, b, laststart + startoffset);
2724                     b += 3;
2725                   }
2726                 else
2727                   {
2728                     /* A simple ? pattern.  */
2729                     assert (zero_times_ok);
2730                     GET_BUFFER_SPACE (3);
2731                     INSERT_JUMP (on_failure_jump, laststart, b + 3);
2732                     b += 3;
2733                   }
2734               }
2735             else                /* not greedy */
2736               { /* I wish the greedy and non-greedy cases could be merged.  */
2737
2738                 GET_BUFFER_SPACE (7); /* We might use less.  */
2739                 if (many_times_ok)
2740                   {
2741                     boolean emptyp = analyse_first (laststart, b, NULL, 0);
2742
2743                     /* The non-greedy multiple match looks like
2744                        a repeat..until: we only need a conditional jump
2745                        at the end of the loop.  */
2746                     if (emptyp) BUF_PUSH (no_op);
2747                     STORE_JUMP (emptyp ? on_failure_jump_nastyloop
2748                                 : on_failure_jump, b, laststart);
2749                     b += 3;
2750                     if (zero_times_ok)
2751                       {
2752                         /* The repeat...until naturally matches one or more.
2753                            To also match zero times, we need to first jump to
2754                            the end of the loop (its conditional jump).  */
2755                         INSERT_JUMP (jump, laststart, b);
2756                         b += 3;
2757                       }
2758                   }
2759                 else
2760                   {
2761                     /* non-greedy a?? */
2762                     INSERT_JUMP (jump, laststart, b + 3);
2763                     b += 3;
2764                     INSERT_JUMP (on_failure_jump, laststart, laststart + 6);
2765                     b += 3;
2766                   }
2767               }
2768           }
2769           pending_exact = 0;
2770           break;
2771
2772
2773         case '.':
2774           laststart = b;
2775           BUF_PUSH (anychar);
2776           break;
2777
2778
2779         case '[':
2780           {
2781             re_char *p1;
2782
2783             CLEAR_RANGE_TABLE_WORK_USED (range_table_work);
2784
2785             if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2786
2787             /* Ensure that we have enough space to push a charset: the
2788                opcode, the length count, and the bitset; 34 bytes in all.  */
2789             GET_BUFFER_SPACE (34);
2790
2791             laststart = b;
2792
2793             /* We test `*p == '^' twice, instead of using an if
2794                statement, so we only need one BUF_PUSH.  */
2795             BUF_PUSH (*p == '^' ? charset_not : charset);
2796             if (*p == '^')
2797               p++;
2798
2799             /* Remember the first position in the bracket expression.  */
2800             p1 = p;
2801
2802             /* Push the number of bytes in the bitmap.  */
2803             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
2804
2805             /* Clear the whole map.  */
2806             memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
2807
2808             /* charset_not matches newline according to a syntax bit.  */
2809             if ((re_opcode_t) b[-2] == charset_not
2810                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2811               SET_LIST_BIT ('\n');
2812
2813             /* Read in characters and ranges, setting map bits.  */
2814             for (;;)
2815               {
2816                 boolean escaped_char = false;
2817                 const unsigned char *p2 = p;
2818                 re_wchar_t ch;
2819
2820                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2821
2822                 /* Don't translate yet.  The range TRANSLATE(X..Y) cannot
2823                    always be determined from TRANSLATE(X) and TRANSLATE(Y)
2824                    So the translation is done later in a loop.  Example:
2825                    (let ((case-fold-search t)) (string-match "[A-_]" "A"))  */
2826                 PATFETCH (c);
2827
2828                 /* \ might escape characters inside [...] and [^...].  */
2829                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2830                   {
2831                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2832
2833                     PATFETCH (c);
2834                     escaped_char = true;
2835                   }
2836                 else
2837                   {
2838                     /* Could be the end of the bracket expression.  If it's
2839                        not (i.e., when the bracket expression is `[]' so
2840                        far), the ']' character bit gets set way below.  */
2841                     if (c == ']' && p2 != p1)
2842                       break;
2843                   }
2844
2845                 /* See if we're at the beginning of a possible character
2846                    class.  */
2847
2848                 if (!escaped_char &&
2849                     syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2850                   {
2851                     /* Leave room for the null.  */
2852                     unsigned char str[CHAR_CLASS_MAX_LENGTH + 1];
2853                     const unsigned char *class_beg;
2854
2855                     PATFETCH (c);
2856                     c1 = 0;
2857                     class_beg = p;
2858
2859                     /* If pattern is `[[:'.  */
2860                     if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2861
2862                     for (;;)
2863                       {
2864                         PATFETCH (c);
2865                         if ((c == ':' && *p == ']') || p == pend)
2866                           break;
2867                         if (c1 < CHAR_CLASS_MAX_LENGTH)
2868                           str[c1++] = c;
2869                         else
2870                           /* This is in any case an invalid class name.  */
2871                           str[0] = '\0';
2872                       }
2873                     str[c1] = '\0';
2874
2875                     /* If isn't a word bracketed by `[:' and `:]':
2876                        undo the ending character, the letters, and
2877                        leave the leading `:' and `[' (but set bits for
2878                        them).  */
2879                     if (c == ':' && *p == ']')
2880                       {
2881                         re_wctype_t cc = re_wctype (str);
2882
2883                         if (cc == 0)
2884                           FREE_STACK_RETURN (REG_ECTYPE);
2885
2886                         /* Throw away the ] at the end of the character
2887                            class.  */
2888                         PATFETCH (c);
2889
2890                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2891
2892 #ifndef emacs
2893                         for (ch = 0; ch < (1 << BYTEWIDTH); ++ch)
2894                           if (re_iswctype (btowc (ch), cc))
2895                             {
2896                               c = TRANSLATE (ch);
2897                               if (c < (1 << BYTEWIDTH))
2898                                 SET_LIST_BIT (c);
2899                             }
2900 #else  /* emacs */
2901                         /* Most character classes in a multibyte match
2902                            just set a flag.  Exceptions are is_blank,
2903                            is_digit, is_cntrl, and is_xdigit, since
2904                            they can only match ASCII characters.  We
2905                            don't need to handle them for multibyte.
2906                            They are distinguished by a negative wctype.  */
2907
2908                         /* Setup the gl_state object to its buffer-defined
2909                            value.  This hardcodes the buffer-global
2910                            syntax-table for ASCII chars, while the other chars
2911                            will obey syntax-table properties.  It's not ideal,
2912                            but it's the way it's been done until now.  */
2913                         SETUP_BUFFER_SYNTAX_TABLE ();
2914
2915                         for (ch = 0; ch < 256; ++ch)
2916                           {
2917                             c = RE_CHAR_TO_MULTIBYTE (ch);
2918                             if (! CHAR_BYTE8_P (c)
2919                                 && re_iswctype (c, cc))
2920                               {
2921                                 SET_LIST_BIT (ch);
2922                                 c1 = TRANSLATE (c);
2923                                 if (c1 == c)
2924                                   continue;
2925                                 if (ASCII_CHAR_P (c1))
2926                                   SET_LIST_BIT (c1);
2927                                 else if ((c1 = RE_CHAR_TO_UNIBYTE (c1)) >= 0)
2928                                   SET_LIST_BIT (c1);
2929                               }
2930                           }
2931                         SET_RANGE_TABLE_WORK_AREA_BIT
2932                           (range_table_work, re_wctype_to_bit (cc));
2933 #endif  /* emacs */
2934                         /* In most cases the matching rule for char classes
2935                            only uses the syntax table for multibyte chars,
2936                            so that the content of the syntax-table it is not
2937                            hardcoded in the range_table.  SPACE and WORD are
2938                            the two exceptions.  */
2939                         if ((1 << cc) & ((1 << RECC_SPACE) | (1 << RECC_WORD)))
2940                           bufp->used_syntax = 1;
2941
2942                         /* Repeat the loop. */
2943                         continue;
2944                       }
2945                     else
2946                       {
2947                         /* Go back to right after the "[:".  */
2948                         p = class_beg;
2949                         SET_LIST_BIT ('[');
2950
2951                         /* Because the `:' may starts the range, we
2952                            can't simply set bit and repeat the loop.
2953                            Instead, just set it to C and handle below.  */
2954                         c = ':';
2955                       }
2956                   }
2957
2958                 if (p < pend && p[0] == '-' && p[1] != ']')
2959                   {
2960
2961                     /* Discard the `-'. */
2962                     PATFETCH (c1);
2963
2964                     /* Fetch the character which ends the range. */
2965                     PATFETCH (c1);
2966 #ifdef emacs
2967                     if (CHAR_BYTE8_P (c1)
2968                         && ! ASCII_CHAR_P (c) && ! CHAR_BYTE8_P (c))
2969                       /* Treat the range from a multibyte character to
2970                          raw-byte character as empty.  */
2971                       c = c1 + 1;
2972 #endif  /* emacs */
2973                   }
2974                 else
2975                   /* Range from C to C. */
2976                   c1 = c;
2977
2978                 if (c > c1)
2979                   {
2980                     if (syntax & RE_NO_EMPTY_RANGES)
2981                       FREE_STACK_RETURN (REG_ERANGEX);
2982                     /* Else, repeat the loop.  */
2983                   }
2984                 else
2985                   {
2986 #ifndef emacs
2987                     /* Set the range into bitmap */
2988                     for (; c <= c1; c++)
2989                       {
2990                         ch = TRANSLATE (c);
2991                         if (ch < (1 << BYTEWIDTH))
2992                           SET_LIST_BIT (ch);
2993                       }
2994 #else  /* emacs */
2995                     if (c < 128)
2996                       {
2997                         ch = MIN (127, c1);
2998                         SETUP_ASCII_RANGE (range_table_work, c, ch);
2999                         c = ch + 1;
3000                         if (CHAR_BYTE8_P (c1))
3001                           c = BYTE8_TO_CHAR (128);
3002                       }
3003                     if (c <= c1)
3004                       {
3005                         if (CHAR_BYTE8_P (c))
3006                           {
3007                             c = CHAR_TO_BYTE8 (c);
3008                             c1 = CHAR_TO_BYTE8 (c1);
3009                             for (; c <= c1; c++)
3010                               SET_LIST_BIT (c);
3011                           }
3012                         else if (multibyte)
3013                           {
3014                             SETUP_MULTIBYTE_RANGE (range_table_work, c, c1);
3015                           }
3016                         else
3017                           {
3018                             SETUP_UNIBYTE_RANGE (range_table_work, c, c1);
3019                           }
3020                       }
3021 #endif /* emacs */
3022                   }
3023               }
3024
3025             /* Discard any (non)matching list bytes that are all 0 at the
3026                end of the map.  Decrease the map-length byte too.  */
3027             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
3028               b[-1]--;
3029             b += b[-1];
3030
3031             /* Build real range table from work area.  */
3032             if (RANGE_TABLE_WORK_USED (range_table_work)
3033                 || RANGE_TABLE_WORK_BITS (range_table_work))
3034               {
3035                 int i;
3036                 int used = RANGE_TABLE_WORK_USED (range_table_work);
3037
3038                 /* Allocate space for COUNT + RANGE_TABLE.  Needs two
3039                    bytes for flags, two for COUNT, and three bytes for
3040                    each character.  */
3041                 GET_BUFFER_SPACE (4 + used * 3);
3042
3043                 /* Indicate the existence of range table.  */
3044                 laststart[1] |= 0x80;
3045
3046                 /* Store the character class flag bits into the range table.
3047                    If not in emacs, these flag bits are always 0.  */
3048                 *b++ = RANGE_TABLE_WORK_BITS (range_table_work) & 0xff;
3049                 *b++ = RANGE_TABLE_WORK_BITS (range_table_work) >> 8;
3050
3051                 STORE_NUMBER_AND_INCR (b, used / 2);
3052                 for (i = 0; i < used; i++)
3053                   STORE_CHARACTER_AND_INCR
3054                     (b, RANGE_TABLE_WORK_ELT (range_table_work, i));
3055               }
3056           }
3057           break;
3058
3059
3060         case '(':
3061           if (syntax & RE_NO_BK_PARENS)
3062             goto handle_open;
3063           else
3064             goto normal_char;
3065
3066
3067         case ')':
3068           if (syntax & RE_NO_BK_PARENS)
3069             goto handle_close;
3070           else
3071             goto normal_char;
3072
3073
3074         case '\n':
3075           if (syntax & RE_NEWLINE_ALT)
3076             goto handle_alt;
3077           else
3078             goto normal_char;
3079
3080
3081         case '|':
3082           if (syntax & RE_NO_BK_VBAR)
3083             goto handle_alt;
3084           else
3085             goto normal_char;
3086
3087
3088         case '{':
3089            if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
3090              goto handle_interval;
3091            else
3092              goto normal_char;
3093
3094
3095         case '\\':
3096           if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
3097
3098           /* Do not translate the character after the \, so that we can
3099              distinguish, e.g., \B from \b, even if we normally would
3100              translate, e.g., B to b.  */
3101           PATFETCH (c);
3102
3103           switch (c)
3104             {
3105             case '(':
3106               if (syntax & RE_NO_BK_PARENS)
3107                 goto normal_backslash;
3108
3109             handle_open:
3110               {
3111                 int shy = 0;
3112                 regnum_t regnum = 0;
3113                 if (p+1 < pend)
3114                   {
3115                     /* Look for a special (?...) construct */
3116                     if ((syntax & RE_SHY_GROUPS) && *p == '?')
3117                       {
3118                         PATFETCH (c); /* Gobble up the '?'.  */
3119                         while (!shy)
3120                           {
3121                             PATFETCH (c);
3122                             switch (c)
3123                               {
3124                               case ':': shy = 1; break;
3125                               case '0':
3126                                 /* An explicitly specified regnum must start
3127                                    with non-0. */
3128                                 if (regnum == 0)
3129                                   FREE_STACK_RETURN (REG_BADPAT);
3130                               case '1': case '2': case '3': case '4':
3131                               case '5': case '6': case '7': case '8': case '9':
3132                                 regnum = 10*regnum + (c - '0'); break;
3133                               default:
3134                                 /* Only (?:...) is supported right now. */
3135                                 FREE_STACK_RETURN (REG_BADPAT);
3136                               }
3137                           }
3138                       }
3139                   }
3140
3141                 if (!shy)
3142                   regnum = ++bufp->re_nsub;
3143                 else if (regnum)
3144                   { /* It's actually not shy, but explicitly numbered.  */
3145                     shy = 0;
3146                     if (regnum > bufp->re_nsub)
3147                       bufp->re_nsub = regnum;
3148                     else if (regnum > bufp->re_nsub
3149                              /* Ideally, we'd want to check that the specified
3150                                 group can't have matched (i.e. all subgroups
3151                                 using the same regnum are in other branches of
3152                                 OR patterns), but we don't currently keep track
3153                                 of enough info to do that easily.  */
3154                              || group_in_compile_stack (compile_stack, regnum))
3155                       FREE_STACK_RETURN (REG_BADPAT);
3156                   }
3157                 else
3158                   /* It's really shy.  */
3159                   regnum = - bufp->re_nsub;
3160
3161                 if (COMPILE_STACK_FULL)
3162                   {
3163                     RETALLOC (compile_stack.stack, compile_stack.size << 1,
3164                               compile_stack_elt_t);
3165                     if (compile_stack.stack == NULL) return REG_ESPACE;
3166
3167                     compile_stack.size <<= 1;
3168                   }
3169
3170                 /* These are the values to restore when we hit end of this
3171                    group.  They are all relative offsets, so that if the
3172                    whole pattern moves because of realloc, they will still
3173                    be valid.  */
3174                 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
3175                 COMPILE_STACK_TOP.fixup_alt_jump
3176                   = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
3177                 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
3178                 COMPILE_STACK_TOP.regnum = regnum;
3179
3180                 /* Do not push a start_memory for groups beyond the last one
3181                    we can represent in the compiled pattern.  */
3182                 if (regnum <= MAX_REGNUM && regnum > 0)
3183                   BUF_PUSH_2 (start_memory, regnum);
3184
3185                 compile_stack.avail++;
3186
3187                 fixup_alt_jump = 0;
3188                 laststart = 0;
3189                 begalt = b;
3190                 /* If we've reached MAX_REGNUM groups, then this open
3191                    won't actually generate any code, so we'll have to
3192                    clear pending_exact explicitly.  */
3193                 pending_exact = 0;
3194                 break;
3195               }
3196
3197             case ')':
3198               if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
3199
3200               if (COMPILE_STACK_EMPTY)
3201                 {
3202                   if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
3203                     goto normal_backslash;
3204                   else
3205                     FREE_STACK_RETURN (REG_ERPAREN);
3206                 }
3207
3208             handle_close:
3209               FIXUP_ALT_JUMP ();
3210
3211               /* See similar code for backslashed left paren above.  */
3212               if (COMPILE_STACK_EMPTY)
3213                 {
3214                   if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
3215                     goto normal_char;
3216                   else
3217                     FREE_STACK_RETURN (REG_ERPAREN);
3218                 }
3219
3220               /* Since we just checked for an empty stack above, this
3221                  ``can't happen''.  */
3222               assert (compile_stack.avail != 0);
3223               {
3224                 /* We don't just want to restore into `regnum', because
3225                    later groups should continue to be numbered higher,
3226                    as in `(ab)c(de)' -- the second group is #2.  */
3227                 regnum_t regnum;
3228
3229                 compile_stack.avail--;
3230                 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
3231                 fixup_alt_jump
3232                   = COMPILE_STACK_TOP.fixup_alt_jump
3233                     ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
3234                     : 0;
3235                 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
3236                 regnum = COMPILE_STACK_TOP.regnum;
3237                 /* If we've reached MAX_REGNUM groups, then this open
3238                    won't actually generate any code, so we'll have to
3239                    clear pending_exact explicitly.  */
3240                 pending_exact = 0;
3241
3242                 /* We're at the end of the group, so now we know how many
3243                    groups were inside this one.  */
3244                 if (regnum <= MAX_REGNUM && regnum > 0)
3245                   BUF_PUSH_2 (stop_memory, regnum);
3246               }
3247               break;
3248
3249
3250             case '|':                                   /* `\|'.  */
3251               if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
3252                 goto normal_backslash;
3253             handle_alt:
3254               if (syntax & RE_LIMITED_OPS)
3255                 goto normal_char;
3256
3257               /* Insert before the previous alternative a jump which
3258                  jumps to this alternative if the former fails.  */
3259               GET_BUFFER_SPACE (3);
3260               INSERT_JUMP (on_failure_jump, begalt, b + 6);
3261               pending_exact = 0;
3262               b += 3;
3263
3264               /* The alternative before this one has a jump after it
3265                  which gets executed if it gets matched.  Adjust that
3266                  jump so it will jump to this alternative's analogous
3267                  jump (put in below, which in turn will jump to the next
3268                  (if any) alternative's such jump, etc.).  The last such
3269                  jump jumps to the correct final destination.  A picture:
3270                           _____ _____
3271                           |   | |   |
3272                           |   v |   v
3273                         a | b   | c
3274
3275                  If we are at `b', then fixup_alt_jump right now points to a
3276                  three-byte space after `a'.  We'll put in the jump, set
3277                  fixup_alt_jump to right after `b', and leave behind three
3278                  bytes which we'll fill in when we get to after `c'.  */
3279
3280               FIXUP_ALT_JUMP ();
3281
3282               /* Mark and leave space for a jump after this alternative,
3283                  to be filled in later either by next alternative or
3284                  when we know we're at the end of a series of alternatives.  */
3285               fixup_alt_jump = b;
3286               GET_BUFFER_SPACE (3);
3287               b += 3;
3288
3289               laststart = 0;
3290               begalt = b;
3291               break;
3292
3293
3294             case '{':
3295               /* If \{ is a literal.  */
3296               if (!(syntax & RE_INTERVALS)
3297                      /* If we're at `\{' and it's not the open-interval
3298                         operator.  */
3299                   || (syntax & RE_NO_BK_BRACES))
3300                 goto normal_backslash;
3301
3302             handle_interval:
3303               {
3304                 /* If got here, then the syntax allows intervals.  */
3305
3306                 /* At least (most) this many matches must be made.  */
3307                 int lower_bound = 0, upper_bound = -1;
3308
3309                 beg_interval = p;
3310
3311                 GET_UNSIGNED_NUMBER (lower_bound);
3312
3313                 if (c == ',')
3314                   GET_UNSIGNED_NUMBER (upper_bound);
3315                 else
3316                   /* Interval such as `{1}' => match exactly once. */
3317                   upper_bound = lower_bound;
3318
3319                 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
3320                     || (upper_bound >= 0 && lower_bound > upper_bound))
3321                   FREE_STACK_RETURN (REG_BADBR);
3322
3323                 if (!(syntax & RE_NO_BK_BRACES))
3324                   {
3325                     if (c != '\\')
3326                       FREE_STACK_RETURN (REG_BADBR);
3327                     if (p == pend)
3328                       FREE_STACK_RETURN (REG_EESCAPE);
3329                     PATFETCH (c);
3330                   }
3331
3332                 if (c != '}')
3333                   FREE_STACK_RETURN (REG_BADBR);
3334
3335                 /* We just parsed a valid interval.  */
3336
3337                 /* If it's invalid to have no preceding re.  */
3338                 if (!laststart)
3339                   {
3340                     if (syntax & RE_CONTEXT_INVALID_OPS)
3341                       FREE_STACK_RETURN (REG_BADRPT);
3342                     else if (syntax & RE_CONTEXT_INDEP_OPS)
3343                       laststart = b;
3344                     else
3345                       goto unfetch_interval;
3346                   }
3347
3348                 if (upper_bound == 0)
3349                   /* If the upper bound is zero, just drop the sub pattern
3350                      altogether.  */
3351                   b = laststart;
3352                 else if (lower_bound == 1 && upper_bound == 1)
3353                   /* Just match it once: nothing to do here.  */
3354                   ;
3355
3356                 /* Otherwise, we have a nontrivial interval.  When
3357                    we're all done, the pattern will look like:
3358                    set_number_at <jump count> <upper bound>
3359                    set_number_at <succeed_n count> <lower bound>
3360                    succeed_n <after jump addr> <succeed_n count>
3361                    <body of loop>
3362                    jump_n <succeed_n addr> <jump count>
3363                    (The upper bound and `jump_n' are omitted if
3364                    `upper_bound' is 1, though.)  */
3365                 else
3366                   { /* If the upper bound is > 1, we need to insert
3367                        more at the end of the loop.  */
3368                     unsigned int nbytes = (upper_bound < 0 ? 3
3369                                            : upper_bound > 1 ? 5 : 0);
3370                     unsigned int startoffset = 0;
3371
3372                     GET_BUFFER_SPACE (20); /* We might use less.  */
3373
3374                     if (lower_bound == 0)
3375                       {
3376                         /* A succeed_n that starts with 0 is really a
3377                            simple on_failure_jump_loop.  */
3378                         INSERT_JUMP (on_failure_jump_loop, laststart,
3379                                      b + 3 + nbytes);
3380                         b += 3;
3381                       }
3382                     else
3383                       {
3384                         /* Initialize lower bound of the `succeed_n', even
3385                            though it will be set during matching by its
3386                            attendant `set_number_at' (inserted next),
3387                            because `re_compile_fastmap' needs to know.
3388                            Jump to the `jump_n' we might insert below.  */
3389                         INSERT_JUMP2 (succeed_n, laststart,
3390                                       b + 5 + nbytes,
3391                                       lower_bound);
3392                         b += 5;
3393
3394                         /* Code to initialize the lower bound.  Insert
3395                            before the `succeed_n'.  The `5' is the last two
3396                            bytes of this `set_number_at', plus 3 bytes of
3397                            the following `succeed_n'.  */
3398                         insert_op2 (set_number_at, laststart, 5, lower_bound, b);
3399                         b += 5;
3400                         startoffset += 5;
3401                       }
3402
3403                     if (upper_bound < 0)
3404                       {
3405                         /* A negative upper bound stands for infinity,
3406                            in which case it degenerates to a plain jump.  */
3407                         STORE_JUMP (jump, b, laststart + startoffset);
3408                         b += 3;
3409                       }
3410                     else if (upper_bound > 1)
3411                       { /* More than one repetition is allowed, so
3412                            append a backward jump to the `succeed_n'
3413                            that starts this interval.
3414
3415                            When we've reached this during matching,
3416                            we'll have matched the interval once, so
3417                            jump back only `upper_bound - 1' times.  */
3418                         STORE_JUMP2 (jump_n, b, laststart + startoffset,
3419                                      upper_bound - 1);
3420                         b += 5;
3421
3422                         /* The location we want to set is the second
3423                            parameter of the `jump_n'; that is `b-2' as
3424                            an absolute address.  `laststart' will be
3425                            the `set_number_at' we're about to insert;
3426                            `laststart+3' the number to set, the source
3427                            for the relative address.  But we are
3428                            inserting into the middle of the pattern --
3429                            so everything is getting moved up by 5.
3430                            Conclusion: (b - 2) - (laststart + 3) + 5,
3431                            i.e., b - laststart.
3432
3433                            We insert this at the beginning of the loop
3434                            so that if we fail during matching, we'll
3435                            reinitialize the bounds.  */
3436                         insert_op2 (set_number_at, laststart, b - laststart,
3437                                     upper_bound - 1, b);
3438                         b += 5;
3439                       }
3440                   }
3441                 pending_exact = 0;
3442                 beg_interval = NULL;
3443               }
3444               break;
3445
3446             unfetch_interval:
3447               /* If an invalid interval, match the characters as literals.  */
3448                assert (beg_interval);
3449                p = beg_interval;
3450                beg_interval = NULL;
3451
3452                /* normal_char and normal_backslash need `c'.  */
3453                c = '{';
3454
3455                if (!(syntax & RE_NO_BK_BRACES))
3456                  {
3457                    assert (p > pattern && p[-1] == '\\');
3458                    goto normal_backslash;
3459                  }
3460                else
3461                  goto normal_char;
3462
3463 #ifdef emacs
3464             /* There is no way to specify the before_dot and after_dot
3465                operators.  rms says this is ok.  --karl  */
3466             case '=':
3467               laststart = b;
3468               BUF_PUSH (at_dot);
3469               break;
3470
3471             case 's':
3472               laststart = b;
3473               PATFETCH (c);
3474               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
3475               break;
3476
3477             case 'S':
3478               laststart = b;
3479               PATFETCH (c);
3480               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
3481               break;
3482
3483             case 'c':
3484               laststart = b;
3485               PATFETCH (c);
3486               BUF_PUSH_2 (categoryspec, c);
3487               break;
3488
3489             case 'C':
3490               laststart = b;
3491               PATFETCH (c);
3492               BUF_PUSH_2 (notcategoryspec, c);
3493               break;
3494 #endif /* emacs */
3495
3496
3497             case 'w':
3498               if (syntax & RE_NO_GNU_OPS)
3499                 goto normal_char;
3500               laststart = b;
3501               BUF_PUSH_2 (syntaxspec, Sword);
3502               break;
3503
3504
3505             case 'W':
3506               if (syntax & RE_NO_GNU_OPS)
3507                 goto normal_char;
3508               laststart = b;
3509               BUF_PUSH_2 (notsyntaxspec, Sword);
3510               break;
3511
3512
3513             case '<':
3514               if (syntax & RE_NO_GNU_OPS)
3515                 goto normal_char;
3516               laststart = b;
3517               BUF_PUSH (wordbeg);
3518               break;
3519
3520             case '>':
3521               if (syntax & RE_NO_GNU_OPS)
3522                 goto normal_char;
3523               laststart = b;
3524               BUF_PUSH (wordend);
3525               break;
3526
3527             case '_':
3528               if (syntax & RE_NO_GNU_OPS)
3529                 goto normal_char;
3530               laststart = b;
3531               PATFETCH (c);
3532               if (c == '<')
3533                 BUF_PUSH (symbeg);
3534               else if (c == '>')
3535                 BUF_PUSH (symend);
3536               else
3537                 FREE_STACK_RETURN (REG_BADPAT);
3538               break;
3539
3540             case 'b':
3541               if (syntax & RE_NO_GNU_OPS)
3542                 goto normal_char;
3543               BUF_PUSH (wordbound);
3544               break;
3545
3546             case 'B':
3547               if (syntax & RE_NO_GNU_OPS)
3548                 goto normal_char;
3549               BUF_PUSH (notwordbound);
3550               break;
3551
3552             case '`':
3553               if (syntax & RE_NO_GNU_OPS)
3554                 goto normal_char;
3555               BUF_PUSH (begbuf);
3556               break;
3557
3558             case '\'':
3559               if (syntax & RE_NO_GNU_OPS)
3560                 goto normal_char;
3561               BUF_PUSH (endbuf);
3562               break;
3563
3564             case '1': case '2': case '3': case '4': case '5':
3565             case '6': case '7': case '8': case '9':
3566               {
3567                 regnum_t reg;
3568
3569                 if (syntax & RE_NO_BK_REFS)
3570                   goto normal_backslash;
3571
3572                 reg = c - '0';
3573
3574                 if (reg > bufp->re_nsub || reg < 1
3575                     /* Can't back reference to a subexp before its end.  */
3576                     || group_in_compile_stack (compile_stack, reg))
3577                   FREE_STACK_RETURN (REG_ESUBREG);
3578
3579                 laststart = b;
3580                 BUF_PUSH_2 (duplicate, reg);
3581               }
3582               break;
3583
3584
3585             case '+':
3586             case '?':
3587               if (syntax & RE_BK_PLUS_QM)
3588                 goto handle_plus;
3589               else
3590                 goto normal_backslash;
3591
3592             default:
3593             normal_backslash:
3594               /* You might think it would be useful for \ to mean
3595                  not to translate; but if we don't translate it
3596                  it will never match anything.  */
3597               goto normal_char;
3598             }
3599           break;
3600
3601
3602         default:
3603         /* Expects the character in `c'.  */
3604         normal_char:
3605           /* If no exactn currently being built.  */
3606           if (!pending_exact
3607
3608               /* If last exactn not at current position.  */
3609               || pending_exact + *pending_exact + 1 != b
3610
3611               /* We have only one byte following the exactn for the count.  */
3612               || *pending_exact >= (1 << BYTEWIDTH) - MAX_MULTIBYTE_LENGTH
3613
3614               /* If followed by a repetition operator.  */
3615               || (p != pend && (*p == '*' || *p == '^'))
3616               || ((syntax & RE_BK_PLUS_QM)
3617                   ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?')
3618                   : p != pend && (*p == '+' || *p == '?'))
3619               || ((syntax & RE_INTERVALS)
3620                   && ((syntax & RE_NO_BK_BRACES)
3621                       ? p != pend && *p == '{'
3622                       : p + 1 < pend && p[0] == '\\' && p[1] == '{')))
3623             {
3624               /* Start building a new exactn.  */
3625
3626               laststart = b;
3627
3628               BUF_PUSH_2 (exactn, 0);
3629               pending_exact = b - 1;
3630             }
3631
3632           GET_BUFFER_SPACE (MAX_MULTIBYTE_LENGTH);
3633           {
3634             int len;
3635
3636             if (multibyte)
3637               {
3638                 c = TRANSLATE (c);
3639                 len = CHAR_STRING (c, b);
3640                 b += len;
3641               }
3642             else
3643               {
3644                 c1 = RE_CHAR_TO_MULTIBYTE (c);
3645                 if (! CHAR_BYTE8_P (c1))
3646                   {
3647                     re_wchar_t c2 = TRANSLATE (c1);
3648
3649                     if (c1 != c2 && (c1 = RE_CHAR_TO_UNIBYTE (c2)) >= 0)
3650                       c = c1;
3651                   }
3652                 *b++ = c;
3653                 len = 1;
3654               }
3655             (*pending_exact) += len;
3656           }
3657
3658           break;
3659         } /* switch (c) */
3660     } /* while p != pend */
3661
3662
3663   /* Through the pattern now.  */
3664
3665   FIXUP_ALT_JUMP ();
3666
3667   if (!COMPILE_STACK_EMPTY)
3668     FREE_STACK_RETURN (REG_EPAREN);
3669
3670   /* If we don't want backtracking, force success
3671      the first time we reach the end of the compiled pattern.  */
3672   if (syntax & RE_NO_POSIX_BACKTRACKING)
3673     BUF_PUSH (succeed);
3674
3675   /* We have succeeded; set the length of the buffer.  */
3676   bufp->used = b - bufp->buffer;
3677
3678 #ifdef DEBUG
3679   if (debug > 0)
3680     {
3681       re_compile_fastmap (bufp);
3682       DEBUG_PRINT ("\nCompiled pattern: \n");
3683       print_compiled_pattern (bufp);
3684     }
3685   debug--;
3686 #endif /* DEBUG */
3687
3688 #ifndef MATCH_MAY_ALLOCATE
3689   /* Initialize the failure stack to the largest possible stack.  This
3690      isn't necessary unless we're trying to avoid calling alloca in
3691      the search and match routines.  */
3692   {
3693     int num_regs = bufp->re_nsub + 1;
3694
3695     if (fail_stack.size < re_max_failures * TYPICAL_FAILURE_SIZE)
3696       {
3697         fail_stack.size = re_max_failures * TYPICAL_FAILURE_SIZE;
3698         falk_stack.stack = realloc (fail_stack.stack,
3699                                     fail_stack.size * sizeof *falk_stack.stack);
3700       }
3701
3702     regex_grow_registers (num_regs);
3703   }
3704 #endif /* not MATCH_MAY_ALLOCATE */
3705
3706   FREE_STACK_RETURN (REG_NOERROR);
3707 } /* regex_compile */
3708 \f
3709 /* Subroutines for `regex_compile'.  */
3710
3711 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
3712
3713 static void
3714 store_op1 (re_opcode_t op, unsigned char *loc, int arg)
3715 {
3716   *loc = (unsigned char) op;
3717   STORE_NUMBER (loc + 1, arg);
3718 }
3719
3720
3721 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
3722
3723 static void
3724 store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2)
3725 {
3726   *loc = (unsigned char) op;
3727   STORE_NUMBER (loc + 1, arg1);
3728   STORE_NUMBER (loc + 3, arg2);
3729 }
3730
3731
3732 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
3733    for OP followed by two-byte integer parameter ARG.  */
3734
3735 static void
3736 insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end)
3737 {
3738   register unsigned char *pfrom = end;
3739   register unsigned char *pto = end + 3;
3740
3741   while (pfrom != loc)
3742     *--pto = *--pfrom;
3743
3744   store_op1 (op, loc, arg);
3745 }
3746
3747
3748 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
3749
3750 static void
3751 insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, unsigned char *end)
3752 {
3753   register unsigned char *pfrom = end;
3754   register unsigned char *pto = end + 5;
3755
3756   while (pfrom != loc)
3757     *--pto = *--pfrom;
3758
3759   store_op2 (op, loc, arg1, arg2);
3760 }
3761
3762
3763 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
3764    after an alternative or a begin-subexpression.  We assume there is at
3765    least one character before the ^.  */
3766
3767 static boolean
3768 at_begline_loc_p (const re_char *pattern, const re_char *p, reg_syntax_t syntax)
3769 {
3770   re_char *prev = p - 2;
3771   boolean odd_backslashes;
3772
3773   /* After a subexpression?  */
3774   if (*prev == '(')
3775     odd_backslashes = (syntax & RE_NO_BK_PARENS) == 0;
3776
3777   /* After an alternative?  */
3778   else if (*prev == '|')
3779     odd_backslashes = (syntax & RE_NO_BK_VBAR) == 0;
3780
3781   /* After a shy subexpression?  */
3782   else if (*prev == ':' && (syntax & RE_SHY_GROUPS))
3783     {
3784       /* Skip over optional regnum.  */
3785       while (prev - 1 >= pattern && prev[-1] >= '0' && prev[-1] <= '9')
3786         --prev;
3787
3788       if (!(prev - 2 >= pattern
3789             && prev[-1] == '?' && prev[-2] == '('))
3790         return false;
3791       prev -= 2;
3792       odd_backslashes = (syntax & RE_NO_BK_PARENS) == 0;
3793     }
3794   else
3795     return false;
3796
3797   /* Count the number of preceding backslashes.  */
3798   p = prev;
3799   while (prev - 1 >= pattern && prev[-1] == '\\')
3800     --prev;
3801   return (p - prev) & odd_backslashes;
3802 }
3803
3804
3805 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
3806    at least one character after the $, i.e., `P < PEND'.  */
3807
3808 static boolean
3809 at_endline_loc_p (const re_char *p, const re_char *pend, reg_syntax_t syntax)
3810 {
3811   re_char *next = p;
3812   boolean next_backslash = *next == '\\';
3813   re_char *next_next = p + 1 < pend ? p + 1 : 0;
3814
3815   return
3816        /* Before a subexpression?  */
3817        (syntax & RE_NO_BK_PARENS ? *next == ')'
3818         : next_backslash && next_next && *next_next == ')')
3819        /* Before an alternative?  */
3820     || (syntax & RE_NO_BK_VBAR ? *next == '|'
3821         : next_backslash && next_next && *next_next == '|');
3822 }
3823
3824
3825 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
3826    false if it's not.  */
3827
3828 static boolean
3829 group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
3830 {
3831   ssize_t this_element;
3832
3833   for (this_element = compile_stack.avail - 1;
3834        this_element >= 0;
3835        this_element--)
3836     if (compile_stack.stack[this_element].regnum == regnum)
3837       return true;
3838
3839   return false;
3840 }
3841 \f
3842 /* analyse_first.
3843    If fastmap is non-NULL, go through the pattern and fill fastmap
3844    with all the possible leading chars.  If fastmap is NULL, don't
3845    bother filling it up (obviously) and only return whether the
3846    pattern could potentially match the empty string.
3847
3848    Return 1  if p..pend might match the empty string.
3849    Return 0  if p..pend matches at least one char.
3850    Return -1 if fastmap was not updated accurately.  */
3851
3852 static int
3853 analyse_first (const re_char *p, const re_char *pend, char *fastmap, const int multibyte)
3854 {
3855   int j, k;
3856   boolean not;
3857
3858   /* If all elements for base leading-codes in fastmap is set, this
3859      flag is set true.  */
3860   boolean match_any_multibyte_characters = false;
3861
3862   assert (p);
3863
3864   /* The loop below works as follows:
3865      - It has a working-list kept in the PATTERN_STACK and which basically
3866        starts by only containing a pointer to the first operation.
3867      - If the opcode we're looking at is a match against some set of
3868        chars, then we add those chars to the fastmap and go on to the
3869        next work element from the worklist (done via `break').
3870      - If the opcode is a control operator on the other hand, we either
3871        ignore it (if it's meaningless at this point, such as `start_memory')
3872        or execute it (if it's a jump).  If the jump has several destinations
3873        (i.e. `on_failure_jump'), then we push the other destination onto the
3874        worklist.
3875      We guarantee termination by ignoring backward jumps (more or less),
3876      so that `p' is monotonically increasing.  More to the point, we
3877      never set `p' (or push) anything `<= p1'.  */
3878
3879   while (p < pend)
3880     {
3881       /* `p1' is used as a marker of how far back a `on_failure_jump'
3882          can go without being ignored.  It is normally equal to `p'
3883          (which prevents any backward `on_failure_jump') except right
3884          after a plain `jump', to allow patterns such as:
3885             0: jump 10
3886             3..9: <body>
3887             10: on_failure_jump 3
3888          as used for the *? operator.  */
3889       re_char *p1 = p;
3890
3891       switch (*p++)
3892         {
3893         case succeed:
3894           return 1;
3895
3896         case duplicate:
3897           /* If the first character has to match a backreference, that means
3898              that the group was empty (since it already matched).  Since this
3899              is the only case that interests us here, we can assume that the
3900              backreference must match the empty string.  */
3901           p++;
3902           continue;
3903
3904
3905       /* Following are the cases which match a character.  These end
3906          with `break'.  */
3907
3908         case exactn:
3909           if (fastmap)
3910             {
3911               /* If multibyte is nonzero, the first byte of each
3912                  character is an ASCII or a leading code.  Otherwise,
3913                  each byte is a character.  Thus, this works in both
3914                  cases. */
3915               fastmap[p[1]] = 1;
3916               if (! multibyte)
3917                 {
3918                   /* For the case of matching this unibyte regex
3919                      against multibyte, we must set a leading code of
3920                      the corresponding multibyte character.  */
3921                   int c = RE_CHAR_TO_MULTIBYTE (p[1]);
3922
3923                   fastmap[CHAR_LEADING_CODE (c)] = 1;
3924                 }
3925             }
3926           break;
3927
3928
3929         case anychar:
3930           /* We could put all the chars except for \n (and maybe \0)
3931              but we don't bother since it is generally not worth it.  */
3932           if (!fastmap) break;
3933           return -1;
3934
3935
3936         case charset_not:
3937           if (!fastmap) break;
3938           {
3939             /* Chars beyond end of bitmap are possible matches.  */
3940             for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
3941                  j < (1 << BYTEWIDTH); j++)
3942               fastmap[j] = 1;
3943           }
3944
3945           /* Fallthrough */
3946         case charset:
3947           if (!fastmap) break;
3948           not = (re_opcode_t) *(p - 1) == charset_not;
3949           for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
3950                j >= 0; j--)
3951             if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
3952               fastmap[j] = 1;
3953
3954 #ifdef emacs
3955           if (/* Any leading code can possibly start a character
3956                  which doesn't match the specified set of characters.  */
3957               not
3958               ||
3959               /* If we can match a character class, we can match any
3960                  multibyte characters.  */
3961               (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
3962                && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0))
3963
3964             {
3965               if (match_any_multibyte_characters == false)
3966                 {
3967                   for (j = MIN_MULTIBYTE_LEADING_CODE;
3968                        j <= MAX_MULTIBYTE_LEADING_CODE; j++)
3969                     fastmap[j] = 1;
3970                   match_any_multibyte_characters = true;
3971                 }
3972             }
3973
3974           else if (!not && CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
3975                    && match_any_multibyte_characters == false)
3976             {
3977               /* Set fastmap[I] to 1 where I is a leading code of each
3978                  multibyte character in the range table. */
3979               int c, count;
3980               unsigned char lc1, lc2;
3981
3982               /* Make P points the range table.  `+ 2' is to skip flag
3983                  bits for a character class.  */
3984               p += CHARSET_BITMAP_SIZE (&p[-2]) + 2;
3985
3986               /* Extract the number of ranges in range table into COUNT.  */
3987               EXTRACT_NUMBER_AND_INCR (count, p);
3988               for (; count > 0; count--, p += 3)
3989                 {
3990                   /* Extract the start and end of each range.  */
3991                   EXTRACT_CHARACTER (c, p);
3992                   lc1 = CHAR_LEADING_CODE (c);
3993                   p += 3;
3994                   EXTRACT_CHARACTER (c, p);
3995                   lc2 = CHAR_LEADING_CODE (c);
3996                   for (j = lc1; j <= lc2; j++)
3997                     fastmap[j] = 1;
3998                 }
3999             }
4000 #endif
4001           break;
4002
4003         case syntaxspec:
4004         case notsyntaxspec:
4005           if (!fastmap) break;
4006 #ifndef emacs
4007           not = (re_opcode_t)p[-1] == notsyntaxspec;
4008           k = *p++;
4009           for (j = 0; j < (1 << BYTEWIDTH); j++)
4010             if ((SYNTAX (j) == (enum syntaxcode) k) ^ not)
4011               fastmap[j] = 1;
4012           break;
4013 #else  /* emacs */
4014           /* This match depends on text properties.  These end with
4015              aborting optimizations.  */
4016           return -1;
4017
4018         case categoryspec:
4019         case notcategoryspec:
4020           if (!fastmap) break;
4021           not = (re_opcode_t)p[-1] == notcategoryspec;
4022           k = *p++;
4023           for (j = (1 << BYTEWIDTH); j >= 0; j--)
4024             if ((CHAR_HAS_CATEGORY (j, k)) ^ not)
4025               fastmap[j] = 1;
4026
4027           /* Any leading code can possibly start a character which
4028              has or doesn't has the specified category.  */
4029           if (match_any_multibyte_characters == false)
4030             {
4031               for (j = MIN_MULTIBYTE_LEADING_CODE;
4032                    j <= MAX_MULTIBYTE_LEADING_CODE; j++)
4033                 fastmap[j] = 1;
4034               match_any_multibyte_characters = true;
4035             }
4036           break;
4037
4038       /* All cases after this match the empty string.  These end with
4039          `continue'.  */
4040
4041         case before_dot:
4042         case at_dot:
4043         case after_dot:
4044 #endif /* !emacs */
4045         case no_op:
4046         case begline:
4047         case endline:
4048         case begbuf:
4049         case endbuf:
4050         case wordbound:
4051         case notwordbound:
4052         case wordbeg:
4053         case wordend:
4054         case symbeg:
4055         case symend:
4056           continue;
4057
4058
4059         case jump:
4060           EXTRACT_NUMBER_AND_INCR (j, p);
4061           if (j < 0)
4062             /* Backward jumps can only go back to code that we've already
4063                visited.  `re_compile' should make sure this is true.  */
4064             break;
4065           p += j;
4066           switch (*p)
4067             {
4068             case on_failure_jump:
4069             case on_failure_keep_string_jump:
4070             case on_failure_jump_loop:
4071             case on_failure_jump_nastyloop:
4072             case on_failure_jump_smart:
4073               p++;
4074               break;
4075             default:
4076               continue;
4077             };
4078           /* Keep `p1' to allow the `on_failure_jump' we are jumping to
4079              to jump back to "just after here".  */
4080           /* Fallthrough */
4081
4082         case on_failure_jump:
4083         case on_failure_keep_string_jump:
4084         case on_failure_jump_nastyloop:
4085         case on_failure_jump_loop:
4086         case on_failure_jump_smart:
4087           EXTRACT_NUMBER_AND_INCR (j, p);
4088           if (p + j <= p1)
4089             ; /* Backward jump to be ignored.  */
4090           else
4091             { /* We have to look down both arms.
4092                  We first go down the "straight" path so as to minimize
4093                  stack usage when going through alternatives.  */
4094               int r = analyse_first (p, pend, fastmap, multibyte);
4095               if (r) return r;
4096               p += j;
4097             }
4098           continue;
4099
4100
4101         case jump_n:
4102           /* This code simply does not properly handle forward jump_n.  */
4103           DEBUG_STATEMENT (EXTRACT_NUMBER (j, p); assert (j < 0));
4104           p += 4;
4105           /* jump_n can either jump or fall through.  The (backward) jump
4106              case has already been handled, so we only need to look at the
4107              fallthrough case.  */
4108           continue;
4109
4110         case succeed_n:
4111           /* If N == 0, it should be an on_failure_jump_loop instead.  */
4112           DEBUG_STATEMENT (EXTRACT_NUMBER (j, p + 2); assert (j > 0));
4113           p += 4;
4114           /* We only care about one iteration of the loop, so we don't
4115              need to consider the case where this behaves like an
4116              on_failure_jump.  */
4117           continue;
4118
4119
4120         case set_number_at:
4121           p += 4;
4122           continue;
4123
4124
4125         case start_memory:
4126         case stop_memory:
4127           p += 1;
4128           continue;
4129
4130
4131         default:
4132           abort (); /* We have listed all the cases.  */
4133         } /* switch *p++ */
4134
4135       /* Getting here means we have found the possible starting
4136          characters for one path of the pattern -- and that the empty
4137          string does not match.  We need not follow this path further.  */
4138       return 0;
4139     } /* while p */
4140
4141   /* We reached the end without matching anything.  */
4142   return 1;
4143
4144 } /* analyse_first */
4145 \f
4146 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
4147    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
4148    characters can start a string that matches the pattern.  This fastmap
4149    is used by re_search to skip quickly over impossible starting points.
4150
4151    Character codes above (1 << BYTEWIDTH) are not represented in the
4152    fastmap, but the leading codes are represented.  Thus, the fastmap
4153    indicates which character sets could start a match.
4154
4155    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
4156    area as BUFP->fastmap.
4157
4158    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
4159    the pattern buffer.
4160
4161    Returns 0 if we succeed, -2 if an internal error.   */
4162
4163 int
4164 re_compile_fastmap (struct re_pattern_buffer *bufp)
4165 {
4166   char *fastmap = bufp->fastmap;
4167   int analysis;
4168
4169   assert (fastmap && bufp->buffer);
4170
4171   memset (fastmap, 0, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
4172   bufp->fastmap_accurate = 1;       /* It will be when we're done.  */
4173
4174   analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used,
4175                             fastmap, RE_MULTIBYTE_P (bufp));
4176   bufp->can_be_null = (analysis != 0);
4177   return 0;
4178 } /* re_compile_fastmap */
4179 \f
4180 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
4181    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
4182    this memory for recording register information.  STARTS and ENDS
4183    must be allocated using the malloc library routine, and must each
4184    be at least NUM_REGS * sizeof (regoff_t) bytes long.
4185
4186    If NUM_REGS == 0, then subsequent matches should allocate their own
4187    register data.
4188
4189    Unless this function is called, the first search or match using
4190    PATTERN_BUFFER will allocate its own register data, without
4191    freeing the old data.  */
4192
4193 void
4194 re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, unsigned int num_regs, regoff_t *starts, regoff_t *ends)
4195 {
4196   if (num_regs)
4197     {
4198       bufp->regs_allocated = REGS_REALLOCATE;
4199       regs->num_regs = num_regs;
4200       regs->start = starts;
4201       regs->end = ends;
4202     }
4203   else
4204     {
4205       bufp->regs_allocated = REGS_UNALLOCATED;
4206       regs->num_regs = 0;
4207       regs->start = regs->end = (regoff_t *) 0;
4208     }
4209 }
4210 WEAK_ALIAS (__re_set_registers, re_set_registers)
4211 \f
4212 /* Searching routines.  */
4213
4214 /* Like re_search_2, below, but only one string is specified, and
4215    doesn't let you say where to stop matching. */
4216
4217 regoff_t
4218 re_search (struct re_pattern_buffer *bufp, const char *string, size_t size,
4219            ssize_t startpos, ssize_t range, struct re_registers *regs)
4220 {
4221   return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
4222                       regs, size);
4223 }
4224 WEAK_ALIAS (__re_search, re_search)
4225
/* Start of whichever string holds position P of the virtual
   concatenation: string2 when P is at or beyond size1, else string1.
   Uses `size1', `string1' and `string2' from the enclosing scope.  */
#define HEAD_ADDR_VSTRING(P)            \
  ((P) >= size1 ? string2 : string1)

/* Address of position POS in the virtual concatenation.  The offset
   into string2 is computed before it is added, so no intermediate
   pointer ever lies in front of string2.  POS is evaluated more than
   once; `size1', `string1' and `string2' come from the enclosing
   scope.  */
#define POS_ADDR_VSTRING(POS)                                   \
  ((POS) >= size1 ? string2 + ((POS) - size1) : string1 + (POS))
4233
4234 /* Using the compiled pattern in BUFP->buffer, first tries to match the
4235    virtual concatenation of STRING1 and STRING2, starting first at index
4236    STARTPOS, then at STARTPOS + 1, and so on.
4237
4238    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
4239
4240    RANGE is how far to scan while trying to match.  RANGE = 0 means try
4241    only at STARTPOS; in general, the last start tried is STARTPOS +
4242    RANGE.
4243
4244    In REGS, return the indices of the virtual concatenation of STRING1
4245    and STRING2 that matched the entire BUFP->buffer and its contained
4246    subexpressions.
4247
4248    Do not consider matching one past the index STOP in the virtual
4249    concatenation of STRING1 and STRING2.
4250
4251    We return either the position in the strings at which the match was
4252    found, -1 if no match, or -2 if error (such as failure
4253    stack overflow).  */
4254
4255 regoff_t
4256 re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
4257              const char *str2, size_t size2, ssize_t startpos, ssize_t range,
4258              struct re_registers *regs, ssize_t stop)
4259 {
4260   regoff_t val;
4261   re_char *string1 = (re_char*) str1;
4262   re_char *string2 = (re_char*) str2;
4263   register char *fastmap = bufp->fastmap;
4264   register RE_TRANSLATE_TYPE translate = bufp->translate;
4265   size_t total_size = size1 + size2;
4266   ssize_t endpos = startpos + range;
4267   boolean anchored_start;
4268   /* Nonzero if we are searching multibyte string.  */
4269   const boolean multibyte = RE_TARGET_MULTIBYTE_P (bufp);
4270
4271   /* Check for out-of-range STARTPOS.  */
4272   if (startpos < 0 || startpos > total_size)
4273     return -1;
4274
4275   /* Fix up RANGE if it might eventually take us outside
4276      the virtual concatenation of STRING1 and STRING2.
4277      Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
4278   if (endpos < 0)
4279     range = 0 - startpos;
4280   else if (endpos > total_size)
4281     range = total_size - startpos;
4282
4283   /* If the search isn't to be a backwards one, don't waste time in a
4284      search for a pattern anchored at beginning of buffer.  */
4285   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
4286     {
4287       if (startpos > 0)
4288         return -1;
4289       else
4290         range = 0;
4291     }
4292
4293 #ifdef emacs
4294   /* In a forward search for something that starts with \=.
4295      don't keep searching past point.  */
4296   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
4297     {
4298       range = PT_BYTE - BEGV_BYTE - startpos;
4299       if (range < 0)
4300         return -1;
4301     }
4302 #endif /* emacs */
4303
4304   /* Update the fastmap now if not correct already.  */
4305   if (fastmap && !bufp->fastmap_accurate)
4306     re_compile_fastmap (bufp);
4307
4308   /* See whether the pattern is anchored.  */
4309   anchored_start = (bufp->buffer[0] == begline);
4310
4311 #ifdef emacs
4312   gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */
4313   {
4314     ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos));
4315
4316     SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
4317   }
4318 #endif
4319
4320   /* Loop through the string, looking for a place to start matching.  */
4321   for (;;)
4322     {
4323       /* If the pattern is anchored,
4324          skip quickly past places we cannot match.
4325          We don't bother to treat startpos == 0 specially
4326          because that case doesn't repeat.  */
4327       if (anchored_start && startpos > 0)
4328         {
4329           if (! ((startpos <= size1 ? string1[startpos - 1]
4330                   : string2[startpos - size1 - 1])
4331                  == '\n'))
4332             goto advance;
4333         }
4334
4335       /* If a fastmap is supplied, skip quickly over characters that
4336          cannot be the start of a match.  If the pattern can match the
4337          null string, however, we don't need to skip characters; we want
4338          the first null string.  */
4339       if (fastmap && startpos < total_size && !bufp->can_be_null)
4340         {
4341           register re_char *d;
4342           register re_wchar_t buf_ch;
4343
4344           d = POS_ADDR_VSTRING (startpos);
4345
4346           if (range > 0)        /* Searching forwards.  */
4347             {
4348               register int lim = 0;
4349               ssize_t irange = range;
4350
4351               if (startpos < size1 && startpos + range >= size1)
4352                 lim = range - (size1 - startpos);
4353
4354               /* Written out as an if-else to avoid testing `translate'
4355                  inside the loop.  */
4356               if (RE_TRANSLATE_P (translate))
4357                 {
4358                   if (multibyte)
4359                     while (range > lim)
4360                       {
4361                         int buf_charlen;
4362
4363                         buf_ch = STRING_CHAR_AND_LENGTH (d, buf_charlen);
4364                         buf_ch = RE_TRANSLATE (translate, buf_ch);
4365                         if (fastmap[CHAR_LEADING_CODE (buf_ch)])
4366                           break;
4367
4368                         range -= buf_charlen;
4369                         d += buf_charlen;
4370                       }
4371                   else
4372                     while (range > lim)
4373                       {
4374                         register re_wchar_t ch, translated;
4375
4376                         buf_ch = *d;
4377                         ch = RE_CHAR_TO_MULTIBYTE (buf_ch);
4378                         translated = RE_TRANSLATE (translate, ch);
4379                         if (translated != ch
4380                             && (ch = RE_CHAR_TO_UNIBYTE (translated)) >= 0)
4381                           buf_ch = ch;
4382                         if (fastmap[buf_ch])
4383                           break;
4384                         d++;
4385                         range--;
4386                       }
4387                 }
4388               else
4389                 {
4390                   if (multibyte)
4391                     while (range > lim)
4392                       {
4393                         int buf_charlen;
4394
4395                         buf_ch = STRING_CHAR_AND_LENGTH (d, buf_charlen);
4396                         if (fastmap[CHAR_LEADING_CODE (buf_ch)])
4397                           break;
4398                         range -= buf_charlen;
4399                         d += buf_charlen;
4400                       }
4401                   else
4402                     while (range > lim && !fastmap[*d])
4403                       {
4404                         d++;
4405                         range--;
4406                       }
4407                 }
4408               startpos += irange - range;
4409             }
4410           else                          /* Searching backwards.  */
4411             {
4412               if (multibyte)
4413                 {
4414                   buf_ch = STRING_CHAR (d);
4415                   buf_ch = TRANSLATE (buf_ch);
4416                   if (! fastmap[CHAR_LEADING_CODE (buf_ch)])
4417                     goto advance;
4418                 }
4419               else
4420                 {
4421                   register re_wchar_t ch, translated;
4422
4423                   buf_ch = *d;
4424                   ch = RE_CHAR_TO_MULTIBYTE (buf_ch);
4425                   translated = TRANSLATE (ch);
4426                   if (translated != ch
4427                       && (ch = RE_CHAR_TO_UNIBYTE (translated)) >= 0)
4428                     buf_ch = ch;
4429                   if (! fastmap[TRANSLATE (buf_ch)])
4430                     goto advance;
4431                 }
4432             }
4433         }
4434
4435       /* If can't match the null string, and that's all we have left, fail.  */
4436       if (range >= 0 && startpos == total_size && fastmap
4437           && !bufp->can_be_null)
4438         return -1;
4439
4440       val = re_match_2_internal (bufp, string1, size1, string2, size2,
4441                                  startpos, regs, stop);
4442
4443       if (val >= 0)
4444         return startpos;
4445
4446       if (val == -2)
4447         return -2;
4448
4449     advance:
4450       if (!range)
4451         break;
4452       else if (range > 0)
4453         {
4454           /* Update STARTPOS to the next character boundary.  */
4455           if (multibyte)
4456             {
4457               re_char *p = POS_ADDR_VSTRING (startpos);
4458               int len = BYTES_BY_CHAR_HEAD (*p);
4459
4460               range -= len;
4461               if (range < 0)
4462                 break;
4463               startpos += len;
4464             }
4465           else
4466             {
4467               range--;
4468               startpos++;
4469             }
4470         }
4471       else
4472         {
4473           range++;
4474           startpos--;
4475
4476           /* Update STARTPOS to the previous character boundary.  */
4477           if (multibyte)
4478             {
4479               re_char *p = POS_ADDR_VSTRING (startpos) + 1;
4480               re_char *p0 = p;
4481               re_char *phead = HEAD_ADDR_VSTRING (startpos);
4482
4483               /* Find the head of multibyte form.  */
4484               PREV_CHAR_BOUNDARY (p, phead);
4485               range += p0 - 1 - p;
4486               if (range > 0)
4487                 break;
4488
4489               startpos -= p0 - 1 - p;
4490             }
4491         }
4492     }
4493   return -1;
4494 } /* re_search_2 */
4495 WEAK_ALIAS (__re_search_2, re_search_2)
4496 \f
/* Declarations and macros for re_match_2.  */

/* Forward declaration of a file-local helper whose definition lies
   outside this part of the file.
   NOTE(review): judging by the name and parameters this compares LEN
   bytes of S1 and S2 after applying TRANSLATE -- confirm at the
   definition.  */
static int bcmp_translate (re_char *s1, re_char *s2,
                           register ssize_t len,
                           RE_TRANSLATE_TYPE translate,
                           const int multibyte);
4503
/* This converts PTR, a pointer into one of the search strings `string1'
   and `string2', into an offset.  A pointer for which FIRST_STRING_P
   holds is measured from `string1'; any other pointer is measured from
   `string2' and shifted by `size1', i.e. the result is a position in
   the virtual concatenation of the two strings.  PTR is evaluated more
   than once.  */
#define POINTER_TO_OFFSET(ptr)                  \
  (FIRST_STRING_P (ptr)                         \
   ? (ptr) - string1                            \
   : (ptr) - string2 + (ptrdiff_t) size1)
4510
/* Call before fetching a character with *d.  This switches over to
   string2 if necessary: while `d' has reached `dend', either jump to
   the `fail' label (when `dend' is already `end_match_2') or move `d'
   to the start of string2 and set `dend' to `end_match_2'.
   Uses `d', `dend', `string2', `end_match_2' and the label `fail' from
   the enclosing scope.
   Check re_match_2_internal for a discussion of why end_match_2 might
   not be within string2 (but be equal to end_match_1 instead).  */
#define PREFETCH()                                                      \
  while (d == dend)                                                     \
    {                                                                   \
      /* End of string2 => fail.  */                                    \
      if (dend == end_match_2)                                          \
        goto fail;                                                      \
      /* End of string1 => advance to string2.  */                      \
      d = string2;                                                      \
      dend = end_match_2;                                               \
    }
4525
/* Call before fetching a char with *d if you already checked other limits.
   This is meant for use in lookahead operations like wordend, etc..
   where we might need to look at parts of the string that might be
   outside of the LIMITs (i.e past `stop').
   If `d' is at `end1', move it to the start of string2 and set `dend'
   to `end_match_2'; otherwise do nothing.  The expansion is a bare `if'
   statement, so do not use it as the body of an `if' that has an
   `else'.  */
#define PREFETCH_NOLIMIT()                                              \
  if (d == end1)                                                        \
     {                                                                  \
       d = string2;                                                     \
       dend = end_match_2;                                              \
     }
4536
/* Test if at very beginning or at very end of the virtual concatenation
   of `string1' and `string2'.  If only one string, it's `string2'.
   AT_STRINGS_BEG compares D with `string1' when `size1' is nonzero and
   with `string2' otherwise, and is also true whenever `size2' is zero.
   AT_STRINGS_END compares D with `end2'.  */
#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
#define AT_STRINGS_END(d) ((d) == end2)
4541
/* The macros below are disabled due to a compiler bug -- see the comment
   at case wordbound.  */

/* The comment at case wordbound is the following one; note that
   AT_WORD_BOUNDARY is no longer used at all, in order to support the
   multibyte form.

   The DEC Alpha C compiler 3.x generates incorrect code for the
   test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
   AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
   macro and introducing temporary variables works around the bug.  */

#if 0
/* Test if D points to a character which is word-constituent.  We have
   two special cases to check for: if past the end of string1, look at
   the first character in string2; and if before the beginning of
   string2, look at the last character in string1.  */
#define WORDCHAR_P(d)                                                   \
  (SYNTAX ((d) == end1 ? *string2                                       \
           : (d) == string2 - 1 ? *(end1 - 1) : *(d))                   \
   == Sword)

/* Test if the character before D and the one at D differ with respect
   to being word-constituent.  */
#define AT_WORD_BOUNDARY(d)                                             \
  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)                             \
   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
#endif
4568
4569 /* Free everything we malloc.
        FREE_VAR (VAR) releases VAR through REGEX_FREE when it is non-null
        and then resets it to NULL, so a second FREE_VAR on the same
        variable does nothing.  VAR must be an assignable expression and
        is evaluated more than once.
        FREE_VARIABLES () releases the failure stack and the four register
        vectors; it refers to `fail_stack', `regstart', `regend',
        `best_regstart' and `best_regend' by name, so those must be in
        scope where it is used.  Without MATCH_MAY_ALLOCATE it is a
        no-op expression.  */
4570 #ifdef MATCH_MAY_ALLOCATE
4571 # define FREE_VAR(var)                                                  \
4572   do {                                                                  \
4573     if (var)                                                            \
4574       {                                                                 \
4575         REGEX_FREE (var);                                               \
4576         var = NULL;       /* Make a repeated FREE_VAR harmless.  */     \
4577       }                                                                 \
4578   } while (0)
4579 # define FREE_VARIABLES()                                               \
4580   do {                                                                  \
4581     REGEX_FREE_STACK (fail_stack.stack);                                \
4582     FREE_VAR (regstart);                                                \
4583     FREE_VAR (regend);                                                  \
4584     FREE_VAR (best_regstart);                                           \
4585     FREE_VAR (best_regend);                                             \
4586   } while (0)
4587 #else
4588 # define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning.  */
4589 #endif /* not MATCH_MAY_ALLOCATE */
4590
4591 \f
4592 /* Optimization routines.  */
4593
4594 /* If the operation is a match against one or more chars,
4595    return a pointer to the next operation, else return NULL.
        P points at the opcode to examine.  Only the opcode and its
        operands are decoded here; nothing is compared against any text.  */
4596 static re_char *
4597 skip_one_char (const re_char *p)
4598 {
4599   switch (*p++)   /* Read the opcode; P now points just past it.  */
4600     {
4601     case anychar: /* Nothing follows the opcode.  */
4602       break;
4603
4604     case exactn:
           /* *P is the byte count: step over it and over that many bytes.  */
4605       p += *p + 1;
4606       break;
4607
4608     case charset_not:
4609     case charset:
           /* The CHARSET_* macros are handed the address of the opcode
              itself, hence `p - 1'.  */
4610       if (CHARSET_RANGE_TABLE_EXISTS_P (p - 1))
4611         {
4612           int mcnt;
               /* Go to the range table, read its count into MCNT
                  (EXTRACT_NUMBER_AND_INCR is expected to advance P past
                  the number as well -- confirm against its definition),
                  then move to the end of the table.  */
4613           p = CHARSET_RANGE_TABLE (p - 1);
4614           EXTRACT_NUMBER_AND_INCR (mcnt, p);
4615           p = CHARSET_RANGE_TABLE_END (p, mcnt);
4616         }
4617       else
             /* No range table: skip the byte at P plus the bitmap, whose
                length CHARSET_BITMAP_SIZE reports.  */
4618         p += 1 + CHARSET_BITMAP_SIZE (p - 1);
4619       break;
4620
4621     case syntaxspec:
4622     case notsyntaxspec:
4623 #ifdef emacs
4624     case categoryspec:
4625     case notcategoryspec:
4626 #endif /* emacs */
           /* A single operand byte follows the opcode.  */
4627       p++;
4628       break;
4629
4630     default:
           /* Any other opcode is not treated as a one-character match.  */
4631       p = NULL;
4632     }
4633   return p;
4634 }
4635
4636
4637 /* Jump over non-matching operations.
        Starting at P, step over start_memory, stop_memory and no_op
        opcodes and follow `jump's, stopping at the first opcode of any
        other kind.  Return a pointer to that opcode, or PEND if the
        pattern ran out first.  */
4638 static re_char *
4639 skip_noops (const re_char *p, const re_char *pend)
4640 {
4641   int mcnt;
4642   while (p < pend)
4643     {
4644       switch (*p)
4645         {
4646         case start_memory:
4647         case stop_memory:
4648           p += 2; break;   /* Opcode plus one operand byte.  */
4649         case no_op:
4650           p += 1; break;   /* Opcode only.  */
4651         case jump:
               /* Step past the opcode, read the offset into MCNT
                  (EXTRACT_NUMBER_AND_INCR is expected to advance P past
                  it as well -- confirm against its definition), then
                  apply the offset.  */
4652           p += 1;
4653           EXTRACT_NUMBER_AND_INCR (mcnt, p);
4654           p += mcnt;
4655           break;
4656         default:
               /* First opcode that is none of the above: stop here.  */
4657           return p;
4658         }
4659     }
       /* The loop may only end exactly on PEND, never beyond it.  */
4660   assert (p == pend);
4661   return p;
4662 }
4663
4664 /* Non-zero if "p1 matches something" implies "p2 fails".
        P1 and P2 point at opcodes inside BUFP's compiled pattern (P2 may
        also be its very end, which is treated like `succeed').  P1 is
        expected to be a simple match operator; P2 is examined after
        skip_noops has stepped over group markers, no-ops and jumps.
        Returns non-zero when one of the cases below proves that the two
        exclude each other, and 0 -- the safe answer -- whenever nothing
        can be proved.  */
4665 static int
4666 mutually_exclusive_p (struct re_pattern_buffer *bufp, const re_char *p1, const re_char *p2)
4667 {
4668   re_opcode_t op2;
4669   const boolean multibyte = RE_MULTIBYTE_P (bufp);
4670   unsigned char *pend = bufp->buffer + bufp->used;
4671
4672   assert (p1 >= bufp->buffer && p1 < pend
4673           && p2 >= bufp->buffer && p2 <= pend);
4674
4675   /* Skip over open/close-group commands.
4676      If what follows this loop is a ...+ construct,
4677      look at what begins its body, since we will have to
4678      match at least one of that.  */
4679   p2 = skip_noops (p2, pend);
4680   /* The same skip can be done for p1, except that this function
4681      is only used in the case where p1 is a simple match operator.  */
4682   /* p1 = skip_noops (p1, pend); */
4683
4684   assert (p1 >= bufp->buffer && p1 < pend
4685           && p2 >= bufp->buffer && p2 <= pend);
4686
       /* Running off the end of the pattern counts as `succeed'.  */
4687   op2 = p2 == pend ? succeed : *p2;
4688
4689   switch (op2)
4690     {
4691     case succeed:
4692     case endbuf:
4693       /* If we're at the end of the pattern, we can change.  */
4694       if (skip_one_char (p1))
4695         {
4696           DEBUG_PRINT ("  End of pattern: fast loop.\n");
4697           return 1;
4698         }
4699       break;
4700
4701     case endline:
4702     case exactn:
4703       {
             /* C is the character P2 needs next: a newline for endline,
                otherwise the first character of the exactn string, which
                starts two bytes after the opcode.  */
4704         register re_wchar_t c
4705           = (re_opcode_t) *p2 == endline ? '\n'
4706           : RE_STRING_CHAR (p2 + 2, multibyte);
4707
4708         if ((re_opcode_t) *p1 == exactn)
4709           {
                 /* Two literals exclude each other when their first
                    characters differ.  */
4710             if (c != RE_STRING_CHAR (p1 + 2, multibyte))
4711               {
4712                 DEBUG_PRINT ("  '%c' != '%c' => fast loop.\n", c, p1[2]);
4713                 return 1;
4714               }
4715           }
4716
4717         else if ((re_opcode_t) *p1 == charset
4718                  || (re_opcode_t) *p1 == charset_not)
4719           {
4720             int not = (re_opcode_t) *p1 == charset_not;
4721
4722             /* Test if C is listed in charset (or charset_not)
4723                at `p1'.  The bitmap starts at p1[2].  */
4724             if (! multibyte || IS_REAL_ASCII (c))
4725               {
4726                 if (c < CHARSET_BITMAP_SIZE (p1) * BYTEWIDTH
4727                     && p1[2 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4728                   not = !not;
4729               }
4730             else if (CHARSET_RANGE_TABLE_EXISTS_P (p1))
4731               CHARSET_LOOKUP_RANGE_TABLE (not, c, p1);
4732
4733             /* `not' is equal to 1 if c would match, which means
4734                that we can't change to pop_failure_jump.  */
4735             if (!not)
4736               {
4737                 DEBUG_PRINT ("   No match => fast loop.\n");
4738                 return 1;
4739               }
4740           }
             /* `.' and a required newline exclude each other only while
                `.' refuses newlines.  When RE_DOT_NEWLINE is set in the
                pattern's syntax, anychar accepts a newline too, so
                nothing can be concluded and we fall through to the safe
                default.  */
4741         else if ((re_opcode_t) *p1 == anychar
4742                  && c == '\n' && !(bufp->syntax & RE_DOT_NEWLINE))
4743           {
4744             DEBUG_PRINT ("   . != \\n => fast loop.\n");
4745             return 1;
4746           }
4747       }
4748       break;
4749
4750     case charset:
4751       {
4752         if ((re_opcode_t) *p1 == exactn)
4753           /* Reuse the code above: with the arguments swapped the call
                  lands in the exactn case with the charset as its P1.  */
4754           return mutually_exclusive_p (bufp, p2, p1);
4755
4756       /* It is hard to list up all the character in charset
4757          P2 if it includes multibyte character.  Give up in
4758          such case.  */
4759       else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2))
4760         {
4761           /* Now, we are sure that P2 has no range table.
4762              So, for the size of bitmap in P2, `p2[1]' is
4763              enough.  But P1 may have range table, so the
4764              size of bitmap table of P1 is extracted by
4765              using macro `CHARSET_BITMAP_SIZE'.
4766
4767              In a multibyte case, we know that all the character
4768              listed in P2 is ASCII.  In a unibyte case, P1 has only a
4769              bitmap table.  So, in both cases, it is enough to test
4770              only the bitmap table of P1.  */
4771
4772           if ((re_opcode_t) *p1 == charset)
4773             {
4774               int idx;
4775               /* We win if the charset inside the loop
4776                  has no overlap with the one after the loop.  */
4777               for (idx = 0;
4778                    (idx < (int) p2[1]
4779                     && idx < CHARSET_BITMAP_SIZE (p1));
4780                    idx++)
4781                 if ((p2[2 + idx] & p1[2 + idx]) != 0)
4782                   break;
4783
                   /* The scan covered the shorter bitmap without finding
                      a common bit.  */
4784               if (idx == p2[1]
4785                   || idx == CHARSET_BITMAP_SIZE (p1))
4786                 {
4787                   DEBUG_PRINT ("         No match => fast loop.\n");
4788                   return 1;
4789                 }
4790             }
4791           else if ((re_opcode_t) *p1 == charset_not)
4792             {
4793               int idx;
4794               /* We win if the charset_not inside the loop lists
4795                  every character listed in the charset after.  */
4796               for (idx = 0; idx < (int) p2[1]; idx++)
4797                 if (! (p2[2 + idx] == 0
4798                        || (idx < CHARSET_BITMAP_SIZE (p1)
4799                            && ((p2[2 + idx] & ~ p1[2 + idx]) == 0))))
4800                   break;
4801
4802               if (idx == p2[1])
4803                 {
4804                   DEBUG_PRINT ("         No match => fast loop.\n");
4805                   return 1;
4806                 }
4807               }
4808           }
4809       }
4810       break;
4811
4812     case charset_not:
4813       switch (*p1)
4814         {
4815         case exactn:
4816         case charset:
4817           /* Reuse the code above.  */
4818           return mutually_exclusive_p (bufp, p2, p1);
4819         case charset_not:
4820           /* When we have two charset_not, it's very unlikely that
4821              they don't overlap.  The union of the two sets of excluded
4822              chars should cover all possible chars, which, as a matter of
4823              fact, is virtually impossible in multibyte buffers.  */
4824           break;
4825         }
4826       break;
4827
         /* The remaining cases only compare P1's opcode and its one-byte
            operand (a syntax class or category) with what P2 asks for.  */
4828     case wordend:
4829       return ((re_opcode_t) *p1 == syntaxspec && p1[1] == Sword);
4830     case symend:
4831       return ((re_opcode_t) *p1 == syntaxspec
4832               && (p1[1] == Ssymbol || p1[1] == Sword));
4833     case notsyntaxspec:
4834       return ((re_opcode_t) *p1 == syntaxspec && p1[1] == p2[1]);
4835
4836     case wordbeg:
4837       return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == Sword);
4838     case symbeg:
4839       return ((re_opcode_t) *p1 == notsyntaxspec
4840               && (p1[1] == Ssymbol || p1[1] == Sword));
4841     case syntaxspec:
4842       return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == p2[1]);
4843
4844     case wordbound:
4845       return (((re_opcode_t) *p1 == notsyntaxspec
4846                || (re_opcode_t) *p1 == syntaxspec)
4847               && p1[1] == Sword);
4848
4849 #ifdef emacs
4850     case categoryspec:
4851       return ((re_opcode_t) *p1 == notcategoryspec && p1[1] == p2[1]);
4852     case notcategoryspec:
4853       return ((re_opcode_t) *p1 == categoryspec && p1[1] == p2[1]);
4854 #endif /* emacs */
4855
4856     default:
4857       ;
4858     }
4859
4860   /* Safe default.  */
4861   return 0;
4862 }
4863
4864 \f
4865 /* Matching routines.  */
4866
4867 #ifndef emacs   /* Emacs never uses this.  */
4868 /* re_match is like re_match_2 except it takes only a single string.
        STRING (of length SIZE) is handed on as the second string together
        with a null, zero-length first string, and SIZE doubles as the
        stop position.  The value of re_match_2_internal is returned
        unchanged.  */
4869
4870 regoff_t
4871 re_match (struct re_pattern_buffer *bufp, const char *string,
4872           size_t size, ssize_t pos, struct re_registers *regs)
4873 {
4874   regoff_t result = re_match_2_internal (bufp, NULL, 0, (re_char*) string,
4875                                          size, pos, regs, size);
4876   return result;
4877 }
4878 WEAK_ALIAS (__re_match, re_match)
4879 #endif /* not emacs */
4880
4881 #ifdef emacs
4882 /* In Emacs, this is the string or buffer in which we
4883    are matching.  It is used for looking up syntax properties:
        re_match_2 copies it into gl_state.object and passes it to
        SETUP_SYNTAX_TABLE_FOR_OBJECT before matching starts.  */
4884 Lisp_Object re_match_object;
4885 #endif /* emacs */
4886
4887 /* re_match_2 matches the compiled pattern in BUFP against
4888    the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
4889    and SIZE2, respectively).  We start matching at POS, and stop
4890    matching at STOP.
4891
4892    If REGS is non-null and the `no_sub' field of BUFP is zero, we
4893    store offsets for the substring each group matched in REGS.  See the
4894    documentation for exactly how many groups we fill.
4895
4896    We return -1 if no match, -2 if an internal error (such as the
4897    failure stack overflowing).  Otherwise, we return the length of the
4898    matched substring.  */
4899
4900 regoff_t
4901 re_match_2 (struct re_pattern_buffer *bufp, const char *string1,
4902             size_t size1, const char *string2, size_t size2, ssize_t pos,
4903             struct re_registers *regs, ssize_t stop)
4904 {
4905   regoff_t result;
4906
4907 #ifdef emacs
       /* Point the syntax-table state at the object being matched and set
          it up at CHARPOS, which is derived from POS through
          POS_AS_IN_BUFFER and SYNTAX_TABLE_BYTE_TO_CHAR (presumably a
          byte-to-character position conversion -- confirm).  */
4908   ssize_t charpos;
4909   gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */
4910   charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos));
4911   SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
4912 #endif
4913
       /* The casts only change the pointer type to re_char; the matching
          itself is done by re_match_2_internal.  */
4914   result = re_match_2_internal (bufp, (re_char*) string1, size1,
4915                                 (re_char*) string2, size2,
4916                                 pos, regs, stop);
4917   return result;
4918 }
4919 WEAK_ALIAS (__re_match_2, re_match_2)
4920
4921
4922 /* This is a separate function so that we can force an alloca cleanup
4923    afterwards.  */
4924 static regoff_t
4925 re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1,
4926                      size_t size1, const re_char *string2, size_t size2,
4927                      ssize_t pos, struct re_registers *regs, ssize_t stop)
4928 {
4929   /* General temporaries.  */
4930   int mcnt;
4931   size_t reg;
4932
4933   /* Just past the end of the corresponding string.  */
4934   re_char *end1, *end2;
4935
4936   /* Pointers into string1 and string2, just past the last characters in
4937      each to consider matching.  */
4938   re_char *end_match_1, *end_match_2;
4939
4940   /* Where we are in the data, and the end of the current string.  */
4941   re_char *d, *dend;
4942
4943   /* Used sometimes to remember where we were before starting matching
4944      an operator so that we can go back in case of failure.  This "atomic"
4945      behavior of matching opcodes is indispensable to the correctness
4946      of the on_failure_keep_string_jump optimization.  */
4947   re_char *dfail;
4948
4949   /* Where we are in the pattern, and the end of the pattern.  */
4950   re_char *p = bufp->buffer;
4951   re_char *pend = p + bufp->used;
4952
4953   /* We use this to map every character in the string.  */
4954   RE_TRANSLATE_TYPE translate = bufp->translate;
4955
4956   /* Nonzero if BUFP is setup from a multibyte regex.  */
4957   const boolean multibyte = RE_MULTIBYTE_P (bufp);
4958
4959   /* Nonzero if STRING1/STRING2 are multibyte.  */
4960   const boolean target_multibyte = RE_TARGET_MULTIBYTE_P (bufp);
4961
4962   /* Failure point stack.  Each place that can handle a failure further
4963      down the line pushes a failure point on this stack.  It consists of
4964      regstart, and regend for all registers corresponding to
4965      the subexpressions we're currently inside, plus the number of such
4966      registers, and, finally, two char *'s.  The first char * is where
4967      to resume scanning the pattern; the second one is where to resume
4968      scanning the strings.  */
4969 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
4970   fail_stack_type fail_stack;
4971 #endif
4972 #ifdef DEBUG_COMPILES_ARGUMENTS
4973   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
4974 #endif
4975
4976 #if defined REL_ALLOC && defined REGEX_MALLOC
4977   /* This holds the pointer to the failure stack, when
4978      it is allocated relocatably.  */
4979   fail_stack_elt_t *failure_stack_ptr;
4980 #endif
4981
4982   /* We fill all the registers internally, independent of what we
4983      return, for use in backreferences.  The number here includes
4984      an element for register zero.  */
4985   size_t num_regs = bufp->re_nsub + 1;
4986
4987   /* Information on the contents of registers. These are pointers into
4988      the input strings; they record just what was matched (on this
4989      attempt) by a subexpression part of the pattern, that is, the
4990      regnum-th regstart pointer points to where in the input we began
4991      matching and the regnum-th regend points to right after where we
4992      stopped matching the regnum-th subexpression.  (The zeroth register
4993      keeps track of what the whole pattern matches.)  */
4994 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
4995   re_char **regstart, **regend;
4996 #endif
4997
4998   /* The following record the register info as found in the above
4999      variables when we find a match better than any we've seen before.
5000      This happens as we backtrack through the failure points, which in
5001      turn happens only if we have not yet matched the entire string. */
5002   unsigned best_regs_set = false;
5003 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
5004   re_char **best_regstart, **best_regend;
5005 #endif
5006
5007   /* Logically, this is `best_regend[0]'.  But we don't want to have to
5008      allocate space for that if we're not allocating space for anything
5009      else (see below).  Also, we never need info about register 0 for
5010      any of the other register vectors, and it seems rather a kludge to
5011      treat `best_regend' differently than the rest.  So we keep track of
5012      the end of the best match so far in a separate variable.  We
5013      initialize this to NULL so that when we backtrack the first time
5014      and need to test it, it's not garbage.  */
5015   re_char *match_end = NULL;
5016
5017 #ifdef DEBUG_COMPILES_ARGUMENTS
5018   /* Counts the total number of registers pushed.  */
5019   unsigned num_regs_pushed = 0;
5020 #endif
5021
5022   DEBUG_PRINT ("\n\nEntering re_match_2.\n");
5023
5024   INIT_FAIL_STACK ();
5025
5026 #ifdef MATCH_MAY_ALLOCATE
5027   /* Do not bother to initialize all the register variables if there are
5028      no groups in the pattern, as it takes a fair amount of time.  If
5029      there are groups, we include space for register 0 (the whole
5030      pattern), even though we never use it, since it simplifies the
5031      array indexing.  We should fix this.  */
5032   if (bufp->re_nsub)
5033     {
5034       regstart = REGEX_TALLOC (num_regs, re_char *);
5035       regend = REGEX_TALLOC (num_regs, re_char *);
5036       best_regstart = REGEX_TALLOC (num_regs, re_char *);
5037       best_regend = REGEX_TALLOC (num_regs, re_char *);
5038
5039       if (!(regstart && regend && best_regstart && best_regend))
5040         {
5041           FREE_VARIABLES ();
5042           return -2;
5043         }
5044     }
5045   else
5046     {
5047       /* We must initialize all our variables to NULL, so that
5048          `FREE_VARIABLES' doesn't try to free them.  */
5049       regstart = regend = best_regstart = best_regend = NULL;
5050     }
5051 #endif /* MATCH_MAY_ALLOCATE */
5052
5053   /* The starting position is bogus.  */
5054   if (pos < 0 || pos > size1 + size2)
5055     {
5056       FREE_VARIABLES ();
5057       return -1;
5058     }
5059
5060   /* Initialize subexpression text positions to NULL to mark ones that no
5061      start_memory/stop_memory has been seen for. Also initialize the
5062      register information struct.  */
5063   for (reg = 1; reg < num_regs; reg++)
5064     regstart[reg] = regend[reg] = NULL;
5065
5066   /* We move `string1' into `string2' if the latter's empty -- but not if
5067      `string1' is null.  */
5068   if (size2 == 0 && string1 != NULL)
5069     {
5070       string2 = string1;
5071       size2 = size1;
5072       string1 = 0;
5073       size1 = 0;
5074     }
5075   end1 = string1 + size1;
5076   end2 = string2 + size2;
5077
5078   /* `p' scans through the pattern as `d' scans through the data.
5079      `dend' is the end of the input string that `d' points within.  `d'
5080      is advanced into the following input string whenever necessary, but
5081      this happens before fetching; therefore, at the beginning of the
5082      loop, `d' can be pointing at the end of a string, but it cannot
5083      equal `string2'.  */
5084   if (pos >= size1)
5085     {
5086       /* Only match within string2.  */
5087       d = string2 + pos - size1;
5088       dend = end_match_2 = string2 + stop - size1;
5089       end_match_1 = end1;       /* Just to give it a value.  */
5090     }
5091   else
5092     {
5093       if (stop < size1)
5094         {
5095           /* Only match within string1.  */
5096           end_match_1 = string1 + stop;
5097           /* BEWARE!
5098              When we reach end_match_1, PREFETCH normally switches to string2.
5099              But in the present case, this means that just doing a PREFETCH
5100              makes us jump from `stop' to `gap' within the string.
5101              What we really want here is for the search to stop as
5102              soon as we hit end_match_1.  That's why we set end_match_2
5103              to end_match_1 (since PREFETCH fails as soon as we hit
5104              end_match_2).  */
5105           end_match_2 = end_match_1;
5106         }
5107       else
5108         { /* It's important to use this code when stop == size so that
5109              moving `d' from end1 to string2 will not prevent the d == dend
5110              check from catching the end of string.  */
5111           end_match_1 = end1;
5112           end_match_2 = string2 + stop - size1;
5113         }
5114       d = string1 + pos;
5115       dend = end_match_1;
5116     }
5117
5118   DEBUG_PRINT ("The compiled pattern is: ");
5119   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
5120   DEBUG_PRINT ("The string to match is: `");
5121   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
5122   DEBUG_PRINT ("'\n");
5123
5124   /* This loops over pattern commands.  It exits by returning from the
5125      function if the match is complete, or it drops through if the match
5126      fails at this starting point in the input data.  */
5127   for (;;)
5128     {
5129       DEBUG_PRINT ("\n%p: ", p);
5130
5131       if (p == pend)
5132         {
5133           ptrdiff_t dcnt;
5134
5135           /* End of pattern means we might have succeeded.  */
5136           DEBUG_PRINT ("end of pattern ... ");
5137
5138           /* If we haven't matched the entire string, and we want the
5139              longest match, try backtracking.  */
5140           if (d != end_match_2)
5141             {
5142               /* 1 if this match ends in the same string (string1 or string2)
5143                  as the best previous match.  */
5144               boolean same_str_p = (FIRST_STRING_P (match_end)
5145                                     == FIRST_STRING_P (d));
5146               /* 1 if this match is the best seen so far.  */
5147               boolean best_match_p;
5148
5149               /* AIX compiler got confused when this was combined
5150                  with the previous declaration.  */
5151               if (same_str_p)
5152                 best_match_p = d > match_end;
5153               else
5154                 best_match_p = !FIRST_STRING_P (d);
5155
5156               DEBUG_PRINT ("backtracking.\n");
5157
5158               if (!FAIL_STACK_EMPTY ())
5159                 { /* More failure points to try.  */
5160
5161                   /* If exceeds best match so far, save it.  */
5162                   if (!best_regs_set || best_match_p)
5163                     {
5164                       best_regs_set = true;
5165                       match_end = d;
5166
5167                       DEBUG_PRINT ("\nSAVING match as best so far.\n");
5168
5169                       for (reg = 1; reg < num_regs; reg++)
5170                         {
5171                           best_regstart[reg] = regstart[reg];
5172                           best_regend[reg] = regend[reg];
5173                         }
5174                     }
5175                   goto fail;
5176                 }
5177
5178               /* If no failure points, don't restore garbage.  And if
5179                  last match is real best match, don't restore second
5180                  best one. */
5181               else if (best_regs_set && !best_match_p)
5182                 {
5183                 restore_best_regs:
5184                   /* Restore best match.  It may happen that `dend ==
5185                      end_match_1' while the restored d is in string2.
5186                      For example, the pattern `x.*y.*z' against the
5187                      strings `x-' and `y-z-', if the two strings are
5188                      not consecutive in memory.  */
5189                   DEBUG_PRINT ("Restoring best registers.\n");
5190
5191                   d = match_end;
5192                   dend = ((d >= string1 && d <= end1)
5193                            ? end_match_1 : end_match_2);
5194
5195                   for (reg = 1; reg < num_regs; reg++)
5196                     {
5197                       regstart[reg] = best_regstart[reg];
5198                       regend[reg] = best_regend[reg];
5199                     }
5200                 }
5201             } /* d != end_match_2 */
5202
5203         succeed_label:
5204           DEBUG_PRINT ("Accepting match.\n");
5205
5206           /* If caller wants register contents data back, do it.  */
5207           if (regs && !bufp->no_sub)
5208             {
5209               /* Have the register data arrays been allocated?  */
5210               if (bufp->regs_allocated == REGS_UNALLOCATED)
5211                 { /* No.  So allocate them with malloc.  We need one
5212                      extra element beyond `num_regs' for the `-1' marker
5213                      GNU code uses.  */
5214                   regs->num_regs = MAX (RE_NREGS, num_regs + 1);
5215                   regs->start = TALLOC (regs->num_regs, regoff_t);
5216                   regs->end = TALLOC (regs->num_regs, regoff_t);
5217                   if (regs->start == NULL || regs->end == NULL)
5218                     {
5219                       FREE_VARIABLES ();
5220                       return -2;
5221                     }
5222                   bufp->regs_allocated = REGS_REALLOCATE;
5223                 }
5224               else if (bufp->regs_allocated == REGS_REALLOCATE)
5225                 { /* Yes.  If we need more elements than were already
5226                      allocated, reallocate them.  If we need fewer, just
5227                      leave it alone.  */
5228                   if (regs->num_regs < num_regs + 1)
5229                     {
5230                       regs->num_regs = num_regs + 1;
5231                       RETALLOC (regs->start, regs->num_regs, regoff_t);
5232                       RETALLOC (regs->end, regs->num_regs, regoff_t);
5233                       if (regs->start == NULL || regs->end == NULL)
5234                         {
5235                           FREE_VARIABLES ();
5236                           return -2;
5237                         }
5238                     }
5239                 }
5240               else
5241                 {
5242                   /* These braces fend off a "empty body in an else-statement"
5243                      warning under GCC when assert expands to nothing.  */
5244                   assert (bufp->regs_allocated == REGS_FIXED);
5245                 }
5246
5247               /* Convert the pointer data in `regstart' and `regend' to
5248                  indices.  Register zero has to be set differently,
5249                  since we haven't kept track of any info for it.  */
5250               if (regs->num_regs > 0)
5251                 {
5252                   regs->start[0] = pos;
5253                   regs->end[0] = POINTER_TO_OFFSET (d);
5254                 }
5255
5256               /* Go through the first `min (num_regs, regs->num_regs)'
5257                  registers, since that is all we initialized.  */
5258               for (reg = 1; reg < MIN (num_regs, regs->num_regs); reg++)
5259                 {
5260                   if (REG_UNSET (regstart[reg]) || REG_UNSET (regend[reg]))
5261                     regs->start[reg] = regs->end[reg] = -1;
5262                   else
5263                     {
5264                       regs->start[reg] = POINTER_TO_OFFSET (regstart[reg]);
5265                       regs->end[reg] = POINTER_TO_OFFSET (regend[reg]);
5266                     }
5267                 }
5268
5269               /* If the regs structure we return has more elements than
5270                  were in the pattern, set the extra elements to -1.  If
5271                  we (re)allocated the registers, this is the case,
5272                  because we always allocate enough to have at least one
5273                  -1 at the end.  */
5274               for (reg = num_regs; reg < regs->num_regs; reg++)
5275                 regs->start[reg] = regs->end[reg] = -1;
5276             } /* regs && !bufp->no_sub */
5277
5278           DEBUG_PRINT ("%u failure points pushed, %u popped (%u remain).\n",
5279                        nfailure_points_pushed, nfailure_points_popped,
5280                        nfailure_points_pushed - nfailure_points_popped);
5281           DEBUG_PRINT ("%u registers pushed.\n", num_regs_pushed);
5282
5283           dcnt = POINTER_TO_OFFSET (d) - pos;
5284
5285           DEBUG_PRINT ("Returning %td from re_match_2.\n", dcnt);
5286
5287           FREE_VARIABLES ();
5288           return dcnt;
5289         }
5290
5291       /* Otherwise match next pattern command.  */
5292       switch (*p++)
5293         {
5294         /* Ignore these.  Used to ignore the n of succeed_n's which
5295            currently have n == 0.  */
5296         case no_op:
5297           DEBUG_PRINT ("EXECUTING no_op.\n");
5298           break;
5299
5300         case succeed:
5301           DEBUG_PRINT ("EXECUTING succeed.\n");
5302           goto succeed_label;
5303
5304         /* Match the next n pattern characters exactly.  The following
5305            byte in the pattern defines n, and the n bytes after that
5306            are the characters to match.  */
5307         case exactn:
5308           mcnt = *p++;
5309           DEBUG_PRINT ("EXECUTING exactn %d.\n", mcnt);
5310
5311           /* Remember the start point to rollback upon failure.  */
5312           dfail = d;
5313
5314 #ifndef emacs
5315           /* This is written out as an if-else so we don't waste time
5316              testing `translate' inside the loop.  */
5317           if (RE_TRANSLATE_P (translate))
5318             do
5319               {
5320                 PREFETCH ();
5321                 if (RE_TRANSLATE (translate, *d) != *p++)
5322                   {
5323                     d = dfail;
5324                     goto fail;
5325                   }
5326                 d++;
5327               }
5328             while (--mcnt);
5329           else
5330             do
5331               {
5332                 PREFETCH ();
5333                 if (*d++ != *p++)
5334                   {
5335                     d = dfail;
5336                     goto fail;
5337                   }
5338               }
5339             while (--mcnt);
5340 #else  /* emacs */
5341           /* The cost of testing `translate' is comparatively small.  */
5342           if (target_multibyte)
5343             do
5344               {
5345                 int pat_charlen, buf_charlen;
5346                 int pat_ch, buf_ch;
5347
5348                 PREFETCH ();
5349                 if (multibyte)
5350                   pat_ch = STRING_CHAR_AND_LENGTH (p, pat_charlen);
5351                 else
5352                   {
5353                     pat_ch = RE_CHAR_TO_MULTIBYTE (*p);
5354                     pat_charlen = 1;
5355                   }
5356                 buf_ch = STRING_CHAR_AND_LENGTH (d, buf_charlen);
5357
5358                 if (TRANSLATE (buf_ch) != pat_ch)
5359                   {
5360                     d = dfail;
5361                     goto fail;
5362                   }
5363
5364                 p += pat_charlen;
5365                 d += buf_charlen;
5366                 mcnt -= pat_charlen;
5367               }
5368             while (mcnt > 0);
5369           else
5370             do
5371               {
5372                 int pat_charlen;
5373                 int pat_ch, buf_ch;
5374
5375                 PREFETCH ();
5376                 if (multibyte)
5377                   {
5378                     pat_ch = STRING_CHAR_AND_LENGTH (p, pat_charlen);
5379                     pat_ch = RE_CHAR_TO_UNIBYTE (pat_ch);
5380                   }
5381                 else
5382                   {
5383                     pat_ch = *p;
5384                     pat_charlen = 1;
5385                   }
5386                 buf_ch = RE_CHAR_TO_MULTIBYTE (*d);
5387                 if (! CHAR_BYTE8_P (buf_ch))
5388                   {
5389                     buf_ch = TRANSLATE (buf_ch);
5390                     buf_ch = RE_CHAR_TO_UNIBYTE (buf_ch);
5391                     if (buf_ch < 0)
5392                       buf_ch = *d;
5393                   }
5394                 else
5395                   buf_ch = *d;
5396                 if (buf_ch != pat_ch)
5397                   {
5398                     d = dfail;
5399                     goto fail;
5400                   }
5401                 p += pat_charlen;
5402                 d++;
5403               }
5404             while (--mcnt);
5405 #endif
5406           break;
5407
5408
5409         /* Match any character except possibly a newline or a null.  */
5410         case anychar:
5411           {
5412             int buf_charlen;
5413             re_wchar_t buf_ch;
5414
5415             DEBUG_PRINT ("EXECUTING anychar.\n");
5416
5417             PREFETCH ();
5418             buf_ch = RE_STRING_CHAR_AND_LENGTH (d, buf_charlen,
5419                                                 target_multibyte);
5420             buf_ch = TRANSLATE (buf_ch);
5421
5422             if ((!(bufp->syntax & RE_DOT_NEWLINE)
5423                  && buf_ch == '\n')
5424                 || ((bufp->syntax & RE_DOT_NOT_NULL)
5425                     && buf_ch == '\000'))
5426               goto fail;
5427
5428             DEBUG_PRINT ("  Matched `%d'.\n", *d);
5429             d += buf_charlen;
5430           }
5431           break;
5432
5433
5434         case charset:
5435         case charset_not:
5436           {
5437             register unsigned int c;
5438             boolean not = (re_opcode_t) *(p - 1) == charset_not;
5439             int len;
5440
5441             /* Start of actual range_table, or end of bitmap if there is no
5442                range table.  */
5443             re_char *range_table IF_LINT (= NULL);
5444
5445             /* Nonzero if there is a range table.  */
5446             int range_table_exists;
5447
5448             /* Number of ranges of range table.  This is not included
5449                in the initial byte-length of the command.  */
5450             int count = 0;
5451
5452             /* Whether matching against a unibyte character.  */
5453             boolean unibyte_char = false;
5454
5455             DEBUG_PRINT ("EXECUTING charset%s.\n", not ? "_not" : "");
5456
5457             range_table_exists = CHARSET_RANGE_TABLE_EXISTS_P (&p[-1]);
5458
5459             if (range_table_exists)
5460               {
5461                 range_table = CHARSET_RANGE_TABLE (&p[-1]); /* Past the bitmap.  */
5462                 EXTRACT_NUMBER_AND_INCR (count, range_table);
5463               }
5464
5465             PREFETCH ();
5466             c = RE_STRING_CHAR_AND_LENGTH (d, len, target_multibyte);
5467             if (target_multibyte)
5468               {
5469                 int c1;
5470
5471                 c = TRANSLATE (c);
5472                 c1 = RE_CHAR_TO_UNIBYTE (c);
5473                 if (c1 >= 0)
5474                   {
5475                     unibyte_char = true;
5476                     c = c1;
5477                   }
5478               }
5479             else
5480               {
5481                 int c1 = RE_CHAR_TO_MULTIBYTE (c);
5482
5483                 if (! CHAR_BYTE8_P (c1))
5484                   {
5485                     c1 = TRANSLATE (c1);
5486                     c1 = RE_CHAR_TO_UNIBYTE (c1);
5487                     if (c1 >= 0)
5488                       {
5489                         unibyte_char = true;
5490                         c = c1;
5491                       }
5492                   }
5493                 else
5494                   unibyte_char = true;
5495               }
5496
5497             if (unibyte_char && c < (1 << BYTEWIDTH))
5498               {                 /* Lookup bitmap.  */
5499                 /* Cast to `unsigned' instead of `unsigned char' in
5500                    case the bit list is a full 32 bytes long.  */
5501                 if (c < (unsigned) (CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH)
5502                     && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
5503                   not = !not;
5504               }
5505 #ifdef emacs
5506             else if (range_table_exists)
5507               {
5508                 int class_bits = CHARSET_RANGE_TABLE_BITS (&p[-1]);
5509
5510                 if (  (class_bits & BIT_LOWER && ISLOWER (c))
5511                     | (class_bits & BIT_MULTIBYTE)
5512                     | (class_bits & BIT_PUNCT && ISPUNCT (c))
5513                     | (class_bits & BIT_SPACE && ISSPACE (c))
5514                     | (class_bits & BIT_UPPER && ISUPPER (c))
5515                     | (class_bits & BIT_WORD  && ISWORD (c)))
5516                   not = !not;
5517                 else
5518                   CHARSET_LOOKUP_RANGE_TABLE_RAW (not, c, range_table, count);
5519               }
5520 #endif /* emacs */
5521
5522             if (range_table_exists)
5523               p = CHARSET_RANGE_TABLE_END (range_table, count);
5524             else
5525               p += CHARSET_BITMAP_SIZE (&p[-1]) + 1;
5526
5527             if (!not) goto fail;
5528
5529             d += len;
5530           }
5531           break;
5532
5533
5534         /* The beginning of a group is represented by start_memory.
5535            The argument is the register number.  The text
5536            matched within the group is recorded (in the internal
5537            registers data structure) under the register number.  */
5538         case start_memory:
5539           DEBUG_PRINT ("EXECUTING start_memory %d:\n", *p);
5540
5541           /* In case we need to undo this operation (via backtracking).  */
5542           PUSH_FAILURE_REG (*p);
5543
5544           regstart[*p] = d;
5545           regend[*p] = NULL;    /* probably unnecessary.  -sm  */
5546           DEBUG_PRINT ("  regstart: %td\n", POINTER_TO_OFFSET (regstart[*p]));
5547
5548           /* Move past the register number.  */
5549           p += 1;
5550           break;
5551
5552
5553         /* The stop_memory opcode represents the end of a group.  Its
5554            argument is the same as start_memory's: the register number.  */
5555         case stop_memory:
5556           DEBUG_PRINT ("EXECUTING stop_memory %d:\n", *p);
5557
5558           assert (!REG_UNSET (regstart[*p]));
5559           /* Strictly speaking, there should be code such as:
5560
5561                 assert (REG_UNSET (regend[*p]));
5562                 PUSH_FAILURE_REGSTOP ((unsigned int)*p);
5563
5564              But the only info to be pushed is regend[*p] and it is known to
5565              be UNSET, so there really isn't anything to push.
5566              Not pushing anything, on the other hand, deprives us of the
5567              guarantee that regend[*p] is UNSET since undoing this operation
5568              will not reset its value properly.  This is not important since
5569              the value will only be read on the next start_memory or at
5570              the very end and both events can only happen if this stop_memory
5571              is *not* undone.  */
5572
5573           regend[*p] = d;
5574           DEBUG_PRINT ("      regend: %td\n", POINTER_TO_OFFSET (regend[*p]));
5575
5576           /* Move past the register number.  */
5577           p += 1;
5578           break;
5579
5580
5581         /* \<digit> has been turned into a `duplicate' command which is
5582            followed by the numeric value of <digit> as the register number.  */
5583         case duplicate:
5584           {
5585             register re_char *d2, *dend2;
5586             int regno = *p++;   /* Get which register to match against.  */
5587             DEBUG_PRINT ("EXECUTING duplicate %d.\n", regno);
5588
5589             /* Can't back reference a group which we've never matched.  */
5590             if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
5591               goto fail;
5592
5593             /* Where in input to try to start matching.  */
5594             d2 = regstart[regno];
5595
5596             /* Remember the start point to rollback upon failure.  */
5597             dfail = d;
5598
5599             /* Where to stop matching; if both the place to start and
5600                the place to stop matching are in the same string, then
5601                set to the place to stop, otherwise, for now have to use
5602                the end of the first string.  */
5603
5604             dend2 = ((FIRST_STRING_P (regstart[regno])
5605                       == FIRST_STRING_P (regend[regno]))
5606                      ? regend[regno] : end_match_1);
5607             for (;;)
5608               {
5609                 ptrdiff_t dcnt;
5610
5611                 /* If necessary, advance to next segment in register
5612                    contents.  */
5613                 while (d2 == dend2)
5614                   {
5615                     if (dend2 == end_match_2) break;
5616                     if (dend2 == regend[regno]) break;
5617
5618                     /* End of string1 => advance to string2. */
5619                     d2 = string2;
5620                     dend2 = regend[regno];
5621                   }
5622                 /* At end of register contents => success */
5623                 if (d2 == dend2) break;
5624
5625                 /* If necessary, advance to next segment in data.  */
5626                 PREFETCH ();
5627
5628                 /* How many characters left in this segment to match.  */
5629                 dcnt = dend - d;
5630
5631                 /* Want how many consecutive characters we can match in
5632                    one shot, so, if necessary, adjust the count.  */
5633                 if (dcnt > dend2 - d2)
5634                   dcnt = dend2 - d2;
5635
5636                 /* Compare that many; failure if mismatch, else move
5637                    past them.  */
5638                 if (RE_TRANSLATE_P (translate)
5639                     ? bcmp_translate (d, d2, dcnt, translate, target_multibyte)
5640                     : memcmp (d, d2, dcnt))
5641                   {
5642                     d = dfail;
5643                     goto fail;
5644                   }
5645                 d += dcnt, d2 += dcnt;
5646               }
5647           }
5648           break;
5649
5650
5651         /* begline matches the empty string at the beginning of the string
5652            (unless `not_bol' is set in `bufp'), and after newlines.  */
5653         case begline:
5654           DEBUG_PRINT ("EXECUTING begline.\n");
5655
5656           if (AT_STRINGS_BEG (d))
5657             {
5658               if (!bufp->not_bol) break;
5659             }
5660           else
5661             {
5662               unsigned c;
5663               GET_CHAR_BEFORE_2 (c, d, string1, end1, string2, end2);
5664               if (c == '\n')
5665                 break;
5666             }
5667           /* In all other cases, we fail.  */
5668           goto fail;
5669
5670
5671         /* endline is the dual of begline.  */
5672         case endline:
5673           DEBUG_PRINT ("EXECUTING endline.\n");
5674
5675           if (AT_STRINGS_END (d))
5676             {
5677               if (!bufp->not_eol) break;
5678             }
5679           else
5680             {
5681               PREFETCH_NOLIMIT ();
5682               if (*d == '\n')
5683                 break;
5684             }
5685           goto fail;
5686
5687
5688         /* Match at the very beginning of the data.  */
5689         case begbuf:
5690           DEBUG_PRINT ("EXECUTING begbuf.\n");
5691           if (AT_STRINGS_BEG (d))
5692             break;
5693           goto fail;
5694
5695
5696         /* Match at the very end of the data.  */
5697         case endbuf:
5698           DEBUG_PRINT ("EXECUTING endbuf.\n");
5699           if (AT_STRINGS_END (d))
5700             break;
5701           goto fail;
5702
5703
5704         /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
5705            pushes NULL as the value for the string on the stack.  Then
5706            `POP_FAILURE_POINT' will keep the current value for the
5707            string, instead of restoring it.  To see why, consider
5708            matching `foo\nbar' against `.*\n'.  The .* matches the foo;
5709            then the . fails against the \n.  But the next thing we want
5710            to do is match the \n against the \n; if we restored the
5711            string value, we would be back at the foo.
5712
5713            Because this is used only in specific cases, we don't need to
5714            check all the things that `on_failure_jump' does, to make
5715            sure the right things get saved on the stack.  Hence we don't
5716            share its code.  The only reason to push anything on the
5717            stack at all is that otherwise we would have to change
5718            `anychar's code to do something besides goto fail in this
5719            case; that seems worse than this.  */
5720         case on_failure_keep_string_jump:
5721           EXTRACT_NUMBER_AND_INCR (mcnt, p);
5722           DEBUG_PRINT ("EXECUTING on_failure_keep_string_jump %d (to %p):\n",
5723                        mcnt, p + mcnt);
5724
5725           PUSH_FAILURE_POINT (p - 3, NULL);
5726           break;
5727
5728           /* A nasty loop is introduced by the non-greedy *? and +?.
5729              With such loops, the stack only ever contains one failure point
5730              at a time, so that a plain on_failure_jump_loop kind of
5731              cycle detection cannot work.  Worse yet, such a detection
5732              can not only fail to detect a cycle, but it can also wrongly
5733              detect a cycle (between different instantiations of the same
5734              loop).
5735              So the method used for those nasty loops is a little different:
5736              We use a special cycle-detection-stack-frame which is pushed
5737              when the on_failure_jump_nastyloop failure-point is *popped*.
5738              This special frame thus marks the beginning of one iteration
5739              through the loop and we can hence easily check right here
5740              whether something matched between the beginning and the end of
5741              the loop.  */
5742         case on_failure_jump_nastyloop:
5743           EXTRACT_NUMBER_AND_INCR (mcnt, p);
5744           DEBUG_PRINT ("EXECUTING on_failure_jump_nastyloop %d (to %p):\n",
5745                        mcnt, p + mcnt);
5746
5747           assert ((re_opcode_t)p[-4] == no_op);
5748           {
5749             int cycle = 0;
5750             CHECK_INFINITE_LOOP (p - 4, d);
5751             if (!cycle)
5752               /* If there's a cycle, just continue without pushing
5753                  this failure point.  The failure point is the "try again"
5754                  option, which shouldn't be tried.
5755                  We want (x?)*?y\1z to match both xxyz and xxyxz.  */
5756               PUSH_FAILURE_POINT (p - 3, d);
5757           }
5758           break;
5759
5760           /* Simple loop detecting on_failure_jump:  just check on the
5761              failure stack if the same spot was already hit earlier.  */
5762         case on_failure_jump_loop:
5763         on_failure:
5764           EXTRACT_NUMBER_AND_INCR (mcnt, p);
5765           DEBUG_PRINT ("EXECUTING on_failure_jump_loop %d (to %p):\n",
5766                        mcnt, p + mcnt);
5767           {
5768             int cycle = 0;
5769             CHECK_INFINITE_LOOP (p - 3, d);
5770             if (cycle)
5771               /* If there's a cycle, get out of the loop, as if the matching
5772                  had failed.  We used to just `goto fail' here, but that was
5773                  aborting the search a bit too early: we want to keep the
5774                  empty-loop-match and keep matching after the loop.
5775                  We want (x?)*y\1z to match both xxyz and xxyxz.  */
5776               p += mcnt;
5777             else
5778               PUSH_FAILURE_POINT (p - 3, d);
5779           }
5780           break;
5781
5782
5783         /* Uses of on_failure_jump:
5784
5785            Each alternative starts with an on_failure_jump that points
5786            to the beginning of the next alternative.  Each alternative
5787            except the last ends with a jump that in effect jumps past
5788            the rest of the alternatives.  (They really jump to the
5789            ending jump of the following alternative, because tensioning
5790            these jumps is a hassle.)
5791
5792            Repeats start with an on_failure_jump that points past both
5793            the repetition text and either the following jump or
5794            pop_failure_jump back to this on_failure_jump.  */
5795         case on_failure_jump:
5796           EXTRACT_NUMBER_AND_INCR (mcnt, p);
5797           DEBUG_PRINT ("EXECUTING on_failure_jump %d (to %p):\n",
5798                        mcnt, p + mcnt);
5799
5800           PUSH_FAILURE_POINT (p -3, d);
5801           break;
5802
5803         /* This operation is used for greedy *.
5804            Compare the beginning of the repeat with what follows its
5805            end in the pattern.  If we can establish that there is
5806            nothing that they would both match, i.e., that we would
5807            never have to backtrack into the loop (unlike, e.g., `a*a'),
5808            then we can use a non-backtracking loop based on
5809            on_failure_keep_string_jump instead of on_failure_jump.  */
5810         case on_failure_jump_smart:
5811           EXTRACT_NUMBER_AND_INCR (mcnt, p);
5812           DEBUG_PRINT ("EXECUTING on_failure_jump_smart %d (to %p).\n",
5813                        mcnt, p + mcnt);
5814           {
5815             re_char *p1 = p; /* Next operation.  */
5816             /* Here, we discard `const', making re_match non-reentrant.  */
5817             unsigned char *p2 = (unsigned char*) p + mcnt; /* Jump dest.  */
5818             unsigned char *p3 = (unsigned char*) p - 3; /* opcode location.  */
5819
5820             p -= 3;             /* Reset so that we will re-execute the
5821                                    instruction once it's been changed. */
5822
5823             EXTRACT_NUMBER (mcnt, p2 - 2);
5824
5825             /* Ensure this is indeed the trivial kind of loop
5826                we are expecting.  */
5827             assert (skip_one_char (p1) == p2 - 3);
5828             assert ((re_opcode_t) p2[-3] == jump && p2 + mcnt == p);
5829             DEBUG_STATEMENT (debug += 2);
5830             if (mutually_exclusive_p (bufp, p1, p2))
5831               {
5832                 /* Use a fast `on_failure_keep_string_jump' loop.  */
5833                 DEBUG_PRINT ("  smart exclusive => fast loop.\n");
5834                 *p3 = (unsigned char) on_failure_keep_string_jump;
5835                 STORE_NUMBER (p2 - 2, mcnt + 3);
5836               }
5837             else
5838               {
5839                 /* Default to a safe `on_failure_jump' loop.  */
5840                 DEBUG_PRINT ("  smart default => slow loop.\n");
5841                 *p3 = (unsigned char) on_failure_jump;
5842               }
5843             DEBUG_STATEMENT (debug -= 2);
5844           }
5845           break;
5846
5847         /* Unconditionally jump (without popping any failure points).  */
5848         case jump:
5849         unconditional_jump:
5850           IMMEDIATE_QUIT_CHECK;
5851           EXTRACT_NUMBER_AND_INCR (mcnt, p);    /* Get the amount to jump.  */
5852           DEBUG_PRINT ("EXECUTING jump %d ", mcnt);
5853           p += mcnt;                            /* Do the jump.  */
5854           DEBUG_PRINT ("(to %p).\n", p);
5855           break;
5856
5857
5858         /* Have to succeed matching what follows at least n times.
5859            After that, handle like `on_failure_jump'.  */
5860         case succeed_n:
5861           /* Signedness doesn't matter since we only compare MCNT to 0.  */
5862           EXTRACT_NUMBER (mcnt, p + 2);
5863           DEBUG_PRINT ("EXECUTING succeed_n %d.\n", mcnt);
5864
5865           /* Originally, mcnt is how many times we HAVE to succeed.  */
5866           if (mcnt != 0)
5867             {
5868               /* Here, we discard `const', making re_match non-reentrant.  */
5869               unsigned char *p2 = (unsigned char*) p + 2; /* counter loc.  */
5870               mcnt--;
5871               p += 4;
5872               PUSH_NUMBER (p2, mcnt);
5873             }
5874           else
5875             /* The two bytes encoding mcnt == 0 are two no_op opcodes.  */
5876             goto on_failure;
5877           break;
5878
5879         case jump_n:
5880           /* Signedness doesn't matter since we only compare MCNT to 0.  */
5881           EXTRACT_NUMBER (mcnt, p + 2);
5882           DEBUG_PRINT ("EXECUTING jump_n %d.\n", mcnt);
5883
5884           /* Originally, this is how many times we CAN jump.  */
5885           if (mcnt != 0)
5886             {
5887                /* Here, we discard `const', making re_match non-reentrant.  */
5888               unsigned char *p2 = (unsigned char*) p + 2; /* counter loc.  */
5889               mcnt--;
5890               PUSH_NUMBER (p2, mcnt);
5891               goto unconditional_jump;
5892             }
5893           /* If we don't have to jump any more, skip over the rest of the command.  */
5894           else
5895             p += 4;
5896           break;
5897
5898         case set_number_at:
5899           {
5900             unsigned char *p2;  /* Location of the counter.  */
5901             DEBUG_PRINT ("EXECUTING set_number_at.\n");
5902
5903             EXTRACT_NUMBER_AND_INCR (mcnt, p);
5904             /* Here, we discard `const', making re_match non-reentrant.  */
5905             p2 = (unsigned char*) p + mcnt;
5906             /* Signedness doesn't matter since we only copy MCNT's bits.  */
5907             EXTRACT_NUMBER_AND_INCR (mcnt, p);
5908             DEBUG_PRINT ("  Setting %p to %d.\n", p2, mcnt);
5909             PUSH_NUMBER (p2, mcnt);
5910             break;
5911           }
5912
5913         case wordbound:
5914         case notwordbound:
5915           {
5916             boolean not = (re_opcode_t) *(p - 1) == notwordbound;
5917             DEBUG_PRINT ("EXECUTING %swordbound.\n", not ? "not" : "");
5918
5919             /* We SUCCEED (or FAIL) in one of the following cases: */
5920
5921             /* Case 1: D is at the beginning or the end of string.  */
5922             if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5923               not = !not;
5924             else
5925               {
5926                 /* C1 is the character before D, S1 is the syntax of C1, C2
5927                    is the character at D, and S2 is the syntax of C2.  */
5928                 re_wchar_t c1, c2;
5929                 int s1, s2;
5930                 int dummy;
5931 #ifdef emacs
5932                 ssize_t offset = PTR_TO_OFFSET (d - 1);
5933                 ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
5934                 UPDATE_SYNTAX_TABLE (charpos);
5935 #endif
5936                 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5937                 s1 = SYNTAX (c1);
5938 #ifdef emacs
5939                 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
5940 #endif
5941                 PREFETCH_NOLIMIT ();
5942                 GET_CHAR_AFTER (c2, d, dummy);
5943                 s2 = SYNTAX (c2);
5944
5945                 if (/* Case 2: Only one of S1 and S2 is Sword.  */
5946                     ((s1 == Sword) != (s2 == Sword))
5947                     /* Case 3: Both of S1 and S2 are Sword, and macro
5948                        WORD_BOUNDARY_P (C1, C2) returns nonzero.  */
5949                     || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
5950                   not = !not;
5951               }
5952             if (not)
5953               break;
5954             else
5955               goto fail;
5956           }
5957
5958         case wordbeg:
5959           DEBUG_PRINT ("EXECUTING wordbeg.\n");
5960
5961           /* We FAIL in one of the following cases: */
5962
5963           /* Case 1: D is at the end of string.  */
5964           if (AT_STRINGS_END (d))
5965             goto fail;
5966           else
5967             {
5968               /* C1 is the character before D, S1 is the syntax of C1, C2
5969                  is the character at D, and S2 is the syntax of C2.  */
5970               re_wchar_t c1, c2;
5971               int s1, s2;
5972               int dummy;
5973 #ifdef emacs
5974               ssize_t offset = PTR_TO_OFFSET (d);
5975               ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
5976               UPDATE_SYNTAX_TABLE (charpos);
5977 #endif
5978               PREFETCH ();
5979               GET_CHAR_AFTER (c2, d, dummy);
5980               s2 = SYNTAX (c2);
5981
5982               /* Case 2: S2 is not Sword. */
5983               if (s2 != Sword)
5984                 goto fail;
5985
5986               /* Case 3: D is not at the beginning of string ... */
5987               if (!AT_STRINGS_BEG (d))
5988                 {
5989                   GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5990 #ifdef emacs
5991                   UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
5992 #endif
5993                   s1 = SYNTAX (c1);
5994
5995                   /* ... and S1 is Sword, and WORD_BOUNDARY_P (C1, C2)
5996                      returns 0.  */
5997                   if ((s1 == Sword) && !WORD_BOUNDARY_P (c1, c2))
5998                     goto fail;
5999                 }
6000             }
6001           break;
6002
6003         case wordend:
6004           DEBUG_PRINT ("EXECUTING wordend.\n");
6005
6006           /* We FAIL in one of the following cases: */
6007
6008           /* Case 1: D is at the beginning of string.  */
6009           if (AT_STRINGS_BEG (d))
6010             goto fail;
6011           else
6012             {
6013               /* C1 is the character before D, S1 is the syntax of C1, C2
6014                  is the character at D, and S2 is the syntax of C2.  */
6015               re_wchar_t c1, c2;
6016               int s1, s2;
6017               int dummy;
6018 #ifdef emacs
6019               ssize_t offset = PTR_TO_OFFSET (d) - 1;
6020               ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
6021               UPDATE_SYNTAX_TABLE (charpos);
6022 #endif
6023               GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
6024               s1 = SYNTAX (c1);
6025
6026               /* Case 2: S1 is not Sword.  */
6027               if (s1 != Sword)
6028                 goto fail;
6029
6030               /* Case 3: D is not at the end of string ... */
6031               if (!AT_STRINGS_END (d))
6032                 {
6033                   PREFETCH_NOLIMIT ();
6034                   GET_CHAR_AFTER (c2, d, dummy);
6035 #ifdef emacs
6036                   UPDATE_SYNTAX_TABLE_FORWARD (charpos);
6037 #endif
6038                   s2 = SYNTAX (c2);
6039
6040                   /* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2)
6041                      returns 0.  */
6042                   if ((s2 == Sword) && !WORD_BOUNDARY_P (c1, c2))
6043           goto fail;
6044                 }
6045             }
6046           break;
6047
6048         case symbeg:
6049           DEBUG_PRINT ("EXECUTING symbeg.\n");
6050
6051           /* We FAIL in one of the following cases: */
6052
6053           /* Case 1: D is at the end of string.  */
6054           if (AT_STRINGS_END (d))
6055             goto fail;
6056           else
6057             {
6058               /* C1 is the character before D, S1 is the syntax of C1, C2
6059                  is the character at D, and S2 is the syntax of C2.  */
6060               re_wchar_t c1, c2;
6061               int s1, s2;
6062 #ifdef emacs
6063               ssize_t offset = PTR_TO_OFFSET (d);
6064               ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
6065               UPDATE_SYNTAX_TABLE (charpos);
6066 #endif
6067               PREFETCH ();
6068               c2 = RE_STRING_CHAR (d, target_multibyte);
6069               s2 = SYNTAX (c2);
6070
6071               /* Case 2: S2 is neither Sword nor Ssymbol. */
6072               if (s2 != Sword && s2 != Ssymbol)
6073                 goto fail;
6074
6075               /* Case 3: D is not at the beginning of string ... */
6076               if (!AT_STRINGS_BEG (d))
6077                 {
6078                   GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
6079 #ifdef emacs
6080                   UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
6081 #endif
6082                   s1 = SYNTAX (c1);
6083
6084                   /* ... and S1 is Sword or Ssymbol.  */
6085                   if (s1 == Sword || s1 == Ssymbol)
6086                     goto fail;
6087                 }
6088             }
6089           break;
6090
6091         case symend:
6092           DEBUG_PRINT ("EXECUTING symend.\n");
6093
6094           /* We FAIL in one of the following cases: */
6095
6096           /* Case 1: D is at the beginning of string.  */
6097           if (AT_STRINGS_BEG (d))
6098             goto fail;
6099           else
6100             {
6101               /* C1 is the character before D, S1 is the syntax of C1, C2
6102                  is the character at D, and S2 is the syntax of C2.  */
6103               re_wchar_t c1, c2;
6104               int s1, s2;
6105 #ifdef emacs
6106               ssize_t offset = PTR_TO_OFFSET (d) - 1;
6107               ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
6108               UPDATE_SYNTAX_TABLE (charpos);
6109 #endif
6110               GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
6111               s1 = SYNTAX (c1);
6112
6113               /* Case 2: S1 is neither Ssymbol nor Sword.  */
6114               if (s1 != Sword && s1 != Ssymbol)
6115                 goto fail;
6116
6117               /* Case 3: D is not at the end of string ... */
6118               if (!AT_STRINGS_END (d))
6119                 {
6120                   PREFETCH_NOLIMIT ();
6121                   c2 = RE_STRING_CHAR (d, target_multibyte);
6122 #ifdef emacs
6123                   UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
6124 #endif
6125                   s2 = SYNTAX (c2);
6126
6127                   /* ... and S2 is Sword or Ssymbol.  */
6128                   if (s2 == Sword || s2 == Ssymbol)
6129                     goto fail;
6130                 }
6131             }
6132           break;
6133
6134         case syntaxspec:
6135         case notsyntaxspec:
6136           {
6137             boolean not = (re_opcode_t) *(p - 1) == notsyntaxspec;
6138             mcnt = *p++;
6139             DEBUG_PRINT ("EXECUTING %ssyntaxspec %d.\n", not ? "not" : "",
6140                          mcnt);
6141             PREFETCH ();
6142 #ifdef emacs
6143             {
6144               ssize_t offset = PTR_TO_OFFSET (d);
6145               ssize_t pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
6146               UPDATE_SYNTAX_TABLE (pos1);
6147             }
6148 #endif
6149             {
6150               int len;
6151               re_wchar_t c;
6152
6153               GET_CHAR_AFTER (c, d, len);
6154               if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not)
6155                 goto fail;
6156               d += len;
6157             }
6158           }
6159           break;
6160
6161 #ifdef emacs
6162         case before_dot:
6163           DEBUG_PRINT ("EXECUTING before_dot.\n");
6164           if (PTR_BYTE_POS (d) >= PT_BYTE)
6165             goto fail;
6166           break;
6167
6168         case at_dot:
6169           DEBUG_PRINT ("EXECUTING at_dot.\n");
6170           if (PTR_BYTE_POS (d) != PT_BYTE)
6171             goto fail;
6172           break;
6173
6174         case after_dot:
6175           DEBUG_PRINT ("EXECUTING after_dot.\n");
6176           if (PTR_BYTE_POS (d) <= PT_BYTE)
6177             goto fail;
6178           break;
6179
6180         case categoryspec:
6181         case notcategoryspec:
6182           {
6183             boolean not = (re_opcode_t) *(p - 1) == notcategoryspec;
6184             mcnt = *p++;
6185             DEBUG_PRINT ("EXECUTING %scategoryspec %d.\n",
6186                          not ? "not" : "", mcnt);
6187             PREFETCH ();
6188
6189             {
6190               int len;
6191               re_wchar_t c;
6192               GET_CHAR_AFTER (c, d, len);
6193               if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not)
6194                 goto fail;
6195               d += len;
6196             }
6197           }
6198           break;
6199
6200 #endif /* emacs */
6201
6202         default:
6203           abort ();
6204         }
6205       continue;  /* Successfully executed one pattern command; keep going.  */
6206
6207
6208     /* We goto here if a matching operation fails. */
6209     fail:
6210       IMMEDIATE_QUIT_CHECK;
6211       if (!FAIL_STACK_EMPTY ())
6212         {
6213           re_char *str, *pat;
6214           /* A restart point is known.  Restore to that state.  */
6215           DEBUG_PRINT ("\nFAIL:\n");
6216           POP_FAILURE_POINT (str, pat);
6217           switch (*pat++)
6218             {
6219             case on_failure_keep_string_jump:
6220               assert (str == NULL);
6221               goto continue_failure_jump;
6222
6223             case on_failure_jump_nastyloop:
6224               assert ((re_opcode_t)pat[-2] == no_op);
6225               PUSH_FAILURE_POINT (pat - 2, str);
6226               /* Fallthrough */
6227
6228             case on_failure_jump_loop:
6229             case on_failure_jump:
6230             case succeed_n:
6231               d = str;
6232             continue_failure_jump:
6233               EXTRACT_NUMBER_AND_INCR (mcnt, pat);
6234               p = pat + mcnt;
6235               break;
6236
6237             case no_op:
6238               /* A special frame used for nastyloops. */
6239               goto fail;
6240
6241             default:
6242               abort ();
6243             }
6244
6245           assert (p >= bufp->buffer && p <= pend);
6246
6247           if (d >= string1 && d <= end1)
6248             dend = end_match_1;
6249         }
6250       else
6251         break;   /* Matching at this starting point really fails.  */
6252     } /* for (;;) */
6253
6254   if (best_regs_set)
6255     goto restore_best_regs;
6256
6257   FREE_VARIABLES ();
6258
6259   return -1;                            /* Failure to match.  */
6260 }
6261 \f
6262 /* Subroutine definitions for re_match_2.  */
6263
6264 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
6265    bytes; nonzero otherwise.  */
6266
6267 static int
6268 bcmp_translate (const re_char *s1, const re_char *s2, register ssize_t len,
6269                 RE_TRANSLATE_TYPE translate, const int target_multibyte)
6270 {
6271   register re_char *p1 = s1, *p2 = s2;
6272   re_char *p1_end = s1 + len;
6273   re_char *p2_end = s2 + len;
6274
6275   /* FIXME: Checking both p1 and p2 presumes that the two strings might have
6276      different lengths, but relying on a single `len' would break this. -sm  */
6277   while (p1 < p1_end && p2 < p2_end)
6278     {
6279       int p1_charlen, p2_charlen;
6280       re_wchar_t p1_ch, p2_ch;
6281
6282       GET_CHAR_AFTER (p1_ch, p1, p1_charlen);
6283       GET_CHAR_AFTER (p2_ch, p2, p2_charlen);
6284
6285       if (RE_TRANSLATE (translate, p1_ch)
6286           != RE_TRANSLATE (translate, p2_ch))
6287         return 1;
6288
6289       p1 += p1_charlen, p2 += p2_charlen;
6290     }
6291
6292   if (p1 != p1_end || p2 != p2_end)
6293     return 1;
6294
6295   return 0;
6296 }
6297 \f
6298 /* Entry points for GNU code.  */
6299
6300 /* re_compile_pattern is the GNU regular expression compiler: it
6301    compiles PATTERN (of length SIZE) and puts the result in BUFP.
6302    Returns 0 if the pattern was valid, otherwise an error string.
6303
6304    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
6305    are set in BUFP on entry.
6306
6307    We call regex_compile to do the actual compilation.  */
6308
6309 const char *
6310 re_compile_pattern (const char *pattern, size_t length,
6311                     struct re_pattern_buffer *bufp)
6312 {
6313   reg_errcode_t ret;
6314
6315   /* GNU code is written to assume at least RE_NREGS registers will be set
6316      (and at least one extra will be -1).  */
6317   bufp->regs_allocated = REGS_UNALLOCATED;
6318
6319   /* And GNU code determines whether or not to get register information
6320      by passing null for the REGS argument to re_match, etc., not by
6321      setting no_sub.  */
6322   bufp->no_sub = 0;
6323
6324   ret = regex_compile ((re_char*) pattern, length, re_syntax_options, bufp);
6325
6326   if (!ret)
6327     return NULL;
6328   return gettext (re_error_msgid[(int) ret]);
6329 }
6330 WEAK_ALIAS (__re_compile_pattern, re_compile_pattern)
6331 \f
6332 /* Entry points compatible with 4.2 BSD regex library.  We don't define
6333    them unless specifically requested.  */
6334
6335 #if defined _REGEX_RE_COMP || defined _LIBC
6336
6337 /* BSD has one and only one pattern buffer.  */
6338 static struct re_pattern_buffer re_comp_buf;
6339
6340 char *
6341 # ifdef _LIBC
6342 /* Make these definitions weak in libc, so POSIX programs can redefine
6343    these names if they don't use our functions, and still use
6344    regcomp/regexec below without link errors.  */
6345 weak_function
6346 # endif
6347 re_comp (const char *s)
6348 {
6349   reg_errcode_t ret;
6350
6351   if (!s)
6352     {
6353       if (!re_comp_buf.buffer)
6354         /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
6355         return (char *) gettext ("No previous regular expression");
6356       return 0;
6357     }
6358
6359   if (!re_comp_buf.buffer)
6360     {
6361       re_comp_buf.buffer = malloc (200);
6362       if (re_comp_buf.buffer == NULL)
6363         /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
6364         return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
6365       re_comp_buf.allocated = 200;
6366
6367       re_comp_buf.fastmap = malloc (1 << BYTEWIDTH);
6368       if (re_comp_buf.fastmap == NULL)
6369         /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
6370         return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
6371     }
6372
6373   /* Since `re_exec' always passes NULL for the `regs' argument, we
6374      don't need to initialize the pattern buffer fields which affect it.  */
6375
6376   ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
6377
6378   if (!ret)
6379     return NULL;
6380
6381   /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
6382   return (char *) gettext (re_error_msgid[(int) ret]);
6383 }
6384
6385
6386 int
6387 # ifdef _LIBC
6388 weak_function
6389 # endif
6390 re_exec (const char *s)
6391 {
6392   const size_t len = strlen (s);
6393   return (re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0)
6394           >= 0);
6395 }
6396 #endif /* _REGEX_RE_COMP */
6397 \f
6398 /* POSIX.2 functions.  Don't define these for Emacs.  */
6399
6400 #ifndef emacs
6401
6402 /* regcomp takes a regular expression as a string and compiles it.
6403
6404    PREG is a regex_t *.  We do not expect any fields to be initialized,
6405    since POSIX says we shouldn't.  Thus, we set
6406
6407      `buffer' to the compiled pattern;
6408      `used' to the length of the compiled pattern;
6409      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
6410        REG_EXTENDED bit in CFLAGS is set; otherwise, to
6411        RE_SYNTAX_POSIX_BASIC;
6412      `fastmap' to an allocated space for the fastmap;
6413      `fastmap_accurate' to zero;
6414      `re_nsub' to the number of subexpressions in PATTERN.
6415
6416    PATTERN is the address of the pattern string.
6417
6418    CFLAGS is a series of bits which affect compilation.
6419
6420      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
6421      use POSIX basic syntax.
6422
6423      If REG_NEWLINE is set, then . and [^...] don't match newline.
6424      Also, regexec will try a match beginning after every newline.
6425
6426      If REG_ICASE is set, then we considers upper- and lowercase
6427      versions of letters to be equivalent when matching.
6428
6429      If REG_NOSUB is set, then when PREG is passed to regexec, that
6430      routine will report only success or failure, and nothing about the
6431      registers.
6432
6433    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
6434    the return codes and their meanings.)  */
6435
6436 reg_errcode_t
6437 regcomp (regex_t *__restrict preg, const char *__restrict pattern,
6438          int cflags)
6439 {
6440   reg_errcode_t ret;
6441   reg_syntax_t syntax
6442     = (cflags & REG_EXTENDED) ?
6443       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
6444
6445   /* regex_compile will allocate the space for the compiled pattern.  */
6446   preg->buffer = 0;
6447   preg->allocated = 0;
6448   preg->used = 0;
6449
6450   /* Try to allocate space for the fastmap.  */
6451   preg->fastmap = malloc (1 << BYTEWIDTH);
6452
6453   if (cflags & REG_ICASE)
6454     {
6455       unsigned i;
6456
6457       preg->translate = malloc (CHAR_SET_SIZE * sizeof *preg->translate);
6458       if (preg->translate == NULL)
6459         return (int) REG_ESPACE;
6460
6461       /* Map uppercase characters to corresponding lowercase ones.  */
6462       for (i = 0; i < CHAR_SET_SIZE; i++)
6463         preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
6464     }
6465   else
6466     preg->translate = NULL;
6467
6468   /* If REG_NEWLINE is set, newlines are treated differently.  */
6469   if (cflags & REG_NEWLINE)
6470     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
6471       syntax &= ~RE_DOT_NEWLINE;
6472       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
6473     }
6474   else
6475     syntax |= RE_NO_NEWLINE_ANCHOR;
6476
6477   preg->no_sub = !!(cflags & REG_NOSUB);
6478
6479   /* POSIX says a null character in the pattern terminates it, so we
6480      can use strlen here in compiling the pattern.  */
6481   ret = regex_compile ((re_char*) pattern, strlen (pattern), syntax, preg);
6482
6483   /* POSIX doesn't distinguish between an unmatched open-group and an
6484      unmatched close-group: both are REG_EPAREN.  */
6485   if (ret == REG_ERPAREN)
6486     ret = REG_EPAREN;
6487
6488   if (ret == REG_NOERROR && preg->fastmap)
6489     { /* Compute the fastmap now, since regexec cannot modify the pattern
6490          buffer.  */
6491       re_compile_fastmap (preg);
6492       if (preg->can_be_null)
6493         { /* The fastmap can't be used anyway.  */
6494           free (preg->fastmap);
6495           preg->fastmap = NULL;
6496         }
6497     }
6498   return ret;
6499 }
6500 WEAK_ALIAS (__regcomp, regcomp)
6501
6502
6503 /* regexec searches for a given pattern, specified by PREG, in the
6504    string STRING.
6505
6506    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
6507    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
6508    least NMATCH elements, and we set them to the offsets of the
6509    corresponding matched substrings.
6510
6511    EFLAGS specifies `execution flags' which affect matching: if
6512    REG_NOTBOL is set, then ^ does not match at the beginning of the
6513    string; if REG_NOTEOL is set, then $ does not match at the end.
6514
6515    We return 0 if we find a match and REG_NOMATCH if not.  */
6516
6517 reg_errcode_t
6518 regexec (const regex_t *__restrict preg, const char *__restrict string,
6519          size_t nmatch, regmatch_t pmatch[__restrict_arr], int eflags)
6520 {
6521   regoff_t ret;
6522   struct re_registers regs;
6523   regex_t private_preg;
6524   size_t len = strlen (string);
6525   boolean want_reg_info = !preg->no_sub && nmatch > 0 && pmatch;
6526
6527   private_preg = *preg;
6528
6529   private_preg.not_bol = !!(eflags & REG_NOTBOL);
6530   private_preg.not_eol = !!(eflags & REG_NOTEOL);
6531
6532   /* The user has told us exactly how many registers to return
6533      information about, via `nmatch'.  We have to pass that on to the
6534      matching routines.  */
6535   private_preg.regs_allocated = REGS_FIXED;
6536
6537   if (want_reg_info)
6538     {
6539       regs.num_regs = nmatch;
6540       regs.start = TALLOC (nmatch * 2, regoff_t);
6541       if (regs.start == NULL)
6542         return REG_NOMATCH;
6543       regs.end = regs.start + nmatch;
6544     }
6545
6546   /* Instead of using not_eol to implement REG_NOTEOL, we could simply
6547      pass (&private_preg, string, len + 1, 0, len, ...) pretending the string
6548      was a little bit longer but still only matching the real part.
6549      This works because the `endline' will check for a '\n' and will find a
6550      '\0', correctly deciding that this is not the end of a line.
6551      But it doesn't work out so nicely for REG_NOTBOL, since we don't have
6552      a convenient '\0' there.  For all we know, the string could be preceded
6553      by '\n' which would throw things off.  */
6554
6555   /* Perform the searching operation.  */
6556   ret = re_search (&private_preg, string, len,
6557                    /* start: */ 0, /* range: */ len,
6558                    want_reg_info ? &regs : (struct re_registers *) 0);
6559
6560   /* Copy the register information to the POSIX structure.  */
6561   if (want_reg_info)
6562     {
6563       if (ret >= 0)
6564         {
6565           unsigned r;
6566
6567           for (r = 0; r < nmatch; r++)
6568             {
6569               pmatch[r].rm_so = regs.start[r];
6570               pmatch[r].rm_eo = regs.end[r];
6571             }
6572         }
6573
6574       /* If we needed the temporary register info, free the space now.  */
6575       free (regs.start);
6576     }
6577
6578   /* We want zero return to mean success, unlike `re_search'.  */
6579   return ret >= 0 ? REG_NOERROR : REG_NOMATCH;
6580 }
6581 WEAK_ALIAS (__regexec, regexec)
6582
6583
6584 /* Returns a message corresponding to an error code, ERR_CODE, returned
6585    from either regcomp or regexec.   We don't use PREG here.
6586
6587    ERR_CODE was previously called ERRCODE, but that name causes an
6588    error with msvc8 compiler.  */
6589
6590 size_t
6591 regerror (int err_code, const regex_t *preg, char *errbuf, size_t errbuf_size)
6592 {
6593   const char *msg;
6594   size_t msg_size;
6595
6596   if (err_code < 0
6597       || err_code >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0])))
6598     /* Only error codes returned by the rest of the code should be passed
6599        to this routine.  If we are given anything else, or if other regex
6600        code generates an invalid error code, then the program has a bug.
6601        Dump core so we can fix it.  */
6602     abort ();
6603
6604   msg = gettext (re_error_msgid[err_code]);
6605
6606   msg_size = strlen (msg) + 1; /* Includes the null.  */
6607
6608   if (errbuf_size != 0)
6609     {
6610       if (msg_size > errbuf_size)
6611         {
6612           memcpy (errbuf, msg, errbuf_size - 1);
6613           errbuf[errbuf_size - 1] = 0;
6614         }
6615       else
6616         strcpy (errbuf, msg);
6617     }
6618
6619   return msg_size;
6620 }
6621 WEAK_ALIAS (__regerror, regerror)
6622
6623
6624 /* Free dynamically allocated space used by PREG.  */
6625
6626 void
6627 regfree (regex_t *preg)
6628 {
6629   free (preg->buffer);
6630   preg->buffer = NULL;
6631
6632   preg->allocated = 0;
6633   preg->used = 0;
6634
6635   free (preg->fastmap);
6636   preg->fastmap = NULL;
6637   preg->fastmap_accurate = 0;
6638
6639   free (preg->translate);
6640   preg->translate = NULL;
6641 }
6642 WEAK_ALIAS (__regfree, regfree)
6643
6644 #endif /* not emacs  */