Refill some long/short copyright headers.

[bpt/emacs.git] / src / regex.c
diff --git a/src/regex.c b/src/regex.c

index bb921a5..cb6edc2 100644 (file)
--- a/src/regex.c
+++ b/src/regex.c
@@ -2,9 +2,7 @@
     0.12.  (Implements POSIX draft P1003.2/D11.2, except for some of the
     internationalization features.)
  
-   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
-                 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
-                 Free Software Foundation, Inc.
+   Copyright (C) 1993-2011  Free Software Foundation, Inc.
  
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -196,18 +194,12 @@
     even if config.h says that we can.  */
  # undef REL_ALLOC
  
-# if defined STDC_HEADERS || defined _LIBC
-#  include <stdlib.h>
-# else
-char *malloc ();
-char *realloc ();
-# endif
+# include <unistd.h>
  
  /* When used in Emacs's lib-src, we need xmalloc and xrealloc. */
  
  void *
-xmalloc (size)
-     size_t size;
+xmalloc (size_t size)
  {
    register void *val;
    val = (void *) malloc (size);
@@ -220,9 +212,7 @@ xmalloc (size)
  }
  
  void *
-xrealloc (block, size)
-     void *block;
-     size_t size;
+xrealloc (void *block, size_t size)
  {
    register void *val;
    /* We must call malloc explicitly when BLOCK is 0, since some
@@ -248,37 +238,16 @@ xrealloc (block, size)
  # endif
  # define realloc xrealloc
  
-/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
-   If nothing else has been done, use the method below.  */
-# ifdef INHIBIT_STRING_HEADER
-#  if !(defined HAVE_BZERO && defined HAVE_BCOPY)
-#   if !defined bzero && !defined bcopy
-#    undef INHIBIT_STRING_HEADER
-#   endif
+/* This is the normal way of making sure we have memcpy, memcmp and memset.  */
+# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
+#  include <string.h>
+# else
+#  include <strings.h>
+#  ifndef memcmp
+#   define memcmp(s1, s2, n)   bcmp (s1, s2, n)
  #  endif
-# endif
-
-/* This is the normal way of making sure we have memcpy, memcmp and bzero.
-   This is used in most programs--a few other programs avoid this
-   by defining INHIBIT_STRING_HEADER.  */
-# ifndef INHIBIT_STRING_HEADER
-#  if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
-#   include <string.h>
-#   ifndef bzero
-#    ifndef _LIBC
-#     define bzero(s, n)       (memset (s, '\0', n), (s))
-#    else
-#     define bzero(s, n)       __bzero (s, n)
-#    endif
-#   endif
-#  else
-#   include <strings.h>
-#   ifndef memcmp
-#    define memcmp(s1, s2, n)  bcmp (s1, s2, n)
-#   endif
-#   ifndef memcpy
-#    define memcpy(d, s, n)    (bcopy (s, d, n), (d))
-#   endif
+#  ifndef memcpy
+#   define memcpy(d, s, n)     (bcopy (s, d, n), (d))
  #  endif
  # endif
  
@@ -290,7 +259,6 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
  #  define SWITCH_ENUM_CAST(x) (x)
  
  /* Dummy macros for non-Emacs environments.  */
-# define BASE_LEADING_CODE_P(c) (0)
  # define CHAR_CHARSET(c) 0
  # define CHARSET_LEADING_CODE_BASE(c) 0
  # define MAX_MULTIBYTE_LENGTH 1
@@ -300,7 +268,7 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
  # define CHAR_HEAD_P(p) (1)
  # define SINGLE_BYTE_CHAR_P(c) (1)
  # define SAME_CHARSET_P(c1, c2) (1)
-# define MULTIBYTE_FORM_LENGTH(p, s) (1)
+# define BYTES_BY_CHAR_HEAD(p) (1)
  # define PREV_CHAR_BOUNDARY(p, limit) ((p)--)
  # define STRING_CHAR(p) (*(p))
  # define RE_STRING_CHAR(p, multibyte) STRING_CHAR (p)
@@ -457,7 +425,7 @@ extern char *re_syntax_table;
  static char re_syntax_table[CHAR_SET_SIZE];
  
  static void
-init_syntax_once ()
+init_syntax_once (void)
  {
     register int c;
     static int done = 0;
@@ -465,7 +433,7 @@ init_syntax_once ()
     if (done)
       return;
  
-   bzero (re_syntax_table, sizeof re_syntax_table);
+   memset (re_syntax_table, 0, sizeof re_syntax_table);
  
     for (c = 0; c < CHAR_SET_SIZE; ++c)
       if (ISALNUM (c))
@@ -1322,8 +1290,7 @@ reg_syntax_t re_syntax_options;
     defined in regex.h.  We return the old syntax.  */
  
  reg_syntax_t
-re_set_syntax (syntax)
-     reg_syntax_t syntax;
+re_set_syntax (reg_syntax_t syntax)
  {
    reg_syntax_t ret = re_syntax_options;
  
@@ -1336,8 +1303,7 @@ WEAK_ALIAS (__re_set_syntax, re_set_syntax)
  static re_char *whitespace_regexp;
  
  void
-re_set_whitespace_regexp (regexp)
-     const char *regexp;
+re_set_whitespace_regexp (const char *regexp)
  {
    whitespace_regexp = (re_char *) regexp;
  }
@@ -2086,7 +2052,7 @@ struct range_table_work_area
    } while (0)
  
  
-/* Both FROM and TO are mulitbyte characters.  */
+/* Both FROM and TO are multibyte characters.  */
  
  #define SETUP_MULTIBYTE_RANGE(work_area, FROM, TO)                        \
    do {                                                                    \
@@ -2150,8 +2116,7 @@ struct range_table_work_area
  
  /* Map a string to the char class it names (if any).  */
  re_wctype_t
-re_wctype (str)
-     re_char *str;
+re_wctype (const re_char *str)
  {
    const char *string = str;
    if      (STREQ (string, "alnum"))    return RECC_ALNUM;
@@ -2176,9 +2141,7 @@ re_wctype (str)
  
  /* True if CH is in the char class CC.  */
  boolean
-re_iswctype (ch, cc)
-     int ch;
-     re_wctype_t cc;
+re_iswctype (int ch, re_wctype_t cc)
  {
    switch (cc)
      {
@@ -2208,8 +2171,7 @@ re_iswctype (ch, cc)
  /* Return a bit-pattern to use in the range-table bits to match multibyte
     chars of class CC.  */
  static int
-re_wctype_to_bit (cc)
-     re_wctype_t cc;
+re_wctype_to_bit (re_wctype_t cc)
  {
    switch (cc)
      {
@@ -2233,8 +2195,7 @@ re_wctype_to_bit (cc)
  /* Actually extend the space in WORK_AREA.  */
  
  static void
-extend_range_table_work_area (work_area)
-     struct range_table_work_area *work_area;
+extend_range_table_work_area (struct range_table_work_area *work_area)
  {
    work_area->allocated += 16 * sizeof (int);
    if (work_area->table)
@@ -2558,11 +2519,7 @@ do {                                                                     \
    } while (0)
  
  static reg_errcode_t
-regex_compile (pattern, size, syntax, bufp)
-     re_char *pattern;
-     size_t size;
-     reg_syntax_t syntax;
-     struct re_pattern_buffer *bufp;
+regex_compile (const re_char *pattern, size_t size, reg_syntax_t syntax, struct re_pattern_buffer *bufp)
  {
    /* We fetch characters from PATTERN here.  */
    register re_wchar_t c, c1;
@@ -2960,7 +2917,7 @@ regex_compile (pattern, size, syntax, bufp)
             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
  
             /* Clear the whole map.  */
-           bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
+           memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
  
             /* charset_not matches newline according to a syntax bit.  */
             if ((re_opcode_t) b[-2] == charset_not
@@ -3065,6 +3022,13 @@ regex_compile (pattern, size, syntax, bufp)
                            don't need to handle them for multibyte.
                            They are distinguished by a negative wctype.  */
  
+                       /* Set up the gl_state object to its buffer-defined
+                          value.  This hardcodes the buffer-global
+                          syntax-table for ASCII chars, while the other chars
+                          will obey syntax-table properties.  It's not ideal,
+                          but it's the way it's been done until now.  */
+                       SETUP_BUFFER_SYNTAX_TABLE ();
+
                         for (ch = 0; ch < 256; ++ch)
                           {
                             c = RE_CHAR_TO_MULTIBYTE (ch);
@@ -3798,7 +3762,7 @@ regex_compile (pattern, size, syntax, bufp)
  
                     if (c1 != c2 && (c1 = RE_CHAR_TO_UNIBYTE (c2)) >= 0)
                       c = c1;
-                 }                   
+                 }
                 *b++ = c;
                 len = 1;
               }
@@ -3869,10 +3833,7 @@ regex_compile (pattern, size, syntax, bufp)
  /* Store OP at LOC followed by two-byte integer parameter ARG.  */
  
  static void
-store_op1 (op, loc, arg)
-    re_opcode_t op;
-    unsigned char *loc;
-    int arg;
+store_op1 (re_opcode_t op, unsigned char *loc, int arg)
  {
    *loc = (unsigned char) op;
    STORE_NUMBER (loc + 1, arg);
@@ -3882,10 +3843,7 @@ store_op1 (op, loc, arg)
  /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
  
  static void
-store_op2 (op, loc, arg1, arg2)
-    re_opcode_t op;
-    unsigned char *loc;
-    int arg1, arg2;
+store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2)
  {
    *loc = (unsigned char) op;
    STORE_NUMBER (loc + 1, arg1);
@@ -3897,11 +3855,7 @@ store_op2 (op, loc, arg1, arg2)
     for OP followed by two-byte integer parameter ARG.  */
  
  static void
-insert_op1 (op, loc, arg, end)
-    re_opcode_t op;
-    unsigned char *loc;
-    int arg;
-    unsigned char *end;
+insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end)
  {
    register unsigned char *pfrom = end;
    register unsigned char *pto = end + 3;
@@ -3916,11 +3870,7 @@ insert_op1 (op, loc, arg, end)
  /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
  
  static void
-insert_op2 (op, loc, arg1, arg2, end)
-    re_opcode_t op;
-    unsigned char *loc;
-    int arg1, arg2;
-    unsigned char *end;
+insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, unsigned char *end)
  {
    register unsigned char *pfrom = end;
    register unsigned char *pto = end + 5;
@@ -3937,9 +3887,7 @@ insert_op2 (op, loc, arg1, arg2, end)
     least one character before the ^.  */
  
  static boolean
-at_begline_loc_p (pattern, p, syntax)
-    re_char *pattern, *p;
-    reg_syntax_t syntax;
+at_begline_loc_p (const re_char *pattern, const re_char *p, reg_syntax_t syntax)
  {
    re_char *prev = p - 2;
    boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
@@ -3961,9 +3909,7 @@ at_begline_loc_p (pattern, p, syntax)
     at least one character after the $, i.e., `P < PEND'.  */
  
  static boolean
-at_endline_loc_p (p, pend, syntax)
-    re_char *p, *pend;
-    reg_syntax_t syntax;
+at_endline_loc_p (const re_char *p, const re_char *pend, reg_syntax_t syntax)
  {
    re_char *next = p;
    boolean next_backslash = *next == '\\';
@@ -3983,9 +3929,7 @@ at_endline_loc_p (p, pend, syntax)
     false if it's not.  */
  
  static boolean
-group_in_compile_stack (compile_stack, regnum)
-    compile_stack_type compile_stack;
-    regnum_t regnum;
+group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
  {
    int this_element;
  
@@ -4009,10 +3953,7 @@ group_in_compile_stack (compile_stack, regnum)
     Return -1 if fastmap was not updated accurately.  */
  
  static int
-analyse_first (p, pend, fastmap, multibyte)
-     re_char *p, *pend;
-     char *fastmap;
-     const int multibyte;
+analyse_first (const re_char *p, const re_char *pend, char *fastmap, const int multibyte)
  {
    int j, k;
    boolean not;
@@ -4054,7 +3995,6 @@ analyse_first (p, pend, fastmap, multibyte)
         {
         case succeed:
           return 1;
-         continue;
  
         case duplicate:
           /* If the first character has to match a backreference, that means
@@ -4118,7 +4058,7 @@ analyse_first (p, pend, fastmap, multibyte)
           if (/* Any leading code can possibly start a character
                  which doesn't match the specified set of characters.  */
               not
-             || 
+             ||
               /* If we can match a character class, we can match any
                  multibyte characters.  */
               (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
@@ -4138,7 +4078,7 @@ analyse_first (p, pend, fastmap, multibyte)
                    && match_any_multibyte_characters == false)
             {
               /* Set fastmap[I] to 1 where I is a leading code of each
-                multibyte characer in the range table. */
+                multibyte character in the range table. */
               int c, count;
               unsigned char lc1, lc2;
  
@@ -4324,15 +4264,14 @@ analyse_first (p, pend, fastmap, multibyte)
     Returns 0 if we succeed, -2 if an internal error.   */
  
  int
-re_compile_fastmap (bufp)
-     struct re_pattern_buffer *bufp;
+re_compile_fastmap (struct re_pattern_buffer *bufp)
  {
    char *fastmap = bufp->fastmap;
    int analysis;
  
    assert (fastmap && bufp->buffer);
  
-  bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
+  memset (fastmap, 0, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
    bufp->fastmap_accurate = 1;      /* It will be when we're done.  */
  
    analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used,
@@ -4355,11 +4294,7 @@ re_compile_fastmap (bufp)
     freeing the old data.  */
  
  void
-re_set_registers (bufp, regs, num_regs, starts, ends)
-    struct re_pattern_buffer *bufp;
-    struct re_registers *regs;
-    unsigned num_regs;
-    regoff_t *starts, *ends;
+re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, unsigned int num_regs, regoff_t *starts, regoff_t *ends)
  {
    if (num_regs)
      {
@@ -4383,11 +4318,7 @@ WEAK_ALIAS (__re_set_registers, re_set_registers)
     doesn't let you say where to stop matching. */
  
  int
-re_search (bufp, string, size, startpos, range, regs)
-     struct re_pattern_buffer *bufp;
-     const char *string;
-     int size, startpos, range;
-     struct re_registers *regs;
+re_search (struct re_pattern_buffer *bufp, const char *string, int size, int startpos, int range, struct re_registers *regs)
  {
    return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
                       regs, size);
@@ -4428,14 +4359,7 @@ WEAK_ALIAS (__re_search, re_search)
     stack overflow).  */
  
  int
-re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
-     struct re_pattern_buffer *bufp;
-     const char *str1, *str2;
-     int size1, size2;
-     int startpos;
-     int range;
-     struct re_registers *regs;
-     int stop;
+re_search_2 (struct re_pattern_buffer *bufp, const char *str1, int size1, const char *str2, int size2, int startpos, int range, struct re_registers *regs, int stop)
  {
    int val;
    re_char *string1 = (re_char*) str1;
@@ -4489,7 +4413,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
    anchored_start = (bufp->buffer[0] == begline);
  
  #ifdef emacs
-  gl_state.object = re_match_object;
+  gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */
    {
      int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos));
  
@@ -4636,7 +4560,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
             {
               re_char *p = POS_ADDR_VSTRING (startpos);
               re_char *pend = STOP_ADDR_VSTRING (startpos);
-             int len = MULTIBYTE_FORM_LENGTH (p, pend - p);
+             int len = BYTES_BY_CHAR_HEAD (*p);
  
               range -= len;
               if (range < 0)
@@ -4769,8 +4693,7 @@ static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2,
  /* If the operation is a match against one or more chars,
     return a pointer to the next operation, else return NULL.  */
  static re_char *
-skip_one_char (p)
-     re_char *p;
+skip_one_char (const re_char *p)
  {
    switch (SWITCH_ENUM_CAST (*p++))
      {
@@ -4812,8 +4735,7 @@ skip_one_char (p)
  
  /* Jump over non-matching operations.  */
  static re_char *
-skip_noops (p, pend)
-     re_char *p, *pend;
+skip_noops (const re_char *p, const re_char *pend)
  {
    int mcnt;
    while (p < pend)
@@ -4840,9 +4762,7 @@ skip_noops (p, pend)
  
  /* Non-zero if "p1 matches something" implies "p2 fails".  */
  static int
-mutually_exclusive_p (bufp, p1, p2)
-     struct re_pattern_buffer *bufp;
-     re_char *p1, *p2;
+mutually_exclusive_p (struct re_pattern_buffer *bufp, const re_char *p1, const re_char *p2)
  {
    re_opcode_t op2;
    const boolean multibyte = RE_MULTIBYTE_P (bufp);
@@ -5047,11 +4967,8 @@ mutually_exclusive_p (bufp, p1, p2)
  /* re_match is like re_match_2 except it takes only a single string.  */
  
  int
-re_match (bufp, string, size, pos, regs)
-     struct re_pattern_buffer *bufp;
-     const char *string;
-     int size, pos;
-     struct re_registers *regs;
+re_match (struct re_pattern_buffer *bufp, const char *string,
+         int size, int pos, struct re_registers *regs)
  {
    int result = re_match_2_internal (bufp, NULL, 0, (re_char*) string, size,
                                     pos, regs, size);
@@ -5080,19 +4997,13 @@ Lisp_Object re_match_object;
     matched substring.  */
  
  int
-re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
-     struct re_pattern_buffer *bufp;
-     const char *string1, *string2;
-     int size1, size2;
-     int pos;
-     struct re_registers *regs;
-     int stop;
+re_match_2 (struct re_pattern_buffer *bufp, const char *string1, int size1, const char *string2, int size2, int pos, struct re_registers *regs, int stop)
  {
    int result;
  
  #ifdef emacs
    int charpos;
-  gl_state.object = re_match_object;
+  gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */
    charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos));
    SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
  #endif
@@ -5108,13 +5019,7 @@ WEAK_ALIAS (__re_match_2, re_match_2)
  /* This is a separate function so that we can force an alloca cleanup
     afterwards.  */
  static int
-re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
-     struct re_pattern_buffer *bufp;
-     re_char *string1, *string2;
-     int size1, size2;
-     int pos;
-     struct re_registers *regs;
-     int stop;
+re_match_2_internal (struct re_pattern_buffer *bufp, const re_char *string1, int size1, const re_char *string2, int size2, int pos, struct re_registers *regs, int stop)
  {
    /* General temporaries.  */
    int mcnt;
@@ -6445,11 +6350,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
     bytes; nonzero otherwise.  */
  
  static int
-bcmp_translate (s1, s2, len, translate, target_multibyte)
-     re_char *s1, *s2;
-     register int len;
-     RE_TRANSLATE_TYPE translate;
-     const int target_multibyte;
+bcmp_translate (const re_char *s1, const re_char *s2, register int len,
+               RE_TRANSLATE_TYPE translate, const int target_multibyte)
  {
    register re_char *p1 = s1, *p2 = s2;
    re_char *p1_end = s1 + len;
@@ -6490,17 +6392,10 @@ bcmp_translate (s1, s2, len, translate, target_multibyte)
     We call regex_compile to do the actual compilation.  */
  
  const char *
-re_compile_pattern (pattern, length, bufp)
-     const char *pattern;
-     size_t length;
-     struct re_pattern_buffer *bufp;
+re_compile_pattern (const char *pattern, size_t length, struct re_pattern_buffer *bufp)
  {
    reg_errcode_t ret;
  
-#ifdef emacs
-  gl_state.current_syntax_table = current_buffer->syntax_table;
-#endif
-
    /* GNU code is written to assume at least RE_NREGS registers will be set
       (and at least one extra will be -1).  */
    bufp->regs_allocated = REGS_UNALLOCATED;
@@ -6625,10 +6520,8 @@ re_exec (s)
     the return codes and their meanings.)  */
  
  int
-regcomp (preg, pattern, cflags)
-    regex_t *__restrict preg;
-    const char *__restrict pattern;
-    int cflags;
+regcomp (regex_t *__restrict preg, const char *__restrict pattern,
+        int cflags)
  {
    reg_errcode_t ret;
    reg_syntax_t syntax
@@ -6710,12 +6603,8 @@ WEAK_ALIAS (__regcomp, regcomp)
     We return 0 if we find a match and REG_NOMATCH if not.  */
  
  int
-regexec (preg, string, nmatch, pmatch, eflags)
-    const regex_t *__restrict preg;
-    const char *__restrict string;
-    size_t nmatch;
-    regmatch_t pmatch[__restrict_arr];
-    int eflags;
+regexec (const regex_t *__restrict preg, const char *__restrict string,
+        size_t nmatch, regmatch_t pmatch[__restrict_arr], int eflags)
  {
    int ret;
    struct re_registers regs;
@@ -6787,11 +6676,7 @@ WEAK_ALIAS (__regexec, regexec)
     error with msvc8 compiler.  */
  
  size_t
-regerror (err_code, preg, errbuf, errbuf_size)
-    int err_code;
-    const regex_t *preg;
-    char *errbuf;
-    size_t errbuf_size;
+regerror (int err_code, const regex_t *preg, char *errbuf, size_t errbuf_size)
  {
    const char *msg;
    size_t msg_size;
@@ -6827,8 +6712,7 @@ WEAK_ALIAS (__regerror, regerror)
  /* Free dynamically allocated space used by PREG.  */
  
  void
-regfree (preg)
-    regex_t *preg;
+regfree (regex_t *preg)
  {
    free (preg->buffer);
    preg->buffer = NULL;
@@ -6846,6 +6730,3 @@ regfree (preg)
  WEAK_ALIAS (__regfree, regfree)
  
  #endif /* not emacs  */
-
-/* arch-tag: 4ffd68ba-2a9e-435b-a21a-018990f9eeb2
-   (do not change this comment) */