Convert (most) functions in src to standard C.

[bpt/emacs.git] / src / character.c
diff --git a/src/character.c b/src/character.c

index 27ea100..a6c38df 100644 (file)
--- a/src/character.c
+++ b/src/character.c
@@ -1,9 +1,9 @@
  /* Basic character support.
     Copyright (C) 1995, 1997, 1998, 2001 Electrotechnical Laboratory, JAPAN.
       Licensed to the Free Software Foundation.
-   Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
+   Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
       Free Software Foundation, Inc.
-   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008
+   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
       National Institute of Advanced Industrial Science and Technology (AIST)
       Registration Number H13PRO009
  
@@ -34,6 +34,7 @@ along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  #ifdef emacs
  
  #include <sys/types.h>
+#include <setjmp.h>
  #include "lisp.h"
  #include "character.h"
  #include "buffer.h"
@@ -86,22 +87,13 @@ Lisp_Object Vscript_representative_chars;
  static Lisp_Object Qchar_script_table;
  
  Lisp_Object Vunicode_category_table;
-
-/* Mapping table from unibyte chars to multibyte chars.  */
-int unibyte_to_multibyte_table[256];
-
-/* Nth element is 1 iff unibyte char N can be mapped to a multibyte
-   char.  */
-char unibyte_has_multibyte_table[256];
-
  \f
  
  /* If character code C has modifier masks, reflect them to the
     character code if possible.  Return the resulting code.  */
  
  int
-char_resolve_modifier_mask (c)
-     int c;
+char_resolve_modifier_mask (int c)
  {
    /* A non-ASCII character can't reflect modifier bits to the code.  */
    if (! ASCII_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
@@ -134,11 +126,13 @@ char_resolve_modifier_mask (c)
        else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
         c &= (037 | (~0177 & ~CHAR_CTL));
      }
+#if 0  /* This is outside the scope of this function.  (bug#4751)  */
    if (c & CHAR_META)
      {
        /* Move the meta bit to the right place for a string.  */
        c = (c & ~CHAR_META) | 0x80;
      }
+#endif
  
    return c;
  }
@@ -148,9 +142,7 @@ char_resolve_modifier_mask (c)
     handle them appropriately.  */
  
  int
-char_string (c, p)
-     unsigned c;
-     unsigned char *p;
+char_string (unsigned int c, unsigned char *p)
  {
    int bytes;
  
@@ -204,10 +196,7 @@ char_string (c, p)
     character) of the multibyte form.  */
  
  int
-string_char (p, advanced, len)
-     const unsigned char *p;
-     const unsigned char **advanced;
-     int *len;
+string_char (const unsigned char *p, const unsigned char **advanced, int *len)
  {
    int c;
    const unsigned char *saved_p = p;
@@ -250,9 +239,7 @@ string_char (p, advanced, len)
     case, translace C by all tables.  */
  
  int
-translate_char (table, c)
-     Lisp_Object table;
-     int c;
+translate_char (Lisp_Object table, int c)
  {
    if (CHAR_TABLE_P (table))
      {
@@ -270,43 +257,33 @@ translate_char (table, c)
    return c;
  }
  
-/* Convert the multibyte character C to unibyte 8-bit character based
-   on the current value of charset_unibyte.  If dimension of
-   charset_unibyte is more than one, return (C & 0xFF).
+/* Convert ASCII or 8-bit character C to unibyte.  If C is none of
+   them, return (C & 0xFF).
  
     The argument REV_TBL is now ignored.  It will be removed in the
     future.  */
  
  int
-multibyte_char_to_unibyte (c, rev_tbl)
-     int c;
-     Lisp_Object rev_tbl;
+multibyte_char_to_unibyte (int c, Lisp_Object rev_tbl)
  {
-  struct charset *charset;
-  unsigned c1;
-
+  if (c < 0x80)
+    return c;
    if (CHAR_BYTE8_P (c))
      return CHAR_TO_BYTE8 (c);
-  charset = CHARSET_FROM_ID (charset_unibyte);
-  c1 = ENCODE_CHAR (charset, c);
-  return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : c & 0xFF);
+  return (c & 0xFF);
  }
  
  /* Like multibyte_char_to_unibyte, but return -1 if C is not supported
     by charset_unibyte.  */
  
  int
-multibyte_char_to_unibyte_safe (c)
-     int c;
+multibyte_char_to_unibyte_safe (int c)
  {
-  struct charset *charset;
-  unsigned c1;
-
+  if (c < 0x80)
+    return c;
    if (CHAR_BYTE8_P (c))
      return CHAR_TO_BYTE8 (c);
-  charset = CHARSET_FROM_ID (charset_unibyte);
-  c1 = ENCODE_CHAR (charset, c);
-  return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : -1);
+  return -1;
  }
  
  DEFUN ("characterp", Fcharacterp, Scharacterp, 1, 2, 0,
@@ -331,16 +308,12 @@ DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
       Lisp_Object ch;
  {
    int c;
-  struct charset *charset;
  
    CHECK_CHARACTER (ch);
    c = XFASTINT (ch);
-  if (c >= 0400)
-    error ("Invalid unibyte character: %d", c);
-  charset = CHARSET_FROM_ID (charset_unibyte);
-  c = DECODE_CHAR (charset, c);
-  if (c < 0)
-    c = BYTE8_TO_CHAR (XFASTINT (ch));
+  if (c >= 0x100)
+    error ("Not a unibyte character: %d", c);
+  MAKE_CHAR_MULTIBYTE (c);
    return make_number (c);
  }
  
@@ -411,9 +384,7 @@ usage: (char-width CHAR)  */)
     respectively.  */
  
  int
-c_string_width (str, len, precision, nchars, nbytes)
-     const unsigned char *str;
-     int precision, *nchars, *nbytes;
+c_string_width (const unsigned char *str, int len, int precision, int *nchars, int *nbytes)
  {
    int i = 0, i_byte = 0;
    int width = 0;
@@ -423,7 +394,7 @@ c_string_width (str, len, precision, nchars, nbytes)
      {
        int bytes, thiswidth;
        Lisp_Object val;
-      int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
+      int c = STRING_CHAR_AND_LENGTH (str + i_byte, bytes);
  
        if (dp)
         {
@@ -464,9 +435,7 @@ c_string_width (str, len, precision, nchars, nbytes)
     occupies on the screen.  */
  
  int
-strwidth (str, len)
-     unsigned char *str;
-     int len;
+strwidth (unsigned char *str, int len)
  {
    return c_string_width (str, len, -1, NULL, NULL);
  }
@@ -479,9 +448,7 @@ strwidth (str, len)
     in *NCHARS and *NBYTES respectively.  */
  
  int
-lisp_string_width (string, precision, nchars, nbytes)
-     Lisp_Object string;
-     int precision, *nchars, *nbytes;
+lisp_string_width (Lisp_Object string, int precision, int *nchars, int *nbytes)
  {
    int len = SCHARS (string);
    /* This set multibyte to 0 even if STRING is multibyte when it
@@ -513,7 +480,7 @@ lisp_string_width (string, precision, nchars, nbytes)
           int c;
  
           if (multibyte)
-           c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
+           c = STRING_CHAR_AND_LENGTH (str + i_byte, bytes);
           else
             c = str[i_byte], bytes = 1;
           chars = 1;
@@ -591,9 +558,7 @@ usage: (char-direction CHAR)  */)
     nil, we treat each byte as a character.  */
  
  EMACS_INT
-chars_in_text (ptr, nbytes)
-     const unsigned char *ptr;
-     EMACS_INT nbytes;
+chars_in_text (const unsigned char *ptr, EMACS_INT nbytes)
  {
    /* current_buffer is null at early stages of Emacs initialization.  */
    if (current_buffer == 0
@@ -609,9 +574,7 @@ chars_in_text (ptr, nbytes)
     ignores enable-multibyte-characters.  */
  
  EMACS_INT
-multibyte_chars_in_text (ptr, nbytes)
-     const unsigned char *ptr;
-     EMACS_INT nbytes;
+multibyte_chars_in_text (const unsigned char *ptr, EMACS_INT nbytes)
  {
    const unsigned char *endp = ptr + nbytes;
    int chars = 0;
@@ -636,9 +599,7 @@ multibyte_chars_in_text (ptr, nbytes)
     represented by 2-byte in a multibyte text.  */
  
  void
-parse_str_as_multibyte (str, len, nchars, nbytes)
-     const unsigned char *str;
-     int len, *nchars, *nbytes;
+parse_str_as_multibyte (const unsigned char *str, int len, int *nchars, int *nbytes)
  {
    const unsigned char *endp = str + len;
    int n, chars = 0, bytes = 0;
@@ -648,7 +609,8 @@ parse_str_as_multibyte (str, len, nchars, nbytes)
        const unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
        while (str < adjusted_endp)
         {
-         if ((n = MULTIBYTE_LENGTH_NO_CHECK (str)) > 0)
+         if (! CHAR_BYTE8_HEAD_P (*str)
+             && (n = MULTIBYTE_LENGTH_NO_CHECK (str)) > 0)
             str += n, bytes += n;
           else
             str++, bytes += 2;
@@ -657,7 +619,8 @@ parse_str_as_multibyte (str, len, nchars, nbytes)
      }
    while (str < endp)
      {
-      if ((n = MULTIBYTE_LENGTH (str, endp)) > 0)
+      if (! CHAR_BYTE8_HEAD_P (*str)
+         && (n = MULTIBYTE_LENGTH (str, endp)) > 0)
         str += n, bytes += n;
        else
         str++, bytes += 2;
@@ -678,9 +641,7 @@ parse_str_as_multibyte (str, len, nchars, nbytes)
     resulting text.  */
  
  int
-str_as_multibyte (str, len, nbytes, nchars)
-     unsigned char *str;
-     int len, nbytes, *nchars;
+str_as_multibyte (unsigned char *str, int len, int nbytes, int *nchars)
  {
    unsigned char *p = str, *endp = str + nbytes;
    unsigned char *to;
@@ -691,10 +652,13 @@ str_as_multibyte (str, len, nbytes, nchars)
      {
        unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
        while (p < adjusted_endp
+            && ! CHAR_BYTE8_HEAD_P (*p)
              && (n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
         p += n, chars++;
      }
-  while ((n = MULTIBYTE_LENGTH (p, endp)) > 0)
+  while (p < endp
+        && ! CHAR_BYTE8_HEAD_P (*p)
+        && (n = MULTIBYTE_LENGTH (p, endp)) > 0)
      p += n, chars++;
    if (nchars)
      *nchars = chars;
@@ -712,7 +676,8 @@ str_as_multibyte (str, len, nbytes, nchars)
        unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
        while (p < adjusted_endp)
         {
-         if ((n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
+         if (! CHAR_BYTE8_HEAD_P (*p)
+             && (n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
             {
               while (n--)
                 *to++ = *p++;
@@ -728,7 +693,8 @@ str_as_multibyte (str, len, nbytes, nchars)
      }
    while (p < endp)
      {
-      if ((n = MULTIBYTE_LENGTH (p, endp)) > 0)
+      if (! CHAR_BYTE8_HEAD_P (*p)
+         && (n = MULTIBYTE_LENGTH (p, endp)) > 0)
         {
           while (n--)
             *to++ = *p++;
@@ -751,9 +717,7 @@ str_as_multibyte (str, len, nbytes, nchars)
     `str_to_multibyte'.  */
  
  int
-parse_str_to_multibyte (str, len)
-     unsigned char *str;
-     int len;
+parse_str_to_multibyte (unsigned char *str, int len)
  {
    unsigned char *endp = str + len;
    int bytes;
@@ -771,9 +735,7 @@ parse_str_to_multibyte (str, len)
     enough.  */
  
  int
-str_to_multibyte (str, len, bytes)
-     unsigned char *str;
-     int len, bytes;
+str_to_multibyte (unsigned char *str, int len, int bytes)
  {
    unsigned char *p = str, *endp = str + bytes;
    unsigned char *to;
@@ -802,9 +764,7 @@ str_to_multibyte (str, len, bytes)
     unibyte.  */
  
  int
-str_as_unibyte (str, bytes)
-     unsigned char *str;
-     int bytes;
+str_as_unibyte (unsigned char *str, int bytes)
  {
    const unsigned char *p = str, *endp = str + bytes;
    unsigned char *to;
@@ -846,11 +806,7 @@ str_as_unibyte (str, bytes)
     Note: Currently the arg ACCEPT_LATIN_1 is not used.  */
  
  EMACS_INT
-str_to_unibyte (src, dst, chars, accept_latin_1)
-     const unsigned char *src;
-     unsigned char *dst;
-     EMACS_INT chars;
-     int accept_latin_1;
+str_to_unibyte (const unsigned char *src, unsigned char *dst, EMACS_INT chars, int accept_latin_1)
  {
    EMACS_INT i;
  
@@ -870,8 +826,7 @@ str_to_unibyte (src, dst, chars, accept_latin_1)
  
  
  int
-string_count_byte8 (string)
-     Lisp_Object string;
+string_count_byte8 (Lisp_Object string)
  {
    int multibyte = STRING_MULTIBYTE (string);
    int nbytes = SBYTES (string);
@@ -901,8 +856,7 @@ string_count_byte8 (string)
  
  
  Lisp_Object
-string_escape_byte8 (string)
-     Lisp_Object string;
+string_escape_byte8 (Lisp_Object string)
  {
    int nchars = SCHARS (string);
    int nbytes = SBYTES (string);
@@ -972,10 +926,13 @@ usage: (string &rest CHARACTERS)  */)
       int n;
       Lisp_Object *args;
  {
-  int i;
-  unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n);
-  unsigned char *p = buf;
-  int c;
+  int i, c;
+  unsigned char *buf, *p;
+  Lisp_Object str;
+  USE_SAFE_ALLOCA;
+
+  SAFE_ALLOCA (buf, unsigned char *, MAX_MULTIBYTE_LENGTH * n);
+  p = buf;
  
    for (i = 0; i < n; i++)
      {
@@ -984,7 +941,9 @@ usage: (string &rest CHARACTERS)  */)
        p += CHAR_STRING (c, p);
      }
  
-  return make_string_from_bytes ((char *) buf, n, p - buf);
+  str = make_string_from_bytes ((char *) buf, n, p - buf);
+  SAFE_FREE ();
+  return str;
  }
  
  DEFUN ("unibyte-string", Funibyte_string, Sunibyte_string, 0, MANY, 0,
@@ -994,10 +953,13 @@ usage: (unibyte-string &rest BYTES)  */)
       int n;
       Lisp_Object *args;
  {
-  int i;
-  unsigned char *buf = (unsigned char *) alloca (n);
-  unsigned char *p = buf;
-  unsigned c;
+  int i, c;
+  unsigned char *buf, *p;
+  Lisp_Object str;
+  USE_SAFE_ALLOCA;
+
+  SAFE_ALLOCA (buf, unsigned char *, n);
+  p = buf;
  
    for (i = 0; i < n; i++)
      {
@@ -1008,7 +970,9 @@ usage: (unibyte-string &rest BYTES)  */)
        *p++ = c;
      }
  
-  return make_string_from_bytes ((char *) buf, n, p - buf);
+  str = make_string_from_bytes ((char *) buf, n, p - buf);
+  SAFE_FREE ();
+  return str;
  }
  
  DEFUN ("char-resolve-modifiers", Fchar_resolve_modifiers,
@@ -1049,7 +1013,7 @@ character is not ASCII nor 8-bit character, an error is signalled.  */)
        if (NILP (position))
         {
           p = PT_ADDR;
-       }         
+       }
        else
         {
           CHECK_NUMBER_COERCE_MARKER (position);
@@ -1079,7 +1043,7 @@ character is not ASCII nor 8-bit character, an error is signalled.  */)
        if (! STRING_MULTIBYTE (string))
         return make_number (*p);
      }
-  c = STRING_CHAR (p, 0);
+  c = STRING_CHAR (p);
    if (CHAR_BYTE8_P (c))
      c = CHAR_TO_BYTE8 (c);
    else if (! ASCII_CHAR_P (c))
@@ -1089,14 +1053,14 @@ character is not ASCII nor 8-bit character, an error is signalled.  */)
  
  
  void
-init_character_once ()
+init_character_once (void)
  {
  }
  
  #ifdef emacs
  
  void
-syms_of_character ()
+syms_of_character (void)
  {
    DEFSYM (Qcharacterp, "characterp");
    DEFSYM (Qauto_fill_chars, "auto-fill-chars");
@@ -1160,14 +1124,15 @@ It has one extra slot whose value is a list of script symbols.  */);
    /* Intern this now in case it isn't already done.
       Setting this variable twice is harmless.
       But don't staticpro it here--that is done in alloc.c.  */
-  Qchar_table_extra_slots = intern ("char-table-extra-slots");
+  Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
    DEFSYM (Qchar_script_table, "char-script-table");
    Fput (Qchar_script_table, Qchar_table_extra_slots, make_number (1));
    Vchar_script_table = Fmake_char_table (Qchar_script_table, Qnil);
  
    DEFVAR_LISP ("script-representative-chars", &Vscript_representative_chars,
                doc: /* Alist of scripts vs the representative characters.
-Each element is a cons (SCRIPT . CHARS), where SCRIPT is a script name symbol,
+Each element is a cons (SCRIPT . CHARS).
+SCRIPT is a symbol representing a script or a subgroup of a script.
  CHARS is a list or a vector of characters.
  If it is a list, all characters in the list are necessary for supporting SCRIPT.
  If it is a vector, one of the characters in the vector is necessary.