Fix shrinking of contiguous bytevectors, as from 'get-bytevector-n'.

[bpt/guile.git] / libguile / bytevectors.c
diff --git a/libguile/bytevectors.c b/libguile/bytevectors.c

index 4246f01..b210440 100644 (file)
--- a/libguile/bytevectors.c
+++ b/libguile/bytevectors.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2009 Free Software Foundation, Inc.
+/* Copyright (C) 2009, 2010, 2011, 2012, 2014 Free Software Foundation, Inc.
   *
   * This library is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public License
@@ -22,6 +22,7 @@
  #endif
  
  #include <alloca.h>
+#include <assert.h>
  
  #include <gmp.h>
  
@@ -30,7 +31,6 @@
  #include "libguile/bytevectors.h"
  #include "libguile/strings.h"
  #include "libguile/validate.h"
-#include "libguile/ieee-754.h"
  #include "libguile/arrays.h"
  #include "libguile/array-handle.h"
  #include "libguile/uniform.h"
@@ -39,6 +39,7 @@
  #include <byteswap.h>
  #include <striconveh.h>
  #include <uniconv.h>
+#include <unistr.h>
  
  #ifdef HAVE_LIMITS_H
  # include <limits.h>
@@ -129,7 +130,7 @@
    SCM_VALIDATE_SYMBOL (3, endianness);                         \
                                                                 \
    {                                                            \
-    _sign long c_value;                                                \
+    scm_t_signed_bits c_value;                                 \
      INT_TYPE (_len, _sign) c_value_short;                      \
                                                                 \
      if (SCM_UNLIKELY (!SCM_I_INUMP (value)))                   \
@@ -154,7 +155,7 @@
    INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                     \
                                                                 \
    {                                                            \
-    _sign long c_value;                                                \
+    scm_t_signed_bits c_value;                                 \
      INT_TYPE (_len, _sign) c_value_short;                      \
                                                                 \
      if (SCM_UNLIKELY (!SCM_I_INUMP (value)))                   \
@@ -176,17 +177,25 @@
  /* Bytevector type.  */
  
  #define SCM_BYTEVECTOR_HEADER_BYTES            \
-  (SCM_BYTEVECTOR_HEADER_SIZE * sizeof (SCM))
+  (SCM_BYTEVECTOR_HEADER_SIZE * sizeof (scm_t_bits))
  
  #define SCM_BYTEVECTOR_SET_LENGTH(_bv, _len)            \
    SCM_SET_CELL_WORD_1 ((_bv), (scm_t_bits) (_len))
-
-#define SCM_BYTEVECTOR_SET_ELEMENT_TYPE(bv, hint)      \
-  SCM_SET_BYTEVECTOR_FLAGS ((bv), (hint))
+#define SCM_BYTEVECTOR_SET_CONTENTS(_bv, _contents)    \
+  SCM_SET_CELL_WORD_2 ((_bv), (scm_t_bits) (_contents))
+#define SCM_BYTEVECTOR_SET_CONTIGUOUS_P(bv, contiguous_p)      \
+  SCM_SET_BYTEVECTOR_FLAGS ((bv),                              \
+                           SCM_BYTEVECTOR_ELEMENT_TYPE (bv)    \
+                           | ((contiguous_p) << 8UL))
+
+#define SCM_BYTEVECTOR_SET_ELEMENT_TYPE(bv, hint)                      \
+  SCM_SET_BYTEVECTOR_FLAGS ((bv),                                      \
+                            (hint)                                     \
+                            | (SCM_BYTEVECTOR_CONTIGUOUS_P (bv) << 8UL))
  #define SCM_BYTEVECTOR_TYPE_SIZE(var)                           \
    (scm_i_array_element_type_sizes[SCM_BYTEVECTOR_ELEMENT_TYPE (var)]/8)
  #define SCM_BYTEVECTOR_TYPED_LENGTH(var)                        \
-  SCM_BYTEVECTOR_LENGTH (var) / SCM_BYTEVECTOR_TYPE_SIZE (var)
+  (SCM_BYTEVECTOR_LENGTH (var) / SCM_BYTEVECTOR_TYPE_SIZE (var))
  
  /* The empty bytevector.  */
  SCM scm_null_bytevector = SCM_UNSPECIFIED;
@@ -210,13 +219,18 @@ make_bytevector (size_t len, scm_t_array_element_type element_type)
      ret = scm_null_bytevector;
    else
      {
+      signed char *contents;
+
        c_len = len * (scm_i_array_element_type_sizes[element_type] / 8);
  
-      ret = PTR2SCM (scm_gc_malloc_pointerless (SCM_BYTEVECTOR_HEADER_BYTES + c_len,
-                                               SCM_GC_BYTEVECTOR));
+      contents = scm_gc_malloc_pointerless (SCM_BYTEVECTOR_HEADER_BYTES + c_len,
+                                           SCM_GC_BYTEVECTOR);
+      ret = PTR2SCM (contents);
+      contents += SCM_BYTEVECTOR_HEADER_BYTES;
  
-      SCM_SET_CELL_TYPE (ret, scm_tc7_bytevector);
        SCM_BYTEVECTOR_SET_LENGTH (ret, c_len);
+      SCM_BYTEVECTOR_SET_CONTENTS (ret, contents);
+      SCM_BYTEVECTOR_SET_CONTIGUOUS_P (ret, 1);
        SCM_BYTEVECTOR_SET_ELEMENT_TYPE (ret, element_type);
      }
  
@@ -224,28 +238,29 @@ make_bytevector (size_t len, scm_t_array_element_type element_type)
  }
  
  /* Return a bytevector of LEN elements of type ELEMENT_TYPE, with element
-   values taken from CONTENTS.  */
+   values taken from CONTENTS.  Assume that the storage for CONTENTS will be
+   automatically reclaimed when it becomes unreachable.  */
  static inline SCM
  make_bytevector_from_buffer (size_t len, void *contents,
                              scm_t_array_element_type element_type)
  {
    SCM ret;
  
-  /* We actually never reuse storage from CONTENTS.  Hans Boehm says in
-     <gc/gc.h> that realloc(3) "shouldn't have been invented" and he may well
-     be right.  */
-  ret = make_bytevector (len, element_type);
-
-  if (len > 0)
+  if (SCM_UNLIKELY (len == 0))
+    ret = make_bytevector (len, element_type);
+  else
      {
        size_t c_len;
  
+      ret = PTR2SCM (scm_gc_malloc (SCM_BYTEVECTOR_HEADER_BYTES,
+                                   SCM_GC_BYTEVECTOR));
+
        c_len = len * (scm_i_array_element_type_sizes[element_type] / 8);
-      memcpy (SCM_BYTEVECTOR_CONTENTS (ret),
-             contents,
-             c_len);
  
-      scm_gc_free (contents, c_len, SCM_GC_BYTEVECTOR);
+      SCM_BYTEVECTOR_SET_LENGTH (ret, c_len);
+      SCM_BYTEVECTOR_SET_CONTENTS (ret, contents);
+      SCM_BYTEVECTOR_SET_CONTIGUOUS_P (ret, 0);
+      SCM_BYTEVECTOR_SET_ELEMENT_TYPE (ret, element_type);
      }
  
    return ret;
@@ -269,7 +284,7 @@ scm_i_make_typed_bytevector (size_t len, scm_t_array_element_type element_type)
  /* Return a bytevector of size LEN made up of CONTENTS.  The area pointed to
     by CONTENTS must have been allocated using `scm_gc_malloc ()'.  */
  SCM
-scm_c_take_bytevector (signed char *contents, size_t len)
+scm_c_take_gc_bytevector (signed char *contents, size_t len)
  {
    return make_bytevector_from_buffer (len, contents, SCM_ARRAY_ELEMENT_TYPE_VU8);
  }
@@ -299,11 +314,27 @@ scm_c_shrink_bytevector (SCM bv, size_t c_new_len)
  
    SCM_BYTEVECTOR_SET_LENGTH (bv, c_new_len);
  
-  /* Resize the existing buffer.  */
-  new_bv = PTR2SCM (scm_gc_realloc (SCM2PTR (bv),
-                                   c_len + SCM_BYTEVECTOR_HEADER_BYTES,
-                                   c_new_len + SCM_BYTEVECTOR_HEADER_BYTES,
-                                   SCM_GC_BYTEVECTOR));
+  if (SCM_BYTEVECTOR_CONTIGUOUS_P (bv))
+    {
+      signed char *c_bv;
+
+      c_bv = scm_gc_realloc (SCM2PTR (bv),
+                            c_len + SCM_BYTEVECTOR_HEADER_BYTES,
+                            c_new_len + SCM_BYTEVECTOR_HEADER_BYTES,
+                            SCM_GC_BYTEVECTOR);
+      new_bv = PTR2SCM (c_bv);
+      SCM_BYTEVECTOR_SET_CONTENTS (new_bv, c_bv + SCM_BYTEVECTOR_HEADER_BYTES);
+    }
+  else
+    {
+      signed char *c_bv;
+
+      c_bv = scm_gc_realloc (SCM_BYTEVECTOR_CONTENTS (bv),
+                            c_len, c_new_len, SCM_GC_BYTEVECTOR);
+      SCM_BYTEVECTOR_SET_CONTENTS (bv, c_bv);
+
+      new_bv = bv;
+    }
  
    return new_bv;
  }
@@ -436,7 +467,7 @@ SCM_DEFINE (scm_make_bytevector, "make-bytevector", 1, 1, 0,
    signed char c_fill = '\0';
  
    SCM_VALIDATE_UINT_COPY (1, len, c_len);
-  if (fill != SCM_UNDEFINED)
+  if (!scm_is_eq (fill, SCM_UNDEFINED))
      {
        int value;
  
@@ -447,7 +478,7 @@ SCM_DEFINE (scm_make_bytevector, "make-bytevector", 1, 1, 0,
      }
  
    bv = make_bytevector (c_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
-  if (fill != SCM_UNDEFINED)
+  if (!scm_is_eq (fill, SCM_UNDEFINED))
      {
        unsigned i;
        signed char *contents;
@@ -456,6 +487,8 @@ SCM_DEFINE (scm_make_bytevector, "make-bytevector", 1, 1, 0,
        for (i = 0; i < c_len; i++)
         contents[i] = c_fill;
      }
+  else
+    memset (SCM_BYTEVECTOR_CONTENTS (bv), 0, c_len);
  
    return bv;
  }
@@ -485,7 +518,8 @@ SCM_DEFINE (scm_bytevector_eq_p, "bytevector=?", 2, 0, 0,
    c_len1 = SCM_BYTEVECTOR_LENGTH (bv1);
    c_len2 = SCM_BYTEVECTOR_LENGTH (bv2);
  
-  if (c_len1 == c_len2)
+  if (c_len1 == c_len2 && (SCM_BYTEVECTOR_ELEMENT_TYPE (bv1)
+                           == SCM_BYTEVECTOR_ELEMENT_TYPE (bv2)))
      {
        signed char *c_bv1, *c_bv2;
  
@@ -550,9 +584,9 @@ SCM_DEFINE (scm_bytevector_copy_x, "bytevector-copy!", 5, 0, 0,
    if (SCM_UNLIKELY (c_target_start + c_len > c_target_len))
      scm_out_of_range (FUNC_NAME, target_start);
  
-  memcpy (c_target + c_target_start,
-         c_source + c_source_start,
-         c_len);
+  memmove (c_target + c_target_start,
+          c_source + c_source_start,
+          c_len);
  
    return SCM_UNSPECIFIED;
  }
@@ -587,23 +621,31 @@ SCM_DEFINE (scm_uniform_array_to_bytevector, "uniform-array->bytevector",
  #define FUNC_NAME s_scm_uniform_array_to_bytevector
  {
    SCM contents, ret;
-  size_t len;
+  size_t len, sz, byte_len;
    scm_t_array_handle h;
-  const void *base;
-  size_t sz;
+  const void *elts;
    
    contents = scm_array_contents (array, SCM_BOOL_T);
    if (scm_is_false (contents))
      scm_wrong_type_arg_msg (FUNC_NAME, 0, array, "uniform contiguous array");
  
    scm_array_get_handle (contents, &h);
+  assert (h.base == 0);
  
-  base = scm_array_handle_uniform_elements (&h);
+  elts = h.elements;
    len = h.dims->inc * (h.dims->ubnd - h.dims->lbnd + 1);
-  sz = scm_array_handle_uniform_element_size (&h);
+  sz = scm_array_handle_uniform_element_bit_size (&h);
+  if (sz >= 8 && ((sz % 8) == 0))
+    byte_len = len * (sz / 8);
+  else if (sz < 8)
+    /* byte_len = ceil (len * sz / 8) */
+    byte_len = (len * sz + 7) / 8;
+  else
+    /* an internal guile error, really */
+    SCM_MISC_ERROR ("uniform elements larger than 8 bits must fill whole bytes", SCM_EOL);
  
-  ret = make_bytevector (len * sz, SCM_ARRAY_ELEMENT_TYPE_VU8);
-  memcpy (SCM_BYTEVECTOR_CONTENTS (ret), base, len * sz);
+  ret = make_bytevector (byte_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
+  memcpy (SCM_BYTEVECTOR_CONTENTS (ret), elts, byte_len);
  
    scm_array_handle_release (&h);
  
@@ -700,7 +742,7 @@ SCM_DEFINE (scm_u8_list_to_bytevector, "u8-list->bytevector", 1, 0, 0,
  
        if (SCM_LIKELY (SCM_I_INUMP (item)))
         {
-         long c_item;
+         scm_t_signed_bits c_item;
  
           c_item = SCM_I_INUM (item);
           if (SCM_LIKELY ((c_item >= 0) && (c_item < 256)))
@@ -916,7 +958,7 @@ bytevector_unsigned_ref (const char *c_bv, size_t c_size, SCM endianness)
  #define GENERIC_INTEGER_SET(_sign)                                     \
    if (c_size < 3)                                                      \
      {                                                                  \
-      _sign int c_value;                                               \
+      scm_t_signed_bits c_value;                                       \
                                                                         \
        if (SCM_UNLIKELY (!SCM_I_INUMP (value)))                         \
         goto range_error;                                               \
@@ -1063,20 +1105,18 @@ SCM_DEFINE (scm_bytevector_sint_set_x, "bytevector-sint-set!", 5, 0, 0,
                                                                         \
    SCM_VALIDATE_BYTEVECTOR (1, bv);                                     \
    SCM_VALIDATE_SYMBOL (2, endianness);                                 \
-  c_size = scm_to_uint (size);                                         \
+  c_size = scm_to_unsigned_integer (size, 1, (size_t) -1);             \
                                                                         \
    c_len = SCM_BYTEVECTOR_LENGTH (bv);                                  \
-  if (SCM_UNLIKELY (c_len == 0))                                       \
+  if (SCM_UNLIKELY (c_len < c_size))                                   \
      lst = SCM_EOL;                                                     \
-  else if (SCM_UNLIKELY (c_len < c_size))                              \
-    scm_out_of_range (FUNC_NAME, size);                                        \
    else                                                                 \
      {                                                                  \
        const char *c_bv;                                                        \
                                                                         \
        c_bv = (char *) SCM_BYTEVECTOR_CONTENTS (bv);                    \
                                                                         \
-      lst = scm_make_list (scm_from_uint (c_len / c_size),             \
+      lst = scm_make_list (scm_from_size_t (c_len / c_size),           \
                            SCM_UNSPECIFIED);                            \
        for (i = 0, pair = lst;                                          \
            i <= c_len - c_size;                                         \
@@ -1530,6 +1570,18 @@ SCM_DEFINE (scm_bytevector_s64_native_set_x, "bytevector-s64-native-set!",
     Section 2.1 of R6RS-lib (in response to
     http://www.r6rs.org/formal-comments/comment-187.txt).  */
  
+union scm_ieee754_float
+{
+  float f;
+  scm_t_uint32 i;
+};
+
+union scm_ieee754_double
+{
+  double d;
+  scm_t_uint64 i;
+};
+
  
  /* Convert to/from a floating-point number with different endianness.  This
     method is probably not the most efficient but it should be portable.  */
@@ -1538,20 +1590,10 @@ static inline void
  float_to_foreign_endianness (union scm_ieee754_float *target,
                              float source)
  {
-  union scm_ieee754_float src;
-
-  src.f = source;
+  union scm_ieee754_float input;
  
-#ifdef WORDS_BIGENDIAN
-  /* Assuming little endian for both byte and word order.  */
-  target->little_endian.negative = src.big_endian.negative;
-  target->little_endian.exponent = src.big_endian.exponent;
-  target->little_endian.mantissa = src.big_endian.mantissa;
-#else
-  target->big_endian.negative = src.little_endian.negative;
-  target->big_endian.exponent = src.little_endian.exponent;
-  target->big_endian.mantissa = src.little_endian.mantissa;
-#endif
+  input.f = source;
+  target->i = bswap_32 (input.i);
  }
  
  static inline float
@@ -1559,16 +1601,7 @@ float_from_foreign_endianness (const union scm_ieee754_float *source)
  {
    union scm_ieee754_float result;
  
-#ifdef WORDS_BIGENDIAN
-  /* Assuming little endian for both byte and word order.  */
-  result.big_endian.negative = source->little_endian.negative;
-  result.big_endian.exponent = source->little_endian.exponent;
-  result.big_endian.mantissa = source->little_endian.mantissa;
-#else
-  result.little_endian.negative = source->big_endian.negative;
-  result.little_endian.exponent = source->big_endian.exponent;
-  result.little_endian.mantissa = source->big_endian.mantissa;
-#endif
+  result.i = bswap_32 (source->i);
  
    return (result.f);
  }
@@ -1577,22 +1610,10 @@ static inline void
  double_to_foreign_endianness (union scm_ieee754_double *target,
                               double source)
  {
-  union scm_ieee754_double src;
-
-  src.d = source;
+  union scm_ieee754_double input;
  
-#ifdef WORDS_BIGENDIAN
-  /* Assuming little endian for both byte and word order.  */
-  target->little_little_endian.negative  = src.big_endian.negative;
-  target->little_little_endian.exponent  = src.big_endian.exponent;
-  target->little_little_endian.mantissa0 = src.big_endian.mantissa0;
-  target->little_little_endian.mantissa1 = src.big_endian.mantissa1;
-#else
-  target->big_endian.negative  = src.little_little_endian.negative;
-  target->big_endian.exponent  = src.little_little_endian.exponent;
-  target->big_endian.mantissa0 = src.little_little_endian.mantissa0;
-  target->big_endian.mantissa1 = src.little_little_endian.mantissa1;
-#endif
+  input.d = source;
+  target->i = bswap_64 (input.i);
  }
  
  static inline double
@@ -1600,18 +1621,7 @@ double_from_foreign_endianness (const union scm_ieee754_double *source)
  {
    union scm_ieee754_double result;
  
-#ifdef WORDS_BIGENDIAN
-  /* Assuming little endian for both byte and word order.  */
-  result.big_endian.negative  = source->little_little_endian.negative;
-  result.big_endian.exponent  = source->little_little_endian.exponent;
-  result.big_endian.mantissa0 = source->little_little_endian.mantissa0;
-  result.big_endian.mantissa1 = source->little_little_endian.mantissa1;
-#else
-  result.little_little_endian.negative  = source->big_endian.negative;
-  result.little_little_endian.exponent  = source->big_endian.exponent;
-  result.little_little_endian.mantissa0 = source->big_endian.mantissa0;
-  result.little_little_endian.mantissa1 = source->big_endian.mantissa1;
-#endif
+  result.i = bswap_64 (source->i);
  
    return (result.d);
  }
@@ -1630,7 +1640,7 @@ double_from_foreign_endianness (const union scm_ieee754_double *source)
  /* FIXME: SCM_VALIDATE_REAL rejects integers, etc. grrr */
  #define VALIDATE_REAL(pos, v) \
    do { \
-    SCM_ASSERT_TYPE (scm_is_true (scm_rational_p (v)), v, pos, FUNC_NAME, "real"); \
+    SCM_ASSERT_TYPE (scm_is_real (v), v, pos, FUNC_NAME, "real"); \
    } while (0)
  
  /* Template getters and setters.  */
@@ -1862,58 +1872,50 @@ utf_encoding_name (char *name, size_t utf_width, SCM endianness)
  #define MAX_UTF_ENCODING_NAME_LEN  16
  
  /* Produce the body of a `string->utf' function.  */
-#define STRING_TO_UTF(_utf_width)                                      \
-  SCM utf;                                                             \
-  int err;                                                             \
-  char *c_str;                                                         \
-  char c_utf_name[MAX_UTF_ENCODING_NAME_LEN];                          \
-  char *c_utf = NULL, *c_locale;                                       \
-  size_t c_strlen, c_raw_strlen, c_utf_len = 0;                                \
-                                                                       \
-  SCM_VALIDATE_STRING (1, str);                                                \
-  if (endianness == SCM_UNDEFINED)                                     \
-    endianness = scm_sym_big;                                          \
-  else                                                                 \
-    SCM_VALIDATE_SYMBOL (2, endianness);                               \
-                                                                       \
-  c_strlen = scm_c_string_length (str);                                        \
-  c_raw_strlen = c_strlen * ((_utf_width) / 8);                                \
-  do                                                                   \
-    {                                                                  \
-      c_str = (char *) alloca (c_raw_strlen + 1);                      \
-      c_raw_strlen = scm_to_locale_stringbuf (str, c_str, c_strlen);   \
-    }                                                                  \
-  while (c_raw_strlen > c_strlen);                                     \
-  c_str[c_raw_strlen] = '\0';                                          \
-                                                                       \
-  utf_encoding_name (c_utf_name, (_utf_width), endianness);            \
-                                                                       \
-  c_locale = (char *) alloca (strlen (locale_charset ()) + 1);         \
-  strcpy (c_locale, locale_charset ());                                        \
-                                                                       \
-  err = mem_iconveh (c_str, c_raw_strlen,                              \
-                    c_locale, c_utf_name,                              \
-                    iconveh_question_mark, NULL,                       \
-                    &c_utf, &c_utf_len);                               \
-  if (SCM_UNLIKELY (err))                                              \
-    scm_syserror_msg (FUNC_NAME, "failed to convert string: ~A",       \
-                     scm_list_1 (str), err);                           \
-  else                                                                 \
-    {                                                                  \
-      /* C_UTF is null-terminated.  It is malloc(3)-allocated, so we cannot \
-        use `scm_c_take_bytevector ()'.  */                            \
-      scm_dynwind_begin (0);                                           \
-      scm_dynwind_free (c_utf);                                                \
-                                                                       \
-      utf = make_bytevector (c_utf_len,                                        \
-                             SCM_ARRAY_ELEMENT_TYPE_VU8);              \
-      memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf,                    \
-             c_utf_len);                                               \
-                                                                       \
-      scm_dynwind_end ();                                              \
-    }                                                                  \
-                                                                       \
-  return (utf);
+#define STRING_TO_UTF(_utf_width)                                       \
+  SCM utf;                                                              \
+  int err;                                                              \
+  char c_utf_name[MAX_UTF_ENCODING_NAME_LEN];                           \
+  char *c_utf = NULL;                                                   \
+  size_t c_strlen, c_utf_len = 0;                                       \
+                                                                        \
+  SCM_VALIDATE_STRING (1, str);                                         \
+  if (scm_is_eq (endianness, SCM_UNDEFINED))                            \
+    endianness = scm_sym_big;                                           \
+  else                                                                  \
+    SCM_VALIDATE_SYMBOL (2, endianness);                                \
+                                                                        \
+  utf_encoding_name (c_utf_name, (_utf_width), endianness);             \
+                                                                        \
+  c_strlen = scm_i_string_length (str);                                 \
+  if (scm_i_is_narrow_string (str))                                     \
+    {                                                                   \
+      err = mem_iconveh (scm_i_string_chars (str), c_strlen,            \
+                         "ISO-8859-1", c_utf_name,                      \
+                         iconveh_question_mark, NULL,                   \
+                         &c_utf, &c_utf_len);                           \
+      if (SCM_UNLIKELY (err))                                           \
+        scm_syserror_msg (FUNC_NAME, "failed to convert string: ~A",    \
+                          scm_list_1 (str), err);                       \
+    }                                                                   \
+  else                                                                  \
+    {                                                                   \
+      const scm_t_wchar *wbuf = scm_i_string_wide_chars (str);          \
+      c_utf = u32_conv_to_encoding (c_utf_name,                         \
+                                    iconveh_question_mark,              \
+                                    (scm_t_uint32 *) wbuf,              \
+                                    c_strlen, NULL, NULL, &c_utf_len);  \
+      if (SCM_UNLIKELY (c_utf == NULL))                                 \
+        scm_syserror_msg (FUNC_NAME, "failed to convert string: ~A",    \
+                          scm_list_1 (str), errno);                     \
+    }                                                                   \
+  scm_dynwind_begin (0);                                                \
+  scm_dynwind_free (c_utf);                                             \
+  utf = make_bytevector (c_utf_len, SCM_ARRAY_ELEMENT_TYPE_VU8);        \
+  memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf, c_utf_len);             \
+  scm_dynwind_end ();                                                   \
+                                                                        \
+  return (utf); 
  
  
  
@@ -1925,39 +1927,15 @@ SCM_DEFINE (scm_string_to_utf8, "string->utf8",
  #define FUNC_NAME s_scm_string_to_utf8
  {
    SCM utf;
-  char *c_str;
-  uint8_t *c_utf;
-  size_t c_strlen, c_raw_strlen;
+  scm_t_uint8 *c_utf;
+  size_t c_utf_len = 0;
  
    SCM_VALIDATE_STRING (1, str);
  
-  c_strlen = scm_c_string_length (str);
-  c_raw_strlen = c_strlen;
-  do
-    {
-      c_str = (char *) alloca (c_raw_strlen + 1);
-      c_raw_strlen = scm_to_locale_stringbuf (str, c_str, c_strlen);
-    }
-  while (c_raw_strlen > c_strlen);
-  c_str[c_raw_strlen] = '\0';
-
-  c_utf = u8_strconv_from_locale (c_str);
-  if (SCM_UNLIKELY (c_utf == NULL))
-    scm_syserror (FUNC_NAME);
-  else
-    {
-      /* C_UTF is null-terminated.  It is malloc(3)-allocated, so we cannot
-        use `scm_c_take_bytevector ()'.  */
-      scm_dynwind_begin (0);
-      scm_dynwind_free (c_utf);
-
-      utf = make_bytevector (UTF_STRLEN (8, c_utf),
-                            SCM_ARRAY_ELEMENT_TYPE_VU8);
-      memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf,
-             UTF_STRLEN (8, c_utf));
-
-      scm_dynwind_end ();
-    }
+  c_utf = (scm_t_uint8 *) scm_to_utf8_stringn (str, &c_utf_len);
+  utf = make_bytevector (c_utf_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
+  memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf, c_utf_len);
+  free (c_utf);
  
    return (utf);
  }
@@ -1974,6 +1952,14 @@ SCM_DEFINE (scm_string_to_utf16, "string->utf16",
  }
  #undef FUNC_NAME
  
+static void
+swap_u32 (scm_t_wchar *vals, size_t len)
+{
+  size_t n;
+  for (n = 0; n < len; n++)
+    vals[n] = bswap_32 (vals[n]);
+}
+
  SCM_DEFINE (scm_string_to_utf32, "string->utf32",
             1, 1, 0,
             (SCM str, SCM endianness),
@@ -1981,7 +1967,21 @@ SCM_DEFINE (scm_string_to_utf32, "string->utf32",
             "encoding of @var{str}.")
  #define FUNC_NAME s_scm_string_to_utf32
  {
-  STRING_TO_UTF (32);
+  SCM bv;
+  scm_t_wchar *wchars;
+  size_t wchar_len, bytes_len;
+
+  wchars = scm_to_utf32_stringn (str, &wchar_len);
+  bytes_len = wchar_len * sizeof (scm_t_wchar);
+  if (!scm_is_eq (SCM_UNBNDP (endianness) ? scm_endianness_big : endianness,
+                  scm_i_native_endianness))
+    swap_u32 (wchars, wchar_len);
+  
+  bv = make_bytevector (bytes_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
+  memcpy (SCM_BYTEVECTOR_CONTENTS (bv), wchars, bytes_len);
+  free (wchars);
+
+  return bv;
  }
  #undef FUNC_NAME
  
@@ -1991,13 +1991,13 @@ SCM_DEFINE (scm_string_to_utf32, "string->utf32",
  #define UTF_TO_STRING(_utf_width)                                      \
    SCM str = SCM_BOOL_F;                                                        \
    int err;                                                             \
-  char *c_str = NULL, *c_locale;                                       \
+  char *c_str = NULL;                                                   \
    char c_utf_name[MAX_UTF_ENCODING_NAME_LEN];                          \
-  const char *c_utf;                                                   \
-  size_t c_strlen = 0, c_utf_len;                                      \
+  char *c_utf;                                                          \
+  size_t c_strlen = 0, c_utf_len = 0;                                  \
                                                                         \
    SCM_VALIDATE_BYTEVECTOR (1, utf);                                    \
-  if (endianness == SCM_UNDEFINED)                                     \
+  if (scm_is_eq (endianness, SCM_UNDEFINED))                            \
      endianness = scm_sym_big;                                          \
    else                                                                 \
      SCM_VALIDATE_SYMBOL (2, endianness);                               \
@@ -2006,20 +2006,19 @@ SCM_DEFINE (scm_string_to_utf32, "string->utf32",
    c_utf = (char *) SCM_BYTEVECTOR_CONTENTS (utf);                      \
    utf_encoding_name (c_utf_name, (_utf_width), endianness);            \
                                                                         \
-  c_locale = (char *) alloca (strlen (locale_charset ()) + 1);         \
-  strcpy (c_locale, locale_charset ());                                        \
-                                                                       \
    err = mem_iconveh (c_utf, c_utf_len,                                 \
-                    c_utf_name, c_locale,                              \
+                    c_utf_name, "UTF-8",                               \
                      iconveh_question_mark, NULL,                       \
                      &c_str, &c_strlen);                                \
    if (SCM_UNLIKELY (err))                                              \
      scm_syserror_msg (FUNC_NAME, "failed to convert to string: ~A",    \
                       scm_list_1 (utf), err);                           \
    else                                                                 \
-    /* C_STR is null-terminated.  */                                   \
-    str = scm_take_locale_stringn (c_str, c_strlen);                   \
-                                                                       \
+    {                                                                   \
+      str = scm_from_stringn (c_str, c_strlen, "UTF-8",                 \
+                              SCM_FAILED_CONVERSION_ERROR);             \
+      free (c_str);                                                     \
+    }                                                                   \
    return (str);
  
  
@@ -2031,29 +2030,15 @@ SCM_DEFINE (scm_utf8_to_string, "utf8->string",
  #define FUNC_NAME s_scm_utf8_to_string
  {
    SCM str;
-  int err;
-  char *c_str = NULL, *c_locale;
    const char *c_utf;
-  size_t c_utf_len, c_strlen = 0;
+  size_t c_utf_len = 0;
  
    SCM_VALIDATE_BYTEVECTOR (1, utf);
  
    c_utf_len = SCM_BYTEVECTOR_LENGTH (utf);
-
-  c_locale = (char *) alloca (strlen (locale_charset ()) + 1);
-  strcpy (c_locale, locale_charset ());
-
    c_utf = (char *) SCM_BYTEVECTOR_CONTENTS (utf);
-  err = mem_iconveh (c_utf, c_utf_len,
-                    "UTF-8", c_locale,
-                    iconveh_question_mark, NULL,
-                    &c_str, &c_strlen);
-  if (SCM_UNLIKELY (err))
-    scm_syserror_msg (FUNC_NAME, "failed to convert to string: ~A",
-                     scm_list_1 (utf), err);
-  else
-    /* C_STR is null-terminated.  */
-    str = scm_take_locale_stringn (c_str, c_strlen);
+  str = scm_from_stringn (c_utf, c_utf_len, "UTF-8",
+                          SCM_FAILED_CONVERSION_ERROR);
  
    return (str);
  }
@@ -2081,30 +2066,59 @@ SCM_DEFINE (scm_utf32_to_string, "utf32->string",
  }
  #undef FUNC_NAME
  
-
  \f
  /* Bytevectors as generalized vectors & arrays.  */
  
+/* Shared prologue: check that BV holds two _TYPE values at byte INDEX.  */
+#define COMPLEX_ACCESSOR_PROLOGUE(_type)                       \
+  size_t c_len, c_index;                                       \
+  char *c_bv;                                                  \
+  SCM_VALIDATE_BYTEVECTOR (1, bv);                             \
+  c_index = scm_to_size_t (index);                             \
+  c_len = SCM_BYTEVECTOR_LENGTH (bv);                          \
+  c_bv = (char *) SCM_BYTEVECTOR_CONTENTS (bv);                \
+  /* Subtract from C_LEN so that a huge INDEX cannot wrap.  */ \
+  if (SCM_UNLIKELY (c_len < 2 * sizeof (_type)                 \
+                    || c_index > c_len - 2 * sizeof (_type)))  \
+    scm_out_of_range (FUNC_NAME, index);
+
+/* Function-body template: after the shared prologue has checked BV and
+   INDEX, copy two consecutive _TYPE values (real part first, imaginary
+   part second) out of the bytevector and return them as a complex
+   number.  The bytes are fetched with memcpy rather than a cast load.  */
+#define COMPLEX_NATIVE_REF(_type)                                      \
+  COMPLEX_ACCESSOR_PROLOGUE (_type);                                   \
+                                                                       \
+  {                                                                    \
+    _type re, im;                                                      \
+    const char *c_re = c_bv + c_index;                                 \
+    const char *c_im = c_re + sizeof (_type);                          \
+                                                                       \
+    memcpy (&re, c_re, sizeof re);                                     \
+    memcpy (&im, c_im, sizeof im);                                     \
+    return scm_c_make_rectangular (re, im);                            \
+  }
  
  static SCM
-bytevector_ref_c32 (SCM bv, SCM idx)
-{ /* FIXME add some checks */
-  const float *contents = (const float*)SCM_BYTEVECTOR_CONTENTS (bv);
-  size_t i = scm_to_size_t (idx);
-  return scm_c_make_rectangular (contents[i/8], contents[i/8 + 1]);
+bytevector_ref_c32 (SCM bv, SCM index)
+#define FUNC_NAME "bytevector_ref_c32"
+{ /* Bounds-checked read of two floats (real, imaginary) at byte INDEX.  */
+  COMPLEX_NATIVE_REF (float);
  }
+#undef FUNC_NAME
  
  static SCM
-bytevector_ref_c64 (SCM bv, SCM idx)
-{ /* FIXME add some checks */
-  const double *contents = (const double*)SCM_BYTEVECTOR_CONTENTS (bv);
-  size_t i = scm_to_size_t (idx);
-  return scm_c_make_rectangular (contents[i/16], contents[i/16 + 1]);
+bytevector_ref_c64 (SCM bv, SCM index)
+#define FUNC_NAME "bytevector_ref_c64"
+{ /* Bounds-checked read of two doubles (real, imaginary) at byte INDEX.  */
+  COMPLEX_NATIVE_REF (double);
  }
+#undef FUNC_NAME
  
  typedef SCM (*scm_t_bytevector_ref_fn)(SCM, SCM);
  
-const scm_t_bytevector_ref_fn bytevector_ref_fns[SCM_ARRAY_ELEMENT_TYPE_LAST + 1] = 
+static const scm_t_bytevector_ref_fn
+bytevector_ref_fns[SCM_ARRAY_ELEMENT_TYPE_LAST + 1] =
  {
    NULL, /* SCM */
    NULL, /* CHAR */
@@ -2136,25 +2150,36 @@ bv_handle_ref (scm_t_array_handle *h, size_t index)
    return ref_fn (h->array, byte_index);
  }
  
-static SCM
-bytevector_set_c32 (SCM bv, SCM idx, SCM val)
-{ /* checks are unnecessary here */
-  float *contents = (float*)SCM_BYTEVECTOR_CONTENTS (bv);
-  size_t i = scm_to_size_t (idx);
-  contents[i/8] = scm_c_real_part (val);
-  contents[i/8 + 1] = scm_c_imag_part (val);
+/* Function-body template: after the shared prologue has checked BV and
+   INDEX, store VALUE's real and imaginary parts as two adjacent _TYPEs.  */
+#define COMPLEX_NATIVE_SET(_type)                                      \
+  COMPLEX_ACCESSOR_PROLOGUE (_type);                                   \
+                                                                       \
+  {                                                                    \
+    const _type re = scm_c_real_part (value);                          \
+    const _type im = scm_c_imag_part (value);                          \
+    char *c_dst = c_bv + c_index;                                      \
+                                                                       \
+    memcpy (c_dst, &re, sizeof re);                                    \
+    memcpy (c_dst + sizeof re, &im, sizeof im);                        \
+  }                                                                    \
    return SCM_UNSPECIFIED;
+
+static SCM
+bytevector_set_c32 (SCM bv, SCM index, SCM value)
+#define FUNC_NAME "bytevector_set_c32"
+{ /* Bounds-checked store of VALUE as two floats (real, imaginary).  */
+  COMPLEX_NATIVE_SET (float);
  }
+#undef FUNC_NAME
  
  static SCM
-bytevector_set_c64 (SCM bv, SCM idx, SCM val)
-{ /* checks are unnecessary here */
-  double *contents = (double*)SCM_BYTEVECTOR_CONTENTS (bv);
-  size_t i = scm_to_size_t (idx);
-  contents[i/16] = scm_c_real_part (val);
-  contents[i/16 + 1] = scm_c_imag_part (val);
-  return SCM_UNSPECIFIED;
+bytevector_set_c64 (SCM bv, SCM index, SCM value)
+#define FUNC_NAME "bytevector_set_c64"
+{ /* Bounds-checked store of VALUE as two doubles (real, imaginary).  */
+  COMPLEX_NATIVE_SET (double);
  }
+#undef FUNC_NAME
  
  typedef SCM (*scm_t_bytevector_set_fn)(SCM, SCM, SCM);
  
@@ -2210,18 +2235,18 @@ void
  scm_bootstrap_bytevectors (void)
  {
    /* This must be instantiated here because the generalized-vector API may
-     want to access bytevectors even though `(rnrs bytevector)' hasn't been
+     want to access bytevectors even though `(rnrs bytevectors)' hasn't been
       loaded.  */
-  scm_null_bytevector =
-    scm_gc_protect_object (make_bytevector (0, SCM_ARRAY_ELEMENT_TYPE_VU8));
+  scm_null_bytevector = make_bytevector (0, SCM_ARRAY_ELEMENT_TYPE_VU8);
  
  #ifdef WORDS_BIGENDIAN
-  scm_i_native_endianness = scm_permanent_object (scm_from_locale_symbol ("big"));
+  scm_i_native_endianness = scm_from_latin1_symbol ("big");
  #else
-  scm_i_native_endianness = scm_permanent_object (scm_from_locale_symbol ("little"));
+  scm_i_native_endianness = scm_from_latin1_symbol ("little");
  #endif
  
-  scm_c_register_extension ("libguile", "scm_init_bytevectors",
+  scm_c_register_extension ("libguile-" SCM_EFFECTIVE_VERSION,
+                            "scm_init_bytevectors",
                             (scm_t_extension_init_func) scm_init_bytevectors,
                             NULL);