Fix shrinking of contiguous bytevectors, as from 'get-bytevector-n'.
[bpt/guile.git] / libguile / bytevectors.c
index 4246f01..b210440 100644 (file)
@@ -1,4 +1,4 @@
-/* Copyright (C) 2009 Free Software Foundation, Inc.
+/* Copyright (C) 2009, 2010, 2011, 2012, 2014 Free Software Foundation, Inc.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public License
@@ -22,6 +22,7 @@
 #endif
 
 #include <alloca.h>
+#include <assert.h>
 
 #include <gmp.h>
 
@@ -30,7 +31,6 @@
 #include "libguile/bytevectors.h"
 #include "libguile/strings.h"
 #include "libguile/validate.h"
-#include "libguile/ieee-754.h"
 #include "libguile/arrays.h"
 #include "libguile/array-handle.h"
 #include "libguile/uniform.h"
@@ -39,6 +39,7 @@
 #include <byteswap.h>
 #include <striconveh.h>
 #include <uniconv.h>
+#include <unistr.h>
 
 #ifdef HAVE_LIMITS_H
 # include <limits.h>
   SCM_VALIDATE_SYMBOL (3, endianness);                         \
                                                                \
   {                                                            \
-    _sign long c_value;                                                \
+    scm_t_signed_bits c_value;                                 \
     INT_TYPE (_len, _sign) c_value_short;                      \
                                                                \
     if (SCM_UNLIKELY (!SCM_I_INUMP (value)))                   \
   INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                     \
                                                                \
   {                                                            \
-    _sign long c_value;                                                \
+    scm_t_signed_bits c_value;                                 \
     INT_TYPE (_len, _sign) c_value_short;                      \
                                                                \
     if (SCM_UNLIKELY (!SCM_I_INUMP (value)))                   \
 /* Bytevector type.  */
 
 #define SCM_BYTEVECTOR_HEADER_BYTES            \
-  (SCM_BYTEVECTOR_HEADER_SIZE * sizeof (SCM))
+  (SCM_BYTEVECTOR_HEADER_SIZE * sizeof (scm_t_bits))
 
 #define SCM_BYTEVECTOR_SET_LENGTH(_bv, _len)            \
   SCM_SET_CELL_WORD_1 ((_bv), (scm_t_bits) (_len))
-
-#define SCM_BYTEVECTOR_SET_ELEMENT_TYPE(bv, hint)      \
-  SCM_SET_BYTEVECTOR_FLAGS ((bv), (hint))
+#define SCM_BYTEVECTOR_SET_CONTENTS(_bv, _contents)    \
+  SCM_SET_CELL_WORD_2 ((_bv), (scm_t_bits) (_contents))
+#define SCM_BYTEVECTOR_SET_CONTIGUOUS_P(bv, contiguous_p)      \
+  SCM_SET_BYTEVECTOR_FLAGS ((bv),                              \
+                           SCM_BYTEVECTOR_ELEMENT_TYPE (bv)    \
+                           | ((contiguous_p) << 8UL))
+
+#define SCM_BYTEVECTOR_SET_ELEMENT_TYPE(bv, hint)                      \
+  SCM_SET_BYTEVECTOR_FLAGS ((bv),                                      \
+                            (hint)                                     \
+                            | (SCM_BYTEVECTOR_CONTIGUOUS_P (bv) << 8UL))
 #define SCM_BYTEVECTOR_TYPE_SIZE(var)                           \
   (scm_i_array_element_type_sizes[SCM_BYTEVECTOR_ELEMENT_TYPE (var)]/8)
 #define SCM_BYTEVECTOR_TYPED_LENGTH(var)                        \
-  SCM_BYTEVECTOR_LENGTH (var) / SCM_BYTEVECTOR_TYPE_SIZE (var)
+  (SCM_BYTEVECTOR_LENGTH (var) / SCM_BYTEVECTOR_TYPE_SIZE (var))
 
 /* The empty bytevector.  */
 SCM scm_null_bytevector = SCM_UNSPECIFIED;
@@ -210,13 +219,18 @@ make_bytevector (size_t len, scm_t_array_element_type element_type)
     ret = scm_null_bytevector;
   else
     {
+      signed char *contents;
+
       c_len = len * (scm_i_array_element_type_sizes[element_type] / 8);
 
-      ret = PTR2SCM (scm_gc_malloc_pointerless (SCM_BYTEVECTOR_HEADER_BYTES + c_len,
-                                               SCM_GC_BYTEVECTOR));
+      contents = scm_gc_malloc_pointerless (SCM_BYTEVECTOR_HEADER_BYTES + c_len,
+                                           SCM_GC_BYTEVECTOR);
+      ret = PTR2SCM (contents);
+      contents += SCM_BYTEVECTOR_HEADER_BYTES;
 
-      SCM_SET_CELL_TYPE (ret, scm_tc7_bytevector);
       SCM_BYTEVECTOR_SET_LENGTH (ret, c_len);
+      SCM_BYTEVECTOR_SET_CONTENTS (ret, contents);
+      SCM_BYTEVECTOR_SET_CONTIGUOUS_P (ret, 1);
       SCM_BYTEVECTOR_SET_ELEMENT_TYPE (ret, element_type);
     }
 
@@ -224,28 +238,29 @@ make_bytevector (size_t len, scm_t_array_element_type element_type)
 }
 
 /* Return a bytevector of LEN elements of type ELEMENT_TYPE, with element
-   values taken from CONTENTS.  */
+   values taken from CONTENTS.  Assume that the storage for CONTENTS will be
+   automatically reclaimed when it becomes unreachable.  */
 static inline SCM
 make_bytevector_from_buffer (size_t len, void *contents,
                             scm_t_array_element_type element_type)
 {
   SCM ret;
 
-  /* We actually never reuse storage from CONTENTS.  Hans Boehm says in
-     <gc/gc.h> that realloc(3) "shouldn't have been invented" and he may well
-     be right.  */
-  ret = make_bytevector (len, element_type);
-
-  if (len > 0)
+  if (SCM_UNLIKELY (len == 0))
+    ret = make_bytevector (len, element_type);
+  else
     {
       size_t c_len;
 
+      ret = PTR2SCM (scm_gc_malloc (SCM_BYTEVECTOR_HEADER_BYTES,
+                                   SCM_GC_BYTEVECTOR));
+
       c_len = len * (scm_i_array_element_type_sizes[element_type] / 8);
-      memcpy (SCM_BYTEVECTOR_CONTENTS (ret),
-             contents,
-             c_len);
 
-      scm_gc_free (contents, c_len, SCM_GC_BYTEVECTOR);
+      SCM_BYTEVECTOR_SET_LENGTH (ret, c_len);
+      SCM_BYTEVECTOR_SET_CONTENTS (ret, contents);
+      SCM_BYTEVECTOR_SET_CONTIGUOUS_P (ret, 0);
+      SCM_BYTEVECTOR_SET_ELEMENT_TYPE (ret, element_type);
     }
 
   return ret;
@@ -269,7 +284,7 @@ scm_i_make_typed_bytevector (size_t len, scm_t_array_element_type element_type)
 /* Return a bytevector of size LEN made up of CONTENTS.  The area pointed to
    by CONTENTS must have been allocated using `scm_gc_malloc ()'.  */
 SCM
-scm_c_take_bytevector (signed char *contents, size_t len)
+scm_c_take_gc_bytevector (signed char *contents, size_t len)
 {
   return make_bytevector_from_buffer (len, contents, SCM_ARRAY_ELEMENT_TYPE_VU8);
 }
@@ -299,11 +314,27 @@ scm_c_shrink_bytevector (SCM bv, size_t c_new_len)
 
   SCM_BYTEVECTOR_SET_LENGTH (bv, c_new_len);
 
-  /* Resize the existing buffer.  */
-  new_bv = PTR2SCM (scm_gc_realloc (SCM2PTR (bv),
-                                   c_len + SCM_BYTEVECTOR_HEADER_BYTES,
-                                   c_new_len + SCM_BYTEVECTOR_HEADER_BYTES,
-                                   SCM_GC_BYTEVECTOR));
+  if (SCM_BYTEVECTOR_CONTIGUOUS_P (bv))
+    {
+      signed char *c_bv;
+
+      c_bv = scm_gc_realloc (SCM2PTR (bv),
+                            c_len + SCM_BYTEVECTOR_HEADER_BYTES,
+                            c_new_len + SCM_BYTEVECTOR_HEADER_BYTES,
+                            SCM_GC_BYTEVECTOR);
+      new_bv = PTR2SCM (c_bv);
+      SCM_BYTEVECTOR_SET_CONTENTS (new_bv, c_bv + SCM_BYTEVECTOR_HEADER_BYTES);
+    }
+  else
+    {
+      signed char *c_bv;
+
+      c_bv = scm_gc_realloc (SCM_BYTEVECTOR_CONTENTS (bv),
+                            c_len, c_new_len, SCM_GC_BYTEVECTOR);
+      SCM_BYTEVECTOR_SET_CONTENTS (bv, c_bv);
+
+      new_bv = bv;
+    }
 
   return new_bv;
 }
@@ -436,7 +467,7 @@ SCM_DEFINE (scm_make_bytevector, "make-bytevector", 1, 1, 0,
   signed char c_fill = '\0';
 
   SCM_VALIDATE_UINT_COPY (1, len, c_len);
-  if (fill != SCM_UNDEFINED)
+  if (!scm_is_eq (fill, SCM_UNDEFINED))
     {
       int value;
 
@@ -447,7 +478,7 @@ SCM_DEFINE (scm_make_bytevector, "make-bytevector", 1, 1, 0,
     }
 
   bv = make_bytevector (c_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
-  if (fill != SCM_UNDEFINED)
+  if (!scm_is_eq (fill, SCM_UNDEFINED))
     {
       unsigned i;
       signed char *contents;
@@ -456,6 +487,8 @@ SCM_DEFINE (scm_make_bytevector, "make-bytevector", 1, 1, 0,
       for (i = 0; i < c_len; i++)
        contents[i] = c_fill;
     }
+  else
+    memset (SCM_BYTEVECTOR_CONTENTS (bv), 0, c_len);
 
   return bv;
 }
@@ -485,7 +518,8 @@ SCM_DEFINE (scm_bytevector_eq_p, "bytevector=?", 2, 0, 0,
   c_len1 = SCM_BYTEVECTOR_LENGTH (bv1);
   c_len2 = SCM_BYTEVECTOR_LENGTH (bv2);
 
-  if (c_len1 == c_len2)
+  if (c_len1 == c_len2 && (SCM_BYTEVECTOR_ELEMENT_TYPE (bv1)
+                           == SCM_BYTEVECTOR_ELEMENT_TYPE (bv2)))
     {
       signed char *c_bv1, *c_bv2;
 
@@ -550,9 +584,9 @@ SCM_DEFINE (scm_bytevector_copy_x, "bytevector-copy!", 5, 0, 0,
   if (SCM_UNLIKELY (c_target_start + c_len > c_target_len))
     scm_out_of_range (FUNC_NAME, target_start);
 
-  memcpy (c_target + c_target_start,
-         c_source + c_source_start,
-         c_len);
+  memmove (c_target + c_target_start,
+          c_source + c_source_start,
+          c_len);
 
   return SCM_UNSPECIFIED;
 }
@@ -587,23 +621,31 @@ SCM_DEFINE (scm_uniform_array_to_bytevector, "uniform-array->bytevector",
 #define FUNC_NAME s_scm_uniform_array_to_bytevector
 {
   SCM contents, ret;
-  size_t len;
+  size_t len, sz, byte_len;
   scm_t_array_handle h;
-  const void *base;
-  size_t sz;
+  const void *elts;
   
   contents = scm_array_contents (array, SCM_BOOL_T);
   if (scm_is_false (contents))
     scm_wrong_type_arg_msg (FUNC_NAME, 0, array, "uniform contiguous array");
 
   scm_array_get_handle (contents, &h);
+  assert (h.base == 0);
 
-  base = scm_array_handle_uniform_elements (&h);
+  elts = h.elements;
   len = h.dims->inc * (h.dims->ubnd - h.dims->lbnd + 1);
-  sz = scm_array_handle_uniform_element_size (&h);
+  sz = scm_array_handle_uniform_element_bit_size (&h);
+  if (sz >= 8 && ((sz % 8) == 0))
+    byte_len = len * (sz / 8);
+  else if (sz < 8)
+    /* byte_len = ceil (len * sz / 8) */
+    byte_len = (len * sz + 7) / 8;
+  else
+    /* an internal guile error, really */
+    SCM_MISC_ERROR ("uniform elements larger than 8 bits must fill whole bytes", SCM_EOL);
 
-  ret = make_bytevector (len * sz, SCM_ARRAY_ELEMENT_TYPE_VU8);
-  memcpy (SCM_BYTEVECTOR_CONTENTS (ret), base, len * sz);
+  ret = make_bytevector (byte_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
+  memcpy (SCM_BYTEVECTOR_CONTENTS (ret), elts, byte_len);
 
   scm_array_handle_release (&h);
 
@@ -700,7 +742,7 @@ SCM_DEFINE (scm_u8_list_to_bytevector, "u8-list->bytevector", 1, 0, 0,
 
       if (SCM_LIKELY (SCM_I_INUMP (item)))
        {
-         long c_item;
+         scm_t_signed_bits c_item;
 
          c_item = SCM_I_INUM (item);
          if (SCM_LIKELY ((c_item >= 0) && (c_item < 256)))
@@ -916,7 +958,7 @@ bytevector_unsigned_ref (const char *c_bv, size_t c_size, SCM endianness)
 #define GENERIC_INTEGER_SET(_sign)                                     \
   if (c_size < 3)                                                      \
     {                                                                  \
-      _sign int c_value;                                               \
+      scm_t_signed_bits c_value;                                       \
                                                                        \
       if (SCM_UNLIKELY (!SCM_I_INUMP (value)))                         \
        goto range_error;                                               \
@@ -1063,20 +1105,18 @@ SCM_DEFINE (scm_bytevector_sint_set_x, "bytevector-sint-set!", 5, 0, 0,
                                                                        \
   SCM_VALIDATE_BYTEVECTOR (1, bv);                                     \
   SCM_VALIDATE_SYMBOL (2, endianness);                                 \
-  c_size = scm_to_uint (size);                                         \
+  c_size = scm_to_unsigned_integer (size, 1, (size_t) -1);             \
                                                                        \
   c_len = SCM_BYTEVECTOR_LENGTH (bv);                                  \
-  if (SCM_UNLIKELY (c_len == 0))                                       \
+  if (SCM_UNLIKELY (c_len < c_size))                                   \
     lst = SCM_EOL;                                                     \
-  else if (SCM_UNLIKELY (c_len < c_size))                              \
-    scm_out_of_range (FUNC_NAME, size);                                        \
   else                                                                 \
     {                                                                  \
       const char *c_bv;                                                        \
                                                                        \
       c_bv = (char *) SCM_BYTEVECTOR_CONTENTS (bv);                    \
                                                                        \
-      lst = scm_make_list (scm_from_uint (c_len / c_size),             \
+      lst = scm_make_list (scm_from_size_t (c_len / c_size),           \
                           SCM_UNSPECIFIED);                            \
       for (i = 0, pair = lst;                                          \
           i <= c_len - c_size;                                         \
@@ -1530,6 +1570,18 @@ SCM_DEFINE (scm_bytevector_s64_native_set_x, "bytevector-s64-native-set!",
    Section 2.1 of R6RS-lib (in response to
    http://www.r6rs.org/formal-comments/comment-187.txt).  */
 
+union scm_ieee754_float
+{
+  float f;
+  scm_t_uint32 i;
+};
+
+union scm_ieee754_double
+{
+  double d;
+  scm_t_uint64 i;
+};
+
 
 /* Convert to/from a floating-point number with different endianness.  This
    method is probably not the most efficient but it should be portable.  */
@@ -1538,20 +1590,10 @@ static inline void
 float_to_foreign_endianness (union scm_ieee754_float *target,
                             float source)
 {
-  union scm_ieee754_float src;
-
-  src.f = source;
+  union scm_ieee754_float input;
 
-#ifdef WORDS_BIGENDIAN
-  /* Assuming little endian for both byte and word order.  */
-  target->little_endian.negative = src.big_endian.negative;
-  target->little_endian.exponent = src.big_endian.exponent;
-  target->little_endian.mantissa = src.big_endian.mantissa;
-#else
-  target->big_endian.negative = src.little_endian.negative;
-  target->big_endian.exponent = src.little_endian.exponent;
-  target->big_endian.mantissa = src.little_endian.mantissa;
-#endif
+  input.f = source;
+  target->i = bswap_32 (input.i);
 }
 
 static inline float
@@ -1559,16 +1601,7 @@ float_from_foreign_endianness (const union scm_ieee754_float *source)
 {
   union scm_ieee754_float result;
 
-#ifdef WORDS_BIGENDIAN
-  /* Assuming little endian for both byte and word order.  */
-  result.big_endian.negative = source->little_endian.negative;
-  result.big_endian.exponent = source->little_endian.exponent;
-  result.big_endian.mantissa = source->little_endian.mantissa;
-#else
-  result.little_endian.negative = source->big_endian.negative;
-  result.little_endian.exponent = source->big_endian.exponent;
-  result.little_endian.mantissa = source->big_endian.mantissa;
-#endif
+  result.i = bswap_32 (source->i);
 
   return (result.f);
 }
@@ -1577,22 +1610,10 @@ static inline void
 double_to_foreign_endianness (union scm_ieee754_double *target,
                              double source)
 {
-  union scm_ieee754_double src;
-
-  src.d = source;
+  union scm_ieee754_double input;
 
-#ifdef WORDS_BIGENDIAN
-  /* Assuming little endian for both byte and word order.  */
-  target->little_little_endian.negative  = src.big_endian.negative;
-  target->little_little_endian.exponent  = src.big_endian.exponent;
-  target->little_little_endian.mantissa0 = src.big_endian.mantissa0;
-  target->little_little_endian.mantissa1 = src.big_endian.mantissa1;
-#else
-  target->big_endian.negative  = src.little_little_endian.negative;
-  target->big_endian.exponent  = src.little_little_endian.exponent;
-  target->big_endian.mantissa0 = src.little_little_endian.mantissa0;
-  target->big_endian.mantissa1 = src.little_little_endian.mantissa1;
-#endif
+  input.d = source;
+  target->i = bswap_64 (input.i);
 }
 
 static inline double
@@ -1600,18 +1621,7 @@ double_from_foreign_endianness (const union scm_ieee754_double *source)
 {
   union scm_ieee754_double result;
 
-#ifdef WORDS_BIGENDIAN
-  /* Assuming little endian for both byte and word order.  */
-  result.big_endian.negative  = source->little_little_endian.negative;
-  result.big_endian.exponent  = source->little_little_endian.exponent;
-  result.big_endian.mantissa0 = source->little_little_endian.mantissa0;
-  result.big_endian.mantissa1 = source->little_little_endian.mantissa1;
-#else
-  result.little_little_endian.negative  = source->big_endian.negative;
-  result.little_little_endian.exponent  = source->big_endian.exponent;
-  result.little_little_endian.mantissa0 = source->big_endian.mantissa0;
-  result.little_little_endian.mantissa1 = source->big_endian.mantissa1;
-#endif
+  result.i = bswap_64 (source->i);
 
   return (result.d);
 }
@@ -1630,7 +1640,7 @@ double_from_foreign_endianness (const union scm_ieee754_double *source)
 /* FIXME: SCM_VALIDATE_REAL rejects integers, etc. grrr */
 #define VALIDATE_REAL(pos, v) \
   do { \
-    SCM_ASSERT_TYPE (scm_is_true (scm_rational_p (v)), v, pos, FUNC_NAME, "real"); \
+    SCM_ASSERT_TYPE (scm_is_real (v), v, pos, FUNC_NAME, "real"); \
   } while (0)
 
 /* Template getters and setters.  */
@@ -1862,58 +1872,50 @@ utf_encoding_name (char *name, size_t utf_width, SCM endianness)
 #define MAX_UTF_ENCODING_NAME_LEN  16
 
 /* Produce the body of a `string->utf' function.  */
-#define STRING_TO_UTF(_utf_width)                                      \
-  SCM utf;                                                             \
-  int err;                                                             \
-  char *c_str;                                                         \
-  char c_utf_name[MAX_UTF_ENCODING_NAME_LEN];                          \
-  char *c_utf = NULL, *c_locale;                                       \
-  size_t c_strlen, c_raw_strlen, c_utf_len = 0;                                \
-                                                                       \
-  SCM_VALIDATE_STRING (1, str);                                                \
-  if (endianness == SCM_UNDEFINED)                                     \
-    endianness = scm_sym_big;                                          \
-  else                                                                 \
-    SCM_VALIDATE_SYMBOL (2, endianness);                               \
-                                                                       \
-  c_strlen = scm_c_string_length (str);                                        \
-  c_raw_strlen = c_strlen * ((_utf_width) / 8);                                \
-  do                                                                   \
-    {                                                                  \
-      c_str = (char *) alloca (c_raw_strlen + 1);                      \
-      c_raw_strlen = scm_to_locale_stringbuf (str, c_str, c_strlen);   \
-    }                                                                  \
-  while (c_raw_strlen > c_strlen);                                     \
-  c_str[c_raw_strlen] = '\0';                                          \
-                                                                       \
-  utf_encoding_name (c_utf_name, (_utf_width), endianness);            \
-                                                                       \
-  c_locale = (char *) alloca (strlen (locale_charset ()) + 1);         \
-  strcpy (c_locale, locale_charset ());                                        \
-                                                                       \
-  err = mem_iconveh (c_str, c_raw_strlen,                              \
-                    c_locale, c_utf_name,                              \
-                    iconveh_question_mark, NULL,                       \
-                    &c_utf, &c_utf_len);                               \
-  if (SCM_UNLIKELY (err))                                              \
-    scm_syserror_msg (FUNC_NAME, "failed to convert string: ~A",       \
-                     scm_list_1 (str), err);                           \
-  else                                                                 \
-    {                                                                  \
-      /* C_UTF is null-terminated.  It is malloc(3)-allocated, so we cannot \
-        use `scm_c_take_bytevector ()'.  */                            \
-      scm_dynwind_begin (0);                                           \
-      scm_dynwind_free (c_utf);                                                \
-                                                                       \
-      utf = make_bytevector (c_utf_len,                                        \
-                             SCM_ARRAY_ELEMENT_TYPE_VU8);              \
-      memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf,                    \
-             c_utf_len);                                               \
-                                                                       \
-      scm_dynwind_end ();                                              \
-    }                                                                  \
-                                                                       \
-  return (utf);
+#define STRING_TO_UTF(_utf_width)                                       \
+  SCM utf;                                                              \
+  int err;                                                              \
+  char c_utf_name[MAX_UTF_ENCODING_NAME_LEN];                           \
+  char *c_utf = NULL;                                                   \
+  size_t c_strlen, c_utf_len = 0;                                       \
+                                                                        \
+  SCM_VALIDATE_STRING (1, str);                                         \
+  if (scm_is_eq (endianness, SCM_UNDEFINED))                            \
+    endianness = scm_sym_big;                                           \
+  else                                                                  \
+    SCM_VALIDATE_SYMBOL (2, endianness);                                \
+                                                                        \
+  utf_encoding_name (c_utf_name, (_utf_width), endianness);             \
+                                                                        \
+  c_strlen = scm_i_string_length (str);                                 \
+  if (scm_i_is_narrow_string (str))                                     \
+    {                                                                   \
+      err = mem_iconveh (scm_i_string_chars (str), c_strlen,            \
+                         "ISO-8859-1", c_utf_name,                      \
+                         iconveh_question_mark, NULL,                   \
+                         &c_utf, &c_utf_len);                           \
+      if (SCM_UNLIKELY (err))                                           \
+        scm_syserror_msg (FUNC_NAME, "failed to convert string: ~A",    \
+                          scm_list_1 (str), err);                       \
+    }                                                                   \
+  else                                                                  \
+    {                                                                   \
+      const scm_t_wchar *wbuf = scm_i_string_wide_chars (str);          \
+      c_utf = u32_conv_to_encoding (c_utf_name,                         \
+                                    iconveh_question_mark,              \
+                                    (scm_t_uint32 *) wbuf,              \
+                                    c_strlen, NULL, NULL, &c_utf_len);  \
+      if (SCM_UNLIKELY (c_utf == NULL))                                 \
+        scm_syserror_msg (FUNC_NAME, "failed to convert string: ~A",    \
+                          scm_list_1 (str), errno);                     \
+    }                                                                   \
+  scm_dynwind_begin (0);                                                \
+  scm_dynwind_free (c_utf);                                             \
+  utf = make_bytevector (c_utf_len, SCM_ARRAY_ELEMENT_TYPE_VU8);        \
+  memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf, c_utf_len);             \
+  scm_dynwind_end ();                                                   \
+                                                                        \
+  return (utf); 
 
 
 
@@ -1925,39 +1927,15 @@ SCM_DEFINE (scm_string_to_utf8, "string->utf8",
 #define FUNC_NAME s_scm_string_to_utf8
 {
   SCM utf;
-  char *c_str;
-  uint8_t *c_utf;
-  size_t c_strlen, c_raw_strlen;
+  scm_t_uint8 *c_utf;
+  size_t c_utf_len = 0;
 
   SCM_VALIDATE_STRING (1, str);
 
-  c_strlen = scm_c_string_length (str);
-  c_raw_strlen = c_strlen;
-  do
-    {
-      c_str = (char *) alloca (c_raw_strlen + 1);
-      c_raw_strlen = scm_to_locale_stringbuf (str, c_str, c_strlen);
-    }
-  while (c_raw_strlen > c_strlen);
-  c_str[c_raw_strlen] = '\0';
-
-  c_utf = u8_strconv_from_locale (c_str);
-  if (SCM_UNLIKELY (c_utf == NULL))
-    scm_syserror (FUNC_NAME);
-  else
-    {
-      /* C_UTF is null-terminated.  It is malloc(3)-allocated, so we cannot
-        use `scm_c_take_bytevector ()'.  */
-      scm_dynwind_begin (0);
-      scm_dynwind_free (c_utf);
-
-      utf = make_bytevector (UTF_STRLEN (8, c_utf),
-                            SCM_ARRAY_ELEMENT_TYPE_VU8);
-      memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf,
-             UTF_STRLEN (8, c_utf));
-
-      scm_dynwind_end ();
-    }
+  c_utf = (scm_t_uint8 *) scm_to_utf8_stringn (str, &c_utf_len);
+  utf = make_bytevector (c_utf_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
+  memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf, c_utf_len);
+  free (c_utf);
 
   return (utf);
 }
@@ -1974,6 +1952,14 @@ SCM_DEFINE (scm_string_to_utf16, "string->utf16",
 }
 #undef FUNC_NAME
 
+static void
+swap_u32 (scm_t_wchar *vals, size_t len)
+{
+  size_t n;
+  for (n = 0; n < len; n++)
+    vals[n] = bswap_32 (vals[n]);
+}
+
 SCM_DEFINE (scm_string_to_utf32, "string->utf32",
            1, 1, 0,
            (SCM str, SCM endianness),
@@ -1981,7 +1967,21 @@ SCM_DEFINE (scm_string_to_utf32, "string->utf32",
            "encoding of @var{str}.")
 #define FUNC_NAME s_scm_string_to_utf32
 {
-  STRING_TO_UTF (32);
+  SCM bv;
+  scm_t_wchar *wchars;
+  size_t wchar_len, bytes_len;
+
+  wchars = scm_to_utf32_stringn (str, &wchar_len);
+  bytes_len = wchar_len * sizeof (scm_t_wchar);
+  if (!scm_is_eq (SCM_UNBNDP (endianness) ? scm_endianness_big : endianness,
+                  scm_i_native_endianness))
+    swap_u32 (wchars, wchar_len);
+  
+  bv = make_bytevector (bytes_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
+  memcpy (SCM_BYTEVECTOR_CONTENTS (bv), wchars, bytes_len);
+  free (wchars);
+
+  return bv;
 }
 #undef FUNC_NAME
 
@@ -1991,13 +1991,13 @@ SCM_DEFINE (scm_string_to_utf32, "string->utf32",
 #define UTF_TO_STRING(_utf_width)                                      \
   SCM str = SCM_BOOL_F;                                                        \
   int err;                                                             \
-  char *c_str = NULL, *c_locale;                                       \
+  char *c_str = NULL;                                                   \
   char c_utf_name[MAX_UTF_ENCODING_NAME_LEN];                          \
-  const char *c_utf;                                                   \
-  size_t c_strlen = 0, c_utf_len;                                      \
+  char *c_utf;                                                          \
+  size_t c_strlen = 0, c_utf_len = 0;                                  \
                                                                        \
   SCM_VALIDATE_BYTEVECTOR (1, utf);                                    \
-  if (endianness == SCM_UNDEFINED)                                     \
+  if (scm_is_eq (endianness, SCM_UNDEFINED))                            \
     endianness = scm_sym_big;                                          \
   else                                                                 \
     SCM_VALIDATE_SYMBOL (2, endianness);                               \
@@ -2006,20 +2006,19 @@ SCM_DEFINE (scm_string_to_utf32, "string->utf32",
   c_utf = (char *) SCM_BYTEVECTOR_CONTENTS (utf);                      \
   utf_encoding_name (c_utf_name, (_utf_width), endianness);            \
                                                                        \
-  c_locale = (char *) alloca (strlen (locale_charset ()) + 1);         \
-  strcpy (c_locale, locale_charset ());                                        \
-                                                                       \
   err = mem_iconveh (c_utf, c_utf_len,                                 \
-                    c_utf_name, c_locale,                              \
+                    c_utf_name, "UTF-8",                               \
                     iconveh_question_mark, NULL,                       \
                     &c_str, &c_strlen);                                \
   if (SCM_UNLIKELY (err))                                              \
     scm_syserror_msg (FUNC_NAME, "failed to convert to string: ~A",    \
                      scm_list_1 (utf), err);                           \
   else                                                                 \
-    /* C_STR is null-terminated.  */                                   \
-    str = scm_take_locale_stringn (c_str, c_strlen);                   \
-                                                                       \
+    {                                                                   \
+      str = scm_from_stringn (c_str, c_strlen, "UTF-8",                 \
+                              SCM_FAILED_CONVERSION_ERROR);             \
+      free (c_str);                                                     \
+    }                                                                   \
   return (str);
 
 
@@ -2031,29 +2030,15 @@ SCM_DEFINE (scm_utf8_to_string, "utf8->string",
 #define FUNC_NAME s_scm_utf8_to_string
 {
   SCM str;
-  int err;
-  char *c_str = NULL, *c_locale;
   const char *c_utf;
-  size_t c_utf_len, c_strlen = 0;
+  size_t c_utf_len = 0;
 
   SCM_VALIDATE_BYTEVECTOR (1, utf);
 
   c_utf_len = SCM_BYTEVECTOR_LENGTH (utf);
-
-  c_locale = (char *) alloca (strlen (locale_charset ()) + 1);
-  strcpy (c_locale, locale_charset ());
-
   c_utf = (char *) SCM_BYTEVECTOR_CONTENTS (utf);
-  err = mem_iconveh (c_utf, c_utf_len,
-                    "UTF-8", c_locale,
-                    iconveh_question_mark, NULL,
-                    &c_str, &c_strlen);
-  if (SCM_UNLIKELY (err))
-    scm_syserror_msg (FUNC_NAME, "failed to convert to string: ~A",
-                     scm_list_1 (utf), err);
-  else
-    /* C_STR is null-terminated.  */
-    str = scm_take_locale_stringn (c_str, c_strlen);
+  str = scm_from_stringn (c_utf, c_utf_len, "UTF-8",
+                          SCM_FAILED_CONVERSION_ERROR);
 
   return (str);
 }
@@ -2081,30 +2066,59 @@ SCM_DEFINE (scm_utf32_to_string, "utf32->string",
 }
 #undef FUNC_NAME
 
-
 \f
 /* Bytevectors as generalized vectors & arrays.  */
 
+#define COMPLEX_ACCESSOR_PROLOGUE(_type)                       \
+  size_t c_len, c_index;                                       \
+  char *c_bv;                                                  \
+  /* Check BV's type, then fetch INDEX, length and contents.  */ \
+  SCM_VALIDATE_BYTEVECTOR (1, bv);                             \
+  c_index = scm_to_size_t (index);                             \
+  c_len = SCM_BYTEVECTOR_LENGTH (bv);                          \
+  c_bv = (char *) SCM_BYTEVECTOR_CONTENTS (bv);                \
+  /* Subtract instead of adding so a huge INDEX cannot wrap.  */ \
+  if (SCM_UNLIKELY (c_len < c_index                            \
+                    || c_len - c_index < 2 * sizeof (_type)))  \
+    scm_out_of_range (FUNC_NAME, index);
+
+/* Body template: return the complex whose two _TYPE parts start at INDEX.  */
+#define COMPLEX_NATIVE_REF(_type)                                      \
+  SCM result;                                                          \
+                                                                       \
+  COMPLEX_ACCESSOR_PROLOGUE (_type);                                   \
+                                                                       \
+  {                                                                    \
+    _type real, imag;                                                  \
+    /* Copy byte-wise: C_INDEX is a byte offset into C_BV.  */         \
+    memcpy (&real, &c_bv[c_index], sizeof (_type));                    \
+    memcpy (&imag, &c_bv[c_index + sizeof (_type)], sizeof (_type));   \
+                                                                       \
+    result = scm_c_make_rectangular (real, imag);                      \
+  }                                                                    \
+                                                                       \
+  return result;
 
 static SCM
-bytevector_ref_c32 (SCM bv, SCM idx)
-{ /* FIXME add some checks */
-  const float *contents = (const float*)SCM_BYTEVECTOR_CONTENTS (bv);
-  size_t i = scm_to_size_t (idx);
-  return scm_c_make_rectangular (contents[i/8], contents[i/8 + 1]);
+bytevector_ref_c32 (SCM bv, SCM index)
+#define FUNC_NAME "bytevector_ref_c32"
+{ /* Complex with float parts read from BV at byte offset INDEX.  */
+  COMPLEX_NATIVE_REF (float);
 }
+#undef FUNC_NAME
 
 static SCM
-bytevector_ref_c64 (SCM bv, SCM idx)
-{ /* FIXME add some checks */
-  const double *contents = (const double*)SCM_BYTEVECTOR_CONTENTS (bv);
-  size_t i = scm_to_size_t (idx);
-  return scm_c_make_rectangular (contents[i/16], contents[i/16 + 1]);
+bytevector_ref_c64 (SCM bv, SCM index)
+#define FUNC_NAME "bytevector_ref_c64"
+{ /* Complex with double parts read from BV at byte offset INDEX.  */
+  COMPLEX_NATIVE_REF (double);
 }
+#undef FUNC_NAME
 
 typedef SCM (*scm_t_bytevector_ref_fn)(SCM, SCM);
 
-const scm_t_bytevector_ref_fn bytevector_ref_fns[SCM_ARRAY_ELEMENT_TYPE_LAST + 1] = 
+static const scm_t_bytevector_ref_fn
+bytevector_ref_fns[SCM_ARRAY_ELEMENT_TYPE_LAST + 1] =
 {
   NULL, /* SCM */
   NULL, /* CHAR */
@@ -2136,25 +2150,36 @@ bv_handle_ref (scm_t_array_handle *h, size_t index)
   return ref_fn (h->array, byte_index);
 }
 
-static SCM
-bytevector_set_c32 (SCM bv, SCM idx, SCM val)
-{ /* checks are unnecessary here */
-  float *contents = (float*)SCM_BYTEVECTOR_CONTENTS (bv);
-  size_t i = scm_to_size_t (idx);
-  contents[i/8] = scm_c_real_part (val);
-  contents[i/8 + 1] = scm_c_imag_part (val);
+/* Body template: store VALUE's real and imaginary parts, as _TYPE, at INDEX.  */
+#define COMPLEX_NATIVE_SET(_type)                                      \
+  COMPLEX_ACCESSOR_PROLOGUE (_type);                                   \
+                                                                       \
+  {                                                                    \
+    _type real, imag;                                                  \
+    real = scm_c_real_part (value);                                    \
+    imag = scm_c_imag_part (value);                                    \
+    /* Real part first, imaginary part right after it.  */             \
+    memcpy (&c_bv[c_index], &real, sizeof (_type));                    \
+    memcpy (&c_bv[c_index + sizeof (_type)], &imag, sizeof (_type));   \
+  }                                                                    \
+                                                                       \
   return SCM_UNSPECIFIED;
+
+static SCM
+bytevector_set_c32 (SCM bv, SCM index, SCM value)
+#define FUNC_NAME "bytevector_set_c32"
+{ /* Store VALUE as two floats in BV at byte offset INDEX.  */
+  COMPLEX_NATIVE_SET (float);
 }
+#undef FUNC_NAME
 
 static SCM
-bytevector_set_c64 (SCM bv, SCM idx, SCM val)
-{ /* checks are unnecessary here */
-  double *contents = (double*)SCM_BYTEVECTOR_CONTENTS (bv);
-  size_t i = scm_to_size_t (idx);
-  contents[i/16] = scm_c_real_part (val);
-  contents[i/16 + 1] = scm_c_imag_part (val);
-  return SCM_UNSPECIFIED;
+bytevector_set_c64 (SCM bv, SCM index, SCM value)
+#define FUNC_NAME "bytevector_set_c64"
+{ /* Store VALUE as two doubles in BV at byte offset INDEX.  */
+  COMPLEX_NATIVE_SET (double);
 }
+#undef FUNC_NAME
 
 typedef SCM (*scm_t_bytevector_set_fn)(SCM, SCM, SCM);
 
@@ -2210,18 +2235,18 @@ void
 scm_bootstrap_bytevectors (void)
 {
   /* This must be instantiated here because the generalized-vector API may
-     want to access bytevectors even though `(rnrs bytevector)' hasn't been
+     want to access bytevectors even though `(rnrs bytevectors)' hasn't been
      loaded.  */
-  scm_null_bytevector =
-    scm_gc_protect_object (make_bytevector (0, SCM_ARRAY_ELEMENT_TYPE_VU8));
+  scm_null_bytevector = make_bytevector (0, SCM_ARRAY_ELEMENT_TYPE_VU8);
 
 #ifdef WORDS_BIGENDIAN
-  scm_i_native_endianness = scm_permanent_object (scm_from_locale_symbol ("big"));
+  scm_i_native_endianness = scm_from_latin1_symbol ("big");
 #else
-  scm_i_native_endianness = scm_permanent_object (scm_from_locale_symbol ("little"));
+  scm_i_native_endianness = scm_from_latin1_symbol ("little");
 #endif
 
-  scm_c_register_extension ("libguile", "scm_init_bytevectors",
+  scm_c_register_extension ("libguile-" SCM_EFFECTIVE_VERSION,
+                            "scm_init_bytevectors",
                            (scm_t_extension_init_func) scm_init_bytevectors,
                            NULL);