Report the faulty keyword in errors raised by `scm_c_bind_keyword_arguments'.

[bpt/guile.git] / libguile / strings.c
diff --git a/libguile/strings.c b/libguile/strings.c

index 68fa25c..1b241e5 100644 (file)
--- a/libguile/strings.c
+++ b/libguile/strings.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009, 2010 Free Software Foundation, Inc.
+/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
   * 
   * This library is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public License
@@ -22,6 +22,7 @@
  # include <config.h>
  #endif
  
+#include <alloca.h>
  #include <string.h>
  #include <stdio.h>
  #include <ctype.h>
@@ -261,30 +262,48 @@ SCM scm_nullstr;
  
  /* Create a scheme string with space for LEN 8-bit Latin-1-encoded
     characters.  CHARSP, if not NULL, will be set to location of the
-   char array.  */
+   char array.  If READ_ONLY_P, the returned string is read-only;
+   otherwise it is writable.  */
  SCM
-scm_i_make_string (size_t len, char **charsp)
+scm_i_make_string (size_t len, char **charsp, int read_only_p)
  {
-  SCM buf = make_stringbuf (len);
+  static SCM null_stringbuf = SCM_BOOL_F;
+  SCM buf;
    SCM res;
+
+  if (len == 0)
+    {
+      if (SCM_UNLIKELY (scm_is_false (null_stringbuf)))
+        {
+          null_stringbuf = make_stringbuf (0);
+          SET_STRINGBUF_SHARED (null_stringbuf);
+        }
+      buf = null_stringbuf;
+    }
+  else
+    buf = make_stringbuf (len);
+
    if (charsp)
      *charsp = (char *) STRINGBUF_CHARS (buf);
-  res = scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
-                        (scm_t_bits)0, (scm_t_bits) len);
+  res = scm_double_cell (read_only_p ? RO_STRING_TAG : STRING_TAG,
+                        SCM_UNPACK (buf),
+                        (scm_t_bits) 0, (scm_t_bits) len);
    return res;
  }
  
  /* Create a scheme string with space for LEN 32-bit UCS-4-encoded
     characters.  CHARSP, if not NULL, will be set to location of the
-   character array.  */
+   character array.  If READ_ONLY_P, the returned string is read-only;
+   otherwise it is writable.  */
  SCM
-scm_i_make_wide_string (size_t len, scm_t_wchar **charsp)
+scm_i_make_wide_string (size_t len, scm_t_wchar **charsp, int read_only_p)
  {
    SCM buf = make_wide_stringbuf (len);
    SCM res;
    if (charsp)
      *charsp = STRINGBUF_WIDE_CHARS (buf);
-  res = scm_double_cell (STRING_TAG, SCM_UNPACK (buf),
+  res = scm_double_cell (read_only_p ? RO_STRING_TAG : STRING_TAG,
+                        SCM_UNPACK (buf),
                           (scm_t_bits) 0, (scm_t_bits) len);
    return res;
  }
@@ -315,56 +334,74 @@ get_str_buf_start (SCM *str, SCM *buf, size_t *start)
  SCM
  scm_i_substring (SCM str, size_t start, size_t end)
  {
-  SCM buf;
-  size_t str_start;
-  get_str_buf_start (&str, &buf, &str_start);
-  scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
-  SET_STRINGBUF_SHARED (buf);
-  scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
-  return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
-                         (scm_t_bits)str_start + start,
-                         (scm_t_bits) end - start);
+  if (start == end)
+    return scm_i_make_string (0, NULL, 0);
+  else
+    {
+      SCM buf;
+      size_t str_start;
+      get_str_buf_start (&str, &buf, &str_start);
+      scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
+      SET_STRINGBUF_SHARED (buf);
+      scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
+      return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
+                              (scm_t_bits)str_start + start,
+                              (scm_t_bits) end - start);
+    }
  }
  
  SCM
  scm_i_substring_read_only (SCM str, size_t start, size_t end)
  {
-  SCM buf;
-  size_t str_start;
-  get_str_buf_start (&str, &buf, &str_start);
-  scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
-  SET_STRINGBUF_SHARED (buf);
-  scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
-  return scm_double_cell (RO_STRING_TAG, SCM_UNPACK(buf),
-                         (scm_t_bits)str_start + start,
-                         (scm_t_bits) end - start);
+  if (start == end)
+    return scm_i_make_string (0, NULL, 1);
+  else
+    {
+      SCM buf;
+      size_t str_start;
+      get_str_buf_start (&str, &buf, &str_start);
+      scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
+      SET_STRINGBUF_SHARED (buf);
+      scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
+      return scm_double_cell (RO_STRING_TAG, SCM_UNPACK(buf),
+                              (scm_t_bits)str_start + start,
+                              (scm_t_bits) end - start);
+    }
  }
  
  SCM
  scm_i_substring_copy (SCM str, size_t start, size_t end)
  {
-  size_t len = end - start;
-  SCM buf, my_buf;
-  size_t str_start;
-  get_str_buf_start (&str, &buf, &str_start);
-  if (scm_i_is_narrow_string (str))
-    {
-      my_buf = make_stringbuf (len);
-      memcpy (STRINGBUF_CHARS (my_buf),
-              STRINGBUF_CHARS (buf) + str_start + start, len);
-    }
+  if (start == end)
+    return scm_i_make_string (0, NULL, 0);
    else
      {
-      my_buf = make_wide_stringbuf (len);
-      u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (my_buf),
-               (scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf) + str_start 
-                                 + start), len);
-      /* Even though this string is wide, the substring may be narrow.
-         Consider adding code to narrow the string.  */
+      size_t len = end - start;
+      SCM buf, my_buf, substr;
+      size_t str_start;
+      int wide = 0;
+      get_str_buf_start (&str, &buf, &str_start);
+      if (scm_i_is_narrow_string (str))
+        {
+          my_buf = make_stringbuf (len);
+          memcpy (STRINGBUF_CHARS (my_buf),
+                  STRINGBUF_CHARS (buf) + str_start + start, len);
+        }
+      else
+        {
+          my_buf = make_wide_stringbuf (len);
+          u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (my_buf),
+                   (scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf) + str_start 
+                                     + start), len);
+          wide = 1;
+        }
+      scm_remember_upto_here_1 (buf);
+      substr = scm_double_cell (STRING_TAG, SCM_UNPACK (my_buf),
+                                (scm_t_bits) 0, (scm_t_bits) len);
+      if (wide)
+        scm_i_try_narrow_string (substr);
+      return substr;
      }
-  scm_remember_upto_here_1 (buf);
-  return scm_double_cell (STRING_TAG, SCM_UNPACK (my_buf),
-                          (scm_t_bits) 0, (scm_t_bits) len);
  }
  
  SCM
@@ -372,7 +409,9 @@ scm_i_substring_shared (SCM str, size_t start, size_t end)
  {
    if (start == 0 && end == STRING_LENGTH (str))
      return str;
-  else 
+  else if (start == end)
+    return scm_i_make_string (0, NULL, 0);
+  else
      {
        size_t len = end - start;
        if (IS_SH_STRING (str))
@@ -431,6 +470,9 @@ scm_i_string_length (SCM str)
  int
  scm_i_is_narrow_string (SCM str)
  {
+  if (IS_SH_STRING (str))
+    str = SH_STRING_STRING (str);
+
    return !STRINGBUF_WIDE (STRING_STRINGBUF (str));
  }
  
@@ -441,6 +483,9 @@ scm_i_is_narrow_string (SCM str)
  int
  scm_i_try_narrow_string (SCM str)
  {
+  if (IS_SH_STRING (str))
+    str = SH_STRING_STRING (str);
+
    SET_STRING_STRINGBUF (str, narrow_stringbuf (STRING_STRINGBUF (str)));
  
    return scm_i_is_narrow_string (str);
@@ -659,6 +704,12 @@ scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr)
  void
  scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr)
  {
+  if (IS_SH_STRING (str))
+    {
+      p += STRING_START (str);
+      str = SH_STRING_STRING (str);
+    }
+
    if (chr > 0xFF && scm_i_is_narrow_string (str))
      SET_STRING_STRINGBUF (str, wide_stringbuf (STRING_STRINGBUF (str)));
  
@@ -697,7 +748,7 @@ scm_i_make_symbol (SCM name, scm_t_bits flags,
        name = SH_STRING_STRING (name);
        start += STRING_START (name);
      }
-  buf = SYMBOL_STRINGBUF (name);
+  buf = STRING_STRINGBUF (name);
  
    if (start == 0 && length == STRINGBUF_LENGTH (buf))
      {
@@ -736,8 +787,8 @@ scm_i_c_make_symbol (const char *name, size_t len,
    SCM buf = make_stringbuf (len);
    memcpy (STRINGBUF_CHARS (buf), name, len);
  
-  return scm_immutable_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
-                                   (scm_t_bits) hash, SCM_UNPACK (props));
+  return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
+                          (scm_t_bits) hash, SCM_UNPACK (props));
  }
  
  /* Returns the number of characters in SYM.  This may be different
@@ -856,31 +907,31 @@ SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0, (SCM str),
    SCM_VALIDATE_STRING (1, str);
  
    /* String info */
-  e1 = scm_cons (scm_from_locale_symbol ("string"),
+  e1 = scm_cons (scm_from_latin1_symbol ("string"),
                   str);
-  e2 = scm_cons (scm_from_locale_symbol ("start"),
+  e2 = scm_cons (scm_from_latin1_symbol ("start"),
                   scm_from_size_t (STRING_START (str)));
-  e3 = scm_cons (scm_from_locale_symbol ("length"),
+  e3 = scm_cons (scm_from_latin1_symbol ("length"),
                   scm_from_size_t (STRING_LENGTH (str)));
  
    if (IS_SH_STRING (str))
      {
-      e4 = scm_cons (scm_from_locale_symbol ("shared"),
+      e4 = scm_cons (scm_from_latin1_symbol ("shared"),
                       SH_STRING_STRING (str));
        buf = STRING_STRINGBUF (SH_STRING_STRING (str));
      }
    else
      {
-      e4 = scm_cons (scm_from_locale_symbol ("shared"),
+      e4 = scm_cons (scm_from_latin1_symbol ("shared"),
                       SCM_BOOL_F);
        buf = STRING_STRINGBUF (str);
      }
  
    if (IS_RO_STRING (str))
-    e5 = scm_cons (scm_from_locale_symbol ("read-only"),
+    e5 = scm_cons (scm_from_latin1_symbol ("read-only"),
                     SCM_BOOL_T);
    else
-    e5 = scm_cons (scm_from_locale_symbol ("read-only"),
+    e5 = scm_cons (scm_from_latin1_symbol ("read-only"),
                     SCM_BOOL_F);
  
    /* Stringbuf info */
@@ -888,34 +939,34 @@ SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0, (SCM str),
      {
        size_t len = STRINGBUF_LENGTH (buf);
        char *cbuf;
-      SCM sbc = scm_i_make_string (len, &cbuf);
+      SCM sbc = scm_i_make_string (len, &cbuf, 0);
        memcpy (cbuf, STRINGBUF_CHARS (buf), len);
-      e6 = scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
+      e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-chars"),
                       sbc);
      }
    else
      {
        size_t len = STRINGBUF_LENGTH (buf);
        scm_t_wchar *cbuf;
-      SCM sbc = scm_i_make_wide_string (len, &cbuf);
+      SCM sbc = scm_i_make_wide_string (len, &cbuf, 0);
        u32_cpy ((scm_t_uint32 *) cbuf, 
                 (scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf), len);
-      e6 = scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
+      e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-chars"),
                       sbc);
      }
-  e7 = scm_cons (scm_from_locale_symbol ("stringbuf-length"), 
+  e7 = scm_cons (scm_from_latin1_symbol ("stringbuf-length"), 
                   scm_from_size_t (STRINGBUF_LENGTH (buf)));
    if (STRINGBUF_SHARED (buf))
-    e8 = scm_cons (scm_from_locale_symbol ("stringbuf-shared"), 
+    e8 = scm_cons (scm_from_latin1_symbol ("stringbuf-shared"), 
                     SCM_BOOL_T);
    else
-    e8 = scm_cons (scm_from_locale_symbol ("stringbuf-shared"), 
+    e8 = scm_cons (scm_from_latin1_symbol ("stringbuf-shared"), 
                     SCM_BOOL_F);
    if (STRINGBUF_WIDE (buf))
-    e9 = scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
+    e9 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"),
                    SCM_BOOL_T);
    else
-    e9 = scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
+    e9 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"),
                    SCM_BOOL_F);
  
    return scm_list_n (e1, e2, e3, e4, e5, e6, e7, e8, e9, SCM_UNDEFINED);
@@ -948,11 +999,11 @@ SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0, (SCM sym),
    SCM e1, e2, e3, e4, e5, e6, e7;
    SCM buf;
    SCM_VALIDATE_SYMBOL (1, sym);
-  e1 = scm_cons (scm_from_locale_symbol ("symbol"),
+  e1 = scm_cons (scm_from_latin1_symbol ("symbol"),
                   sym);
-  e2 = scm_cons (scm_from_locale_symbol ("hash"),
+  e2 = scm_cons (scm_from_latin1_symbol ("hash"),
                   scm_from_ulong (scm_i_symbol_hash (sym)));
-  e3 = scm_cons (scm_from_locale_symbol ("interned"),
+  e3 = scm_cons (scm_from_latin1_symbol ("interned"),
                   scm_symbol_interned_p (sym));
    buf = SYMBOL_STRINGBUF (sym);
  
@@ -961,34 +1012,34 @@ SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0, (SCM sym),
      {
        size_t len = STRINGBUF_LENGTH (buf);
        char *cbuf;
-      SCM sbc = scm_i_make_string (len, &cbuf);
+      SCM sbc = scm_i_make_string (len, &cbuf, 0);
        memcpy (cbuf, STRINGBUF_CHARS (buf), len);
-      e4 = scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
+      e4 = scm_cons (scm_from_latin1_symbol ("stringbuf-chars"),
                       sbc);
      }
    else
      {
        size_t len = STRINGBUF_LENGTH (buf);
        scm_t_wchar *cbuf;
-      SCM sbc = scm_i_make_wide_string (len, &cbuf);
+      SCM sbc = scm_i_make_wide_string (len, &cbuf, 0);
        u32_cpy ((scm_t_uint32 *) cbuf, 
                 (scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf), len);
-      e4 = scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
+      e4 = scm_cons (scm_from_latin1_symbol ("stringbuf-chars"),
                       sbc);
      }
-  e5 = scm_cons (scm_from_locale_symbol ("stringbuf-length"), 
+  e5 = scm_cons (scm_from_latin1_symbol ("stringbuf-length"), 
                   scm_from_size_t (STRINGBUF_LENGTH (buf)));
    if (STRINGBUF_SHARED (buf))
-    e6 = scm_cons (scm_from_locale_symbol ("stringbuf-shared"), 
+    e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-shared"), 
                     SCM_BOOL_T);
    else
-    e6 = scm_cons (scm_from_locale_symbol ("stringbuf-shared"), 
+    e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-shared"), 
                     SCM_BOOL_F);
    if (STRINGBUF_WIDE (buf))
-    e7 = scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
+    e7 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"),
                      SCM_BOOL_T);
    else
-    e7 = scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
+    e7 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"),
                      SCM_BOOL_F);
    return scm_list_n (e1, e2, e3, e4, e5, e6, e7, SCM_UNDEFINED);
  
@@ -1065,7 +1116,7 @@ SCM_DEFINE (scm_string, "string", 0, 0, 1,
      {
        char *buf;
  
-      result = scm_i_make_string (len, NULL);
+      result = scm_i_make_string (len, NULL, 0);
        result = scm_i_string_start_writing (result);
        buf = scm_i_string_writable_chars (result);
        while (len > 0 && scm_is_pair (rest))
@@ -1082,7 +1133,7 @@ SCM_DEFINE (scm_string, "string", 0, 0, 1,
      {
        scm_t_wchar *buf;
  
-      result = scm_i_make_wide_string (len, NULL);
+      result = scm_i_make_wide_string (len, NULL, 0);
        result = scm_i_string_start_writing (result);
        buf = scm_i_string_writable_wide_chars (result);
        while (len > 0 && scm_is_pair (rest))
@@ -1111,7 +1162,7 @@ SCM_DEFINE (scm_make_string, "make-string", 1, 1, 0,
             "Return a newly allocated string of\n"
              "length @var{k}.  If @var{chr} is given, then all elements of\n"
             "the string are initialized to @var{chr}, otherwise the contents\n"
-           "of the @var{string} are unspecified.")
+           "of the string are all set to @code{#\nul}.")
  #define FUNC_NAME s_scm_make_string
  {
    return scm_c_make_string (scm_to_size_t (k), chr);
@@ -1123,9 +1174,13 @@ scm_c_make_string (size_t len, SCM chr)
  #define FUNC_NAME NULL
  {
    size_t p;
-  SCM res = scm_i_make_string (len, NULL);
+  char *contents = NULL;
+  SCM res = scm_i_make_string (len, &contents, 0);
  
-  if (!SCM_UNBNDP (chr))
+  /* If no char is given, initialize string contents to NULL.  */
+  if (SCM_UNBNDP (chr))
+    memset (contents, 0, len);
+  else
      {
        SCM_VALIDATE_CHAR (0, chr);
        res = scm_i_string_start_writing (res);
@@ -1346,7 +1401,8 @@ SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
  #define FUNC_NAME s_scm_string_append
  {
    SCM res;
-  size_t len = 0;
+  size_t total = 0;
+  size_t len;
    int wide = 0;
    SCM l, s;
    size_t i;
@@ -1361,15 +1417,18 @@ SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
      {
        s = SCM_CAR (l);
        SCM_VALIDATE_STRING (SCM_ARGn, s);
-      len += scm_i_string_length (s);
+      len = scm_i_string_length (s);
+      if (((size_t) -1) - total < len)
+        scm_num_overflow (s_scm_string_append);
+      total += len;
        if (!scm_i_is_narrow_string (s))
          wide = 1;
      }
    data.narrow = NULL;
    if (!wide)
-    res = scm_i_make_string (len, &data.narrow);
+    res = scm_i_make_string (total, &data.narrow, 0);
    else
-    res = scm_i_make_wide_string (len, &data.wide);
+    res = scm_i_make_wide_string (total, &data.wide, 0);
  
    for (l = args; !scm_is_null (l); l = SCM_CDR (l))
      {
@@ -1377,6 +1436,8 @@ SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
        s = SCM_CAR (l);
        SCM_VALIDATE_STRING (SCM_ARGn, s);
        len = scm_i_string_length (s);
+      if (len > total)
+        SCM_MISC_ERROR ("list changed during string-append", SCM_EOL);
        if (!wide)
          {
            memcpy (data.narrow, scm_i_string_chars (s), len);
@@ -1386,48 +1447,61 @@ SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
          {
            if (scm_i_is_narrow_string (s))
              {
-              for (i = 0; i < scm_i_string_length (s); i++)
-                data.wide[i] = (unsigned char) scm_i_string_chars (s)[i];
+              const char *src = scm_i_string_chars (s);
+              for (i = 0; i < len; i++)
+                data.wide[i] = (unsigned char) src[i];
              }
            else
              u32_cpy ((scm_t_uint32 *) data.wide,
                       (scm_t_uint32 *) scm_i_string_wide_chars (s), len);
            data.wide += len;
          }
+      total -= len;
        scm_remember_upto_here_1 (s);
      }
+  if (total != 0)
+    SCM_MISC_ERROR ("list changed during string-append", SCM_EOL);
    return res;
  }
  #undef FUNC_NAME
  
-int
-scm_is_string (SCM obj)
-{
-  return IS_STRING (obj);
-}
  
  \f
-/* Conversion to/from other encodings.  */
+/* Charset conversion error handling.  */
  
  SCM_SYMBOL (scm_encoding_error_key, "encoding-error");
+SCM_SYMBOL (scm_decoding_error_key, "decoding-error");
+
+/* Raise an exception informing that character CHR could not be written
+   to PORT in its current encoding.  */
  void
  scm_encoding_error (const char *subr, int err, const char *message,
-                   const char *from, const char *to, SCM string_or_bv)
+                   SCM port, SCM chr)
  {
-  /* Raise an exception that conveys all the information needed to debug the
-     problem.  Only perform locale conversions that are safe; in particular,
-     don't try to display STRING_OR_BV when it's a string since converting it to
-     the output locale may fail.  */
    scm_throw (scm_encoding_error_key,
-            scm_list_n (scm_from_locale_string (subr),
-                        scm_from_locale_string (message),
+            scm_list_n (scm_from_latin1_string (subr),
+                        scm_from_latin1_string (message),
+                        scm_from_int (err),
+                        port, chr,
+                        SCM_UNDEFINED));
+}
+
+/* Raise an exception informing of an encoding error on PORT.  This
+   means that a character could not be written in PORT's encoding.  */
+void
+scm_decoding_error (const char *subr, int err, const char *message, SCM port)
+{
+  scm_throw (scm_decoding_error_key,
+            scm_list_n (scm_from_latin1_string (subr),
+                        scm_from_latin1_string (message),
                          scm_from_int (err),
-                        scm_from_locale_string (from),
-                        scm_from_locale_string (to),
-                        string_or_bv,
+                        port,
                          SCM_UNDEFINED));
  }
  
+\f
+/* String conversion to/from C.  */
+
  SCM
  scm_from_stringn (const char *str, size_t len, const char *encoding,
                    scm_t_string_failed_conversion_handler handler)
@@ -1437,14 +1511,17 @@ scm_from_stringn (const char *str, size_t len, const char *encoding,
    int wide = 0;
    SCM res;
  
-  if (len == 0)
-    return scm_nullstr;
+  /* The order of these checks is important. */
+  if (!str && len != 0)
+    scm_misc_error ("scm_from_stringn", "NULL string pointer", SCM_EOL);
+  if (len == (size_t) -1)
+    len = strlen (str);
  
-  if (encoding == NULL)
+  if (encoding == NULL || len == 0)
      {
-      /* If encoding is null, use Latin-1.  */
+      /* If encoding is null (or the string is empty), use Latin-1.  */
        char *buf;
-      res = scm_i_make_string (len, &buf);
+      res = scm_i_make_string (len, &buf, 0);
        memcpy (buf, str, len);
        return res;
      }
@@ -1466,11 +1543,10 @@ scm_from_stringn (const char *str, size_t len, const char *encoding,
  
        buf = scm_gc_malloc_pointerless (len, "bytevector");
        memcpy (buf, str, len);
-      bv = scm_c_take_bytevector (buf, len);
+      bv = scm_c_take_gc_bytevector (buf, len);
  
-      scm_encoding_error (__func__, errno,
-                         "input locale conversion error",
-                         encoding, "UTF-32", bv);
+      scm_decoding_error (__func__, errno,
+                         "input locale conversion error", bv);
      }
  
    i = 0;
@@ -1484,7 +1560,7 @@ scm_from_stringn (const char *str, size_t len, const char *encoding,
    if (!wide)
      {
        char *dst;
-      res = scm_i_make_string (u32len, &dst);
+      res = scm_i_make_string (u32len, &dst, 0);
        for (i = 0; i < u32len; i ++)
          dst[i] = (unsigned char) u32[i];
        dst[u32len] = '\0';
@@ -1492,7 +1568,7 @@ scm_from_stringn (const char *str, size_t len, const char *encoding,
    else
      {
        scm_t_wchar *wdst;
-      res = scm_i_make_wide_string (u32len, &wdst);
+      res = scm_i_make_wide_string (u32len, &wdst, 0);
        u32_cpy ((scm_t_uint32 *) wdst, (scm_t_uint32 *) u32, u32len);
        wdst[u32len] = 0;
      }
@@ -1502,55 +1578,72 @@ scm_from_stringn (const char *str, size_t len, const char *encoding,
  }
  
  SCM
-scm_from_latin1_stringn (const char *str, size_t len)
+scm_from_locale_string (const char *str)
  {
-  return scm_from_stringn (str, len, NULL, SCM_FAILED_CONVERSION_ERROR);
+  return scm_from_locale_stringn (str, -1);
  }
  
  SCM
  scm_from_locale_stringn (const char *str, size_t len)
  {
-  const char *enc;
-  scm_t_string_failed_conversion_handler hndl;
-  SCM inport;
-  scm_t_port *pt;
+  return scm_from_stringn (str, len, locale_charset (),
+                           scm_i_default_port_conversion_handler ());
+}
+
+SCM
+scm_from_latin1_string (const char *str)
+{
+  return scm_from_latin1_stringn (str, -1);
+}
+
+SCM
+scm_from_latin1_stringn (const char *str, size_t len)
+{
+  char *buf;
+  SCM result;
  
    if (len == (size_t) -1)
      len = strlen (str);
-  if (len == 0)
-    return scm_nullstr;
  
-  inport = scm_current_input_port ();
-  if (!SCM_UNBNDP (inport) && SCM_OPINPORTP (inport))
-    {
-      pt = SCM_PTAB_ENTRY (inport);
-      enc = pt->encoding;
-      hndl = pt->ilseq_handler;
-    }
-  else
-    {
-      enc = NULL;
-      hndl = SCM_FAILED_CONVERSION_ERROR;
-    }
+  /* Make a narrow string and copy STR as is.  */
+  result = scm_i_make_string (len, &buf, 0);
+  memcpy (buf, str, len);
  
-  return scm_from_stringn (str, len, enc, hndl);
+  return result;
  }
  
  SCM
-scm_from_locale_string (const char *str)
+scm_from_utf8_string (const char *str)
  {
-  if (str == NULL)
-    return scm_nullstr;
+  return scm_from_utf8_stringn (str, -1);
+}
  
-  return scm_from_locale_stringn (str, -1);
+SCM
+scm_from_utf8_stringn (const char *str, size_t len)
+{
+  return scm_from_stringn (str, len, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
  }
  
  SCM
-scm_i_from_utf8_string (const scm_t_uint8 *str)
+scm_from_utf32_string (const scm_t_wchar *str)
  {
-  return scm_from_stringn ((const char *) str,
-                           strlen ((char *) str), "UTF-8",
-                           SCM_FAILED_CONVERSION_ERROR);
+  return scm_from_utf32_stringn (str, -1);
+}
+
+SCM
+scm_from_utf32_stringn (const scm_t_wchar *str, size_t len)
+{
+  SCM result;
+  scm_t_wchar *buf;
+
+  if (len == (size_t) -1)
+    len = u32_strlen ((uint32_t *) str);
+
+  result = scm_i_make_wide_string (len, &buf, 0);
+  memcpy (buf, str, len * sizeof (scm_t_wchar));
+  scm_i_try_narrow_string (result);
+
+  return result;
  }
  
  /* Create a new scheme string from the C string STR.  The memory of
@@ -1575,16 +1668,21 @@ scm_take_locale_string (char *str)
    return scm_take_locale_stringn (str, -1);
  }
  
-/* Change libunistring escapes (\uXXXX and \UXXXXXXXX) to \xXX \uXXXX
-   and \UXXXXXX.  */
+/* Change libunistring escapes (`\uXXXX' and `\UXXXXXXXX') in BUF, a
+   *LENP-byte locale-encoded string, to `\xXX', `\uXXXX', or `\UXXXXXX'.
+   Set *LENP to the size of the resulting string.
+
+   FIXME: This is a hack we should get rid of.  See
+   <http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00004.html>
+   for details.  */
  static void
-unistring_escapes_to_guile_escapes (char **bufp, size_t *lenp)
+unistring_escapes_to_guile_escapes (char *buf, size_t *lenp)
  {
    char *before, *after;
    size_t i, j;
  
-  before = *bufp;
-  after = *bufp;
+  before = buf;
+  after = buf;
    i = 0;
    j = 0;
    while (i < *lenp)
@@ -1627,12 +1725,15 @@ unistring_escapes_to_guile_escapes (char **bufp, size_t *lenp)
          }
      }
    *lenp = j;
-  after = scm_realloc (after, j);
  }
  
-/* Change libunistring escapes (\uXXXX and \UXXXXXXXX) to \xXXXX; */
+/* Change libunistring escapes (`\uXXXX' and `\UXXXXXXXX') in BUF, a
+   *LENP-byte locale-encoded string, to `\xXXXX;'.  Set *LEN to the size
+   of the resulting string.  BUF must be large enough to handle the
+   worst case when `\uXXXX' escapes (6 characters) are replaced by
+   `\xXXXX;' (7 characters).  */
  static void
-unistring_escapes_to_r6rs_escapes (char **bufp, size_t *lenp)
+unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp)
  {
    char *before, *after;
    size_t i, j;
@@ -1641,7 +1742,7 @@ unistring_escapes_to_r6rs_escapes (char **bufp, size_t *lenp)
    size_t max_out_len = (*lenp * 7) / 6 + 1;
    size_t nzeros, ndigits;
  
-  before = *bufp;
+  before = buf;
    after = alloca (max_out_len);
    i = 0;
    j = 0;
@@ -1699,37 +1800,237 @@ unistring_escapes_to_r6rs_escapes (char **bufp, size_t *lenp)
          }
      }
    *lenp = j;
-  before = scm_realloc (before, j);
    memcpy (before, after, j);
  }
  
  char *
-scm_to_latin1_stringn (SCM str, size_t *lenp)
+scm_to_locale_string (SCM str)
  {
-  return scm_to_stringn (str, lenp, NULL, SCM_FAILED_CONVERSION_ERROR);
+  return scm_to_locale_stringn (str, NULL);
  }
  
  char *
  scm_to_locale_stringn (SCM str, size_t *lenp)
  {
-  SCM outport;
-  scm_t_port *pt;
-  const char *enc;
+  return scm_to_stringn (str, lenp,
+                         locale_charset (),
+                         scm_i_default_port_conversion_handler ());
+}
+
+char *
+scm_to_latin1_string (SCM str)
+{
+  return scm_to_latin1_stringn (str, NULL);
+}
+
+char *
+scm_to_latin1_stringn (SCM str, size_t *lenp)
+#define FUNC_NAME "scm_to_latin1_stringn"
+{
+  char *result;
  
-  outport = scm_current_output_port ();
-  if (!SCM_UNBNDP (outport) && SCM_OPOUTPORTP (outport))
+  SCM_VALIDATE_STRING (1, str);
+
+  if (scm_i_is_narrow_string (str))
      {
-      pt = SCM_PTAB_ENTRY (outport);
-      enc = pt->encoding;
+      size_t len = scm_i_string_length (str);
+
+      if (lenp)
+        *lenp = len;
+
+      result = scm_strndup (scm_i_string_data (str), len);
      }
    else
-    enc = NULL;
+    result = scm_to_stringn (str, lenp, NULL,
+                             SCM_FAILED_CONVERSION_ERROR);
+
+  return result;
+}
+#undef FUNC_NAME
+
+char *
+scm_to_utf8_string (SCM str)
+{
+  return scm_to_utf8_stringn (str, NULL);
+}
+
+static size_t
+latin1_u8_strlen (const scm_t_uint8 *str, size_t len)
+{
+  size_t ret, i;
+  for (i = 0, ret = 0; i < len; i++)
+    ret += (str[i] < 128) ? 1 : 2;
+  return ret;
+}
+
+static scm_t_uint8*
+latin1_to_u8 (const scm_t_uint8 *str, size_t latin_len,
+              scm_t_uint8 *u8_result, size_t *u8_lenp)
+{
+  size_t i, n;
+  size_t u8_len = latin1_u8_strlen (str, latin_len);
+
+  if (!(u8_result && u8_lenp && *u8_lenp > u8_len))
+    u8_result = scm_malloc (u8_len + 1);
+  if (u8_lenp)
+    *u8_lenp = u8_len;
  
-  return scm_to_stringn (str, lenp, 
-                         enc,
-                         scm_i_get_conversion_strategy (SCM_BOOL_F));
+  for (i = 0, n = 0; i < latin_len; i++)
+    n += u8_uctomb (u8_result + n, str[i], u8_len - n);
+  if (n != u8_len)
+    abort ();
+  u8_result[n] = 0;
+
+  return u8_result;
+}
+
+/* UTF-8 code table
+
+   (Note that this includes code points that are not allowed by Unicode,
+    but since this function has no way to report an error, and its
+    purpose is to determine the size of destination buffers for
+    libunicode conversion functions, we err on the safe side and handle
+    everything that libunicode might conceivably handle, now or in the
+    future.)
+
+   Char. number range  |        UTF-8 octet sequence
+      (hexadecimal)    |              (binary)
+   --------------------+------------------------------------------------------
+   0000 0000-0000 007F | 0xxxxxxx
+   0000 0080-0000 07FF | 110xxxxx 10xxxxxx
+   0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
+   0001 0000-001F FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+   0020 0000-03FF FFFF | 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+   0400 0000-7FFF FFFF | 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+*/
+
+static size_t
+u32_u8_length_in_bytes (const scm_t_uint32 *str, size_t len)
+{
+  size_t ret, i;
+
+  for (i = 0, ret = 0; i < len; i++)
+    {
+      scm_t_uint32 c = str[i];
+
+      if (c <= 0x7f)
+        ret += 1;
+      else if (c <= 0x7ff)
+        ret += 2;
+      else if (c <= 0xffff)
+        ret += 3;
+      else if (c <= 0x1fffff)
+        ret += 4;
+      else if (c <= 0x3ffffff)
+        ret += 5;
+      else
+        ret += 6;
+    }
+
+  return ret;
  }
  
+char *
+scm_to_utf8_stringn (SCM str, size_t *lenp)
+#define FUNC_NAME "scm_to_utf8_stringn"
+{
+  SCM_VALIDATE_STRING (1, str);
+
+  if (scm_i_is_narrow_string (str))
+    return (char *) latin1_to_u8 ((scm_t_uint8 *) scm_i_string_chars (str),
+                                  scm_i_string_length (str),
+                                  NULL, lenp);
+  else
+    {
+      scm_t_uint32 *chars = (scm_t_uint32 *) scm_i_string_wide_chars (str);
+      scm_t_uint8 *buf, *ret;
+      size_t num_chars = scm_i_string_length (str);
+      size_t num_bytes_predicted, num_bytes_actual;
+
+      num_bytes_predicted = u32_u8_length_in_bytes (chars, num_chars);
+
+      if (lenp)
+        {
+          *lenp = num_bytes_predicted;
+          buf = scm_malloc (num_bytes_predicted);
+        }
+      else
+        {
+          buf = scm_malloc (num_bytes_predicted + 1);
+          buf[num_bytes_predicted] = 0;
+        }
+
+      num_bytes_actual = num_bytes_predicted;
+      ret = u32_to_u8 (chars, num_chars, buf, &num_bytes_actual);
+
+      if (SCM_LIKELY (ret == buf && num_bytes_actual == num_bytes_predicted))
+        return (char *) ret;
+
+      /* An error: a bad codepoint.  */
+      {
+        int saved_errno = errno;
+
+        free (buf);
+        if (!saved_errno)
+          abort ();
+
+        scm_decoding_error ("scm_to_utf8_stringn", errno,
+                            "invalid codepoint in string", str);
+
+        /* Not reached.  */
+        return NULL;
+      }
+    }
+}
+#undef FUNC_NAME
+
+scm_t_wchar *
+scm_to_utf32_string (SCM str)
+{
+  return scm_to_utf32_stringn (str, NULL);
+}
+
+scm_t_wchar *
+scm_to_utf32_stringn (SCM str, size_t *lenp)
+#define FUNC_NAME "scm_to_utf32_stringn"
+{
+  scm_t_wchar *result;
+
+  SCM_VALIDATE_STRING (1, str);
+
+  if (scm_i_is_narrow_string (str))
+    {
+      scm_t_uint8 *codepoints;
+      size_t i, len;
+
+      codepoints = (scm_t_uint8*) scm_i_string_chars (str);
+      len = scm_i_string_length (str);
+      if (lenp)
+       *lenp = len;
+
+      result = scm_malloc ((len + 1) * sizeof (scm_t_wchar));
+      for (i = 0; i < len; i++)
+        result[i] = codepoints[i];
+      result[len] = 0;
+    }
+  else
+    {
+      size_t len;
+
+      len = scm_i_string_length (str);
+      if (lenp)
+       *lenp = len;
+
+      result = scm_malloc ((len + 1) * sizeof (scm_t_wchar));
+      memcpy (result, scm_i_string_wide_chars (str),
+             len * sizeof (scm_t_wchar));
+      result[len] = 0;
+    }
+
+  return result;
+}
+#undef FUNC_NAME
+
  /* Return a malloc(3)-allocated buffer containing the contents of STR encoded
     according to ENCODING.  If LENP is non-NULL, set it to the size in bytes of
     the returned buffer.  If the conversion to ENCODING fails, apply the strategy
@@ -1798,8 +2099,10 @@ scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
  
        if (ret != 0)
          scm_encoding_error (__func__, errno,
-                           "cannot convert to output locale",
-                           "ISO-8859-1", enc, str);
+                           "cannot convert narrow string to output locale",
+                           SCM_BOOL_F,
+                           /* FIXME: Faulty character unknown.  */
+                           SCM_BOOL_F);
      }
    else
      {
@@ -1811,15 +2114,26 @@ scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
                                    NULL, &len);
        if (buf == NULL)
          scm_encoding_error (__func__, errno,
-                           "cannot convert to output locale",
-                           "UTF-32", enc, str);
+                           "cannot convert wide string to output locale",
+                           SCM_BOOL_F,
+                           /* FIXME: Faulty character unknown.  */
+                           SCM_BOOL_F);
      }
    if (handler == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
      {
        if (SCM_R6RS_ESCAPES_P)
-        unistring_escapes_to_r6rs_escapes (&buf, &len);
+       {
+         /* The worst case is if the input string contains all 4-digit
+            hex escapes.  "\uXXXX" (six characters) becomes "\xXXXX;"
+            (seven characters).  Make BUF large enough to hold
+            that.  */
+         buf = scm_realloc (buf, (len * 7) / 6 + 1);
+         unistring_escapes_to_r6rs_escapes (buf, &len);
+       }
        else
-        unistring_escapes_to_guile_escapes (&buf, &len);
+        unistring_escapes_to_guile_escapes (buf, &len);
+
+      buf = scm_realloc (buf, len);
      }
    if (lenp)
      *lenp = len;
@@ -1833,20 +2147,6 @@ scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
    return buf;
  }
  
-char *
-scm_to_locale_string (SCM str)
-{
-  return scm_to_locale_stringn (str, NULL);
-}
-
-scm_t_uint8 *
-scm_i_to_utf8_string (SCM str)
-{
-  char *u8str;
-  u8str = scm_to_stringn (str, NULL, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
-  return (scm_t_uint8 *) u8str;
-}
-
  size_t
  scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len)
  {
@@ -1894,7 +2194,7 @@ normalize_str (SCM string, uninorm_t form)
  
    w_str = u32_normalize (form, w_str, len, NULL, &rlen);  
    
-  ret = scm_i_make_wide_string (rlen, &cbuf);
+  ret = scm_i_make_wide_string (rlen, &cbuf, 0);
    u32_cpy ((scm_t_uint32 *) cbuf, w_str, rlen);
    free (w_str);
  
@@ -1943,8 +2243,9 @@ SCM_DEFINE (scm_string_normalize_nfkd, "string-normalize-nfkd", 1, 0, 0,
  }
  #undef FUNC_NAME
  
-/* converts C scm_array of strings to SCM scm_list of strings. */
-/* If argc < 0, a null terminated scm_array is assumed. */
+/* converts C scm_array of strings to SCM scm_list of strings.
+   If argc < 0, a null terminated scm_array is assumed.
+   The current locale encoding is assumed */
  SCM
  scm_makfromstrs (int argc, char **argv)
  {
@@ -1958,37 +2259,43 @@ scm_makfromstrs (int argc, char **argv)
  }
  
  /* Return a newly allocated array of char pointers to each of the strings
-   in args, with a terminating NULL pointer.  */
+   in args, with a terminating NULL pointer.  The strings are encoded using
+   the current locale. */
  
  char **
  scm_i_allocate_string_pointers (SCM list)
  #define FUNC_NAME "scm_i_allocate_string_pointers"
  {
    char **result;
-  int len = scm_ilength (list);
+  int list_len = scm_ilength (list);
    int i;
  
-  if (len < 0)
+  if (list_len < 0)
      scm_wrong_type_arg_msg (NULL, 0, list, "proper list");
  
-  result = scm_gc_malloc ((len + 1) * sizeof (char *),
+  result = scm_gc_malloc ((list_len + 1) * sizeof (char *),
                           "string pointers");
-  result[len] = NULL;
+  result[list_len] = NULL;
  
-  /* The list might be have been modified in another thread, so
+  /* The list might have been modified in another thread, so
       we check LIST before each access.
     */
-  for (i = 0; i < len && scm_is_pair (list); i++)
+  for (i = 0; i < list_len && scm_is_pair (list); i++)
      {
-      SCM str;
-      size_t len;
-
-      str = SCM_CAR (list);
-      len = scm_c_string_length (str);
-
-      result[i] = scm_gc_malloc_pointerless (len + 1, "string pointers");
-      memcpy (result[i], scm_i_string_chars (str), len);
+      SCM str = SCM_CAR (list);
+      size_t len;  /* String length in bytes */
+      char *c_str = scm_to_locale_stringn (str, &len);
+
+      /* OPTIMIZE-ME: Right now, scm_to_locale_stringn always uses
+        scm_malloc to allocate the returned string, which must be
+        explicitly deallocated.  This forces us to copy the string a
+        second time into a new buffer.  Ideally there would be variants
+        of scm_to_*_stringn that can return garbage-collected buffers. */
+
+      result[i] = scm_gc_malloc_pointerless (len + 1, "string");
+      memcpy (result[i], c_str, len);
        result[i][len] = '\0';
+      free (c_str);
  
        list = SCM_CDR (list);
      }
@@ -2106,7 +2413,7 @@ SCM_VECTOR_IMPLEMENTATION (SCM_ARRAY_ELEMENT_TYPE_CHAR, scm_make_string)
  void
  scm_init_strings ()
  {
-  scm_nullstr = scm_i_make_string (0, NULL);
+  scm_nullstr = scm_i_make_string (0, NULL, 0);
  
  #include "libguile/strings.x"
  }