fix bug where scm_from_utf8_stringn would not detect bad utf-8

author Andy Wingo <wingo@pobox.com>
Tue, 15 Jan 2013 10:01:10 +0000 (11:01 +0100)

committer Andy Wingo <wingo@pobox.com>
Tue, 15 Jan 2013 10:01:10 +0000 (11:01 +0100)

diff --git a/libguile/bytevectors.c b/libguile/bytevectors.c

index db132d4..4ce90eb 100644 (file)
--- a/libguile/bytevectors.c
+++ b/libguile/bytevectors.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+/* Copyright (C) 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
   *
   * This library is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public License
@@ -2050,8 +2050,7 @@ SCM_DEFINE (scm_utf8_to_string, "utf8->string",
  
    c_utf_len = SCM_BYTEVECTOR_LENGTH (utf);
    c_utf = (char *) SCM_BYTEVECTOR_CONTENTS (utf);
-  str = scm_from_stringn (c_utf, c_utf_len, "UTF-8",
-                          SCM_FAILED_CONVERSION_ERROR);
+  str = scm_from_utf8_stringn (c_utf, c_utf_len);
  
    return (str);
  }
diff --git a/libguile/strings.c b/libguile/strings.c

index 5130cb3..1e89e63 100644 (file)
--- a/libguile/strings.c
+++ b/libguile/strings.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
   * 
   * This library is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public License
@@ -1526,7 +1526,8 @@ scm_from_stringn (const char *str, size_t len, const char *encoding,
  
    if (encoding == NULL || len == 0)
      return scm_from_latin1_stringn (str, len);
-  else if (strcmp (encoding, "UTF-8") == 0)
+  else if (strcmp (encoding, "UTF-8") == 0
+           && handler == SCM_FAILED_CONVERSION_ERROR)
      return scm_from_utf8_stringn (str, len);
  
    u32len = 0;
@@ -1639,7 +1640,7 @@ scm_from_utf8_stringn (const char *str, size_t len)
  
            nbytes = u8_mbtouc (&c, ustr + i, len - i);
  
-          if (nbytes < 0)
+          if (c == 0xfffd)
              /* Bad UTF-8.  */
              decoding_error (__func__, errno, str, len);
  
diff --git a/libguile/symbols.c b/libguile/symbols.c

index fd7e214..f93833b 100644 (file)
--- a/libguile/symbols.c
+++ b/libguile/symbols.c
@@ -1,5 +1,5 @@
  /* Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2003, 2004,
- *   2006, 2009, 2011 Free Software Foundation, Inc.
+ *   2006, 2009, 2011, 2013 Free Software Foundation, Inc.
   *
   * This library is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public License
@@ -167,7 +167,7 @@ utf8_string_equals_wide_string (const scm_t_uint8 *narrow, size_t nlen,
        nbytes = u8_mbtouc (&c, narrow + byte_idx, nlen - byte_idx);
        if (nbytes == 0)
          break;
-      else if (nbytes < 0)
+      else if (c == 0xfffd)
          /* Bad UTF-8.  */
          return 0;
        else if (c != wide[char_idx])
diff --git a/test-suite/tests/iconv.test b/test-suite/tests/iconv.test

index 9083cd2..be36336 100644 (file)
--- a/test-suite/tests/iconv.test
+++ b/test-suite/tests/iconv.test
@@ -94,6 +94,11 @@
      (pass-if-exception "misparse latin1 as utf8" exception:decoding-error
        (bytevector->string (string->bytevector s "latin1") "utf-8"))
  
+    (pass-if "misparse latin1 as utf8 with substitutions"
+      (equal? (bytevector->string (string->bytevector s "latin1")
+                                  "utf-8" 'substitute)
+              "?t?"))
+
      (pass-if-exception "misparse latin1 as ascii" exception:decoding-error
        (bytevector->string (string->bytevector s "latin1") "ascii"))))
author	Andy Wingo <wingo@pobox.com>
	Tue, 15 Jan 2013 10:01:10 +0000 (11:01 +0100)
committer	Andy Wingo <wingo@pobox.com>
	Tue, 15 Jan 2013 10:01:10 +0000 (11:01 +0100)
libguile/bytevectors.c		patch \| blob \| blame \| history
libguile/strings.c		patch \| blob \| blame \| history
libguile/symbols.c		patch \| blob \| blame \| history
test-suite/tests/iconv.test		patch \| blob \| blame \| history