Improve encoding error reporting.
authorLudovic Courtès <ludo@gnu.org>
Thu, 18 Mar 2010 19:23:12 +0000 (20:23 +0100)
committerLudovic Courtès <ludo@gnu.org>
Thu, 18 Mar 2010 19:41:03 +0000 (20:41 +0100)
* libguile/strings.c (scm_encoding_error): Change arguments to convey
  more information.  Raise the error with `scm_throw ()', passing all
  the information to the handler.
  (scm_from_stringn, scm_to_stringn): Update accordingly.

* test-suite/tests/ports.test ("string ports")["wrong encoding"]: Check
  the arguments passed to the `throw' handler.

* test-suite/tests/r6rs-ports.test ("7.2.11 Binary
  Output")["put-bytevector with wrong-encoding string port"]: Likewise.

libguile/strings.c
test-suite/tests/ports.test
test-suite/tests/r6rs-ports.test

index eb9e389..d136d98 100644 (file)
@@ -1393,9 +1393,21 @@ scm_is_string (SCM obj)
 
 SCM_SYMBOL (scm_encoding_error_key, "encoding-error");
 static void
-scm_encoding_error (const char *subr, const char *message, SCM args)
-{
-  scm_error (scm_encoding_error_key, subr, message, args, SCM_BOOL_F);
+scm_encoding_error (const char *subr, int err, const char *message,
+                   const char *from, const char *to, SCM string_or_bv)
+{
+  /* Raise an exception that conveys all the information needed to debug the
+     problem.  Only perform locale conversions that are safe; in particular,
+     don't try to display STRING_OR_BV when it's a string since converting it to
+     the output locale may fail.  */
+  scm_throw (scm_encoding_error_key,
+            scm_list_n (scm_from_locale_string (subr),
+                        scm_from_locale_string (message),
+                        scm_from_int (err),
+                        scm_from_locale_string (from),
+                        scm_from_locale_string (to),
+                        string_or_bv,
+                        SCM_UNDEFINED));
 }
 
 SCM
@@ -1427,23 +1439,20 @@ scm_from_stringn (const char *str, size_t len, const char *encoding,
                                                 NULL,
                                                 NULL, &u32len);
 
-  if (u32 == NULL)
+  if (SCM_UNLIKELY (u32 == NULL))
     {
-      if (errno == ENOMEM)
-        scm_memory_error ("locale string conversion");
-      else
-        {
-          /* There are invalid sequences in the input string.  */
-          SCM errstr;
-          char *dst;
-          errstr = scm_i_make_string (len, &dst);
-          memcpy (dst, str, len);
-          scm_encoding_error (NULL,
-                             "input locale conversion error from ~s: ~s",
-                             scm_list_2 (scm_from_locale_string (encoding),
-                                         errstr));
-          scm_remember_upto_here_1 (errstr);
-        }
+      /* Raise an error and pass the raw C string as a bytevector to the `throw'
+        handler.  */
+      SCM bv;
+      signed char *buf;
+
+      buf = scm_gc_malloc_pointerless (len, "bytevector");
+      memcpy (buf, str, len);
+      bv = scm_c_take_bytevector (buf, len);
+
+      scm_encoding_error (__func__, errno,
+                         "input locale conversion error",
+                         encoding, "UTF-32", bv);
     }
 
   i = 0;
@@ -1759,8 +1768,9 @@ scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
                          &buf, &len);
 
       if (ret != 0)
-        scm_encoding_error (NULL, "cannot convert to output locale ~s: \"~s\"",
-                            scm_list_2 (scm_from_locale_string (enc), str));
+        scm_encoding_error (__func__, errno,
+                           "cannot convert to output locale",
+                           "ISO-8859-1", enc, str);
     }
   else
     {
@@ -1771,8 +1781,9 @@ scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
                                   NULL,
                                   NULL, &len);
       if (buf == NULL)
-        scm_encoding_error (NULL, "cannot convert to output locale ~s: \"~s\"",
-                            scm_list_2 (scm_from_locale_string (enc), str));
+        scm_encoding_error (__func__, errno,
+                           "cannot convert to output locale",
+                           "UTF-32", enc, str);
     }
   if (handler == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
     {
index 72dcb63..1d8bd50 100644 (file)
                   (lambda ()
                     (display str)))))))
 
-  (pass-if-exception "wrong encoding"
-    exception:encoding-error
+  (pass-if "wrong encoding"
     (let ((str "ĉu bone?"))
-      ;; Latin-1 cannot represent ‘ĉ’.
-      (with-fluids ((%default-port-encoding "ISO-8859-1"))
-        (with-output-to-string
-          (lambda ()
-            (display str)))))))
+      (catch 'encoding-error
+        (lambda ()
+          ;; Latin-1 cannot represent ‘ĉ’.
+          (with-fluids ((%default-port-encoding "ISO-8859-1"))
+            (with-output-to-string
+              (lambda ()
+                (display str)))))
+        (lambda (key subr message errno from to faulty-str)
+          (and (eq? faulty-str str)
+               (string=? from "UTF-32")
+               (string=? to "ISO-8859-1")
+               (string? (strerror errno))))))))
 
 (with-test-prefix "call-with-output-string"
 
index 1d60991..e41d18a 100644 (file)
                   (lambda (port)
                     (put-bytevector port bv)))))))
 
-  (pass-if-exception "put-bytevector with wrong-encoding string port"
-    exception:encoding-error
+  (pass-if "put-bytevector with wrong-encoding string port"
     (let* ((str "hello, world")
            (bv  (string->utf16 str)))
-      (with-fluids ((%default-port-encoding "UTF-32"))
-        (call-with-output-string
-          (lambda (port)
-            (put-bytevector port bv)))))))
+      (catch 'encoding-error
+        (lambda ()
+          (with-fluids ((%default-port-encoding "UTF-32"))
+            (call-with-output-string
+              (lambda (port)
+                (put-bytevector port bv)))))
+        (lambda (key subr message errno from to faulty-bv)
+          (and (bytevector=? faulty-bv bv)
+               (string=? to "UTF-32")
+               (string? (strerror errno))))))))
 
 \f
 (with-test-prefix "7.2.7 Input Ports"