From: Ludovic Courtès Date: Thu, 18 Mar 2010 19:23:12 +0000 (+0100) Subject: Improve encoding error reporting. X-Git-Url: http://git.hcoop.net/bpt/guile.git/commitdiff_plain/ef7e4ba373fbd68ea87c4ba1541a58b38bec12b3 Improve encoding error reporting. * libguile/strings.c (scm_encoding_error): Change arguments to convey more information. Raise the error with `scm_throw ()', passing all the information to the handler. (scm_from_stringn, scm_to_stringn): Update accordingly. * test-suite/tests/ports.test ("string ports")["wrong encoding"]: Check the arguments passed to the `throw' handler. * test-suite/tests/r6rs-ports.test ("7.2.11 Binary Output")["put-bytevector with wrong-encoding string port"]: Likewise. --- diff --git a/libguile/strings.c b/libguile/strings.c index eb9e3896e..d136d9835 100644 --- a/libguile/strings.c +++ b/libguile/strings.c @@ -1393,9 +1393,21 @@ scm_is_string (SCM obj) SCM_SYMBOL (scm_encoding_error_key, "encoding-error"); static void -scm_encoding_error (const char *subr, const char *message, SCM args) -{ - scm_error (scm_encoding_error_key, subr, message, args, SCM_BOOL_F); +scm_encoding_error (const char *subr, int err, const char *message, + const char *from, const char *to, SCM string_or_bv) +{ + /* Raise an exception that conveys all the information needed to debug the + problem. Only perform locale conversions that are safe; in particular, + don't try to display STRING_OR_BV when it's a string since converting it to + the output locale may fail. */ + scm_throw (scm_encoding_error_key, + scm_list_n (scm_from_locale_string (subr), + scm_from_locale_string (message), + scm_from_int (err), + scm_from_locale_string (from), + scm_from_locale_string (to), + string_or_bv, + SCM_UNDEFINED)); } SCM @@ -1427,23 +1439,20 @@ scm_from_stringn (const char *str, size_t len, const char *encoding, NULL, NULL, &u32len); - if (u32 == NULL) + if (SCM_UNLIKELY (u32 == NULL)) { - if (errno == ENOMEM) - scm_memory_error ("locale string conversion"); - else - { - /* There are invalid sequences in the input string. */ - SCM errstr; - char *dst; - errstr = scm_i_make_string (len, &dst); - memcpy (dst, str, len); - scm_encoding_error (NULL, - "input locale conversion error from ~s: ~s", - scm_list_2 (scm_from_locale_string (encoding), - errstr)); - scm_remember_upto_here_1 (errstr); - } + /* Raise an error and pass the raw C string as a bytevector to the `throw' + handler. */ + SCM bv; + signed char *buf; + + buf = scm_gc_malloc_pointerless (len, "bytevector"); + memcpy (buf, str, len); + bv = scm_c_take_bytevector (buf, len); + + scm_encoding_error (__func__, errno, + "input locale conversion error", + encoding, "UTF-32", bv); } i = 0; @@ -1759,8 +1768,9 @@ scm_to_stringn (SCM str, size_t *lenp, const char *encoding, &buf, &len); if (ret != 0) - scm_encoding_error (NULL, "cannot convert to output locale ~s: \"~s\"", - scm_list_2 (scm_from_locale_string (enc), str)); + scm_encoding_error (__func__, errno, + "cannot convert to output locale", + "ISO-8859-1", enc, str); } else { @@ -1771,8 +1781,9 @@ scm_to_stringn (SCM str, size_t *lenp, const char *encoding, NULL, NULL, &len); if (buf == NULL) - scm_encoding_error (NULL, "cannot convert to output locale ~s: \"~s\"", - scm_list_2 (scm_from_locale_string (enc), str)); + scm_encoding_error (__func__, errno, + "cannot convert to output locale", + "UTF-32", enc, str); } if (handler == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE) { diff --git a/test-suite/tests/ports.test b/test-suite/tests/ports.test index 72dcb6379..1d8bd5052 100644 --- a/test-suite/tests/ports.test +++ b/test-suite/tests/ports.test @@ -335,14 +335,20 @@ (lambda () (display str))))))) - (pass-if-exception "wrong encoding" - exception:encoding-error + (pass-if "wrong encoding" (let ((str "ĉu bone?")) - ;; Latin-1 cannot represent ‘ĉ’. - (with-fluids ((%default-port-encoding "ISO-8859-1")) - (with-output-to-string - (lambda () - (display str))))))) + (catch 'encoding-error + (lambda () + ;; Latin-1 cannot represent ‘ĉ’. + (with-fluids ((%default-port-encoding "ISO-8859-1")) + (with-output-to-string + (lambda () + (display str))))) + (lambda (key subr message errno from to faulty-str) + (and (eq? faulty-str str) + (string=? from "UTF-32") + (string=? to "ISO-8859-1") + (string? (strerror errno)))))))) (with-test-prefix "call-with-output-string" diff --git a/test-suite/tests/r6rs-ports.test b/test-suite/tests/r6rs-ports.test index 1d60991e9..e41d18aab 100644 --- a/test-suite/tests/r6rs-ports.test +++ b/test-suite/tests/r6rs-ports.test @@ -230,14 +230,19 @@ (lambda (port) (put-bytevector port bv))))))) - (pass-if-exception "put-bytevector with wrong-encoding string port" - exception:encoding-error + (pass-if "put-bytevector with wrong-encoding string port" (let* ((str "hello, world") (bv (string->utf16 str))) - (with-fluids ((%default-port-encoding "UTF-32")) - (call-with-output-string - (lambda (port) - (put-bytevector port bv))))))) + (catch 'encoding-error + (lambda () + (with-fluids ((%default-port-encoding "UTF-32")) + (call-with-output-string + (lambda (port) + (put-bytevector port bv))))) + (lambda (key subr message errno from to faulty-bv) + (and (bytevector=? faulty-bv bv) + (string=? to "UTF-32") + (string? (strerror errno)))))))) (with-test-prefix "7.2.7 Input Ports"