* libguile/ports.c (get_codepoint): Reset `pt->input_cd' upon failure.
If `pt->ilseq_handler' is `SCM_ICONVEH_QUESTION_MARK', then return a
question mark.
[failure]: Use `scm_encoding_error' when raising an error.
* test-suite/lib.scm (exception:encoding-error): Adjust regexp.
* test-suite/tests/ports.test ("string ports")["read-char, wrong
encoding, error", "read-char, wrong encoding, escape", "read-char,
wrong encoding, substitute"]: New tests.
failure. */
static scm_t_wchar
get_codepoint (SCM port, char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
+#define FUNC_NAME "scm_getc"
{
int err, byte_read;
size_t bytes_consumed, output_size;
}
if (err != 0)
- goto failure;
+ {
+ /* Reset the `iconv' state. */
+ iconv (pt->input_cd, NULL, NULL, NULL, NULL);
+
+ if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
+ codepoint = '?';
+ else
+ /* Fail when the strategy is SCM_ICONVEH_ERROR or
+ SCM_ICONVEH_ESCAPE_SEQUENCE (the latter doesn't make sense
+ for input encoding errors.) */
+ goto failure;
+ }
+ else
+ /* Convert the UTF8_BUF sequence to a Unicode code point. */
+ codepoint = utf8_to_codepoint (utf8_buf, output_size);
- /* Convert the UTF8_BUF sequence to a Unicode code point. */
- codepoint = utf8_to_codepoint (utf8_buf, output_size);
update_port_lf (codepoint, port);
*len = bytes_consumed;
failure:
{
- char *err_buf;
- SCM err_str = scm_i_make_string (bytes_consumed, &err_buf);
- memcpy (err_buf, buf, bytes_consumed);
-
- if (err == EILSEQ)
- scm_misc_error (NULL, "input encoding error for ~s: ~s",
- scm_list_2 (scm_from_locale_string (scm_i_get_port_encoding (port)),
- err_str));
- else
- scm_misc_error (NULL, "input encoding error (invalid) for ~s: ~s\n",
- scm_list_2 (scm_from_locale_string (scm_i_get_port_encoding (port)),
- err_str));
+ SCM bv;
+
+ bv = scm_c_make_bytevector (bytes_consumed);
+ memcpy (SCM_BYTEVECTOR_CONTENTS (bv), buf, bytes_consumed);
+ scm_encoding_error (FUNC_NAME, err, "input decoding error",
+ pt->encoding, "UTF-8", bv);
}
/* Never gets here. */
return 0;
}
+#undef FUNC_NAME
/* Read a codepoint from PORT and return it. */
scm_t_wchar
;;;; test-suite/lib.scm --- generic support for testing
-;;;; Copyright (C) 1999, 2000, 2001, 2004, 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+;;;; Copyright (C) 1999, 2000, 2001, 2004, 2006, 2007, 2009, 2010,
+;;;; 2011 Free Software Foundation, Inc.
;;;;
;;;; This program is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
(define exception:system-error
(cons 'system-error ".*"))
(define exception:encoding-error
- (cons 'encoding-error "(cannot convert to output locale|input locale conversion error)"))
+ (cons 'encoding-error "(cannot convert to output locale|input (locale conversion|decoding) error)"))
(define exception:miscellaneous-error
(cons 'misc-error "^.*"))
(define exception:read-error
#:use-module (test-suite guile-test)
#:use-module (ice-9 popen)
#:use-module (ice-9 rdelim)
- #:use-module (rnrs bytevectors))
+ #:use-module (rnrs bytevectors)
+ #:use-module ((rnrs io ports) #:select (open-bytevector-input-port)))
(define (display-line . args)
(for-each display args)
(char=? (peek-char p) #\안)
(char=? (peek-char p) #\안)
(= (port-line p) 0)
- (= (port-column p) 0)))))
+ (= (port-column p) 0))))
+
+ (pass-if-exception "read-char, wrong encoding, error"
+ exception:encoding-error
+ (let ((p (with-fluids ((%default-port-encoding "UTF-8"))
+ (open-bytevector-input-port #vu8(255 1 2 3)))))
+ (set-port-conversion-strategy! p 'error)
+ (read-char p)
+ #t))
+
+ (pass-if-exception "read-char, wrong encoding, escape"
+ exception:encoding-error
+ ;; `escape' should behave like `error'.
+ (let ((p (with-fluids ((%default-port-encoding "UTF-8"))
+ (open-bytevector-input-port #vu8(255 1 2 3)))))
+ (set-port-conversion-strategy! p 'escape)
+ (read-char p)
+ #t))
+
+ (pass-if "read-char, wrong encoding, substitute"
+ (let ((p (with-fluids ((%default-port-encoding "UTF-8"))
+ (open-bytevector-input-port #vu8(255 206 187 206 188)))))
+ (set-port-conversion-strategy! p 'substitute)
+ (equal? (list (read-char p) (read-char p) (read-char p))
+ '(#\? #\λ #\μ)))))
(with-test-prefix "call-with-output-string"