From: Andy Wingo Date: Fri, 24 Feb 2012 22:05:02 +0000 (+0100) Subject: port i/o optimizations for iso-8859-1 X-Git-Url: http://git.hcoop.net/bpt/guile.git/commitdiff_plain/79eb47ea47650ef42c545931726277a7118a0210 port i/o optimizations for iso-8859-1 * libguile/ports.h (scm_t_port_encoding_mode): * libguile/ports.c (scm_c_make_port_with_encoding): (scm_i_set_port_encoding_x): Add special treatment for latin1 encoding. (get_latin1_codepoint, get_codepoint): Add latin1 fast-path. * libguile/print.c (display_string_as_latin1): Add latin1 fastpath. --- diff --git a/libguile/ports.c b/libguile/ports.c index 5b98bf9f7..e17ea069d 100644 --- a/libguile/ports.c +++ b/libguile/ports.c @@ -605,6 +605,8 @@ scm_c_make_port_with_encoding (scm_t_bits tag, unsigned long mode_bits, entry->encoding = encoding ? scm_gc_strdup (encoding, "port") : NULL; if (encoding && strcmp (encoding, "UTF-8") == 0) entry->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8; + else if (!encoding || strcmp (encoding, "ISO-8859-1") == 0) + entry->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1; else entry->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV; entry->ilseq_handler = handler; @@ -941,15 +943,18 @@ scm_i_set_port_encoding_x (SCM port, const char *encoding) pt = SCM_PTAB_ENTRY (port); prev = pt->iconv_descriptors; - if (encoding == NULL) - encoding = "ISO-8859-1"; - - if (strcmp (encoding, "UTF-8") == 0) + if (encoding && strcmp (encoding, "UTF-8") == 0) { pt->encoding = "UTF-8"; pt->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8; pt->iconv_descriptors = NULL; } + else if (!encoding || strcmp (encoding, "ISO-8859-1") == 0) + { + pt->encoding = "ISO-8859-1"; + pt->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1; + pt->iconv_descriptors = NULL; + } else { /* Open descriptors before mutating the port. */ @@ -1582,6 +1587,26 @@ get_utf8_codepoint (SCM port, scm_t_wchar *codepoint, #undef ASSERT_NOT_EOF } +/* Read an ISO-8859-1 codepoint (a byte) from PORT. 
On success, return + 0 and set *CODEPOINT to the codepoint that was read, store that byte + in BUF, and set *LEN to the length in bytes. At end of input, set + *CODEPOINT to EOF and *LEN to 0. This function always returns 0. */ +static int +get_latin1_codepoint (SCM port, scm_t_wchar *codepoint, + char buf[SCM_MBCHAR_BUF_SIZE], size_t *len) +{ + *codepoint = scm_get_byte_or_eof_unlocked (port); + + if (*codepoint == EOF) + *len = 0; + else + { + *len = 1; + buf[0] = *codepoint; + } + return 0; +} + /* Likewise, read a byte sequence from PORT, passing it through its input conversion descriptor. */ static int @@ -1662,6 +1687,8 @@ get_codepoint (SCM port, scm_t_wchar *codepoint, if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8) err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len); + else if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1) + err = get_latin1_codepoint (port, codepoint, buf, len); else err = get_iconv_codepoint (port, codepoint, buf, len); diff --git a/libguile/ports.h b/libguile/ports.h index c42b501fd..2d277e031 100644 --- a/libguile/ports.h +++ b/libguile/ports.h @@ -50,6 +50,7 @@ typedef enum scm_t_port_rw_active { typedef enum scm_t_port_encoding_mode { SCM_PORT_ENCODING_MODE_UTF8, + SCM_PORT_ENCODING_MODE_LATIN1, SCM_PORT_ENCODING_MODE_ICONV } scm_t_port_encoding_mode; diff --git a/libguile/print.c b/libguile/print.c index a1bf5eded..eb601322e 100644 --- a/libguile/print.c +++ b/libguile/print.c @@ -853,6 +853,54 @@ display_string_as_utf8 (const void *str, int narrow_p, size_t len, return len; } +/* Write STR to PORT as ISO-8859-1. STR is a LEN-codepoint string; it + is narrow if NARROW_P is true, wide otherwise. Return the number of codepoints written. 
*/ +static size_t +display_string_as_latin1 (const void *str, int narrow_p, size_t len, + SCM port, + scm_t_string_failed_conversion_handler strategy) +{ + size_t printed = 0; + + if (narrow_p) + { + scm_lfwrite_unlocked (str, len, port); + return len; + } + + while (printed < len) + { + char buf[256]; + size_t i; + + for (i = 0; i < sizeof(buf) && printed < len; i++, printed++) + { + scm_t_wchar c = STR_REF (str, printed); + + if (c < 256) + buf[i] = c; + else + break; + } + + scm_lfwrite_unlocked (buf, i, port); + + if (i < sizeof(buf) && printed < len) + { + if (strategy == SCM_FAILED_CONVERSION_ERROR) + break; + else if (strategy == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE) + write_character_escaped (STR_REF (str, printed), 1, port); + else + /* STRATEGY is `SCM_FAILED_CONVERSION_QUESTION_MARK'. */ + display_string ("?", 1, 1, port, strategy); + printed++; + } + } + + return printed; +} + /* Convert STR through PORT's output conversion descriptor and write the output to PORT. Return the number of codepoints written. */ static size_t @@ -968,9 +1016,10 @@ display_string (const void *str, int narrow_p, if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8) return display_string_as_utf8 (str, narrow_p, len, port); + else if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1) + return display_string_as_latin1 (str, narrow_p, len, port, strategy); else - return display_string_using_iconv (str, narrow_p, len, - port, strategy); + return display_string_using_iconv (str, narrow_p, len, port, strategy); } /* Attempt to display CH to PORT according to STRATEGY. Return non-zero