entry->encoding = encoding ? scm_gc_strdup (encoding, "port") : NULL;
if (encoding && strcmp (encoding, "UTF-8") == 0)
entry->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
+ else if (!encoding || strcmp (encoding, "ISO-8859-1") == 0)
+ entry->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
else
entry->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
entry->ilseq_handler = handler;
pt = SCM_PTAB_ENTRY (port);
prev = pt->iconv_descriptors;
- if (encoding == NULL)
- encoding = "ISO-8859-1";
-
- if (strcmp (encoding, "UTF-8") == 0)
+ if (encoding && strcmp (encoding, "UTF-8") == 0)
{
pt->encoding = "UTF-8";
pt->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
pt->iconv_descriptors = NULL;
}
+ else if (!encoding || strcmp (encoding, "ISO-8859-1") == 0)
+ {
+ pt->encoding = "ISO-8859-1";
+ pt->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
+ pt->iconv_descriptors = NULL;
+ }
else
{
/* Open descriptors before mutating the port. */
#undef ASSERT_NOT_EOF
}
+/* Read an ISO-8859-1 codepoint (a single byte) from PORT.
+
+   The value returned by `scm_get_byte_or_eof_unlocked' is stored in
+   *CODEPOINT as is: the byte value is used directly as the codepoint,
+   so no decoding step takes place.
+
+   If that value is EOF, *LEN is set to 0 and BUF is left untouched.
+   Otherwise the byte is stored in BUF[0] and *LEN is set to 1.  Note
+   that BUF receives the raw byte that was read, not a UTF-8 encoding
+   of the codepoint.
+
+   Always return 0; this function has no failure path and never
+   returns `EILSEQ'.  */
+static int
+get_latin1_codepoint (SCM port, scm_t_wchar *codepoint,
+ char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
+{
+ *codepoint = scm_get_byte_or_eof_unlocked (port);
+
+ if (*codepoint == EOF)
+ *len = 0; /* End of input: nothing is stored in BUF. */
+ else
+ {
+ *len = 1;
+ buf[0] = *codepoint; /* The byte itself, narrowed to `char'. */
+ }
+ return 0; /* Unconditional: no error is ever reported here. */
+}
+
/* Likewise, read a byte sequence from PORT, passing it through its
input conversion descriptor. */
static int
if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len);
+ else if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
+ err = get_latin1_codepoint (port, codepoint, buf, len);
else
err = get_iconv_codepoint (port, codepoint, buf, len);
return len;
}
+/* Write STR to PORT as ISO-8859-1.  STR is a LEN-codepoint string; it
+   is narrow if NARROW_P is true, wide otherwise.
+
+   A narrow string is passed to `scm_lfwrite_unlocked' unchanged, in a
+   single call (presumably because narrow strings are already stored
+   one Latin-1 byte per codepoint -- confirm against the string
+   representation).
+
+   A wide string is copied, one byte per codepoint, into a 256-byte
+   stack buffer that is written out each time it fills up or the input
+   is exhausted.  A codepoint that is not below 256 ends the current
+   chunk early; the bytes gathered so far are written, and the
+   offending codepoint is then handled according to STRATEGY:
+
+     - `SCM_FAILED_CONVERSION_ERROR': stop immediately, without
+       consuming the codepoint;
+     - `SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE': emit it through
+       `write_character_escaped' and continue;
+     - anything else: emit "?" through `display_string' and continue.
+
+   Return the number of codepoints consumed.  This is LEN, except when
+   the error strategy stopped the loop, in which case it is the index
+   of the first codepoint that could not be written.  */
+static size_t
+display_string_as_latin1 (const void *str, int narrow_p, size_t len,
+ SCM port,
+ scm_t_string_failed_conversion_handler strategy)
+{
+ size_t printed = 0; /* Index of the next codepoint of STR to handle. */
+
+ if (narrow_p)
+ {
+ scm_lfwrite_unlocked (str, len, port);
+ return len;
+ }
+
+ while (printed < len)
+ {
+ char buf[256];
+ size_t i; /* Number of bytes gathered in BUF for this chunk. */
+
+ for (i = 0; i < sizeof(buf) && printed < len; i++, printed++)
+ {
+ scm_t_wchar c = STR_REF (str, printed);
+
+ if (c < 256)
+ buf[i] = c;
+ else
+ break; /* Leaves PRINTED pointing at the offending codepoint. */
+ }
+
+ scm_lfwrite_unlocked (buf, i, port);
+
+ if (i < sizeof(buf) && printed < len) /* Only true after the `break' above. */
+ {
+ if (strategy == SCM_FAILED_CONVERSION_ERROR)
+ break; /* Leave the outer loop; PRINTED is not advanced. */
+ else if (strategy == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
+ write_character_escaped (STR_REF (str, printed), 1, port);
+ else
+ /* STRATEGY is `SCM_FAILED_CONVERSION_QUESTION_MARK'. */
+ display_string ("?", 1, 1, port, strategy);
+ printed++; /* Step past the codepoint that was just substituted. */
+ }
+ }
+
+ return printed;
+}
+
/* Convert STR through PORT's output conversion descriptor and write the
output to PORT. Return the number of codepoints written. */
static size_t
if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
return display_string_as_utf8 (str, narrow_p, len, port);
+ else if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
+ return display_string_as_latin1 (str, narrow_p, len, port, strategy);
else
- return display_string_using_iconv (str, narrow_p, len,
- port, strategy);
+ return display_string_using_iconv (str, narrow_p, len, port, strategy);
}
/* Attempt to display CH to PORT according to STRATEGY. Return non-zero