(SCM port),
"Return the next character available from @var{port}, updating\n"
"@var{port} to point to the following character. If no more\n"
- "characters are available, the end-of-file object is returned.")
+ "characters are available, the end-of-file object is returned.\n"
+ "\n"
+ "When @var{port}'s data cannot be decoded according to its\n"
+ "character encoding, a @code{decoding-error} is raised and\n"
+ "@var{port} points past the erroneous byte sequence.\n")
#define FUNC_NAME s_scm_read_char
{
scm_t_wchar c;
return codepoint;
}
-/* Read a codepoint from PORT and return it. Fill BUF with the byte
- representation of the codepoint in PORT's encoding, and set *LEN to
- the length in bytes of that representation. Raise an error on
- failure. */
-static scm_t_wchar
-get_codepoint (SCM port, char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
+/* Read a codepoint from PORT and return it in *CODEPOINT. Fill BUF
+ with the byte representation of the codepoint in PORT's encoding, and
+ set *LEN to the length in bytes of that representation. Return 0 on
+ success and an errno value on error. */
+static int
+get_codepoint (SCM port, scm_t_wchar *codepoint,
+ char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
{
int err, byte_read;
size_t bytes_consumed, output_size;
- scm_t_wchar codepoint;
char *output;
scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
scm_t_port *pt = SCM_PTAB_ENTRY (port);
if (byte_read == EOF)
{
if (bytes_consumed == 0)
- return (scm_t_wchar) EOF;
+ {
+ *codepoint = (scm_t_wchar) EOF;
+ *len = 0;
+ return 0;
+ }
else
continue;
}
output_size = sizeof (utf8_buf) - output_left;
}
- if (err != 0)
- goto failure;
+ if (SCM_UNLIKELY (err != 0))
+ {
+ /* Reset the `iconv' state. */
+ iconv (pt->input_cd, NULL, NULL, NULL, NULL);
- /* Convert the UTF8_BUF sequence to a Unicode code point. */
- codepoint = utf8_to_codepoint (utf8_buf, output_size);
- update_port_lf (codepoint, port);
+ if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
+ {
+ *codepoint = '?';
+ err = 0;
+ }
- *len = bytes_consumed;
+ /* Fail when the strategy is SCM_ICONVEH_ERROR or
+ SCM_ICONVEH_ESCAPE_SEQUENCE (the latter doesn't make sense for
+ input encoding errors.) */
+ }
+ else
+ /* Convert the UTF8_BUF sequence to a Unicode code point. */
+ *codepoint = utf8_to_codepoint (utf8_buf, output_size);
- return codepoint;
+ if (SCM_LIKELY (err == 0))
+ update_port_lf (*codepoint, port);
- failure:
- {
- char *err_buf;
- SCM err_str = scm_i_make_string (bytes_consumed, &err_buf);
- memcpy (err_buf, buf, bytes_consumed);
-
- if (err == EILSEQ)
- scm_misc_error (NULL, "input encoding error for ~s: ~s",
- scm_list_2 (scm_from_locale_string (scm_i_get_port_encoding (port)),
- err_str));
- else
- scm_misc_error (NULL, "input encoding error (invalid) for ~s: ~s\n",
- scm_list_2 (scm_from_locale_string (scm_i_get_port_encoding (port)),
- err_str));
- }
+ *len = bytes_consumed;
- /* Never gets here. */
- return 0;
+ return err;
}
/* Read a codepoint from PORT and return it. */
scm_t_wchar
scm_getc (SCM port)
+#define FUNC_NAME "scm_getc"
{
+ int err;
size_t len;
+ scm_t_wchar codepoint;
char buf[SCM_MBCHAR_BUF_SIZE];
- return get_codepoint (port, buf, &len);
+ err = get_codepoint (port, &codepoint, buf, &len);
+ if (SCM_UNLIKELY (err != 0))
+ /* At this point PORT should point past the invalid encoding, as per
+ R6RS-lib Section 8.2.4. */
+ scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
+
+ return codepoint;
}
+#undef FUNC_NAME
/* this should only be called when the read buffer is empty. it
tries to refill the read buffer. it returns the first char from
pt->rw_active = SCM_PORT_WRITE;
}
-/* Write a scheme string STR to PORT from START inclusive to END
- exclusive. */
+/* Write STR to PORT from START inclusive to END exclusive. */
void
scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port)
{
- size_t i, size = scm_i_string_length (str);
scm_t_port *pt = SCM_PTAB_ENTRY (port);
- scm_t_ptob_descriptor *ptob = &scm_ptobs[SCM_PTOBNUM (port)];
- scm_t_wchar p;
- char *buf;
- size_t len;
if (pt->rw_active == SCM_PORT_READ)
scm_end_input (port);
- if (end == (size_t) (-1))
- end = size;
- size = end - start;
+ if (end == (size_t) -1)
+ end = scm_i_string_length (str);
- /* Note that making a substring will likely take the
- stringbuf_write_mutex. So, one shouldn't use scm_lfwrite_substr
- if the stringbuf write mutex may still be held elsewhere. */
- buf = scm_to_stringn (scm_c_substring (str, start, end), &len,
- pt->encoding, pt->ilseq_handler);
- ptob->write (port, buf, len);
- free (buf);
-
- for (i = 0; i < size; i++)
- {
- p = scm_i_string_ref (str, i + start);
- update_port_lf (p, port);
- }
+ scm_display (scm_c_substring (str, start, end), port);
if (pt->rw_random)
pt->rw_active = SCM_PORT_WRITE;
"return the value returned by the preceding call to\n"
"@code{peek-char}. In particular, a call to @code{peek-char} on\n"
"an interactive port will hang waiting for input whenever a call\n"
- "to @code{read-char} would have hung.")
+ "to @code{read-char} would have hung.\n"
+ "\n"
+ "As for @code{read-char}, a @code{decoding-error} may be raised\n"
+ "if such a situation occurs. However, unlike with @code{read-char},\n"
+ "@var{port} still points at the beginning of the erroneous byte\n"
+ "sequence when the error is raised.\n")
#define FUNC_NAME s_scm_peek_char
{
+ int err;
SCM result;
scm_t_wchar c;
char bytes[SCM_MBCHAR_BUF_SIZE];
- long column, line;
+ long column, line, i;
size_t len;
if (SCM_UNBNDP (port))
column = SCM_COL (port);
line = SCM_LINUM (port);
- c = get_codepoint (port, bytes, &len);
- if (c == EOF)
- result = SCM_EOF_VAL;
- else
- {
- long i;
+ err = get_codepoint (port, &c, bytes, &len);
+
+ for (i = len - 1; i >= 0; i--)
+ scm_unget_byte (bytes[i], port);
- result = SCM_MAKE_CHAR (c);
+ SCM_COL (port) = column;
+ SCM_LINUM (port) = line;
- for (i = len - 1; i >= 0; i--)
- scm_unget_byte (bytes[i], port);
+ if (SCM_UNLIKELY (err != 0))
+ {
+ scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
- SCM_COL (port) = column;
- SCM_LINUM (port) = line;
+ /* Shouldn't happen since `catch' always aborts to prompt. */
+ result = SCM_BOOL_F;
}
+ else if (c == EOF)
+ result = SCM_EOF_VAL;
+ else
+ result = SCM_MAKE_CHAR (c);
return result;
}
{
scm_t_port *pt;
pt = SCM_PTAB_ENTRY (port);
- if (pt->encoding)
- return pt->encoding;
- else
- return NULL;
+ return pt->encoding;
}
}
/* Set the character encoding for this port. */
pt = SCM_PTAB_ENTRY (port);
- if (valid_enc == NULL)
- pt->encoding = NULL;
- else
- pt->encoding = scm_gc_strdup (valid_enc, "port");
if (valid_enc == NULL)
valid_enc = "ISO-8859-1";
+ pt->encoding = scm_gc_strdup (valid_enc, "port");
+
if (SCM_CELL_WORD_0 (port) & SCM_RDNG)
{
/* Open an input iconv conversion descriptor, from VALID_ENC
#define FUNC_NAME s_scm_set_port_encoding_x
{
char *enc_str;
- const char *valid_enc_str;
SCM_VALIDATE_PORT (1, port);
SCM_VALIDATE_STRING (2, enc);
enc_str = scm_to_locale_string (enc);
- valid_enc_str = find_valid_encoding (enc_str);
- if (valid_enc_str == NULL)
- {
- free (enc_str);
- scm_misc_error (FUNC_NAME, "invalid or unknown character encoding ~s",
- scm_list_1 (enc));
- }
- else
- {
- scm_i_set_port_encoding_x (port, valid_enc_str);
- free (enc_str);
- }
+ scm_i_set_port_encoding_x (port, enc_str);
+
return SCM_UNSPECIFIED;
}
#undef FUNC_NAME