return u8_result;
}
+/* From RFC 3629:
+
+ Char. number range | UTF-8 octet sequence
+ (hexadecimal) | (binary)
+ --------------------+---------------------------------------------
+ 0000 0000-0000 007F | 0xxxxxxx
+ 0000 0080-0000 07FF | 110xxxxx 10xxxxxx
+ 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
+ 0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+*/
+
+static size_t
+u32_u8_strlen (const scm_t_uint32 *str, size_t len)
+{
+ size_t ret, i;
+
+ for (i = 0, ret = 0; i < len; i++)
+ {
+ scm_t_uint32 c = str[i];
+
+ if (c <= 0x7f)
+ ret += 1;
+ else if (c <= 0x7ff)
+ ret += 2;
+ else if (c <= 0xffff)
+ ret += 3;
+ else
+ ret += 4;
+ }
+
+ return ret;
+}
+
char *
scm_to_utf8_stringn (SCM str, size_t *lenp)
{
scm_i_string_length (str),
NULL, lenp);
else
- return (char *) u32_to_u8 ((scm_t_uint32*)scm_i_string_wide_chars (str),
- scm_i_string_length (str),
- NULL, lenp);
+ {
+ scm_t_uint8 *buf, *ret;
+ size_t len, allocated;
+
+ len = u32_u8_strlen ((scm_t_uint32*)scm_i_string_wide_chars (str),
+ scm_i_string_length (str));
+ allocated = len + 1;
+ buf = scm_malloc (allocated);
+
+ ret = u32_to_u8 ((scm_t_uint32*)scm_i_string_wide_chars (str),
+ scm_i_string_length (str), buf, &len);
+
+ if (ret == buf && len + 1 == allocated)
+ {
+ ret[len] = 0;
+ return (char *) ret;
+ }
+
+ /* An error: a bad codepoint. */
+ {
+ int saved_errno = errno;
+
+ free (buf);
+ if (!saved_errno)
+ abort ();
+
+ scm_decoding_error ("scm_to_utf8_stringn", errno,
+ "invalid codepoint in string", str);
+
+ /* Not reached. */
+ return NULL;
+ }
+ }
}
scm_t_wchar *