+#define STR_REF(s, x) \
+ (narrow_p \
+ ? (scm_t_wchar) ((unsigned char *) (s))[x] \
+ : ((scm_t_wchar *) (s))[x])
+
+/* Write STR to PORT as UTF-8. STR is a LEN-codepoint string; it is
+ narrow if NARROW_P is true, wide otherwise. Return LEN. */
+static size_t
+display_string_as_utf8 (const void *str, int narrow_p, size_t len,
+ SCM port)
+{
+ size_t printed = 0;
+
+ while (len > printed)
+ {
+ size_t utf8_len, i;
+ char *input, utf8_buf[256];
+
+ /* Convert STR to UTF-8. */
+ for (i = printed, utf8_len = 0, input = utf8_buf;
+ i < len && utf8_len + 4 < sizeof (utf8_buf);
+ i++)
+ {
+ utf8_len += codepoint_to_utf8 (STR_REF (str, i),
+ (scm_t_uint8 *) input);
+ input = utf8_buf + utf8_len;
+ }
+
+ /* INPUT was successfully converted, entirely; print the
+ result. */
+ scm_lfwrite (utf8_buf, utf8_len, port);
+ printed += i - printed;
+ }
+
+ assert (printed == len);
+
+ return len;
+}
+
+/* Convert STR through PORT's output conversion descriptor and write the
+ output to PORT. Return the number of codepoints written. */
+static size_t
+display_string_using_iconv (const void *str, int narrow_p, size_t len,
+ SCM port,
+ scm_t_string_failed_conversion_handler strategy)
+{
+ size_t printed;
+ scm_t_iconv_descriptors *id;
+
+ id = scm_i_port_iconv_descriptors (port);
+
+ printed = 0;
+
+ while (len > printed)
+ {
+ size_t done, utf8_len, input_left, output_left, i;
+ size_t codepoints_read, output_len;
+ char *input, *output;
+ char utf8_buf[256], encoded_output[256];
+ size_t offsets[256];
+
+ /* Convert STR to UTF-8. */
+ for (i = printed, utf8_len = 0, input = utf8_buf;
+ i < len && utf8_len + 4 < sizeof (utf8_buf);
+ i++)
+ {
+ offsets[utf8_len] = i;
+ utf8_len += codepoint_to_utf8 (STR_REF (str, i),
+ (scm_t_uint8 *) input);
+ input = utf8_buf + utf8_len;
+ }
+
+ input = utf8_buf;
+ input_left = utf8_len;
+
+ output = encoded_output;
+ output_left = sizeof (encoded_output);
+
+ done = iconv (id->output_cd, &input, &input_left,
+ &output, &output_left);
+
+ output_len = sizeof (encoded_output) - output_left;
+
+ if (SCM_UNLIKELY (done == (size_t) -1))
+ {
+ int errno_save = errno;
+
+ /* Reset the `iconv' state. */
+ iconv (id->output_cd, NULL, NULL, NULL, NULL);
+
+ /* Print the OUTPUT_LEN bytes successfully converted. */
+ scm_lfwrite (encoded_output, output_len, port);
+
+ /* See how many input codepoints these OUTPUT_LEN bytes
+ corresponds to. */
+ codepoints_read = offsets[input - utf8_buf] - printed;
+ printed += codepoints_read;
+
+ if (errno_save == EILSEQ &&
+ strategy != SCM_FAILED_CONVERSION_ERROR)
+ {
+ /* Conversion failed somewhere in INPUT and we want to
+ escape or substitute the offending input character. */
+
+ if (strategy == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
+ {
+ scm_t_wchar ch;
+
+ /* Find CH, the offending codepoint, and escape it. */
+ ch = STR_REF (str, offsets[input - utf8_buf]);
+ write_character_escaped (ch, 1, port);
+ }
+ else
+ /* STRATEGY is `SCM_FAILED_CONVERSION_QUESTION_MARK'. */
+ display_string ("?", 1, 1, port, strategy);
+
+ printed++;
+ }
+ else
+ /* Something bad happened that we can't handle: bail out. */
+ break;
+ }
+ else
+ {
+ /* INPUT was successfully converted, entirely; print the
+ result. */
+ scm_lfwrite (encoded_output, output_len, port);
+ codepoints_read = i - printed;
+ printed += codepoints_read;
+ }
+ }
+
+ return printed;
+}
+
+#undef STR_REF
+
+/* Display the LEN codepoints in STR to PORT according to STRATEGY;
+ return the number of codepoints successfully displayed. If NARROW_P,
+ then STR is interpreted as a sequence of `char', denoting a Latin-1
+ string; otherwise it's interpreted as a sequence of
+ `scm_t_wchar'. */
+static size_t
+display_string (const void *str, int narrow_p,
+ size_t len, SCM port,
+ scm_t_string_failed_conversion_handler strategy)
+
+{
+ scm_t_port_internal *pti;
+
+ pti = SCM_PORT_GET_INTERNAL (port);
+
+ if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
+ return display_string_as_utf8 (str, narrow_p, len, port);
+ else
+ return display_string_using_iconv (str, narrow_p, len,
+ port, strategy);
+}
+
+/* Attempt to display CH to PORT according to STRATEGY. Return non-zero
+ if CH was successfully displayed, zero otherwise (e.g., if it was not
+ representable in PORT's encoding.) */
+static int
+display_character (scm_t_wchar ch, SCM port,
+ scm_t_string_failed_conversion_handler strategy)
+{
+ return display_string (&ch, 0, 1, port, strategy) == 1;
+}
+
+/* Attempt to pretty-print CH, a combining character, to PORT. Return
+ zero upon failure, non-zero otherwise. The idea is to print CH above
+ a dotted circle to make it more visible. */
+static int
+write_combining_character (scm_t_wchar ch, SCM port)
+{
+ scm_t_wchar str[2];
+
+ str[0] = SCM_CODEPOINT_DOTTED_CIRCLE;
+ str[1] = ch;
+
+ return display_string (str, 0, 2, port, iconveh_error) == 2;
+}
+
+/* Write CH to PORT in its escaped form, using the string escape syntax
+ if STRING_ESCAPES_P is non-zero. */
+static void
+write_character_escaped (scm_t_wchar ch, int string_escapes_p, SCM port)
+{
+ if (string_escapes_p)
+ {
+ /* Represent CH using the in-string escape syntax. */
+
+ static const char hex[] = "0123456789abcdef";
+ static const char escapes[7] = "abtnvfr";
+ char buf[9];
+
+ if (ch >= 0x07 && ch <= 0x0D && ch != 0x0A)
+ {
+ /* Use special escapes for some C0 controls. */
+ buf[0] = '\\';
+ buf[1] = escapes[ch - 0x07];
+ scm_lfwrite (buf, 2, port);
+ }
+ else if (!SCM_R6RS_ESCAPES_P)
+ {
+ if (ch <= 0xFF)
+ {
+ buf[0] = '\\';
+ buf[1] = 'x';
+ buf[2] = hex[ch / 16];
+ buf[3] = hex[ch % 16];
+ scm_lfwrite (buf, 4, port);
+ }
+ else if (ch <= 0xFFFF)
+ {
+ buf[0] = '\\';
+ buf[1] = 'u';
+ buf[2] = hex[(ch & 0xF000) >> 12];
+ buf[3] = hex[(ch & 0xF00) >> 8];
+ buf[4] = hex[(ch & 0xF0) >> 4];
+ buf[5] = hex[(ch & 0xF)];
+ scm_lfwrite (buf, 6, port);
+ }
+ else if (ch > 0xFFFF)
+ {
+ buf[0] = '\\';
+ buf[1] = 'U';
+ buf[2] = hex[(ch & 0xF00000) >> 20];
+ buf[3] = hex[(ch & 0xF0000) >> 16];
+ buf[4] = hex[(ch & 0xF000) >> 12];
+ buf[5] = hex[(ch & 0xF00) >> 8];
+ buf[6] = hex[(ch & 0xF0) >> 4];
+ buf[7] = hex[(ch & 0xF)];
+ scm_lfwrite (buf, 8, port);
+ }
+ }
+ else
+ {
+ /* Print an R6RS variable-length hex escape: "\xNNNN;". */
+ scm_t_wchar ch2 = ch;
+
+ int i = 8;
+ buf[i] = ';';
+ i --;
+ if (ch == 0)
+ buf[i--] = '0';
+ else
+ while (ch2 > 0)
+ {
+ buf[i] = hex[ch2 & 0xF];
+ ch2 >>= 4;
+ i --;
+ }
+ buf[i] = 'x';
+ i --;
+ buf[i] = '\\';
+ scm_lfwrite (buf + i, 9 - i, port);
+ }
+ }
+ else
+ {
+ /* Represent CH using the character escape syntax. */
+ const char *name;
+
+ name = scm_i_charname (SCM_MAKE_CHAR (ch));
+ if (name != NULL)
+ scm_puts (name, port);
+ else
+ PRINT_CHAR_ESCAPE (ch, port);
+ }
+}
+
+/* Write CH to PORT, escaping it if it's non-graphic or not
+ representable in PORT's encoding. If STRING_ESCAPES_P is true and CH
+ needs to be escaped, it is escaped using the in-string escape syntax;
+ otherwise the character escape syntax is used. */
+static void
+write_character (scm_t_wchar ch, SCM port, int string_escapes_p)
+{
+ int printed = 0;
+ scm_t_string_failed_conversion_handler strategy;
+
+ strategy = PORT_CONVERSION_HANDLER (port);
+
+ if (string_escapes_p)
+ {
+ /* Check if CH deserves special treatment. */
+ if (ch == '"' || ch == '\\')
+ {
+ display_character ('\\', port, iconveh_question_mark);
+ display_character (ch, port, strategy);
+ printed = 1;
+ }
+ else if (ch == '\n' && SCM_PRINT_ESCAPE_NEWLINES_P)
+ {
+ display_character ('\\', port, iconveh_question_mark);
+ display_character ('n', port, strategy);
+ printed = 1;
+ }
+ else if (ch == ' ' || ch == '\n')
+ {
+ display_character (ch, port, strategy);
+ printed = 1;
+ }
+ }
+ else
+ {
+ display_string ("#\\", 1, 2, port, iconveh_question_mark);
+
+ if (uc_combining_class (ch) != UC_CCC_NR)
+ /* Character is a combining character, so attempt to
+ pretty-print it. */
+ printed = write_combining_character (ch, port);
+ }
+
+ if (!printed
+ && uc_is_general_category_withtable (ch,
+ UC_CATEGORY_MASK_L |
+ UC_CATEGORY_MASK_M |
+ UC_CATEGORY_MASK_N |
+ UC_CATEGORY_MASK_P |
+ UC_CATEGORY_MASK_S))
+ /* CH is graphic; attempt to display it. */
+ printed = display_character (ch, port, iconveh_error);
+
+ if (!printed)
+ /* CH isn't graphic or cannot be represented in PORT's encoding. */
+ write_character_escaped (ch, string_escapes_p, port);
+}
+
+/* Display STR to PORT from START inclusive to END exclusive. */
+void
+scm_i_display_substring (SCM str, size_t start, size_t end, SCM port)
+{
+ int narrow_p;
+ const char *buf;
+ size_t len, printed;
+
+ buf = scm_i_string_data (str);
+ len = end - start;
+ narrow_p = scm_i_is_narrow_string (str);
+ buf += start * (narrow_p ? sizeof (char) : sizeof (scm_t_wchar));
+
+ printed = display_string (buf, narrow_p, end - start, port,
+ PORT_CONVERSION_HANDLER (port));
+
+ if (SCM_UNLIKELY (printed < len))
+ scm_encoding_error (__func__, errno,
+ "cannot convert to output locale",
+ port, scm_c_string_ref (str, printed + start));
+}
+
+\f