-/* Copyright (C) 1995-1999,2000,2001, 2002, 2003, 2004, 2006, 2008, 2009, 2010 Free Software Foundation, Inc.
- *
+/* Copyright (C) 1995-1999, 2000, 2001, 2002, 2003, 2004, 2006, 2008,
+ * 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
+ *
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 3 of
#endif
#include <errno.h>
+#include <iconv.h>
+#include <stdio.h>
+#include <assert.h>
+
#include <uniconv.h>
#include <unictype.h>
#include "libguile/alist.h"
#include "libguile/struct.h"
#include "libguile/ports.h"
+#include "libguile/ports-internal.h"
#include "libguile/root.h"
#include "libguile/strings.h"
#include "libguile/strports.h"
/* Character printers. */
+#define PORT_CONVERSION_HANDLER(port) \
+ SCM_PTAB_ENTRY (port)->ilseq_handler
+
+static size_t display_string (const void *, int, size_t, SCM,
+ scm_t_string_failed_conversion_handler);
+
static int display_character (scm_t_wchar, SCM,
scm_t_string_failed_conversion_handler);
+
static void write_character (scm_t_wchar, SCM, int);
+static void write_character_escaped (scm_t_wchar, int, SCM);
+
\f
/* {Names of immediate symbols}
SCM_SYMBOL (sym_reader, "reader");
scm_t_option scm_print_opts[] = {
- { SCM_OPTION_SCM, "closure-hook", (unsigned long) SCM_BOOL_F,
- "Hook for printing closures (should handle macros as well)." },
- { SCM_OPTION_BOOLEAN, "source", 0,
- "Print closures with source." },
- { SCM_OPTION_SCM, "highlight-prefix", (unsigned long)SCM_BOOL_F,
+ { SCM_OPTION_SCM, "highlight-prefix", (scm_t_bits)SCM_BOOL_F_BITS,
"The string to print before highlighted values." },
- { SCM_OPTION_SCM, "highlight-suffix", (unsigned long)SCM_BOOL_F,
+ { SCM_OPTION_SCM, "highlight-suffix", (scm_t_bits)SCM_BOOL_F_BITS,
"The string to print after highlighted values." },
- { SCM_OPTION_SCM, "quote-keywordish-symbols", (unsigned long)SCM_BOOL_F,
+ { SCM_OPTION_SCM, "quote-keywordish-symbols", (scm_t_bits)SCM_BOOL_F_BITS,
"How to print symbols that have a colon as their first or last character. "
"The value '#f' does not quote the colons; '#t' quotes them; "
- "'reader' quotes them when the reader option 'keywords' is not '#f'."
- },
+ "'reader' quotes them when the reader option 'keywords' is not '#f'." },
+ { SCM_OPTION_BOOLEAN, "escape-newlines", 1,
+ "Render newlines as \\n when printing using `write'." },
{ 0 },
};
/* Print the name of a symbol. */
static int
-quote_keywordish_symbol (SCM symbol)
+quote_keywordish_symbols (void)
{
- SCM option;
+ SCM option = SCM_PRINT_KEYWORD_STYLE;
- if (scm_i_symbol_ref (symbol, 0) != ':'
- && scm_i_symbol_ref (symbol, scm_i_symbol_length (symbol) - 1) != ':')
- return 0;
-
- option = SCM_PRINT_KEYWORD_STYLE;
if (scm_is_false (option))
return 0;
if (scm_is_eq (option, sym_reader))
return 1;
}
-void
-scm_i_print_symbol_name (SCM str, SCM port)
+#define INITIAL_IDENTIFIER_MASK \
+ (UC_CATEGORY_MASK_Lu | UC_CATEGORY_MASK_Ll | UC_CATEGORY_MASK_Lt \
+ | UC_CATEGORY_MASK_Lm | UC_CATEGORY_MASK_Lo | UC_CATEGORY_MASK_Mn \
+ | UC_CATEGORY_MASK_Nl | UC_CATEGORY_MASK_No | UC_CATEGORY_MASK_Pd \
+ | UC_CATEGORY_MASK_Pc | UC_CATEGORY_MASK_Po | UC_CATEGORY_MASK_Sc \
+ | UC_CATEGORY_MASK_Sm | UC_CATEGORY_MASK_Sk | UC_CATEGORY_MASK_So \
+ | UC_CATEGORY_MASK_Co)
+
+#define SUBSEQUENT_IDENTIFIER_MASK \
+ (INITIAL_IDENTIFIER_MASK \
+ | UC_CATEGORY_MASK_Nd | UC_CATEGORY_MASK_Mc | UC_CATEGORY_MASK_Me)
+
+static int
+symbol_has_extended_read_syntax (SCM sym)
{
- /* This points to the first character that has not yet been written to the
- * port. */
- size_t pos = 0;
- /* This points to the character we're currently looking at. */
- size_t end;
- /* If the name contains weird characters, we'll escape them with
- * backslashes and set this flag; it indicates that we should surround the
- * name with "#{" and "}#". */
- int weird = 0;
- /* Backslashes are not sufficient to make a name weird, but if a name is
- * weird because of other characters, backslahes need to be escaped too.
- * The first time we see a backslash, we set maybe_weird, and mw_pos points
- * to the backslash. Then if the name turns out to be weird, we re-process
- * everything starting from mw_pos.
- * We could instead make backslashes always weird. This is not necessary
- * to ensure that the output is (read)-able, but it would make this code
- * simpler and faster. */
- int maybe_weird = 0;
- size_t mw_pos = 0;
- size_t len = scm_i_symbol_length (str);
- scm_t_wchar str0 = scm_i_symbol_ref (str, 0);
-
- if (len == 0 || str0 == '\'' || str0 == '`' || str0 == ','
- || quote_keywordish_symbol (str)
- || (str0 == '.' && len == 1)
- || scm_is_true (scm_i_string_to_number (scm_symbol_to_string (str), 10)))
+ size_t pos, len = scm_i_symbol_length (sym);
+ scm_t_wchar c;
+
+ /* The empty symbol. */
+ if (len == 0)
+ return 1;
+
+ c = scm_i_symbol_ref (sym, 0);
+
+ /* Single dot; conflicts with dotted-pair notation. */
+ if (len == 1 && c == '.')
+ return 1;
+
+ /* Other initial-character constraints. */
+ if (c == '\'' || c == '`' || c == ',' || c == '"' || c == ';' || c == '#')
+ return 1;
+
+ /* Keywords can be identified by trailing colons too. */
+ if (c == ':' || scm_i_symbol_ref (sym, len - 1) == ':')
+ return quote_keywordish_symbols ();
+
+ /* Number-ish symbols. */
+ if (scm_is_true (scm_i_string_to_number (scm_symbol_to_string (sym), 10)))
+ return 1;
+
+ /* Other disallowed first characters. */
+ if (!uc_is_general_category_withtable (c, INITIAL_IDENTIFIER_MASK))
+ return 1;
+
+ /* Otherwise, any character that's in the identifier category mask is
+ fine to pass through as-is, provided it's not one of the ASCII
+ delimiters like `;'. */
+ for (pos = 1; pos < len; pos++)
{
- scm_lfwrite ("#{", 2, port);
- weird = 1;
+ c = scm_i_symbol_ref (sym, pos);
+ if (!uc_is_general_category_withtable (c, SUBSEQUENT_IDENTIFIER_MASK))
+ return 1;
+ else if (c == '"' || c == ';' || c == '#')
+ return 1;
}
- for (end = pos; end < len; ++end)
- switch (scm_i_symbol_ref (str, end))
- {
-#ifdef BRACKETS_AS_PARENS
- case '[':
- case ']':
-#endif
- case '(':
- case ')':
- case '"':
- case ';':
- case '#':
- case SCM_WHITE_SPACES:
- case SCM_LINE_INCREMENTORS:
- weird_handler:
- if (maybe_weird)
- {
- end = mw_pos;
- maybe_weird = 0;
- }
- if (!weird)
- {
- scm_lfwrite ("#{", 2, port);
- weird = 1;
- }
- if (pos < end)
- scm_lfwrite_substr (scm_symbol_to_string (str), pos, end, port);
- {
- char buf[2];
- buf[0] = '\\';
- buf[1] = (char) (unsigned char) scm_i_symbol_ref (str, end);
- scm_lfwrite (buf, 2, port);
- }
- pos = end + 1;
- break;
- case '\\':
- if (weird)
- goto weird_handler;
- if (!maybe_weird)
- {
- maybe_weird = 1;
- mw_pos = pos;
- }
- break;
- default:
- break;
- }
- if (pos < end)
- scm_lfwrite_substr (scm_symbol_to_string (str), pos, end, port);
- if (weird)
- scm_lfwrite ("}#", 2, port);
+ return 0;
+}
+
+static void
+print_normal_symbol (SCM sym, SCM port)
+{
+ scm_display (scm_symbol_to_string (sym), port);
+}
+
+static void
+print_extended_symbol (SCM sym, SCM port)
+{
+ size_t pos, len;
+ scm_t_string_failed_conversion_handler strategy;
+
+ len = scm_i_symbol_length (sym);
+ strategy = PORT_CONVERSION_HANDLER (port);
+
+ scm_lfwrite ("#{", 2, port);
+
+ for (pos = 0; pos < len; pos++)
+ {
+ scm_t_wchar c = scm_i_symbol_ref (sym, pos);
+
+ if (uc_is_general_category_withtable (c,
+ SUBSEQUENT_IDENTIFIER_MASK
+ | UC_CATEGORY_MASK_Zs))
+ {
+ if (!display_character (c, port, strategy))
+ scm_encoding_error ("print_extended_symbol", errno,
+ "cannot convert to output locale",
+ port, SCM_MAKE_CHAR (c));
+ }
+ else
+ {
+ display_string ("\\x", 1, 2, port, iconveh_question_mark);
+ scm_intprint (c, 16, port);
+ display_character (';', port, iconveh_question_mark);
+ }
+ }
+
+ scm_lfwrite ("}#", 2, port);
+}
+
+/* FIXME: allow R6RS hex escapes instead of #{...}#. */
+void
+scm_i_print_symbol_name (SCM sym, SCM port)
+{
+ if (symbol_has_extended_read_syntax (sym))
+ print_extended_symbol (sym, port);
+ else
+ print_normal_symbol (sym, port);
}
void
else
{
if (!display_character (SCM_CHAR (exp), port,
- scm_i_get_conversion_strategy (port)))
+ PORT_CONVERSION_HANDLER (port)))
scm_encoding_error (__func__, errno,
"cannot convert to output locale",
- "UTF-32", scm_i_get_port_encoding (port),
- scm_string (scm_list_1 (exp)));
+ port, exp);
}
}
else if (SCM_IFLAGP (exp)
if (SCM_OBJ_CLASS_FLAGS (exp) & SCM_CLASSF_GOOPS)
{
SCM pwps, print = pstate->writingp ? g_write : g_display;
- if (!print)
+ if (SCM_UNPACK (print) == 0)
goto print_struct;
pwps = scm_i_port_with_print_state (port, pstate->handle);
pstate->revealed = 1;
{
size_t len, i;
- scm_putc ('"', port);
+ display_character ('"', port, iconveh_question_mark);
len = scm_i_string_length (exp);
for (i = 0; i < len; ++i)
write_character (scm_i_string_ref (exp, i), port, 1);
- scm_putc ('"', port);
+ display_character ('"', port, iconveh_question_mark);
scm_remember_upto_here_1 (exp);
}
else
- scm_lfwrite_str (exp, port);
+ {
+ size_t len, printed;
+
+ len = scm_i_string_length (exp);
+ printed = display_string (scm_i_string_data (exp),
+ scm_i_is_narrow_string (exp),
+ len, port,
+ PORT_CONVERSION_HANDLER (port));
+ if (SCM_UNLIKELY (printed < len))
+ scm_encoding_error (__func__, errno,
+ "cannot convert to output locale",
+ port, scm_c_string_ref (exp, printed));
+ }
+
scm_remember_upto_here_1 (exp);
break;
case scm_tc7_symbol:
case scm_tc7_with_fluids:
scm_i_with_fluids_print (exp, port, pstate);
break;
+ case scm_tc7_array:
+ ENTER_NESTED_DATA (pstate, exp, circref);
+ scm_i_print_array (exp, port, pstate);
+ EXIT_NESTED_DATA (pstate);
+ break;
+ case scm_tc7_bytevector:
+ scm_i_print_bytevector (exp, port, pstate);
+ break;
+ case scm_tc7_bitvector:
+ scm_i_print_bitvector (exp, port, pstate);
+ break;
case scm_tc7_wvect:
ENTER_NESTED_DATA (pstate, exp, circref);
if (SCM_IS_WHVEC (exp))
else
scm_puts ("#w(", port);
goto common_vector_printer;
-
- case scm_tc7_bytevector:
- scm_i_print_bytevector (exp, port, pstate);
- break;
case scm_tc7_vector:
ENTER_NESTED_DATA (pstate, exp, circref);
scm_puts ("#(", port);
}
}
-/* Attempt to display CH to PORT according to STRATEGY. Return non-zero
- if CH was successfully displayed, zero otherwise (e.g., if it was not
- representable in PORT's encoding.) */
-static int
-display_character (scm_t_wchar ch, SCM port,
- scm_t_string_failed_conversion_handler strategy)
+/* Convert codepoint CH to UTF-8 and store the result in UTF8. Return
+ the number of bytes of the UTF-8-encoded string. */
+static size_t
+codepoint_to_utf8 (scm_t_wchar ch, scm_t_uint8 utf8[4])
+{
+ size_t len;
+ scm_t_uint32 codepoint;
+
+ codepoint = (scm_t_uint32) ch;
+
+ if (codepoint <= 0x7f)
+ {
+ len = 1;
+ utf8[0] = (scm_t_uint8) codepoint;
+ }
+ else if (codepoint <= 0x7ffUL)
+ {
+ len = 2;
+ utf8[0] = 0xc0 | (codepoint >> 6);
+ utf8[1] = 0x80 | (codepoint & 0x3f);
+ }
+ else if (codepoint <= 0xffffUL)
+ {
+ len = 3;
+ utf8[0] = 0xe0 | (codepoint >> 12);
+ utf8[1] = 0x80 | ((codepoint >> 6) & 0x3f);
+ utf8[2] = 0x80 | (codepoint & 0x3f);
+ }
+ else
+ {
+ len = 4;
+ utf8[0] = 0xf0 | (codepoint >> 18);
+ utf8[1] = 0x80 | ((codepoint >> 12) & 0x3f);
+ utf8[2] = 0x80 | ((codepoint >> 6) & 0x3f);
+ utf8[3] = 0x80 | (codepoint & 0x3f);
+ }
+
+ return len;
+}
+
+#define STR_REF(s, x) \
+ (narrow_p \
+ ? (scm_t_wchar) ((unsigned char *) (s))[x] \
+ : ((scm_t_wchar *) (s))[x])
+
+/* Write STR to PORT as UTF-8. STR is a LEN-codepoint string; it is
+ narrow if NARROW_P is true, wide otherwise. Return LEN. */
+static size_t
+display_string_as_utf8 (const void *str, int narrow_p, size_t len,
+ SCM port)
{
- int printed;
- const char *encoding;
+ size_t printed = 0;
- encoding = scm_i_get_port_encoding (port);
- if (encoding == NULL)
+ while (len > printed)
{
- if (ch <= 0xff)
+ size_t utf8_len, i;
+ char *input, utf8_buf[256];
+
+ /* Convert STR to UTF-8. */
+ for (i = printed, utf8_len = 0, input = utf8_buf;
+ i < len && utf8_len + 4 < sizeof (utf8_buf);
+ i++)
{
- scm_putc (ch, port);
- printed = 1;
+ utf8_len += codepoint_to_utf8 (STR_REF (str, i),
+ (scm_t_uint8 *) input);
+ input = utf8_buf + utf8_len;
}
- else
- printed = 0;
+
+ /* INPUT was successfully converted, entirely; print the
+ result. */
+ scm_lfwrite (utf8_buf, utf8_len, port);
+ printed += i - printed;
}
- else
+
+ assert (printed == len);
+
+ return len;
+}
+
+/* Convert STR through PORT's output conversion descriptor and write the
+ output to PORT. Return the number of codepoints written. */
+static size_t
+display_string_using_iconv (const void *str, int narrow_p, size_t len,
+ SCM port,
+ scm_t_string_failed_conversion_handler strategy)
+{
+ size_t printed;
+ scm_t_iconv_descriptors *id;
+
+ id = scm_i_port_iconv_descriptors (port);
+
+ printed = 0;
+
+ while (len > printed)
{
- size_t len;
- char locale_encoded[sizeof (ch)], *result;
-
- len = sizeof (locale_encoded);
- result = u32_conv_to_encoding (encoding, strategy,
- (scm_t_uint32 *) &ch, 1,
- NULL, locale_encoded, &len);
- if (result != NULL)
+ size_t done, utf8_len, input_left, output_left, i;
+ size_t codepoints_read, output_len;
+ char *input, *output;
+ char utf8_buf[256], encoded_output[256];
+ size_t offsets[256];
+
+ /* Convert STR to UTF-8. */
+ for (i = printed, utf8_len = 0, input = utf8_buf;
+ i < len && utf8_len + 4 < sizeof (utf8_buf);
+ i++)
{
- /* CH is graphic; print it. */
+ offsets[utf8_len] = i;
+ utf8_len += codepoint_to_utf8 (STR_REF (str, i),
+ (scm_t_uint8 *) input);
+ input = utf8_buf + utf8_len;
+ }
+
+ input = utf8_buf;
+ input_left = utf8_len;
- if (strategy == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
+ output = encoded_output;
+ output_left = sizeof (encoded_output);
+
+ done = iconv (id->output_cd, &input, &input_left,
+ &output, &output_left);
+
+ output_len = sizeof (encoded_output) - output_left;
+
+ if (SCM_UNLIKELY (done == (size_t) -1))
+ {
+ int errno_save = errno;
+
+ /* Reset the `iconv' state. */
+ iconv (id->output_cd, NULL, NULL, NULL, NULL);
+
+ /* Print the OUTPUT_LEN bytes successfully converted. */
+ scm_lfwrite (encoded_output, output_len, port);
+
+ /* See how many input codepoints these OUTPUT_LEN bytes
+ corresponds to. */
+ codepoints_read = offsets[input - utf8_buf] - printed;
+ printed += codepoints_read;
+
+ if (errno_save == EILSEQ &&
+ strategy != SCM_FAILED_CONVERSION_ERROR)
{
- /* Apply the same escaping syntax as in `write_character'. */
- if (SCM_R6RS_ESCAPES_P)
- scm_i_unistring_escapes_to_r6rs_escapes (result, &len);
+ /* Conversion failed somewhere in INPUT and we want to
+ escape or substitute the offending input character. */
+
+ if (strategy == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
+ {
+ scm_t_wchar ch;
+
+ /* Find CH, the offending codepoint, and escape it. */
+ ch = STR_REF (str, offsets[input - utf8_buf]);
+ write_character_escaped (ch, 1, port);
+ }
else
- scm_i_unistring_escapes_to_guile_escapes (result, &len);
+ /* STRATEGY is `SCM_FAILED_CONVERSION_QUESTION_MARK'. */
+ display_string ("?", 1, 1, port, strategy);
+
+ printed++;
}
+ else
+ /* Something bad happened that we can't handle: bail out. */
+ break;
+ }
+ else
+ {
+ /* INPUT was successfully converted, entirely; print the
+ result. */
+ scm_lfwrite (encoded_output, output_len, port);
+ codepoints_read = i - printed;
+ printed += codepoints_read;
+ }
+ }
- scm_lfwrite (result, len, port);
- printed = 1;
+ return printed;
+}
+
+#undef STR_REF
+
+/* Display the LEN codepoints in STR to PORT according to STRATEGY;
+ return the number of codepoints successfully displayed. If NARROW_P,
+ then STR is interpreted as a sequence of `char', denoting a Latin-1
+ string; otherwise it's interpreted as a sequence of
+ `scm_t_wchar'. */
+static size_t
+display_string (const void *str, int narrow_p,
+ size_t len, SCM port,
+ scm_t_string_failed_conversion_handler strategy)
+
+{
+ scm_t_port_internal *pti;
+
+ pti = SCM_PORT_GET_INTERNAL (port);
- if (SCM_UNLIKELY (result != locale_encoded))
- free (result);
+ if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
+ return display_string_as_utf8 (str, narrow_p, len, port);
+ else
+ return display_string_using_iconv (str, narrow_p, len,
+ port, strategy);
+}
+
+/* Attempt to display CH to PORT according to STRATEGY. Return non-zero
+ if CH was successfully displayed, zero otherwise (e.g., if it was not
+ representable in PORT's encoding.) */
+static int
+display_character (scm_t_wchar ch, SCM port,
+ scm_t_string_failed_conversion_handler strategy)
+{
+ return display_string (&ch, 0, 1, port, strategy) == 1;
+}
+
+/* Attempt to pretty-print CH, a combining character, to PORT. Return
+ zero upon failure, non-zero otherwise. The idea is to print CH above
+ a dotted circle to make it more visible. */
+static int
+write_combining_character (scm_t_wchar ch, SCM port)
+{
+ scm_t_wchar str[2];
+
+ str[0] = SCM_CODEPOINT_DOTTED_CIRCLE;
+ str[1] = ch;
+
+ return display_string (str, 0, 2, port, iconveh_error) == 2;
+}
+
+/* Write CH to PORT in its escaped form, using the string escape syntax
+ if STRING_ESCAPES_P is non-zero. */
+static void
+write_character_escaped (scm_t_wchar ch, int string_escapes_p, SCM port)
+{
+ if (string_escapes_p)
+ {
+ /* Represent CH using the in-string escape syntax. */
+
+ static const char hex[] = "0123456789abcdef";
+ static const char escapes[7] = "abtnvfr";
+ char buf[9];
+
+ if (ch >= 0x07 && ch <= 0x0D && ch != 0x0A)
+ {
+ /* Use special escapes for some C0 controls. */
+ buf[0] = '\\';
+ buf[1] = escapes[ch - 0x07];
+ scm_lfwrite (buf, 2, port);
+ }
+ else if (!SCM_R6RS_ESCAPES_P)
+ {
+ if (ch <= 0xFF)
+ {
+ buf[0] = '\\';
+ buf[1] = 'x';
+ buf[2] = hex[ch / 16];
+ buf[3] = hex[ch % 16];
+ scm_lfwrite (buf, 4, port);
+ }
+ else if (ch <= 0xFFFF)
+ {
+ buf[0] = '\\';
+ buf[1] = 'u';
+ buf[2] = hex[(ch & 0xF000) >> 12];
+ buf[3] = hex[(ch & 0xF00) >> 8];
+ buf[4] = hex[(ch & 0xF0) >> 4];
+ buf[5] = hex[(ch & 0xF)];
+ scm_lfwrite (buf, 6, port);
+ }
+ else if (ch > 0xFFFF)
+ {
+ buf[0] = '\\';
+ buf[1] = 'U';
+ buf[2] = hex[(ch & 0xF00000) >> 20];
+ buf[3] = hex[(ch & 0xF0000) >> 16];
+ buf[4] = hex[(ch & 0xF000) >> 12];
+ buf[5] = hex[(ch & 0xF00) >> 8];
+ buf[6] = hex[(ch & 0xF0) >> 4];
+ buf[7] = hex[(ch & 0xF)];
+ scm_lfwrite (buf, 8, port);
+ }
}
else
- printed = 0;
+ {
+ /* Print an R6RS variable-length hex escape: "\xNNNN;". */
+ scm_t_wchar ch2 = ch;
+
+ int i = 8;
+ buf[i] = ';';
+ i --;
+ if (ch == 0)
+ buf[i--] = '0';
+ else
+ while (ch2 > 0)
+ {
+ buf[i] = hex[ch2 & 0xF];
+ ch2 >>= 4;
+ i --;
+ }
+ buf[i] = 'x';
+ i --;
+ buf[i] = '\\';
+ scm_lfwrite (buf + i, 9 - i, port);
+ }
}
+ else
+ {
+ /* Represent CH using the character escape syntax. */
+ const char *name;
- return printed;
+ name = scm_i_charname (SCM_MAKE_CHAR (ch));
+ if (name != NULL)
+ scm_puts (name, port);
+ else
+ PRINT_CHAR_ESCAPE (ch, port);
+ }
}
/* Write CH to PORT, escaping it if it's non-graphic or not
write_character (scm_t_wchar ch, SCM port, int string_escapes_p)
{
int printed = 0;
+ scm_t_string_failed_conversion_handler strategy;
+
+ strategy = PORT_CONVERSION_HANDLER (port);
if (string_escapes_p)
{
/* Check if CH deserves special treatment. */
if (ch == '"' || ch == '\\')
{
- scm_putc ('\\', port);
- scm_putc (ch, port);
+ display_character ('\\', port, iconveh_question_mark);
+ display_character (ch, port, strategy);
printed = 1;
}
+ else if (ch == '\n' && SCM_PRINT_ESCAPE_NEWLINES_P)
+ {
+ display_character ('\\', port, iconveh_question_mark);
+ display_character ('n', port, strategy);
+ printed = 1;
+ }
else if (ch == ' ' || ch == '\n')
{
- scm_putc (ch, port);
+ display_character (ch, port, strategy);
printed = 1;
}
}
else
- scm_puts ("#\\", port);
+ {
+ display_string ("#\\", 1, 2, port, iconveh_question_mark);
+
+ if (uc_combining_class (ch) != UC_CCC_NR)
+ /* Character is a combining character, so attempt to
+ pretty-print it. */
+ printed = write_combining_character (ch, port);
+ }
if (!printed
&& uc_is_general_category_withtable (ch,
printed = display_character (ch, port, iconveh_error);
if (!printed)
- {
- /* CH isn't graphic or cannot be represented in PORT's
- encoding. */
+ /* CH isn't graphic or cannot be represented in PORT's encoding. */
+ write_character_escaped (ch, string_escapes_p, port);
+}
- if (string_escapes_p)
- {
- /* Represent CH using the in-string escape syntax. */
+/* Display STR to PORT from START inclusive to END exclusive. */
+void
+scm_i_display_substring (SCM str, size_t start, size_t end, SCM port)
+{
+ int narrow_p;
+ const char *buf;
+ size_t len, printed;
- static const char hex[] = "0123456789abcdef";
- char buf[9];
+ buf = scm_i_string_data (str);
+ len = end - start;
+ narrow_p = scm_i_is_narrow_string (str);
+ buf += start * (narrow_p ? sizeof (char) : sizeof (scm_t_wchar));
- if (!SCM_R6RS_ESCAPES_P)
- {
- if (ch <= 0xFF)
- {
- buf[0] = '\\';
- buf[1] = 'x';
- buf[2] = hex[ch / 16];
- buf[3] = hex[ch % 16];
- scm_lfwrite (buf, 4, port);
- }
- else if (ch <= 0xFFFF)
- {
- buf[0] = '\\';
- buf[1] = 'u';
- buf[2] = hex[(ch & 0xF000) >> 12];
- buf[3] = hex[(ch & 0xF00) >> 8];
- buf[4] = hex[(ch & 0xF0) >> 4];
- buf[5] = hex[(ch & 0xF)];
- scm_lfwrite (buf, 6, port);
- }
- else if (ch > 0xFFFF)
- {
- buf[0] = '\\';
- buf[1] = 'U';
- buf[2] = hex[(ch & 0xF00000) >> 20];
- buf[3] = hex[(ch & 0xF0000) >> 16];
- buf[4] = hex[(ch & 0xF000) >> 12];
- buf[5] = hex[(ch & 0xF00) >> 8];
- buf[6] = hex[(ch & 0xF0) >> 4];
- buf[7] = hex[(ch & 0xF)];
- scm_lfwrite (buf, 8, port);
- }
- }
- else
- {
- /* Print an R6RS variable-length hex escape: "\xNNNN;". */
- scm_t_wchar ch2 = ch;
-
- int i = 8;
- buf[i] = ';';
- i --;
- if (ch == 0)
- buf[i--] = '0';
- else
- while (ch2 > 0)
- {
- buf[i] = hex[ch2 & 0xF];
- ch2 >>= 4;
- i --;
- }
- buf[i] = 'x';
- i --;
- buf[i] = '\\';
- scm_lfwrite (buf + i, 9 - i, port);
- }
- }
- else
- {
- /* Represent CH using the character escape syntax. */
- const char *name;
+ printed = display_string (buf, narrow_p, end - start, port,
+ PORT_CONVERSION_HANDLER (port));
- name = scm_i_charname (SCM_MAKE_CHAR (ch));
- if (name != NULL)
- scm_puts (name, port);
- else
- PRINT_CHAR_ESCAPE (ch, port);
- }
- }
+ if (SCM_UNLIKELY (printed < len))
+ scm_encoding_error (__func__, errno,
+ "cannot convert to output locale",
+ port, scm_c_string_ref (str, printed + start));
}
+\f
/* Print an integer.
*/
"@var{message} can contain @code{~A} (was @code{%s}) and\n"
"@code{~S} (was @code{%S}) escapes. When printed,\n"
"the escapes are replaced with corresponding members of\n"
- "@var{ARGS}:\n"
+ "@var{args}:\n"
"@code{~A} formats using @code{display} and @code{~S} formats\n"
"using @code{write}.\n"
"If @var{destination} is @code{#t}, then use the current output\n"
else if (scm_is_false (destination))
{
fReturnString = 1;
- port = scm_mkstrport (SCM_INUM0,
- scm_make_string (SCM_INUM0, SCM_UNDEFINED),
+ port = scm_mkstrport (SCM_INUM0, SCM_BOOL_F,
SCM_OPN | SCM_WRTNG,
FUNC_NAME);
destination = port;
port = SCM_COERCE_OUTPORT (port);
if (!display_character (SCM_CHAR (chr), port,
- scm_i_get_conversion_strategy (port)))
+ PORT_CONVERSION_HANDLER (port)))
scm_encoding_error (__func__, errno,
"cannot convert to output locale",
- "UTF-32", scm_i_get_port_encoding (port),
- scm_string (scm_list_1 (chr)));
+ port, chr);
return SCM_UNSPECIFIED;
}
void
scm_init_print ()
{
- SCM vtable, layout, type;
-
- scm_init_opts (scm_print_options, scm_print_opts);
-
- scm_print_options (scm_list_4 (scm_from_locale_symbol ("highlight-prefix"),
- scm_from_locale_string ("{"),
- scm_from_locale_symbol ("highlight-suffix"),
- scm_from_locale_string ("}")));
+ SCM type;
scm_gc_register_root (&print_state_pool);
scm_gc_register_root (&scm_print_state_vtable);
- vtable = scm_make_vtable_vtable (scm_nullstr, SCM_INUM0, SCM_EOL);
- layout =
- scm_make_struct_layout (scm_from_locale_string (SCM_PRINT_STATE_LAYOUT));
- type = scm_make_struct (vtable, SCM_INUM0, scm_list_1 (layout));
- scm_set_struct_vtable_name_x (type, scm_from_locale_symbol ("print-state"));
+ type = scm_make_vtable (scm_from_locale_string (SCM_PRINT_STATE_LAYOUT),
+ SCM_BOOL_F);
+ scm_set_struct_vtable_name_x (type, scm_from_latin1_symbol ("print-state"));
scm_print_state_vtable = type;
/* Don't want to bind a wrapper class in GOOPS, so pass 0 as arg1. */
#include "libguile/print.x"
+ scm_init_opts (scm_print_options, scm_print_opts);
+ scm_print_opts[SCM_PRINT_HIGHLIGHT_PREFIX_I].val =
+ SCM_UNPACK (scm_from_locale_string ("{"));
+ scm_print_opts[SCM_PRINT_HIGHLIGHT_SUFFIX_I].val =
+ SCM_UNPACK (scm_from_locale_string ("}"));
scm_print_opts[SCM_PRINT_KEYWORD_STYLE_I].val = SCM_UNPACK (sym_reader);
}