-/* Copyright (C) 1995-1999,2000,2001, 2002, 2003, 2004, 2006, 2008, 2009, 2010 Free Software Foundation, Inc.
- *
+/* Copyright (C) 1995-1999, 2000, 2001, 2002, 2003, 2004, 2006, 2008,
+ * 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+ *
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 3 of
#endif
#include <errno.h>
+#include <iconv.h>
+#include <stdio.h>
+#include <assert.h>
+
#include <uniconv.h>
#include <unictype.h>
#include "libguile/chars.h"
#include "libguile/continuations.h"
#include "libguile/smob.h"
+#include "libguile/control.h"
#include "libguile/eval.h"
#include "libguile/macros.h"
#include "libguile/procprop.h"
#include "libguile/read.h"
-#include "libguile/weaks.h"
#include "libguile/programs.h"
#include "libguile/alist.h"
#include "libguile/struct.h"
#include "libguile/strings.h"
#include "libguile/strports.h"
#include "libguile/vectors.h"
-#include "libguile/lang.h"
#include "libguile/numbers.h"
#include "libguile/vm.h"
\f
+/* Character printers. */
+
+static size_t display_string (const void *, int, size_t, SCM,
+ scm_t_string_failed_conversion_handler);
+
+static int display_character (scm_t_wchar, SCM,
+ scm_t_string_failed_conversion_handler);
+
+static void write_character (scm_t_wchar, SCM, int);
+
+static void write_character_escaped (scm_t_wchar, int, SCM);
+
+\f
+
/* {Names of immediate symbols}
*
* This table must agree with the declarations in scm.h: {Immediate Symbols}.
"#<XXX UNUSED LISP FALSE -- DO NOT USE -- SHOULD NEVER BE SEEN XXX>",
"()",
"#t",
- "#<XXX UNUSED BOOLEAN -- DO NOT USE -- SHOULD NEVER BE SEEN XXX>",
+ "#<XXX UNUSED BOOLEAN 0 -- DO NOT USE -- SHOULD NEVER BE SEEN XXX>",
+ "#<XXX UNUSED BOOLEAN 1 -- DO NOT USE -- SHOULD NEVER BE SEEN XXX>",
+ "#<XXX UNUSED BOOLEAN 2 -- DO NOT USE -- SHOULD NEVER BE SEEN XXX>",
"#<unspecified>",
"#<undefined>",
"#<eof>",
SCM_SYMBOL (sym_reader, "reader");
scm_t_option scm_print_opts[] = {
- { SCM_OPTION_SCM, "closure-hook", (unsigned long) SCM_BOOL_F,
- "Hook for printing closures (should handle macros as well)." },
- { SCM_OPTION_BOOLEAN, "source", 0,
- "Print closures with source." },
- { SCM_OPTION_SCM, "highlight-prefix", (unsigned long)SCM_BOOL_F,
+ { SCM_OPTION_SCM, "highlight-prefix", (scm_t_bits)SCM_BOOL_F_BITS,
"The string to print before highlighted values." },
- { SCM_OPTION_SCM, "highlight-suffix", (unsigned long)SCM_BOOL_F,
+ { SCM_OPTION_SCM, "highlight-suffix", (scm_t_bits)SCM_BOOL_F_BITS,
"The string to print after highlighted values." },
- { SCM_OPTION_SCM, "quote-keywordish-symbols", (unsigned long)SCM_BOOL_F,
+ { SCM_OPTION_SCM, "quote-keywordish-symbols", (scm_t_bits)SCM_BOOL_F_BITS,
"How to print symbols that have a colon as their first or last character. "
"The value '#f' does not quote the colons; '#t' quotes them; "
- "'reader' quotes them when the reader option 'keywords' is not '#f'."
- },
+ "'reader' quotes them when the reader option 'keywords' is not '#f'." },
+ { SCM_OPTION_BOOLEAN, "escape-newlines", 1,
+ "Render newlines as \\n when printing using `write'." },
{ 0 },
};
{ \
if (pstate->top - pstate->list_offset >= pstate->level) \
{ \
- scm_putc ('#', port); \
+ scm_putc_unlocked ('#', port); \
return; \
} \
} \
for (i = pstate->top - 1; 1; --i)
if (scm_is_eq (PSTATE_STACK_REF(pstate, i), ref))
break;
- scm_putc ('#', port);
+ scm_putc_unlocked ('#', port);
scm_intprint (i - self, 10, port);
- scm_putc ('#', port);
+ scm_putc_unlocked ('#', port);
}
/* Print the name of a symbol. */
static int
-quote_keywordish_symbol (SCM symbol)
+quote_keywordish_symbols (void)
{
- SCM option;
-
- if (scm_i_symbol_ref (symbol, 0) != ':'
- && scm_i_symbol_ref (symbol, scm_i_symbol_length (symbol) - 1) != ':')
- return 0;
+ SCM option = SCM_PRINT_KEYWORD_STYLE;
- option = SCM_PRINT_KEYWORD_STYLE;
if (scm_is_false (option))
return 0;
if (scm_is_eq (option, sym_reader))
return 1;
}
-void
-scm_i_print_symbol_name (SCM str, SCM port)
+#define INITIAL_IDENTIFIER_MASK \
+ (UC_CATEGORY_MASK_Lu | UC_CATEGORY_MASK_Ll | UC_CATEGORY_MASK_Lt \
+ | UC_CATEGORY_MASK_Lm | UC_CATEGORY_MASK_Lo | UC_CATEGORY_MASK_Mn \
+ | UC_CATEGORY_MASK_Nl | UC_CATEGORY_MASK_No | UC_CATEGORY_MASK_Pd \
+ | UC_CATEGORY_MASK_Pc | UC_CATEGORY_MASK_Po | UC_CATEGORY_MASK_Sc \
+ | UC_CATEGORY_MASK_Sm | UC_CATEGORY_MASK_Sk | UC_CATEGORY_MASK_So \
+ | UC_CATEGORY_MASK_Co)
+
+#define SUBSEQUENT_IDENTIFIER_MASK \
+ (INITIAL_IDENTIFIER_MASK \
+ | UC_CATEGORY_MASK_Nd | UC_CATEGORY_MASK_Mc | UC_CATEGORY_MASK_Me)
+
+static int
+symbol_has_extended_read_syntax (SCM sym)
{
- /* This points to the first character that has not yet been written to the
- * port. */
- size_t pos = 0;
- /* This points to the character we're currently looking at. */
- size_t end;
- /* If the name contains weird characters, we'll escape them with
- * backslashes and set this flag; it indicates that we should surround the
- * name with "#{" and "}#". */
- int weird = 0;
- /* Backslashes are not sufficient to make a name weird, but if a name is
- * weird because of other characters, backslahes need to be escaped too.
- * The first time we see a backslash, we set maybe_weird, and mw_pos points
- * to the backslash. Then if the name turns out to be weird, we re-process
- * everything starting from mw_pos.
- * We could instead make backslashes always weird. This is not necessary
- * to ensure that the output is (read)-able, but it would make this code
- * simpler and faster. */
- int maybe_weird = 0;
- size_t mw_pos = 0;
- size_t len = scm_i_symbol_length (str);
- scm_t_wchar str0 = scm_i_symbol_ref (str, 0);
-
- if (len == 0 || str0 == '\'' || str0 == '`' || str0 == ','
- || quote_keywordish_symbol (str)
- || (str0 == '.' && len == 1)
- || scm_is_true (scm_i_string_to_number (scm_symbol_to_string (str), 10)))
+ size_t pos, len = scm_i_symbol_length (sym);
+ scm_t_wchar c;
+
+ /* The empty symbol. */
+ if (len == 0)
+ return 1;
+
+ c = scm_i_symbol_ref (sym, 0);
+
+ /* Single dot; conflicts with dotted-pair notation. */
+ if (len == 1 && c == '.')
+ return 1;
+
+ /* Other initial-character constraints. */
+ if (c == '\'' || c == '`' || c == ',' || c == '"' || c == ';' || c == '#')
+ return 1;
+
+ /* Keywords can be identified by trailing colons too. */
+ if (c == ':' || scm_i_symbol_ref (sym, len - 1) == ':')
+ return quote_keywordish_symbols ();
+
+ /* Number-ish symbols. */
+ if (scm_is_true (scm_i_string_to_number (scm_symbol_to_string (sym), 10)))
+ return 1;
+
+ /* Other disallowed first characters. */
+ if (!uc_is_general_category_withtable (c, INITIAL_IDENTIFIER_MASK))
+ return 1;
+
+ /* Otherwise, any character that's in the identifier category mask is
+ fine to pass through as-is, provided it's not one of the ASCII
+ delimiters like `;'. */
+ for (pos = 1; pos < len; pos++)
{
- scm_lfwrite ("#{", 2, port);
- weird = 1;
+ c = scm_i_symbol_ref (sym, pos);
+ if (!uc_is_general_category_withtable (c, SUBSEQUENT_IDENTIFIER_MASK))
+ return 1;
+ else if (c == '"' || c == ';' || c == '#')
+ return 1;
}
- for (end = pos; end < len; ++end)
- switch (scm_i_symbol_ref (str, end))
- {
-#ifdef BRACKETS_AS_PARENS
- case '[':
- case ']':
-#endif
- case '(':
- case ')':
- case '"':
- case ';':
- case '#':
- case SCM_WHITE_SPACES:
- case SCM_LINE_INCREMENTORS:
- weird_handler:
- if (maybe_weird)
- {
- end = mw_pos;
- maybe_weird = 0;
- }
- if (!weird)
- {
- scm_lfwrite ("#{", 2, port);
- weird = 1;
- }
- if (pos < end)
- scm_lfwrite_substr (scm_symbol_to_string (str), pos, end, port);
- {
- char buf[2];
- buf[0] = '\\';
- buf[1] = (char) (unsigned char) scm_i_symbol_ref (str, end);
- scm_lfwrite (buf, 2, port);
- }
- pos = end + 1;
- break;
- case '\\':
- if (weird)
- goto weird_handler;
- if (!maybe_weird)
- {
- maybe_weird = 1;
- mw_pos = pos;
- }
- break;
- default:
- break;
- }
- if (pos < end)
- scm_lfwrite_substr (scm_symbol_to_string (str), pos, end, port);
- if (weird)
- scm_lfwrite ("}#", 2, port);
+ return 0;
+}
+
+static void
+print_normal_symbol (SCM sym, SCM port)
+{
+ scm_display (scm_symbol_to_string (sym), port);
+}
+
+static void
+print_extended_symbol (SCM sym, SCM port)
+{
+ size_t pos, len;
+ scm_t_string_failed_conversion_handler strategy;
+
+ len = scm_i_symbol_length (sym);
+ strategy = scm_i_get_conversion_strategy (port);
+
+ scm_lfwrite_unlocked ("#{", 2, port);
+
+ for (pos = 0; pos < len; pos++)
+ {
+ scm_t_wchar c = scm_i_symbol_ref (sym, pos);
+
+ if (uc_is_general_category_withtable (c,
+ SUBSEQUENT_IDENTIFIER_MASK
+ | UC_CATEGORY_MASK_Zs))
+ {
+ if (!display_character (c, port, strategy))
+ scm_encoding_error ("print_extended_symbol", errno,
+ "cannot convert to output locale",
+ port, SCM_MAKE_CHAR (c));
+ }
+ else
+ {
+ display_string ("\\x", 1, 2, port, iconveh_question_mark);
+ scm_intprint (c, 16, port);
+ display_character (';', port, iconveh_question_mark);
+ }
+ }
+
+ scm_lfwrite_unlocked ("}#", 2, port);
+}
+
+/* FIXME: allow R6RS hex escapes instead of #{...}#. */
+void
+scm_i_print_symbol_name (SCM sym, SCM port)
+{
+ if (symbol_has_extended_read_syntax (sym))
+ print_extended_symbol (sym, port);
+ else
+ print_normal_symbol (sym, port);
}
void
scm_print_symbol_name (const char *str, size_t len, SCM port)
{
- SCM symbol = scm_from_locale_symboln (str, len);
+ SCM symbol = scm_from_utf8_symboln (str, len);
scm_i_print_symbol_name (symbol, port);
}
scm_intprint (i, 8, port); \
else \
{ \
- scm_puts ("x", port); \
+ scm_puts_unlocked ("x", port); \
scm_intprint (i, 16, port); \
} \
} \
case scm_tc3_imm24:
if (SCM_CHARP (exp))
{
- scm_t_wchar i = SCM_CHAR (exp);
- const char *name;
-
if (SCM_WRITINGP (pstate))
+ write_character (SCM_CHAR (exp), port, 0);
+ else
{
- scm_puts ("#\\", port);
- name = scm_i_charname (exp);
- if (name != NULL)
- scm_puts (name, port);
- else if (uc_is_general_category_withtable (i, UC_CATEGORY_MASK_L
- | UC_CATEGORY_MASK_M
- | UC_CATEGORY_MASK_N
- | UC_CATEGORY_MASK_P
- | UC_CATEGORY_MASK_S))
- /* Print the character if is graphic character. */
- {
- scm_t_wchar *wbuf;
- SCM wstr;
- char *buf;
- size_t len;
- const char *enc;
-
- enc = scm_i_get_port_encoding (port);
- if (uc_combining_class (i) == UC_CCC_NR)
- {
- wstr = scm_i_make_wide_string (1, &wbuf);
- wbuf[0] = i;
- }
- else
- {
- /* Character is a combining character: print it connected
- to a dotted circle instead of connecting it to the
- backslash in '#\' */
- wstr = scm_i_make_wide_string (2, &wbuf);
- wbuf[0] = SCM_CODEPOINT_DOTTED_CIRCLE;
- wbuf[1] = i;
- }
- if (enc == NULL)
- {
- if (i <= 0xFF)
- /* Character is graphic and Latin-1. Print it */
- scm_lfwrite_str (wstr, port);
- else
- /* Character is graphic but unrepresentable in
- this port's encoding. */
- PRINT_CHAR_ESCAPE (i, port);
- }
- else
- {
- buf = u32_conv_to_encoding (enc,
- iconveh_error,
- (scm_t_uint32 *) wbuf,
- 1,
- NULL,
- NULL, &len);
- if (buf != NULL)
- {
- /* Character is graphic. Print it. */
- scm_lfwrite_str (wstr, port);
- free (buf);
- }
- else
- /* Character is graphic but unrepresentable in
- this port's encoding. */
- PRINT_CHAR_ESCAPE (i, port);
- }
- }
- else
- /* Character is a non-graphical character. */
- PRINT_CHAR_ESCAPE (i, port);
+ if (!display_character (SCM_CHAR (exp), port,
+ scm_i_get_conversion_strategy (port)))
+ scm_encoding_error (__func__, errno,
+ "cannot convert to output locale",
+ port, exp);
}
- else
- scm_i_charprint (i, port);
}
else if (SCM_IFLAGP (exp)
&& ((size_t) SCM_IFLAGNUM (exp) < (sizeof iflagnames / sizeof (char *))))
{
- scm_puts (iflagnames [SCM_IFLAGNUM (exp)], port);
+ scm_puts_unlocked (iflagnames [SCM_IFLAGNUM (exp)], port);
}
else
{
if (SCM_OBJ_CLASS_FLAGS (exp) & SCM_CLASSF_GOOPS)
{
SCM pwps, print = pstate->writingp ? g_write : g_display;
- if (!print)
+ if (SCM_UNPACK (print) == 0)
goto print_struct;
pwps = scm_i_port_with_print_state (port, pstate->handle);
pstate->revealed = 1;
- scm_call_generic_2 (print, exp, pwps);
+ scm_call_2 (print, exp, pwps);
}
else
{
case scm_tc7_string:
if (SCM_WRITINGP (pstate))
{
- size_t i, len;
- static char const hex[] = "0123456789abcdef";
- char buf[9];
-
+ size_t len, i;
- scm_putc ('"', port);
+ display_character ('"', port, iconveh_question_mark);
len = scm_i_string_length (exp);
for (i = 0; i < len; ++i)
- {
- scm_t_wchar ch = scm_i_string_ref (exp, i);
- int printed = 0;
-
- if (ch == ' ' || ch == '\n')
- {
- scm_putc (ch, port);
- printed = 1;
- }
- else if (ch == '"' || ch == '\\')
- {
- scm_putc ('\\', port);
- scm_i_charprint (ch, port);
- printed = 1;
- }
- else
- if (uc_is_general_category_withtable
- (ch,
- UC_CATEGORY_MASK_L | UC_CATEGORY_MASK_M |
- UC_CATEGORY_MASK_N | UC_CATEGORY_MASK_P |
- UC_CATEGORY_MASK_S))
- {
- /* Print the character since it is a graphic
- character. */
- scm_t_wchar *wbuf;
- SCM wstr = scm_i_make_wide_string (1, &wbuf);
- char *buf;
- size_t len;
-
- if (scm_i_get_port_encoding (port))
- {
- wstr = scm_i_make_wide_string (1, &wbuf);
- wbuf[0] = ch;
- buf = u32_conv_to_encoding (scm_i_get_port_encoding (port),
- iconveh_error,
- (scm_t_uint32 *) wbuf,
- 1 ,
- NULL,
- NULL, &len);
- if (buf != NULL)
- {
- /* Character is graphic and representable in
- this encoding. Print it. */
- scm_lfwrite_str (wstr, port);
- free (buf);
- printed = 1;
- }
- }
- else
- if (ch <= 0xFF)
- {
- scm_putc (ch, port);
- printed = 1;
- }
- }
-
- if (!printed)
- {
- /* Character is graphic but unrepresentable in
- this port's encoding or is not graphic. */
- if (!SCM_R6RS_ESCAPES_P)
- {
- if (ch <= 0xFF)
- {
- buf[0] = '\\';
- buf[1] = 'x';
- buf[2] = hex[ch / 16];
- buf[3] = hex[ch % 16];
- scm_lfwrite (buf, 4, port);
- }
- else if (ch <= 0xFFFF)
- {
- buf[0] = '\\';
- buf[1] = 'u';
- buf[2] = hex[(ch & 0xF000) >> 12];
- buf[3] = hex[(ch & 0xF00) >> 8];
- buf[4] = hex[(ch & 0xF0) >> 4];
- buf[5] = hex[(ch & 0xF)];
- scm_lfwrite (buf, 6, port);
- }
- else if (ch > 0xFFFF)
- {
- buf[0] = '\\';
- buf[1] = 'U';
- buf[2] = hex[(ch & 0xF00000) >> 20];
- buf[3] = hex[(ch & 0xF0000) >> 16];
- buf[4] = hex[(ch & 0xF000) >> 12];
- buf[5] = hex[(ch & 0xF00) >> 8];
- buf[6] = hex[(ch & 0xF0) >> 4];
- buf[7] = hex[(ch & 0xF)];
- scm_lfwrite (buf, 8, port);
- }
- }
- else
- {
- scm_t_wchar ch2 = ch;
-
- /* Print an R6RS variable-length hex escape: "\xNNNN;"
- */
- int i = 8;
- buf[i] = ';';
- i --;
- if (ch == 0)
- buf[i--] = '0';
- else
- while (ch2 > 0)
- {
- buf[i] = hex[ch2 & 0xF];
- ch2 >>= 4;
- i --;
- }
- buf[i] = 'x';
- i --;
- buf[i] = '\\';
- scm_lfwrite (buf + i, 9 - i, port);
- }
- }
- }
- scm_putc ('"', port);
+ write_character (scm_i_string_ref (exp, i), port, 1);
+
+ display_character ('"', port, iconveh_question_mark);
scm_remember_upto_here_1 (exp);
}
else
- scm_lfwrite_str (exp, port);
+ {
+ size_t len, printed;
+
+ len = scm_i_string_length (exp);
+ printed = display_string (scm_i_string_data (exp),
+ scm_i_is_narrow_string (exp),
+ len, port,
+ scm_i_get_conversion_strategy (port));
+ if (SCM_UNLIKELY (printed < len))
+ scm_encoding_error (__func__, errno,
+ "cannot convert to output locale",
+ port, scm_c_string_ref (exp, printed));
+ }
+
scm_remember_upto_here_1 (exp);
break;
case scm_tc7_symbol:
}
else
{
- scm_puts ("#<uninterned-symbol ", port);
+ scm_puts_unlocked ("#<uninterned-symbol ", port);
scm_i_print_symbol_name (exp, port);
- scm_putc (' ', port);
+ scm_putc_unlocked (' ', port);
scm_uintprint (SCM_UNPACK (exp), 16, port);
- scm_putc ('>', port);
+ scm_putc_unlocked ('>', port);
}
break;
case scm_tc7_variable:
case scm_tc7_program:
scm_i_program_print (exp, port, pstate);
break;
- case scm_tc7_foreign:
- scm_i_foreign_print (exp, port, pstate);
+ case scm_tc7_pointer:
+ scm_i_pointer_print (exp, port, pstate);
break;
case scm_tc7_hashtable:
scm_i_hashtable_print (exp, port, pstate);
break;
+ case scm_tc7_weak_set:
+ scm_i_weak_set_print (exp, port, pstate);
+ break;
+ case scm_tc7_weak_table:
+ scm_i_weak_table_print (exp, port, pstate);
+ break;
case scm_tc7_fluid:
scm_i_fluid_print (exp, port, pstate);
break;
case scm_tc7_vm_cont:
scm_i_vm_cont_print (exp, port, pstate);
break;
- case scm_tc7_wvect:
+ case scm_tc7_prompt:
+ scm_i_prompt_print (exp, port, pstate);
+ break;
+ case scm_tc7_with_fluids:
+ scm_i_with_fluids_print (exp, port, pstate);
+ break;
+ case scm_tc7_array:
ENTER_NESTED_DATA (pstate, exp, circref);
- if (SCM_IS_WHVEC (exp))
- scm_puts ("#wh(", port);
- else
- scm_puts ("#w(", port);
- goto common_vector_printer;
-
+ scm_i_print_array (exp, port, pstate);
+ EXIT_NESTED_DATA (pstate);
+ break;
case scm_tc7_bytevector:
scm_i_print_bytevector (exp, port, pstate);
break;
+ case scm_tc7_bitvector:
+ scm_i_print_bitvector (exp, port, pstate);
+ break;
+ case scm_tc7_wvect:
+ ENTER_NESTED_DATA (pstate, exp, circref);
+ scm_puts_unlocked ("#w(", port);
+ goto common_vector_printer;
case scm_tc7_vector:
ENTER_NESTED_DATA (pstate, exp, circref);
- scm_puts ("#(", port);
+ scm_puts_unlocked ("#(", port);
common_vector_printer:
{
register long i;
last = pstate->length - 1;
cutp = 1;
}
- if (SCM_I_WVECTP (exp))
- {
- /* Elements of weak vectors may not be accessed via the
- `SIMPLE_VECTOR_REF ()' macro. */
- for (i = 0; i < last; ++i)
- {
- scm_iprin1 (scm_c_vector_ref (exp, i),
- port, pstate);
- scm_putc (' ', port);
- }
- }
- else
- {
- for (i = 0; i < last; ++i)
- {
- scm_iprin1 (SCM_SIMPLE_VECTOR_REF (exp, i), port, pstate);
- scm_putc (' ', port);
- }
- }
-
+ for (i = 0; i < last; ++i)
+ {
+ scm_iprin1 (scm_c_vector_ref (exp, i), port, pstate);
+ scm_putc_unlocked (' ', port);
+ }
if (i == last)
{
/* CHECK_INTS; */
scm_iprin1 (scm_c_vector_ref (exp, i), port, pstate);
}
if (cutp)
- scm_puts (" ...", port);
- scm_putc (')', port);
+ scm_puts_unlocked (" ...", port);
+ scm_putc_unlocked (')', port);
}
EXIT_NESTED_DATA (pstate);
break;
case scm_tc7_port:
{
- register long i = SCM_PTOBNUM (exp);
- if (i < scm_numptob
- && scm_ptobs[i].print
- && (scm_ptobs[i].print) (exp, port, pstate))
+ scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (exp);
+ if (ptob->print && ptob->print (exp, port, pstate))
break;
goto punk;
}
}
}
-/* Print a character.
- */
-void
-scm_i_charprint (scm_t_wchar ch, SCM port)
+/* Convert codepoint CH to UTF-8 and store the result in UTF8. Return
+ the number of bytes of the UTF-8-encoded string. */
+static size_t
+codepoint_to_utf8 (scm_t_wchar ch, scm_t_uint8 utf8[4])
{
- scm_t_wchar *wbuf;
- SCM wstr = scm_i_make_wide_string (1, &wbuf);
+ size_t len;
+ scm_t_uint32 codepoint;
+
+ codepoint = (scm_t_uint32) ch;
+
+ if (codepoint <= 0x7f)
+ {
+ len = 1;
+ utf8[0] = (scm_t_uint8) codepoint;
+ }
+ else if (codepoint <= 0x7ffUL)
+ {
+ len = 2;
+ utf8[0] = 0xc0 | (codepoint >> 6);
+ utf8[1] = 0x80 | (codepoint & 0x3f);
+ }
+ else if (codepoint <= 0xffffUL)
+ {
+ len = 3;
+ utf8[0] = 0xe0 | (codepoint >> 12);
+ utf8[1] = 0x80 | ((codepoint >> 6) & 0x3f);
+ utf8[2] = 0x80 | (codepoint & 0x3f);
+ }
+ else
+ {
+ len = 4;
+ utf8[0] = 0xf0 | (codepoint >> 18);
+ utf8[1] = 0x80 | ((codepoint >> 12) & 0x3f);
+ utf8[2] = 0x80 | ((codepoint >> 6) & 0x3f);
+ utf8[3] = 0x80 | (codepoint & 0x3f);
+ }
- wbuf[0] = ch;
- scm_lfwrite_str (wstr, port);
+ return len;
+}
+
+#define STR_REF(s, x) \
+ (narrow_p \
+ ? (scm_t_wchar) ((unsigned char *) (s))[x] \
+ : ((scm_t_wchar *) (s))[x])
+
+/* Write STR to PORT as UTF-8. STR is a LEN-codepoint string; it is
+ narrow if NARROW_P is true, wide otherwise. Return LEN. */
+static size_t
+display_string_as_utf8 (const void *str, int narrow_p, size_t len,
+ SCM port)
+{
+ size_t printed = 0;
+
+ while (len > printed)
+ {
+ size_t utf8_len, i;
+ char *input, utf8_buf[256];
+
+ /* Convert STR to UTF-8. */
+ for (i = printed, utf8_len = 0, input = utf8_buf;
+ i < len && utf8_len + 4 < sizeof (utf8_buf);
+ i++)
+ {
+ utf8_len += codepoint_to_utf8 (STR_REF (str, i),
+ (scm_t_uint8 *) input);
+ input = utf8_buf + utf8_len;
+ }
+
+ /* INPUT was successfully converted, entirely; print the
+ result. */
+ scm_lfwrite_unlocked (utf8_buf, utf8_len, port);
+ printed += i - printed;
+ }
+
+ assert (printed == len);
+
+ return len;
+}
+
+/* Convert STR through PORT's output conversion descriptor and write the
+ output to PORT. Return the number of codepoints written. */
+static size_t
+display_string_using_iconv (const void *str, int narrow_p, size_t len,
+ SCM port,
+ scm_t_string_failed_conversion_handler strategy)
+{
+ size_t printed;
+ scm_t_iconv_descriptors *id;
+
+ id = scm_i_port_iconv_descriptors (port);
+
+ printed = 0;
+
+ while (len > printed)
+ {
+ size_t done, utf8_len, input_left, output_left, i;
+ size_t codepoints_read, output_len;
+ char *input, *output;
+ char utf8_buf[256], encoded_output[256];
+ size_t offsets[256];
+
+ /* Convert STR to UTF-8. */
+ for (i = printed, utf8_len = 0, input = utf8_buf;
+ i < len && utf8_len + 4 < sizeof (utf8_buf);
+ i++)
+ {
+ offsets[utf8_len] = i;
+ utf8_len += codepoint_to_utf8 (STR_REF (str, i),
+ (scm_t_uint8 *) input);
+ input = utf8_buf + utf8_len;
+ }
+
+ input = utf8_buf;
+ input_left = utf8_len;
+
+ output = encoded_output;
+ output_left = sizeof (encoded_output);
+
+ done = iconv (id->output_cd, &input, &input_left,
+ &output, &output_left);
+
+ output_len = sizeof (encoded_output) - output_left;
+
+ if (SCM_UNLIKELY (done == (size_t) -1))
+ {
+ int errno_save = errno;
+
+ /* Reset the `iconv' state. */
+ iconv (id->output_cd, NULL, NULL, NULL, NULL);
+
+ /* Print the OUTPUT_LEN bytes successfully converted. */
+ scm_lfwrite_unlocked (encoded_output, output_len, port);
+
+ /* See how many input codepoints these OUTPUT_LEN bytes
+ corresponds to. */
+ codepoints_read = offsets[input - utf8_buf] - printed;
+ printed += codepoints_read;
+
+ if (errno_save == EILSEQ &&
+ strategy != SCM_FAILED_CONVERSION_ERROR)
+ {
+ /* Conversion failed somewhere in INPUT and we want to
+ escape or substitute the offending input character. */
+
+ if (strategy == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
+ {
+ scm_t_wchar ch;
+
+ /* Find CH, the offending codepoint, and escape it. */
+ ch = STR_REF (str, offsets[input - utf8_buf]);
+ write_character_escaped (ch, 1, port);
+ }
+ else
+ /* STRATEGY is `SCM_FAILED_CONVERSION_QUESTION_MARK'. */
+ display_string ("?", 1, 1, port, strategy);
+
+ printed++;
+ }
+ else
+ /* Something bad happened that we can't handle: bail out. */
+ break;
+ }
+ else
+ {
+ /* INPUT was successfully converted, entirely; print the
+ result. */
+ scm_lfwrite_unlocked (encoded_output, output_len, port);
+ codepoints_read = i - printed;
+ printed += codepoints_read;
+ }
+ }
+
+ return printed;
+}
+
+#undef STR_REF
+
+/* Display the LEN codepoints in STR to PORT according to STRATEGY;
+ return the number of codepoints successfully displayed. If NARROW_P,
+ then STR is interpreted as a sequence of `char', denoting a Latin-1
+ string; otherwise it's interpreted as a sequence of
+ `scm_t_wchar'. */
+static size_t
+display_string (const void *str, int narrow_p,
+ size_t len, SCM port,
+ scm_t_string_failed_conversion_handler strategy)
+
+{
+ scm_t_port *pt;
+
+ pt = SCM_PTAB_ENTRY (port);
+
+ if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
+ return display_string_as_utf8 (str, narrow_p, len, port);
+ else
+ return display_string_using_iconv (str, narrow_p, len,
+ port, strategy);
+}
+
+/* Attempt to display CH to PORT according to STRATEGY. Return non-zero
+ if CH was successfully displayed, zero otherwise (e.g., if it was not
+ representable in PORT's encoding.) */
+static int
+display_character (scm_t_wchar ch, SCM port,
+ scm_t_string_failed_conversion_handler strategy)
+{
+ return display_string (&ch, 0, 1, port, strategy) == 1;
+}
+
+/* Attempt to pretty-print CH, a combining character, to PORT. Return
+ zero upon failure, non-zero otherwise. The idea is to print CH above
+ a dotted circle to make it more visible. */
+static int
+write_combining_character (scm_t_wchar ch, SCM port)
+{
+ scm_t_wchar str[2];
+
+ str[0] = SCM_CODEPOINT_DOTTED_CIRCLE;
+ str[1] = ch;
+
+ return display_string (str, 0, 2, port, iconveh_error) == 2;
+}
+
+/* Write CH to PORT in its escaped form, using the string escape syntax
+ if STRING_ESCAPES_P is non-zero. */
+static void
+write_character_escaped (scm_t_wchar ch, int string_escapes_p, SCM port)
+{
+ if (string_escapes_p)
+ {
+ /* Represent CH using the in-string escape syntax. */
+
+ static const char hex[] = "0123456789abcdef";
+ static const char escapes[7] = "abtnvfr";
+ char buf[9];
+
+ if (ch >= 0x07 && ch <= 0x0D && ch != 0x0A)
+ {
+ /* Use special escapes for some C0 controls. */
+ buf[0] = '\\';
+ buf[1] = escapes[ch - 0x07];
+ scm_lfwrite_unlocked (buf, 2, port);
+ }
+ else if (!SCM_R6RS_ESCAPES_P)
+ {
+ if (ch <= 0xFF)
+ {
+ buf[0] = '\\';
+ buf[1] = 'x';
+ buf[2] = hex[ch / 16];
+ buf[3] = hex[ch % 16];
+ scm_lfwrite_unlocked (buf, 4, port);
+ }
+ else if (ch <= 0xFFFF)
+ {
+ buf[0] = '\\';
+ buf[1] = 'u';
+ buf[2] = hex[(ch & 0xF000) >> 12];
+ buf[3] = hex[(ch & 0xF00) >> 8];
+ buf[4] = hex[(ch & 0xF0) >> 4];
+ buf[5] = hex[(ch & 0xF)];
+ scm_lfwrite_unlocked (buf, 6, port);
+ }
+ else if (ch > 0xFFFF)
+ {
+ buf[0] = '\\';
+ buf[1] = 'U';
+ buf[2] = hex[(ch & 0xF00000) >> 20];
+ buf[3] = hex[(ch & 0xF0000) >> 16];
+ buf[4] = hex[(ch & 0xF000) >> 12];
+ buf[5] = hex[(ch & 0xF00) >> 8];
+ buf[6] = hex[(ch & 0xF0) >> 4];
+ buf[7] = hex[(ch & 0xF)];
+ scm_lfwrite_unlocked (buf, 8, port);
+ }
+ }
+ else
+ {
+ /* Print an R6RS variable-length hex escape: "\xNNNN;". */
+ scm_t_wchar ch2 = ch;
+
+ int i = 8;
+ buf[i] = ';';
+ i --;
+ if (ch == 0)
+ buf[i--] = '0';
+ else
+ while (ch2 > 0)
+ {
+ buf[i] = hex[ch2 & 0xF];
+ ch2 >>= 4;
+ i --;
+ }
+ buf[i] = 'x';
+ i --;
+ buf[i] = '\\';
+ scm_lfwrite_unlocked (buf + i, 9 - i, port);
+ }
+ }
+ else
+ {
+ /* Represent CH using the character escape syntax. */
+ const char *name;
+
+ name = scm_i_charname (SCM_MAKE_CHAR (ch));
+ if (name != NULL)
+ scm_puts_unlocked (name, port);
+ else
+ PRINT_CHAR_ESCAPE (ch, port);
+ }
+}
+
+/* Write CH to PORT, escaping it if it's non-graphic or not
+ representable in PORT's encoding. If STRING_ESCAPES_P is true and CH
+ needs to be escaped, it is escaped using the in-string escape syntax;
+ otherwise the character escape syntax is used. */
+static void
+write_character (scm_t_wchar ch, SCM port, int string_escapes_p)
+{
+ int printed = 0;
+ scm_t_string_failed_conversion_handler strategy;
+
+ strategy = scm_i_get_conversion_strategy (port);
+
+ if (string_escapes_p)
+ {
+ /* Check if CH deserves special treatment. */
+ if (ch == '"' || ch == '\\')
+ {
+ display_character ('\\', port, iconveh_question_mark);
+ display_character (ch, port, strategy);
+ printed = 1;
+ }
+ else if (ch == '\n' && SCM_PRINT_ESCAPE_NEWLINES_P)
+ {
+ display_character ('\\', port, iconveh_question_mark);
+ display_character ('n', port, strategy);
+ printed = 1;
+ }
+ else if (ch == ' ' || ch == '\n')
+ {
+ display_character (ch, port, strategy);
+ printed = 1;
+ }
+ }
+ else
+ {
+ display_string ("#\\", 1, 2, port, iconveh_question_mark);
+
+ if (uc_combining_class (ch) != UC_CCC_NR)
+ /* Character is a combining character, so attempt to
+ pretty-print it. */
+ printed = write_combining_character (ch, port);
+ }
+
+ if (!printed
+ && uc_is_general_category_withtable (ch,
+ UC_CATEGORY_MASK_L |
+ UC_CATEGORY_MASK_M |
+ UC_CATEGORY_MASK_N |
+ UC_CATEGORY_MASK_P |
+ UC_CATEGORY_MASK_S))
+ /* CH is graphic; attempt to display it. */
+ printed = display_character (ch, port, iconveh_error);
+
+ if (!printed)
+ /* CH isn't graphic or cannot be represented in PORT's encoding. */
+ write_character_escaped (ch, string_escapes_p, port);
}
/* Print an integer.
scm_intprint (scm_t_intmax n, int radix, SCM port)
{
char num_buf[SCM_INTBUFLEN];
- scm_lfwrite (num_buf, scm_iint2str (n, radix, num_buf), port);
+ scm_lfwrite_unlocked (num_buf, scm_iint2str (n, radix, num_buf), port);
}
void
scm_uintprint (scm_t_uintmax n, int radix, SCM port)
{
char num_buf[SCM_INTBUFLEN];
- scm_lfwrite (num_buf, scm_iuint2str (n, radix, num_buf), port);
+ scm_lfwrite_unlocked (num_buf, scm_iuint2str (n, radix, num_buf), port);
}
/* Print an object of unrecognized type.
void
scm_ipruk (char *hdr, SCM ptr, SCM port)
{
- scm_puts ("#<unknown-", port);
- scm_puts (hdr, port);
+ scm_puts_unlocked ("#<unknown-", port);
+ scm_puts_unlocked (hdr, port);
if (1) /* (scm_in_heap_p (ptr)) */ /* FIXME */
{
- scm_puts (" (0x", port);
+ scm_puts_unlocked (" (0x", port);
scm_uintprint (SCM_CELL_WORD_0 (ptr), 16, port);
- scm_puts (" . 0x", port);
+ scm_puts_unlocked (" . 0x", port);
scm_uintprint (SCM_CELL_WORD_1 (ptr), 16, port);
- scm_puts (") @", port);
+ scm_puts_unlocked (") @", port);
}
- scm_puts (" 0x", port);
+ scm_puts_unlocked (" 0x", port);
scm_uintprint (SCM_UNPACK (ptr), 16, port);
- scm_putc ('>', port);
+ scm_putc_unlocked ('>', port);
}
{
register SCM hare, tortoise;
long floor = pstate->top - 2;
- scm_puts (hdr, port);
+ scm_puts_unlocked (hdr, port);
/* CHECK_INTS; */
if (pstate->fancyp)
goto fancy_printing;
if (scm_is_eq (PSTATE_STACK_REF(pstate, i), exp))
goto circref;
PUSH_REF (pstate, exp);
- scm_putc (' ', port);
+ scm_putc_unlocked (' ', port);
/* CHECK_INTS; */
scm_iprin1 (SCM_CAR (exp), port, pstate);
}
if (!SCM_NULL_OR_NIL_P (exp))
{
- scm_puts (" . ", port);
+ scm_puts_unlocked (" . ", port);
scm_iprin1 (exp, port, pstate);
}
end:
- scm_putc (tlr, port);
+ scm_putc_unlocked (tlr, port);
pstate->top = floor + 2;
return;
{
if (n == 0)
{
- scm_puts (" ...", port);
+ scm_puts_unlocked (" ...", port);
goto skip_tail;
}
else
}
PUSH_REF(pstate, exp);
++pstate->list_offset;
- scm_putc (' ', port);
+ scm_putc_unlocked (' ', port);
/* CHECK_INTS; */
scm_iprin1 (SCM_CAR (exp), port, pstate);
}
}
if (!SCM_NULL_OR_NIL_P (exp))
{
- scm_puts (" . ", port);
+ scm_puts_unlocked (" . ", port);
scm_iprin1 (exp, port, pstate);
}
skip_tail:
pstate->list_offset -= pstate->top - floor - 2;
circref:
- scm_puts (" . ", port);
+ scm_puts_unlocked (" . ", port);
print_circref (port, pstate, exp);
goto end;
}
SCM_ASSERT (scm_valid_oport_value_p (port), port, SCM_ARG2, s_write);
+ scm_dynwind_begin (0);
+ scm_dynwind_lock_port (SCM_COERCE_OUTPORT (port));
scm_prin1 (obj, port, 1);
-#if 0
-#ifdef HAVE_PIPE
-# ifdef EPIPE
- if (EPIPE == errno)
- scm_close_port (port);
-# endif
-#endif
-#endif
+ scm_dynwind_end ();
+
return SCM_UNSPECIFIED;
}
SCM_ASSERT (scm_valid_oport_value_p (port), port, SCM_ARG2, s_display);
+ scm_dynwind_begin (0);
+ scm_dynwind_lock_port (SCM_COERCE_OUTPORT (port));
scm_prin1 (obj, port, 0);
-#if 0
-#ifdef HAVE_PIPE
-# ifdef EPIPE
- if (EPIPE == errno)
- scm_close_port (port);
-# endif
-#endif
-#endif
+ scm_dynwind_end ();
+
return SCM_UNSPECIFIED;
}
"@var{message} can contain @code{~A} (was @code{%s}) and\n"
"@code{~S} (was @code{%S}) escapes. When printed,\n"
"the escapes are replaced with corresponding members of\n"
- "@var{ARGS}:\n"
+ "@var{args}:\n"
"@code{~A} formats using @code{display} and @code{~S} formats\n"
"using @code{write}.\n"
"If @var{destination} is @code{#t}, then use the current output\n"
else if (scm_is_false (destination))
{
fReturnString = 1;
- port = scm_mkstrport (SCM_INUM0,
- scm_make_string (SCM_INUM0, SCM_UNDEFINED),
+ port = scm_mkstrport (SCM_INUM0, SCM_BOOL_F,
SCM_OPN | SCM_WRTNG,
FUNC_NAME);
destination = port;
SCM_VALIDATE_OPORT_VALUE (1, port);
- scm_putc ('\n', SCM_COERCE_OUTPORT (port));
+ scm_putc_unlocked ('\n', SCM_COERCE_OUTPORT (port));
return SCM_UNSPECIFIED;
}
#undef FUNC_NAME
SCM_VALIDATE_CHAR (1, chr);
SCM_VALIDATE_OPORT_VALUE (2, port);
-
- scm_i_charprint (SCM_CHAR (chr), SCM_COERCE_OUTPORT (port));
-#if 0
-#ifdef HAVE_PIPE
-# ifdef EPIPE
- if (EPIPE == errno)
- scm_close_port (port);
-# endif
-#endif
-#endif
+
+ port = SCM_COERCE_OUTPORT (port);
+ if (!display_character (SCM_CHAR (chr), port,
+ scm_i_get_conversion_strategy (port)))
+ scm_encoding_error (__func__, errno,
+ "cannot convert to output locale",
+ port, chr);
+
return SCM_UNSPECIFIED;
}
#undef FUNC_NAME
port_with_ps_print (SCM obj, SCM port, scm_print_state *pstate)
{
obj = SCM_PORT_WITH_PS_PORT (obj);
- return scm_ptobs[SCM_PTOBNUM (obj)].print (obj, port, pstate);
+ return SCM_PORT_DESCRIPTOR (obj)->print (obj, port, pstate);
}
SCM
void
scm_init_print ()
{
- SCM vtable, layout, type;
+ SCM type;
- scm_init_opts (scm_print_options, scm_print_opts);
-
- scm_print_options (scm_list_4 (scm_from_locale_symbol ("highlight-prefix"),
- scm_from_locale_string ("{"),
- scm_from_locale_symbol ("highlight-suffix"),
- scm_from_locale_string ("}")));
-
- scm_gc_register_root (&print_state_pool);
- scm_gc_register_root (&scm_print_state_vtable);
- vtable = scm_make_vtable_vtable (scm_nullstr, SCM_INUM0, SCM_EOL);
- layout =
- scm_make_struct_layout (scm_from_locale_string (SCM_PRINT_STATE_LAYOUT));
- type = scm_make_struct (vtable, SCM_INUM0, scm_list_1 (layout));
- scm_set_struct_vtable_name_x (type, scm_from_locale_symbol ("print-state"));
+ type = scm_make_vtable (scm_from_locale_string (SCM_PRINT_STATE_LAYOUT),
+ SCM_BOOL_F);
+ scm_set_struct_vtable_name_x (type, scm_from_latin1_symbol ("print-state"));
scm_print_state_vtable = type;
/* Don't want to bind a wrapper class in GOOPS, so pass 0 as arg1. */
#include "libguile/print.x"
+ scm_init_opts (scm_print_options, scm_print_opts);
+ scm_print_opts[SCM_PRINT_HIGHLIGHT_PREFIX_I].val =
+ SCM_UNPACK (scm_from_locale_string ("{"));
+ scm_print_opts[SCM_PRINT_HIGHLIGHT_SUFFIX_I].val =
+ SCM_UNPACK (scm_from_locale_string ("}"));
scm_print_opts[SCM_PRINT_KEYWORD_STYLE_I].val = SCM_UNPACK (sym_reader);
}