X-Git-Url: http://git.hcoop.net/bpt/guile.git/blobdiff_plain/dea901d66e46041f96d3d3a0f95bf0ab209387c9..8571dbde639e0ee9885bad49c9e180474bd23646:/libguile/print.c diff --git a/libguile/print.c b/libguile/print.c index dcf28c7c2..4e68fd6c4 100644 --- a/libguile/print.c +++ b/libguile/print.c @@ -1,5 +1,6 @@ -/* Copyright (C) 1995-1999,2000,2001, 2002, 2003, 2004, 2006, 2008, 2009, 2010 Free Software Foundation, Inc. - * +/* Copyright (C) 1995-1999, 2000, 2001, 2002, 2003, 2004, 2006, 2008, + * 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. + * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 3 of @@ -23,13 +24,19 @@ #endif #include +#include +#include +#include + #include #include +#include #include "libguile/_scm.h" #include "libguile/chars.h" #include "libguile/continuations.h" #include "libguile/smob.h" +#include "libguile/control.h" #include "libguile/eval.h" #include "libguile/macros.h" #include "libguile/procprop.h" @@ -39,11 +46,11 @@ #include "libguile/alist.h" #include "libguile/struct.h" #include "libguile/ports.h" +#include "libguile/ports-internal.h" #include "libguile/root.h" #include "libguile/strings.h" #include "libguile/strports.h" #include "libguile/vectors.h" -#include "libguile/lang.h" #include "libguile/numbers.h" #include "libguile/vm.h" @@ -54,6 +61,23 @@ +/* Character printers. */ + +#define PORT_CONVERSION_HANDLER(port) \ + SCM_PTAB_ENTRY (port)->ilseq_handler + +static size_t display_string (const void *, int, size_t, SCM, + scm_t_string_failed_conversion_handler); + +static int display_character (scm_t_wchar, SCM, + scm_t_string_failed_conversion_handler); + +static void write_character (scm_t_wchar, SCM, int); + +static void write_character_escaped (scm_t_wchar, int, SCM); + + + /* {Names of immediate symbols} * * This table must agree with the declarations in scm.h: {Immediate Symbols}. @@ -67,7 +91,9 @@ static const char *iflagnames[] = "#", "()", "#t", - "#", + "#", + "#", + "#", "#", "#", "#", @@ -79,19 +105,16 @@ static const char *iflagnames[] = SCM_SYMBOL (sym_reader, "reader"); scm_t_option scm_print_opts[] = { - { SCM_OPTION_SCM, "closure-hook", (unsigned long) SCM_BOOL_F, - "Hook for printing closures (should handle macros as well)." }, - { SCM_OPTION_BOOLEAN, "source", 0, - "Print closures with source." }, - { SCM_OPTION_SCM, "highlight-prefix", (unsigned long)SCM_BOOL_F, + { SCM_OPTION_SCM, "highlight-prefix", (scm_t_bits)SCM_BOOL_F_BITS, "The string to print before highlighted values." }, - { SCM_OPTION_SCM, "highlight-suffix", (unsigned long)SCM_BOOL_F, + { SCM_OPTION_SCM, "highlight-suffix", (scm_t_bits)SCM_BOOL_F_BITS, "The string to print after highlighted values." }, - { SCM_OPTION_SCM, "quote-keywordish-symbols", (unsigned long)SCM_BOOL_F, + { SCM_OPTION_SCM, "quote-keywordish-symbols", (scm_t_bits)SCM_BOOL_F_BITS, "How to print symbols that have a colon as their first or last character. " "The value '#f' does not quote the colons; '#t' quotes them; " - "'reader' quotes them when the reader option 'keywords' is not '#f'." - }, + "'reader' quotes them when the reader option 'keywords' is not '#f'." }, + { SCM_OPTION_BOOLEAN, "escape-newlines", 1, + "Render newlines as \\n when printing using `write'." }, { 0 }, }; @@ -292,15 +315,10 @@ print_circref (SCM port, scm_print_state *pstate, SCM ref) /* Print the name of a symbol. */ static int -quote_keywordish_symbol (SCM symbol) +quote_keywordish_symbols (void) { - SCM option; + SCM option = SCM_PRINT_KEYWORD_STYLE; - if (scm_i_symbol_ref (symbol, 0) != ':' - && scm_i_symbol_ref (symbol, scm_i_symbol_length (symbol) - 1) != ':') - return 0; - - option = SCM_PRINT_KEYWORD_STYLE; if (scm_is_false (option)) return 0; if (scm_is_eq (option, sym_reader)) @@ -308,91 +326,115 @@ quote_keywordish_symbol (SCM symbol) return 1; } -void -scm_i_print_symbol_name (SCM str, SCM port) +#define INITIAL_IDENTIFIER_MASK \ + (UC_CATEGORY_MASK_Lu | UC_CATEGORY_MASK_Ll | UC_CATEGORY_MASK_Lt \ + | UC_CATEGORY_MASK_Lm | UC_CATEGORY_MASK_Lo | UC_CATEGORY_MASK_Mn \ + | UC_CATEGORY_MASK_Nl | UC_CATEGORY_MASK_No | UC_CATEGORY_MASK_Pd \ + | UC_CATEGORY_MASK_Pc | UC_CATEGORY_MASK_Po | UC_CATEGORY_MASK_Sc \ + | UC_CATEGORY_MASK_Sm | UC_CATEGORY_MASK_Sk | UC_CATEGORY_MASK_So \ + | UC_CATEGORY_MASK_Co) + +#define SUBSEQUENT_IDENTIFIER_MASK \ + (INITIAL_IDENTIFIER_MASK \ + | UC_CATEGORY_MASK_Nd | UC_CATEGORY_MASK_Mc | UC_CATEGORY_MASK_Me) + +static int +symbol_has_extended_read_syntax (SCM sym) { - /* This points to the first character that has not yet been written to the - * port. */ - size_t pos = 0; - /* This points to the character we're currently looking at. */ - size_t end; - /* If the name contains weird characters, we'll escape them with - * backslashes and set this flag; it indicates that we should surround the - * name with "#{" and "}#". */ - int weird = 0; - /* Backslashes are not sufficient to make a name weird, but if a name is - * weird because of other characters, backslahes need to be escaped too. - * The first time we see a backslash, we set maybe_weird, and mw_pos points - * to the backslash. Then if the name turns out to be weird, we re-process - * everything starting from mw_pos. - * We could instead make backslashes always weird. This is not necessary - * to ensure that the output is (read)-able, but it would make this code - * simpler and faster. */ - int maybe_weird = 0; - size_t mw_pos = 0; - size_t len = scm_i_symbol_length (str); - scm_t_wchar str0 = scm_i_symbol_ref (str, 0); - - if (len == 0 || str0 == '\'' || str0 == '`' || str0 == ',' - || quote_keywordish_symbol (str) - || (str0 == '.' && len == 1) - || scm_is_true (scm_i_string_to_number (scm_symbol_to_string (str), 10))) + size_t pos, len = scm_i_symbol_length (sym); + scm_t_wchar c; + + /* The empty symbol. */ + if (len == 0) + return 1; + + c = scm_i_symbol_ref (sym, 0); + + /* Single dot; conflicts with dotted-pair notation. */ + if (len == 1 && c == '.') + return 1; + + /* Other initial-character constraints. */ + if (c == '\'' || c == '`' || c == ',' || c == '"' || c == ';' || c == '#') + return 1; + + /* Keywords can be identified by trailing colons too. */ + if (c == ':' || scm_i_symbol_ref (sym, len - 1) == ':') + return quote_keywordish_symbols (); + + /* Number-ish symbols. */ + if (scm_is_true (scm_i_string_to_number (scm_symbol_to_string (sym), 10))) + return 1; + + /* Other disallowed first characters. */ + if (!uc_is_general_category_withtable (c, INITIAL_IDENTIFIER_MASK)) + return 1; + + /* Otherwise, any character that's in the identifier category mask is + fine to pass through as-is, provided it's not one of the ASCII + delimiters like `;'. */ + for (pos = 1; pos < len; pos++) { - scm_lfwrite ("#{", 2, port); - weird = 1; + c = scm_i_symbol_ref (sym, pos); + if (!uc_is_general_category_withtable (c, SUBSEQUENT_IDENTIFIER_MASK)) + return 1; + else if (c == '"' || c == ';' || c == '#') + return 1; } - for (end = pos; end < len; ++end) - switch (scm_i_symbol_ref (str, end)) - { -#ifdef BRACKETS_AS_PARENS - case '[': - case ']': -#endif - case '(': - case ')': - case '"': - case ';': - case '#': - case SCM_WHITE_SPACES: - case SCM_LINE_INCREMENTORS: - weird_handler: - if (maybe_weird) - { - end = mw_pos; - maybe_weird = 0; - } - if (!weird) - { - scm_lfwrite ("#{", 2, port); - weird = 1; - } - if (pos < end) - scm_lfwrite_substr (scm_symbol_to_string (str), pos, end, port); - { - char buf[2]; - buf[0] = '\\'; - buf[1] = (char) (unsigned char) scm_i_symbol_ref (str, end); - scm_lfwrite (buf, 2, port); - } - pos = end + 1; - break; - case '\\': - if (weird) - goto weird_handler; - if (!maybe_weird) - { - maybe_weird = 1; - mw_pos = pos; - } - break; - default: - break; - } - if (pos < end) - scm_lfwrite_substr (scm_symbol_to_string (str), pos, end, port); - if (weird) - scm_lfwrite ("}#", 2, port); + return 0; +} + +static void +print_normal_symbol (SCM sym, SCM port) +{ + scm_display (scm_symbol_to_string (sym), port); +} + +static void +print_extended_symbol (SCM sym, SCM port) +{ + size_t pos, len; + scm_t_string_failed_conversion_handler strategy; + + len = scm_i_symbol_length (sym); + strategy = PORT_CONVERSION_HANDLER (port); + + scm_lfwrite ("#{", 2, port); + + for (pos = 0; pos < len; pos++) + { + scm_t_wchar c = scm_i_symbol_ref (sym, pos); + + if (uc_is_general_category_withtable (c, + SUBSEQUENT_IDENTIFIER_MASK + | UC_CATEGORY_MASK_Zs)) + { + if (!display_character (c, port, strategy) + || (c == '\\' && !display_character (c, port, strategy))) + scm_encoding_error ("print_extended_symbol", errno, + "cannot convert to output locale", + port, SCM_MAKE_CHAR (c)); + } + else + { + display_string ("\\x", 1, 2, port, iconveh_question_mark); + scm_intprint (c, 16, port); + display_character (';', port, iconveh_question_mark); + } + } + + scm_lfwrite ("}#", 2, port); +} + +/* FIXME: allow R6RS hex escapes instead of #{...}#. */ +void +scm_i_print_symbol_name (SCM sym, SCM port) +{ + if (symbol_has_extended_read_syntax (sym)) + print_extended_symbol (sym, port); + else + print_normal_symbol (sym, port); } void @@ -459,79 +501,16 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate) case scm_tc3_imm24: if (SCM_CHARP (exp)) { - scm_t_wchar i = SCM_CHAR (exp); - const char *name; - if (SCM_WRITINGP (pstate)) + write_character (SCM_CHAR (exp), port, 0); + else { - scm_puts ("#\\", port); - name = scm_i_charname (exp); - if (name != NULL) - scm_puts (name, port); - else if (uc_is_general_category_withtable (i, UC_CATEGORY_MASK_L - | UC_CATEGORY_MASK_M - | UC_CATEGORY_MASK_N - | UC_CATEGORY_MASK_P - | UC_CATEGORY_MASK_S)) - /* Print the character if is graphic character. */ - { - scm_t_wchar *wbuf; - SCM wstr; - char *buf; - size_t len; - const char *enc; - - enc = scm_i_get_port_encoding (port); - if (uc_combining_class (i) == UC_CCC_NR) - { - wstr = scm_i_make_wide_string (1, &wbuf); - wbuf[0] = i; - } - else - { - /* Character is a combining character: print it connected - to a dotted circle instead of connecting it to the - backslash in '#\' */ - wstr = scm_i_make_wide_string (2, &wbuf); - wbuf[0] = SCM_CODEPOINT_DOTTED_CIRCLE; - wbuf[1] = i; - } - if (enc == NULL) - { - if (i <= 0xFF) - /* Character is graphic and Latin-1. Print it */ - scm_lfwrite_str (wstr, port); - else - /* Character is graphic but unrepresentable in - this port's encoding. */ - PRINT_CHAR_ESCAPE (i, port); - } - else - { - buf = u32_conv_to_encoding (enc, - iconveh_error, - (scm_t_uint32 *) wbuf, - 1, - NULL, - NULL, &len); - if (buf != NULL) - { - /* Character is graphic. Print it. */ - scm_lfwrite_str (wstr, port); - free (buf); - } - else - /* Character is graphic but unrepresentable in - this port's encoding. */ - PRINT_CHAR_ESCAPE (i, port); - } - } - else - /* Character is a non-graphical character. */ - PRINT_CHAR_ESCAPE (i, port); + if (!display_character (SCM_CHAR (exp), port, + PORT_CONVERSION_HANDLER (port))) + scm_encoding_error (__func__, errno, + "cannot convert to output locale", + port, exp); } - else - scm_i_charprint (i, port); } else if (SCM_IFLAGP (exp) && ((size_t) SCM_IFLAGNUM (exp) < (sizeof iflagnames / sizeof (char *)))) @@ -553,7 +532,7 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate) if (SCM_OBJ_CLASS_FLAGS (exp) & SCM_CLASSF_GOOPS) { SCM pwps, print = pstate->writingp ? g_write : g_display; - if (!print) + if (SCM_UNPACK (print) == 0) goto print_struct; pwps = scm_i_port_with_print_state (port, pstate->handle); pstate->revealed = 1; @@ -595,137 +574,31 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate) case scm_tc7_string: if (SCM_WRITINGP (pstate)) { - size_t i, len; - static char const hex[] = "0123456789abcdef"; - char buf[9]; - + size_t len, i; - scm_putc ('"', port); + display_character ('"', port, iconveh_question_mark); len = scm_i_string_length (exp); for (i = 0; i < len; ++i) - { - scm_t_wchar ch = scm_i_string_ref (exp, i); - int printed = 0; - - if (ch == ' ' || ch == '\n') - { - scm_putc (ch, port); - printed = 1; - } - else if (ch == '"' || ch == '\\') - { - scm_putc ('\\', port); - scm_i_charprint (ch, port); - printed = 1; - } - else - if (uc_is_general_category_withtable - (ch, - UC_CATEGORY_MASK_L | UC_CATEGORY_MASK_M | - UC_CATEGORY_MASK_N | UC_CATEGORY_MASK_P | - UC_CATEGORY_MASK_S)) - { - /* Print the character since it is a graphic - character. */ - scm_t_wchar *wbuf; - SCM wstr = scm_i_make_wide_string (1, &wbuf); - char *buf; - size_t len; - - if (scm_i_get_port_encoding (port)) - { - wstr = scm_i_make_wide_string (1, &wbuf); - wbuf[0] = ch; - buf = u32_conv_to_encoding (scm_i_get_port_encoding (port), - iconveh_error, - (scm_t_uint32 *) wbuf, - 1 , - NULL, - NULL, &len); - if (buf != NULL) - { - /* Character is graphic and representable in - this encoding. Print it. */ - scm_lfwrite_str (wstr, port); - free (buf); - printed = 1; - } - } - else - if (ch <= 0xFF) - { - scm_putc (ch, port); - printed = 1; - } - } - - if (!printed) - { - /* Character is graphic but unrepresentable in - this port's encoding or is not graphic. */ - if (!SCM_R6RS_ESCAPES_P) - { - if (ch <= 0xFF) - { - buf[0] = '\\'; - buf[1] = 'x'; - buf[2] = hex[ch / 16]; - buf[3] = hex[ch % 16]; - scm_lfwrite (buf, 4, port); - } - else if (ch <= 0xFFFF) - { - buf[0] = '\\'; - buf[1] = 'u'; - buf[2] = hex[(ch & 0xF000) >> 12]; - buf[3] = hex[(ch & 0xF00) >> 8]; - buf[4] = hex[(ch & 0xF0) >> 4]; - buf[5] = hex[(ch & 0xF)]; - scm_lfwrite (buf, 6, port); - } - else if (ch > 0xFFFF) - { - buf[0] = '\\'; - buf[1] = 'U'; - buf[2] = hex[(ch & 0xF00000) >> 20]; - buf[3] = hex[(ch & 0xF0000) >> 16]; - buf[4] = hex[(ch & 0xF000) >> 12]; - buf[5] = hex[(ch & 0xF00) >> 8]; - buf[6] = hex[(ch & 0xF0) >> 4]; - buf[7] = hex[(ch & 0xF)]; - scm_lfwrite (buf, 8, port); - } - } - else - { - scm_t_wchar ch2 = ch; - - /* Print an R6RS variable-length hex escape: "\xNNNN;" - */ - int i = 8; - buf[i] = ';'; - i --; - if (ch == 0) - buf[i--] = '0'; - else - while (ch2 > 0) - { - buf[i] = hex[ch2 & 0xF]; - ch2 >>= 4; - i --; - } - buf[i] = 'x'; - i --; - buf[i] = '\\'; - scm_lfwrite (buf + i, 9 - i, port); - } - } - } - scm_putc ('"', port); + write_character (scm_i_string_ref (exp, i), port, 1); + + display_character ('"', port, iconveh_question_mark); scm_remember_upto_here_1 (exp); } else - scm_lfwrite_str (exp, port); + { + size_t len, printed; + + len = scm_i_string_length (exp); + printed = display_string (scm_i_string_data (exp), + scm_i_is_narrow_string (exp), + len, port, + PORT_CONVERSION_HANDLER (port)); + if (SCM_UNLIKELY (printed < len)) + scm_encoding_error (__func__, errno, + "cannot convert to output locale", + port, scm_c_string_ref (exp, printed)); + } + scm_remember_upto_here_1 (exp); break; case scm_tc7_symbol: @@ -749,8 +622,8 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate) case scm_tc7_program: scm_i_program_print (exp, port, pstate); break; - case scm_tc7_foreign: - scm_i_foreign_print (exp, port, pstate); + case scm_tc7_pointer: + scm_i_pointer_print (exp, port, pstate); break; case scm_tc7_hashtable: scm_i_hashtable_print (exp, port, pstate); @@ -773,6 +646,23 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate) case scm_tc7_vm_cont: scm_i_vm_cont_print (exp, port, pstate); break; + case scm_tc7_prompt: + scm_i_prompt_print (exp, port, pstate); + break; + case scm_tc7_with_fluids: + scm_i_with_fluids_print (exp, port, pstate); + break; + case scm_tc7_array: + ENTER_NESTED_DATA (pstate, exp, circref); + scm_i_print_array (exp, port, pstate); + EXIT_NESTED_DATA (pstate); + break; + case scm_tc7_bytevector: + scm_i_print_bytevector (exp, port, pstate); + break; + case scm_tc7_bitvector: + scm_i_print_bitvector (exp, port, pstate); + break; case scm_tc7_wvect: ENTER_NESTED_DATA (pstate, exp, circref); if (SCM_IS_WHVEC (exp)) @@ -780,10 +670,6 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate) else scm_puts ("#w(", port); goto common_vector_printer; - - case scm_tc7_bytevector: - scm_i_print_bytevector (exp, port, pstate); - break; case scm_tc7_vector: ENTER_NESTED_DATA (pstate, exp, circref); scm_puts ("#(", port); @@ -909,18 +795,415 @@ scm_prin1 (SCM exp, SCM port, int writingp) } } -/* Print a character. - */ -void -scm_i_charprint (scm_t_wchar ch, SCM port) +/* Convert codepoint CH to UTF-8 and store the result in UTF8. Return + the number of bytes of the UTF-8-encoded string. */ +static size_t +codepoint_to_utf8 (scm_t_wchar ch, scm_t_uint8 utf8[4]) +{ + size_t len; + scm_t_uint32 codepoint; + + codepoint = (scm_t_uint32) ch; + + if (codepoint <= 0x7f) + { + len = 1; + utf8[0] = (scm_t_uint8) codepoint; + } + else if (codepoint <= 0x7ffUL) + { + len = 2; + utf8[0] = 0xc0 | (codepoint >> 6); + utf8[1] = 0x80 | (codepoint & 0x3f); + } + else if (codepoint <= 0xffffUL) + { + len = 3; + utf8[0] = 0xe0 | (codepoint >> 12); + utf8[1] = 0x80 | ((codepoint >> 6) & 0x3f); + utf8[2] = 0x80 | (codepoint & 0x3f); + } + else + { + len = 4; + utf8[0] = 0xf0 | (codepoint >> 18); + utf8[1] = 0x80 | ((codepoint >> 12) & 0x3f); + utf8[2] = 0x80 | ((codepoint >> 6) & 0x3f); + utf8[3] = 0x80 | (codepoint & 0x3f); + } + + return len; +} + +#define STR_REF(s, x) \ + (narrow_p \ + ? (scm_t_wchar) ((unsigned char *) (s))[x] \ + : ((scm_t_wchar *) (s))[x]) + +/* Write STR to PORT as UTF-8. STR is a LEN-codepoint string; it is + narrow if NARROW_P is true, wide otherwise. Return LEN. */ +static size_t +display_string_as_utf8 (const void *str, int narrow_p, size_t len, + SCM port) +{ + size_t printed = 0; + + while (len > printed) + { + size_t utf8_len, i; + char *input, utf8_buf[256]; + + /* Convert STR to UTF-8. */ + for (i = printed, utf8_len = 0, input = utf8_buf; + i < len && utf8_len + 4 < sizeof (utf8_buf); + i++) + { + utf8_len += codepoint_to_utf8 (STR_REF (str, i), + (scm_t_uint8 *) input); + input = utf8_buf + utf8_len; + } + + /* INPUT was successfully converted, entirely; print the + result. */ + scm_lfwrite (utf8_buf, utf8_len, port); + printed += i - printed; + } + + assert (printed == len); + + return len; +} + +/* Convert STR through PORT's output conversion descriptor and write the + output to PORT. Return the number of codepoints written. */ +static size_t +display_string_using_iconv (const void *str, int narrow_p, size_t len, + SCM port, + scm_t_string_failed_conversion_handler strategy) +{ + size_t printed; + scm_t_iconv_descriptors *id; + scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port); + + id = scm_i_port_iconv_descriptors (port, SCM_PORT_WRITE); + + if (SCM_UNLIKELY (pti->at_stream_start_for_bom_write && len > 0)) + { + scm_t_port *pt = SCM_PTAB_ENTRY (port); + + /* Record that we're no longer at stream start. */ + pti->at_stream_start_for_bom_write = 0; + if (pt->rw_random) + pti->at_stream_start_for_bom_read = 0; + + /* Write a BOM if appropriate. */ + if (SCM_UNLIKELY (c_strcasecmp(pt->encoding, "UTF-16") == 0 + || c_strcasecmp(pt->encoding, "UTF-32") == 0)) + display_character (SCM_UNICODE_BOM, port, iconveh_error); + } + + printed = 0; + + while (len > printed) + { + size_t done, utf8_len, input_left, output_left, i; + size_t codepoints_read, output_len; + char *input, *output; + char utf8_buf[256], encoded_output[256]; + size_t offsets[256]; + + /* Convert STR to UTF-8. */ + for (i = printed, utf8_len = 0, input = utf8_buf; + i < len && utf8_len + 4 < sizeof (utf8_buf); + i++) + { + offsets[utf8_len] = i; + utf8_len += codepoint_to_utf8 (STR_REF (str, i), + (scm_t_uint8 *) input); + input = utf8_buf + utf8_len; + } + + input = utf8_buf; + input_left = utf8_len; + + output = encoded_output; + output_left = sizeof (encoded_output); + + done = iconv (id->output_cd, &input, &input_left, + &output, &output_left); + + output_len = sizeof (encoded_output) - output_left; + + if (SCM_UNLIKELY (done == (size_t) -1)) + { + int errno_save = errno; + + /* Reset the `iconv' state. */ + iconv (id->output_cd, NULL, NULL, NULL, NULL); + + /* Print the OUTPUT_LEN bytes successfully converted. */ + scm_lfwrite (encoded_output, output_len, port); + + /* See how many input codepoints these OUTPUT_LEN bytes + corresponds to. */ + codepoints_read = offsets[input - utf8_buf] - printed; + printed += codepoints_read; + + if (errno_save == EILSEQ && + strategy != SCM_FAILED_CONVERSION_ERROR) + { + /* Conversion failed somewhere in INPUT and we want to + escape or substitute the offending input character. */ + + if (strategy == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE) + { + scm_t_wchar ch; + + /* Find CH, the offending codepoint, and escape it. */ + ch = STR_REF (str, offsets[input - utf8_buf]); + write_character_escaped (ch, 1, port); + } + else + /* STRATEGY is `SCM_FAILED_CONVERSION_QUESTION_MARK'. */ + display_string ("?", 1, 1, port, strategy); + + printed++; + } + else + /* Something bad happened that we can't handle: bail out. */ + break; + } + else + { + /* INPUT was successfully converted, entirely; print the + result. */ + scm_lfwrite (encoded_output, output_len, port); + codepoints_read = i - printed; + printed += codepoints_read; + } + } + + return printed; +} + +#undef STR_REF + +/* Display the LEN codepoints in STR to PORT according to STRATEGY; + return the number of codepoints successfully displayed. If NARROW_P, + then STR is interpreted as a sequence of `char', denoting a Latin-1 + string; otherwise it's interpreted as a sequence of + `scm_t_wchar'. */ +static size_t +display_string (const void *str, int narrow_p, + size_t len, SCM port, + scm_t_string_failed_conversion_handler strategy) + { - scm_t_wchar *wbuf; - SCM wstr = scm_i_make_wide_string (1, &wbuf); + scm_t_port_internal *pti; - wbuf[0] = ch; - scm_lfwrite_str (wstr, port); + pti = SCM_PORT_GET_INTERNAL (port); + + if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8) + return display_string_as_utf8 (str, narrow_p, len, port); + else + return display_string_using_iconv (str, narrow_p, len, + port, strategy); } +/* Attempt to display CH to PORT according to STRATEGY. Return non-zero + if CH was successfully displayed, zero otherwise (e.g., if it was not + representable in PORT's encoding.) */ +static int +display_character (scm_t_wchar ch, SCM port, + scm_t_string_failed_conversion_handler strategy) +{ + return display_string (&ch, 0, 1, port, strategy) == 1; +} + +/* Attempt to pretty-print CH, a combining character, to PORT. Return + zero upon failure, non-zero otherwise. The idea is to print CH above + a dotted circle to make it more visible. */ +static int +write_combining_character (scm_t_wchar ch, SCM port) +{ + scm_t_wchar str[2]; + + str[0] = SCM_CODEPOINT_DOTTED_CIRCLE; + str[1] = ch; + + return display_string (str, 0, 2, port, iconveh_error) == 2; +} + +/* Write CH to PORT in its escaped form, using the string escape syntax + if STRING_ESCAPES_P is non-zero. */ +static void +write_character_escaped (scm_t_wchar ch, int string_escapes_p, SCM port) +{ + if (string_escapes_p) + { + /* Represent CH using the in-string escape syntax. */ + + static const char hex[] = "0123456789abcdef"; + static const char escapes[7] = "abtnvfr"; + char buf[9]; + + if (ch >= 0x07 && ch <= 0x0D && ch != 0x0A) + { + /* Use special escapes for some C0 controls. */ + buf[0] = '\\'; + buf[1] = escapes[ch - 0x07]; + scm_lfwrite (buf, 2, port); + } + else if (!SCM_R6RS_ESCAPES_P) + { + if (ch <= 0xFF) + { + buf[0] = '\\'; + buf[1] = 'x'; + buf[2] = hex[ch / 16]; + buf[3] = hex[ch % 16]; + scm_lfwrite (buf, 4, port); + } + else if (ch <= 0xFFFF) + { + buf[0] = '\\'; + buf[1] = 'u'; + buf[2] = hex[(ch & 0xF000) >> 12]; + buf[3] = hex[(ch & 0xF00) >> 8]; + buf[4] = hex[(ch & 0xF0) >> 4]; + buf[5] = hex[(ch & 0xF)]; + scm_lfwrite (buf, 6, port); + } + else if (ch > 0xFFFF) + { + buf[0] = '\\'; + buf[1] = 'U'; + buf[2] = hex[(ch & 0xF00000) >> 20]; + buf[3] = hex[(ch & 0xF0000) >> 16]; + buf[4] = hex[(ch & 0xF000) >> 12]; + buf[5] = hex[(ch & 0xF00) >> 8]; + buf[6] = hex[(ch & 0xF0) >> 4]; + buf[7] = hex[(ch & 0xF)]; + scm_lfwrite (buf, 8, port); + } + } + else + { + /* Print an R6RS variable-length hex escape: "\xNNNN;". */ + scm_t_wchar ch2 = ch; + + int i = 8; + buf[i] = ';'; + i --; + if (ch == 0) + buf[i--] = '0'; + else + while (ch2 > 0) + { + buf[i] = hex[ch2 & 0xF]; + ch2 >>= 4; + i --; + } + buf[i] = 'x'; + i --; + buf[i] = '\\'; + scm_lfwrite (buf + i, 9 - i, port); + } + } + else + { + /* Represent CH using the character escape syntax. */ + const char *name; + + name = scm_i_charname (SCM_MAKE_CHAR (ch)); + if (name != NULL) + scm_puts (name, port); + else + PRINT_CHAR_ESCAPE (ch, port); + } +} + +/* Write CH to PORT, escaping it if it's non-graphic or not + representable in PORT's encoding. If STRING_ESCAPES_P is true and CH + needs to be escaped, it is escaped using the in-string escape syntax; + otherwise the character escape syntax is used. */ +static void +write_character (scm_t_wchar ch, SCM port, int string_escapes_p) +{ + int printed = 0; + scm_t_string_failed_conversion_handler strategy; + + strategy = PORT_CONVERSION_HANDLER (port); + + if (string_escapes_p) + { + /* Check if CH deserves special treatment. */ + if (ch == '"' || ch == '\\') + { + display_character ('\\', port, iconveh_question_mark); + display_character (ch, port, strategy); + printed = 1; + } + else if (ch == '\n' && SCM_PRINT_ESCAPE_NEWLINES_P) + { + display_character ('\\', port, iconveh_question_mark); + display_character ('n', port, strategy); + printed = 1; + } + else if (ch == ' ' || ch == '\n') + { + display_character (ch, port, strategy); + printed = 1; + } + } + else + { + display_string ("#\\", 1, 2, port, iconveh_question_mark); + + if (uc_combining_class (ch) != UC_CCC_NR) + /* Character is a combining character, so attempt to + pretty-print it. */ + printed = write_combining_character (ch, port); + } + + if (!printed + && uc_is_general_category_withtable (ch, + UC_CATEGORY_MASK_L | + UC_CATEGORY_MASK_M | + UC_CATEGORY_MASK_N | + UC_CATEGORY_MASK_P | + UC_CATEGORY_MASK_S)) + /* CH is graphic; attempt to display it. */ + printed = display_character (ch, port, iconveh_error); + + if (!printed) + /* CH isn't graphic or cannot be represented in PORT's encoding. */ + write_character_escaped (ch, string_escapes_p, port); +} + +/* Display STR to PORT from START inclusive to END exclusive. */ +void +scm_i_display_substring (SCM str, size_t start, size_t end, SCM port) +{ + int narrow_p; + const char *buf; + size_t len, printed; + + buf = scm_i_string_data (str); + len = end - start; + narrow_p = scm_i_is_narrow_string (str); + buf += start * (narrow_p ? sizeof (char) : sizeof (scm_t_wchar)); + + printed = display_string (buf, narrow_p, end - start, port, + PORT_CONVERSION_HANDLER (port)); + + if (SCM_UNLIKELY (printed < len)) + scm_encoding_error (__func__, errno, + "cannot convert to output locale", + port, scm_c_string_ref (str, printed + start)); +} + + /* Print an integer. */ @@ -1081,14 +1364,6 @@ scm_write (SCM obj, SCM port) SCM_ASSERT (scm_valid_oport_value_p (port), port, SCM_ARG2, s_write); scm_prin1 (obj, port, 1); -#if 0 -#ifdef HAVE_PIPE -# ifdef EPIPE - if (EPIPE == errno) - scm_close_port (port); -# endif -#endif -#endif return SCM_UNSPECIFIED; } @@ -1104,14 +1379,6 @@ scm_display (SCM obj, SCM port) SCM_ASSERT (scm_valid_oport_value_p (port), port, SCM_ARG2, s_display); scm_prin1 (obj, port, 0); -#if 0 -#ifdef HAVE_PIPE -# ifdef EPIPE - if (EPIPE == errno) - scm_close_port (port); -# endif -#endif -#endif return SCM_UNSPECIFIED; } @@ -1123,7 +1390,7 @@ SCM_DEFINE (scm_simple_format, "simple-format", 2, 0, 1, "@var{message} can contain @code{~A} (was @code{%s}) and\n" "@code{~S} (was @code{%S}) escapes. When printed,\n" "the escapes are replaced with corresponding members of\n" - "@var{ARGS}:\n" + "@var{args}:\n" "@code{~A} formats using @code{display} and @code{~S} formats\n" "using @code{write}.\n" "If @var{destination} is @code{#t}, then use the current output\n" @@ -1143,8 +1410,7 @@ SCM_DEFINE (scm_simple_format, "simple-format", 2, 0, 1, else if (scm_is_false (destination)) { fReturnString = 1; - port = scm_mkstrport (SCM_INUM0, - scm_make_string (SCM_INUM0, SCM_UNDEFINED), + port = scm_mkstrport (SCM_INUM0, SCM_BOOL_F, SCM_OPN | SCM_WRTNG, FUNC_NAME); destination = port; @@ -1240,16 +1506,14 @@ SCM_DEFINE (scm_write_char, "write-char", 1, 1, 0, SCM_VALIDATE_CHAR (1, chr); SCM_VALIDATE_OPORT_VALUE (2, port); - - scm_i_charprint (SCM_CHAR (chr), SCM_COERCE_OUTPORT (port)); -#if 0 -#ifdef HAVE_PIPE -# ifdef EPIPE - if (EPIPE == errno) - scm_close_port (port); -# endif -#endif -#endif + + port = SCM_COERCE_OUTPORT (port); + if (!display_character (SCM_CHAR (chr), port, + PORT_CONVERSION_HANDLER (port))) + scm_encoding_error (__func__, errno, + "cannot convert to output locale", + port, chr); + return SCM_UNSPECIFIED; } #undef FUNC_NAME @@ -1317,22 +1581,13 @@ SCM_DEFINE (scm_get_print_state, "get-print-state", 1, 0, 0, void scm_init_print () { - SCM vtable, layout, type; - - scm_init_opts (scm_print_options, scm_print_opts); - - scm_print_options (scm_list_4 (scm_from_locale_symbol ("highlight-prefix"), - scm_from_locale_string ("{"), - scm_from_locale_symbol ("highlight-suffix"), - scm_from_locale_string ("}"))); + SCM type; scm_gc_register_root (&print_state_pool); scm_gc_register_root (&scm_print_state_vtable); - vtable = scm_make_vtable_vtable (scm_nullstr, SCM_INUM0, SCM_EOL); - layout = - scm_make_struct_layout (scm_from_locale_string (SCM_PRINT_STATE_LAYOUT)); - type = scm_make_struct (vtable, SCM_INUM0, scm_list_1 (layout)); - scm_set_struct_vtable_name_x (type, scm_from_locale_symbol ("print-state")); + type = scm_make_vtable (scm_from_locale_string (SCM_PRINT_STATE_LAYOUT), + SCM_BOOL_F); + scm_set_struct_vtable_name_x (type, scm_from_latin1_symbol ("print-state")); scm_print_state_vtable = type; /* Don't want to bind a wrapper class in GOOPS, so pass 0 as arg1. */ @@ -1341,6 +1596,11 @@ scm_init_print () #include "libguile/print.x" + scm_init_opts (scm_print_options, scm_print_opts); + scm_print_opts[SCM_PRINT_HIGHLIGHT_PREFIX_I].val = + SCM_UNPACK (scm_from_locale_string ("{")); + scm_print_opts[SCM_PRINT_HIGHLIGHT_SUFFIX_I].val = + SCM_UNPACK (scm_from_locale_string ("}")); scm_print_opts[SCM_PRINT_KEYWORD_STYLE_I].val = SCM_UNPACK (sym_reader); }