X-Git-Url: http://git.hcoop.net/bpt/guile.git/blobdiff_plain/7af531508c5931261ff8957708642cac67bf86a5..91ee7515da0bad91330ce5c87b250d6cf12a2789:/libguile/print.c diff --git a/libguile/print.c b/libguile/print.c index c38eba76e..a1c36eb94 100644 --- a/libguile/print.c +++ b/libguile/print.c @@ -1,5 +1,6 @@ -/* Copyright (C) 1995-1999,2000,2001, 2002, 2003, 2004, 2006, 2008, 2009 Free Software Foundation, Inc. - * +/* Copyright (C) 1995-1999, 2000, 2001, 2002, 2003, 2004, 2006, 2008, + * 2009, 2010, 2011 Free Software Foundation, Inc. + * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 3 of @@ -23,6 +24,10 @@ #endif #include +#include +#include +#include + #include #include @@ -30,22 +35,21 @@ #include "libguile/chars.h" #include "libguile/continuations.h" #include "libguile/smob.h" +#include "libguile/control.h" #include "libguile/eval.h" #include "libguile/macros.h" #include "libguile/procprop.h" #include "libguile/read.h" -#include "libguile/weaks.h" #include "libguile/programs.h" #include "libguile/alist.h" #include "libguile/struct.h" -#include "libguile/objects.h" #include "libguile/ports.h" #include "libguile/root.h" #include "libguile/strings.h" #include "libguile/strports.h" #include "libguile/vectors.h" -#include "libguile/lang.h" #include "libguile/numbers.h" +#include "libguile/vm.h" #include "libguile/validate.h" #include "libguile/print.h" @@ -54,6 +58,20 @@ +/* Character printers. */ + +static size_t display_string (const void *, int, size_t, SCM, + scm_t_string_failed_conversion_handler); + +static int display_character (scm_t_wchar, SCM, + scm_t_string_failed_conversion_handler); + +static void write_character (scm_t_wchar, SCM, int); + +static void write_character_escaped (scm_t_wchar, int, SCM); + + + /* {Names of immediate symbols} * * This table must agree with the declarations in scm.h: {Immediate Symbols}. @@ -63,38 +81,35 @@ static const char *iflagnames[] = { "#f", + "#nil", /* Elisp nil value. Should print from elisp as symbol `nil'. */ + "#", + "()", "#t", + "#", + "#", + "#", + "#", "#", "#", - "()", - "#", /* Unbound slot marker for GOOPS. For internal use in GOOPS only. */ "#", - - /* Elisp nil value. This is its Scheme name; whenever it's printed in - * Elisp, it should appear as the symbol `nil'. */ - "#nil" }; SCM_SYMBOL (sym_reader, "reader"); scm_t_option scm_print_opts[] = { - { SCM_OPTION_SCM, "closure-hook", SCM_UNPACK (SCM_BOOL_F), - "Hook for printing closures (should handle macros as well)." }, - { SCM_OPTION_BOOLEAN, "source", 0, - "Print closures with source." }, - { SCM_OPTION_SCM, "highlight-prefix", (unsigned long)SCM_BOOL_F, + { SCM_OPTION_SCM, "highlight-prefix", (scm_t_bits)SCM_BOOL_F_BITS, "The string to print before highlighted values." }, - { SCM_OPTION_SCM, "highlight-suffix", (unsigned long)SCM_BOOL_F, + { SCM_OPTION_SCM, "highlight-suffix", (scm_t_bits)SCM_BOOL_F_BITS, "The string to print after highlighted values." }, - { SCM_OPTION_SCM, "quote-keywordish-symbols", (unsigned long)SCM_BOOL_F, + { SCM_OPTION_SCM, "quote-keywordish-symbols", (scm_t_bits)SCM_BOOL_F_BITS, "How to print symbols that have a colon as their first or last character. " "The value '#f' does not quote the colons; '#t' quotes them; " - "'reader' quotes them when the reader option 'keywords' is not '#f'." - }, + "'reader' quotes them when the reader option 'keywords' is not '#f'." }, + { SCM_OPTION_BOOLEAN, "escape-newlines", 1, + "Render newlines as \\n when printing using `write'." }, { 0 }, - }; SCM_DEFINE (scm_print_options, "print-options-interface", 0, 1, 0, @@ -142,7 +157,7 @@ do \ { \ if (pstate->top - pstate->list_offset >= pstate->level) \ { \ - scm_putc ('#', port); \ + scm_putc_unlocked ('#', port); \ return; \ } \ } \ @@ -286,23 +301,18 @@ print_circref (SCM port, scm_print_state *pstate, SCM ref) for (i = pstate->top - 1; 1; --i) if (scm_is_eq (PSTATE_STACK_REF(pstate, i), ref)) break; - scm_putc ('#', port); + scm_putc_unlocked ('#', port); scm_intprint (i - self, 10, port); - scm_putc ('#', port); + scm_putc_unlocked ('#', port); } /* Print the name of a symbol. */ static int -quote_keywordish_symbol (SCM symbol) +quote_keywordish_symbols (void) { - SCM option; + SCM option = SCM_PRINT_KEYWORD_STYLE; - if (scm_i_symbol_ref (symbol, 0) != ':' - && scm_i_symbol_ref (symbol, scm_i_symbol_length (symbol) - 1) != ':') - return 0; - - option = SCM_PRINT_KEYWORD_STYLE; if (scm_is_false (option)) return 0; if (scm_is_eq (option, sym_reader)) @@ -310,98 +320,121 @@ quote_keywordish_symbol (SCM symbol) return 1; } -void -scm_i_print_symbol_name (SCM str, SCM port) +#define INITIAL_IDENTIFIER_MASK \ + (UC_CATEGORY_MASK_Lu | UC_CATEGORY_MASK_Ll | UC_CATEGORY_MASK_Lt \ + | UC_CATEGORY_MASK_Lm | UC_CATEGORY_MASK_Lo | UC_CATEGORY_MASK_Mn \ + | UC_CATEGORY_MASK_Nl | UC_CATEGORY_MASK_No | UC_CATEGORY_MASK_Pd \ + | UC_CATEGORY_MASK_Pc | UC_CATEGORY_MASK_Po | UC_CATEGORY_MASK_Sc \ + | UC_CATEGORY_MASK_Sm | UC_CATEGORY_MASK_Sk | UC_CATEGORY_MASK_So \ + | UC_CATEGORY_MASK_Co) + +#define SUBSEQUENT_IDENTIFIER_MASK \ + (INITIAL_IDENTIFIER_MASK \ + | UC_CATEGORY_MASK_Nd | UC_CATEGORY_MASK_Mc | UC_CATEGORY_MASK_Me) + +static int +symbol_has_extended_read_syntax (SCM sym) { - /* This points to the first character that has not yet been written to the - * port. */ - size_t pos = 0; - /* This points to the character we're currently looking at. */ - size_t end; - /* If the name contains weird characters, we'll escape them with - * backslashes and set this flag; it indicates that we should surround the - * name with "#{" and "}#". */ - int weird = 0; - /* Backslashes are not sufficient to make a name weird, but if a name is - * weird because of other characters, backslahes need to be escaped too. - * The first time we see a backslash, we set maybe_weird, and mw_pos points - * to the backslash. Then if the name turns out to be weird, we re-process - * everything starting from mw_pos. - * We could instead make backslashes always weird. This is not necessary - * to ensure that the output is (read)-able, but it would make this code - * simpler and faster. */ - int maybe_weird = 0; - size_t mw_pos = 0; - size_t len = scm_i_symbol_length (str); - scm_t_wchar str0 = scm_i_symbol_ref (str, 0); - - if (len == 0 || str0 == '\'' || str0 == '`' || str0 == ',' - || quote_keywordish_symbol (str) - || (str0 == '.' && len == 1) - || scm_is_true (scm_i_string_to_number (scm_symbol_to_string (str), 10))) + size_t pos, len = scm_i_symbol_length (sym); + scm_t_wchar c; + + /* The empty symbol. */ + if (len == 0) + return 1; + + c = scm_i_symbol_ref (sym, 0); + + /* Single dot; conflicts with dotted-pair notation. */ + if (len == 1 && c == '.') + return 1; + + /* Other initial-character constraints. */ + if (c == '\'' || c == '`' || c == ',' || c == '"' || c == ';' || c == '#') + return 1; + + /* Keywords can be identified by trailing colons too. */ + if (c == ':' || scm_i_symbol_ref (sym, len - 1) == ':') + return quote_keywordish_symbols (); + + /* Number-ish symbols. */ + if (scm_is_true (scm_i_string_to_number (scm_symbol_to_string (sym), 10))) + return 1; + + /* Other disallowed first characters. */ + if (!uc_is_general_category_withtable (c, INITIAL_IDENTIFIER_MASK)) + return 1; + + /* Otherwise, any character that's in the identifier category mask is + fine to pass through as-is, provided it's not one of the ASCII + delimiters like `;'. */ + for (pos = 1; pos < len; pos++) { - scm_lfwrite ("#{", 2, port); - weird = 1; + c = scm_i_symbol_ref (sym, pos); + if (!uc_is_general_category_withtable (c, SUBSEQUENT_IDENTIFIER_MASK)) + return 1; + else if (c == '"' || c == ';' || c == '#') + return 1; } - for (end = pos; end < len; ++end) - switch (scm_i_symbol_ref (str, end)) - { -#ifdef BRACKETS_AS_PARENS - case '[': - case ']': -#endif - case '(': - case ')': - case '"': - case ';': - case '#': - case SCM_WHITE_SPACES: - case SCM_LINE_INCREMENTORS: - weird_handler: - if (maybe_weird) - { - end = mw_pos; - maybe_weird = 0; - } - if (!weird) - { - scm_lfwrite ("#{", 2, port); - weird = 1; - } - if (pos < end) - scm_lfwrite_substr (scm_symbol_to_string (str), pos, end, port); - { - char buf[2]; - buf[0] = '\\'; - buf[1] = (char) (unsigned char) scm_i_symbol_ref (str, end); - scm_lfwrite (buf, 2, port); - } - pos = end + 1; - break; - case '\\': - if (weird) - goto weird_handler; - if (!maybe_weird) - { - maybe_weird = 1; - mw_pos = pos; - } - break; - default: - break; - } - if (pos < end) - scm_lfwrite_substr (scm_symbol_to_string (str), pos, end, port); - if (weird) - scm_lfwrite ("}#", 2, port); + return 0; +} + +static void +print_normal_symbol (SCM sym, SCM port) +{ + scm_display (scm_symbol_to_string (sym), port); +} + +static void +print_extended_symbol (SCM sym, SCM port) +{ + size_t pos, len; + scm_t_string_failed_conversion_handler strategy; + + len = scm_i_symbol_length (sym); + strategy = scm_i_get_conversion_strategy (port); + + scm_lfwrite_unlocked ("#{", 2, port); + + for (pos = 0; pos < len; pos++) + { + scm_t_wchar c = scm_i_symbol_ref (sym, pos); + + if (uc_is_general_category_withtable (c, + SUBSEQUENT_IDENTIFIER_MASK + | UC_CATEGORY_MASK_Zs)) + { + if (!display_character (c, port, strategy)) + scm_encoding_error ("print_extended_symbol", errno, + "cannot convert to output locale", + port, SCM_MAKE_CHAR (c)); + } + else + { + display_string ("\\x", 1, 2, port, iconveh_question_mark); + scm_intprint (c, 16, port); + display_character (';', port, iconveh_question_mark); + } + } + + scm_lfwrite_unlocked ("}#", 2, port); +} + +/* FIXME: allow R6RS hex escapes instead of #{...}#. */ +void +scm_i_print_symbol_name (SCM sym, SCM port) +{ + if (symbol_has_extended_read_syntax (sym)) + print_extended_symbol (sym, port); + else + print_normal_symbol (sym, port); } void scm_print_symbol_name (const char *str, size_t len, SCM port) { - SCM symbol = scm_from_locale_symboln (str, len); - return scm_i_print_symbol_name (symbol, port); + SCM symbol = scm_from_utf8_symboln (str, len); + scm_i_print_symbol_name (symbol, port); } /* Print generally. Handles both write and display according to PSTATE. @@ -411,6 +444,22 @@ SCM_GPROC(s_display, "display", 1, 1, 0, scm_display, g_display); static void iprin1 (SCM exp, SCM port, scm_print_state *pstate); + +/* Print a character as an octal or hex escape. */ +#define PRINT_CHAR_ESCAPE(i, port) \ + do \ + { \ + if (!SCM_R6RS_ESCAPES_P) \ + scm_intprint (i, 8, port); \ + else \ + { \ + scm_puts_unlocked ("x", port); \ + scm_intprint (i, 16, port); \ + } \ + } \ + while (0) + + void scm_iprin1 (SCM exp, SCM port, scm_print_state *pstate) { @@ -430,7 +479,6 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate) { switch (SCM_ITAG3 (exp)) { - case scm_tc3_closure: case scm_tc3_tc7_1: case scm_tc3_tc7_2: /* These tc3 tags should never occur in an immediate value. They are @@ -446,80 +494,22 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate) case scm_tc3_imm24: if (SCM_CHARP (exp)) { - scm_t_wchar i = SCM_CHAR (exp); - const char *name; - if (SCM_WRITINGP (pstate)) + write_character (SCM_CHAR (exp), port, 0); + else { - scm_puts ("#\\", port); - name = scm_i_charname (exp); - if (name != NULL) - scm_puts (name, port); - else if (uc_is_general_category_withtable (i, UC_CATEGORY_MASK_L - | UC_CATEGORY_MASK_M - | UC_CATEGORY_MASK_N - | UC_CATEGORY_MASK_P - | UC_CATEGORY_MASK_S)) - /* Print the character if is graphic character. */ - { - scm_t_wchar *wbuf; - SCM wstr = scm_i_make_wide_string (1, &wbuf); - char *buf; - size_t len; - const char *enc; - - enc = scm_i_get_port_encoding (port); - wbuf[0] = i; - if (enc == NULL) - { - if (i <= 0xFF) - /* Character is graphic and Latin-1. Print it */ - scm_lfwrite_str (wstr, port); - else - /* Character is graphic but unrepresentable in - this port's encoding. */ - scm_intprint (i, 8, port); - } - else - { - buf = u32_conv_to_encoding (enc, - iconveh_error, - (scm_t_uint32 *) wbuf, - 1, - NULL, - NULL, &len); - if (buf != NULL) - { - /* Character is graphic. Print it. */ - scm_lfwrite_str (wstr, port); - free (buf); - } - else - /* Character is graphic but unrepresentable in - this port's encoding. */ - scm_intprint (i, 8, port); - } - } - else - /* Character is a non-graphical character. */ - scm_intprint (i, 8, port); + if (!display_character (SCM_CHAR (exp), port, + scm_i_get_conversion_strategy (port))) + scm_encoding_error (__func__, errno, + "cannot convert to output locale", + port, exp); } - else - scm_i_charprint (i, port); } else if (SCM_IFLAGP (exp) && ((size_t) SCM_IFLAGNUM (exp) < (sizeof iflagnames / sizeof (char *)))) { - scm_puts (iflagnames [SCM_IFLAGNUM (exp)], port); + scm_puts_unlocked (iflagnames [SCM_IFLAGNUM (exp)], port); } - else if (SCM_ISYMP (exp)) - { - scm_i_print_isym (exp, port); - } - else if (SCM_ILOCP (exp)) - { - scm_i_print_iloc (exp, port); - } else { /* unknown immediate value */ @@ -535,11 +525,11 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate) if (SCM_OBJ_CLASS_FLAGS (exp) & SCM_CLASSF_GOOPS) { SCM pwps, print = pstate->writingp ? g_write : g_display; - if (!print) + if (SCM_UNPACK (print) == 0) goto print_struct; pwps = scm_i_port_with_print_state (port, pstate->handle); pstate->revealed = 1; - scm_call_generic_2 (print, exp, pwps); + scm_call_2 (print, exp, pwps); } else { @@ -558,30 +548,6 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate) circref: print_circref (port, pstate, exp); break; - case scm_tcs_closures: - if (scm_is_false (scm_procedure_p (SCM_PRINT_CLOSURE)) - || scm_is_false (scm_printer_apply (SCM_PRINT_CLOSURE, - exp, port, pstate))) - { - SCM formals = SCM_CLOSURE_FORMALS (exp); - scm_puts ("#', port); - } - break; case scm_tc7_number: switch SCM_TYP16 (exp) { case scm_tc16_big: @@ -601,113 +567,31 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate) case scm_tc7_string: if (SCM_WRITINGP (pstate)) { - size_t i, j, len; - static char const hex[] = "0123456789abcdef"; - char buf[8]; - + size_t len, i; - scm_putc ('"', port); + display_character ('"', port, iconveh_question_mark); len = scm_i_string_length (exp); for (i = 0; i < len; ++i) - { - scm_t_wchar ch = scm_i_string_ref (exp, i); - int printed = 0; - - if (ch == ' ' || ch == '\n') - { - scm_putc (ch, port); - printed = 1; - } - else if (ch == '"' || ch == '\\') - { - scm_putc ('\\', port); - scm_i_charprint (ch, port); - printed = 1; - } - else - if (uc_is_general_category_withtable - (ch, - UC_CATEGORY_MASK_L | UC_CATEGORY_MASK_M | - UC_CATEGORY_MASK_N | UC_CATEGORY_MASK_P | - UC_CATEGORY_MASK_S)) - { - /* Print the character since it is a graphic - character. */ - scm_t_wchar *wbuf; - SCM wstr = scm_i_make_wide_string (1, &wbuf); - char *buf; - size_t len; - - if (scm_i_get_port_encoding (port)) - { - wstr = scm_i_make_wide_string (1, &wbuf); - wbuf[0] = ch; - buf = u32_conv_to_encoding (scm_i_get_port_encoding (port), - iconveh_error, - (scm_t_uint32 *) wbuf, - 1 , - NULL, - NULL, &len); - if (buf != NULL) - { - /* Character is graphic and representable in - this encoding. Print it. */ - scm_lfwrite_str (wstr, port); - free (buf); - printed = 1; - } - } - else - if (ch <= 0xFF) - { - scm_putc (ch, port); - printed = 1; - } - } - - if (!printed) - { - /* Character is graphic but unrepresentable in - this port's encoding or is not graphic. */ - if (ch <= 0xFF) - { - buf[0] = '\\'; - buf[1] = 'x'; - buf[2] = hex[ch / 16]; - buf[3] = hex[ch % 16]; - scm_lfwrite (buf, 4, port); - } - else if (ch <= 0xFFFF) - { - buf[0] = '\\'; - buf[1] = 'u'; - buf[2] = hex[(ch & 0xF000) >> 12]; - buf[3] = hex[(ch & 0xF00) >> 8]; - buf[4] = hex[(ch & 0xF0) >> 4]; - buf[5] = hex[(ch & 0xF)]; - scm_lfwrite (buf, 6, port); - j = i + 1; - } - else if (ch > 0xFFFF) - { - buf[0] = '\\'; - buf[1] = 'U'; - buf[2] = hex[(ch & 0xF00000) >> 20]; - buf[3] = hex[(ch & 0xF0000) >> 16]; - buf[4] = hex[(ch & 0xF000) >> 12]; - buf[5] = hex[(ch & 0xF00) >> 8]; - buf[6] = hex[(ch & 0xF0) >> 4]; - buf[7] = hex[(ch & 0xF)]; - scm_lfwrite (buf, 8, port); - j = i + 1; - } - } - } - scm_putc ('"', port); + write_character (scm_i_string_ref (exp, i), port, 1); + + display_character ('"', port, iconveh_question_mark); scm_remember_upto_here_1 (exp); } else - scm_lfwrite_str (exp, port); + { + size_t len, printed; + + len = scm_i_string_length (exp); + printed = display_string (scm_i_string_data (exp), + scm_i_is_narrow_string (exp), + len, port, + scm_i_get_conversion_strategy (port)); + if (SCM_UNLIKELY (printed < len)) + scm_encoding_error (__func__, errno, + "cannot convert to output locale", + port, scm_c_string_ref (exp, printed)); + } + scm_remember_upto_here_1 (exp); break; case scm_tc7_symbol: @@ -718,11 +602,11 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate) } else { - scm_puts ("#', port); + scm_putc_unlocked ('>', port); } break; case scm_tc7_variable: @@ -731,17 +615,59 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate) case scm_tc7_program: scm_i_program_print (exp, port, pstate); break; + case scm_tc7_pointer: + scm_i_pointer_print (exp, port, pstate); + break; + case scm_tc7_hashtable: + scm_i_hashtable_print (exp, port, pstate); + break; + case scm_tc7_weak_set: + scm_i_weak_set_print (exp, port, pstate); + break; + case scm_tc7_weak_table: + scm_i_weak_table_print (exp, port, pstate); + break; + case scm_tc7_fluid: + scm_i_fluid_print (exp, port, pstate); + break; + case scm_tc7_dynamic_state: + scm_i_dynamic_state_print (exp, port, pstate); + break; + case scm_tc7_frame: + scm_i_frame_print (exp, port, pstate); + break; + case scm_tc7_objcode: + scm_i_objcode_print (exp, port, pstate); + break; + case scm_tc7_vm: + scm_i_vm_print (exp, port, pstate); + break; + case scm_tc7_vm_cont: + scm_i_vm_cont_print (exp, port, pstate); + break; + case scm_tc7_prompt: + scm_i_prompt_print (exp, port, pstate); + break; + case scm_tc7_with_fluids: + scm_i_with_fluids_print (exp, port, pstate); + break; + case scm_tc7_array: + ENTER_NESTED_DATA (pstate, exp, circref); + scm_i_print_array (exp, port, pstate); + break; + case scm_tc7_bytevector: + scm_i_print_bytevector (exp, port, pstate); + break; + case scm_tc7_bitvector: + scm_i_print_bitvector (exp, port, pstate); + break; case scm_tc7_wvect: ENTER_NESTED_DATA (pstate, exp, circref); - if (SCM_IS_WHVEC (exp)) - scm_puts ("#wh(", port); - else - scm_puts ("#w(", port); + scm_puts_unlocked ("#w(", port); goto common_vector_printer; - case scm_tc7_vector: ENTER_NESTED_DATA (pstate, exp, circref); - scm_puts ("#(", port); + scm_puts_unlocked ("#(", port); common_vector_printer: { register long i; @@ -753,66 +679,26 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate) last = pstate->length - 1; cutp = 1; } - if (SCM_I_WVECTP (exp)) - { - /* Elements of weak vectors may not be accessed via the - `SIMPLE_VECTOR_REF ()' macro. */ - for (i = 0; i < last; ++i) - { - scm_iprin1 (scm_c_vector_ref (exp, i), - port, pstate); - scm_putc (' ', port); - } - } - else - { - for (i = 0; i < last; ++i) - { - scm_iprin1 (SCM_SIMPLE_VECTOR_REF (exp, i), port, pstate); - scm_putc (' ', port); - } - } - + for (i = 0; i < last; ++i) + { + scm_iprin1 (scm_c_vector_ref (exp, i), port, pstate); + scm_putc_unlocked (' ', port); + } if (i == last) { /* CHECK_INTS; */ scm_iprin1 (scm_c_vector_ref (exp, i), port, pstate); } if (cutp) - scm_puts (" ...", port); - scm_putc (')', port); + scm_puts_unlocked (" ...", port); + scm_putc_unlocked (')', port); } EXIT_NESTED_DATA (pstate); break; - case scm_tcs_subrs: - { - SCM name = scm_symbol_to_string (SCM_SUBR_NAME (exp)); - scm_puts (SCM_SUBR_GENERIC (exp) - ? "#', port); - break; - } - case scm_tc7_pws: - scm_puts ("#', port); - break; case scm_tc7_port: { - register long i = SCM_PTOBNUM (exp); - if (i < scm_numptob - && scm_ptobs[i].print - && (scm_ptobs[i].print) (exp, port, pstate)) + scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (exp); + if (ptob->print && ptob->print (exp, port, pstate)) break; goto punk; } @@ -822,6 +708,7 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate) EXIT_NESTED_DATA (pstate); break; default: + /* case scm_tcs_closures: */ punk: scm_ipruk ("type", exp, port); } @@ -886,16 +773,379 @@ scm_prin1 (SCM exp, SCM port, int writingp) } } -/* Print a character. - */ -void -scm_i_charprint (scm_t_wchar ch, SCM port) +/* Convert codepoint CH to UTF-8 and store the result in UTF8. Return + the number of bytes of the UTF-8-encoded string. */ +static size_t +codepoint_to_utf8 (scm_t_wchar ch, scm_t_uint8 utf8[4]) +{ + size_t len; + scm_t_uint32 codepoint; + + codepoint = (scm_t_uint32) ch; + + if (codepoint <= 0x7f) + { + len = 1; + utf8[0] = (scm_t_uint8) codepoint; + } + else if (codepoint <= 0x7ffUL) + { + len = 2; + utf8[0] = 0xc0 | (codepoint >> 6); + utf8[1] = 0x80 | (codepoint & 0x3f); + } + else if (codepoint <= 0xffffUL) + { + len = 3; + utf8[0] = 0xe0 | (codepoint >> 12); + utf8[1] = 0x80 | ((codepoint >> 6) & 0x3f); + utf8[2] = 0x80 | (codepoint & 0x3f); + } + else + { + len = 4; + utf8[0] = 0xf0 | (codepoint >> 18); + utf8[1] = 0x80 | ((codepoint >> 12) & 0x3f); + utf8[2] = 0x80 | ((codepoint >> 6) & 0x3f); + utf8[3] = 0x80 | (codepoint & 0x3f); + } + + return len; +} + +#define STR_REF(s, x) \ + (narrow_p \ + ? (scm_t_wchar) ((unsigned char *) (s))[x] \ + : ((scm_t_wchar *) (s))[x]) + +/* Write STR to PORT as UTF-8. STR is a LEN-codepoint string; it is + narrow if NARROW_P is true, wide otherwise. Return LEN. */ +static size_t +display_string_as_utf8 (const void *str, int narrow_p, size_t len, + SCM port) +{ + size_t printed = 0; + + while (len > printed) + { + size_t utf8_len, i; + char *input, utf8_buf[256]; + + /* Convert STR to UTF-8. */ + for (i = printed, utf8_len = 0, input = utf8_buf; + i < len && utf8_len + 4 < sizeof (utf8_buf); + i++) + { + utf8_len += codepoint_to_utf8 (STR_REF (str, i), + (scm_t_uint8 *) input); + input = utf8_buf + utf8_len; + } + + /* INPUT was successfully converted, entirely; print the + result. */ + scm_lfwrite_unlocked (utf8_buf, utf8_len, port); + printed += i - printed; + } + + assert (printed == len); + + return len; +} + +/* Convert STR through PORT's output conversion descriptor and write the + output to PORT. Return the number of codepoints written. */ +static size_t +display_string_using_iconv (const void *str, int narrow_p, size_t len, + SCM port, + scm_t_string_failed_conversion_handler strategy) +{ + size_t printed; + scm_t_port *pt; + + pt = SCM_PTAB_ENTRY (port); + + printed = 0; + + while (len > printed) + { + size_t done, utf8_len, input_left, output_left, i; + size_t codepoints_read, output_len; + char *input, *output; + char utf8_buf[256], encoded_output[256]; + size_t offsets[256]; + + /* Convert STR to UTF-8. */ + for (i = printed, utf8_len = 0, input = utf8_buf; + i < len && utf8_len + 4 < sizeof (utf8_buf); + i++) + { + offsets[utf8_len] = i; + utf8_len += codepoint_to_utf8 (STR_REF (str, i), + (scm_t_uint8 *) input); + input = utf8_buf + utf8_len; + } + + input = utf8_buf; + input_left = utf8_len; + + output = encoded_output; + output_left = sizeof (encoded_output); + + done = iconv (pt->output_cd, &input, &input_left, + &output, &output_left); + + output_len = sizeof (encoded_output) - output_left; + + if (SCM_UNLIKELY (done == (size_t) -1)) + { + int errno_save = errno; + + /* Reset the `iconv' state. */ + iconv (pt->output_cd, NULL, NULL, NULL, NULL); + + /* Print the OUTPUT_LEN bytes successfully converted. */ + scm_lfwrite_unlocked (encoded_output, output_len, port); + + /* See how many input codepoints these OUTPUT_LEN bytes + corresponds to. */ + codepoints_read = offsets[input - utf8_buf] - printed; + printed += codepoints_read; + + if (errno_save == EILSEQ && + strategy != SCM_FAILED_CONVERSION_ERROR) + { + /* Conversion failed somewhere in INPUT and we want to + escape or substitute the offending input character. */ + + if (strategy == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE) + { + scm_t_wchar ch; + + /* Find CH, the offending codepoint, and escape it. */ + ch = STR_REF (str, offsets[input - utf8_buf]); + write_character_escaped (ch, 1, port); + } + else + /* STRATEGY is `SCM_FAILED_CONVERSION_QUESTION_MARK'. */ + display_string ("?", 1, 1, port, strategy); + + printed++; + } + else + /* Something bad happened that we can't handle: bail out. */ + break; + } + else + { + /* INPUT was successfully converted, entirely; print the + result. */ + scm_lfwrite_unlocked (encoded_output, output_len, port); + codepoints_read = i - printed; + printed += codepoints_read; + } + } + + return printed; +} + +#undef STR_REF + +/* Display the LEN codepoints in STR to PORT according to STRATEGY; + return the number of codepoints successfully displayed. If NARROW_P, + then STR is interpreted as a sequence of `char', denoting a Latin-1 + string; otherwise it's interpreted as a sequence of + `scm_t_wchar'. */ +static size_t +display_string (const void *str, int narrow_p, + size_t len, SCM port, + scm_t_string_failed_conversion_handler strategy) + { - scm_t_wchar *wbuf; - SCM wstr = scm_i_make_wide_string (1, &wbuf); + scm_t_port *pt; + + pt = SCM_PTAB_ENTRY (port); - wbuf[0] = ch; - scm_lfwrite_str (wstr, port); + if (pt->output_cd == (iconv_t) -1) + /* Initialize the conversion descriptors, if needed. */ + scm_i_set_port_encoding_x (port, pt->encoding); + + /* FIXME: In 2.1, add a flag to determine whether a port is UTF-8. */ + if (pt->output_cd == (iconv_t) -1) + return display_string_as_utf8 (str, narrow_p, len, port); + else + return display_string_using_iconv (str, narrow_p, len, + port, strategy); +} + +/* Attempt to display CH to PORT according to STRATEGY. Return non-zero + if CH was successfully displayed, zero otherwise (e.g., if it was not + representable in PORT's encoding.) */ +static int +display_character (scm_t_wchar ch, SCM port, + scm_t_string_failed_conversion_handler strategy) +{ + return display_string (&ch, 0, 1, port, strategy) == 1; +} + +/* Attempt to pretty-print CH, a combining character, to PORT. Return + zero upon failure, non-zero otherwise. The idea is to print CH above + a dotted circle to make it more visible. */ +static int +write_combining_character (scm_t_wchar ch, SCM port) +{ + scm_t_wchar str[2]; + + str[0] = SCM_CODEPOINT_DOTTED_CIRCLE; + str[1] = ch; + + return display_string (str, 0, 2, port, iconveh_error) == 2; +} + +/* Write CH to PORT in its escaped form, using the string escape syntax + if STRING_ESCAPES_P is non-zero. */ +static void +write_character_escaped (scm_t_wchar ch, int string_escapes_p, SCM port) +{ + if (string_escapes_p) + { + /* Represent CH using the in-string escape syntax. */ + + static const char hex[] = "0123456789abcdef"; + static const char escapes[7] = "abtnvfr"; + char buf[9]; + + if (ch >= 0x07 && ch <= 0x0D && ch != 0x0A) + { + /* Use special escapes for some C0 controls. */ + buf[0] = '\\'; + buf[1] = escapes[ch - 0x07]; + scm_lfwrite_unlocked (buf, 2, port); + } + else if (!SCM_R6RS_ESCAPES_P) + { + if (ch <= 0xFF) + { + buf[0] = '\\'; + buf[1] = 'x'; + buf[2] = hex[ch / 16]; + buf[3] = hex[ch % 16]; + scm_lfwrite_unlocked (buf, 4, port); + } + else if (ch <= 0xFFFF) + { + buf[0] = '\\'; + buf[1] = 'u'; + buf[2] = hex[(ch & 0xF000) >> 12]; + buf[3] = hex[(ch & 0xF00) >> 8]; + buf[4] = hex[(ch & 0xF0) >> 4]; + buf[5] = hex[(ch & 0xF)]; + scm_lfwrite_unlocked (buf, 6, port); + } + else if (ch > 0xFFFF) + { + buf[0] = '\\'; + buf[1] = 'U'; + buf[2] = hex[(ch & 0xF00000) >> 20]; + buf[3] = hex[(ch & 0xF0000) >> 16]; + buf[4] = hex[(ch & 0xF000) >> 12]; + buf[5] = hex[(ch & 0xF00) >> 8]; + buf[6] = hex[(ch & 0xF0) >> 4]; + buf[7] = hex[(ch & 0xF)]; + scm_lfwrite_unlocked (buf, 8, port); + } + } + else + { + /* Print an R6RS variable-length hex escape: "\xNNNN;". */ + scm_t_wchar ch2 = ch; + + int i = 8; + buf[i] = ';'; + i --; + if (ch == 0) + buf[i--] = '0'; + else + while (ch2 > 0) + { + buf[i] = hex[ch2 & 0xF]; + ch2 >>= 4; + i --; + } + buf[i] = 'x'; + i --; + buf[i] = '\\'; + scm_lfwrite_unlocked (buf + i, 9 - i, port); + } + } + else + { + /* Represent CH using the character escape syntax. */ + const char *name; + + name = scm_i_charname (SCM_MAKE_CHAR (ch)); + if (name != NULL) + scm_puts_unlocked (name, port); + else + PRINT_CHAR_ESCAPE (ch, port); + } +} + +/* Write CH to PORT, escaping it if it's non-graphic or not + representable in PORT's encoding. If STRING_ESCAPES_P is true and CH + needs to be escaped, it is escaped using the in-string escape syntax; + otherwise the character escape syntax is used. */ +static void +write_character (scm_t_wchar ch, SCM port, int string_escapes_p) +{ + int printed = 0; + scm_t_string_failed_conversion_handler strategy; + + strategy = scm_i_get_conversion_strategy (port); + + if (string_escapes_p) + { + /* Check if CH deserves special treatment. */ + if (ch == '"' || ch == '\\') + { + display_character ('\\', port, iconveh_question_mark); + display_character (ch, port, strategy); + printed = 1; + } + else if (ch == '\n' && SCM_PRINT_ESCAPE_NEWLINES_P) + { + display_character ('\\', port, iconveh_question_mark); + display_character ('n', port, strategy); + printed = 1; + } + else if (ch == ' ' || ch == '\n') + { + display_character (ch, port, strategy); + printed = 1; + } + } + else + { + display_string ("#\\", 1, 2, port, iconveh_question_mark); + + if (uc_combining_class (ch) != UC_CCC_NR) + /* Character is a combining character, so attempt to + pretty-print it. */ + printed = write_combining_character (ch, port); + } + + if (!printed + && uc_is_general_category_withtable (ch, + UC_CATEGORY_MASK_L | + UC_CATEGORY_MASK_M | + UC_CATEGORY_MASK_N | + UC_CATEGORY_MASK_P | + UC_CATEGORY_MASK_S)) + /* CH is graphic; attempt to display it. */ + printed = display_character (ch, port, iconveh_error); + + if (!printed) + /* CH isn't graphic or cannot be represented in PORT's encoding. */ + write_character_escaped (ch, string_escapes_p, port); } /* Print an integer. @@ -905,14 +1155,14 @@ void scm_intprint (scm_t_intmax n, int radix, SCM port) { char num_buf[SCM_INTBUFLEN]; - scm_lfwrite (num_buf, scm_iint2str (n, radix, num_buf), port); + scm_lfwrite_unlocked (num_buf, scm_iint2str (n, radix, num_buf), port); } void scm_uintprint (scm_t_uintmax n, int radix, SCM port) { char num_buf[SCM_INTBUFLEN]; - scm_lfwrite (num_buf, scm_iuint2str (n, radix, num_buf), port); + scm_lfwrite_unlocked (num_buf, scm_iuint2str (n, radix, num_buf), port); } /* Print an object of unrecognized type. @@ -921,19 +1171,19 @@ scm_uintprint (scm_t_uintmax n, int radix, SCM port) void scm_ipruk (char *hdr, SCM ptr, SCM port) { - scm_puts ("#', port); + scm_putc_unlocked ('>', port); } @@ -944,7 +1194,7 @@ scm_iprlist (char *hdr, SCM exp, int tlr, SCM port, scm_print_state *pstate) { register SCM hare, tortoise; long floor = pstate->top - 2; - scm_puts (hdr, port); + scm_puts_unlocked (hdr, port); /* CHECK_INTS; */ if (pstate->fancyp) goto fancy_printing; @@ -974,18 +1224,18 @@ scm_iprlist (char *hdr, SCM exp, int tlr, SCM port, scm_print_state *pstate) if (scm_is_eq (PSTATE_STACK_REF(pstate, i), exp)) goto circref; PUSH_REF (pstate, exp); - scm_putc (' ', port); + scm_putc_unlocked (' ', port); /* CHECK_INTS; */ scm_iprin1 (SCM_CAR (exp), port, pstate); } if (!SCM_NULL_OR_NIL_P (exp)) { - scm_puts (" . ", port); + scm_puts_unlocked (" . ", port); scm_iprin1 (exp, port, pstate); } end: - scm_putc (tlr, port); + scm_putc_unlocked (tlr, port); pstate->top = floor + 2; return; @@ -1006,7 +1256,7 @@ fancy_printing: { if (n == 0) { - scm_puts (" ...", port); + scm_puts_unlocked (" ...", port); goto skip_tail; } else @@ -1014,14 +1264,14 @@ fancy_printing: } PUSH_REF(pstate, exp); ++pstate->list_offset; - scm_putc (' ', port); + scm_putc_unlocked (' ', port); /* CHECK_INTS; */ scm_iprin1 (SCM_CAR (exp), port, pstate); } } if (!SCM_NULL_OR_NIL_P (exp)) { - scm_puts (" . ", port); + scm_puts_unlocked (" . ", port); scm_iprin1 (exp, port, pstate); } skip_tail: @@ -1032,7 +1282,7 @@ fancy_circref: pstate->list_offset -= pstate->top - floor - 2; circref: - scm_puts (" . ", port); + scm_puts_unlocked (" . ", port); print_circref (port, pstate, exp); goto end; } @@ -1057,15 +1307,11 @@ scm_write (SCM obj, SCM port) SCM_ASSERT (scm_valid_oport_value_p (port), port, SCM_ARG2, s_write); + scm_dynwind_begin (0); + scm_dynwind_lock_port (SCM_COERCE_OUTPORT (port)); scm_prin1 (obj, port, 1); -#if 0 -#ifdef HAVE_PIPE -# ifdef EPIPE - if (EPIPE == errno) - scm_close_port (port); -# endif -#endif -#endif + scm_dynwind_end (); + return SCM_UNSPECIFIED; } @@ -1080,15 +1326,11 @@ scm_display (SCM obj, SCM port) SCM_ASSERT (scm_valid_oport_value_p (port), port, SCM_ARG2, s_display); + scm_dynwind_begin (0); + scm_dynwind_lock_port (SCM_COERCE_OUTPORT (port)); scm_prin1 (obj, port, 0); -#if 0 -#ifdef HAVE_PIPE -# ifdef EPIPE - if (EPIPE == errno) - scm_close_port (port); -# endif -#endif -#endif + scm_dynwind_end (); + return SCM_UNSPECIFIED; } @@ -1120,8 +1362,7 @@ SCM_DEFINE (scm_simple_format, "simple-format", 2, 0, 1, else if (scm_is_false (destination)) { fReturnString = 1; - port = scm_mkstrport (SCM_INUM0, - scm_make_string (SCM_INUM0, SCM_UNDEFINED), + port = scm_mkstrport (SCM_INUM0, SCM_BOOL_F, SCM_OPN | SCM_WRTNG, FUNC_NAME); destination = port; @@ -1202,7 +1443,7 @@ SCM_DEFINE (scm_newline, "newline", 0, 1, 0, SCM_VALIDATE_OPORT_VALUE (1, port); - scm_putc ('\n', SCM_COERCE_OUTPORT (port)); + scm_putc_unlocked ('\n', SCM_COERCE_OUTPORT (port)); return SCM_UNSPECIFIED; } #undef FUNC_NAME @@ -1218,15 +1459,13 @@ SCM_DEFINE (scm_write_char, "write-char", 1, 1, 0, SCM_VALIDATE_CHAR (1, chr); SCM_VALIDATE_OPORT_VALUE (2, port); - scm_putc ((int) SCM_CHAR (chr), SCM_COERCE_OUTPORT (port)); -#if 0 -#ifdef HAVE_PIPE -# ifdef EPIPE - if (EPIPE == errno) - scm_close_port (port); -# endif -#endif -#endif + port = SCM_COERCE_OUTPORT (port); + if (!display_character (SCM_CHAR (chr), port, + scm_i_get_conversion_strategy (port))) + scm_encoding_error (__func__, errno, + "cannot convert to output locale", + port, chr); + return SCM_UNSPECIFIED; } #undef FUNC_NAME @@ -1249,7 +1488,7 @@ static int port_with_ps_print (SCM obj, SCM port, scm_print_state *pstate) { obj = SCM_PORT_WITH_PS_PORT (obj); - return scm_ptobs[SCM_PTOBNUM (obj)].print (obj, port, pstate); + return SCM_PORT_DESCRIPTOR (obj)->print (obj, port, pstate); } SCM @@ -1294,22 +1533,11 @@ SCM_DEFINE (scm_get_print_state, "get-print-state", 1, 0, 0, void scm_init_print () { - SCM vtable, layout, type; - - scm_init_opts (scm_print_options, scm_print_opts); + SCM type; - scm_print_options (scm_list_4 (scm_from_locale_symbol ("highlight-prefix"), - scm_from_locale_string ("{"), - scm_from_locale_symbol ("highlight-suffix"), - scm_from_locale_string ("}"))); - - scm_gc_register_root (&print_state_pool); - scm_gc_register_root (&scm_print_state_vtable); - vtable = scm_make_vtable_vtable (scm_nullstr, SCM_INUM0, SCM_EOL); - layout = - scm_make_struct_layout (scm_from_locale_string (SCM_PRINT_STATE_LAYOUT)); - type = scm_make_struct (vtable, SCM_INUM0, scm_list_1 (layout)); - scm_set_struct_vtable_name_x (type, scm_from_locale_symbol ("print-state")); + type = scm_make_vtable (scm_from_locale_string (SCM_PRINT_STATE_LAYOUT), + SCM_BOOL_F); + scm_set_struct_vtable_name_x (type, scm_from_latin1_symbol ("print-state")); scm_print_state_vtable = type; /* Don't want to bind a wrapper class in GOOPS, so pass 0 as arg1. */ @@ -1318,6 +1546,11 @@ scm_init_print () #include "libguile/print.x" + scm_init_opts (scm_print_options, scm_print_opts); + scm_print_opts[SCM_PRINT_HIGHLIGHT_PREFIX_I].val = + SCM_UNPACK (scm_from_locale_string ("{")); + scm_print_opts[SCM_PRINT_HIGHLIGHT_SUFFIX_I].val = + SCM_UNPACK (scm_from_locale_string ("}")); scm_print_opts[SCM_PRINT_KEYWORD_STYLE_I].val = SCM_UNPACK (sym_reader); }