X-Git-Url: https://git.hcoop.net/bpt/guile.git/blobdiff_plain/bfe35b90ff0c7f78335e70bdb26ea3466f6e98d9..f6f4feb0a2222efcb297e634603621126542e63f:/libguile/print.c diff --git a/libguile/print.c b/libguile/print.c index d8dd24c06..652409134 100644 --- a/libguile/print.c +++ b/libguile/print.c @@ -1,5 +1,5 @@ /* Copyright (C) 1995-1999, 2000, 2001, 2002, 2003, 2004, 2006, 2008, - * 2009, 2010, 2011 Free Software Foundation, Inc. + * 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -44,6 +44,7 @@ #include "libguile/alist.h" #include "libguile/struct.h" #include "libguile/ports.h" +#include "libguile/ports-internal.h" #include "libguile/root.h" #include "libguile/strings.h" #include "libguile/strports.h" @@ -60,6 +61,9 @@ /* Character printers. */ +#define PORT_CONVERSION_HANDLER(port) \ + SCM_PTAB_ENTRY (port)->ilseq_handler + static size_t display_string (const void *, int, size_t, SCM, scm_t_string_failed_conversion_handler); @@ -332,6 +336,7 @@ quote_keywordish_symbols (void) (INITIAL_IDENTIFIER_MASK \ | UC_CATEGORY_MASK_Nd | UC_CATEGORY_MASK_Mc | UC_CATEGORY_MASK_Me) +/* FIXME: Cache this information on the symbol, somehow. */ static int symbol_has_extended_read_syntax (SCM sym) { @@ -344,26 +349,56 @@ symbol_has_extended_read_syntax (SCM sym) c = scm_i_symbol_ref (sym, 0); - /* Single dot; conflicts with dotted-pair notation. */ - if (len == 1 && c == '.') - return 1; - - /* Other initial-character constraints. */ - if (c == '\'' || c == '`' || c == ',' || c == '"' || c == ';' || c == '#') - return 1; + switch (c) + { + case '\'': + case '`': + case ',': + case '"': + case ';': + case '#': + /* Some initial-character constraints. */ + return 1; - /* Keywords can be identified by trailing colons too. */ - if (c == ':' || scm_i_symbol_ref (sym, len - 1) == ':') - return quote_keywordish_symbols (); + case ':': + /* Symbols that look like keywords. */ + return quote_keywordish_symbols (); - /* Number-ish symbols. */ - if (scm_is_true (scm_i_string_to_number (scm_symbol_to_string (sym), 10))) - return 1; + case '.': + /* Single dot conflicts with dotted-pair notation. */ + if (len == 1) + return 1; + /* Fall through to check numbers. */ + case '+': + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + /* Number-ish symbols. Numbers with radixes already caught be # + above. */ + if (scm_is_true (scm_i_string_to_number (scm_symbol_to_string (sym), 10))) + return 1; + break; + + default: + break; + } /* Other disallowed first characters. */ if (!uc_is_general_category_withtable (c, INITIAL_IDENTIFIER_MASK)) return 1; + /* Keywords can be identified by trailing colons too. */ + if (scm_i_symbol_ref (sym, len - 1) == ':') + return quote_keywordish_symbols (); + /* Otherwise, any character that's in the identifier category mask is fine to pass through as-is, provided it's not one of the ASCII delimiters like `;'. */ @@ -382,7 +417,16 @@ symbol_has_extended_read_syntax (SCM sym) static void print_normal_symbol (SCM sym, SCM port) { - scm_display (scm_symbol_to_string (sym), port); + size_t len; + scm_t_string_failed_conversion_handler strategy; + + len = scm_i_symbol_length (sym); + strategy = SCM_PTAB_ENTRY (port)->ilseq_handler; + + if (scm_i_is_narrow_symbol (sym)) + display_string (scm_i_symbol_chars (sym), 1, len, port, strategy); + else + display_string (scm_i_symbol_wide_chars (sym), 0, len, port, strategy); } static void @@ -392,7 +436,7 @@ print_extended_symbol (SCM sym, SCM port) scm_t_string_failed_conversion_handler strategy; len = scm_i_symbol_length (sym); - strategy = scm_i_get_conversion_strategy (port); + strategy = PORT_CONVERSION_HANDLER (port); scm_lfwrite_unlocked ("#{", 2, port); @@ -421,8 +465,8 @@ print_extended_symbol (SCM sym, SCM port) } /* FIXME: allow R6RS hex escapes instead of #{...}#. */ -void -scm_i_print_symbol_name (SCM sym, SCM port) +static void +print_symbol (SCM sym, SCM port) { if (symbol_has_extended_read_syntax (sym)) print_extended_symbol (sym, port); @@ -434,7 +478,7 @@ void scm_print_symbol_name (const char *str, size_t len, SCM port) { SCM symbol = scm_from_utf8_symboln (str, len); - scm_i_print_symbol_name (symbol, port); + print_symbol (symbol, port); } /* Print generally. Handles both write and display according to PSTATE. @@ -499,7 +543,7 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate) else { if (!display_character (SCM_CHAR (exp), port, - scm_i_get_conversion_strategy (port))) + PORT_CONVERSION_HANDLER (port))) scm_encoding_error (__func__, errno, "cannot convert to output locale", port, exp); @@ -585,7 +629,7 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate) printed = display_string (scm_i_string_data (exp), scm_i_is_narrow_string (exp), len, port, - scm_i_get_conversion_strategy (port)); + PORT_CONVERSION_HANDLER (port)); if (SCM_UNLIKELY (printed < len)) scm_encoding_error (__func__, errno, "cannot convert to output locale", @@ -597,13 +641,13 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate) case scm_tc7_symbol: if (scm_i_symbol_is_interned (exp)) { - scm_i_print_symbol_name (exp, port); + print_symbol (exp, port); scm_remember_upto_here_1 (exp); } else { scm_puts_unlocked ("#', port); @@ -645,20 +689,21 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate) case scm_tc7_vm_cont: scm_i_vm_cont_print (exp, port, pstate); break; - case scm_tc7_prompt: - scm_i_prompt_print (exp, port, pstate); + case scm_tc7_array: + ENTER_NESTED_DATA (pstate, exp, circref); + scm_i_print_array (exp, port, pstate); + EXIT_NESTED_DATA (pstate); + break; + case scm_tc7_bytevector: + scm_i_print_bytevector (exp, port, pstate); break; - case scm_tc7_with_fluids: - scm_i_with_fluids_print (exp, port, pstate); + case scm_tc7_bitvector: + scm_i_print_bitvector (exp, port, pstate); break; case scm_tc7_wvect: ENTER_NESTED_DATA (pstate, exp, circref); scm_puts_unlocked ("#w(", port); goto common_vector_printer; - - case scm_tc7_bytevector: - scm_i_print_bytevector (exp, port, pstate); - break; case scm_tc7_vector: ENTER_NESTED_DATA (pstate, exp, circref); scm_puts_unlocked ("#(", port); @@ -846,6 +891,54 @@ display_string_as_utf8 (const void *str, int narrow_p, size_t len, return len; } +/* Write STR to PORT as ISO-8859-1. STR is a LEN-codepoint string; it + is narrow if NARROW_P is true, wide otherwise. Return LEN. */ +static size_t +display_string_as_latin1 (const void *str, int narrow_p, size_t len, + SCM port, + scm_t_string_failed_conversion_handler strategy) +{ + size_t printed = 0; + + if (narrow_p) + { + scm_lfwrite_unlocked (str, len, port); + return len; + } + + while (printed < len) + { + char buf[256]; + size_t i; + + for (i = 0; i < sizeof(buf) && printed < len; i++, printed++) + { + scm_t_wchar c = STR_REF (str, printed); + + if (c < 256) + buf[i] = c; + else + break; + } + + scm_lfwrite_unlocked (buf, i, port); + + if (i < sizeof(buf) && printed < len) + { + if (strategy == SCM_FAILED_CONVERSION_ERROR) + break; + else if (strategy == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE) + write_character_escaped (STR_REF (str, printed), 1, port); + else + /* STRATEGY is `SCM_FAILED_CONVERSION_QUESTION_MARK'. */ + display_string ("?", 1, 1, port, strategy); + printed++; + } + } + + return printed; +} + /* Convert STR through PORT's output conversion descriptor and write the output to PORT. Return the number of codepoints written. */ static size_t @@ -854,9 +947,25 @@ display_string_using_iconv (const void *str, int narrow_p, size_t len, scm_t_string_failed_conversion_handler strategy) { size_t printed; - scm_t_port *pt; + scm_t_iconv_descriptors *id; + scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port); + + id = scm_i_port_iconv_descriptors (port, SCM_PORT_WRITE); - pt = SCM_PTAB_ENTRY (port); + if (SCM_UNLIKELY (pti->at_stream_start_for_bom_write && len > 0)) + { + scm_t_port *pt = SCM_PTAB_ENTRY (port); + + /* Record that we're no longer at stream start. */ + pti->at_stream_start_for_bom_write = 0; + if (pt->rw_random) + pti->at_stream_start_for_bom_read = 0; + + /* Write a BOM if appropriate. */ + if (SCM_UNLIKELY (strcmp(pt->encoding, "UTF-16") == 0 + || strcmp(pt->encoding, "UTF-32") == 0)) + display_character (SCM_UNICODE_BOM, port, iconveh_error); + } printed = 0; @@ -885,7 +994,7 @@ display_string_using_iconv (const void *str, int narrow_p, size_t len, output = encoded_output; output_left = sizeof (encoded_output); - done = iconv (pt->output_cd, &input, &input_left, + done = iconv (id->output_cd, &input, &input_left, &output, &output_left); output_len = sizeof (encoded_output) - output_left; @@ -895,7 +1004,7 @@ display_string_using_iconv (const void *str, int narrow_p, size_t len, int errno_save = errno; /* Reset the `iconv' state. */ - iconv (pt->output_cd, NULL, NULL, NULL, NULL); + iconv (id->output_cd, NULL, NULL, NULL, NULL); /* Print the OUTPUT_LEN bytes successfully converted. */ scm_lfwrite_unlocked (encoded_output, output_len, port); @@ -953,22 +1062,17 @@ static size_t display_string (const void *str, int narrow_p, size_t len, SCM port, scm_t_string_failed_conversion_handler strategy) - { - scm_t_port *pt; + scm_t_port_internal *pti; - pt = SCM_PTAB_ENTRY (port); + pti = SCM_PORT_GET_INTERNAL (port); - if (pt->output_cd == (iconv_t) -1) - /* Initialize the conversion descriptors, if needed. */ - scm_i_set_port_encoding_x (port, pt->encoding); - - /* FIXME: In 2.1, add a flag to determine whether a port is UTF-8. */ - if (pt->output_cd == (iconv_t) -1) + if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8) return display_string_as_utf8 (str, narrow_p, len, port); + else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1) + return display_string_as_latin1 (str, narrow_p, len, port, strategy); else - return display_string_using_iconv (str, narrow_p, len, - port, strategy); + return display_string_using_iconv (str, narrow_p, len, port, strategy); } /* Attempt to display CH to PORT according to STRATEGY. Return non-zero @@ -1094,7 +1198,7 @@ write_character (scm_t_wchar ch, SCM port, int string_escapes_p) int printed = 0; scm_t_string_failed_conversion_handler strategy; - strategy = scm_i_get_conversion_strategy (port); + strategy = PORT_CONVERSION_HANDLER (port); if (string_escapes_p) { @@ -1142,6 +1246,29 @@ write_character (scm_t_wchar ch, SCM port, int string_escapes_p) write_character_escaped (ch, string_escapes_p, port); } +/* Display STR to PORT from START inclusive to END exclusive. */ +void +scm_i_display_substring (SCM str, size_t start, size_t end, SCM port) +{ + int narrow_p; + const char *buf; + size_t len, printed; + + buf = scm_i_string_data (str); + len = end - start; + narrow_p = scm_i_is_narrow_string (str); + buf += start * (narrow_p ? sizeof (char) : sizeof (scm_t_wchar)); + + printed = display_string (buf, narrow_p, end - start, port, + PORT_CONVERSION_HANDLER (port)); + + if (SCM_UNLIKELY (printed < len)) + scm_encoding_error (__func__, errno, + "cannot convert to output locale", + port, scm_c_string_ref (str, printed + start)); +} + + /* Print an integer. */ @@ -1336,7 +1463,7 @@ SCM_DEFINE (scm_simple_format, "simple-format", 2, 0, 1, "@var{message} can contain @code{~A} (was @code{%s}) and\n" "@code{~S} (was @code{%S}) escapes. When printed,\n" "the escapes are replaced with corresponding members of\n" - "@var{ARGS}:\n" + "@var{args}:\n" "@code{~A} formats using @code{display} and @code{~S} formats\n" "using @code{write}.\n" "If @var{destination} is @code{#t}, then use the current output\n" @@ -1455,7 +1582,7 @@ SCM_DEFINE (scm_write_char, "write-char", 1, 1, 0, port = SCM_COERCE_OUTPORT (port); if (!display_character (SCM_CHAR (chr), port, - scm_i_get_conversion_strategy (port))) + PORT_CONVERSION_HANDLER (port))) scm_encoding_error (__func__, errno, "cannot convert to output locale", port, chr);