/* Copyright (C) 1995-1999, 2000, 2001, 2002, 2003, 2004, 2006, 2008,
- * 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+ * 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
#include "libguile/alist.h"
#include "libguile/struct.h"
#include "libguile/ports.h"
+#include "libguile/ports-internal.h"
#include "libguile/root.h"
#include "libguile/strings.h"
#include "libguile/strports.h"
/* Character printers. */
+#define PORT_CONVERSION_HANDLER(port) \
+ SCM_PTAB_ENTRY (port)->ilseq_handler
+
static size_t display_string (const void *, int, size_t, SCM,
scm_t_string_failed_conversion_handler);
(INITIAL_IDENTIFIER_MASK \
| UC_CATEGORY_MASK_Nd | UC_CATEGORY_MASK_Mc | UC_CATEGORY_MASK_Me)
+/* FIXME: Cache this information on the symbol, somehow. */
static int
symbol_has_extended_read_syntax (SCM sym)
{
c = scm_i_symbol_ref (sym, 0);
- /* Single dot; conflicts with dotted-pair notation. */
- if (len == 1 && c == '.')
- return 1;
-
- /* Other initial-character constraints. */
- if (c == '\'' || c == '`' || c == ',' || c == '"' || c == ';' || c == '#')
- return 1;
+ switch (c)
+ {
+ case '\'':
+ case '`':
+ case ',':
+ case '"':
+ case ';':
+ case '#':
+ /* Some initial-character constraints. */
+ return 1;
- /* Keywords can be identified by trailing colons too. */
- if (c == ':' || scm_i_symbol_ref (sym, len - 1) == ':')
- return quote_keywordish_symbols ();
+ case ':':
+ /* Symbols that look like keywords. */
+ return quote_keywordish_symbols ();
- /* Number-ish symbols. */
- if (scm_is_true (scm_i_string_to_number (scm_symbol_to_string (sym), 10)))
- return 1;
+ case '.':
+ /* Single dot conflicts with dotted-pair notation. */
+ if (len == 1)
+ return 1;
+ /* Fall through to check numbers. */
+ case '+':
+ case '-':
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ /* Number-ish symbols. Numbers with radixes already caught be #
+ above. */
+ if (scm_is_true (scm_i_string_to_number (scm_symbol_to_string (sym), 10)))
+ return 1;
+ break;
+
+ default:
+ break;
+ }
/* Other disallowed first characters. */
if (!uc_is_general_category_withtable (c, INITIAL_IDENTIFIER_MASK))
return 1;
+ /* Keywords can be identified by trailing colons too. */
+ if (scm_i_symbol_ref (sym, len - 1) == ':')
+ return quote_keywordish_symbols ();
+
/* Otherwise, any character that's in the identifier category mask is
fine to pass through as-is, provided it's not one of the ASCII
delimiters like `;'. */
static void
print_normal_symbol (SCM sym, SCM port)
{
- scm_display (scm_symbol_to_string (sym), port);
+ size_t len;
+ scm_t_string_failed_conversion_handler strategy;
+
+ len = scm_i_symbol_length (sym);
+ strategy = SCM_PTAB_ENTRY (port)->ilseq_handler;
+
+ if (scm_i_is_narrow_symbol (sym))
+ display_string (scm_i_symbol_chars (sym), 1, len, port, strategy);
+ else
+ display_string (scm_i_symbol_wide_chars (sym), 0, len, port, strategy);
}
static void
scm_t_string_failed_conversion_handler strategy;
len = scm_i_symbol_length (sym);
- strategy = scm_i_get_conversion_strategy (port);
+ strategy = PORT_CONVERSION_HANDLER (port);
scm_lfwrite_unlocked ("#{", 2, port);
}
/* FIXME: allow R6RS hex escapes instead of #{...}#. */
-void
-scm_i_print_symbol_name (SCM sym, SCM port)
+static void
+print_symbol (SCM sym, SCM port)
{
if (symbol_has_extended_read_syntax (sym))
print_extended_symbol (sym, port);
scm_print_symbol_name (const char *str, size_t len, SCM port)
{
SCM symbol = scm_from_utf8_symboln (str, len);
- scm_i_print_symbol_name (symbol, port);
+ print_symbol (symbol, port);
}
/* Print generally. Handles both write and display according to PSTATE.
else
{
if (!display_character (SCM_CHAR (exp), port,
- scm_i_get_conversion_strategy (port)))
+ PORT_CONVERSION_HANDLER (port)))
scm_encoding_error (__func__, errno,
"cannot convert to output locale",
port, exp);
printed = display_string (scm_i_string_data (exp),
scm_i_is_narrow_string (exp),
len, port,
- scm_i_get_conversion_strategy (port));
+ PORT_CONVERSION_HANDLER (port));
if (SCM_UNLIKELY (printed < len))
scm_encoding_error (__func__, errno,
"cannot convert to output locale",
case scm_tc7_symbol:
if (scm_i_symbol_is_interned (exp))
{
- scm_i_print_symbol_name (exp, port);
+ print_symbol (exp, port);
scm_remember_upto_here_1 (exp);
}
else
{
scm_puts_unlocked ("#<uninterned-symbol ", port);
- scm_i_print_symbol_name (exp, port);
+ print_symbol (exp, port);
scm_putc_unlocked (' ', port);
scm_uintprint (SCM_UNPACK (exp), 16, port);
scm_putc_unlocked ('>', port);
case scm_tc7_vm_cont:
scm_i_vm_cont_print (exp, port, pstate);
break;
- case scm_tc7_prompt:
- scm_i_prompt_print (exp, port, pstate);
- break;
- case scm_tc7_with_fluids:
- scm_i_with_fluids_print (exp, port, pstate);
- break;
case scm_tc7_array:
ENTER_NESTED_DATA (pstate, exp, circref);
scm_i_print_array (exp, port, pstate);
return len;
}
+/* Write STR to PORT as ISO-8859-1. STR is a LEN-codepoint string; it
+ is narrow if NARROW_P is true, wide otherwise. Return LEN. */
+static size_t
+display_string_as_latin1 (const void *str, int narrow_p, size_t len,
+ SCM port,
+ scm_t_string_failed_conversion_handler strategy)
+{
+ size_t printed = 0;
+
+ if (narrow_p)
+ {
+ scm_lfwrite_unlocked (str, len, port);
+ return len;
+ }
+
+ while (printed < len)
+ {
+ char buf[256];
+ size_t i;
+
+ for (i = 0; i < sizeof(buf) && printed < len; i++, printed++)
+ {
+ scm_t_wchar c = STR_REF (str, printed);
+
+ if (c < 256)
+ buf[i] = c;
+ else
+ break;
+ }
+
+ scm_lfwrite_unlocked (buf, i, port);
+
+ if (i < sizeof(buf) && printed < len)
+ {
+ if (strategy == SCM_FAILED_CONVERSION_ERROR)
+ break;
+ else if (strategy == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
+ write_character_escaped (STR_REF (str, printed), 1, port);
+ else
+ /* STRATEGY is `SCM_FAILED_CONVERSION_QUESTION_MARK'. */
+ display_string ("?", 1, 1, port, strategy);
+ printed++;
+ }
+ }
+
+ return printed;
+}
+
/* Convert STR through PORT's output conversion descriptor and write the
output to PORT. Return the number of codepoints written. */
static size_t
scm_t_string_failed_conversion_handler strategy)
{
size_t printed;
- scm_t_port *pt;
+ scm_t_iconv_descriptors *id;
+ scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
- pt = SCM_PTAB_ENTRY (port);
+ id = scm_i_port_iconv_descriptors (port, SCM_PORT_WRITE);
+
+ if (SCM_UNLIKELY (pti->at_stream_start_for_bom_write && len > 0))
+ {
+ scm_t_port *pt = SCM_PTAB_ENTRY (port);
+
+ /* Record that we're no longer at stream start. */
+ pti->at_stream_start_for_bom_write = 0;
+ if (pt->rw_random)
+ pti->at_stream_start_for_bom_read = 0;
+
+ /* Write a BOM if appropriate. */
+ if (SCM_UNLIKELY (strcmp(pt->encoding, "UTF-16") == 0
+ || strcmp(pt->encoding, "UTF-32") == 0))
+ display_character (SCM_UNICODE_BOM, port, iconveh_error);
+ }
printed = 0;
output = encoded_output;
output_left = sizeof (encoded_output);
- done = iconv (pt->output_cd, &input, &input_left,
+ done = iconv (id->output_cd, &input, &input_left,
&output, &output_left);
output_len = sizeof (encoded_output) - output_left;
int errno_save = errno;
/* Reset the `iconv' state. */
- iconv (pt->output_cd, NULL, NULL, NULL, NULL);
+ iconv (id->output_cd, NULL, NULL, NULL, NULL);
/* Print the OUTPUT_LEN bytes successfully converted. */
scm_lfwrite_unlocked (encoded_output, output_len, port);
display_string (const void *str, int narrow_p,
size_t len, SCM port,
scm_t_string_failed_conversion_handler strategy)
-
{
- scm_t_port *pt;
-
- pt = SCM_PTAB_ENTRY (port);
+ scm_t_port_internal *pti;
- if (pt->output_cd == (iconv_t) -1)
- /* Initialize the conversion descriptors, if needed. */
- scm_i_set_port_encoding_x (port, pt->encoding);
+ pti = SCM_PORT_GET_INTERNAL (port);
- /* FIXME: In 2.1, add a flag to determine whether a port is UTF-8. */
- if (pt->output_cd == (iconv_t) -1)
+ if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
return display_string_as_utf8 (str, narrow_p, len, port);
+ else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
+ return display_string_as_latin1 (str, narrow_p, len, port, strategy);
else
- return display_string_using_iconv (str, narrow_p, len,
- port, strategy);
+ return display_string_using_iconv (str, narrow_p, len, port, strategy);
}
/* Attempt to display CH to PORT according to STRATEGY. Return non-zero
int printed = 0;
scm_t_string_failed_conversion_handler strategy;
- strategy = scm_i_get_conversion_strategy (port);
+ strategy = PORT_CONVERSION_HANDLER (port);
if (string_escapes_p)
{
write_character_escaped (ch, string_escapes_p, port);
}
+/* Display STR to PORT from START inclusive to END exclusive. */
+void
+scm_i_display_substring (SCM str, size_t start, size_t end, SCM port)
+{
+ int narrow_p;
+ const char *buf;
+ size_t len, printed;
+
+ buf = scm_i_string_data (str);
+ len = end - start;
+ narrow_p = scm_i_is_narrow_string (str);
+ buf += start * (narrow_p ? sizeof (char) : sizeof (scm_t_wchar));
+
+ printed = display_string (buf, narrow_p, end - start, port,
+ PORT_CONVERSION_HANDLER (port));
+
+ if (SCM_UNLIKELY (printed < len))
+ scm_encoding_error (__func__, errno,
+ "cannot convert to output locale",
+ port, scm_c_string_ref (str, printed + start));
+}
+
+\f
/* Print an integer.
*/
"@var{message} can contain @code{~A} (was @code{%s}) and\n"
"@code{~S} (was @code{%S}) escapes. When printed,\n"
"the escapes are replaced with corresponding members of\n"
- "@var{ARGS}:\n"
+ "@var{args}:\n"
"@code{~A} formats using @code{display} and @code{~S} formats\n"
"using @code{write}.\n"
"If @var{destination} is @code{#t}, then use the current output\n"
port = SCM_COERCE_OUTPORT (port);
if (!display_character (SCM_CHAR (chr), port,
- scm_i_get_conversion_strategy (port)))
+ PORT_CONVERSION_HANDLER (port)))
scm_encoding_error (__func__, errno,
"cannot convert to output locale",
port, chr);