/* Copyright (C) 1995-1999, 2000, 2001, 2002, 2003, 2004, 2006, 2008,
- * 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+ * 2009, 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
#include "libguile/alist.h"
#include "libguile/struct.h"
#include "libguile/ports.h"
+#include "libguile/ports-internal.h"
#include "libguile/root.h"
#include "libguile/strings.h"
#include "libguile/strports.h"
/* Character printers. */
+/* Expand to PORT's failed-conversion handler, i.e. the `ilseq_handler'
+   field of PORT's port table entry. */
+#define PORT_CONVERSION_HANDLER(port) \
+ SCM_PTAB_ENTRY (port)->ilseq_handler
+
static size_t display_string (const void *, int, size_t, SCM,
scm_t_string_failed_conversion_handler);
"'reader' quotes them when the reader option 'keywords' is not '#f'." },
{ SCM_OPTION_BOOLEAN, "escape-newlines", 1,
"Render newlines as \\n when printing using `write'." },
+ { SCM_OPTION_BOOLEAN, "r7rs-symbols", 0,
+ "Escape symbols using R7RS |...| symbol notation." },
{ 0 },
};
(INITIAL_IDENTIFIER_MASK \
| UC_CATEGORY_MASK_Nd | UC_CATEGORY_MASK_Mc | UC_CATEGORY_MASK_Me)
+/* FIXME: Cache this information on the symbol, somehow. */
static int
symbol_has_extended_read_syntax (SCM sym)
{
c = scm_i_symbol_ref (sym, 0);
- /* Single dot; conflicts with dotted-pair notation. */
- if (len == 1 && c == '.')
- return 1;
-
- /* Other initial-character constraints. */
- if (c == '\'' || c == '`' || c == ',' || c == '"' || c == ';' || c == '#')
- return 1;
+ switch (c)
+ {
+ case '\'':
+ case '`':
+ case ',':
+ case '"':
+ case ';':
+ case '#':
+ /* Some initial-character constraints. */
+ return 1;
+
+ case '|':
+ case '\\':
+ /* R7RS allows neither '|' nor '\' in bare symbols. */
+ if (SCM_PRINT_R7RS_SYMBOLS_P)
+ return 1;
+ break;
- /* Keywords can be identified by trailing colons too. */
- if (c == ':' || scm_i_symbol_ref (sym, len - 1) == ':')
- return quote_keywordish_symbols ();
+ case ':':
+ /* Symbols that look like keywords. */
+ return quote_keywordish_symbols ();
- /* Number-ish symbols. */
- if (scm_is_true (scm_i_string_to_number (scm_symbol_to_string (sym), 10)))
- return 1;
+ case '.':
+ /* Single dot conflicts with dotted-pair notation. */
+ if (len == 1)
+ return 1;
+ /* Fall through to check numbers. */
+ case '+':
+ case '-':
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ /* Number-ish symbols. Numbers with radixes already caught by #
+ above. */
+ if (scm_is_true (scm_i_string_to_number (scm_symbol_to_string (sym), 10)))
+ return 1;
+ break;
+
+ default:
+ break;
+ }
/* Other disallowed first characters. */
if (!uc_is_general_category_withtable (c, INITIAL_IDENTIFIER_MASK))
return 1;
+ /* Keywords can be identified by trailing colons too. */
+ if (scm_i_symbol_ref (sym, len - 1) == ':')
+ return quote_keywordish_symbols ();
+
/* Otherwise, any character that's in the identifier category mask is
fine to pass through as-is, provided it's not one of the ASCII
delimiters like `;'. */
return 1;
else if (c == '"' || c == ';' || c == '#')
return 1;
+ else if ((c == '|' || c == '\\') && SCM_PRINT_R7RS_SYMBOLS_P)
+ /* R7RS allows neither '|' nor '\' in bare symbols. */
+ return 1;
}
return 0;
+/* Print SYM to PORT verbatim, i.e. without any extended read syntax,
+   converting its characters with PORT's failed-conversion handler.
+   Signal an encoding error if a character of SYM could not be written,
+   as the string printer and scm_i_display_substring do, rather than
+   silently truncating the symbol. */
static void
print_normal_symbol (SCM sym, SCM port)
{
- scm_display (scm_symbol_to_string (sym), port);
+  size_t len, printed;
+  scm_t_string_failed_conversion_handler strategy;
+
+  len = scm_i_symbol_length (sym);
+  strategy = PORT_CONVERSION_HANDLER (port);
+
+  if (scm_i_is_narrow_symbol (sym))
+    printed = display_string (scm_i_symbol_chars (sym), 1, len, port,
+                              strategy);
+  else
+    printed = display_string (scm_i_symbol_wide_chars (sym), 0, len, port,
+                              strategy);
+
+  /* display_string stops early when a character cannot be converted
+     and the strategy is to raise an error; PRINTED is then the index
+     of the offending character. */
+  if (SCM_UNLIKELY (printed < len))
+    scm_encoding_error ("print_normal_symbol", errno,
+                        "cannot convert to output locale",
+                        port, SCM_MAKE_CHAR (scm_i_symbol_ref (sym, printed)));
}
static void
scm_t_string_failed_conversion_handler strategy;
len = scm_i_symbol_length (sym);
- strategy = scm_i_get_conversion_strategy (port);
+ strategy = PORT_CONVERSION_HANDLER (port);
scm_lfwrite_unlocked ("#{", 2, port);
SUBSEQUENT_IDENTIFIER_MASK
| UC_CATEGORY_MASK_Zs))
{
- if (!display_character (c, port, strategy))
+ if (!display_character (c, port, strategy)
+ || (c == '\\' && !display_character (c, port, strategy)))
scm_encoding_error ("print_extended_symbol", errno,
"cannot convert to output locale",
port, SCM_MAKE_CHAR (c));
}
else
{
- display_string ("\\x", 1, 2, port, iconveh_question_mark);
+ scm_lfwrite_unlocked ("\\x", 2, port);
scm_intprint (c, 16, port);
- display_character (';', port, iconveh_question_mark);
+ scm_putc_unlocked (';', port);
}
}
scm_lfwrite_unlocked ("}#", 2, port);
}
-/* FIXME: allow R6RS hex escapes instead of #{...}#. */
-void
-scm_i_print_symbol_name (SCM sym, SCM port)
+/* Write SYM to PORT between vertical bars.  Inside the bars, alarm,
+   backspace, tab, newline, return and `|' are written as two-character
+   backslash escapes, a backslash is written as `\x5c;', characters in
+   the Unicode L, M, N, P and S categories and the space character are
+   written as themselves, and anything else is written as `\x<hex>;'.
+   Signal an encoding error if a character written as itself cannot be
+   converted for PORT. */
+static void
+print_r7rs_extended_symbol (SCM sym, SCM port)
{
- if (symbol_has_extended_read_syntax (sym))
- print_extended_symbol (sym, port);
- else
+  size_t i, len;
+  scm_t_string_failed_conversion_handler strategy;
+
+  len = scm_i_symbol_length (sym);
+  strategy = PORT_CONVERSION_HANDLER (port);
+
+  scm_putc_unlocked ('|', port);
+
+  for (i = 0; i < len; i++)
+    {
+      scm_t_wchar c = scm_i_symbol_ref (sym, i);
+      const char *escape = NULL;
+      size_t escape_len = 2;
+
+      /* Characters that have a fixed escape sequence. */
+      if (c == '\a')
+        escape = "\\a";
+      else if (c == '\b')
+        escape = "\\b";
+      else if (c == '\t')
+        escape = "\\t";
+      else if (c == '\n')
+        escape = "\\n";
+      else if (c == '\r')
+        escape = "\\r";
+      else if (c == '|')
+        escape = "\\|";
+      else if (c == '\\')
+        {
+          escape = "\\x5c;";
+          escape_len = 5;
+        }
+
+      if (escape != NULL)
+        {
+          scm_lfwrite_unlocked (escape, escape_len, port);
+          continue;
+        }
+
+      /* Anything that is neither a space nor in one of the accepted
+         categories gets a hexadecimal escape. */
+      if (c != ' '
+          && !uc_is_general_category_withtable (c,
+                                                UC_CATEGORY_MASK_L
+                                                | UC_CATEGORY_MASK_M
+                                                | UC_CATEGORY_MASK_N
+                                                | UC_CATEGORY_MASK_P
+                                                | UC_CATEGORY_MASK_S))
+        {
+          scm_lfwrite_unlocked ("\\x", 2, port);
+          scm_intprint (c, 16, port);
+          scm_putc_unlocked (';', port);
+          continue;
+        }
+
+      if (!display_character (c, port, strategy))
+        scm_encoding_error ("print_r7rs_extended_symbol", errno,
+                            "cannot convert to output locale",
+                            port, SCM_MAKE_CHAR (c));
+    }
+
+  scm_putc_unlocked ('|', port);
+}
+
+/* Print SYM to PORT.  A symbol for which
+   symbol_has_extended_read_syntax returns false is printed verbatim;
+   otherwise it is printed by print_r7rs_extended_symbol when
+   SCM_PRINT_R7RS_SYMBOLS_P is true, and by print_extended_symbol
+   (the #{...}# notation) when it is not.
+
+   FIXME: allow R6RS hex escapes instead of #{...}# or |...|. */
+static void
+print_symbol (SCM sym, SCM port)
+{
+ if (!symbol_has_extended_read_syntax (sym))
print_normal_symbol (sym, port);
+ else if (SCM_PRINT_R7RS_SYMBOLS_P)
+ print_r7rs_extended_symbol (sym, port);
+ else
+ print_extended_symbol (sym, port);
}
+/* Print to PORT the symbol whose name is the LEN bytes at STR, using
+   the same notation as for any other symbol.  STR is turned into a
+   symbol with scm_from_utf8_symboln, so it is presumably expected to
+   be UTF-8 encoded. */
void
scm_print_symbol_name (const char *str, size_t len, SCM port)
{
SCM symbol = scm_from_utf8_symboln (str, len);
- scm_i_print_symbol_name (symbol, port);
+ print_symbol (symbol, port);
}
/* Print generally. Handles both write and display according to PSTATE.
else
{
if (!display_character (SCM_CHAR (exp), port,
- scm_i_get_conversion_strategy (port)))
+ PORT_CONVERSION_HANDLER (port)))
scm_encoding_error (__func__, errno,
"cannot convert to output locale",
port, exp);
break;
}
break;
+ case scm_tc7_stringbuf:
+ scm_i_print_stringbuf (exp, port, pstate);
+ break;
case scm_tc7_string:
if (SCM_WRITINGP (pstate))
{
printed = display_string (scm_i_string_data (exp),
scm_i_is_narrow_string (exp),
len, port,
- scm_i_get_conversion_strategy (port));
+ PORT_CONVERSION_HANDLER (port));
if (SCM_UNLIKELY (printed < len))
scm_encoding_error (__func__, errno,
"cannot convert to output locale",
case scm_tc7_symbol:
if (scm_i_symbol_is_interned (exp))
{
- scm_i_print_symbol_name (exp, port);
+ print_symbol (exp, port);
scm_remember_upto_here_1 (exp);
}
else
{
scm_puts_unlocked ("#<uninterned-symbol ", port);
- scm_i_print_symbol_name (exp, port);
+ print_symbol (exp, port);
scm_putc_unlocked (' ', port);
scm_uintprint (SCM_UNPACK (exp), 16, port);
scm_putc_unlocked ('>', port);
case scm_tc7_frame:
scm_i_frame_print (exp, port, pstate);
break;
- case scm_tc7_objcode:
- scm_i_objcode_print (exp, port, pstate);
- break;
- case scm_tc7_vm:
- scm_i_vm_print (exp, port, pstate);
- break;
case scm_tc7_vm_cont:
scm_i_vm_cont_print (exp, port, pstate);
break;
- case scm_tc7_prompt:
- scm_i_prompt_print (exp, port, pstate);
- break;
- case scm_tc7_with_fluids:
- scm_i_with_fluids_print (exp, port, pstate);
- break;
case scm_tc7_array:
ENTER_NESTED_DATA (pstate, exp, circref);
scm_i_print_array (exp, port, pstate);
return len;
}
+/* Write the LEN-codepoint string STR to PORT as ISO-8859-1.  STR is
+   narrow if NARROW_P is true, wide otherwise.  A narrow string is
+   written byte for byte.  In a wide string, a codepoint of 256 or more
+   is handled according to STRATEGY: it ends the output for
+   SCM_FAILED_CONVERSION_ERROR, is written as an escape sequence for
+   SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE, and is replaced by `?'
+   otherwise.  Return the number of codepoints handled, which is LEN
+   unless output stopped early, in which case it is the index of the
+   offending codepoint. */
+static size_t
+display_string_as_latin1 (const void *str, int narrow_p, size_t len,
+                          SCM port,
+                          scm_t_string_failed_conversion_handler strategy)
+{
+  size_t done;
+
+  if (narrow_p)
+    {
+      scm_lfwrite_unlocked (str, len, port);
+      return len;
+    }
+
+  for (done = 0; done < len; )
+    {
+      char chunk[256];
+      size_t fill = 0;
+
+      /* Gather a run of codepoints that fit in one byte. */
+      while (fill < sizeof (chunk) && done < len)
+        {
+          scm_t_wchar c = STR_REF (str, done);
+
+          if (c >= 256)
+            break;
+
+          chunk[fill++] = c;
+          done++;
+        }
+
+      scm_lfwrite_unlocked (chunk, fill, port);
+
+      /* The run ended because the chunk is full or the input is
+         exhausted: there is no offending codepoint to deal with. */
+      if (fill >= sizeof (chunk) || done >= len)
+        continue;
+
+      if (strategy == SCM_FAILED_CONVERSION_ERROR)
+        break;
+
+      if (strategy == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
+        write_character_escaped (STR_REF (str, done), 1, port);
+      else
+        /* STRATEGY is `SCM_FAILED_CONVERSION_QUESTION_MARK'. */
+        display_string ("?", 1, 1, port, strategy);
+
+      done++;
+    }
+
+  return done;
+}
+
/* Convert STR through PORT's output conversion descriptor and write the
output to PORT. Return the number of codepoints written. */
static size_t
{
size_t printed;
scm_t_iconv_descriptors *id;
+ scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
+
+ id = scm_i_port_iconv_descriptors (port, SCM_PORT_WRITE);
- id = scm_i_port_iconv_descriptors (port);
+ if (SCM_UNLIKELY (pti->at_stream_start_for_bom_write && len > 0))
+ {
+ scm_t_port *pt = SCM_PTAB_ENTRY (port);
+
+ /* Record that we're no longer at stream start. */
+ pti->at_stream_start_for_bom_write = 0;
+ if (pt->rw_random)
+ pti->at_stream_start_for_bom_read = 0;
+
+ /* Write a BOM if appropriate. */
+ if (SCM_UNLIKELY (strcmp(pt->encoding, "UTF-16") == 0
+ || strcmp(pt->encoding, "UTF-32") == 0))
+ display_character (SCM_UNICODE_BOM, port, iconveh_error);
+ }
printed = 0;
display_string (const void *str, int narrow_p,
size_t len, SCM port,
scm_t_string_failed_conversion_handler strategy)
-
{
- scm_t_port *pt;
+ scm_t_port_internal *pti;
- pt = SCM_PTAB_ENTRY (port);
+ pti = SCM_PORT_GET_INTERNAL (port);
- if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
+ if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
return display_string_as_utf8 (str, narrow_p, len, port);
+ else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
+ return display_string_as_latin1 (str, narrow_p, len, port, strategy);
else
- return display_string_using_iconv (str, narrow_p, len,
- port, strategy);
+ return display_string_using_iconv (str, narrow_p, len, port, strategy);
}
/* Attempt to display CH to PORT according to STRATEGY. Return non-zero
int printed = 0;
scm_t_string_failed_conversion_handler strategy;
- strategy = scm_i_get_conversion_strategy (port);
+ strategy = PORT_CONVERSION_HANDLER (port);
if (string_escapes_p)
{
write_character_escaped (ch, string_escapes_p, port);
}
+/* Display the characters of STR from index START (inclusive) to index
+   END (exclusive) on PORT, using PORT's failed-conversion handler.
+   Signal an encoding error, naming the first character that was not
+   written, if fewer than END - START characters could be displayed. */
+void
+scm_i_display_substring (SCM str, size_t start, size_t end, SCM port)
+{
+  const char *data = scm_i_string_data (str);
+  const size_t len = end - start;
+  const int narrow_p = scm_i_is_narrow_string (str);
+  /* Size in bytes of one character of STR's representation. */
+  const size_t width = narrow_p ? sizeof (char) : sizeof (scm_t_wchar);
+  size_t printed;
+
+  printed = display_string (data + start * width, narrow_p, len, port,
+                            PORT_CONVERSION_HANDLER (port));
+
+  if (SCM_UNLIKELY (printed < len))
+    scm_encoding_error (__func__, errno,
+                        "cannot convert to output locale",
+                        port, scm_c_string_ref (str, printed + start));
+}
+
+\f
/* Print an integer.
*/
port = SCM_COERCE_OUTPORT (port);
if (!display_character (SCM_CHAR (chr), port,
- scm_i_get_conversion_strategy (port)))
+ PORT_CONVERSION_HANDLER (port)))
scm_encoding_error (__func__, errno,
"cannot convert to output locale",
port, chr);