simplify inline function infrastructure
[bpt/guile.git] / libguile / print.c
index ea439a6..c2dcd28 100644 (file)
@@ -1,5 +1,6 @@
-/* Copyright (C) 1995-1999,2000,2001, 2002, 2003, 2004, 2006, 2008, 2009 Free Software Foundation, Inc.
- * 
+/* Copyright (C) 1995-1999, 2000, 2001, 2002, 2003, 2004, 2006, 2008,
+ *   2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+ *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public License
  * as published by the Free Software Foundation; either version 3 of
 #endif
 
 #include <errno.h>
+#include <iconv.h>
+#include <stdio.h>
+#include <assert.h>
+
 #include <uniconv.h>
 #include <unictype.h>
 
@@ -30,6 +35,7 @@
 #include "libguile/chars.h"
 #include "libguile/continuations.h"
 #include "libguile/smob.h"
+#include "libguile/control.h"
 #include "libguile/eval.h"
 #include "libguile/macros.h"
 #include "libguile/procprop.h"
@@ -43,8 +49,8 @@
 #include "libguile/strings.h"
 #include "libguile/strports.h"
 #include "libguile/vectors.h"
-#include "libguile/lang.h"
 #include "libguile/numbers.h"
+#include "libguile/vm.h"
 
 #include "libguile/validate.h"
 #include "libguile/print.h"
 
 \f
 
+/* Character printers.  */
+
+static size_t display_string (const void *, int, size_t, SCM,
+                             scm_t_string_failed_conversion_handler);
+
+static int display_character (scm_t_wchar, SCM,
+                             scm_t_string_failed_conversion_handler);
+
+static void write_character (scm_t_wchar, SCM, int);
+
+static void write_character_escaped (scm_t_wchar, int, SCM);
+
+\f
+
 /* {Names of immediate symbols}
  * 
  * This table must agree with the declarations in scm.h: {Immediate Symbols}.
@@ -66,7 +86,9 @@ static const char *iflagnames[] =
   "#<XXX UNUSED LISP FALSE -- DO NOT USE -- SHOULD NEVER BE SEEN XXX>",
   "()",
   "#t",
-  "#<XXX UNUSED BOOLEAN -- DO NOT USE -- SHOULD NEVER BE SEEN XXX>",
+  "#<XXX UNUSED BOOLEAN 0 -- DO NOT USE -- SHOULD NEVER BE SEEN XXX>",
+  "#<XXX UNUSED BOOLEAN 1 -- DO NOT USE -- SHOULD NEVER BE SEEN XXX>",
+  "#<XXX UNUSED BOOLEAN 2 -- DO NOT USE -- SHOULD NEVER BE SEEN XXX>",
   "#<unspecified>",
   "#<undefined>",
   "#<eof>",
@@ -78,21 +100,17 @@ static const char *iflagnames[] =
 SCM_SYMBOL (sym_reader, "reader");
 
 scm_t_option scm_print_opts[] = {
-  { SCM_OPTION_SCM, "closure-hook", SCM_UNPACK (SCM_BOOL_F),
-    "Hook for printing closures (should handle macros as well)." },
-  { SCM_OPTION_BOOLEAN, "source", 0,
-    "Print closures with source." },
-  { SCM_OPTION_SCM, "highlight-prefix", (unsigned long)SCM_BOOL_F,
+  { SCM_OPTION_SCM, "highlight-prefix", (scm_t_bits)SCM_BOOL_F_BITS,
     "The string to print before highlighted values." },
-  { SCM_OPTION_SCM, "highlight-suffix", (unsigned long)SCM_BOOL_F,
+  { SCM_OPTION_SCM, "highlight-suffix", (scm_t_bits)SCM_BOOL_F_BITS,
     "The string to print after highlighted values." },
-  { SCM_OPTION_SCM, "quote-keywordish-symbols", (unsigned long)SCM_BOOL_F,
+  { SCM_OPTION_SCM, "quote-keywordish-symbols", (scm_t_bits)SCM_BOOL_F_BITS,
     "How to print symbols that have a colon as their first or last character. "
     "The value '#f' does not quote the colons; '#t' quotes them; "
-    "'reader' quotes them when the reader option 'keywords' is not '#f'." 
-  },
+    "'reader' quotes them when the reader option 'keywords' is not '#f'." },
+  { SCM_OPTION_BOOLEAN, "escape-newlines", 1,
+    "Render newlines as \\n when printing using `write'." },
   { 0 },
-  
 };
 
 SCM_DEFINE (scm_print_options, "print-options-interface", 0, 1, 0, 
@@ -292,15 +310,10 @@ print_circref (SCM port, scm_print_state *pstate, SCM ref)
 /* Print the name of a symbol. */
 
 static int
-quote_keywordish_symbol (SCM symbol)
+quote_keywordish_symbols (void)
 {
-  SCM option;
+  SCM option = SCM_PRINT_KEYWORD_STYLE;
 
-  if (scm_i_symbol_ref (symbol, 0) != ':'
-      && scm_i_symbol_ref (symbol, scm_i_symbol_length (symbol) - 1) !=  ':')
-    return 0;
-
-  option = SCM_PRINT_KEYWORD_STYLE;
   if (scm_is_false (option))
     return 0;
   if (scm_is_eq (option, sym_reader))
@@ -308,98 +321,121 @@ quote_keywordish_symbol (SCM symbol)
   return 1;
 }
 
-void
-scm_i_print_symbol_name (SCM str, SCM port)
+#define INITIAL_IDENTIFIER_MASK                                      \
+  (UC_CATEGORY_MASK_Lu | UC_CATEGORY_MASK_Ll | UC_CATEGORY_MASK_Lt   \
+   | UC_CATEGORY_MASK_Lm | UC_CATEGORY_MASK_Lo | UC_CATEGORY_MASK_Mn \
+   | UC_CATEGORY_MASK_Nl | UC_CATEGORY_MASK_No | UC_CATEGORY_MASK_Pd \
+   | UC_CATEGORY_MASK_Pc | UC_CATEGORY_MASK_Po | UC_CATEGORY_MASK_Sc \
+   | UC_CATEGORY_MASK_Sm | UC_CATEGORY_MASK_Sk | UC_CATEGORY_MASK_So \
+   | UC_CATEGORY_MASK_Co)
+
+#define SUBSEQUENT_IDENTIFIER_MASK                                      \
+  (INITIAL_IDENTIFIER_MASK                                              \
+   | UC_CATEGORY_MASK_Nd | UC_CATEGORY_MASK_Mc | UC_CATEGORY_MASK_Me)
+
+static int
+symbol_has_extended_read_syntax (SCM sym)
 {
-  /* This points to the first character that has not yet been written to the
-   * port. */
-  size_t pos = 0;
-  /* This points to the character we're currently looking at. */
-  size_t end;
-  /* If the name contains weird characters, we'll escape them with
-   * backslashes and set this flag; it indicates that we should surround the
-   * name with "#{" and "}#". */
-  int weird = 0;
-  /* Backslashes are not sufficient to make a name weird, but if a name is
-   * weird because of other characters, backslahes need to be escaped too.
-   * The first time we see a backslash, we set maybe_weird, and mw_pos points
-   * to the backslash.  Then if the name turns out to be weird, we re-process
-   * everything starting from mw_pos.
-   * We could instead make backslashes always weird.  This is not necessary
-   * to ensure that the output is (read)-able, but it would make this code
-   * simpler and faster. */
-  int maybe_weird = 0;
-  size_t mw_pos = 0;
-  size_t len = scm_i_symbol_length (str);
-  scm_t_wchar str0 = scm_i_symbol_ref (str, 0);
-
-  if (len == 0 || str0 == '\'' || str0 == '`' || str0 == ','
-      || quote_keywordish_symbol (str) 
-      || (str0 == '.' && len == 1)
-      || scm_is_true (scm_i_string_to_number (scm_symbol_to_string (str), 10)))
+  size_t pos, len = scm_i_symbol_length (sym);
+  scm_t_wchar c;
+
+  /* The empty symbol.  */
+  if (len == 0)
+    return 1;
+
+  c = scm_i_symbol_ref (sym, 0);
+
+  /* Single dot; conflicts with dotted-pair notation.  */
+  if (len == 1 && c == '.')
+    return 1;
+
+  /* Other initial-character constraints.  */
+  if (c == '\'' || c == '`' || c == ',' || c == '"' || c == ';' || c == '#')
+    return 1;
+  
+  /* Keywords can be identified by trailing colons too.  */
+  if (c == ':' || scm_i_symbol_ref (sym, len - 1) == ':')
+    return quote_keywordish_symbols ();
+  
+  /* Number-ish symbols.  */
+  if (scm_is_true (scm_i_string_to_number (scm_symbol_to_string (sym), 10)))
+    return 1;
+  
+  /* Other disallowed first characters.  */
+  if (!uc_is_general_category_withtable (c, INITIAL_IDENTIFIER_MASK))
+    return 1;
+
+  /* Otherwise, any character that's in the identifier category mask is
+     fine to pass through as-is, provided it's not one of the ASCII
+     delimiters like `;'.  */
+  for (pos = 1; pos < len; pos++)
     {
-      scm_lfwrite ("#{", 2, port);
-      weird = 1;
+      c = scm_i_symbol_ref (sym, pos);
+      if (!uc_is_general_category_withtable (c, SUBSEQUENT_IDENTIFIER_MASK))
+        return 1;
+      else if (c == '"' || c == ';' || c == '#')
+        return 1;
     }
 
-  for (end = pos; end < len; ++end)
-    switch (scm_i_symbol_ref (str, end))
-      {
-#ifdef BRACKETS_AS_PARENS
-      case '[':
-      case ']':
-#endif
-      case '(':
-      case ')':
-      case '"':
-      case ';':
-      case '#':
-      case SCM_WHITE_SPACES:
-      case SCM_LINE_INCREMENTORS:
-      weird_handler:
-       if (maybe_weird)
-         {
-           end = mw_pos;
-           maybe_weird = 0;
-         }
-       if (!weird)
-         {
-           scm_lfwrite ("#{", 2, port);
-           weird = 1;
-         }
-       if (pos < end)
-         scm_lfwrite_substr (scm_symbol_to_string (str), pos, end, port);
-       {
-         char buf[2];
-         buf[0] = '\\';
-         buf[1] = (char) (unsigned char) scm_i_symbol_ref (str, end);
-         scm_lfwrite (buf, 2, port);
-       }
-       pos = end + 1;
-       break;
-      case '\\':
-       if (weird)
-         goto weird_handler;
-       if (!maybe_weird)
-         {
-           maybe_weird = 1;
-           mw_pos = pos;
-         }
-       break;
-      default:
-       break;
-      }
-  if (pos < end)
-    scm_lfwrite_substr (scm_symbol_to_string (str), pos, end, port);
-  if (weird)
-    scm_lfwrite ("}#", 2, port);
+  return 0;
+}
+
+static void
+print_normal_symbol (SCM sym, SCM port)
+{
+  scm_display (scm_symbol_to_string (sym), port);
+}
+
+static void
+print_extended_symbol (SCM sym, SCM port)
+{
+  size_t pos, len;
+  scm_t_string_failed_conversion_handler strategy;
+
+  len = scm_i_symbol_length (sym);
+  strategy = scm_i_get_conversion_strategy (port);
+
+  scm_lfwrite ("#{", 2, port);
+
+  for (pos = 0; pos < len; pos++)
+    {
+      scm_t_wchar c = scm_i_symbol_ref (sym, pos);
+      
+      if (uc_is_general_category_withtable (c,
+                                            SUBSEQUENT_IDENTIFIER_MASK
+                                            | UC_CATEGORY_MASK_Zs))
+        {
+          if (!display_character (c, port, strategy))
+            scm_encoding_error ("print_extended_symbol", errno,
+                                "cannot convert to output locale",
+                                port, SCM_MAKE_CHAR (c));
+        }
+      else
+        {
+          display_string ("\\x", 1, 2, port, iconveh_question_mark);
+          scm_intprint (c, 16, port);
+          display_character (';', port, iconveh_question_mark);
+        }
+    }
+
+  scm_lfwrite ("}#", 2, port);
+}
+
+/* FIXME: allow R6RS hex escapes instead of #{...}#.  */
+void
+scm_i_print_symbol_name (SCM sym, SCM port)
+{
+  if (symbol_has_extended_read_syntax (sym))
+    print_extended_symbol (sym, port);
+  else
+    print_normal_symbol (sym, port);
 }
 
 void
 scm_print_symbol_name (const char *str, size_t len, SCM port)
 {
   SCM symbol = scm_from_locale_symboln (str, len);
-  return scm_i_print_symbol_name (symbol, port);
+  scm_i_print_symbol_name (symbol, port);
 }
 
 /* Print generally.  Handles both write and display according to PSTATE.
@@ -409,6 +445,22 @@ SCM_GPROC(s_display, "display", 1, 1, 0, scm_display, g_display);
 
 static void iprin1 (SCM exp, SCM port, scm_print_state *pstate);
 
+
+/* Print a character as an octal or hex escape.  */
+#define PRINT_CHAR_ESCAPE(i, port)              \
+  do                                            \
+    {                                           \
+      if (!SCM_R6RS_ESCAPES_P)                  \
+        scm_intprint (i, 8, port);              \
+      else                                      \
+        {                                       \
+          scm_puts ("x", port);                 \
+          scm_intprint (i, 16, port);           \
+        }                                       \
+    }                                           \
+  while (0)
+
+  
 void 
 scm_iprin1 (SCM exp, SCM port, scm_print_state *pstate)
 {
@@ -443,79 +495,16 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
     case scm_tc3_imm24:
       if (SCM_CHARP (exp))
        {
-         scm_t_wchar i = SCM_CHAR (exp);
-          const char *name;
-
          if (SCM_WRITINGP (pstate))
+           write_character (SCM_CHAR (exp), port, 0);
+         else
            {
-             scm_puts ("#\\", port);
-             name = scm_i_charname (exp);
-             if (name != NULL)
-               scm_puts (name, port);
-             else if (uc_is_general_category_withtable (i, UC_CATEGORY_MASK_L
-                                                         | UC_CATEGORY_MASK_M 
-                                                         | UC_CATEGORY_MASK_N 
-                                                         | UC_CATEGORY_MASK_P 
-                                                         | UC_CATEGORY_MASK_S))
-                /* Print the character if is graphic character.  */
-                {
-                  scm_t_wchar *wbuf;
-                  SCM wstr;
-                  char *buf;
-                  size_t len;
-                  const char *enc;
-
-                  enc = scm_i_get_port_encoding (port);
-                  if (uc_combining_class (i) == UC_CCC_NR)
-                    {
-                      wstr = scm_i_make_wide_string (1, &wbuf);
-                      wbuf[0] = i;
-                    }
-                  else
-                    {
-                      /* Character is a combining character: print it connected
-                         to a dotted circle instead of connecting it to the 
-                         backslash in '#\'  */
-                      wstr = scm_i_make_wide_string (2, &wbuf);
-                      wbuf[0] = SCM_CODEPOINT_DOTTED_CIRCLE;
-                      wbuf[1] = i;
-                    }
-                  if (enc == NULL)
-                    {
-                      if (i <= 0xFF)
-                        /* Character is graphic and Latin-1.  Print it  */
-                        scm_lfwrite_str (wstr, port);
-                      else
-                        /* Character is graphic but unrepresentable in
-                           this port's encoding.  */
-                        scm_intprint (i, 8, port);
-                    }
-                  else
-                    {
-                      buf = u32_conv_to_encoding (enc, 
-                                                  iconveh_error,
-                                                  (scm_t_uint32 *) wbuf, 
-                                                  1,
-                                                  NULL,
-                                                  NULL, &len);
-                      if (buf != NULL)
-                        {
-                          /* Character is graphic.  Print it.  */
-                          scm_lfwrite_str (wstr, port);
-                          free (buf);
-                        }
-                      else
-                        /* Character is graphic but unrepresentable in
-                           this port's encoding.  */
-                        scm_intprint (i, 8, port);
-                    }
-                }
-              else
-                /* Character is a non-graphical character.  */
-                scm_intprint (i, 8, port);
+             if (!display_character (SCM_CHAR (exp), port,
+                                     scm_i_get_conversion_strategy (port)))
+               scm_encoding_error (__func__, errno,
+                                   "cannot convert to output locale",
+                                   port, exp);
            }
-         else
-           scm_i_charprint (i, port);
        }
       else if (SCM_IFLAGP (exp)
               && ((size_t) SCM_IFLAGNUM (exp) < (sizeof iflagnames / sizeof (char *))))
@@ -537,7 +526,7 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
            if (SCM_OBJ_CLASS_FLAGS (exp) & SCM_CLASSF_GOOPS)
              {
                SCM pwps, print = pstate->writingp ? g_write : g_display;
-               if (!print)
+               if (SCM_UNPACK (print) == 0)
                  goto print_struct;
                pwps = scm_i_port_with_print_state (port, pstate->handle);
                pstate->revealed = 1;
@@ -579,113 +568,31 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
         case scm_tc7_string:
           if (SCM_WRITINGP (pstate))
             {
-              size_t i, j, len;
-              static char const hex[] = "0123456789abcdef";
-              char buf[8];
+              size_t len, i;
 
-
-              scm_putc ('"', port);
+              display_character ('"', port, iconveh_question_mark);
               len = scm_i_string_length (exp);
               for (i = 0; i < len; ++i)
-                {
-                  scm_t_wchar ch = scm_i_string_ref (exp, i);
-                  int printed = 0;
-
-                  if (ch == ' ' || ch == '\n')
-                    {
-                      scm_putc (ch, port);
-                      printed = 1;
-                    }
-                  else if (ch == '"' || ch == '\\')
-                    {
-                      scm_putc ('\\', port);
-                      scm_i_charprint (ch, port);
-                      printed = 1;
-                    }
-                  else
-                    if (uc_is_general_category_withtable
-                        (ch,
-                         UC_CATEGORY_MASK_L | UC_CATEGORY_MASK_M |
-                         UC_CATEGORY_MASK_N | UC_CATEGORY_MASK_P |
-                         UC_CATEGORY_MASK_S))
-                    {
-                      /* Print the character since it is a graphic
-                         character.  */
-                      scm_t_wchar *wbuf;
-                      SCM wstr = scm_i_make_wide_string (1, &wbuf);
-                      char *buf;
-                      size_t len;
-                      
-                      if (scm_i_get_port_encoding (port))
-                        {
-                          wstr = scm_i_make_wide_string (1, &wbuf);
-                          wbuf[0] = ch;
-                          buf = u32_conv_to_encoding (scm_i_get_port_encoding (port), 
-                                                      iconveh_error,
-                                                      (scm_t_uint32 *) wbuf, 
-                                                      1   ,
-                                                      NULL,
-                                                      NULL, &len);
-                          if (buf != NULL)
-                            {
-                              /* Character is graphic and representable in
-                                 this encoding.  Print it.  */
-                              scm_lfwrite_str (wstr, port);
-                              free (buf);
-                              printed = 1;
-                            }
-                        }
-                      else
-                        if (ch <= 0xFF)
-                          {
-                            scm_putc (ch, port);
-                            printed = 1;
-                          }
-                    }
-
-                  if (!printed)
-                    {
-                      /* Character is graphic but unrepresentable in
-                         this port's encoding or is not graphic.  */
-                      if (ch <= 0xFF)
-                        {
-                          buf[0] = '\\';
-                          buf[1] = 'x';
-                          buf[2] = hex[ch / 16];
-                          buf[3] = hex[ch % 16];
-                          scm_lfwrite (buf, 4, port);
-                        }
-                      else if (ch <= 0xFFFF)
-                        {
-                          buf[0] = '\\';
-                          buf[1] = 'u';
-                          buf[2] = hex[(ch & 0xF000) >> 12];
-                          buf[3] = hex[(ch & 0xF00) >> 8];
-                          buf[4] = hex[(ch & 0xF0) >> 4];
-                          buf[5] = hex[(ch & 0xF)];
-                          scm_lfwrite (buf, 6, port);
-                          j = i + 1;
-                        }
-                      else if (ch > 0xFFFF)
-                        {
-                          buf[0] = '\\';
-                          buf[1] = 'U';
-                          buf[2] = hex[(ch & 0xF00000) >> 20];
-                          buf[3] = hex[(ch & 0xF0000) >> 16];
-                          buf[4] = hex[(ch & 0xF000) >> 12];
-                          buf[5] = hex[(ch & 0xF00) >> 8];
-                          buf[6] = hex[(ch & 0xF0) >> 4];
-                          buf[7] = hex[(ch & 0xF)];
-                          scm_lfwrite (buf, 8, port);
-                          j = i + 1;
-                        }
-                    }
-                }
-              scm_putc ('"', port);
+               write_character (scm_i_string_ref (exp, i), port, 1);
+
+              display_character ('"', port, iconveh_question_mark);
               scm_remember_upto_here_1 (exp);
             }
           else
-            scm_lfwrite_str (exp, port);
+           {
+             size_t len, printed;
+
+             len = scm_i_string_length (exp);
+             printed = display_string (scm_i_string_data (exp),
+                                       scm_i_is_narrow_string (exp),
+                                       len, port,
+                                       scm_i_get_conversion_strategy (port));
+             if (SCM_UNLIKELY (printed < len))
+               scm_encoding_error (__func__, errno,
+                                   "cannot convert to output locale",
+                                   port, scm_c_string_ref (exp, printed));
+           }
+
           scm_remember_upto_here_1 (exp);
           break;
        case scm_tc7_symbol:
@@ -709,9 +616,47 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
        case scm_tc7_program:
          scm_i_program_print (exp, port, pstate);
          break;
+       case scm_tc7_pointer:
+         scm_i_pointer_print (exp, port, pstate);
+         break;
        case scm_tc7_hashtable:
          scm_i_hashtable_print (exp, port, pstate);
          break;
+       case scm_tc7_fluid:
+         scm_i_fluid_print (exp, port, pstate);
+         break;
+       case scm_tc7_dynamic_state:
+         scm_i_dynamic_state_print (exp, port, pstate);
+         break;
+       case scm_tc7_frame:
+         scm_i_frame_print (exp, port, pstate);
+         break;
+       case scm_tc7_objcode:
+         scm_i_objcode_print (exp, port, pstate);
+         break;
+       case scm_tc7_vm:
+         scm_i_vm_print (exp, port, pstate);
+         break;
+       case scm_tc7_vm_cont:
+         scm_i_vm_cont_print (exp, port, pstate);
+         break;
+       case scm_tc7_prompt:
+         scm_i_prompt_print (exp, port, pstate);
+         break;
+       case scm_tc7_with_fluids:
+         scm_i_with_fluids_print (exp, port, pstate);
+         break;
+       case scm_tc7_array:
+         ENTER_NESTED_DATA (pstate, exp, circref);
+          scm_i_print_array (exp, port, pstate);
+          EXIT_NESTED_DATA (pstate);
+          break;
+       case scm_tc7_bytevector:
+         scm_i_print_bytevector (exp, port, pstate);
+         break;
+       case scm_tc7_bitvector:
+         scm_i_print_bitvector (exp, port, pstate);
+         break;
        case scm_tc7_wvect:
          ENTER_NESTED_DATA (pstate, exp, circref);
          if (SCM_IS_WHVEC (exp))
@@ -719,10 +664,6 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
          else
            scm_puts ("#w(", port);
          goto common_vector_printer;
-
-       case scm_tc7_bytevector:
-         scm_i_print_bytevector (exp, port, pstate);
-         break;
        case scm_tc7_vector:
          ENTER_NESTED_DATA (pstate, exp, circref);
          scm_puts ("#(", port);
@@ -768,29 +709,6 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
          }
          EXIT_NESTED_DATA (pstate);
          break;
-       case scm_tc7_gsubr:
-         {
-           SCM name = scm_symbol_to_string (SCM_SUBR_NAME (exp));
-           scm_puts (SCM_SUBR_GENERIC (exp)
-                     ? "#<primitive-generic "
-                     : "#<primitive-procedure ",
-                     port);
-           scm_lfwrite_str (name, port);
-           scm_putc ('>', port);
-           break;
-         }
-       case scm_tc7_pws:
-         scm_puts ("#<procedure-with-setter", port);
-         {
-           SCM name = scm_procedure_name (exp);
-           if (scm_is_true (name))
-             {
-               scm_putc (' ', port);
-               scm_display (name, port);
-             }
-         }
-         scm_putc ('>', port);
-         break;
        case scm_tc7_port:
          {
            register long i = SCM_PTOBNUM (exp);
@@ -871,16 +789,379 @@ scm_prin1 (SCM exp, SCM port, int writingp)
     }
 }
 
-/* Print a character.
- */
-void
-scm_i_charprint (scm_t_wchar ch, SCM port)
+/* Convert codepoint CH to UTF-8 and store the result in UTF8.  Return
  the number of bytes of the UTF-8-encoded string.  */
+static size_t
+codepoint_to_utf8 (scm_t_wchar ch, scm_t_uint8 utf8[4])
 {
-  scm_t_wchar *wbuf;
-  SCM wstr = scm_i_make_wide_string (1, &wbuf);
+  size_t len;
+  scm_t_uint32 codepoint;
+
+  codepoint = (scm_t_uint32) ch;
 
-  wbuf[0] = ch;
-  scm_lfwrite_str (wstr, port);
+  if (codepoint <= 0x7f)
+    {
+      len = 1;
+      utf8[0] = (scm_t_uint8) codepoint;
+    }
+  else if (codepoint <= 0x7ffUL)
+    {
+      len = 2;
+      utf8[0] = 0xc0 | (codepoint >> 6);
+      utf8[1] = 0x80 | (codepoint & 0x3f);
+    }
+  else if (codepoint <= 0xffffUL)
+    {
+      len = 3;
+      utf8[0] = 0xe0 | (codepoint >> 12);
+      utf8[1] = 0x80 | ((codepoint >> 6) & 0x3f);
+      utf8[2] = 0x80 | (codepoint & 0x3f);
+    }
+  else
+    {
+      len = 4;
+      utf8[0] = 0xf0 | (codepoint >> 18);
+      utf8[1] = 0x80 | ((codepoint >> 12) & 0x3f);
+      utf8[2] = 0x80 | ((codepoint >> 6) & 0x3f);
+      utf8[3] = 0x80 | (codepoint & 0x3f);
+    }
+
+  return len;
+}
+
+#define STR_REF(s, x)                          \
+  (narrow_p                                    \
+   ? (scm_t_wchar) ((unsigned char *) (s))[x]  \
+   : ((scm_t_wchar *) (s))[x])
+
+/* Write STR to PORT as UTF-8.  STR is a LEN-codepoint string; it is
+   narrow if NARROW_P is true, wide otherwise.  Return LEN.  */
+static size_t
+display_string_as_utf8 (const void *str, int narrow_p, size_t len,
+                       SCM port)
+{
+  size_t printed = 0;
+
+  while (len > printed)
+    {
+      size_t utf8_len, i;
+      char *input, utf8_buf[256];
+
+      /* Convert STR to UTF-8.  */
+      for (i = printed, utf8_len = 0, input = utf8_buf;
+          i < len && utf8_len + 4 < sizeof (utf8_buf);
+          i++)
+       {
+         utf8_len += codepoint_to_utf8 (STR_REF (str, i),
+                                        (scm_t_uint8 *) input);
+         input = utf8_buf + utf8_len;
+       }
+
+      /* INPUT was successfully converted, entirely; print the
+        result.  */
+      scm_lfwrite (utf8_buf, utf8_len, port);
+      printed += i - printed;
+    }
+
+  assert (printed == len);
+
+  return len;
+}
+
+/* Convert STR through PORT's output conversion descriptor and write the
+   output to PORT.  Return the number of codepoints written.  */
+static size_t
+display_string_using_iconv (const void *str, int narrow_p, size_t len,
+                           SCM port,
+                           scm_t_string_failed_conversion_handler strategy)
+{
+  size_t printed;
+  scm_t_port *pt;
+
+  pt = SCM_PTAB_ENTRY (port);
+
+  printed = 0;
+
+  while (len > printed)
+    {
+      size_t done, utf8_len, input_left, output_left, i;
+      size_t codepoints_read, output_len;
+      char *input, *output;
+      char utf8_buf[256], encoded_output[256];
+      size_t offsets[256];
+
+      /* Convert STR to UTF-8.  */
+      for (i = printed, utf8_len = 0, input = utf8_buf;
+          i < len && utf8_len + 4 < sizeof (utf8_buf);
+          i++)
+       {
+         offsets[utf8_len] = i;
+         utf8_len += codepoint_to_utf8 (STR_REF (str, i),
+                                        (scm_t_uint8 *) input);
+         input = utf8_buf + utf8_len;
+       }
+
+      input = utf8_buf;
+      input_left = utf8_len;
+
+      output = encoded_output;
+      output_left = sizeof (encoded_output);
+
+      done = iconv (pt->output_cd, &input, &input_left,
+                   &output, &output_left);
+
+      output_len = sizeof (encoded_output) - output_left;
+
+      if (SCM_UNLIKELY (done == (size_t) -1))
+       {
+          int errno_save = errno;
+
+         /* Reset the `iconv' state.  */
+         iconv (pt->output_cd, NULL, NULL, NULL, NULL);
+
+         /* Print the OUTPUT_LEN bytes successfully converted.  */
+         scm_lfwrite (encoded_output, output_len, port);
+
+         /* See how many input codepoints these OUTPUT_LEN bytes
+            corresponds to.  */
+         codepoints_read = offsets[input - utf8_buf] - printed;
+         printed += codepoints_read;
+
+         if (errno_save == EILSEQ &&
+             strategy != SCM_FAILED_CONVERSION_ERROR)
+           {
+             /* Conversion failed somewhere in INPUT and we want to
+                escape or substitute the offending input character.  */
+
+             if (strategy == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
+               {
+                 scm_t_wchar ch;
+
+                 /* Find CH, the offending codepoint, and escape it.  */
+                 ch = STR_REF (str, offsets[input - utf8_buf]);
+                 write_character_escaped (ch, 1, port);
+               }
+             else
+               /* STRATEGY is `SCM_FAILED_CONVERSION_QUESTION_MARK'.  */
+               display_string ("?", 1, 1, port, strategy);
+
+             printed++;
+           }
+         else
+           /* Something bad happened that we can't handle: bail out.  */
+           break;
+       }
+      else
+       {
+         /* INPUT was successfully converted, entirely; print the
+            result.  */
+         scm_lfwrite (encoded_output, output_len, port);
+         codepoints_read = i - printed;
+         printed += codepoints_read;
+       }
+    }
+
+  return printed;
+}
+
+#undef STR_REF
+
+/* Display the LEN codepoints in STR to PORT according to STRATEGY;
+   return the number of codepoints successfully displayed.  If NARROW_P,
+   then STR is interpreted as a sequence of `char', denoting a Latin-1
+   string; otherwise it's interpreted as a sequence of
+   `scm_t_wchar'.  */
+static size_t
+display_string (const void *str, int narrow_p,
+               size_t len, SCM port,
+               scm_t_string_failed_conversion_handler strategy)
+
+{
+  scm_t_port *pt;
+
+  pt = SCM_PTAB_ENTRY (port);
+
+  if (pt->output_cd == (iconv_t) -1)
+    /* Initialize the conversion descriptors, if needed.  */
+    scm_i_set_port_encoding_x (port, pt->encoding);
+
+  /* FIXME: In 2.1, add a flag to determine whether a port is UTF-8.  */
+  if (pt->output_cd == (iconv_t) -1)
+    return display_string_as_utf8 (str, narrow_p, len, port);
+  else
+    return display_string_using_iconv (str, narrow_p, len,
+                                      port, strategy);
+}
+
+/* Attempt to display CH to PORT according to STRATEGY.  Return non-zero
+   if CH was successfully displayed, zero otherwise (e.g., if it was not
+   representable in PORT's encoding.)  */
+static int
+display_character (scm_t_wchar ch, SCM port,
+                  scm_t_string_failed_conversion_handler strategy)
+{
+  return display_string (&ch, 0, 1, port, strategy) == 1;
+}
+
+/* Attempt to pretty-print CH, a combining character, to PORT.  Return
+   zero upon failure, non-zero otherwise.  The idea is to print CH above
+   a dotted circle to make it more visible.  */
+static int
+write_combining_character (scm_t_wchar ch, SCM port)
+{
+  scm_t_wchar str[2];
+
+  str[0] = SCM_CODEPOINT_DOTTED_CIRCLE;
+  str[1] = ch;
+
+  return display_string (str, 0, 2, port, iconveh_error) == 2;
+}
+
+/* Write CH to PORT in its escaped form, using the string escape syntax
+   if STRING_ESCAPES_P is non-zero.  */
+static void
+write_character_escaped (scm_t_wchar ch, int string_escapes_p, SCM port)
+{
+  if (string_escapes_p)
+    {
+      /* Represent CH using the in-string escape syntax.  */
+
+      static const char hex[] = "0123456789abcdef";
+      static const char escapes[7] = "abtnvfr";
+      char buf[9];
+
+      if (ch >= 0x07 && ch <= 0x0D && ch != 0x0A)
+       {
+         /* Use special escapes for some C0 controls.  */
+         buf[0] = '\\';
+         buf[1] = escapes[ch - 0x07];
+         scm_lfwrite (buf, 2, port);
+       }
+      else if (!SCM_R6RS_ESCAPES_P)
+       {
+         if (ch <= 0xFF)
+           {
+             buf[0] = '\\';
+             buf[1] = 'x';
+             buf[2] = hex[ch / 16];
+             buf[3] = hex[ch % 16];
+             scm_lfwrite (buf, 4, port);
+           }
+         else if (ch <= 0xFFFF)
+           {
+             buf[0] = '\\';
+             buf[1] = 'u';
+             buf[2] = hex[(ch & 0xF000) >> 12];
+             buf[3] = hex[(ch & 0xF00) >> 8];
+             buf[4] = hex[(ch & 0xF0) >> 4];
+             buf[5] = hex[(ch & 0xF)];
+             scm_lfwrite (buf, 6, port);
+           }
+         else if (ch > 0xFFFF)
+           {
+             buf[0] = '\\';
+             buf[1] = 'U';
+             buf[2] = hex[(ch & 0xF00000) >> 20];
+             buf[3] = hex[(ch & 0xF0000) >> 16];
+             buf[4] = hex[(ch & 0xF000) >> 12];
+             buf[5] = hex[(ch & 0xF00) >> 8];
+             buf[6] = hex[(ch & 0xF0) >> 4];
+             buf[7] = hex[(ch & 0xF)];
+             scm_lfwrite (buf, 8, port);
+           }
+       }
+      else
+       {
+         /* Print an R6RS variable-length hex escape: "\xNNNN;".  */
+         scm_t_wchar ch2 = ch;
+
+         int i = 8;
+         buf[i] = ';';
+         i --;
+         if (ch == 0)
+           buf[i--] = '0';
+         else
+           while (ch2 > 0)
+             {
+               buf[i] = hex[ch2 & 0xF];
+               ch2 >>= 4;
+               i --;
+             }
+         buf[i] = 'x';
+         i --;
+         buf[i] = '\\';
+         scm_lfwrite (buf + i, 9 - i, port);
+       }
+    }
+  else
+    {
+      /* Represent CH using the character escape syntax.  */
+      const char *name;
+
+      name = scm_i_charname (SCM_MAKE_CHAR (ch));
+      if (name != NULL)
+       scm_puts (name, port);
+      else
+       PRINT_CHAR_ESCAPE (ch, port);
+    }
+}
+
+/* Write CH to PORT, escaping it if it's non-graphic or not
+   representable in PORT's encoding.  If STRING_ESCAPES_P is true and CH
+   needs to be escaped, it is escaped using the in-string escape syntax;
+   otherwise the character escape syntax is used.  */
+static void
+write_character (scm_t_wchar ch, SCM port, int string_escapes_p)
+{
+  int printed = 0;
+  scm_t_string_failed_conversion_handler strategy;
+
+  strategy = scm_i_get_conversion_strategy (port);
+
+  if (string_escapes_p)
+    {
+      /* Check if CH deserves special treatment.  */
+      if (ch == '"' || ch == '\\')
+       {
+         display_character ('\\', port, iconveh_question_mark);
+         display_character (ch, port, strategy);
+         printed = 1;
+       }
+      else if (ch == '\n' && SCM_PRINT_ESCAPE_NEWLINES_P)
+        {
+         display_character ('\\', port, iconveh_question_mark);
+         display_character ('n', port, strategy);
+         printed = 1;
+        }
+      else if (ch == ' ' || ch == '\n')
+       {
+         display_character (ch, port, strategy);
+         printed = 1;
+       }
+    }
+  else
+    {
+      display_string ("#\\", 1, 2, port, iconveh_question_mark);
+
+      if (uc_combining_class (ch) != UC_CCC_NR)
+       /* Character is a combining character, so attempt to
+          pretty-print it.  */
+       printed = write_combining_character (ch, port);
+    }
+
+  if (!printed
+      && uc_is_general_category_withtable (ch,
+                                          UC_CATEGORY_MASK_L |
+                                          UC_CATEGORY_MASK_M |
+                                          UC_CATEGORY_MASK_N |
+                                          UC_CATEGORY_MASK_P |
+                                          UC_CATEGORY_MASK_S))
+    /* CH is graphic; attempt to display it.  */
+    printed = display_character (ch, port, iconveh_error);
+
+  if (!printed)
+    /* CH isn't graphic or cannot be represented in PORT's encoding.  */
+    write_character_escaped (ch, string_escapes_p, port);
 }
 
 /* Print an integer.
@@ -1043,14 +1324,6 @@ scm_write (SCM obj, SCM port)
   SCM_ASSERT (scm_valid_oport_value_p (port), port, SCM_ARG2, s_write);
 
   scm_prin1 (obj, port, 1);
-#if 0
-#ifdef HAVE_PIPE
-# ifdef EPIPE
-  if (EPIPE == errno)
-    scm_close_port (port);
-# endif
-#endif
-#endif
   return SCM_UNSPECIFIED;
 }
 
@@ -1066,14 +1339,6 @@ scm_display (SCM obj, SCM port)
   SCM_ASSERT (scm_valid_oport_value_p (port), port, SCM_ARG2, s_display);
 
   scm_prin1 (obj, port, 0);
-#if 0
-#ifdef HAVE_PIPE
-# ifdef EPIPE
-  if (EPIPE == errno)
-    scm_close_port (port);
-# endif
-#endif
-#endif
   return SCM_UNSPECIFIED;
 }
 
@@ -1085,7 +1350,7 @@ SCM_DEFINE (scm_simple_format, "simple-format", 2, 0, 1,
            "@var{message} can contain @code{~A} (was @code{%s}) and\n"
            "@code{~S} (was @code{%S}) escapes.  When printed,\n"
            "the escapes are replaced with corresponding members of\n"
-           "@var{ARGS}:\n"
+           "@var{args}:\n"
            "@code{~A} formats using @code{display} and @code{~S} formats\n"
            "using @code{write}.\n"
            "If @var{destination} is @code{#t}, then use the current output\n"
@@ -1105,8 +1370,7 @@ SCM_DEFINE (scm_simple_format, "simple-format", 2, 0, 1,
   else if (scm_is_false (destination))
     {
       fReturnString = 1;
-      port = scm_mkstrport (SCM_INUM0, 
-                           scm_make_string (SCM_INUM0, SCM_UNDEFINED),
+      port = scm_mkstrport (SCM_INUM0, SCM_BOOL_F,
                            SCM_OPN | SCM_WRTNG,
                            FUNC_NAME);
       destination = port;
@@ -1202,16 +1466,14 @@ SCM_DEFINE (scm_write_char, "write-char", 1, 1, 0,
 
   SCM_VALIDATE_CHAR (1, chr);
   SCM_VALIDATE_OPORT_VALUE (2, port);
-  
-  scm_i_charprint (SCM_CHAR (chr), SCM_COERCE_OUTPORT (port));
-#if 0
-#ifdef HAVE_PIPE
-# ifdef EPIPE
-  if (EPIPE == errno)
-    scm_close_port (port);
-# endif
-#endif
-#endif
+
+  port = SCM_COERCE_OUTPORT (port);
+  if (!display_character (SCM_CHAR (chr), port,
+                         scm_i_get_conversion_strategy (port)))
+    scm_encoding_error (__func__, errno,
+                       "cannot convert to output locale",
+                       port, chr);
+
   return SCM_UNSPECIFIED;
 }
 #undef FUNC_NAME
@@ -1281,20 +1543,13 @@ scm_init_print ()
 {
   SCM vtable, layout, type;
 
-  scm_init_opts (scm_print_options, scm_print_opts);
-
-  scm_print_options (scm_list_4 (scm_from_locale_symbol ("highlight-prefix"),
-                                scm_from_locale_string ("{"),
-                                scm_from_locale_symbol ("highlight-suffix"),
-                                scm_from_locale_string ("}")));
-
   scm_gc_register_root (&print_state_pool);
   scm_gc_register_root (&scm_print_state_vtable);
   vtable = scm_make_vtable_vtable (scm_nullstr, SCM_INUM0, SCM_EOL);
   layout =
     scm_make_struct_layout (scm_from_locale_string (SCM_PRINT_STATE_LAYOUT));
   type = scm_make_struct (vtable, SCM_INUM0, scm_list_1 (layout));
-  scm_set_struct_vtable_name_x (type, scm_from_locale_symbol ("print-state"));
+  scm_set_struct_vtable_name_x (type, scm_from_latin1_symbol ("print-state"));
   scm_print_state_vtable = type;
 
   /* Don't want to bind a wrapper class in GOOPS, so pass 0 as arg1. */
@@ -1303,6 +1558,11 @@ scm_init_print ()
 
 #include "libguile/print.x"
 
+  scm_init_opts (scm_print_options, scm_print_opts);
+  scm_print_opts[SCM_PRINT_HIGHLIGHT_PREFIX_I].val =
+    SCM_UNPACK (scm_from_locale_string ("{"));
+  scm_print_opts[SCM_PRINT_HIGHLIGHT_SUFFIX_I].val =
+    SCM_UNPACK (scm_from_locale_string ("}"));
   scm_print_opts[SCM_PRINT_KEYWORD_STYLE_I].val = SCM_UNPACK (sym_reader);
 }