Merge remote-tracking branch 'origin/stable-2.0'

author Mark H Weaver <mhw@netris.org>

Sun, 14 Apr 2013 06:48:33 +0000 (02:48 -0400)

committer Mark H Weaver <mhw@netris.org>

Sun, 14 Apr 2013 06:48:33 +0000 (02:48 -0400)
author Mark H Weaver <mhw@netris.org>
Sun, 14 Apr 2013 06:48:33 +0000 (02:48 -0400)
committer Mark H Weaver <mhw@netris.org>
Sun, 14 Apr 2013 06:48:33 +0000 (02:48 -0400)
diff --cc configure.ac
Simple merge
diff --cc doc/ref/api-control.texi
Simple merge
diff --cc doc/ref/api-data.texi
Simple merge
diff --cc doc/ref/guile-invoke.texi
Simple merge
diff --cc doc/ref/posix.texi
Simple merge
diff --cc libguile/Makefile.am
Simple merge
diff --cc libguile/array-map.c

index c0f0f00,1c443ac..e47fb56
--- 1/libguile/array-map.c
--- 2/libguile/array-map.c
+++ b/libguile/array-map.c
@@@ -1,5 -1,6 +1,6 @@@
- /* Copyright (C) 1996,1998,2000,2001,2004,2005, 2006, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+ /* Copyright (C) 1996, 1998, 2000, 2001, 2004, 2005, 2006, 2008, 2009,
- - *   2010, 2012, 2013 Free Software Foundation, Inc.
- - *
++ *   2010, 2011, 2012, 2013 Free Software Foundation, Inc.
+ + * 
    * This library is free software; you can redistribute it and/or
    * modify it under the terms of the GNU Lesser General Public License
    * as published by the Free Software Foundation; either version 3 of
diff --cc libguile/filesys.c
Simple merge
diff --cc libguile/fports.c
Simple merge
diff --cc libguile/fports.h

index 4094f14,c32ed95..092b43e
--- 1/libguile/fports.h
--- 2/libguile/fports.h
+++ b/libguile/fports.h
@@@ -57,15 -56,7 +59,16 @@@ SCM_API SCM scm_open_file_with_encodin
   SCM_API SCM scm_open_file (SCM filename, SCM modes);
   SCM_API SCM scm_fdes_to_port (int fdes, char *mode, SCM name);
   SCM_API SCM scm_file_port_p (SCM obj);
+ +
+ +\f
+ +/* Revealed counts.  */
+ +SCM_API int scm_revealed_count (SCM port);
+ +SCM_API SCM scm_port_revealed (SCM port);
+ +SCM_API SCM scm_set_port_revealed_x (SCM port, SCM rcount);
+ +SCM_API SCM scm_adjust_port_revealed_x (SCM port, SCM addend);
+ +
+ +\f
+ SCM_INTERNAL void scm_init_fports_keywords (void);
   SCM_INTERNAL void scm_init_fports (void);
   
   /* internal functions */
diff --cc libguile/gc.h

index b1df82d,a9a499b..0857781
--- 1/libguile/gc.h
--- 2/libguile/gc.h
+++ b/libguile/gc.h
@@@ -4,7 -4,7 +4,7 @@@
   #define SCM_GC_H
   
   /* Copyright (C) 1995, 1996, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006,
-  *   2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
- - *   2007, 2008, 2009, 2010, 2011, 2013 Free Software Foundation, Inc.
++ *   2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
    *
    * This library is free software; you can redistribute it and/or
    * modify it under the terms of the GNU Lesser General Public License
diff --cc libguile/gen-scmconfig.c
Simple merge
diff --cc libguile/init.c
Simple merge
diff --cc libguile/keywords.c
Simple merge
diff --cc libguile/ports-internal.h

index 0000000,8a3a00b..bff89cb

mode 000000,100644..100644
--- /dev/null
--- 2/libguile/ports-internal.h
+++ b/libguile/ports-internal.h
@@@ -1,0 -1,67 +1,66 @@@
- -   cause finalizers to be registered (FIXME: although currently in 2.0
- -   finalizers are always registered for ports anyway).  */
+ /*
+  * ports-internal.h - internal-only declarations for ports.
+  *
+  * Copyright (C) 2013 Free Software Foundation, Inc.
+  *
+  * This library is free software; you can redistribute it and/or
+  * modify it under the terms of the GNU Lesser General Public License
+  * as published by the Free Software Foundation; either version 3 of
+  * the License, or (at your option) any later version.
+  *
+  * This library is distributed in the hope that it will be useful, but
+  * WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  * Lesser General Public License for more details.
+  *
+  * You should have received a copy of the GNU Lesser General Public
+  * License along with this library; if not, write to the Free Software
+  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+  * 02110-1301 USA
+  */
+ 
+ #ifndef SCM_PORTS_INTERNAL
+ #define SCM_PORTS_INTERNAL
+ 
+ #include "libguile/_scm.h"
+ #include "libguile/ports.h"
+ 
+ enum scm_port_encoding_mode {
+   SCM_PORT_ENCODING_MODE_UTF8,
++  SCM_PORT_ENCODING_MODE_LATIN1,
+   SCM_PORT_ENCODING_MODE_ICONV
+ };
+ 
+ typedef enum scm_port_encoding_mode scm_t_port_encoding_mode;
+ 
+ /* This is a separate object so that only those ports that use iconv
- -#define SCM_PORT_GET_INTERNAL(x)                                \
- -  ((scm_t_port_internal *) (SCM_PTAB_ENTRY(x)->input_cd))
++   cause finalizers to be registered.  */
+ struct scm_iconv_descriptors
+ {
+   /* input/output iconv conversion descriptors */
+   void *input_cd;
+   void *output_cd;
+ };
+ 
+ typedef struct scm_iconv_descriptors scm_t_iconv_descriptors;
+ 
+ struct scm_port_internal
+ {
+   unsigned at_stream_start_for_bom_read  : 1;
+   unsigned at_stream_start_for_bom_write : 1;
+   scm_t_port_encoding_mode encoding_mode;
+   scm_t_iconv_descriptors *iconv_descriptors;
+   int pending_eof;
+   SCM alist;
+ };
+ 
+ typedef struct scm_port_internal scm_t_port_internal;
+ 
+ #define SCM_UNICODE_BOM  0xFEFFUL  /* Unicode byte-order mark */
+ 
++#define SCM_PORT_GET_INTERNAL(x)  (SCM_PTAB_ENTRY(x)->internal)
+ 
+ SCM_INTERNAL scm_t_iconv_descriptors *
+ scm_i_port_iconv_descriptors (SCM port, scm_t_port_rw_active mode);
+ 
+ #endif
diff --cc libguile/ports.c

index 31d338e,9068c5c..20c9081
--- 1/libguile/ports.c
--- 2/libguile/ports.c
+++ b/libguile/ports.c
@@@ -55,8 -56,9 +55,9 @@@
   #include "libguile/mallocs.h"
   #include "libguile/validate.h"
   #include "libguile/ports.h"
+ #include "libguile/ports-internal.h"
   #include "libguile/vectors.h"
- -#include "libguile/weaks.h"
+ +#include "libguile/weak-set.h"
   #include "libguile/fluids.h"
   #include "libguile/eq.h"
   
@@@ -326,11 -239,159 +327,35 @@@ scm_set_port_truncate (scm_t_bits tc, v
   void
   scm_set_port_input_waiting (scm_t_bits tc, int (*input_waiting) (SCM))
   {
- -  scm_ptobs[SCM_TC2PTOBNUM (tc)].input_waiting = input_waiting;
+ +  scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->input_waiting = input_waiting;
   }
   
+ static void
+ scm_i_set_pending_eof (SCM port)
+ {
+   SCM_PORT_GET_INTERNAL (port)->pending_eof = 1;
+ }
+ 
+ static void
+ scm_i_clear_pending_eof (SCM port)
+ {
+   SCM_PORT_GET_INTERNAL (port)->pending_eof = 0;
+ }
+ 
+ SCM
+ scm_i_port_alist (SCM port)
+ {
+   return SCM_PORT_GET_INTERNAL (port)->alist;
+ }
+ 
+ void
+ scm_i_set_port_alist_x (SCM port, SCM alist)
+ {
+   SCM_PORT_GET_INTERNAL (port)->alist = alist;
+ }
+ 
   \f
   
- -SCM_DEFINE (scm_char_ready_p, "char-ready?", 0, 1, 0, 
- -          (SCM port),
- -          "Return @code{#t} if a character is ready on input @var{port}\n"
- -          "and return @code{#f} otherwise.  If @code{char-ready?} returns\n"
- -          "@code{#t} then the next @code{read-char} operation on\n"
- -          "@var{port} is guaranteed not to hang.  If @var{port} is a file\n"
- -          "port at end of file then @code{char-ready?} returns @code{#t}.\n"
- -          "\n"
- -          "@code{char-ready?} exists to make it possible for a\n"
- -          "program to accept characters from interactive ports without\n"
- -          "getting stuck waiting for input.  Any input editors associated\n"
- -          "with such ports must make sure that characters whose existence\n"
- -          "has been asserted by @code{char-ready?} cannot be rubbed out.\n"
- -          "If @code{char-ready?} were to return @code{#f} at end of file,\n"
- -          "a port at end of file would be indistinguishable from an\n"
- -          "interactive port that has no ready characters.")
- -#define FUNC_NAME s_scm_char_ready_p
- -{
- -  scm_t_port *pt;
- -
- -  if (SCM_UNBNDP (port))
- -    port = scm_current_input_port ();
- -  /* It's possible to close the current input port, so validate even in
- -     this case. */
- -  SCM_VALIDATE_OPINPORT (1, port);
- -
- -  pt = SCM_PTAB_ENTRY (port);
- -
- -  /* if the current read buffer is filled, or the
- -     last pushed-back char has been read and the saved buffer is
- -     filled, result is true.  */
- -  if (pt->read_pos < pt->read_end 
- -      || (pt->read_buf == pt->putback_buf
- -        && pt->saved_read_pos < pt->saved_read_end))
- -    return SCM_BOOL_T;
- -  else
- -    {
- -      scm_t_ptob_descriptor *ptob = &scm_ptobs[SCM_PTOBNUM (port)];
- -      
- -      if (ptob->input_waiting)
- -      return scm_from_bool(ptob->input_waiting (port));
- -      else
- -      return SCM_BOOL_T;
- -    }
- -}
- -#undef FUNC_NAME
- -
- -/* Move up to READ_LEN bytes from PORT's putback and/or read buffers
- -   into memory starting at DEST.  Return the number of bytes moved.
- -   PORT's line/column numbers are left unchanged.  */
- -size_t
- -scm_take_from_input_buffers (SCM port, char *dest, size_t read_len)
- -{
- -  scm_t_port *pt = SCM_PTAB_ENTRY (port);
- -  size_t bytes_read = 0;
- -  size_t from_buf = min (pt->read_end - pt->read_pos, read_len);
- -
- -  if (from_buf > 0)
- -    {
- -      memcpy (dest, pt->read_pos, from_buf);
- -      pt->read_pos += from_buf;
- -      bytes_read += from_buf;
- -      read_len -= from_buf;
- -      dest += from_buf;
- -    }
- -
- -  /* if putback was active, try the real input buffer too.  */
- -  if (pt->read_buf == pt->putback_buf)
- -    {
- -      from_buf = min (pt->saved_read_end - pt->saved_read_pos, read_len);
- -      if (from_buf > 0)
- -      {
- -        memcpy (dest, pt->saved_read_pos, from_buf);
- -        pt->saved_read_pos += from_buf;
- -        bytes_read += from_buf;
- -      }
- -    }
- -
- -  return bytes_read;
- -}
- -
- -/* Clear a port's read buffers, returning the contents.  */
- -SCM_DEFINE (scm_drain_input, "drain-input", 1, 0, 0, 
- -            (SCM port),
- -          "This procedure clears a port's input buffers, similar\n"
- -          "to the way that force-output clears the output buffer.  The\n"
- -          "contents of the buffers are returned as a single string, e.g.,\n"
- -          "\n"
- -          "@lisp\n"
- -          "(define p (open-input-file ...))\n"
- -          "(drain-input p) => empty string, nothing buffered yet.\n"
- -          "(unread-char (read-char p) p)\n"
- -          "(drain-input p) => initial chars from p, up to the buffer size.\n"
- -          "@end lisp\n\n"
- -          "Draining the buffers may be useful for cleanly finishing\n"
- -          "buffered I/O so that the file descriptor can be used directly\n"
- -          "for further input.")
- -#define FUNC_NAME s_scm_drain_input
- -{
- -  SCM result;
- -  char *data;
- -  scm_t_port *pt;
- -  long count;
- -
- -  SCM_VALIDATE_OPINPORT (1, port);
- -  pt = SCM_PTAB_ENTRY (port);
- -
- -  count = pt->read_end - pt->read_pos;
- -  if (pt->read_buf == pt->putback_buf)
- -    count += pt->saved_read_end - pt->saved_read_pos;
- -
- -  if (count)
- -    {
- -      result = scm_i_make_string (count, &data, 0);
- -      scm_take_from_input_buffers (port, data, count);
- -    }
- -  else
- -    result = scm_nullstr;
- -  
- -  return result;
- -}
- -#undef FUNC_NAME
- -
- -\f
   /* Standard ports --- current input, output, error, and more(!).  */
   
   static SCM cur_inport_fluid = SCM_BOOL_F;
@@@ -497,268 -558,316 +522,275 @@@ scm_i_dynwind_current_load_port (SCM po
     scm_dynwind_fluid (cur_loadport_fluid, port);
   }
   
- -\f
- -/* The port table --- an array of pointers to ports.  */
- -
- -/*
- -  We need a global registry of ports to flush them all at exit, and to
- -  get all the ports matching a file descriptor.
- - */
- -SCM scm_i_port_weak_hash;
- -
- -scm_i_pthread_mutex_t scm_i_port_table_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
   
   \f
- -/* Port finalization.  */
   
+ +/* Retrieving a port's mode.  */
   
- -static void finalize_port (void *, void *);
+ +/* Return the flags that characterize a port based on the mode
+ + * string used to open a file for that port.
+ + *
+ + * See PORT FLAGS in scm.h
+ + */
   
- -/* Register a finalizer for PORT.  */
- -static SCM_C_INLINE_KEYWORD void
- -register_finalizer_for_port (SCM port)
+ +static long
+ +scm_i_mode_bits_n (SCM modes)
   {
- -  /* Register a finalizer for PORT so that its
- -     type's `free' function gets called.  */
- -  scm_i_set_finalizer (SCM2PTR (port), finalize_port, NULL);
+ +  return (SCM_OPN
+ +        | (scm_i_string_contains_char (modes, 'r') 
+ +           || scm_i_string_contains_char (modes, '+') ? SCM_RDNG : 0)
+ +        | (scm_i_string_contains_char (modes, 'w')
+ +           || scm_i_string_contains_char (modes, 'a')
+ +           || scm_i_string_contains_char (modes, '+') ? SCM_WRTNG : 0)
+ +        | (scm_i_string_contains_char (modes, '0') ? SCM_BUF0 : 0)
+ +        | (scm_i_string_contains_char (modes, 'l') ? SCM_BUFLINE : 0));
   }
   
- -/* Finalize the object (a port) pointed to by PTR.  */
- -static void
- -finalize_port (void *ptr, void *data)
+ +long
+ +scm_mode_bits (char *modes)
   {
- -  long port_type;
- -  SCM port = PTR2SCM (ptr);
- -
- -  if (!SCM_PORTP (port))
- -    abort ();
- -
- -  if (SCM_OPENP (port))
- -    {
- -      if (SCM_REVEALED (port) > 0)
- -      /* Keep "revealed" ports alive and re-register a finalizer.  */
- -      register_finalizer_for_port (port);
- -      else
- -      {
- -        port_type = SCM_TC2PTOBNUM (SCM_CELL_TYPE (port));
- -        if (port_type >= scm_numptob)
- -          abort ();
+ +  /* Valid characters are rw+a0l.  So, use latin1.  */
+ +  return scm_i_mode_bits (scm_from_latin1_string (modes));
+ +}
   
- -        if (scm_ptobs[port_type].free)
- -          /* Yes, I really do mean `.free' rather than `.close'.  `.close'
- -             is for explicit `close-port' by user.  */
- -          scm_ptobs[port_type].free (port);
+ +long
+ +scm_i_mode_bits (SCM modes)
+ +{
+ +  long bits;
   
- -        SCM_SETSTREAM (port, 0);
- -        SCM_CLR_PORT_OPEN_FLAG (port);
+ +  if (!scm_is_string (modes))
+ +    scm_wrong_type_arg_msg (NULL, 0, modes, "string");
   
- -        scm_gc_ports_collected++;
- -      }
- -    }
+ +  bits = scm_i_mode_bits_n (modes);
+ +  scm_remember_upto_here_1 (modes);
+ +  return bits;
   }
   
+ +/* Return the mode flags from an open port.
+ + * Some modes such as "append" are only used when opening
+ + * a file and are not returned here.  */
   
- -
- -\f
- -
- -/* This function is not and should not be thread safe. */
- -SCM
- -scm_new_port_table_entry (scm_t_bits tag)
- -#define FUNC_NAME "scm_new_port_table_entry"
+ +SCM_DEFINE (scm_port_mode, "port-mode", 1, 0, 0,
+ +           (SCM port),
+ +          "Return the port modes associated with the open port @var{port}.\n"
+ +          "These will not necessarily be identical to the modes used when\n"
+ +          "the port was opened, since modes such as \"append\" which are\n"
+ +          "used only during port creation are not retained.")
+ +#define FUNC_NAME s_scm_port_mode
   {
- -  /*
- -    We initialize the cell to empty, this is in case scm_gc_calloc
- -    triggers GC ; we don't want the GC to scan a half-finished Z.
- -   */
- -  
- -  SCM z = scm_cons (SCM_EOL, SCM_EOL);
- -  scm_t_port *entry = scm_gc_typed_calloc (scm_t_port);
- -  scm_t_port_internal *pti = scm_gc_typed_calloc (scm_t_port_internal);
- -  const char *encoding;
+ +  char modes[4];
+ +  modes[0] = '\0';
   
- -  entry->file_name = SCM_BOOL_F;
- -  entry->rw_active = SCM_PORT_NEITHER;
- -  entry->port = z;
- -
- -  /* Initialize this port with the thread's current default
- -     encoding.  */
- -  encoding = scm_i_default_port_encoding ();
- -  entry->ilseq_handler = scm_i_default_port_conversion_handler ();
- -  entry->encoding = encoding ? scm_gc_strdup (encoding, "port") : NULL;
- -  if (encoding && c_strcasecmp (encoding, "UTF-8") == 0)
- -    pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
- -  else
- -    pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
- -  pti->iconv_descriptors = NULL;
+ +  port = SCM_COERCE_OUTPORT (port);
+ +  SCM_VALIDATE_OPPORT (1, port);
+ +  if (SCM_CELL_WORD_0 (port) & SCM_RDNG) {
+ +    if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
+ +      strcpy (modes, "r+");
+ +    else
+ +      strcpy (modes, "r");
+ +  }
+ +  else if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
+ +    strcpy (modes, "w");
+ +  if (SCM_CELL_WORD_0 (port) & SCM_BUF0)
+ +    strcat (modes, "0");
   
- -  pti->at_stream_start_for_bom_read  = 1;
- -  pti->at_stream_start_for_bom_write = 1;
+ +  return scm_from_latin1_string (modes);
+ +}
+ +#undef FUNC_NAME
   
- -  /* XXX These fields are not what they seem.  They have been
- -     repurposed, but cannot safely be renamed in 2.0 without breaking
- -     ABI compatibility.  This will be cleaned up in 2.2.  */
- -  entry->input_cd = pti;   /* XXX pointer to the internal port structure */
- -  entry->output_cd = NULL; /* XXX unused */
   
- -  pti->pending_eof = 0;
- -  pti->alist = SCM_EOL;
+ +\f
   
- -  SCM_SET_CELL_TYPE (z, tag);
- -  SCM_SETPTAB_ENTRY (z, entry);
+ +/* The port table --- a weak set of all ports.
   
- -  scm_hashq_set_x (scm_i_port_weak_hash, z, SCM_BOOL_F);
+ +   We need a global registry of ports to flush them all at exit, and to
+ +   get all the ports matching a file descriptor.  */
+ +SCM scm_i_port_weak_set;
   
- -  /* For each new port, register a finalizer so that it port type's free
- -     function can be invoked eventually.  */
- -  register_finalizer_for_port (z);
   
- -  return z;
- -}
- -#undef FUNC_NAME
+ +\f
   
- -#if SCM_ENABLE_DEPRECATED==1
- -scm_t_port *
- -scm_add_to_port_table (SCM port)
+ +/* Port finalization.  */
+ +
+ +struct do_free_data
   {
- -  SCM z;
- -  scm_t_port * pt;
+ +  scm_t_ptob_descriptor *ptob;
+ +  SCM port;
+ +};
   
- -  scm_c_issue_deprecation_warning ("scm_add_to_port_table is deprecated.");
+ +static SCM
+ +do_free (void *body_data)
+ +{
+ +  struct do_free_data *data = body_data;
   
- -  scm_i_pthread_mutex_lock (&scm_i_port_table_mutex);
- -  z = scm_new_port_table_entry (scm_tc7_port);
- -  pt = SCM_PTAB_ENTRY(z);
- -  pt->port = port;
- -  SCM_SETCAR (z, SCM_EOL);
- -  SCM_SETCDR (z, SCM_EOL);
- -  SCM_SETPTAB_ENTRY (port, pt);
- -  scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex);
+ +  /* `close' is for explicit `close-port' by user.  `free' is for this
+ +     purpose: ports collected by the GC.  */
+ +  data->ptob->free (data->port);
   
- -  return pt;
+ +  return SCM_BOOL_T;
   }
- -#endif
- -
- -
- -/* Remove a port from the table and destroy it.  */
- -
- -static void close_iconv_descriptors (scm_t_iconv_descriptors *id);
   
+ +/* Finalize the object (a port) pointed to by PTR.  */
   static void
- -scm_i_remove_port (SCM port)
- -#define FUNC_NAME "scm_remove_port"
+ +finalize_port (void *ptr, void *data)
   {
- -  scm_t_port *p;
- -  scm_t_port_internal *pti;
+ +  SCM port = SCM_PACK_POINTER (ptr);
   
- -  scm_i_scm_pthread_mutex_lock (&scm_i_port_table_mutex);
- -
- -  p = SCM_PTAB_ENTRY (port);
- -  pti = SCM_PORT_GET_INTERNAL (port);
- -  scm_port_non_buffer (p);
- -  p->putback_buf = NULL;
- -  p->putback_buf_size = 0;
+ +  if (!SCM_PORTP (port))
+ +    abort ();
   
- -  if (pti->iconv_descriptors)
+ +  if (SCM_OPENP (port))
       {
- -      close_iconv_descriptors (pti->iconv_descriptors);
- -      pti->iconv_descriptors = NULL;
- -    }
+ +      struct do_free_data data;
+ +
+ +      SCM_CLR_PORT_OPEN_FLAG (port);
   
- -  SCM_SETPTAB_ENTRY (port, 0);
+ +      data.ptob = SCM_PORT_DESCRIPTOR (port);
+ +      data.port = port;
   
- -  scm_hashq_remove_x (scm_i_port_weak_hash, port);
+ +      scm_internal_catch (SCM_BOOL_T, do_free, &data,
+ +                          scm_handle_by_message_noexit, NULL);
   
- -  scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex);
+ +      scm_gc_ports_collected++;
+ +    }
   }
- -#undef FUNC_NAME
   
   
- -/* Functions for debugging.  */
- -#ifdef GUILE_DEBUG
- -SCM_DEFINE (scm_pt_size, "pt-size", 0, 0, 0,
- -            (),
- -          "Return the number of ports in the port table.  @code{pt-size}\n"
- -          "is only included in @code{--enable-guile-debug} builds.")
- -#define FUNC_NAME s_scm_pt_size
- -{
- -  return scm_from_int (SCM_HASHTABLE_N_ITEMS (scm_i_port_weak_hash));
- -}
- -#undef FUNC_NAME
- -#endif
+ +\f
   
- -void
- -scm_port_non_buffer (scm_t_port *pt)
+ +SCM
+ +scm_c_make_port_with_encoding (scm_t_bits tag, unsigned long mode_bits,
+ +                               const char *encoding,
+ +                               scm_t_string_failed_conversion_handler handler,
+ +                               scm_t_bits stream)
   {
- -  pt->read_pos = pt->read_buf = pt->read_end = &pt->shortbuf;
- -  pt->write_buf = pt->write_pos = &pt->shortbuf;
- -  pt->read_buf_size = pt->write_buf_size = 1;
- -  pt->write_end = pt->write_buf + pt->write_buf_size;
- -}
+ +  SCM ret;
+ +  scm_t_port *entry;
++  scm_t_port_internal *pti;
+ +  scm_t_ptob_descriptor *ptob;
   
-   entry = (scm_t_port *) scm_gc_calloc (sizeof (scm_t_port), "port");
- -\f
- -/* Revealed counts --- an oddity inherited from SCSH.  */
++  entry = scm_gc_typed_calloc (scm_t_port);
++  pti = scm_gc_typed_calloc (scm_t_port_internal);
+ +  ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tag));
   
- -/* Find a port in the table and return its revealed count.
- -   Also used by the garbage collector.
- - */
+ +  ret = scm_words (tag | mode_bits, 3);
+ +  SCM_SET_CELL_WORD_1 (ret, (scm_t_bits) entry);
+ +  SCM_SET_CELL_WORD_2 (ret, (scm_t_bits) ptob);
   
- -int
- -scm_revealed_count (SCM port)
- -{
- -  return SCM_REVEALED(port);
- -}
+ +  entry->lock = scm_gc_malloc_pointerless (sizeof (*entry->lock), "port lock");
+ +  scm_i_pthread_mutex_init (entry->lock, scm_i_pthread_mutexattr_recursive);
+ +
++  entry->internal = pti;
+ +  entry->file_name = SCM_BOOL_F;
+ +  entry->rw_active = SCM_PORT_NEITHER;
+ +  entry->port = ret;
+ +  entry->stream = stream;
+ +
+ +  if (encoding_matches (encoding, "UTF-8"))
+ +    {
-       entry->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
++      pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
+ +      entry->encoding = "UTF-8";
+ +    }
+ +  else if (encoding_matches (encoding, "ISO-8859-1"))
+ +    {
-       entry->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
++      pti->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
+ +      entry->encoding = "ISO-8859-1";
+ +    }
+ +  else
+ +    {
-       entry->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
++      pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
+ +      entry->encoding = canonicalize_encoding (encoding);
+ +    }
   
-   entry->iconv_descriptors = NULL;
+ +  entry->ilseq_handler = handler;
++  pti->iconv_descriptors = NULL;
+ 
++  pti->at_stream_start_for_bom_read  = 1;
++  pti->at_stream_start_for_bom_write = 1;
   
-   entry->alist = SCM_EOL;
- -/* Return the revealed count for a port.  */
++  pti->pending_eof = 0;
++  pti->alist = SCM_EOL;
   
- -SCM_DEFINE (scm_port_revealed, "port-revealed", 1, 0, 0,
- -           (SCM port),
- -          "Return the revealed count for @var{port}.")
- -#define FUNC_NAME s_scm_port_revealed
- -{
- -  port = SCM_COERCE_OUTPORT (port);
- -  SCM_VALIDATE_OPENPORT (1, port);
- -  return scm_from_int (scm_revealed_count (port));
+ +  if (SCM_PORT_DESCRIPTOR (ret)->free)
+ +    scm_i_set_finalizer (SCM2PTR (ret), finalize_port, NULL);
+ +
+ +  if (SCM_PORT_DESCRIPTOR (ret)->flags & SCM_PORT_TYPE_HAS_FLUSH)
+ +    scm_weak_set_add_x (scm_i_port_weak_set, ret);
+ +
+ +  return ret;
   }
- -#undef FUNC_NAME
   
- -/* Set the revealed count for a port.  */
- -SCM_DEFINE (scm_set_port_revealed_x, "set-port-revealed!", 2, 0, 0,
- -           (SCM port, SCM rcount),
- -          "Sets the revealed count for a port to a given value.\n"
- -          "The return value is unspecified.")
- -#define FUNC_NAME s_scm_set_port_revealed_x
+ +SCM
+ +scm_c_make_port (scm_t_bits tag, unsigned long mode_bits, scm_t_bits stream)
   {
- -  port = SCM_COERCE_OUTPORT (port);
- -  SCM_VALIDATE_OPENPORT (1, port);
- -  SCM_REVEALED (port) = scm_to_int (rcount);
- -  return SCM_UNSPECIFIED;
+ +  return scm_c_make_port_with_encoding (tag, mode_bits,
+ +                                        scm_i_default_port_encoding (),
+ +                                        scm_i_default_port_conversion_handler (),
+ +                                        stream);
   }
- -#undef FUNC_NAME
   
+ +SCM
+ +scm_new_port_table_entry (scm_t_bits tag)
+ +{
+ +  return scm_c_make_port (tag, 0, 0);
+ +}
   
   \f
- -/* Retrieving a port's mode.  */
   
- -/* Return the flags that characterize a port based on the mode
- - * string used to open a file for that port.
- - *
- - * See PORT FLAGS in scm.h
- - */
+ +/* Predicates.  */
   
- -static long
- -scm_i_mode_bits_n (SCM modes)
+ +SCM_DEFINE (scm_port_p, "port?", 1, 0, 0,
+ +          (SCM x),
+ +          "Return a boolean indicating whether @var{x} is a port.\n"
+ +          "Equivalent to @code{(or (input-port? @var{x}) (output-port?\n"
+ +          "@var{x}))}.")
+ +#define FUNC_NAME s_scm_port_p
   {
- -  return (SCM_OPN
- -        | (scm_i_string_contains_char (modes, 'r') 
- -           || scm_i_string_contains_char (modes, '+') ? SCM_RDNG : 0)
- -        | (scm_i_string_contains_char (modes, 'w')
- -           || scm_i_string_contains_char (modes, 'a')
- -           || scm_i_string_contains_char (modes, '+') ? SCM_WRTNG : 0)
- -        | (scm_i_string_contains_char (modes, '0') ? SCM_BUF0 : 0)
- -        | (scm_i_string_contains_char (modes, 'l') ? SCM_BUFLINE : 0));
+ +  return scm_from_bool (SCM_PORTP (x));
   }
+ +#undef FUNC_NAME
   
- -long
- -scm_mode_bits (char *modes)
+ +SCM_DEFINE (scm_input_port_p, "input-port?", 1, 0, 0,
+ +           (SCM x),
+ +          "Return @code{#t} if @var{x} is an input port, otherwise return\n"
+ +          "@code{#f}.  Any object satisfying this predicate also satisfies\n"
+ +          "@code{port?}.")
+ +#define FUNC_NAME s_scm_input_port_p
   {
- -  return scm_i_mode_bits (scm_from_locale_string (modes));
+ +  return scm_from_bool (SCM_INPUT_PORT_P (x));
   }
+ +#undef FUNC_NAME
   
- -long
- -scm_i_mode_bits (SCM modes)
+ +SCM_DEFINE (scm_output_port_p, "output-port?", 1, 0, 0,
+ +           (SCM x),
+ +          "Return @code{#t} if @var{x} is an output port, otherwise return\n"
+ +          "@code{#f}.  Any object satisfying this predicate also satisfies\n"
+ +          "@code{port?}.")
+ +#define FUNC_NAME s_scm_output_port_p
   {
- -  long bits;
- -
- -  if (!scm_is_string (modes))
- -    scm_wrong_type_arg_msg (NULL, 0, modes, "string");
- -
- -  bits = scm_i_mode_bits_n (modes);
- -  scm_remember_upto_here_1 (modes);
- -  return bits;
+ +  x = SCM_COERCE_OUTPORT (x);
+ +  return scm_from_bool (SCM_OUTPUT_PORT_P (x));
   }
+ +#undef FUNC_NAME
   
- -/* Return the mode flags from an open port.
- - * Some modes such as "append" are only used when opening
- - * a file and are not returned here.  */
- -
- -SCM_DEFINE (scm_port_mode, "port-mode", 1, 0, 0,
+ +SCM_DEFINE (scm_port_closed_p, "port-closed?", 1, 0, 0,
              (SCM port),
- -          "Return the port modes associated with the open port @var{port}.\n"
- -          "These will not necessarily be identical to the modes used when\n"
- -          "the port was opened, since modes such as \"append\" which are\n"
- -          "used only during port creation are not retained.")
- -#define FUNC_NAME s_scm_port_mode
+ +          "Return @code{#t} if @var{port} is closed or @code{#f} if it is\n"
+ +          "open.")
+ +#define FUNC_NAME s_scm_port_closed_p
   {
- -  char modes[4];
- -  modes[0] = '\0';
+ +  SCM_VALIDATE_PORT (1, port);
+ +  return scm_from_bool (!SCM_OPPORTP (port));
+ +}
+ +#undef FUNC_NAME
   
- -  port = SCM_COERCE_OUTPORT (port);
- -  SCM_VALIDATE_OPPORT (1, port);
- -  if (SCM_CELL_WORD_0 (port) & SCM_RDNG) {
- -    if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
- -      strcpy (modes, "r+");
- -    else
- -      strcpy (modes, "r");
- -  }
- -  else if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
- -    strcpy (modes, "w");
- -  if (SCM_CELL_WORD_0 (port) & SCM_BUF0)
- -    strcat (modes, "0");
- -  return scm_from_locale_string (modes);
+ +SCM_DEFINE (scm_eof_object_p, "eof-object?", 1, 0, 0,
+ +           (SCM x),
+ +          "Return @code{#t} if @var{x} is an end-of-file object; otherwise\n"
+ +          "return @code{#f}.")
+ +#define FUNC_NAME s_scm_eof_object_p
+ +{
+ +  return scm_from_bool (SCM_EOF_OBJECT_P (x));
   }
   #undef FUNC_NAME
   
@@@ -783,7 -889,7 +815,7 @@@ SCM_DEFINE (scm_close_port, "close-port
             "descriptors.")
   #define FUNC_NAME s_scm_close_port
   {
-   scm_t_port *p;
- -  size_t i;
++  scm_t_port_internal *pti;
     int rv;
   
     port = SCM_COERCE_OUTPORT (port);
@@@ -791,28 -897,13 +823,28 @@@
     SCM_VALIDATE_PORT (1, port);
     if (SCM_CLOSEDP (port))
       return SCM_BOOL_F;
- -  i = SCM_PTOBNUM (port);
- -  if (scm_ptobs[i].close)
- -    rv = (scm_ptobs[i].close) (port);
+ +
-   p = SCM_PTAB_ENTRY (port);
++  pti = SCM_PORT_GET_INTERNAL (port);
+ +  SCM_CLR_PORT_OPEN_FLAG (port);
+ +
+ +  if (SCM_PORT_DESCRIPTOR (port)->flags & SCM_PORT_TYPE_HAS_FLUSH)
+ +    scm_weak_set_remove_x (scm_i_port_weak_set, port);
+ +
+ +  if (SCM_PORT_DESCRIPTOR (port)->close)
+ +    /* Note!  This may throw an exception.  Anything after this point
+ +       should be resilient to non-local exits.  */
+ +    rv = SCM_PORT_DESCRIPTOR (port)->close (port);
     else
       rv = 0;
- -  scm_i_remove_port (port);
- -  SCM_CLR_PORT_OPEN_FLAG (port);
+ +
-   if (p->iconv_descriptors)
++  if (pti->iconv_descriptors)
+ +    {
+ +      /* If we don't get here, the iconv_descriptors finalizer will
+ +         clean up. */
-       close_iconv_descriptors (p->iconv_descriptors);
-       p->iconv_descriptors = NULL;
++      close_iconv_descriptors (pti->iconv_descriptors);
++      pti->iconv_descriptors = NULL;
+ +    }
+ +
     return scm_from_bool (rv >= 0);
   }
   #undef FUNC_NAME
@@@ -897,420 -965,635 +929,494 @@@ scm_i_default_port_encoding (void
       }
   }
   
- -SCM_DEFINE (scm_port_for_each, "port-for-each", 1, 0, 0,
- -          (SCM proc),
- -          "Apply @var{proc} to each port in the Guile port table\n"
- -          "in turn.  The return value is unspecified.  More specifically,\n"
- -          "@var{proc} is applied exactly once to every port that exists\n"
- -          "in the system at the time @code{port-for-each} is invoked.\n"
- -          "Changes to the port table while @code{port-for-each} is running\n"
- -          "have no effect as far as @code{port-for-each} is concerned.") 
- -#define FUNC_NAME s_scm_port_for_each
- -{
- -  SCM ports;
+ +/* A fluid specifying the default conversion handler for newly created
+ +   ports.  Its value should be one of the symbols below.  */
+ +SCM_VARIABLE (default_conversion_strategy_var,
+ +            "%default-port-conversion-strategy");
   
- -  SCM_VALIDATE_PROC (1, proc);
+ +/* Whether the above fluid is initialized.  */
+ +static int scm_conversion_strategy_init = 0;
   
- -  /* Copy out the port table as a list so that we get strong references
- -     to all the values.  */
- -  scm_i_pthread_mutex_lock (&scm_i_port_table_mutex);
- -  ports = scm_internal_hash_fold (collect_keys, NULL,
- -                                SCM_EOL, scm_i_port_weak_hash);
- -  scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex);
+ +/* The possible conversion strategies.  */
+ +SCM_SYMBOL (sym_error, "error");
+ +SCM_SYMBOL (sym_substitute, "substitute");
+ +SCM_SYMBOL (sym_escape, "escape");
   
- -  for (; scm_is_pair (ports); ports = scm_cdr (ports))
- -    if (SCM_PORTP (SCM_CAR (ports)))
- -      scm_call_1 (proc, SCM_CAR (ports));
+ +/* Return the default failed encoding conversion policy for new created
+ +   ports.  */
+ +scm_t_string_failed_conversion_handler
+ +scm_i_default_port_conversion_handler (void)
+ +{
+ +  scm_t_string_failed_conversion_handler handler;
   
- -  return SCM_UNSPECIFIED;
- -}
- -#undef FUNC_NAME
+ +  if (!scm_conversion_strategy_init
+ +      || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
+ +    handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
+ +  else
+ +    {
+ +      SCM fluid, value;
   
+ +      fluid = SCM_VARIABLE_REF (default_conversion_strategy_var);
+ +      value = scm_fluid_ref (fluid);
   
- -\f
- -/* Utter miscellany.  Gosh, we should clean this up some time.  */
+ +      if (scm_is_eq (sym_substitute, value))
+ +      handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
+ +      else if (scm_is_eq (sym_escape, value))
+ +      handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
+ +      else
+ +      /* Default to 'error also when the fluid's value is not one of
+ +         the valid symbols.  */
+ +      handler = SCM_FAILED_CONVERSION_ERROR;
+ +    }
   
- -SCM_DEFINE (scm_input_port_p, "input-port?", 1, 0, 0,
- -           (SCM x),
- -          "Return @code{#t} if @var{x} is an input port, otherwise return\n"
- -          "@code{#f}.  Any object satisfying this predicate also satisfies\n"
- -          "@code{port?}.")
- -#define FUNC_NAME s_scm_input_port_p
- -{
- -  return scm_from_bool (SCM_INPUT_PORT_P (x));
+ +  return handler;
   }
- -#undef FUNC_NAME
   
- -SCM_DEFINE (scm_output_port_p, "output-port?", 1, 0, 0,
- -           (SCM x),
- -          "Return @code{#t} if @var{x} is an output port, otherwise return\n"
- -          "@code{#f}.  Any object satisfying this predicate also satisfies\n"
- -          "@code{port?}.")
- -#define FUNC_NAME s_scm_output_port_p
+ +/* Use HANDLER as the default conversion strategy for future ports.  */
+ +void
+ +scm_i_set_default_port_conversion_handler (scm_t_string_failed_conversion_handler
+ +                                         handler)
   {
- -  x = SCM_COERCE_OUTPORT (x);
- -  return scm_from_bool (SCM_OUTPUT_PORT_P (x));
- -}
- -#undef FUNC_NAME
+ +  SCM strategy;
   
- -SCM_DEFINE (scm_port_p, "port?", 1, 0, 0,
- -          (SCM x),
- -          "Return a boolean indicating whether @var{x} is a port.\n"
- -          "Equivalent to @code{(or (input-port? @var{x}) (output-port?\n"
- -          "@var{x}))}.")
- -#define FUNC_NAME s_scm_port_p
- -{
- -  return scm_from_bool (SCM_PORTP (x));
- -}
- -#undef FUNC_NAME
+ +  if (!scm_conversion_strategy_init
+ +      || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
+ +    scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized",
+ +                  SCM_EOL);
   
- -SCM_DEFINE (scm_port_closed_p, "port-closed?", 1, 0, 0,
- -           (SCM port),
- -          "Return @code{#t} if @var{port} is closed or @code{#f} if it is\n"
- -          "open.")
- -#define FUNC_NAME s_scm_port_closed_p
- -{
- -  SCM_VALIDATE_PORT (1, port);
- -  return scm_from_bool (!SCM_OPPORTP (port));
- -}
- -#undef FUNC_NAME
+ +  switch (handler)
+ +    {
+ +    case SCM_FAILED_CONVERSION_ERROR:
+ +      strategy = sym_error;
+ +      break;
   
- -SCM_DEFINE (scm_eof_object_p, "eof-object?", 1, 0, 0,
- -           (SCM x),
- -          "Return @code{#t} if @var{x} is an end-of-file object; otherwise\n"
- -          "return @code{#f}.")
- -#define FUNC_NAME s_scm_eof_object_p
- -{
- -  return scm_from_bool(SCM_EOF_OBJECT_P (x));
+ +    case SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE:
+ +      strategy = sym_escape;
+ +      break;
+ +
+ +    case SCM_FAILED_CONVERSION_QUESTION_MARK:
+ +      strategy = sym_substitute;
+ +      break;
+ +
+ +    default:
+ +      abort ();
+ +    }
+ +
+ +  scm_fluid_set_x (SCM_VARIABLE_REF (default_conversion_strategy_var),
+ +                 strategy);
   }
- -#undef FUNC_NAME
   
- -SCM_DEFINE (scm_force_output, "force-output", 0, 1, 0,
- -           (SCM port),
- -          "Flush the specified output port, or the current output port if @var{port}\n"
- -          "is omitted.  The current output buffer contents are passed to the\n"
- -          "underlying port implementation (e.g., in the case of fports, the\n"
- -          "data will be written to the file and the output buffer will be cleared.)\n"
- -          "It has no effect on an unbuffered port.\n\n"
- -          "The return value is unspecified.")
- -#define FUNC_NAME s_scm_force_output
++static void
++scm_i_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port);
++
++/* If the next LEN bytes from PORT are equal to those in BYTES, then
++   return 1, else return 0.  Leave the port position unchanged.  */
++static int
++looking_at_bytes (SCM port, const unsigned char *bytes, int len)
+ {
- -  if (SCM_UNBNDP (port))
- -    port = scm_current_output_port ();
- -  else
++  scm_t_port *pt = SCM_PTAB_ENTRY (port);
++  int i = 0;
++
++  while (i < len && scm_peek_byte_or_eof_unlocked (port) == bytes[i])
+     {
- -      port = SCM_COERCE_OUTPORT (port);
- -      SCM_VALIDATE_OPOUTPORT (1, port);
++      pt->read_pos++;
++      i++;
+     }
- -  scm_flush (port);
- -  return SCM_UNSPECIFIED;
++  scm_i_unget_bytes_unlocked (bytes, i, port);
++  return (i == len);
+ }
- -#undef FUNC_NAME
+ 
++static const unsigned char scm_utf8_bom[3]    = {0xEF, 0xBB, 0xBF};
++static const unsigned char scm_utf16be_bom[2] = {0xFE, 0xFF};
++static const unsigned char scm_utf16le_bom[2] = {0xFF, 0xFE};
++static const unsigned char scm_utf32be_bom[4] = {0x00, 0x00, 0xFE, 0xFF};
++static const unsigned char scm_utf32le_bom[4] = {0xFF, 0xFE, 0x00, 0x00};
+ 
- -static void
- -flush_output_port (void *closure, SCM port)
++/* Decide what byte order to use for a UTF-16 port.  Return "UTF-16BE"
++   or "UTF-16LE".  MODE must be either SCM_PORT_READ or SCM_PORT_WRITE,
++   and specifies which operation is about to be done.  The MODE
++   determines how we will decide the byte order.  We deliberately avoid
++   reading from the port unless the user is about to do so.  If the user
++   is about to read, then we look for a BOM, and if present, we use it
++   to determine the byte order.  Otherwise we choose big endian, as
++   recommended by the Unicode Standard.  Note that the BOM (if any) is
++   not consumed here.  */
++static const char *
++decide_utf16_encoding (SCM port, scm_t_port_rw_active mode)
+ {
- -  if (SCM_OPOUTPORTP (port))
- -    scm_flush (port);
++  if (mode == SCM_PORT_READ
++      && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
++      && looking_at_bytes (port, scm_utf16le_bom, sizeof scm_utf16le_bom))
++    return "UTF-16LE";
++  else
++    return "UTF-16BE";
+ }
+ 
- -SCM_DEFINE (scm_flush_all_ports, "flush-all-ports", 0, 0, 0,
- -            (),
- -          "Equivalent to calling @code{force-output} on\n"
- -          "all open output ports.  The return value is unspecified.")
- -#define FUNC_NAME s_scm_flush_all_ports
++/* Decide what byte order to use for a UTF-32 port.  Return "UTF-32BE"
++   or "UTF-32LE".  See the comment above 'decide_utf16_encoding' for
++   details.  */
++static const char *
++decide_utf32_encoding (SCM port, scm_t_port_rw_active mode)
+ {
- -  scm_c_port_for_each (&flush_output_port, NULL);
- -  return SCM_UNSPECIFIED;
++  if (mode == SCM_PORT_READ
++      && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
++      && looking_at_bytes (port, scm_utf32le_bom, sizeof scm_utf32le_bom))
++    return "UTF-32LE";
++  else
++    return "UTF-32BE";
+ }
- -#undef FUNC_NAME
+ 
- -SCM_DEFINE (scm_read_char, "read-char", 0, 1, 0,
- -           (SCM port),
- -          "Return the next character available from @var{port}, updating\n"
- -          "@var{port} to point to the following character.  If no more\n"
- -          "characters are available, the end-of-file object is returned.\n"
- -          "\n"
- -          "When @var{port}'s data cannot be decoded according to its\n"
- -          "character encoding, a @code{decoding-error} is raised and\n"
- -          "@var{port} points past the erroneous byte sequence.\n")
- -#define FUNC_NAME s_scm_read_char
+ +static void
+ +finalize_iconv_descriptors (void *ptr, void *data)
   {
- -  scm_t_wchar c;
- -  if (SCM_UNBNDP (port))
- -    port = scm_current_input_port ();
- -  SCM_VALIDATE_OPINPORT (1, port);
- -  c = scm_getc (port);
- -  if (EOF == c)
- -    return SCM_EOF_VAL;
- -  return SCM_MAKE_CHAR (c);
+ +  close_iconv_descriptors (ptr);
   }
- -#undef FUNC_NAME
   
- -/* Update the line and column number of PORT after consumption of C.  */
- -static inline void
- -update_port_lf (scm_t_wchar c, SCM port)
+ +static scm_t_iconv_descriptors *
+ +open_iconv_descriptors (const char *encoding, int reading, int writing)
   {
- -  switch (c)
- -    {
- -    case '\a':
- -    case EOF:
- -      break;
- -    case '\b':
- -      SCM_DECCOL (port);
- -      break;
- -    case '\n':
- -      SCM_INCLINE (port);
- -      break;
- -    case '\r':
- -      SCM_ZEROCOL (port);
- -      break;
- -    case '\t':
- -      SCM_TABCOL (port);
- -      break;
- -    default:
- -      SCM_INCCOL (port);
- -      break;
- -    }
- -}
+ +  scm_t_iconv_descriptors *id;
+ +  iconv_t input_cd, output_cd;
+ +  size_t i;
   
- -#define SCM_MBCHAR_BUF_SIZE (4)
+ +  input_cd = (iconv_t) -1;
+ +  output_cd = (iconv_t) -1;
   
- -/* Convert the SIZE-byte UTF-8 sequence in UTF8_BUF to a codepoint.
- -   UTF8_BUF is assumed to contain a valid UTF-8 sequence.  */
- -static scm_t_wchar
- -utf8_to_codepoint (const scm_t_uint8 *utf8_buf, size_t size)
- -{
- -  scm_t_wchar codepoint;
+ +  for (i = 0; encoding[i]; i++)
+ +    if (encoding[i] > 127)
+ +      goto invalid_encoding;
   
- -  if (utf8_buf[0] <= 0x7f)
- -    {
- -      assert (size == 1);
- -      codepoint = utf8_buf[0];
- -    }
- -  else if ((utf8_buf[0] & 0xe0) == 0xc0)
+ +  if (reading)
       {
- -      assert (size == 2);
- -      codepoint = ((scm_t_wchar) utf8_buf[0] & 0x1f) << 6UL
- -      | (utf8_buf[1] & 0x3f);
- -    }
- -  else if ((utf8_buf[0] & 0xf0) == 0xe0)
- -    {
- -      assert (size == 3);
- -      codepoint = ((scm_t_wchar) utf8_buf[0] & 0x0f) << 12UL
- -      | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 6UL
- -      | (utf8_buf[2] & 0x3f);
- -    }
- -  else
- -    {
- -      assert (size == 4);
- -      codepoint = ((scm_t_wchar) utf8_buf[0] & 0x07) << 18UL
- -      | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 12UL
- -      | ((scm_t_wchar) utf8_buf[2] & 0x3f) << 6UL
- -      | (utf8_buf[3] & 0x3f);
- -    }
- -
- -  return codepoint;
- -}
- -
- -/* Read a UTF-8 sequence from PORT.  On success, return 0 and set
- -   *CODEPOINT to the codepoint that was read, fill BUF with its UTF-8
- -   representation, and set *LEN to the length in bytes.  Return
- -   `EILSEQ' on error.  */
- -static int
- -get_utf8_codepoint (SCM port, scm_t_wchar *codepoint,
- -                  scm_t_uint8 buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
- -{
- -#define ASSERT_NOT_EOF(b)                     \
- -  if (SCM_UNLIKELY ((b) == EOF))              \
- -    goto invalid_seq
- -#define CONSUME_PEEKED_BYTE()                         \
- -  pt->read_pos++
- -
- -  int byte;
- -  scm_t_port *pt;
+ +      /* Open an input iconv conversion descriptor, from ENCODING
+ +         to UTF-8.  We choose UTF-8, not UTF-32, because iconv
+ +         implementations can typically convert from anything to
+ +         UTF-8, but not to UTF-32 (see
+ +         <http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html>).  */
   
- -  *len = 0;
- -  pt = SCM_PTAB_ENTRY (port);
+ +      /* Assume opening an iconv descriptor causes about 16 KB of
+ +         allocation.  */
+ +      scm_gc_register_allocation (16 * 1024);
   
- -  byte = scm_get_byte_or_eof (port);
- -  if (byte == EOF)
- -    {
- -      *codepoint = EOF;
- -      return 0;
+ +      input_cd = iconv_open ("UTF-8", encoding);
+ +      if (input_cd == (iconv_t) -1)
+ +        goto invalid_encoding;
       }
   
- -  buf[0] = (scm_t_uint8) byte;
- -  *len = 1;
- -
- -  if (buf[0] <= 0x7f)
- -    /* 1-byte form.  */
- -    *codepoint = buf[0];
- -  else if (buf[0] >= 0xc2 && buf[0] <= 0xdf)
+ +  if (writing)
       {
- -      /* 2-byte form.  */
- -      byte = scm_peek_byte_or_eof (port);
- -      ASSERT_NOT_EOF (byte);
+ +      /* Assume opening an iconv descriptor causes about 16 KB of
+ +         allocation.  */
+ +      scm_gc_register_allocation (16 * 1024);
   
- -      if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
- -      goto invalid_seq;
+ +      output_cd = iconv_open (encoding, "UTF-8");
+ +      if (output_cd == (iconv_t) -1)
+ +        {
+ +          if (input_cd != (iconv_t) -1)
+ +            iconv_close (input_cd);
+ +          goto invalid_encoding;
+ +        }
+ +    }
   
- -      CONSUME_PEEKED_BYTE ();
- -      buf[1] = (scm_t_uint8) byte;
- -      *len = 2;
+ +  id = scm_gc_malloc_pointerless (sizeof (*id), "iconv descriptors");
+ +  id->input_cd = input_cd;
+ +  id->output_cd = output_cd;
   
- -      *codepoint = ((scm_t_wchar) buf[0] & 0x1f) << 6UL
- -      | (buf[1] & 0x3f);
- -    }
- -  else if ((buf[0] & 0xf0) == 0xe0)
- -    {
- -      /* 3-byte form.  */
- -      byte = scm_peek_byte_or_eof (port);
- -      ASSERT_NOT_EOF (byte);
+ +  /* Register a finalizer to close the descriptors.  */
+ +  scm_i_set_finalizer (id, finalize_iconv_descriptors, NULL);
   
- -      if (SCM_UNLIKELY ((byte & 0xc0) != 0x80
- -                      || (buf[0] == 0xe0 && byte < 0xa0)
- -                      || (buf[0] == 0xed && byte > 0x9f)))
- -      goto invalid_seq;
+ +  return id;
   
- -      CONSUME_PEEKED_BYTE ();
- -      buf[1] = (scm_t_uint8) byte;
- -      *len = 2;
+ + invalid_encoding:
+ +  {
+ +    SCM err;
+ +    err = scm_from_latin1_string (encoding);
+ +    scm_misc_error ("open_iconv_descriptors",
+ +                  "invalid or unknown character encoding ~s",
+ +                  scm_list_1 (err));
+ +  }
+ +}
   
- -      byte = scm_peek_byte_or_eof (port);
- -      ASSERT_NOT_EOF (byte);
+ +static void
+ +close_iconv_descriptors (scm_t_iconv_descriptors *id)
+ +{
+ +  if (id->input_cd != (iconv_t) -1)
+ +    iconv_close (id->input_cd);
+ +  if (id->output_cd != (iconv_t) -1)
+ +    iconv_close (id->output_cd);
+ +  id->input_cd = (void *) -1;
+ +  id->output_cd = (void *) -1;
+ +}
   
- -      if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
- -      goto invalid_seq;
+ +scm_t_iconv_descriptors *
- scm_i_port_iconv_descriptors (SCM port)
++scm_i_port_iconv_descriptors (SCM port, scm_t_port_rw_active mode)
+ +{
-   scm_t_port *pt;
++  scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
   
-   pt = SCM_PTAB_ENTRY (port);
- -      CONSUME_PEEKED_BYTE ();
- -      buf[2] = (scm_t_uint8) byte;
- -      *len = 3;
++  assert (pti->encoding_mode == SCM_PORT_ENCODING_MODE_ICONV);
   
-   assert (pt->encoding_mode == SCM_PORT_ENCODING_MODE_ICONV);
- 
-   if (!pt->iconv_descriptors)
- -      *codepoint = ((scm_t_wchar) buf[0] & 0x0f) << 12UL
- -      | ((scm_t_wchar) buf[1] & 0x3f) << 6UL
- -      | (buf[2] & 0x3f);
- -    }
- -  else if (buf[0] >= 0xf0 && buf[0] <= 0xf4)
++  if (!pti->iconv_descriptors)
       {
- -      /* 4-byte form.  */
- -      byte = scm_peek_byte_or_eof (port);
- -      ASSERT_NOT_EOF (byte);
- -
- -      if (SCM_UNLIKELY (((byte & 0xc0) != 0x80)
- -                      || (buf[0] == 0xf0 && byte < 0x90)
- -                      || (buf[0] == 0xf4 && byte > 0x8f)))
- -      goto invalid_seq;
++      scm_t_port *pt = SCM_PTAB_ENTRY (port);
++      const char *precise_encoding;
+ 
- -      CONSUME_PEEKED_BYTE ();
- -      buf[1] = (scm_t_uint8) byte;
- -      *len = 2;
+ +      if (!pt->encoding)
+ +        pt->encoding = "ISO-8859-1";
-       pt->iconv_descriptors =
-         open_iconv_descriptors (pt->encoding,
+ 
- -      byte = scm_peek_byte_or_eof (port);
- -      ASSERT_NOT_EOF (byte);
++      /* If the specified encoding is UTF-16 or UTF-32, then make
++         that more precise by deciding what byte order to use. */
++      if (strcmp (pt->encoding, "UTF-16") == 0)
++        precise_encoding = decide_utf16_encoding (port, mode);
++      else if (strcmp (pt->encoding, "UTF-32") == 0)
++        precise_encoding = decide_utf32_encoding (port, mode);
++      else
++        precise_encoding = pt->encoding;
+ 
- -      if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
- -      goto invalid_seq;
++      pti->iconv_descriptors =
++        open_iconv_descriptors (precise_encoding,
+ +                                SCM_INPUT_PORT_P (port),
+ +                                SCM_OUTPUT_PORT_P (port));
+ +    }
   
-   return pt->iconv_descriptors;
- -      CONSUME_PEEKED_BYTE ();
- -      buf[2] = (scm_t_uint8) byte;
- -      *len = 3;
++  return pti->iconv_descriptors;
+ +}
   
- -      byte = scm_peek_byte_or_eof (port);
- -      ASSERT_NOT_EOF (byte);
+ +/* The name of the encoding is itself encoded in ASCII.  */
+ +void
+ +scm_i_set_port_encoding_x (SCM port, const char *encoding)
+ +{
+ +  scm_t_port *pt;
++  scm_t_port_internal *pti;
+ +  scm_t_iconv_descriptors *prev;
   
- -      if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
- -      goto invalid_seq;
+ +  /* Set the character encoding for this port.  */
+ +  pt = SCM_PTAB_ENTRY (port);
-   prev = pt->iconv_descriptors;
++  pti = SCM_PORT_GET_INTERNAL (port);
++  prev = pti->iconv_descriptors;
+ 
- -      CONSUME_PEEKED_BYTE ();
- -      buf[3] = (scm_t_uint8) byte;
- -      *len = 4;
++  /* In order to handle cases where the encoding changes mid-stream
++     (e.g. within an HTTP stream, or within a file that is composed of
++     segments with different encodings), we consider this to be "stream
++     start" for purposes of BOM handling, regardless of our actual file
++     position. */
++  pti->at_stream_start_for_bom_read  = 1;
++  pti->at_stream_start_for_bom_write = 1;
   
- -      *codepoint = ((scm_t_wchar) buf[0] & 0x07) << 18UL
- -      | ((scm_t_wchar) buf[1] & 0x3f) << 12UL
- -      | ((scm_t_wchar) buf[2] & 0x3f) << 6UL
- -      | (buf[3] & 0x3f);
+ +  if (encoding_matches (encoding, "UTF-8"))
+ +    {
+ +      pt->encoding = "UTF-8";
-       pt->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
-       pt->iconv_descriptors = NULL;
++      pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
+ +    }
+ +  else if (encoding_matches (encoding, "ISO-8859-1"))
+ +    {
+ +      pt->encoding = "ISO-8859-1";
-       pt->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
-       pt->iconv_descriptors = NULL;
++      pti->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
       }
     else
- -    goto invalid_seq;
- -
- -  return 0;
+ +    {
-       /* Open descriptors before mutating the port. */
-       char *gc_encoding = canonicalize_encoding (encoding);
-       pt->iconv_descriptors =
-         open_iconv_descriptors (gc_encoding,
-                                 SCM_INPUT_PORT_P (port),
-                                 SCM_OUTPUT_PORT_P (port));
-       pt->encoding = gc_encoding;
-       pt->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
++      pt->encoding = canonicalize_encoding (encoding);
++      pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
+ +    }
   
- - invalid_seq:
- -  /* Here we could choose the consume the faulty byte when it's not a
- -     valid starting byte, but it's not a requirement.  What Section 3.9
- -     of Unicode 6.0.0 mandates, though, is to not consume a byte that
- -     would otherwise be a valid starting byte.  */
++  pti->iconv_descriptors = NULL;
+ +  if (prev)
+ +    close_iconv_descriptors (prev);
+ +}
   
- -  return EILSEQ;
+ +SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0,
+ +          (SCM port),
+ +          "Returns, as a string, the character encoding that @var{port}\n"
+ +          "uses to interpret its input and output.\n")
+ +#define FUNC_NAME s_scm_port_encoding
+ +{
+ +  SCM_VALIDATE_PORT (1, port);
   
- -#undef CONSUME_PEEKED_BYTE
- -#undef ASSERT_NOT_EOF
+ +  return scm_from_latin1_string (SCM_PTAB_ENTRY (port)->encoding);
   }
+ +#undef FUNC_NAME
   
- -/* Likewise, read a byte sequence from PORT, passing it through its
- -   input conversion descriptor.  */
- -static int
- -get_iconv_codepoint (SCM port, scm_t_wchar *codepoint,
- -                   char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
+ +SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0,
+ +          (SCM port, SCM enc),
+ +          "Sets the character encoding that will be used to interpret all\n"
+ +          "port I/O.  New ports are created with the encoding\n"
+ +          "appropriate for the current locale if @code{setlocale} has \n"
+ +          "been called or ISO-8859-1 otherwise\n"
+ +          "and this procedure can be used to modify that encoding.\n")
+ +#define FUNC_NAME s_scm_set_port_encoding_x
   {
- -  scm_t_iconv_descriptors *id;
- -  scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
- -  size_t input_size = 0;
+ +  char *enc_str;
   
- -  id = scm_i_port_iconv_descriptors (port, SCM_PORT_READ);
+ +  SCM_VALIDATE_PORT (1, port);
+ +  SCM_VALIDATE_STRING (2, enc);
   
- -  for (;;)
- -    {
- -      int byte_read;
- -      char *input, *output;
- -      size_t input_left, output_left, done;
+ +  enc_str = scm_to_latin1_string (enc);
+ +  scm_i_set_port_encoding_x (port, enc_str);
+ +  free (enc_str);
   
- -      byte_read = scm_get_byte_or_eof (port);
- -      if (SCM_UNLIKELY (byte_read == EOF))
- -      {
- -          if (SCM_LIKELY (input_size == 0))
- -            {
- -              *codepoint = (scm_t_wchar) EOF;
- -              *len = input_size;
- -              return 0;
- -            }
- -          else
- -            {
- -              /* EOF found in the middle of a multibyte character. */
- -              scm_i_set_pending_eof (port);
- -              return EILSEQ;
- -            }
- -      }
+ +  return SCM_UNSPECIFIED;
+ +}
+ +#undef FUNC_NAME
   
- -      buf[input_size++] = byte_read;
+ +SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy",
+ +          1, 0, 0, (SCM port),
+ +          "Returns the behavior of the port when handling a character that\n"
+ +          "is not representable in the port's current encoding.\n"
+ +          "It returns the symbol @code{error} if unrepresentable characters\n"
+ +          "should cause exceptions, @code{substitute} if the port should\n"
+ +          "try to replace unrepresentable characters with question marks or\n"
+ +          "approximate characters, or @code{escape} if unrepresentable\n"
+ +          "characters should be converted to string escapes.\n"
+ +          "\n"
+ +          "If @var{port} is @code{#f}, then the current default behavior\n"
+ +          "will be returned.  New ports will have this default behavior\n"
+ +          "when they are created.\n")
+ +#define FUNC_NAME s_scm_port_conversion_strategy
+ +{
+ +  scm_t_string_failed_conversion_handler h;
   
- -      input = buf;
- -      input_left = input_size;
- -      output = (char *) utf8_buf;
- -      output_left = sizeof (utf8_buf);
+ +  SCM_VALIDATE_OPPORT (1, port);
   
- -      done = iconv (id->input_cd, &input, &input_left, &output, &output_left);
+ +  if (scm_is_false (port))
+ +    h = scm_i_default_port_conversion_handler ();
+ +  else
+ +    {
+ +      scm_t_port *pt;
   
- -      if (done == (size_t) -1)
- -      {
- -        int err = errno;
- -        if (SCM_LIKELY (err == EINVAL))
- -            /* The input byte sequence did not form a complete
- -               character.  Read another byte and try again. */
- -            continue;
- -          else
- -            return err;
- -      }
- -      else
- -        {
- -          size_t output_size = sizeof (utf8_buf) - output_left;
- -          if (SCM_LIKELY (output_size > 0))
- -            {
- -              /* iconv generated output.  Convert the UTF8_BUF sequence
- -                 to a Unicode code point.  */
- -              *codepoint = utf8_to_codepoint (utf8_buf, output_size);
- -              *len = input_size;
- -              return 0;
- -            }
- -          else
- -            {
- -              /* iconv consumed some bytes without producing any output.
- -                 Most likely this means that a Unicode byte-order mark
- -                 (BOM) was consumed, which should not be included in the
- -                 returned buf.  Shift any remaining bytes to the beginning
- -                 of buf, and continue the loop. */
- -              memmove (buf, input, input_left);
- -              input_size = input_left;
- -              continue;
- -            }
- -        }
+ +      SCM_VALIDATE_OPPORT (1, port);
+ +      pt = SCM_PTAB_ENTRY (port);
+ +
+ +      h = pt->ilseq_handler;
       }
+ +
+ +  if (h == SCM_FAILED_CONVERSION_ERROR)
+ +    return scm_from_latin1_symbol ("error");
+ +  else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK)
+ +    return scm_from_latin1_symbol ("substitute");
+ +  else if (h == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
+ +    return scm_from_latin1_symbol ("escape");
+ +  else
+ +    abort ();
+ +
+ +  /* Never gets here. */
+ +  return SCM_UNDEFINED;
   }
+ +#undef FUNC_NAME
   
- -/* Read a codepoint from PORT and return it in *CODEPOINT.  Fill BUF
- -   with the byte representation of the codepoint in PORT's encoding, and
- -   set *LEN to the length in bytes of that representation.  Return 0 on
- -   success and an errno value on error.  */
- -static int
- -get_codepoint (SCM port, scm_t_wchar *codepoint,
- -             char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
+ +SCM_DEFINE (scm_set_port_conversion_strategy_x, "set-port-conversion-strategy!",
+ +          2, 0, 0, 
+ +          (SCM port, SCM sym),
+ +          "Sets the behavior of the interpreter when outputting a character\n"
+ +          "that is not representable in the port's current encoding.\n"
+ +          "@var{sym} can be either @code{'error}, @code{'substitute}, or\n"
+ +          "@code{'escape}.  If it is @code{'error}, an error will be thrown\n"
+ +          "when an unconvertible character is encountered.  If it is\n"
+ +          "@code{'substitute}, then unconvertible characters will \n"
+ +          "be replaced with approximate characters, or with question marks\n"
+ +          "if no approximately correct character is available.\n"
+ +          "If it is @code{'escape},\n"
+ +          "it will appear as a hex escape when output.\n"
+ +          "\n"
+ +          "If @var{port} is an open port, the conversion error behavior\n"
+ +          "is set for that port.  If it is @code{#f}, it is set as the\n"
+ +          "default behavior for any future ports that get created in\n"
+ +          "this thread.\n")
+ +#define FUNC_NAME s_scm_set_port_conversion_strategy_x
   {
- -  int err;
- -  scm_t_port *pt = SCM_PTAB_ENTRY (port);
- -  scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
+ +  scm_t_string_failed_conversion_handler handler;
   
- -  if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
- -    err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len);
+ +  if (scm_is_eq (sym, sym_error))
+ +    handler = SCM_FAILED_CONVERSION_ERROR;
+ +  else if (scm_is_eq (sym, sym_substitute))
+ +    handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
+ +  else if (scm_is_eq (sym, sym_escape))
+ +    handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
     else
- -    err = get_iconv_codepoint (port, codepoint, buf, len);
- -
- -  if (SCM_LIKELY (err == 0))
- -    {
- -      if (SCM_UNLIKELY (pti->at_stream_start_for_bom_read))
- -        {
- -          /* Record that we're no longer at stream start. */
- -          pti->at_stream_start_for_bom_read = 0;
- -          if (pt->rw_random)
- -            pti->at_stream_start_for_bom_write = 0;
+ +    SCM_MISC_ERROR ("unknown conversion strategy ~s", scm_list_1 (sym));
   
- -          /* If we just read a BOM in an encoding that recognizes them,
- -             then silently consume it and read another code point. */
- -          if (SCM_UNLIKELY
- -              (*codepoint == SCM_UNICODE_BOM
- -               && (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8
- -                   || c_strcasecmp (pt->encoding, "UTF-16") == 0
- -                   || c_strcasecmp (pt->encoding, "UTF-32") == 0)))
- -            return get_codepoint (port, codepoint, buf, len);
- -        }
- -      update_port_lf (*codepoint, port);
- -    }
- -  else if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
+ +  if (scm_is_false (port))
+ +    scm_i_set_default_port_conversion_handler (handler);
+ +  else
       {
- -      *codepoint = '?';
- -      err = 0;
- -      update_port_lf (*codepoint, port);
+ +      SCM_VALIDATE_OPPORT (1, port);
+ +      SCM_PTAB_ENTRY (port)->ilseq_handler = handler;
       }
   
- -  return err;
+ +  return SCM_UNSPECIFIED;
   }
+ +#undef FUNC_NAME
   
- -/* Read a codepoint from PORT and return it.  */
- -scm_t_wchar
- -scm_getc (SCM port)
- -#define FUNC_NAME "scm_getc"
- -{
- -  int err;
- -  size_t len;
- -  scm_t_wchar codepoint;
- -  char buf[SCM_MBCHAR_BUF_SIZE];
   
- -  err = get_codepoint (port, &codepoint, buf, &len);
- -  if (SCM_UNLIKELY (err != 0))
- -    /* At this point PORT should point past the invalid encoding, as per
- -       R6RS-lib Section 8.2.4.  */
- -    scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
+ +\f
   
- -  return codepoint;
- -}
- -#undef FUNC_NAME
+ +/* The port lock.  */
   
- -/* this should only be called when the read buffer is empty.  it
- -   tries to refill the read buffer.  it returns the first char from
- -   the port, which is either EOF or *(pt->read_pos).  */
- -static int
- -scm_i_fill_input (SCM port)
+ +static void
+ +lock_port (void *mutex)
   {
- -  scm_t_port *pt = SCM_PTAB_ENTRY (port);
- -  scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
- -
- -  assert (pt->read_pos == pt->read_end);
+ +  scm_i_pthread_mutex_lock (mutex);
+ +}
   
- -  if (pti->pending_eof)
- -    {
- -      pti->pending_eof = 0;
- -      return EOF;
- -    }
+ +static void
+ +unlock_port (void *mutex)
+ +{
+ +  scm_i_pthread_mutex_unlock (mutex);
+ +}
   
- -  if (pt->read_buf == pt->putback_buf)
+ +void
+ +scm_dynwind_lock_port (SCM port)
+ +#define FUNC_NAME "dynwind-lock-port"
+ +{
+ +  scm_i_pthread_mutex_t *lock;
+ +  SCM_VALIDATE_OPPORT (SCM_ARG1, port);
+ +  scm_c_lock_port (port, &lock);
+ +  if (lock)
       {
- -      /* finished reading put-back chars.  */
- -      pt->read_buf = pt->saved_read_buf;
- -      pt->read_pos = pt->saved_read_pos;
- -      pt->read_end = pt->saved_read_end;
- -      pt->read_buf_size = pt->saved_read_buf_size;
- -      if (pt->read_pos < pt->read_end)
- -      return *(pt->read_pos);
+ +      scm_dynwind_unwind_handler (unlock_port, lock, SCM_F_WIND_EXPLICITLY);
+ +      scm_dynwind_rewind_handler (lock_port, lock, 0);
       }
- -  return scm_ptobs[SCM_PTOBNUM (port)].fill_input (port);
   }
+ +#undef FUNC_NAME
   
- -int
- -scm_fill_input (SCM port)
- -{
- -  return scm_i_fill_input (port);
- -}
   
- -/* Slow-path fallback for 'scm_get_byte_or_eof' in inline.h */
- -int
- -scm_slow_get_byte_or_eof (SCM port)
- -{
- -  scm_t_port *pt = SCM_PTAB_ENTRY (port);
+ +\f
   
- -  if (pt->rw_active == SCM_PORT_WRITE)
- -    scm_flush (port);
+ +/* Input.  */
   
- -  if (pt->rw_random)
- -    pt->rw_active = SCM_PORT_READ;
+ +int
+ +scm_get_byte_or_eof (SCM port)
+ +{
+ +  scm_i_pthread_mutex_t *lock;
+ +  int ret;
   
- -  if (pt->read_pos >= pt->read_end)
- -    {
- -      if (SCM_UNLIKELY (scm_i_fill_input (port) == EOF))
- -      return EOF;
- -    }
+ +  scm_c_lock_port (port, &lock);
+ +  ret = scm_get_byte_or_eof_unlocked (port);
+ +  if (lock)
+ +    scm_i_pthread_mutex_unlock (lock);
   
- -  return *pt->read_pos++;
+ +  return ret;
   }
   
- -/* Slow-path fallback for 'scm_peek_byte_or_eof' in inline.h */
   int
- -scm_slow_peek_byte_or_eof (SCM port)
+ +scm_peek_byte_or_eof (SCM port)
   {
- -  scm_t_port *pt = SCM_PTAB_ENTRY (port);
- -
- -  if (pt->rw_active == SCM_PORT_WRITE)
- -    scm_flush (port);
- -
- -  if (pt->rw_random)
- -    pt->rw_active = SCM_PORT_READ;
+ +  scm_i_pthread_mutex_t *lock;
+ +  int ret;
   
- -  if (pt->read_pos >= pt->read_end)
- -    {
- -      if (SCM_UNLIKELY (scm_i_fill_input (port) == EOF))
- -        {
- -          scm_i_set_pending_eof (port);
- -          return EOF;
- -        }
- -    }
+ +  scm_c_lock_port (port, &lock);
+ +  ret = scm_peek_byte_or_eof_unlocked (port);
+ +  if (lock)
+ +    scm_i_pthread_mutex_unlock (lock);
   
- -  return *pt->read_pos;
+ +  return ret;
   }
   
- -
- -/* scm_lfwrite
+ +/* scm_c_read
    *
- - * This function differs from scm_c_write; it updates port line and
- - * column. */
- -
- -void
- -scm_lfwrite (const char *ptr, size_t size, SCM port)
- -{
- -  scm_t_port *pt = SCM_PTAB_ENTRY (port);
- -  scm_t_ptob_descriptor *ptob = &scm_ptobs[SCM_PTOBNUM (port)];
- -
- -  if (pt->rw_active == SCM_PORT_READ)
- -    scm_end_input (port);
- -
- -  ptob->write (port, ptr, size);
- -
- -  for (; size; ptr++, size--)
- -    update_port_lf ((scm_t_wchar) (unsigned char) *ptr, port);
- -
- -  if (pt->rw_random)
- -    pt->rw_active = SCM_PORT_WRITE;
- -}
- -
- -/* Write STR to PORT from START inclusive to END exclusive.  */
- -void
- -scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port)
- -{
- -  scm_t_port *pt = SCM_PTAB_ENTRY (port);
- -
- -  if (pt->rw_active == SCM_PORT_READ)
- -    scm_end_input (port);
- -
- -  if (end == (size_t) -1)
- -    end = scm_i_string_length (str);
- -
- -  scm_i_display_substring (str, start, end, port);
- -
- -  if (pt->rw_random)
- -    pt->rw_active = SCM_PORT_WRITE;
- -}
- -
- -/* scm_c_read
- - *
- - * Used by an application to read arbitrary number of bytes from an
- - * SCM port.  Same semantics as libc read, except that scm_c_read only
- - * returns less than SIZE bytes if at end-of-file.
- - *
- - * Warning: Doesn't update port line and column counts!  */
+ + * Used by an application to read arbitrary number of bytes from an
+ + * SCM port.  Same semantics as libc read, except that scm_c_read only
+ + * returns less than SIZE bytes if at end-of-file.
+ + *
+ + * Warning: Doesn't update port line and column counts!  */
   
   /* This structure, and the following swap_buffer function, are used
      for temporarily swapping a port's own read buffer, and the buffer
@@@ -1338,19 -1621,19 +1444,23 @@@ swap_buffer (void *data
     psb->size = old_size;
   }
   
++static int scm_i_fill_input_unlocked (SCM port);
++
   size_t
- -scm_c_read (SCM port, void *buffer, size_t size)
+ +scm_c_read_unlocked (SCM port, void *buffer, size_t size)
   #define FUNC_NAME "scm_c_read"
   {
     scm_t_port *pt;
++  scm_t_port_internal *pti;
     size_t n_read = 0, n_available;
     struct port_and_swap_buffer psb;
   
     SCM_VALIDATE_OPINPORT (1, port);
   
     pt = SCM_PTAB_ENTRY (port);
++  pti = SCM_PORT_GET_INTERNAL (port);
     if (pt->rw_active == SCM_PORT_WRITE)
- -    scm_ptobs[SCM_PTOBNUM (port)].flush (port);
+ +    SCM_PORT_DESCRIPTOR (port)->flush (port);
   
     if (pt->rw_random)
       pt->rw_active = SCM_PORT_READ;
@@@ -1370,24 -1653,23 +1480,23 @@@
     if (size == 0)
       return n_read;
   
-   /* Now we will call scm_fill_input repeatedly until we have read the
-      requested number of bytes.  (Note that a single scm_fill_input
- -  /* Now we will call scm_i_fill_input repeatedly until we have read the
- -     requested number of bytes.  (Note that a single scm_i_fill_input
--     call does not guarantee to fill the whole of the port's read
--     buffer.) */
- -  if (pt->read_buf_size <= 1 && pt->encoding == NULL)
++  /* Now we will call scm_i_fill_input_unlocked repeatedly until we have
++     read the requested number of bytes.  (Note that a single
++     scm_i_fill_input_unlocked call does not guarantee to fill the whole
++     of the port's read buffer.) */
+ +  if (pt->read_buf_size <= 1
-       && pt->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
++      && pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
       {
--      /* The port that we are reading from is unbuffered - i.e. does
--       not have its own persistent buffer - but we have a buffer,
--       provided by our caller, that is the right size for the data
-        that is wanted.  For the following scm_fill_input calls,
- -       that is wanted.  For the following scm_i_fill_input calls,
--       therefore, we use the buffer in hand as the port's read
--       buffer.
--
--       We need to make sure that the port's normal (1 byte) buffer
-        is reinstated in case one of the scm_fill_input () calls
- -       is reinstated in case one of the scm_i_fill_input () calls
--       throws an exception; we use the scm_dynwind_* API to achieve
--       that. 
++      /* The port that we are reading from is unbuffered - i.e. does not
++       have its own persistent buffer - but we have a buffer, provided
++       by our caller, that is the right size for the data that is
++       wanted.  For the following scm_i_fill_input_unlocked calls,
++       therefore, we use the buffer in hand as the port's read buffer.
++
++       We need to make sure that the port's normal (1 byte) buffer is
++       reinstated in case one of the scm_i_fill_input_unlocked ()
++       calls throws an exception; we use the scm_dynwind_* API to
++       achieve that.
   
            A consequence of this optimization is that the fill_input
            functions can't unget characters.  That'll push data to the
@@@ -1402,9 -1684,9 +1511,9 @@@
         scm_dynwind_rewind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
         scm_dynwind_unwind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
   
-       /* Call scm_fill_input until we have all the bytes that we need,
- -      /* Call scm_i_fill_input until we have all the bytes that we need,
--       or we hit EOF. */
-       while (pt->read_buf_size && (scm_fill_input_unlocked (port) != EOF))
- -      while (pt->read_buf_size && (scm_i_fill_input (port) != EOF))
++      /* Call scm_i_fill_input_unlocked until we have all the bytes that
++       we need, or we hit EOF. */
++      while (pt->read_buf_size && (scm_i_fill_input_unlocked (port) != EOF))
         {
           pt->read_buf_size -= (pt->read_end - pt->read_pos);
           pt->read_pos = pt->read_buf = pt->read_end;
@@@ -1428,7 -1710,7 +1537,7 @@@
          that a custom port implementation's entry points (in
          particular, fill_input) can rely on the buffer always being
          the same as they first set up. */
-       while (size && (scm_fill_input_unlocked (port) != EOF))
- -      while (size && (scm_i_fill_input (port) != EOF))
++      while (size && (scm_i_fill_input_unlocked (port) != EOF))
         {
           n_available = min (size, pt->read_end - pt->read_pos);
           memcpy (buffer, pt->read_pos, n_available);
@@@ -1443,474 -1725,168 +1552,552 @@@
   }
   #undef FUNC_NAME
   
- -/* scm_c_write
- - *
- - * Used by an application to write arbitrary number of bytes to an SCM
- - * port.  Similar semantics as libc write.  However, unlike libc
- - * write, scm_c_write writes the requested number of bytes and has no
- - * return value.
- - *
- - * Warning: Doesn't update port line and column counts!
- - */
- -
- -void
- -scm_c_write (SCM port, const void *ptr, size_t size)
- -#define FUNC_NAME "scm_c_write"
+ +size_t
+ +scm_c_read (SCM port, void *buffer, size_t size)
   {
- -  scm_t_port *pt;
- -  scm_t_ptob_descriptor *ptob;
- -
- -  SCM_VALIDATE_OPOUTPORT (1, port);
- -
- -  pt = SCM_PTAB_ENTRY (port);
- -  ptob = &scm_ptobs[SCM_PTOBNUM (port)];
- -
- -  if (pt->rw_active == SCM_PORT_READ)
- -    scm_end_input (port);
+ +  scm_i_pthread_mutex_t *lock;
+ +  size_t ret;
   
- -  ptob->write (port, ptr, size);
+ +  scm_c_lock_port (port, &lock);
+ +  ret = scm_c_read_unlocked (port, buffer, size);
+ +  if (lock)
+ +    scm_i_pthread_mutex_unlock (lock);
+ +  
   
- -  if (pt->rw_random)
- -    pt->rw_active = SCM_PORT_WRITE;
+ +  return ret;
   }
- -#undef FUNC_NAME
   
- -void
- -scm_flush (SCM port)
+ +/* Update the line and column number of PORT after consumption of C.  */
+ +static inline void
+ +update_port_lf (scm_t_wchar c, SCM port)
   {
- -  long i = SCM_PTOBNUM (port);
- -  assert (i >= 0);
- -  (scm_ptobs[i].flush) (port);
+ +  switch (c)
+ +    {
+ +    case '\a':
+ +    case EOF:
+ +      break;
+ +    case '\b':
+ +      SCM_DECCOL (port);
+ +      break;
+ +    case '\n':
+ +      SCM_INCLINE (port);
+ +      break;
+ +    case '\r':
+ +      SCM_ZEROCOL (port);
+ +      break;
+ +    case '\t':
+ +      SCM_TABCOL (port);
+ +      break;
+ +    default:
+ +      SCM_INCCOL (port);
+ +      break;
+ +    }
   }
   
- -void
- -scm_end_input (SCM port)
+ +#define SCM_MBCHAR_BUF_SIZE (4)
+ +
+ +/* Convert the SIZE-byte UTF-8 sequence in UTF8_BUF to a codepoint.
+ +   UTF8_BUF is assumed to contain a valid UTF-8 sequence.  */
+ +static scm_t_wchar
+ +utf8_to_codepoint (const scm_t_uint8 *utf8_buf, size_t size)
   {
- -  long offset;
- -  scm_t_port *pt = SCM_PTAB_ENTRY (port);
+ +  scm_t_wchar codepoint;
   
- -  scm_i_clear_pending_eof (port);
- -  if (pt->read_buf == pt->putback_buf)
+ +  if (utf8_buf[0] <= 0x7f)
       {
- -      offset = pt->read_end - pt->read_pos;
- -      pt->read_buf = pt->saved_read_buf;
- -      pt->read_pos = pt->saved_read_pos;
- -      pt->read_end = pt->saved_read_end;
- -      pt->read_buf_size = pt->saved_read_buf_size;
+ +      assert (size == 1);
+ +      codepoint = utf8_buf[0];
+ +    }
+ +  else if ((utf8_buf[0] & 0xe0) == 0xc0)
+ +    {
+ +      assert (size == 2);
+ +      codepoint = ((scm_t_wchar) utf8_buf[0] & 0x1f) << 6UL
+ +      | (utf8_buf[1] & 0x3f);
+ +    }
+ +  else if ((utf8_buf[0] & 0xf0) == 0xe0)
+ +    {
+ +      assert (size == 3);
+ +      codepoint = ((scm_t_wchar) utf8_buf[0] & 0x0f) << 12UL
+ +      | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 6UL
+ +      | (utf8_buf[2] & 0x3f);
       }
     else
- -    offset = 0;
+ +    {
+ +      assert (size == 4);
+ +      codepoint = ((scm_t_wchar) utf8_buf[0] & 0x07) << 18UL
+ +      | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 12UL
+ +      | ((scm_t_wchar) utf8_buf[2] & 0x3f) << 6UL
+ +      | (utf8_buf[3] & 0x3f);
+ +    }
   
- -  scm_ptobs[SCM_PTOBNUM (port)].end_input (port, offset);
+ +  return codepoint;
   }
   
- -\f
- -
- -
- -static void
- -scm_i_unget_bytes (const unsigned char *buf, size_t len, SCM port)
- -#define FUNC_NAME "scm_unget_bytes"
+ +/* Read a UTF-8 sequence from PORT.  On success, return 0 and set
+ +   *CODEPOINT to the codepoint that was read, fill BUF with its UTF-8
+ +   representation, and set *LEN to the length in bytes.  Return
+ +   `EILSEQ' on error.  */
+ +static int
+ +get_utf8_codepoint (SCM port, scm_t_wchar *codepoint,
+ +                  scm_t_uint8 buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
   {
- -  scm_t_port *pt = SCM_PTAB_ENTRY (port);
- -  size_t old_len, new_len;
- -
- -  scm_i_clear_pending_eof (port);
+ +#define ASSERT_NOT_EOF(b)                     \
+ +  if (SCM_UNLIKELY ((b) == EOF))              \
+ +    goto invalid_seq
+ +#define CONSUME_PEEKED_BYTE()                         \
+ +  pt->read_pos++
   
- -  if (pt->read_buf != pt->putback_buf)
- -    /* switch to the put-back buffer.  */
- -    {
- -      if (pt->putback_buf == NULL)
- -      {
- -          pt->putback_buf_size = (len > SCM_INITIAL_PUTBACK_BUF_SIZE
- -                                  ? len : SCM_INITIAL_PUTBACK_BUF_SIZE);
- -        pt->putback_buf
- -          = (unsigned char *) scm_gc_malloc_pointerless
- -          (pt->putback_buf_size, "putback buffer");
- -      }
+ +  int byte;
+ +  scm_t_port *pt;
   
- -      pt->saved_read_buf = pt->read_buf;
- -      pt->saved_read_pos = pt->read_pos;
- -      pt->saved_read_end = pt->read_end;
- -      pt->saved_read_buf_size = pt->read_buf_size;
+ +  *len = 0;
+ +  pt = SCM_PTAB_ENTRY (port);
   
- -      /* Put read_pos at the end of the buffer, so that ungets will not
- -         have to shift the buffer contents each time.  */
- -      pt->read_buf = pt->putback_buf;
- -      pt->read_pos = pt->read_end = pt->putback_buf + pt->putback_buf_size;
- -      pt->read_buf_size = pt->putback_buf_size;
+ +  byte = scm_get_byte_or_eof_unlocked (port);
+ +  if (byte == EOF)
+ +    {
+ +      *codepoint = EOF;
+ +      return 0;
       }
   
- -  old_len = pt->read_end - pt->read_pos;
- -  new_len = old_len + len;
+ +  buf[0] = (scm_t_uint8) byte;
+ +  *len = 1;
   
- -  if (new_len > pt->read_buf_size)
- -    /* The putback buffer needs to be enlarged.  */
+ +  if (buf[0] <= 0x7f)
+ +    /* 1-byte form.  */
+ +    *codepoint = buf[0];
+ +  else if (buf[0] >= 0xc2 && buf[0] <= 0xdf)
       {
- -      size_t new_buf_size;
- -      unsigned char *new_buf, *new_end, *new_pos;
- -
- -      new_buf_size = pt->read_buf_size * 2;
- -      if (new_buf_size < new_len)
- -        new_buf_size = new_len;
+ +      /* 2-byte form.  */
+ +      byte = scm_peek_byte_or_eof_unlocked (port);
+ +      ASSERT_NOT_EOF (byte);
   
- -      new_buf = (unsigned char *)
- -        scm_gc_malloc_pointerless (new_buf_size, "putback buffer");
+ +      if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
+ +      goto invalid_seq;
   
- -      /* Put the bytes at the end of the buffer, so that future
- -         ungets won't need to shift the buffer.  */
- -      new_end = new_buf + new_buf_size;
- -      new_pos = new_end - old_len;
- -      memcpy (new_pos, pt->read_pos, old_len);
+ +      CONSUME_PEEKED_BYTE ();
+ +      buf[1] = (scm_t_uint8) byte;
+ +      *len = 2;
   
- -      pt->read_buf = pt->putback_buf = new_buf;
- -      pt->read_pos = new_pos;
- -      pt->read_end = new_end;
- -      pt->read_buf_size = pt->putback_buf_size = new_buf_size;
+ +      *codepoint = ((scm_t_wchar) buf[0] & 0x1f) << 6UL
+ +      | (buf[1] & 0x3f);
       }
- -  else if (pt->read_buf + len < pt->read_pos)
- -    /* If needed, shift the existing buffer contents up.
- -       This should not happen unless some external code
- -       manipulates the putback buffer pointers.  */
+ +  else if ((buf[0] & 0xf0) == 0xe0)
       {
- -      unsigned char *new_end = pt->read_buf + pt->read_buf_size;
- -      unsigned char *new_pos = new_end - old_len;
+ +      /* 3-byte form.  */
+ +      byte = scm_peek_byte_or_eof_unlocked (port);
+ +      ASSERT_NOT_EOF (byte);
   
- -      memmove (new_pos, pt->read_pos, old_len);
- -      pt->read_pos = new_pos;
- -      pt->read_end = new_end;
- -    }
+ +      if (SCM_UNLIKELY ((byte & 0xc0) != 0x80
+ +                      || (buf[0] == 0xe0 && byte < 0xa0)
+ +                      || (buf[0] == 0xed && byte > 0x9f)))
+ +      goto invalid_seq;
   
- -  /* Move read_pos back and copy the bytes there.  */
- -  pt->read_pos -= len;
- -  memcpy (pt->read_buf + (pt->read_pos - pt->read_buf), buf, len);
+ +      CONSUME_PEEKED_BYTE ();
+ +      buf[1] = (scm_t_uint8) byte;
+ +      *len = 2;
   
- -  if (pt->rw_active == SCM_PORT_WRITE)
- -    scm_flush (port);
+ +      byte = scm_peek_byte_or_eof_unlocked (port);
+ +      ASSERT_NOT_EOF (byte);
   
- -  if (pt->rw_random)
- -    pt->rw_active = SCM_PORT_READ;
- -}
- -#undef FUNC_NAME
+ +      if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
+ +      goto invalid_seq;
   
- -void
- -scm_unget_bytes (const unsigned char *buf, size_t len, SCM port)
+ +      CONSUME_PEEKED_BYTE ();
+ +      buf[2] = (scm_t_uint8) byte;
+ +      *len = 3;
+ +
+ +      *codepoint = ((scm_t_wchar) buf[0] & 0x0f) << 12UL
+ +      | ((scm_t_wchar) buf[1] & 0x3f) << 6UL
+ +      | (buf[2] & 0x3f);
+ +    }
+ +  else if (buf[0] >= 0xf0 && buf[0] <= 0xf4)
+ +    {
+ +      /* 4-byte form.  */
+ +      byte = scm_peek_byte_or_eof_unlocked (port);
+ +      ASSERT_NOT_EOF (byte);
+ +
+ +      if (SCM_UNLIKELY (((byte & 0xc0) != 0x80)
+ +                      || (buf[0] == 0xf0 && byte < 0x90)
+ +                      || (buf[0] == 0xf4 && byte > 0x8f)))
+ +      goto invalid_seq;
+ +
+ +      CONSUME_PEEKED_BYTE ();
+ +      buf[1] = (scm_t_uint8) byte;
+ +      *len = 2;
+ +
+ +      byte = scm_peek_byte_or_eof_unlocked (port);
+ +      ASSERT_NOT_EOF (byte);
+ +
+ +      if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
+ +      goto invalid_seq;
+ +
+ +      CONSUME_PEEKED_BYTE ();
+ +      buf[2] = (scm_t_uint8) byte;
+ +      *len = 3;
+ +
+ +      byte = scm_peek_byte_or_eof_unlocked (port);
+ +      ASSERT_NOT_EOF (byte);
+ +
+ +      if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
+ +      goto invalid_seq;
+ +
+ +      CONSUME_PEEKED_BYTE ();
+ +      buf[3] = (scm_t_uint8) byte;
+ +      *len = 4;
+ +
+ +      *codepoint = ((scm_t_wchar) buf[0] & 0x07) << 18UL
+ +      | ((scm_t_wchar) buf[1] & 0x3f) << 12UL
+ +      | ((scm_t_wchar) buf[2] & 0x3f) << 6UL
+ +      | (buf[3] & 0x3f);
+ +    }
+ +  else
+ +    goto invalid_seq;
+ +
+ +  return 0;
+ +
+ + invalid_seq:
+ +  /* Here we could choose the consume the faulty byte when it's not a
+ +     valid starting byte, but it's not a requirement.  What Section 3.9
+ +     of Unicode 6.0.0 mandates, though, is to not consume a byte that
+ +     would otherwise be a valid starting byte.  */
+ +
+ +  return EILSEQ;
+ +
+ +#undef CONSUME_PEEKED_BYTE
+ +#undef ASSERT_NOT_EOF
+ +}
+ +
+ +/* Read an ISO-8859-1 codepoint (a byte) from PORT.  On success, return
+ +   0 and set *CODEPOINT to the codepoint that was read, fill BUF with
+ +   its UTF-8 representation, and set *LEN to the length in bytes.
+ +   Return `EILSEQ' on error.  */
+ +static int
+ +get_latin1_codepoint (SCM port, scm_t_wchar *codepoint,
+ +                      char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
+ +{
+ +  *codepoint = scm_get_byte_or_eof_unlocked (port);
+ +
+ +  if (*codepoint == EOF)
+ +    *len = 0;
+ +  else
+ +    {
+ +      *len = 1;
+ +      buf[0] = *codepoint;
+ +    }
+ +  return 0;
+ +}
+ +
+ +/* Likewise, read a byte sequence from PORT, passing it through its
+ +   input conversion descriptor.  */
+ +static int
+ +get_iconv_codepoint (SCM port, scm_t_wchar *codepoint,
+ +                   char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
+ +{
+ +  scm_t_iconv_descriptors *id;
-   int err, byte_read;
-   size_t bytes_consumed, output_size;
-   char *output;
+ +  scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
++  size_t input_size = 0;
+ +
-   id = scm_i_port_iconv_descriptors (port);
++  id = scm_i_port_iconv_descriptors (port, SCM_PORT_READ);
+ +
-   for (output_size = 0, output = (char *) utf8_buf,
-        bytes_consumed = 0, err = 0;
-        err == 0 && output_size == 0
-        && (bytes_consumed == 0 || byte_read != EOF);
-        bytes_consumed++)
++  for (;;)
+ +    {
-       char *input;
++      int byte_read;
++      char *input, *output;
+ +      size_t input_left, output_left, done;
+ +
+ +      byte_read = scm_get_byte_or_eof_unlocked (port);
-       if (byte_read == EOF)
++      if (SCM_UNLIKELY (byte_read == EOF))
+ +      {
-         if (bytes_consumed == 0)
-           {
-             *codepoint = (scm_t_wchar) EOF;
-             *len = 0;
-             return 0;
-           }
-         else
-           continue;
++          if (SCM_LIKELY (input_size == 0))
++            {
++              *codepoint = (scm_t_wchar) EOF;
++              *len = input_size;
++              return 0;
++            }
++          else
++            {
++              /* EOF found in the middle of a multibyte character. */
++              scm_i_set_pending_eof (port);
++              return EILSEQ;
++            }
+ +      }
+ +
-       buf[bytes_consumed] = byte_read;
++      buf[input_size++] = byte_read;
+ +
+ +      input = buf;
-       input_left = bytes_consumed + 1;
++      input_left = input_size;
++      output = (char *) utf8_buf;
+ +      output_left = sizeof (utf8_buf);
+ +
+ +      done = iconv (id->input_cd, &input, &input_left, &output, &output_left);
++
+ +      if (done == (size_t) -1)
+ +      {
-         err = errno;
-         if (err == EINVAL)
-           /* Missing input: keep trying.  */
-           err = 0;
++        int err = errno;
++        if (SCM_LIKELY (err == EINVAL))
++            /* The input byte sequence did not form a complete
++               character.  Read another byte and try again. */
++            continue;
++          else
++            return err;
+ +      }
+ +      else
-       output_size = sizeof (utf8_buf) - output_left;
-     }
- 
-   if (SCM_UNLIKELY (output_size == 0))
-     /* An unterminated sequence.  */
-     err = EILSEQ;
-   else if (SCM_LIKELY (err == 0))
-     {
-       /* Convert the UTF8_BUF sequence to a Unicode code point.  */
-       *codepoint = utf8_to_codepoint (utf8_buf, output_size);
-       *len = bytes_consumed;
++        {
++          size_t output_size = sizeof (utf8_buf) - output_left;
++          if (SCM_LIKELY (output_size > 0))
++            {
++              /* iconv generated output.  Convert the UTF8_BUF sequence
++                 to a Unicode code point.  */
++              *codepoint = utf8_to_codepoint (utf8_buf, output_size);
++              *len = input_size;
++              return 0;
++            }
++          else
++            {
++              /* iconv consumed some bytes without producing any output.
++                 Most likely this means that a Unicode byte-order mark
++                 (BOM) was consumed, which should not be included in the
++                 returned buf.  Shift any remaining bytes to the beginning
++                 of buf, and continue the loop. */
++              memmove (buf, input, input_left);
++              input_size = input_left;
++              continue;
++            }
++        }
+ +    }
- 
-   return err;
+ +}
+ +
+ +/* Read a codepoint from PORT and return it in *CODEPOINT.  Fill BUF
+ +   with the byte representation of the codepoint in PORT's encoding, and
+ +   set *LEN to the length in bytes of that representation.  Return 0 on
+ +   success and an errno value on error.  */
+ +static SCM_C_INLINE int
+ +get_codepoint (SCM port, scm_t_wchar *codepoint,
+ +             char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
+ +{
+ +  int err;
+ +  scm_t_port *pt = SCM_PTAB_ENTRY (port);
++  scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
+ +
-   if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
++  if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
+ +    err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len);
-   else if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
++  else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
+ +    err = get_latin1_codepoint (port, codepoint, buf, len);
+ +  else
+ +    err = get_iconv_codepoint (port, codepoint, buf, len);
+ +
+ +  if (SCM_LIKELY (err == 0))
-     update_port_lf (*codepoint, port);
++    {
++      if (SCM_UNLIKELY (pti->at_stream_start_for_bom_read))
++        {
++          /* Record that we're no longer at stream start. */
++          pti->at_stream_start_for_bom_read = 0;
++          if (pt->rw_random)
++            pti->at_stream_start_for_bom_write = 0;
++
++          /* If we just read a BOM in an encoding that recognizes them,
++             then silently consume it and read another code point. */
++          if (SCM_UNLIKELY
++              (*codepoint == SCM_UNICODE_BOM
++               && (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8
++                   || strcmp (pt->encoding, "UTF-16") == 0
++                   || strcmp (pt->encoding, "UTF-32") == 0)))
++            return get_codepoint (port, codepoint, buf, len);
++        }
++      update_port_lf (*codepoint, port);
++    }
+ +  else if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
+ +    {
+ +      *codepoint = '?';
+ +      err = 0;
+ +      update_port_lf (*codepoint, port);
+ +    }
+ +
+ +  return err;
+ +}
+ +
+ +/* Read a codepoint from PORT and return it.  */
+ +scm_t_wchar
+ +scm_getc_unlocked (SCM port)
+ +#define FUNC_NAME "scm_getc"
+ +{
+ +  int err;
+ +  size_t len;
+ +  scm_t_wchar codepoint;
+ +  char buf[SCM_MBCHAR_BUF_SIZE];
+ +
+ +  err = get_codepoint (port, &codepoint, buf, &len);
+ +  if (SCM_UNLIKELY (err != 0))
+ +    /* At this point PORT should point past the invalid encoding, as per
+ +       R6RS-lib Section 8.2.4.  */
+ +    scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
+ +
+ +  return codepoint;
+ +}
+ +#undef FUNC_NAME
+ +
+ +scm_t_wchar
+ +scm_getc (SCM port)
+ +{
+ +  scm_i_pthread_mutex_t *lock;
+ +  scm_t_wchar ret;
+ +
+ +  scm_c_lock_port (port, &lock);
+ +  ret = scm_getc_unlocked (port);
+ +  if (lock)
+ +    scm_i_pthread_mutex_unlock (lock);
+ +  
+ +
+ +  return ret;
+ +}
+ +
+ +SCM_DEFINE (scm_read_char, "read-char", 0, 1, 0,
+ +           (SCM port),
+ +          "Return the next character available from @var{port}, updating\n"
+ +          "@var{port} to point to the following character.  If no more\n"
+ +          "characters are available, the end-of-file object is returned.\n"
+ +          "\n"
+ +          "When @var{port}'s data cannot be decoded according to its\n"
+ +          "character encoding, a @code{decoding-error} is raised and\n"
+ +          "@var{port} points past the erroneous byte sequence.\n")
+ +#define FUNC_NAME s_scm_read_char
+ +{
+ +  scm_t_wchar c;
+ +  if (SCM_UNBNDP (port))
+ +    port = scm_current_input_port ();
+ +  SCM_VALIDATE_OPINPORT (1, port);
+ +  c = scm_getc_unlocked (port);
+ +  if (EOF == c)
+ +    return SCM_EOF_VAL;
+ +  return SCM_MAKE_CHAR (c);
+ +}
+ +#undef FUNC_NAME
+ +
+ +
+ +\f
+ +
+ +/* Pushback.  */
++\f
+ +
- void 
- scm_unget_byte_unlocked (int c, SCM port)
- #define FUNC_NAME "scm_unget_byte"
++
++static void
++scm_i_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port)
++#define FUNC_NAME "scm_unget_bytes"
+ +{
+ +  scm_t_port *pt = SCM_PTAB_ENTRY (port);
++  size_t old_len, new_len;
+ +
-   if (pt->read_buf == pt->putback_buf)
-     /* already using the put-back buffer.  */
-     {
-       /* enlarge putback_buf if necessary.  */
-       if (pt->read_end == pt->read_buf + pt->read_buf_size
-         && pt->read_buf == pt->read_pos)
-       {
-         size_t new_size = pt->read_buf_size * 2;
-         unsigned char *tmp = (unsigned char *)
-           scm_gc_realloc (pt->putback_buf, pt->read_buf_size, new_size,
-                           "putback buffer");
- 
-         pt->read_pos = pt->read_buf = pt->putback_buf = tmp;
-         pt->read_end = pt->read_buf + pt->read_buf_size;
-         pt->read_buf_size = pt->putback_buf_size = new_size;
-       }
++  scm_i_clear_pending_eof (port);
+ +
-       /* shift any existing bytes to buffer + 1.  */
-       if (pt->read_pos == pt->read_end)
-       pt->read_end = pt->read_buf + 1;
-       else if (pt->read_pos != pt->read_buf + 1)
-       {
-         int count = pt->read_end - pt->read_pos;
- 
-         memmove (pt->read_buf + 1, pt->read_pos, count);
-         pt->read_end = pt->read_buf + 1 + count;
-       }
- 
-       pt->read_pos = pt->read_buf;
-     }
-   else
++  if (pt->read_buf != pt->putback_buf)
+ +    /* switch to the put-back buffer.  */
+ +    {
+ +      if (pt->putback_buf == NULL)
+ +      {
++          pt->putback_buf_size = (len > SCM_INITIAL_PUTBACK_BUF_SIZE
++                                  ? len : SCM_INITIAL_PUTBACK_BUF_SIZE);
+ +        pt->putback_buf
+ +          = (unsigned char *) scm_gc_malloc_pointerless
-           (SCM_INITIAL_PUTBACK_BUF_SIZE, "putback buffer");
-         pt->putback_buf_size = SCM_INITIAL_PUTBACK_BUF_SIZE;
++          (pt->putback_buf_size, "putback buffer");
+ +      }
+ +
+ +      pt->saved_read_buf = pt->read_buf;
+ +      pt->saved_read_pos = pt->read_pos;
+ +      pt->saved_read_end = pt->read_end;
+ +      pt->saved_read_buf_size = pt->read_buf_size;
+ +
-       pt->read_pos = pt->read_buf = pt->putback_buf;
-       pt->read_end = pt->read_buf + 1;
++      /* Put read_pos at the end of the buffer, so that ungets will not
++         have to shift the buffer contents each time.  */
++      pt->read_buf = pt->putback_buf;
++      pt->read_pos = pt->read_end = pt->putback_buf + pt->putback_buf_size;
+ +      pt->read_buf_size = pt->putback_buf_size;
+ +    }
+ +
-   *pt->read_buf = c;
++  old_len = pt->read_end - pt->read_pos;
++  new_len = old_len + len;
++
++  if (new_len > pt->read_buf_size)
++    /* The putback buffer needs to be enlarged.  */
++    {
++      size_t new_buf_size;
++      unsigned char *new_buf, *new_end, *new_pos;
++
++      new_buf_size = pt->read_buf_size * 2;
++      if (new_buf_size < new_len)
++        new_buf_size = new_len;
++
++      new_buf = (unsigned char *)
++        scm_gc_malloc_pointerless (new_buf_size, "putback buffer");
++
++      /* Put the bytes at the end of the buffer, so that future
++         ungets won't need to shift the buffer.  */
++      new_end = new_buf + new_buf_size;
++      new_pos = new_end - old_len;
++      memcpy (new_pos, pt->read_pos, old_len);
++
++      pt->read_buf = pt->putback_buf = new_buf;
++      pt->read_pos = new_pos;
++      pt->read_end = new_end;
++      pt->read_buf_size = pt->putback_buf_size = new_buf_size;
++    }
++  else if (pt->read_buf + len < pt->read_pos)
++    /* If needed, shift the existing buffer contents up.
++       This should not happen unless some external code
++       manipulates the putback buffer pointers.  */
++    {
++      unsigned char *new_end = pt->read_buf + pt->read_buf_size;
++      unsigned char *new_pos = new_end - old_len;
++
++      memmove (new_pos, pt->read_pos, old_len);
++      pt->read_pos = new_pos;
++      pt->read_end = new_end;
++    }
++
++  /* Move read_pos back and copy the bytes there.  */
++  pt->read_pos -= len;
++  memcpy (pt->read_buf + (pt->read_pos - pt->read_buf), buf, len);
++
++  if (pt->rw_active == SCM_PORT_WRITE)
++    scm_flush (port);
+ +
+ +  if (pt->rw_random)
+ +    pt->rw_active = SCM_PORT_READ;
+ +}
+ +#undef FUNC_NAME
+ +
++void
++scm_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port)
+ {
- -  scm_i_unget_bytes (buf, len, port);
++  scm_i_unget_bytes_unlocked (buf, len, port);
+ }
+ 
+ void
- -scm_unget_byte (int c, SCM port)
++scm_unget_byte_unlocked (int c, SCM port)
++{
++  unsigned char byte = c;
++  scm_i_unget_bytes_unlocked (&byte, 1, port);
++}
++
++void
++scm_unget_bytes (const unsigned char *buf, size_t len, SCM port)
+ {
- -  unsigned char byte;
++  scm_i_pthread_mutex_t *lock;
++  scm_c_lock_port (port, &lock);
++  scm_i_unget_bytes_unlocked (buf, len, port);
++  if (lock)
++    scm_i_pthread_mutex_unlock (lock);
++}
+ 
- -  byte = c;
- -  scm_i_unget_bytes (&byte, 1, port);
+ +void 
+ +scm_unget_byte (int c, SCM port)
+ +{
++  unsigned char byte = c;
+ +  scm_i_pthread_mutex_t *lock;
+ +  scm_c_lock_port (port, &lock);
-   scm_unget_byte_unlocked (c, port);
++  scm_i_unget_bytes_unlocked (&byte, 1, port);
+ +  if (lock)
+ +    scm_i_pthread_mutex_unlock (lock);
-   
   }
   
   void
@@@ -1920,11 -1896,16 +2107,10 @@@ scm_ungetc_unlocked (scm_t_wchar c, SC
     scm_t_port *pt = SCM_PTAB_ENTRY (port);
     char *result;
     char result_buf[10];
- -  const char *encoding;
     size_t len;
-   int i;
   
- -  if (pt->encoding != NULL)
- -    encoding = pt->encoding;
- -  else
- -    encoding = "ISO-8859-1";
- -
     len = sizeof (result_buf);
- -  result = u32_conv_to_encoding (encoding,
+ +  result = u32_conv_to_encoding (pt->encoding,
                                  (enum iconv_ilseq_handler) pt->ilseq_handler,
                                  (uint32_t *) &c, 1, NULL,
                                  result_buf, &len);
@@@ -1934,8 -1915,7 +2120,7 @@@
                         "conversion to port encoding failed",
                         SCM_BOOL_F, SCM_MAKE_CHAR (c));
   
-   for (i = len - 1; i >= 0; i--)
-     scm_unget_byte_unlocked (result[i], port);
- -  scm_i_unget_bytes ((unsigned char *) result, len, port);
++  scm_i_unget_bytes_unlocked ((unsigned char *) result, len, port);
   
     if (SCM_UNLIKELY (result != result_buf))
       free (result);
@@@ -2014,37 -1974,256 +2199,39 @@@ SCM_DEFINE (scm_peek_char, "peek-char"
     SCM result;
     scm_t_wchar c;
     char bytes[SCM_MBCHAR_BUF_SIZE];
-   long column, line, i;
+   long column, line;
     size_t len;
   
- -  if (SCM_UNBNDP (port))
- -    port = scm_current_input_port ();
- -  SCM_VALIDATE_OPINPORT (1, port);
- -
- -  column = SCM_COL (port);
- -  line = SCM_LINUM (port);
- -
- -  err = get_codepoint (port, &c, bytes, &len);
- -
- -  scm_i_unget_bytes ((unsigned char *) bytes, len, port);
- -
- -  SCM_COL (port) = column;
- -  SCM_LINUM (port) = line;
- -
- -  if (SCM_UNLIKELY (err != 0))
- -    {
- -      scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
- -
- -      /* Shouldn't happen since `catch' always aborts to prompt.  */
- -      result = SCM_BOOL_F;
- -    }
- -  else if (c == EOF)
- -    {
- -      scm_i_set_pending_eof (port);
- -      result = SCM_EOF_VAL;
- -    }
- -  else
- -    result = SCM_MAKE_CHAR (c);
- -
- -  return result;
- -}
- -#undef FUNC_NAME
- -
- -SCM_DEFINE (scm_unread_char, "unread-char", 1, 1, 0,
- -            (SCM cobj, SCM port),
- -          "Place character @var{cobj} in @var{port} so that it will be\n"
- -          "read by the next read operation.  If called multiple times, the\n"
- -          "unread characters will be read again in last-in first-out\n"
- -          "order.  If @var{port} is not supplied, the current input port\n"
- -          "is used.")
- -#define FUNC_NAME s_scm_unread_char
- -{
- -  int c;
- -
- -  SCM_VALIDATE_CHAR (1, cobj);
- -  if (SCM_UNBNDP (port))
- -    port = scm_current_input_port ();
- -  SCM_VALIDATE_OPINPORT (2, port);
- -
- -  c = SCM_CHAR (cobj);
- -
- -  scm_ungetc (c, port);
- -  return cobj;
- -}
- -#undef FUNC_NAME
- -
- -SCM_DEFINE (scm_unread_string, "unread-string", 2, 0, 0,
- -            (SCM str, SCM port),
- -          "Place the string @var{str} in @var{port} so that its characters will be\n"
- -          "read in subsequent read operations.  If called multiple times, the\n"
- -          "unread characters will be read again in last-in first-out order.  If\n"
- -          "@var{port} is not supplied, the current-input-port is used.")
- -#define FUNC_NAME s_scm_unread_string
- -{
- -  int n;
- -  SCM_VALIDATE_STRING (1, str);
- -  if (SCM_UNBNDP (port))
- -    port = scm_current_input_port ();
- -  SCM_VALIDATE_OPINPORT (2, port);
- -
- -  n = scm_i_string_length (str);
- -
- -  while (n--)
- -    scm_ungetc (scm_i_string_ref (str, n), port);
- -  
- -  return str;
- -}
- -#undef FUNC_NAME
- -
- -SCM_DEFINE (scm_seek, "seek", 3, 0, 0,
- -            (SCM fd_port, SCM offset, SCM whence),
- -          "Sets the current position of @var{fd_port} to the integer\n"
- -          "@var{offset}, which is interpreted according to the value of\n"
- -          "@var{whence}.\n"
- -          "\n"
- -          "One of the following variables should be supplied for\n"
- -          "@var{whence}:\n"
- -          "@defvar SEEK_SET\n"
- -          "Seek from the beginning of the file.\n"
- -          "@end defvar\n"
- -          "@defvar SEEK_CUR\n"
- -          "Seek from the current position.\n"
- -          "@end defvar\n"
- -          "@defvar SEEK_END\n"
- -          "Seek from the end of the file.\n"
- -          "@end defvar\n"
- -          "If @var{fd_port} is a file descriptor, the underlying system\n"
- -          "call is @code{lseek}.  @var{port} may be a string port.\n"
- -          "\n"
- -          "The value returned is the new position in the file.  This means\n"
- -          "that the current position of a port can be obtained using:\n"
- -          "@lisp\n"
- -          "(seek port 0 SEEK_CUR)\n"
- -          "@end lisp")
- -#define FUNC_NAME s_scm_seek
- -{
- -  int how;
- -
- -  fd_port = SCM_COERCE_OUTPORT (fd_port);
- -
- -  how = scm_to_int (whence);
- -  if (how != SEEK_SET && how != SEEK_CUR && how != SEEK_END)
- -    SCM_OUT_OF_RANGE (3, whence);
- -
- -  if (SCM_OPPORTP (fd_port))
- -    {
- -      scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (fd_port);
- -      scm_t_ptob_descriptor *ptob = scm_ptobs + SCM_PTOBNUM (fd_port);
- -      off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
- -      off_t_or_off64_t rv;
- -
- -      if (!ptob->seek)
- -      SCM_MISC_ERROR ("port is not seekable", 
- -                        scm_cons (fd_port, SCM_EOL));
- -      else
- -        rv = ptob->seek (fd_port, off, how);
- -
- -      /* Set stream-start flags according to new position. */
- -      pti->at_stream_start_for_bom_read  = (rv == 0);
- -      pti->at_stream_start_for_bom_write = (rv == 0);
- -
- -      scm_i_clear_pending_eof (fd_port);
- -
- -      return scm_from_off_t_or_off64_t (rv);
- -    }
- -  else /* file descriptor?.  */
- -    {
- -      off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
- -      off_t_or_off64_t rv;
- -      rv = lseek_or_lseek64 (scm_to_int (fd_port), off, how);
- -      if (rv == -1)
- -      SCM_SYSERROR;
- -      return scm_from_off_t_or_off64_t (rv);
- -    }
- -}
- -#undef FUNC_NAME
- -
- -#ifndef O_BINARY
- -#define O_BINARY 0
- -#endif
- -
- -/* Mingw has ftruncate(), perhaps implemented above using chsize, but
- -   doesn't have the filename version truncate(), hence this code.  */
- -#if HAVE_FTRUNCATE && ! HAVE_TRUNCATE
- -static int
- -truncate (const char *file, off_t length)
- -{
- -  int ret, fdes;
- -
- -  fdes = open (file, O_BINARY | O_WRONLY);
- -  if (fdes == -1)
- -    return -1;
- -
- -  ret = ftruncate (fdes, length);
- -  if (ret == -1)
- -    {
- -      int save_errno = errno;
- -      close (fdes);
- -      errno = save_errno;
- -      return -1;
- -    }
- -
- -  return close (fdes);
- -}
- -#endif /* HAVE_FTRUNCATE && ! HAVE_TRUNCATE */
- -
- -SCM_DEFINE (scm_truncate_file, "truncate-file", 1, 1, 0,
- -            (SCM object, SCM length),
- -          "Truncate file @var{object} to @var{length} bytes.  @var{object}\n"
- -          "can be a filename string, a port object, or an integer file\n"
- -          "descriptor.\n"
- -          "The return value is unspecified.\n"
- -          "\n"
- -          "For a port or file descriptor @var{length} can be omitted, in\n"
- -          "which case the file is truncated at the current position (per\n"
- -          "@code{ftell} above).\n"
- -          "\n"
- -          "On most systems a file can be extended by giving a length\n"
- -          "greater than the current size, but this is not mandatory in the\n"
- -          "POSIX standard.")
- -#define FUNC_NAME s_scm_truncate_file
- -{
- -  int rv;
- -
- -  /* "object" can be a port, fdes or filename.
- -
- -     Negative "length" makes no sense, but it's left to truncate() or
- -     ftruncate() to give back an error for that (normally EINVAL).
- -     */
- -
- -  if (SCM_UNBNDP (length))
- -    {
- -      /* must supply length if object is a filename.  */
- -      if (scm_is_string (object))
- -        SCM_MISC_ERROR("must supply length if OBJECT is a filename", SCM_EOL);
- -      
- -      length = scm_seek (object, SCM_INUM0, scm_from_int (SEEK_CUR));
- -    }
- -
- -  object = SCM_COERCE_OUTPORT (object);
- -  if (scm_is_integer (object))
- -    {
- -      off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
- -      SCM_SYSCALL (rv = ftruncate_or_ftruncate64 (scm_to_int (object),
- -                                                  c_length));
- -    }
- -  else if (SCM_OPOUTPORTP (object))
- -    {
- -      off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
- -      scm_t_port *pt = SCM_PTAB_ENTRY (object);
- -      scm_t_ptob_descriptor *ptob = scm_ptobs + SCM_PTOBNUM (object);
+ +  if (SCM_UNBNDP (port))
+ +    port = scm_current_input_port ();
+ +  SCM_VALIDATE_OPINPORT (1, port);
   
- -      if (!ptob->truncate)
- -      SCM_MISC_ERROR ("port is not truncatable", SCM_EOL);
+ +  column = SCM_COL (port);
+ +  line = SCM_LINUM (port);
   
- -      scm_i_clear_pending_eof (object);
- -      if (pt->rw_active == SCM_PORT_READ)
- -      scm_end_input (object);
- -      else if (pt->rw_active == SCM_PORT_WRITE)
- -      ptob->flush (object);
+ +  err = get_codepoint (port, &c, bytes, &len);
   
-   for (i = len - 1; i >= 0; i--)
-     scm_unget_byte_unlocked (bytes[i], port);
- -      ptob->truncate (object, c_length);
- -      rv = 0;
++  scm_i_unget_bytes_unlocked ((unsigned char *) bytes, len, port);
+ +
+ +  SCM_COL (port) = column;
+ +  SCM_LINUM (port) = line;
+ +
+ +  if (SCM_UNLIKELY (err != 0))
+ +    {
+ +      scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
+ +
+ +      /* Shouldn't happen since `catch' always aborts to prompt.  */
+ +      result = SCM_BOOL_F;
       }
- -  else
+ +  else if (c == EOF)
-     result = SCM_EOF_VAL;
+     {
- -      off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
- -      char *str = scm_to_locale_string (object);
- -      int eno;
- -      SCM_SYSCALL (rv = truncate_or_truncate64 (str, c_length));
- -      eno = errno;
- -      free (str);
- -      errno = eno;
++      scm_i_set_pending_eof (port);
++      result = SCM_EOF_VAL;
+     }
- -  if (rv == -1)
- -    SCM_SYSERROR;
- -  return SCM_UNSPECIFIED;
+ +  else
+ +    result = SCM_MAKE_CHAR (c);
+ +
+ +  return result;
   }
   #undef FUNC_NAME
   
@@@ -2094,295 -2256,280 +2281,354 @@@ SCM_DEFINE (scm_unread_string, "unread-
   }
   #undef FUNC_NAME
   
- -SCM_DEFINE (scm_port_column, "port-column", 1, 0, 0,
- -            (SCM port),
- -          "Return the current column number of @var{port}.\n"
- -          "If the number is\n"
- -          "unknown, the result is #f.  Otherwise, the result is a 0-origin integer\n"
- -          "- i.e. the first character of the first line is line 0, column 0.\n"
- -          "(However, when you display a file position, for example in an error\n"
- -          "message, we recommend you add 1 to get 1-origin integers.  This is\n"
- -          "because lines and column numbers traditionally start with 1, and that is\n"
- -          "what non-programmers will find most natural.)")
- -#define FUNC_NAME s_scm_port_column
+ +
+ +\f
+ +
+ +/* Manipulating the buffers.  */
+ +
+ +/* This routine does not take any locks, as it is usually called as part
+ +   of a port implementation.  */
+ +void
+ +scm_port_non_buffer (scm_t_port *pt)
   {
- -  port = SCM_COERCE_OUTPORT (port);
- -  SCM_VALIDATE_OPENPORT (1, port);
- -  return scm_from_int (SCM_COL (port));
+ +  pt->read_pos = pt->read_buf = pt->read_end = &pt->shortbuf;
+ +  pt->write_buf = pt->write_pos = &pt->shortbuf;
+ +  pt->read_buf_size = pt->write_buf_size = 1;
+ +  pt->write_end = pt->write_buf + pt->write_buf_size;
   }
- -#undef FUNC_NAME
   
- -SCM_DEFINE (scm_set_port_column_x, "set-port-column!", 2, 0, 0,
- -            (SCM port, SCM column),
- -          "Set the current column of @var{port}.  Before reading the first\n"
- -          "character on a line the column should be 0.")
- -#define FUNC_NAME s_scm_set_port_column_x
+ +/* this should only be called when the read buffer is empty.  it
+ +   tries to refill the read buffer.  it returns the first char from
+ +   the port, which is either EOF or *(pt->read_pos).  */
- int
- scm_fill_input_unlocked (SCM port)
++static int
++scm_i_fill_input_unlocked (SCM port)
   {
- -  port = SCM_COERCE_OUTPORT (port);
- -  SCM_VALIDATE_OPENPORT (1, port);
- -  SCM_PTAB_ENTRY (port)->column_number = scm_to_int (column);
- -  return SCM_UNSPECIFIED;
+ +  scm_t_port *pt = SCM_PTAB_ENTRY (port);
++  scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
+ +
+ +  assert (pt->read_pos == pt->read_end);
+ +
++  if (pti->pending_eof)
++    {
++      pti->pending_eof = 0;
++      return EOF;
++    }
++
+ +  if (pt->read_buf == pt->putback_buf)
+ +    {
+ +      /* finished reading put-back chars.  */
+ +      pt->read_buf = pt->saved_read_buf;
+ +      pt->read_pos = pt->saved_read_pos;
+ +      pt->read_end = pt->saved_read_end;
+ +      pt->read_buf_size = pt->saved_read_buf_size;
+ +      if (pt->read_pos < pt->read_end)
+ +      return *(pt->read_pos);
+ +    }
+ +  return SCM_PORT_DESCRIPTOR (port)->fill_input (port);
   }
- -#undef FUNC_NAME
   
- -SCM_DEFINE (scm_port_filename, "port-filename", 1, 0, 0,
- -            (SCM port),
- -          "Return the filename associated with @var{port}, or @code{#f}\n"
- -          "if no filename is associated with the port.")
- -#define FUNC_NAME s_scm_port_filename
+ +int
+ +scm_fill_input (SCM port)
   {
- -  port = SCM_COERCE_OUTPORT (port);
- -  SCM_VALIDATE_OPENPORT (1, port);
- -  return SCM_FILENAME (port);
+ +  scm_i_pthread_mutex_t *lock;
+ +  int ret;
+ +  
+ +  scm_c_lock_port (port, &lock);
+ +  ret = scm_fill_input_unlocked (port);
+ +  if (lock)
+ +    scm_i_pthread_mutex_unlock (lock);
+ +  
+ +
+ +  return ret;
   }
- -#undef FUNC_NAME
   
- -SCM_DEFINE (scm_set_port_filename_x, "set-port-filename!", 2, 0, 0,
- -            (SCM port, SCM filename),
- -          "Change the filename associated with @var{port}, using the current input\n"
- -          "port if none is specified.  Note that this does not change the port's\n"
- -          "source of data, but only the value that is returned by\n"
- -          "@code{port-filename} and reported in diagnostic output.")
- -#define FUNC_NAME s_scm_set_port_filename_x
++/* Slow-path fallback for 'scm_get_byte_or_eof_unlocked' */
++int
++scm_slow_get_byte_or_eof_unlocked (SCM port)
+ {
- -  port = SCM_COERCE_OUTPORT (port);
- -  SCM_VALIDATE_OPENPORT (1, port);
- -  /* We allow the user to set the filename to whatever he likes.  */
- -  SCM_SET_FILENAME (port, filename);
- -  return SCM_UNSPECIFIED;
++  scm_t_port *pt = SCM_PTAB_ENTRY (port);
++
++  if (pt->rw_active == SCM_PORT_WRITE)
++    scm_flush_unlocked (port);
++
++  if (pt->rw_random)
++    pt->rw_active = SCM_PORT_READ;
++
++  if (pt->read_pos >= pt->read_end)
++    {
++      if (SCM_UNLIKELY (scm_i_fill_input_unlocked (port) == EOF))
++      return EOF;
++    }
++
++  return *pt->read_pos++;
+ }
- -#undef FUNC_NAME
+ 
- -/* A fluid specifying the default encoding for newly created ports.  If it is
- -   a string, that is the encoding.  If it is #f, it is in the "native"
- -   (Latin-1) encoding.  */
- -SCM_VARIABLE (default_port_encoding_var, "%default-port-encoding");
++/* Slow-path fallback for 'scm_peek_byte_or_eof_unlocked' */
++int
++scm_slow_peek_byte_or_eof_unlocked (SCM port)
++{
++  scm_t_port *pt = SCM_PTAB_ENTRY (port);
+ 
- -static int scm_port_encoding_init = 0;
++  if (pt->rw_active == SCM_PORT_WRITE)
++    scm_flush_unlocked (port);
+ 
- -/* Use ENCODING as the default encoding for future ports.  */
- -void
- -scm_i_set_default_port_encoding (const char *encoding)
++  if (pt->rw_random)
++    pt->rw_active = SCM_PORT_READ;
++
++  if (pt->read_pos >= pt->read_end)
++    {
++      if (SCM_UNLIKELY (scm_i_fill_input_unlocked (port) == EOF))
++        {
++          scm_i_set_pending_eof (port);
++          return EOF;
++        }
++    }
++
++  return *pt->read_pos;
++}
++
+ +/* Move up to READ_LEN bytes from PORT's putback and/or read buffers
+ +   into memory starting at DEST.  Return the number of bytes moved.
+ +   PORT's line/column numbers are left unchanged.  */
+ +size_t
+ +scm_take_from_input_buffers (SCM port, char *dest, size_t read_len)
   {
- -  if (!scm_port_encoding_init
- -      || !scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
- -    scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized",
- -                  SCM_EOL);
+ +  scm_t_port *pt = SCM_PTAB_ENTRY (port);
+ +  size_t bytes_read = 0;
+ +  size_t from_buf = min (pt->read_end - pt->read_pos, read_len);
+ +
+ +  if (from_buf > 0)
+ +    {
+ +      memcpy (dest, pt->read_pos, from_buf);
+ +      pt->read_pos += from_buf;
+ +      bytes_read += from_buf;
+ +      read_len -= from_buf;
+ +      dest += from_buf;
+ +    }
+ +
+ +  /* if putback was active, try the real input buffer too.  */
+ +  if (pt->read_buf == pt->putback_buf)
+ +    {
+ +      from_buf = min (pt->saved_read_end - pt->saved_read_pos, read_len);
+ +      if (from_buf > 0)
+ +      {
+ +        memcpy (dest, pt->saved_read_pos, from_buf);
+ +        pt->saved_read_pos += from_buf;
+ +        bytes_read += from_buf;
+ +      }
+ +    }
   
- -  if (encoding == NULL
- -      || c_strcasecmp (encoding, "ASCII") == 0
- -      || c_strcasecmp (encoding, "ANSI_X3.4-1968") == 0
- -      || c_strcasecmp (encoding, "ISO-8859-1") == 0)
- -    scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F);
- -  else
- -    scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var),
- -                   scm_from_locale_string (encoding));
+ +  return bytes_read;
   }
   
- -/* Return the name of the default encoding for newly created ports; a
- -   return value of NULL means "ISO-8859-1".  */
- -const char *
- -scm_i_default_port_encoding (void)
+ +/* Clear a port's read buffers, returning the contents.  */
+ +SCM_DEFINE (scm_drain_input, "drain-input", 1, 0, 0, 
+ +            (SCM port),
+ +          "This procedure clears a port's input buffers, similar\n"
+ +          "to the way that force-output clears the output buffer.  The\n"
+ +          "contents of the buffers are returned as a single string, e.g.,\n"
+ +          "\n"
+ +          "@lisp\n"
+ +          "(define p (open-input-file ...))\n"
+ +          "(drain-input p) => empty string, nothing buffered yet.\n"
+ +          "(unread-char (read-char p) p)\n"
+ +          "(drain-input p) => initial chars from p, up to the buffer size.\n"
+ +          "@end lisp\n\n"
+ +          "Draining the buffers may be useful for cleanly finishing\n"
+ +          "buffered I/O so that the file descriptor can be used directly\n"
+ +          "for further input.")
+ +#define FUNC_NAME s_scm_drain_input
   {
- -  if (!scm_port_encoding_init)
- -    return NULL;
- -  else if (!scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
- -    return NULL;
- -  else
- -    {
- -      SCM encoding;
+ +  SCM result;
+ +  char *data;
+ +  scm_t_port *pt;
+ +  long count;
   
- -      encoding = scm_fluid_ref (SCM_VARIABLE_REF (default_port_encoding_var));
- -      if (!scm_is_string (encoding))
- -      return NULL;
- -      else
- -      return scm_i_string_chars (encoding);
+ +  SCM_VALIDATE_OPINPORT (1, port);
+ +  pt = SCM_PTAB_ENTRY (port);
+ +
+ +  count = pt->read_end - pt->read_pos;
+ +  if (pt->read_buf == pt->putback_buf)
+ +    count += pt->saved_read_end - pt->saved_read_pos;
+ +
+ +  if (count)
+ +    {
+ +      result = scm_i_make_string (count, &data, 0);
+ +      scm_take_from_input_buffers (port, data, count);
       }
+ +  else
+ +    result = scm_nullstr;
+ +  
+ +  return result;
   }
+ +#undef FUNC_NAME
   
- -/* If the next LEN bytes from PORT are equal to those in BYTES, then
- -   return 1, else return 0.  Leave the port position unchanged.  */
- -static int
- -looking_at_bytes (SCM port, const unsigned char *bytes, int len)
+ +void
+ +scm_end_input_unlocked (SCM port)
   {
+ +  long offset;
     scm_t_port *pt = SCM_PTAB_ENTRY (port);
- -  int i = 0;
   
- -  while (i < len && scm_peek_byte_or_eof (port) == bytes[i])
++  scm_i_clear_pending_eof (port);
+ +  if (pt->read_buf == pt->putback_buf)
       {
- -      pt->read_pos++;
- -      i++;
+ +      offset = pt->read_end - pt->read_pos;
+ +      pt->read_buf = pt->saved_read_buf;
+ +      pt->read_pos = pt->saved_read_pos;
+ +      pt->read_end = pt->saved_read_end;
+ +      pt->read_buf_size = pt->saved_read_buf_size;
       }
- -  scm_i_unget_bytes (bytes, i, port);
- -  return (i == len);
+ +  else
+ +    offset = 0;
+ +
+ +  SCM_PORT_DESCRIPTOR (port)->end_input (port, offset);
   }
   
- -static const unsigned char scm_utf8_bom[3]    = {0xEF, 0xBB, 0xBF};
- -static const unsigned char scm_utf16be_bom[2] = {0xFE, 0xFF};
- -static const unsigned char scm_utf16le_bom[2] = {0xFF, 0xFE};
- -static const unsigned char scm_utf32be_bom[4] = {0x00, 0x00, 0xFE, 0xFF};
- -static const unsigned char scm_utf32le_bom[4] = {0xFF, 0xFE, 0x00, 0x00};
+ +void
+ +scm_end_input (SCM port)
+ +{
+ +  scm_i_pthread_mutex_t *lock;
+ +  scm_c_lock_port (port, &lock);
+ +  scm_end_input_unlocked (port);
+ +  if (lock)
+ +    scm_i_pthread_mutex_unlock (lock);
+ +  
+ +}
   
- -/* Decide what byte order to use for a UTF-16 port.  Return "UTF-16BE"
- -   or "UTF-16LE".  MODE must be either SCM_PORT_READ or SCM_PORT_WRITE,
- -   and specifies which operation is about to be done.  The MODE
- -   determines how we will decide the byte order.  We deliberately avoid
- -   reading from the port unless the user is about to do so.  If the user
- -   is about to read, then we look for a BOM, and if present, we use it
- -   to determine the byte order.  Otherwise we choose big endian, as
- -   recommended by the Unicode Standard.  Note that the BOM (if any) is
- -   not consumed here.  */
- -static const char *
- -decide_utf16_encoding (SCM port, scm_t_port_rw_active mode)
+ +SCM_DEFINE (scm_force_output, "force-output", 0, 1, 0,
+ +           (SCM port),
+ +          "Flush the specified output port, or the current output port if @var{port}\n"
+ +          "is omitted.  The current output buffer contents are passed to the\n"
+ +          "underlying port implementation (e.g., in the case of fports, the\n"
+ +          "data will be written to the file and the output buffer will be cleared.)\n"
+ +          "It has no effect on an unbuffered port.\n\n"
+ +          "The return value is unspecified.")
+ +#define FUNC_NAME s_scm_force_output
   {
- -  if (mode == SCM_PORT_READ
- -      && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
- -      && looking_at_bytes (port, scm_utf16le_bom, sizeof scm_utf16le_bom))
- -    return "UTF-16LE";
+ +  if (SCM_UNBNDP (port))
+ +    port = scm_current_output_port ();
     else
- -    return "UTF-16BE";
+ +    {
+ +      port = SCM_COERCE_OUTPORT (port);
+ +      SCM_VALIDATE_OPOUTPORT (1, port);
+ +    }
+ +  scm_flush_unlocked (port);
+ +  return SCM_UNSPECIFIED;
   }
+ +#undef FUNC_NAME
   
- -/* Decide what byte order to use for a UTF-32 port.  Return "UTF-32BE"
- -   or "UTF-32LE".  See the comment above 'decide_utf16_encoding' for
- -   details.  */
- -static const char *
- -decide_utf32_encoding (SCM port, scm_t_port_rw_active mode)
+ +void
+ +scm_flush_unlocked (SCM port)
   {
- -  if (mode == SCM_PORT_READ
- -      && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
- -      && looking_at_bytes (port, scm_utf32le_bom, sizeof scm_utf32le_bom))
- -    return "UTF-32LE";
- -  else
- -    return "UTF-32BE";
+ +  SCM_PORT_DESCRIPTOR (port)->flush (port);
   }
   
- -static void
- -finalize_iconv_descriptors (void *ptr, void *data)
+ +void
+ +scm_flush (SCM port)
   {
- -  close_iconv_descriptors (ptr);
+ +  scm_i_pthread_mutex_t *lock;
+ +  scm_c_lock_port (port, &lock);
+ +  scm_flush_unlocked (port);
+ +  if (lock)
+ +    scm_i_pthread_mutex_unlock (lock);
+ +  
   }
   
- -static scm_t_iconv_descriptors *
- -open_iconv_descriptors (const char *encoding, int reading, int writing)
++int
++scm_fill_input_unlocked (SCM port)
+ {
- -  scm_t_iconv_descriptors *id;
- -  iconv_t input_cd, output_cd;
++  return scm_i_fill_input_unlocked (port);
++}
+ 
- -  input_cd = (iconv_t) -1;
- -  output_cd = (iconv_t) -1;
- -  if (reading)
- -    {
- -      /* Open an input iconv conversion descriptor, from ENCODING
- -         to UTF-8.  We choose UTF-8, not UTF-32, because iconv
- -         implementations can typically convert from anything to
- -         UTF-8, but not to UTF-32 (see
- -         <http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html>).  */
   
- -      /* Assume opening an iconv descriptor causes about 16 KB of
- -         allocation.  */
- -      scm_gc_register_allocation (16 * 1024);
+ +\f
   
- -      input_cd = iconv_open ("UTF-8", encoding);
- -      if (input_cd == (iconv_t) -1)
- -        goto invalid_encoding;
- -    }
+ +/* Output.  */
   
- -  if (writing)
- -    {
- -      /* Assume opening an iconv descriptor causes about 16 KB of
- -         allocation.  */
- -      scm_gc_register_allocation (16 * 1024);
+ +void
+ +scm_putc (char c, SCM port)
+ +{
+ +  scm_i_pthread_mutex_t *lock;
+ +  scm_c_lock_port (port, &lock);
+ +  scm_putc_unlocked (c, port);
+ +  if (lock)
+ +    scm_i_pthread_mutex_unlock (lock);
+ +  
+ +}
   
- -      output_cd = iconv_open (encoding, "UTF-8");
- -      if (output_cd == (iconv_t) -1)
- -        {
- -          if (input_cd != (iconv_t) -1)
- -            iconv_close (input_cd);
- -          goto invalid_encoding;
- -        }
- -    }
+ +void
+ +scm_puts (const char *s, SCM port)
+ +{
+ +  scm_i_pthread_mutex_t *lock;
+ +  scm_c_lock_port (port, &lock);
+ +  scm_puts_unlocked (s, port);
+ +  if (lock)
+ +    scm_i_pthread_mutex_unlock (lock);
+ +  
+ +}
+ +  
+ +/* scm_c_write
+ + *
+ + * Used by an application to write arbitrary number of bytes to an SCM
+ + * port.  Similar semantics as libc write.  However, unlike libc
+ + * write, scm_c_write writes the requested number of bytes and has no
+ + * return value.
+ + *
+ + * Warning: Doesn't update port line and column counts!
+ + */
+ +void
+ +scm_c_write_unlocked (SCM port, const void *ptr, size_t size)
+ +#define FUNC_NAME "scm_c_write"
+ +{
+ +  scm_t_port *pt;
+ +  scm_t_ptob_descriptor *ptob;
   
- -  id = scm_gc_malloc_pointerless (sizeof (*id), "iconv descriptors");
- -  id->input_cd = input_cd;
- -  id->output_cd = output_cd;
+ +  SCM_VALIDATE_OPOUTPORT (1, port);
   
- -  /* Register a finalizer to close the descriptors.  */
- -  scm_i_set_finalizer (id, finalize_iconv_descriptors, NULL);
+ +  pt = SCM_PTAB_ENTRY (port);
+ +  ptob = SCM_PORT_DESCRIPTOR (port);
   
- -  return id;
+ +  if (pt->rw_active == SCM_PORT_READ)
+ +    scm_end_input_unlocked (port);
   
- - invalid_encoding:
- -  {
- -    SCM err;
- -    err = scm_from_locale_string (encoding);
- -    scm_misc_error ("open_iconv_descriptors",
- -                    "invalid or unknown character encoding ~s",
- -                    scm_list_1 (err));
- -  }
+ +  ptob->write (port, ptr, size);
+ +
+ +  if (pt->rw_random)
+ +    pt->rw_active = SCM_PORT_WRITE;
   }
+ +#undef FUNC_NAME
   
- -static void
- -close_iconv_descriptors (scm_t_iconv_descriptors *id)
+ +void
+ +scm_c_write (SCM port, const void *ptr, size_t size)
   {
- -  if (id->input_cd != (iconv_t) -1)
- -    iconv_close (id->input_cd);
- -  if (id->output_cd != (iconv_t) -1)
- -    iconv_close (id->output_cd);
- -  id->input_cd = (void *) -1;
- -  id->output_cd = (void *) -1;
+ +  scm_i_pthread_mutex_t *lock;
+ +  scm_c_lock_port (port, &lock);
+ +  scm_c_write_unlocked (port, ptr, size);
+ +  if (lock)
+ +    scm_i_pthread_mutex_unlock (lock);
+ +  
   }
   
- -/* Return the iconv_descriptors, initializing them if necessary.  MODE
- -   must be either SCM_PORT_READ or SCM_PORT_WRITE, and specifies which
- -   operation is about to be done.  We deliberately avoid reading from
- -   the port unless the user was about to do so.  */
- -scm_t_iconv_descriptors *
- -scm_i_port_iconv_descriptors (SCM port, scm_t_port_rw_active mode)
+ +/* scm_lfwrite
+ + *
+ + * This function differs from scm_c_write; it updates port line and
+ + * column. */
+ +void
+ +scm_lfwrite_unlocked (const char *ptr, size_t size, SCM port)
   {
- -  scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
- -
- -  assert (pti->encoding_mode == SCM_PORT_ENCODING_MODE_ICONV);
- -
- -  if (!pti->iconv_descriptors)
- -    {
- -      scm_t_port *pt = SCM_PTAB_ENTRY (port);
- -      const char *precise_encoding;
- -
- -      if (!pt->encoding)
- -        pt->encoding = "ISO-8859-1";
+ +  scm_t_port *pt = SCM_PTAB_ENTRY (port);
+ +  scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port);
   
- -      /* If the specified encoding is UTF-16 or UTF-32, then make
- -         that more precise by deciding what byte order to use. */
- -      if (c_strcasecmp (pt->encoding, "UTF-16") == 0)
- -        precise_encoding = decide_utf16_encoding (port, mode);
- -      else if (c_strcasecmp (pt->encoding, "UTF-32") == 0)
- -        precise_encoding = decide_utf32_encoding (port, mode);
- -      else
- -        precise_encoding = pt->encoding;
+ +  if (pt->rw_active == SCM_PORT_READ)
+ +    scm_end_input_unlocked (port);
   
- -      pti->iconv_descriptors =
- -        open_iconv_descriptors (precise_encoding,
- -                                SCM_INPUT_PORT_P (port),
- -                                SCM_OUTPUT_PORT_P (port));
- -    }
+ +  ptob->write (port, ptr, size);
   
- -  return pti->iconv_descriptors;
+ +  for (; size; ptr++, size--)
+ +    update_port_lf ((scm_t_wchar) (unsigned char) *ptr, port);
+ +
+ +  if (pt->rw_random)
+ +    pt->rw_active = SCM_PORT_WRITE;
   }
   
   void
@@@ -2466,193 -2589,146 +2712,203 @@@ SCM_DEFINE (scm_char_ready_p, "char-rea
   }
   #undef FUNC_NAME
   
- -SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0,
- -          (SCM port, SCM enc),
- -          "Sets the character encoding that will be used to interpret all\n"
- -          "port I/O.  New ports are created with the encoding\n"
- -          "appropriate for the current locale if @code{setlocale} has \n"
- -          "been called or ISO-8859-1 otherwise\n"
- -          "and this procedure can be used to modify that encoding.\n")
- -#define FUNC_NAME s_scm_set_port_encoding_x
+ +SCM_DEFINE (scm_seek, "seek", 3, 0, 0,
+ +            (SCM fd_port, SCM offset, SCM whence),
+ +          "Sets the current position of @var{fd_port} to the integer\n"
+ +          "@var{offset}, which is interpreted according to the value of\n"
+ +          "@var{whence}.\n"
+ +          "\n"
+ +          "One of the following variables should be supplied for\n"
+ +          "@var{whence}:\n"
+ +          "@defvar SEEK_SET\n"
+ +          "Seek from the beginning of the file.\n"
+ +          "@end defvar\n"
+ +          "@defvar SEEK_CUR\n"
+ +          "Seek from the current position.\n"
+ +          "@end defvar\n"
+ +          "@defvar SEEK_END\n"
+ +          "Seek from the end of the file.\n"
+ +          "@end defvar\n"
+ +          "If @var{fd_port} is a file descriptor, the underlying system\n"
+ +          "call is @code{lseek}.  @var{port} may be a string port.\n"
+ +          "\n"
+ +          "The value returned is the new position in the file.  This means\n"
+ +          "that the current position of a port can be obtained using:\n"
+ +          "@lisp\n"
+ +          "(seek port 0 SEEK_CUR)\n"
+ +          "@end lisp")
+ +#define FUNC_NAME s_scm_seek
   {
- -  char *enc_str;
+ +  int how;
   
- -  SCM_VALIDATE_PORT (1, port);
- -  SCM_VALIDATE_STRING (2, enc);
+ +  fd_port = SCM_COERCE_OUTPORT (fd_port);
   
- -  enc_str = scm_to_locale_string (enc);
- -  scm_i_set_port_encoding_x (port, enc_str);
- -  free (enc_str);
+ +  how = scm_to_int (whence);
+ +  if (how != SEEK_SET && how != SEEK_CUR && how != SEEK_END)
+ +    SCM_OUT_OF_RANGE (3, whence);
   
- -  return SCM_UNSPECIFIED;
- -}
- -#undef FUNC_NAME
+ +  if (SCM_OPPORTP (fd_port))
+ +    {
++      scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (fd_port);
+ +      scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (fd_port);
+ +      off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
+ +      off_t_or_off64_t rv;
   
-       rv = ptob->seek (fd_port, off, how);
+ +      if (!ptob->seek)
+ +      SCM_MISC_ERROR ("port is not seekable", 
+ +                        scm_cons (fd_port, SCM_EOL));
+ +      else
- -/* A fluid specifying the default conversion handler for newly created
- -   ports.  Its value should be one of the symbols below.  */
- -SCM_VARIABLE (default_conversion_strategy_var,
- -            "%default-port-conversion-strategy");
++        rv = ptob->seek (fd_port, off, how);
+ 
- -/* Whether the above fluid is initialized.  */
- -static int scm_conversion_strategy_init = 0;
++      /* Set stream-start flags according to new position. */
++      pti->at_stream_start_for_bom_read  = (rv == 0);
++      pti->at_stream_start_for_bom_write = (rv == 0);
+ 
- -/* The possible conversion strategies.  */
- -SCM_SYMBOL (sym_error, "error");
- -SCM_SYMBOL (sym_substitute, "substitute");
- -SCM_SYMBOL (sym_escape, "escape");
++      scm_i_clear_pending_eof (fd_port);
+ 
+ +      return scm_from_off_t_or_off64_t (rv);
+ +    }
+ +  else /* file descriptor?.  */
+ +    {
+ +      off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
+ +      off_t_or_off64_t rv;
+ +      rv = lseek_or_lseek64 (scm_to_int (fd_port), off, how);
+ +      if (rv == -1)
+ +      SCM_SYSERROR;
+ +      return scm_from_off_t_or_off64_t (rv);
+ +    }
+ +}
+ +#undef FUNC_NAME
   
- -/* Return the default failed encoding conversion policy for new created
- -   ports.  */
- -scm_t_string_failed_conversion_handler
- -scm_i_default_port_conversion_handler (void)
- -{
- -  scm_t_string_failed_conversion_handler handler;
+ +#ifndef O_BINARY
+ +#define O_BINARY 0
+ +#endif
   
- -  if (!scm_conversion_strategy_init
- -      || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
- -    handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
- -  else
- -    {
- -      SCM fluid, value;
+ +/* Mingw has ftruncate(), perhaps implemented above using chsize, but
+ +   doesn't have the filename version truncate(), hence this code.  */
+ +#if HAVE_FTRUNCATE && ! HAVE_TRUNCATE
+ +static int
+ +truncate (const char *file, off_t length)
+ +{
+ +  int ret, fdes;
   
- -      fluid = SCM_VARIABLE_REF (default_conversion_strategy_var);
- -      value = scm_fluid_ref (fluid);
+ +  fdes = open (file, O_BINARY | O_WRONLY);
+ +  if (fdes == -1)
+ +    return -1;
   
- -      if (scm_is_eq (sym_substitute, value))
- -      handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
- -      else if (scm_is_eq (sym_escape, value))
- -      handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
- -      else
- -      /* Default to 'error also when the fluid's value is not one of
- -         the valid symbols.  */
- -      handler = SCM_FAILED_CONVERSION_ERROR;
+ +  ret = ftruncate (fdes, length);
+ +  if (ret == -1)
+ +    {
+ +      int save_errno = errno;
+ +      close (fdes);
+ +      errno = save_errno;
+ +      return -1;
       }
   
- -  return handler;
+ +  return close (fdes);
   }
+ +#endif /* HAVE_FTRUNCATE && ! HAVE_TRUNCATE */
   
- -/* Use HANDLER as the default conversion strategy for future ports.  */
- -void
- -scm_i_set_default_port_conversion_handler (scm_t_string_failed_conversion_handler
- -                                         handler)
+ +SCM_DEFINE (scm_truncate_file, "truncate-file", 1, 1, 0,
+ +            (SCM object, SCM length),
+ +          "Truncate file @var{object} to @var{length} bytes.  @var{object}\n"
+ +          "can be a filename string, a port object, or an integer file\n"
+ +          "descriptor.\n"
+ +          "The return value is unspecified.\n"
+ +          "\n"
+ +          "For a port or file descriptor @var{length} can be omitted, in\n"
+ +          "which case the file is truncated at the current position (per\n"
+ +          "@code{ftell} above).\n"
+ +          "\n"
+ +          "On most systems a file can be extended by giving a length\n"
+ +          "greater than the current size, but this is not mandatory in the\n"
+ +          "POSIX standard.")
+ +#define FUNC_NAME s_scm_truncate_file
   {
- -  SCM strategy;
- -
- -  if (!scm_conversion_strategy_init
- -      || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
- -    scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized",
- -                  SCM_EOL);
+ +  int rv;
   
- -  switch (handler)
- -    {
- -    case SCM_FAILED_CONVERSION_ERROR:
- -      strategy = sym_error;
- -      break;
+ +  /* "object" can be a port, fdes or filename.
   
- -    case SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE:
- -      strategy = sym_escape;
- -      break;
+ +     Negative "length" makes no sense, but it's left to truncate() or
+ +     ftruncate() to give back an error for that (normally EINVAL).
+ +     */
   
- -    case SCM_FAILED_CONVERSION_QUESTION_MARK:
- -      strategy = sym_substitute;
- -      break;
+ +  if (SCM_UNBNDP (length))
+ +    {
+ +      /* must supply length if object is a filename.  */
+ +      if (scm_is_string (object))
+ +        SCM_MISC_ERROR("must supply length if OBJECT is a filename", SCM_EOL);
+ +      
+ +      length = scm_seek (object, SCM_INUM0, scm_from_int (SEEK_CUR));
+ +    }
   
- -    default:
- -      abort ();
+ +  object = SCM_COERCE_OUTPORT (object);
+ +  if (scm_is_integer (object))
+ +    {
+ +      off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
+ +      SCM_SYSCALL (rv = ftruncate_or_ftruncate64 (scm_to_int (object),
+ +                                                  c_length));
       }
-       
+ +  else if (SCM_OPOUTPORTP (object))
+ +    {
+ +      off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
+ +      scm_t_port *pt = SCM_PTAB_ENTRY (object);
+ +      scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (object);
- -  scm_fluid_set_x (SCM_VARIABLE_REF (default_conversion_strategy_var),
- -                 strategy);
- -}
+ 
- -SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy",
- -          1, 0, 0, (SCM port),
- -          "Returns the behavior of the port when handling a character that\n"
- -          "is not representable in the port's current encoding.\n"
- -          "It returns the symbol @code{error} if unrepresentable characters\n"
- -          "should cause exceptions, @code{substitute} if the port should\n"
- -          "try to replace unrepresentable characters with question marks or\n"
- -          "approximate characters, or @code{escape} if unrepresentable\n"
- -          "characters should be converted to string escapes.\n"
- -          "\n"
- -          "If @var{port} is @code{#f}, then the current default behavior\n"
- -          "will be returned.  New ports will have this default behavior\n"
- -          "when they are created.\n")
- -#define FUNC_NAME s_scm_port_conversion_strategy
- -{
- -  scm_t_string_failed_conversion_handler h;
+ +      if (!ptob->truncate)
+ +      SCM_MISC_ERROR ("port is not truncatable", SCM_EOL);
+ 
-       
++      scm_i_clear_pending_eof (object);
+ +      if (pt->rw_active == SCM_PORT_READ)
+ +      scm_end_input_unlocked (object);
+ +      else if (pt->rw_active == SCM_PORT_WRITE)
+ +      ptob->flush (object);
- -  if (scm_is_false (port))
- -    h = scm_i_default_port_conversion_handler ();
+ 
+ +      ptob->truncate (object, c_length);
+ +      rv = 0;
+ +    }
     else
       {
- -      scm_t_port *pt;
- -
- -      SCM_VALIDATE_OPPORT (1, port);
- -      pt = SCM_PTAB_ENTRY (port);
- -
- -      h = pt->ilseq_handler;
+ +      off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
+ +      char *str = scm_to_locale_string (object);
+ +      int eno;
+ +      SCM_SYSCALL (rv = truncate_or_truncate64 (str, c_length));
+ +      eno = errno;
+ +      free (str);
+ +      errno = eno;
       }
+ +  if (rv == -1)
+ +    SCM_SYSERROR;
+ +  return SCM_UNSPECIFIED;
+ +}
+ +#undef FUNC_NAME
   
- -  if (h == SCM_FAILED_CONVERSION_ERROR)
- -    return scm_from_latin1_symbol ("error");
- -  else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK)
- -    return scm_from_latin1_symbol ("substitute");
- -  else if (h == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
- -    return scm_from_latin1_symbol ("escape");
- -  else
- -    abort ();
+ +SCM_DEFINE (scm_port_line, "port-line", 1, 0, 0,
+ +            (SCM port),
+ +          "Return the current line number for @var{port}.\n"
+ +          "\n"
+ +          "The first line of a file is 0.  But you might want to add 1\n"
+ +          "when printing line numbers, since starting from 1 is\n"
+ +          "traditional in error messages, and likely to be more natural to\n"
+ +          "non-programmers.")
+ +#define FUNC_NAME s_scm_port_line
+ +{
+ +  port = SCM_COERCE_OUTPORT (port);
+ +  SCM_VALIDATE_OPENPORT (1, port);
+ +  return scm_from_long (SCM_LINUM (port));
+ +}
+ +#undef FUNC_NAME
   
- -  /* Never gets here. */
- -  return SCM_UNDEFINED;
+ +SCM_DEFINE (scm_set_port_line_x, "set-port-line!", 2, 0, 0,
+ +            (SCM port, SCM line),
+ +          "Set the current line number for @var{port} to @var{line}.  The\n"
+ +          "first line of a file is 0.")
+ +#define FUNC_NAME s_scm_set_port_line_x
+ +{
+ +  port = SCM_COERCE_OUTPORT (port);
+ +  SCM_VALIDATE_OPENPORT (1, port);
+ +  SCM_PTAB_ENTRY (port)->line_number = scm_to_long (line);
+ +  return SCM_UNSPECIFIED;
   }
   #undef FUNC_NAME
   
diff --cc libguile/ports.h

index c1ba719,39317f8..8064489
--- 1/libguile/ports.h
--- 2/libguile/ports.h
+++ b/libguile/ports.h
@@@ -3,8 -3,8 +3,8 @@@
   #ifndef SCM_PORTS_H
   #define SCM_PORTS_H
   
- -/* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2006,
- - *   2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
+ +/* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004,
-  *   2006, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
++ *   2006, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
    *
    * This library is free software; you can redistribute it and/or
    * modify it under the terms of the GNU Lesser General Public License
@@@ -48,28 -43,14 +48,19 @@@ typedef enum scm_t_port_rw_active 
     SCM_PORT_WRITE = 2
   } scm_t_port_rw_active;
   
- typedef enum scm_t_port_encoding_mode {
-   SCM_PORT_ENCODING_MODE_UTF8,
-   SCM_PORT_ENCODING_MODE_LATIN1,
-   SCM_PORT_ENCODING_MODE_ICONV
- } scm_t_port_encoding_mode;
- 
- /* This is a separate object so that only those ports that use iconv
-    cause finalizers to be registered.  */
- typedef struct scm_t_iconv_descriptors
- {
-   /* input/output iconv conversion descriptors */
-   void *input_cd;
-   void *output_cd;
- } scm_t_iconv_descriptors;
++/* An internal-only structure defined in ports-internal.h. */
++struct scm_port_internal;
+ +
   /* C representation of a Scheme port.  */
   
   typedef struct 
   {
     SCM port;                   /* Link back to the port object.  */
- -  int revealed;                       /* 0 not revealed, > 1 revealed.
- -                               * Revealed ports do not get GC'd.
- -                               */
+ +  scm_i_pthread_mutex_t *lock;  /* A recursive lock for this port.  */
+ +
++  /* pointer to internal-only port structure */
++  struct scm_port_internal *internal;
++
     /* data for the underlying port implementation as a raw C value. */
     scm_t_bits stream;
   
@@@ -127,15 -112,22 +118,9 @@@
     unsigned char *putback_buf;
     size_t putback_buf_size;        /* allocated size of putback_buf.  */
   
- -  /* IMPORTANT: 'input_cd' and 'output_cd' used to be pointers to the
- -     input and output iconv descriptors, but those have been moved to
- -     the internal-only port structure defined in ports-internal.h.
- -
- -     Given that we must preserve ABI compatibility in 2.0, we cannot
- -     safely change this public structure without running afoul of C
- -     strict aliasing rules.  We cannot even change the member names.
- -
- -     To work around this, in this public structure, 'input_cd' has been
- -     repurposed to be a pointer to the internal port structure (see
- -     ports-internal.h), and 'output_cd' is now unused.
- -
- -     This will be cleaned up in 2.2.  */
- -
- -  void *input_cd;   /* XXX actually a pointer to scm_t_port_internal */
- -  void *output_cd;  /* XXX actually unused */
+ +  /* Character encoding support  */
+ +  char *encoding;
-   scm_t_port_encoding_mode encoding_mode;
+ +  scm_t_string_failed_conversion_handler ilseq_handler;
-   scm_t_iconv_descriptors *iconv_descriptors;
- 
-   /* an alist for storing additional information
-      (e.g. used to store per-port read options) */
-   SCM alist;
   } scm_t_port;
   
   
@@@ -267,74 -270,42 +252,77 @@@ SCM_API SCM scm_set_current_warning_por
   SCM_API void scm_dynwind_current_input_port (SCM port);
   SCM_API void scm_dynwind_current_output_port (SCM port);
   SCM_API void scm_dynwind_current_error_port (SCM port);
- -SCM_API SCM scm_new_port_table_entry (scm_t_bits tag);
- -SCM_API void scm_grow_port_cbuf (SCM port, size_t requested);
- -SCM_API SCM scm_pt_size (void);
- -SCM_API SCM scm_pt_member (SCM member);
- -SCM_API void scm_port_non_buffer (scm_t_port *pt);
- -SCM_API int scm_revealed_count (SCM port);
- -SCM_API SCM scm_port_revealed (SCM port);
- -SCM_API SCM scm_set_port_revealed_x (SCM port, SCM rcount);
+ +SCM_INTERNAL void scm_i_dynwind_current_load_port (SCM port);
+ +
+ +/* Mode bits.  */
+ +SCM_INTERNAL long scm_i_mode_bits (SCM modes);
   SCM_API long scm_mode_bits (char *modes);
   SCM_API SCM scm_port_mode (SCM port);
- -SCM_API SCM scm_close_input_port (SCM port);
- -SCM_API SCM scm_close_output_port (SCM port);
- -SCM_API SCM scm_close_port (SCM port);
- -SCM_API SCM scm_port_for_each (SCM proc);
- -SCM_API void scm_c_port_for_each (void (*proc)(void *data, SCM p), void *data);
+ +
+ +/* Low-level constructors.  */
+ +SCM_API SCM
+ +scm_c_make_port_with_encoding (scm_t_bits tag,
+ +                               unsigned long mode_bits,
+ +                               const char *encoding,
+ +                               scm_t_string_failed_conversion_handler handler,
+ +                               scm_t_bits stream);
+ +SCM_API SCM scm_c_make_port (scm_t_bits tag, unsigned long mode_bits,
+ +                             scm_t_bits stream);
+ +SCM_API SCM scm_new_port_table_entry (scm_t_bits tag);
+ +
+ +/* Predicates.  */
+ +SCM_API SCM scm_port_p (SCM x);
   SCM_API SCM scm_input_port_p (SCM x);
   SCM_API SCM scm_output_port_p (SCM x);
- -SCM_API SCM scm_port_p (SCM x);
   SCM_API SCM scm_port_closed_p (SCM port);
   SCM_API SCM scm_eof_object_p (SCM x);
- -SCM_API SCM scm_force_output (SCM port);
- -SCM_API SCM scm_flush_all_ports (void);
- -SCM_API SCM scm_read_char (SCM port);
- -SCM_API scm_t_wchar scm_getc (SCM port);
+ +
+ +/* Closing ports.  */
+ +SCM_API SCM scm_close_port (SCM port);
+ +SCM_API SCM scm_close_input_port (SCM port);
+ +SCM_API SCM scm_close_output_port (SCM port);
+ +
+ +/* Encoding characters to byte streams, and decoding byte streams to
+ +   characters.  */
+ +SCM_INTERNAL const char *scm_i_default_port_encoding (void);
+ +SCM_INTERNAL void scm_i_set_default_port_encoding (const char *);
+ +SCM_INTERNAL scm_t_string_failed_conversion_handler
+ +scm_i_default_port_conversion_handler (void);
+ +SCM_INTERNAL void
+ +scm_i_set_default_port_conversion_handler (scm_t_string_failed_conversion_handler);
- SCM_INTERNAL scm_t_iconv_descriptors *scm_i_port_iconv_descriptors (SCM port);
+ +SCM_INTERNAL void scm_i_set_port_encoding_x (SCM port, const char *str);
+ +SCM_API SCM scm_port_encoding (SCM port);
+ +SCM_API SCM scm_set_port_encoding_x (SCM port, SCM encoding);
+ +SCM_API SCM scm_port_conversion_strategy (SCM port);
+ +SCM_API SCM scm_set_port_conversion_strategy_x (SCM port, SCM behavior);
+ +
+ +/* Acquiring and releasing the port lock.  */
+ +SCM_API void scm_dynwind_lock_port (SCM port);
+ +SCM_INLINE int scm_c_lock_port (SCM port, scm_i_pthread_mutex_t **lock);
+ +SCM_INLINE int scm_c_try_lock_port (SCM port, scm_i_pthread_mutex_t **lock);
+ +
+ +/* Input.  */
+ +SCM_API int scm_get_byte_or_eof (SCM port);
+ +SCM_INLINE int scm_get_byte_or_eof_unlocked (SCM port);
++SCM_API int scm_slow_get_byte_or_eof_unlocked (SCM port);
+ +SCM_API int scm_peek_byte_or_eof (SCM port);
+ +SCM_INLINE int scm_peek_byte_or_eof_unlocked (SCM port);
++SCM_API int scm_slow_peek_byte_or_eof_unlocked (SCM port);
   SCM_API size_t scm_c_read (SCM port, void *buffer, size_t size);
- -SCM_API void scm_c_write (SCM port, const void *buffer, size_t size);
- -SCM_API void scm_lfwrite (const char *ptr, size_t size, SCM port);
- -SCM_INTERNAL void scm_lfwrite_substr (SCM str, size_t start, size_t end,
- -                                    SCM port);
- -SCM_API void scm_flush (SCM port);
- -SCM_API void scm_end_input (SCM port);
- -SCM_API int scm_fill_input (SCM port);
+ +SCM_API size_t scm_c_read_unlocked (SCM port, void *buffer, size_t size);
+ +SCM_API scm_t_wchar scm_getc (SCM port);
+ +SCM_API scm_t_wchar scm_getc_unlocked (SCM port);
+ +SCM_API SCM scm_read_char (SCM port);
+ +
+ +/* Pushback.  */
+ SCM_API void scm_unget_bytes (const unsigned char *buf, size_t len, SCM port);
++SCM_API void scm_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port);
   SCM_API void scm_unget_byte (int c, SCM port);
+ +SCM_API void scm_unget_byte_unlocked (int c, SCM port);
   SCM_API void scm_ungetc (scm_t_wchar c, SCM port);
+ +SCM_API void scm_ungetc_unlocked (scm_t_wchar c, SCM port);
   SCM_API void scm_ungets (const char *s, int n, SCM port);
+ +SCM_API void scm_ungets_unlocked (const char *s, int n, SCM port);
   SCM_API SCM scm_peek_char (SCM port);
   SCM_API SCM scm_unread_char (SCM cobj, SCM port);
   SCM_API SCM scm_unread_string (SCM str, SCM port);
@@@ -373,116 -317,43 +361,96 @@@ SCM_API SCM scm_port_column (SCM port)
   SCM_API SCM scm_set_port_column_x (SCM port, SCM line);
   SCM_API SCM scm_port_filename (SCM port);
   SCM_API SCM scm_set_port_filename_x (SCM port, SCM filename);
- -SCM_INTERNAL const char *scm_i_default_port_encoding (void);
- -SCM_INTERNAL void scm_i_set_default_port_encoding (const char *);
- -SCM_INTERNAL void scm_i_set_port_encoding_x (SCM port, const char *str);
- -SCM_API SCM scm_port_encoding (SCM port);
- -SCM_API SCM scm_set_port_encoding_x (SCM port, SCM encoding);
- -SCM_INTERNAL scm_t_string_failed_conversion_handler
- -scm_i_default_port_conversion_handler (void);
- -/* Use HANDLER as the default conversion strategy for future ports.  */
- -SCM_INTERNAL void
- -scm_i_set_default_port_conversion_handler (scm_t_string_failed_conversion_handler);
- -SCM_API int scm_slow_get_byte_or_eof (SCM port);
- -SCM_API int scm_slow_peek_byte_or_eof (SCM port);
+ +
++/* Port alist.  */
+ SCM_INTERNAL SCM scm_i_port_alist (SCM port);
+ SCM_INTERNAL void scm_i_set_port_alist_x (SCM port, SCM alist);
- -SCM_API SCM scm_port_conversion_strategy (SCM port);
- -SCM_API SCM scm_set_port_conversion_strategy_x (SCM port, SCM behavior);
+ 
+ +/* Implementation helpers for port printing functions.  */
   SCM_API int scm_port_print (SCM exp, SCM port, scm_print_state *);
   SCM_API void scm_print_port_mode (SCM exp, SCM port);
+ +
+ +/* Iterating over all ports.  */
+ +SCM_API SCM scm_port_for_each (SCM proc);
+ +SCM_API void scm_c_port_for_each (void (*proc)(void *data, SCM p), void *data);
+ +SCM_API SCM scm_flush_all_ports (void);
+ +
+ +/* Void ports.  */
   SCM_API SCM scm_void_port (char * mode_str);
   SCM_API SCM scm_sys_make_void_port (SCM mode);
+ +
+ +/* Initialization.  */
   SCM_INTERNAL void scm_init_ports (void);
   
- -#if SCM_ENABLE_DEPRECATED==1
- -SCM_DEPRECATED scm_t_port * scm_add_to_port_table (SCM port);
- -#endif
   
- -#ifdef GUILE_DEBUG
- -SCM_API SCM scm_pt_size (void);
- -SCM_API SCM scm_pt_member (SCM member);
- -#endif /* GUILE_DEBUG */
+ +/* Inline function implementations.  */
   
- -/* internal */
+ +#if SCM_CAN_INLINE || defined SCM_INLINE_C_IMPLEMENTING_INLINES
+ +SCM_INLINE_IMPLEMENTATION int
+ +scm_c_lock_port (SCM port, scm_i_pthread_mutex_t **lock)
+ +{
+ +  *lock = SCM_PTAB_ENTRY (port)->lock;
   
- -SCM_INTERNAL long scm_i_mode_bits (SCM modes);
- -SCM_INTERNAL void scm_i_dynwind_current_load_port (SCM port);
+ +  if (*lock)
+ +    return scm_i_pthread_mutex_lock (*lock);
+ +  else
+ +    return 0;
+ +}
+ +
+ +SCM_INLINE_IMPLEMENTATION int
+ +scm_c_try_lock_port (SCM port, scm_i_pthread_mutex_t **lock)
+ +{
+ +  *lock = SCM_PTAB_ENTRY (port)->lock;
+ +  if (*lock)
+ +    {
+ +      int ret = scm_i_pthread_mutex_trylock (*lock);
+ +      if (ret != 0)
+ +        *lock = NULL;
+ +      return ret;
+ +    }
+ +  else
+ +    return 0;
+ +}
+ +
+ +SCM_INLINE_IMPLEMENTATION int
+ +scm_get_byte_or_eof_unlocked (SCM port)
+ +{
-   int c;
+ +  scm_t_port *pt = SCM_PTAB_ENTRY (port);
+ +
-   if (pt->rw_active == SCM_PORT_WRITE)
-     /* may be marginally faster than calling scm_flush.  */
-     SCM_PORT_DESCRIPTOR (port)->flush (port);
- 
-   if (pt->rw_random)
-     pt->rw_active = SCM_PORT_READ;
- 
-   if (pt->read_pos >= pt->read_end)
-     {
-       if (SCM_UNLIKELY (scm_fill_input_unlocked (port) == EOF))
-       return EOF;
-     }
- 
-   c = *(pt->read_pos++);
- 
-   return c;
++  if (SCM_LIKELY ((pt->rw_active == SCM_PORT_READ || !pt->rw_random)
++                  && pt->read_pos < pt->read_end))
++    return *pt->read_pos++;
++  else
++    return scm_slow_get_byte_or_eof_unlocked (port);
+ +}
+ +
+ +/* Like `scm_get_byte_or_eof' but does not change PORT's `read_pos'.  */
+ +SCM_INLINE_IMPLEMENTATION int
+ +scm_peek_byte_or_eof_unlocked (SCM port)
+ +{
-   int c;
+ +  scm_t_port *pt = SCM_PTAB_ENTRY (port);
+ +
-   if (pt->rw_active == SCM_PORT_WRITE)
-     /* may be marginally faster than calling scm_flush.  */
-     SCM_PORT_DESCRIPTOR (port)->flush (port);
- 
-   if (pt->rw_random)
-     pt->rw_active = SCM_PORT_READ;
- 
-   if (pt->read_pos >= pt->read_end)
-     {
-       if (SCM_UNLIKELY (scm_fill_input_unlocked (port) == EOF))
-       return EOF;
-     }
- 
-   c = *pt->read_pos;
- 
-   return c;
++  if (SCM_LIKELY ((pt->rw_active == SCM_PORT_READ || !pt->rw_random)
++                  && pt->read_pos < pt->read_end))
++    return *pt->read_pos;
++  else
++    return scm_slow_peek_byte_or_eof_unlocked (port);
+ +}
   
+ +SCM_INLINE_IMPLEMENTATION void
+ +scm_putc_unlocked (char c, SCM port)
+ +{
+ +  SCM_ASSERT_TYPE (SCM_OPOUTPORTP (port), port, 0, NULL, "output port");
+ +  scm_lfwrite_unlocked (&c, 1, port);
+ +}
+ +
+ +SCM_INLINE_IMPLEMENTATION void
+ +scm_puts_unlocked (const char *s, SCM port)
+ +{
+ +  SCM_ASSERT_TYPE (SCM_OPOUTPORTP (port), port, 0, NULL, "output port");
+ +  scm_lfwrite_unlocked (s, strlen (s), port);
+ +}
+ +#endif  /* SCM_CAN_INLINE || defined SCM_INLINE_C_IMPLEMENTING_INLINES */
   
   #endif  /* SCM_PORTS_H */
   
diff --cc libguile/print.c

index 5d5c56d,fa84990..6524091
--- 1/libguile/print.c
--- 2/libguile/print.c
+++ b/libguile/print.c
@@@ -947,8 -882,24 +948,24 @@@ display_string_using_iconv (const void 
   {
     size_t printed;
     scm_t_iconv_descriptors *id;
+   scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
   
-   id = scm_i_port_iconv_descriptors (port);
+   id = scm_i_port_iconv_descriptors (port, SCM_PORT_WRITE);
+ 
+   if (SCM_UNLIKELY (pti->at_stream_start_for_bom_write && len > 0))
+     {
+       scm_t_port *pt = SCM_PTAB_ENTRY (port);
+ 
+       /* Record that we're no longer at stream start.  */
+       pti->at_stream_start_for_bom_write = 0;
+       if (pt->rw_random)
+         pti->at_stream_start_for_bom_read = 0;
+ 
+       /* Write a BOM if appropriate.  */
- -      if (SCM_UNLIKELY (c_strcasecmp(pt->encoding, "UTF-16") == 0
- -                        || c_strcasecmp(pt->encoding, "UTF-32") == 0))
++      if (SCM_UNLIKELY (strcmp(pt->encoding, "UTF-16") == 0
++                        || strcmp(pt->encoding, "UTF-32") == 0))
+         display_character (SCM_UNICODE_BOM, port, iconveh_error);
+     }
   
     printed = 0;
   
@@@ -1045,17 -996,17 +1062,17 @@@ static size_
   display_string (const void *str, int narrow_p,
                 size_t len, SCM port,
                 scm_t_string_failed_conversion_handler strategy)
- -
   {
-   scm_t_port *pt;
+   scm_t_port_internal *pti;
   
-   pt = SCM_PTAB_ENTRY (port);
+   pti = SCM_PORT_GET_INTERNAL (port);
   
-   if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
+   if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
       return display_string_as_utf8 (str, narrow_p, len, port);
-   else if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
++  else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
+ +    return display_string_as_latin1 (str, narrow_p, len, port, strategy);
     else
- -    return display_string_using_iconv (str, narrow_p, len,
- -                                     port, strategy);
+ +    return display_string_using_iconv (str, narrow_p, len, port, strategy);
   }
   
   /* Attempt to display CH to PORT according to STRATEGY.  Return non-zero
diff --cc libguile/r6rs-ports.c

index 7d07051,fecc5bd..c6ad90a
--- 1/libguile/r6rs-ports.c
--- 2/libguile/r6rs-ports.c
+++ b/libguile/r6rs-ports.c
@@@ -534,72 -550,41 +534,41 @@@ SCM_DEFINE (scm_get_bytevector_n_x, "ge
   
   SCM_DEFINE (scm_get_bytevector_some, "get-bytevector-some", 1, 0, 0,
             (SCM port),
-           "Read from @var{port}, blocking as necessary, until data "
-           "are available or and end-of-file is reached.  Return either "
-           "a new bytevector containing the data read or the "
-           "end-of-file object.")
+             "Read from @var{port}, blocking as necessary, until bytes "
+             "are available or an end-of-file is reached.  Return either "
+             "the end-of-file object or a new bytevector containing some "
+             "of the available bytes (at least one), and update the port "
+             "position to point just past these bytes.")
   #define FUNC_NAME s_scm_get_bytevector_some
   {
-   /* Read at least one byte, unless the end-of-file is already reached, and
-      read while characters are available (buffered).  */
- 
-   SCM result;
-   char *c_bv;
-   unsigned c_len;
-   size_t c_total;
+   scm_t_port *pt;
+   size_t size;
+   SCM bv;
   
     SCM_VALIDATE_BINARY_INPUT_PORT (1, port);
+   pt = SCM_PTAB_ENTRY (port);
   
-   c_len = 4096;
-   c_bv = (char *) scm_gc_malloc_pointerless (c_len, SCM_GC_BYTEVECTOR);
-   c_total = 0;
- 
-   do
-     {
-       int c_chr;
- 
-       if (c_total + 1 > c_len)
-       {
-         /* Grow the bytevector.  */
-         c_bv = (char *) scm_gc_realloc (c_bv, c_len, c_len * 2,
-                                         SCM_GC_BYTEVECTOR);
-         c_len *= 2;
-       }
+   if (pt->rw_active == SCM_PORT_WRITE)
- -    scm_ptobs[SCM_PTOBNUM (port)].flush (port);
++    scm_flush_unlocked (port);
   
-       /* We can't use `scm_c_read ()' since it blocks.  */
-       c_chr = scm_get_byte_or_eof_unlocked (port);
-       if (c_chr != EOF)
-       {
-         c_bv[c_total] = (char) c_chr;
-         c_total++;
-       }
-       else
-         break;
-     }
-   /* XXX: We want to check for the availability of a byte, but that's
-      what `scm_char_ready_p' actually does.  */
-   while (scm_is_true (scm_char_ready_p (port)));
+   if (pt->rw_random)
+     pt->rw_active = SCM_PORT_READ;
   
-   if (c_total == 0)
+   if (pt->read_pos >= pt->read_end)
       {
-       result = SCM_EOF_VAL;
-       scm_gc_free (c_bv, c_len, SCM_GC_BYTEVECTOR);
- -      if (scm_fill_input (port) == EOF)
++      if (scm_fill_input_unlocked (port) == EOF)
+       return SCM_EOF_VAL;
       }
-   else
-     {
-       if (c_len > c_total)
-       {
-         /* Shrink the bytevector.  */
-         c_bv = (char *) scm_gc_realloc (c_bv, c_len, c_total,
-                                         SCM_GC_BYTEVECTOR);
-         c_len = (unsigned) c_total;
-       }
   
-       result = scm_c_take_gc_bytevector ((signed char *) c_bv, c_len,
-                                          SCM_BOOL_F);
-     }
+   size = pt->read_end - pt->read_pos;
+   if (pt->read_buf == pt->putback_buf)
+     size += pt->saved_read_end - pt->saved_read_pos;
   
-   return result;
+   bv = scm_c_make_bytevector (size);
+   scm_take_from_input_buffers
+     (port, (char *) SCM_BYTEVECTOR_CONTENTS (bv), size);
+ 
+   return bv;
   }
   #undef FUNC_NAME
   
diff --cc libguile/read.c

index 7c003b4,e2e2e4a..c8db812
--- 1/libguile/read.c
--- 2/libguile/read.c
+++ b/libguile/read.c
@@@ -42,6 -44,6 +43,7 @@@
   #include "libguile/hashtab.h"
   #include "libguile/hash.h"
   #include "libguile/ports.h"
++#include "libguile/ports-internal.h"
   #include "libguile/fports.h"
   #include "libguile/root.h"
   #include "libguile/strings.h"
@@@ -968,7 -976,7 +970,7 @@@ scm_read_character (scm_t_wchar chr, SC
     size_t charname_len, bytes_read;
     scm_t_wchar cp;
     int overflow;
--  scm_t_port *pt;
++  scm_t_port_internal *pti;
   
     overflow = read_token (port, opts, buffer, READER_CHAR_NAME_MAX_SIZE,
                            &bytes_read);
@@@ -986,14 -994,12 +988,14 @@@
         return (SCM_MAKE_CHAR (chr));
       }
   
--  pt = SCM_PTAB_ENTRY (port);
++  pti = SCM_PORT_GET_INTERNAL (port);
   
     /* Simple ASCII characters can be processed immediately.  Also, simple
        ISO-8859-1 characters can be processed immediately if the encoding for this
        port is ISO-8859-1.  */
- -  if (bytes_read == 1 && ((unsigned char) buffer[0] <= 127 || pt->encoding == NULL))
+ +  if (bytes_read == 1 &&
+ +      ((unsigned char) buffer[0] <= 127
-        || pt->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1))
++       || pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1))
       {
         SCM_COL (port) += 1;
         return SCM_MAKE_CHAR (buffer[0]);
@@@ -2112,7 -2122,7 +2104,7 @@@ SCM_DEFINE (scm_file_encoding, "file-en
       return SCM_BOOL_F;
     else
       {
--      s_enc = scm_from_locale_string (enc);
++      s_enc = scm_string_upcase (scm_from_locale_string (enc));
         return s_enc;
       }
   
diff --cc libguile/strings.c

index 23a1a70,1b241e5..fa97a00
--- 1/libguile/strings.c
--- 2/libguile/strings.c
+++ b/libguile/strings.c
@@@ -29,6 -29,6 +29,7 @@@
   #include <uninorm.h>
   #include <unistr.h>
   #include <uniconv.h>
++#include <c-strcase.h>
   
   #include "striconveh.h"
   
@@@ -36,6 -36,6 +37,8 @@@
   #include "libguile/chars.h"
   #include "libguile/root.h"
   #include "libguile/strings.h"
++#include "libguile/ports.h"
++#include "libguile/ports-internal.h"
   #include "libguile/error.h"
   #include "libguile/generalized-vectors.h"
   #include "libguile/deprecation.h"
@@@ -1534,11 -1517,14 +1537,11 @@@ scm_from_stringn (const char *str, size
     if (len == (size_t) -1)
       len = strlen (str);
   
-   if (strcmp (encoding, "ISO-8859-1") == 0 || len == 0)
- -  if (encoding == NULL || len == 0)
- -    {
- -      /* If encoding is null (or the string is empty), use Latin-1.  */
- -      char *buf;
- -      res = scm_i_make_string (len, &buf, 0);
- -      memcpy (buf, str, len);
- -      return res;
- -    }
++  if (c_strcasecmp (encoding, "ISO-8859-1") == 0 || len == 0)
+ +    return scm_from_latin1_stringn (str, len);
-   else if (strcmp (encoding, "UTF-8") == 0
++  else if (c_strcasecmp (encoding, "UTF-8") == 0
+ +           && handler == SCM_FAILED_CONVERSION_ERROR)
+ +    return scm_from_utf8_stringn (str, len);
   
     u32len = 0;
     u32 = (scm_t_wchar *) u32_conv_from_encoding (encoding,
@@@ -1722,26 -1646,6 +1725,27 @@@ scm_from_utf32_stringn (const scm_t_wch
     return result;
   }
   
-   if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
+ +SCM
+ +scm_from_port_string (const char *str, SCM port)
+ +{
+ +  return scm_from_port_stringn (str, -1, port);
+ +}
+ +
+ +SCM
+ +scm_from_port_stringn (const char *str, size_t len, SCM port)
+ +{
+ +  scm_t_port *pt = SCM_PTAB_ENTRY (port);
++  scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
+ +
-   else if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8
++  if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
+ +    return scm_from_latin1_stringn (str, len);
++  else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8
+ +           && pt->ilseq_handler == SCM_FAILED_CONVERSION_ERROR)
+ +    return scm_from_utf8_stringn (str, len);
+ +  else
+ +    return scm_from_stringn (str, len, pt->encoding, pt->ilseq_handler);
+ +}
+ +
   /* Create a new scheme string from the C string STR.  The memory of
      STR may be used directly as storage for the new string.  */
   /* FIXME: GC-wise, the only way to use the memory area pointed to by STR
@@@ -2127,26 -2031,6 +2131,27 @@@ scm_to_utf32_stringn (SCM str, size_t *
   }
   #undef FUNC_NAME
   
-   if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1
+ +char *
+ +scm_to_port_string (SCM str, SCM port)
+ +{
+ +  return scm_to_port_stringn (str, NULL, port);
+ +}
+ +
+ +char *
+ +scm_to_port_stringn (SCM str, size_t *lenp, SCM port)
+ +{
+ +  scm_t_port *pt = SCM_PTAB_ENTRY (port);
++  scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
+ +
-   else if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
++  if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1
+ +      && pt->ilseq_handler == SCM_FAILED_CONVERSION_ERROR)
+ +    return scm_to_latin1_stringn (str, lenp);
++  else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
+ +    return scm_to_utf8_stringn (str, lenp);
+ +  else
+ +    return scm_to_stringn (str, lenp, pt->encoding, pt->ilseq_handler);
+ +}
+ +
   /* Return a malloc(3)-allocated buffer containing the contents of STR encoded
      according to ENCODING.  If LENP is non-NULL, set it to the size in bytes of
      the returned buffer.  If the conversion to ENCODING fails, apply the strategy
@@@ -2180,7 -2064,7 +2185,8 @@@ scm_to_stringn (SCM str, size_t *lenp, 
                           "string contains #\\nul character: ~S",
                           scm_list_1 (str));
   
-   if (scm_i_is_narrow_string (str) && strcmp (encoding, "ISO-8859-1") == 0)
- -  if (scm_i_is_narrow_string (str) && (encoding == NULL))
++  if (scm_i_is_narrow_string (str)
++      && c_strcasecmp (encoding, "ISO-8859-1") == 0)
       {
         /* If using native Latin-1 encoding, just copy the string
            contents.  */
diff --cc libguile/vports.c

index a886e36,75e7df3..e726330
--- 1/libguile/vports.c
--- 2/libguile/vports.c
+++ b/libguile/vports.c
@@@ -28,6 -28,6 +28,8 @@@
   #include "libguile/_scm.h"
   #include "libguile/eval.h"
   #include "libguile/chars.h"
++#include "libguile/ports.h"
++#include "libguile/ports-internal.h"
   #include "libguile/fports.h"
   #include "libguile/root.h"
   #include "libguile/strings.h"
@@@ -86,15 -86,15 +88,15 @@@ sf_fill_input (SCM port
   {
     SCM p = SCM_PACK (SCM_STREAM (port));
     SCM ans;
--  scm_t_port *pt;
++  scm_t_port_internal *pti;
   
     ans = scm_call_0 (SCM_SIMPLE_VECTOR_REF (p, 3)); /* get char.  */
     if (scm_is_false (ans) || SCM_EOF_OBJECT_P (ans))
       return EOF;
     SCM_ASSERT (SCM_CHARP (ans), ans, SCM_ARG1, "sf_fill_input");
--  pt = SCM_PTAB_ENTRY (port);    
++  pti = SCM_PORT_GET_INTERNAL (port);
   
-   if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
- -  if (pt->encoding == NULL)
++  if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
       {
         scm_t_port *pt = SCM_PTAB_ENTRY (port);    
         
diff --cc m4/gnulib-cache.m4
Simple merge
diff --cc meta/Makefile.am
Simple merge
diff --cc module/ice-9/boot-9.scm

index 69a7fbc,0779d27..f1fd041
--- 1/module/ice-9/boot-9.scm
--- 2/module/ice-9/boot-9.scm
+++ b/module/ice-9/boot-9.scm
@@@ -188,105 -186,10 +188,107 @@@ If there is no handler at all, Guile pr
   
   \f
   
- -;;; {R4RS compliance}
+ +;;; {Language primitives}
   ;;;
   
- -(primitive-load-path "ice-9/r4rs")
+ +;; These are are the procedural wrappers around the primitives of
+ +;; Guile's language: @apply, @call-with-current-continuation, etc.
+ +;;
+ +;; Usually, a call to a primitive is compiled specially.  The compiler
+ +;; knows about all these kinds of expressions.  But the primitives may
+ +;; be referenced not only as operators, but as values as well.  These
+ +;; stub procedures are the "values" of apply, dynamic-wind, and other
+ +;; such primitives.
+ +;;
+ +(define (apply fun . args)
+ +  (@apply fun (apply:nconc2last args)))
+ +(define (call-with-current-continuation proc)
+ +  (@call-with-current-continuation proc))
+ +(define (call-with-values producer consumer)
+ +  (@call-with-values producer consumer))
+ +(define (dynamic-wind in thunk out)
+ +  "All three arguments must be 0-argument procedures.
+ +Guard @var{in} is called, then @var{thunk}, then
+ +guard @var{out}.
+ +
+ +If, any time during the execution of @var{thunk}, the
+ +continuation of the @code{dynamic_wind} expression is escaped
+ +non-locally, @var{out} is called.  If the continuation of
+ +the dynamic-wind is re-entered, @var{in} is called.  Thus
+ +@var{in} and @var{out} may be called any number of
+ +times.
+ +@lisp
+ + (define x 'normal-binding)
+ +@result{} x
+ + (define a-cont
+ +   (call-with-current-continuation
+ +     (lambda (escape)
+ +       (let ((old-x x))
+ +         (dynamic-wind
+ +           ;; in-guard:
+ +           ;;
+ +           (lambda () (set! x 'special-binding))
+ +
+ +           ;; thunk
+ +           ;;
+ +           (lambda () (display x) (newline)
+ +                   (call-with-current-continuation escape)
+ +                   (display x) (newline)
+ +                   x)
+ +
+ +           ;; out-guard:
+ +           ;;
+ +           (lambda () (set! x old-x)))))))
+ +
+ +;; Prints:
+ +special-binding
+ +;; Evaluates to:
+ +@result{} a-cont
+ +x
+ +@result{} normal-binding
+ + (a-cont #f)
+ +;; Prints:
+ +special-binding
+ +;; Evaluates to:
+ +@result{} a-cont  ;; the value of the (define a-cont...)
+ +x
+ +@result{} normal-binding
+ +a-cont
+ +@result{} special-binding
+ +@end lisp"
+ +  (@dynamic-wind in (thunk) out))
+ +
+ +\f
+ +
+ +;;; {Low-Level Port Code}
+ +;;;
+ +
+ +;; These are used to request the proper mode to open files in.
+ +;;
+ +(define OPEN_READ "r")
+ +(define OPEN_WRITE "w")
+ +(define OPEN_BOTH "r+")
+ +
+ +(define *null-device* "/dev/null")
+ +
++;; NOTE: Later in this file, this is redefined to support keywords
+ +(define (open-input-file str)
+ +  "Takes a string naming an existing file and returns an input port
+ +capable of delivering characters from the file.  If the file
+ +cannot be opened, an error is signalled."
+ +  (open-file str OPEN_READ))
+ +
++;; NOTE: Later in this file, this is redefined to support keywords
+ +(define (open-output-file str)
+ +  "Takes a string naming an output file to be created and returns an
+ +output port capable of writing characters to a new file by that
+ +name.  If the file cannot be opened, an error is signalled.  If a
+ +file with the given name already exists, the effect is unspecified."
+ +  (open-file str OPEN_WRITE))
+ +
+ +(define (open-io-file str) 
+ +  "Open file with name STR for both input and output."
+ +  (open-file str OPEN_BOTH))
   
   \f
   
@@@ -1228,355 -1247,80 +1230,385 @@@ VALUE.
           (loop (cdr fields) (+ 1 off)))))
       (display ">" p))
   
- -  (let ((rtd (make-struct record-type-vtable 0
- -                          (make-struct-layout
- -                           (apply string-append
- -                                  (map (lambda (f) "pw") fields)))
- -                          (or printer default-record-printer)
- -                          type-name
- -                          (copy-tree fields))))
- -    (struct-set! rtd (+ vtable-offset-user 2)
- -                 (make-constructor rtd (length fields)))
- -    ;; Temporary solution: Associate a name to the record type descriptor
- -    ;; so that the object system can create a wrapper class for it.
- -    (set-struct-vtable-name! rtd (if (symbol? type-name)
- -                                     type-name
- -                                     (string->symbol type-name)))
- -    rtd))
+ +  (let ((rtd (make-struct record-type-vtable 0
+ +                          (make-struct-layout
+ +                           (apply string-append
+ +                                  (map (lambda (f) "pw") fields)))
+ +                          (or printer default-record-printer)
+ +                          type-name
+ +                          (copy-tree fields))))
+ +    (struct-set! rtd (+ vtable-offset-user 2)
+ +                 (make-constructor rtd (length fields)))
+ +    ;; Temporary solution: Associate a name to the record type descriptor
+ +    ;; so that the object system can create a wrapper class for it.
+ +    (set-struct-vtable-name! rtd (if (symbol? type-name)
+ +                                     type-name
+ +                                     (string->symbol type-name)))
+ +    rtd))
+ +
+ +(define (record-type-name obj)
+ +  (if (record-type? obj)
+ +      (struct-ref obj vtable-offset-user)
+ +      (error 'not-a-record-type obj)))
+ +
+ +(define (record-type-fields obj)
+ +  (if (record-type? obj)
+ +      (struct-ref obj (+ 1 vtable-offset-user))
+ +      (error 'not-a-record-type obj)))
+ +
+ +(define* (record-constructor rtd #:optional field-names)
+ +  (if (not field-names)
+ +      (struct-ref rtd (+ 2 vtable-offset-user))
+ +      (primitive-eval
+ +       `(lambda ,field-names
+ +          (make-struct ',rtd 0 ,@(map (lambda (f)
+ +                                        (if (memq f field-names)
+ +                                            f
+ +                                            #f))
+ +                                      (record-type-fields rtd)))))))
+ +          
+ +(define (record-predicate rtd)
+ +  (lambda (obj) (and (struct? obj) (eq? rtd (struct-vtable obj)))))
+ +
+ +(define (%record-type-error rtd obj)  ;; private helper
+ +  (or (eq? rtd (record-type-descriptor obj))
+ +      (scm-error 'wrong-type-arg "%record-type-check"
+ +                 "Wrong type record (want `~S'): ~S"
+ +                 (list (record-type-name rtd) obj)
+ +                 #f)))
+ +
+ +(define (record-accessor rtd field-name)
+ +  (let ((pos (list-index (record-type-fields rtd) field-name)))
+ +    (if (not pos)
+ +        (error 'no-such-field field-name))
+ +    (lambda (obj)
+ +      (if (eq? (struct-vtable obj) rtd)
+ +          (struct-ref obj pos)
+ +          (%record-type-error rtd obj)))))
+ +
+ +(define (record-modifier rtd field-name)
+ +  (let ((pos (list-index (record-type-fields rtd) field-name)))
+ +    (if (not pos)
+ +        (error 'no-such-field field-name))
+ +    (lambda (obj val)
+ +      (if (eq? (struct-vtable obj) rtd)
+ +          (struct-set! obj pos val)
+ +          (%record-type-error rtd obj)))))
+ +
+ +(define (record? obj)
+ +  (and (struct? obj) (record-type? (struct-vtable obj))))
+ +
+ +(define (record-type-descriptor obj)
+ +  (if (struct? obj)
+ +      (struct-vtable obj)
+ +      (error 'not-a-record obj)))
+ +
+ +(provide 'record)
+ +
+ +
+ +\f
+ +;;; {Parameters}
+ +;;;
+ +
+ +(define <parameter>
+ +  ;; Three fields: the procedure itself, the fluid, and the converter.
+ +  (make-struct <applicable-struct-vtable> 0 'pwprpr))
+ +(set-struct-vtable-name! <parameter> '<parameter>)
+ +
+ +(define* (make-parameter init #:optional (conv (lambda (x) x)))
+ +  "Make a new parameter.
+ +
+ +A parameter is a dynamically bound value, accessed through a procedure.
+ +To access the current value, apply the procedure with no arguments:
+ +
+ +  (define p (make-parameter 10))
+ +  (p) => 10
+ +
+ +To provide a new value for the parameter in a dynamic extent, use
+ +`parameterize':
+ +
+ +  (parameterize ((p 20))
+ +    (p)) => 20
+ +  (p) => 10
+ +
+ +The value outside of the dynamic extent of the body is unaffected.  To
+ +update the current value, apply it to one argument:
+ +
+ +  (p 20) => 10
+ +  (p) => 20
+ +
+ +As you can see, the call that updates a parameter returns its previous
+ +value.
+ +
+ +All values for the parameter are first run through the CONV procedure,
+ +including INIT, the initial value.  The default CONV procedure is the
+ +identity procedure.  CONV is commonly used to ensure some set of
+ +invariants on the values that a parameter may have."
+ +  (let ((fluid (make-fluid (conv init))))
+ +    (make-struct <parameter> 0
+ +                 (case-lambda
+ +                   (() (fluid-ref fluid))
+ +                   ((x) (let ((prev (fluid-ref fluid)))
+ +                          (fluid-set! fluid (conv x))
+ +                          prev)))
+ +                 fluid conv)))
+ +
+ +(define (parameter? x)
+ +  (and (struct? x) (eq? (struct-vtable x) <parameter>)))
+ +
+ +(define (parameter-fluid p)
+ +  (if (parameter? p)
+ +      (struct-ref p 1)
+ +      (scm-error 'wrong-type-arg "parameter-fluid"
+ +                 "Not a parameter: ~S" (list p) #f)))
+ +
+ +(define (parameter-converter p)
+ +  (if (parameter? p)
+ +      (struct-ref p 2)
+ +      (scm-error 'wrong-type-arg "parameter-fluid"
+ +                 "Not a parameter: ~S" (list p) #f)))
+ +
+ +(define-syntax parameterize
+ +  (lambda (x)
+ +    (syntax-case x ()
+ +      ((_ ((param value) ...) body body* ...)
+ +       (with-syntax (((p ...) (generate-temporaries #'(param ...))))
+ +         #'(let ((p param) ...)
+ +             (if (not (parameter? p))
+ +                        (scm-error 'wrong-type-arg "parameterize"
+ +                                   "Not a parameter: ~S" (list p) #f))
+ +             ...
+ +             (with-fluids (((struct-ref p 1) ((struct-ref p 2) value))
+ +                           ...)
+ +               body body* ...)))))))
+ +
+ +(define* (fluid->parameter fluid #:optional (conv (lambda (x) x)))
+ +  "Make a parameter that wraps a fluid.
+ +
+ +The value of the parameter will be the same as the value of the fluid.
+ +If the parameter is rebound in some dynamic extent, perhaps via
+ +`parameterize', the new value will be run through the optional CONV
+ +procedure, as with any parameter.  Note that unlike `make-parameter',
+ +CONV is not applied to the initial value."
+ +  (make-struct <parameter> 0
+ +               (case-lambda
+ +                 (() (fluid-ref fluid))
+ +                 ((x) (let ((prev (fluid-ref fluid)))
+ +                        (fluid-set! fluid (conv x))
+ +                        prev)))
+ +               fluid conv))
+ +
+ +\f
+ +
+ +;;; Once parameters have booted, define the default prompt tag as being
+ +;;; a parameter.
+ +;;;
+ +
+ +(set! default-prompt-tag (make-parameter (default-prompt-tag)))
+ +
+ +\f
+ +
+ +;;; Current ports as parameters.
+ +;;;
+ +
+ +(let ()
+ +  (define-syntax-rule (port-parameterize! binding fluid predicate msg)
+ +    (begin
+ +      (set! binding (fluid->parameter (module-ref (current-module) 'fluid)
+ +                                      (lambda (x)
+ +                                        (if (predicate x) x
+ +                                            (error msg x)))))
+ +      (hashq-remove! (%get-pre-modules-obarray) 'fluid)))
+ +  
+ +  (port-parameterize! current-input-port %current-input-port-fluid
+ +                      input-port? "expected an input port")
+ +  (port-parameterize! current-output-port %current-output-port-fluid
+ +                      output-port? "expected an output port")
+ +  (port-parameterize! current-error-port %current-error-port-fluid
+ +                      output-port? "expected an output port"))
+ +
+ +\f
+ +
+ +;;; {Warnings}
+ +;;;
+ +
+ +(define current-warning-port
+ +  (make-parameter (current-error-port)
+ +                  (lambda (x)
+ +                    (if (output-port? x)
+ +                        x
+ +                        (error "expected an output port" x)))))
+ +
+ +
+ +\f
+ +
+ +;;; {Languages}
+ +;;;
+ +
+ +;; The language can be a symbolic name or a <language> object from
+ +;; (system base language).
+ +;;
+ +(define current-language (make-parameter 'scheme))
+ +
+ +
+ +\f
+ +
+ +;;; {High-Level Port Routines}
+ +;;;
+ +
- (define (call-with-input-file str proc)
-   "PROC should be a procedure of one argument, and STR should be a
- string naming a file.  The file must already exist. These procedures
- call PROC with one argument: the port obtained by opening the named file
- for input or output.  If the file cannot be opened, an error is
++(define* (open-input-file
++          file #:key (binary #f) (encoding #f) (guess-encoding #f))
++  "Takes a string naming an existing file and returns an input port
++capable of delivering characters from the file.  If the file
++cannot be opened, an error is signalled."
++  (open-file file (if binary "rb" "r")
++             #:encoding encoding
++             #:guess-encoding guess-encoding))
++
++(define* (open-output-file file #:key (binary #f) (encoding #f))
++  "Takes a string naming an output file to be created and returns an
++output port capable of writing characters to a new file by that
++name.  If the file cannot be opened, an error is signalled.  If a
++file with the given name already exists, the effect is unspecified."
++  (open-file file (if binary "wb" "w")
++             #:encoding encoding))
++
++(define* (call-with-input-file
++          file proc #:key (binary #f) (encoding #f) (guess-encoding #f))
++  "PROC should be a procedure of one argument, and FILE should be a
++string naming a file.  The file must
++already exist. These procedures call PROC
++with one argument: the port obtained by opening the named file for
++input or output.  If the file cannot be opened, an error is
+ +signalled.  If the procedure returns, then the port is closed
- automatically and the values yielded by the procedure are returned.  If
- the procedure does not return, then the port will not be closed
- automatically unless it is possible to prove that the port will never
- again be used for a read or write operation."
-   (let ((p (open-input-file str)))
++automatically and the values yielded by the procedure are returned.
++If the procedure does not return, then the port will not be closed
++automatically unless it is possible to prove that the port will
++never again be used for a read or write operation."
++  (let ((p (open-input-file file
++                            #:binary binary
++                            #:encoding encoding
++                            #:guess-encoding guess-encoding)))
+ +    (call-with-values
+ +      (lambda () (proc p))
+ +      (lambda vals
+ +        (close-input-port p)
+ +        (apply values vals)))))
+ +
- (define (call-with-output-file str proc)
-   "PROC should be a procedure of one argument, and STR should be a
- string naming a file.  The behaviour is unspecified if the file 
++(define* (call-with-output-file file proc #:key (binary #f) (encoding #f))
++  "PROC should be a procedure of one argument, and FILE should be a
++string naming a file.  The behaviour is unspecified if the file
+ +already exists. These procedures call PROC
+ +with one argument: the port obtained by opening the named file for
+ +input or output.  If the file cannot be opened, an error is
+ +signalled.  If the procedure returns, then the port is closed
+ +automatically and the values yielded by the procedure are returned.
+ +If the procedure does not return, then the port will not be closed
+ +automatically unless it is possible to prove that the port will
+ +never again be used for a read or write operation."
-   (let ((p (open-output-file str)))
++  (let ((p (open-output-file file #:binary binary #:encoding encoding)))
+ +    (call-with-values
+ +      (lambda () (proc p))
+ +      (lambda vals
+ +        (close-output-port p)
+ +        (apply values vals)))))
+ +
+ +(define (with-input-from-port port thunk)
+ +  (parameterize ((current-input-port port))
+ +    (thunk)))
+ +
+ +(define (with-output-to-port port thunk)
+ +  (parameterize ((current-output-port port))
+ +    (thunk)))
   
- -(define (record-type-name obj)
- -  (if (record-type? obj)
- -      (struct-ref obj vtable-offset-user)
- -      (error 'not-a-record-type obj)))
+ +(define (with-error-to-port port thunk)
+ +  (parameterize ((current-error-port port))
+ +    (thunk)))
   
- (define (with-input-from-file file thunk)
- -(define (record-type-fields obj)
- -  (if (record-type? obj)
- -      (struct-ref obj (+ 1 vtable-offset-user))
- -      (error 'not-a-record-type obj)))
++(define* (with-input-from-file
++          file thunk #:key (binary #f) (encoding #f) (guess-encoding #f))
+ +  "THUNK must be a procedure of no arguments, and FILE must be a
+ +string naming a file.  The file must already exist. The file is opened for
+ +input, an input port connected to it is made
- the default value returned by `current-input-port', 
++the default value returned by `current-input-port',
+ +and the THUNK is called with no arguments.
+ +When the THUNK returns, the port is closed and the previous
+ +default is restored.  Returns the values yielded by THUNK.  If an
+ +escape procedure is used to escape from the continuation of these
+ +procedures, their behavior is implementation dependent."
+ +  (call-with-input-file file
-    (lambda (p) (with-input-from-port p thunk))))
++   (lambda (p) (with-input-from-port p thunk))
++   #:binary binary
++   #:encoding encoding
++   #:guess-encoding guess-encoding))
   
- (define (with-output-to-file file thunk)
- -(define* (record-constructor rtd #:optional field-names)
- -  (if (not field-names)
- -      (struct-ref rtd (+ 2 vtable-offset-user))
- -      (primitive-eval
- -       `(lambda ,field-names
- -          (make-struct ',rtd 0 ,@(map (lambda (f)
- -                                        (if (memq f field-names)
- -                                            f
- -                                            #f))
- -                                      (record-type-fields rtd)))))))
- -          
- -(define (record-predicate rtd)
- -  (lambda (obj) (and (struct? obj) (eq? rtd (struct-vtable obj)))))
++(define* (with-output-to-file file thunk #:key (binary #f) (encoding #f))
+ +  "THUNK must be a procedure of no arguments, and FILE must be a
- string naming a file.  The effect is unspecified if the file already exists. 
++string naming a file.  The effect is unspecified if the file already exists.
+ +The file is opened for output, an output port connected to it is made
- the default value returned by `current-output-port', 
++the default value returned by `current-output-port',
+ +and the THUNK is called with no arguments.
+ +When the THUNK returns, the port is closed and the previous
+ +default is restored.  Returns the values yielded by THUNK.  If an
+ +escape procedure is used to escape from the continuation of these
+ +procedures, their behavior is implementation dependent."
+ +  (call-with-output-file file
-    (lambda (p) (with-output-to-port p thunk))))
++   (lambda (p) (with-output-to-port p thunk))
++   #:binary binary
++   #:encoding encoding))
   
- (define (with-error-to-file file thunk)
- -(define (%record-type-error rtd obj)  ;; private helper
- -  (or (eq? rtd (record-type-descriptor obj))
- -      (scm-error 'wrong-type-arg "%record-type-check"
- -                 "Wrong type record (want `~S'): ~S"
- -                 (list (record-type-name rtd) obj)
- -                 #f)))
++(define* (with-error-to-file file thunk #:key (binary #f) (encoding #f))
+ +  "THUNK must be a procedure of no arguments, and FILE must be a
- string naming a file.  The effect is unspecified if the file already exists. 
++string naming a file.  The effect is unspecified if the file already exists.
+ +The file is opened for output, an output port connected to it is made
- the default value returned by `current-error-port', 
++the default value returned by `current-error-port',
+ +and the THUNK is called with no arguments.
+ +When the THUNK returns, the port is closed and the previous
+ +default is restored.  Returns the values yielded by THUNK.  If an
+ +escape procedure is used to escape from the continuation of these
+ +procedures, their behavior is implementation dependent."
+ +  (call-with-output-file file
-    (lambda (p) (with-error-to-port p thunk))))
++   (lambda (p) (with-error-to-port p thunk))
++   #:binary binary
++   #:encoding encoding))
   
- -(define (record-accessor rtd field-name)
- -  (let ((pos (list-index (record-type-fields rtd) field-name)))
- -    (if (not pos)
- -        (error 'no-such-field field-name))
- -    (lambda (obj)
- -      (if (eq? (struct-vtable obj) rtd)
- -          (struct-ref obj pos)
- -          (%record-type-error rtd obj)))))
+ +(define (call-with-input-string string proc)
+ +  "Calls the one-argument procedure @var{proc} with a newly created
+ +input port from which @var{string}'s contents may be read.  The value
+ +yielded by the @var{proc} is returned."
+ +  (proc (open-input-string string)))
   
- -(define (record-modifier rtd field-name)
- -  (let ((pos (list-index (record-type-fields rtd) field-name)))
- -    (if (not pos)
- -        (error 'no-such-field field-name))
- -    (lambda (obj val)
- -      (if (eq? (struct-vtable obj) rtd)
- -          (struct-set! obj pos val)
- -          (%record-type-error rtd obj)))))
+ +(define (with-input-from-string string thunk)
+ +  "THUNK must be a procedure of no arguments.
+ +The test of STRING  is opened for
+ +input, an input port connected to it is made, 
+ +and the THUNK is called with no arguments.
+ +When the THUNK returns, the port is closed.
+ +Returns the values yielded by THUNK.  If an
+ +escape procedure is used to escape from the continuation of these
+ +procedures, their behavior is implementation dependent."
+ +  (call-with-input-string string
+ +   (lambda (p) (with-input-from-port p thunk))))
   
- -(define (record? obj)
- -  (and (struct? obj) (record-type? (struct-vtable obj))))
+ +(define (call-with-output-string proc)
+ +  "Calls the one-argument procedure @var{proc} with a newly created output
+ +port.  When the function returns, the string composed of the characters
+ +written into the port is returned."
+ +  (let ((port (open-output-string)))
+ +    (proc port)
+ +    (get-output-string port)))
   
- -(define (record-type-descriptor obj)
- -  (if (struct? obj)
- -      (struct-vtable obj)
- -      (error 'not-a-record obj)))
+ +(define (with-output-to-string thunk)
+ +  "Calls THUNK and returns its output as a string."
+ +  (call-with-output-string
+ +   (lambda (p) (with-output-to-port p thunk))))
   
- -(provide 'record)
+ +(define (with-error-to-string thunk)
+ +  "Calls THUNK and returns its error output as a string."
+ +  (call-with-output-string
+ +   (lambda (p) (with-error-to-port p thunk))))
+ +
+ +(define the-eof-object (call-with-input-string "" (lambda (p) (read-char p))))
   
   \f
   
diff --cc module/ice-9/psyntax-pp.scm
Simple merge
diff --cc module/ice-9/psyntax.scm
Simple merge
diff --cc module/language/tree-il/peval.scm
Simple merge
diff --cc test-suite/standalone/Makefile.am
Simple merge
diff --cc test-suite/tests/00-socket.test

index 7b13c5f,5093575..30a0257
--- 1/test-suite/tests/00-socket.test
--- 2/test-suite/tests/00-socket.test
+++ b/test-suite/tests/00-socket.test
@@@ -1,7 -1,7 +1,7 @@@
- -;;;; socket.test --- test socket functions     -*- scheme -*-
+ +;;;; 00-socket.test --- test socket functions     -*- scheme -*-
   ;;;;
   ;;;; Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010,
- ;;;;   2011, 2012 Free Software Foundation, Inc.
- -;;;;   2011, 2013 Free Software Foundation, Inc.
++;;;;   2011, 2012, 2013 Free Software Foundation, Inc.
   ;;;;
   ;;;; This library is free software; you can redistribute it and/or
   ;;;; modify it under the terms of the GNU Lesser General Public
diff --cc test-suite/tests/foreign.test
Simple merge
diff --cc test-suite/tests/ports.test

index 886ab24,9b1c6c0..65c87da
--- 1/test-suite/tests/ports.test
--- 2/test-suite/tests/ports.test
+++ b/test-suite/tests/ports.test
@@@ -1149,6 -1520,286 +1520,286 @@@
   
   \f
   
- -                                               #x00 #x00 #xFE #xFF
- -                                               #x00 #x00 #x00 #x62)))
+ (pass-if-equal "unget-bytevector"
+     #vu8(10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 200 201 202 203
+             1 2 3 4 251 253 254 255)
+   (let ((port (open-bytevector-input-port #vu8(1 2 3 4 251 253 254 255))))
+     (unget-bytevector port #vu8(200 201 202 203))
+     (unget-bytevector port #vu8(20 21 22 23 24))
+     (unget-bytevector port #vu8(10 11 12 13 14 15 16 17 18 19) 4)
+     (unget-bytevector port #vu8(10 11 12 13 14 15 16 17 18 19) 2 2)
+     (unget-bytevector port #vu8(10 11))
+     (get-bytevector-all port)))
+ 
+ \f
+ 
+ (with-test-prefix "unicode byte-order marks (BOMs)"
+ 
+   (define (bv-read-test* encoding bv proc)
+     (let ((port (open-bytevector-input-port bv)))
+       (set-port-encoding! port encoding)
+       (proc port)))
+ 
+   (define (bv-read-test encoding bv)
+     (bv-read-test* encoding bv read-string))
+ 
+   (define (bv-write-test* encoding proc)
+     (call-with-values
+         (lambda () (open-bytevector-output-port))
+       (lambda (port get-bytevector)
+         (set-port-encoding! port encoding)
+         (proc port)
+         (get-bytevector))))
+ 
+   (define (bv-write-test encoding str)
+     (bv-write-test* encoding
+                     (lambda (p)
+                       (display str p))))
+ 
+   (pass-if-equal "BOM not discarded from Latin-1 stream"
+       "\xEF\xBB\xBF\x61"
+     (bv-read-test "ISO-8859-1" #vu8(#xEF #xBB #xBF #x61)))
+ 
+   (pass-if-equal "BOM not discarded from Latin-2 stream"
+       "\u010F\u0165\u017C\x61"
+     (bv-read-test "ISO-8859-2" #vu8(#xEF #xBB #xBF #x61)))
+ 
+   (pass-if-equal "BOM not discarded from UTF-16BE stream"
+       "\uFEFF\x61"
+     (bv-read-test "UTF-16BE" #vu8(#xFE #xFF #x00 #x61)))
+ 
+   (pass-if-equal "BOM not discarded from UTF-16LE stream"
+       "\uFEFF\x61"
+     (bv-read-test "UTF-16LE" #vu8(#xFF #xFE #x61 #x00)))
+ 
+   (pass-if-equal "BOM not discarded from UTF-32BE stream"
+       "\uFEFF\x61"
+     (bv-read-test "UTF-32BE" #vu8(#x00 #x00 #xFE #xFF
+                                        #x00 #x00 #x00 #x61)))
+ 
+   (pass-if-equal "BOM not discarded from UTF-32LE stream"
+       "\uFEFF\x61"
+     (bv-read-test "UTF-32LE" #vu8(#xFF #xFE #x00 #x00
+                                        #x61 #x00 #x00 #x00)))
+ 
+   (pass-if-equal "BOM not written to UTF-8 stream"
+       #vu8(#x61)
+     (bv-write-test "UTF-8" "a"))
+ 
+   (pass-if-equal "BOM not written to UTF-16BE stream"
+       #vu8(#x00 #x61)
+     (bv-write-test "UTF-16BE" "a"))
+ 
+   (pass-if-equal "BOM not written to UTF-16LE stream"
+       #vu8(#x61 #x00)
+     (bv-write-test "UTF-16LE" "a"))
+ 
+   (pass-if-equal "BOM not written to UTF-32BE stream"
+       #vu8(#x00 #x00 #x00 #x61)
+     (bv-write-test "UTF-32BE" "a"))
+ 
+   (pass-if-equal "BOM not written to UTF-32LE stream"
+       #vu8(#x61 #x00 #x00 #x00)
+     (bv-write-test "UTF-32LE" "a"))
+ 
+   (pass-if "Don't read from the port unless user asks to"
+     (let* ((p (make-soft-port
+                (vector
+                 (lambda (c) #f)           ; write char
+                 (lambda (s) #f)           ; write string
+                 (lambda () #f)            ; flush
+                 (lambda () (throw 'fail)) ; read char
+                 (lambda () #f))
+                "rw")))
+       (set-port-encoding! p "UTF-16")
+       (display "abc" p)
+       (set-port-encoding! p "UTF-32")
+       (display "def" p)
+       #t))
+ 
+   ;; TODO: test that input and output streams are independent when
+   ;; appropriate, and linked when appropriate.
+ 
+   (pass-if-equal "BOM discarded from start of UTF-8 stream"
+       "a"
+     (bv-read-test "Utf-8" #vu8(#xEF #xBB #xBF #x61)))
+ 
+   (pass-if-equal "BOM discarded from start of UTF-8 stream after seek to 0"
+       '(#\a "a")
+     (bv-read-test* "uTf-8" #vu8(#xEF #xBB #xBF #x61)
+                    (lambda (p)
+                      (let ((c (read-char p)))
+                        (seek p 0 SEEK_SET)
+                        (let ((s (read-string p)))
+                          (list c s))))))
+ 
+   (pass-if-equal "Only one BOM discarded from start of UTF-8 stream"
+       "\uFEFFa"
+     (bv-read-test "UTF-8" #vu8(#xEF #xBB #xBF #xEF #xBB #xBF #x61)))
+ 
+   (pass-if-equal "BOM not discarded from UTF-8 stream after seek to > 0"
+       "\uFEFFb"
+     (bv-read-test* "UTF-8" #vu8(#x61 #xEF #xBB #xBF #x62)
+                    (lambda (p)
+                      (seek p 1 SEEK_SET)
+                      (read-string p))))
+ 
+   (pass-if-equal "BOM not discarded unless at start of UTF-8 stream"
+       "a\uFEFFb"
+     (bv-read-test "UTF-8" #vu8(#x61 #xEF #xBB #xBF #x62)))
+ 
+   (pass-if-equal "BOM (BE) written to start of UTF-16 stream"
+       #vu8(#xFE #xFF #x00 #x61 #x00 #x62)
+     (bv-write-test "UTF-16" "ab"))
+ 
+   (pass-if-equal "BOM (BE) written to UTF-16 stream after set-port-encoding!"
+       #vu8(#xFE #xFF #x00 #x61 #x00 #x62 #xFE #xFF #x00 #x63 #x00 #x64)
+     (bv-write-test* "UTF-16"
+                     (lambda (p)
+                       (display "ab" p)
+                       (set-port-encoding! p "UTF-16")
+                       (display "cd" p))))
+ 
+   (pass-if-equal "BOM discarded from start of UTF-16 stream (BE)"
+       "a"
+     (bv-read-test "UTF-16" #vu8(#xFE #xFF #x00 #x61)))
+ 
+   (pass-if-equal "BOM discarded from start of UTF-16 stream (BE) after seek to 0"
+       '(#\a "a")
+     (bv-read-test* "utf-16" #vu8(#xFE #xFF #x00 #x61)
+                    (lambda (p)
+                      (let ((c (read-char p)))
+                        (seek p 0 SEEK_SET)
+                        (let ((s (read-string p)))
+                          (list c s))))))
+ 
+   (pass-if-equal "Only one BOM discarded from start of UTF-16 stream (BE)"
+       "\uFEFFa"
+     (bv-read-test "Utf-16" #vu8(#xFE #xFF #xFE #xFF #x00 #x61)))
+ 
+   (pass-if-equal "BOM not discarded from UTF-16 stream (BE) after seek to > 0"
+       "\uFEFFa"
+     (bv-read-test* "uTf-16" #vu8(#xFE #xFF #xFE #xFF #x00 #x61)
+                    (lambda (p)
+                      (seek p 2 SEEK_SET)
+                      (read-string p))))
+ 
+   (pass-if-equal "BOM not discarded unless at start of UTF-16 stream"
+       "a\uFEFFb"
+     (bv-read-test "utf-16" #vu8(#x00 #x61 #xFE #xFF #x00 #x62)))
+ 
+   (pass-if-equal "BOM discarded from start of UTF-16 stream (LE)"
+       "a"
+     (bv-read-test "UTF-16" #vu8(#xFF #xFE #x61 #x00)))
+ 
+   (pass-if-equal "BOM discarded from start of UTF-16 stream (LE) after seek to 0"
+       '(#\a "a")
+     (bv-read-test* "Utf-16" #vu8(#xFF #xFE #x61 #x00)
+                    (lambda (p)
+                      (let ((c (read-char p)))
+                        (seek p 0 SEEK_SET)
+                        (let ((s (read-string p)))
+                          (list c s))))))
+ 
+   (pass-if-equal "Only one BOM discarded from start of UTF-16 stream (LE)"
+       "\uFEFFa"
+     (bv-read-test "UTf-16" #vu8(#xFF #xFE #xFF #xFE #x61 #x00)))
+ 
+   (pass-if-equal "BOM discarded from start of UTF-32 stream (BE)"
+       "a"
+     (bv-read-test "UTF-32" #vu8(#x00 #x00 #xFE #xFF
+                                      #x00 #x00 #x00 #x61)))
+ 
+   (pass-if-equal "BOM discarded from start of UTF-32 stream (BE) after seek to 0"
+       '(#\a "a")
+     (bv-read-test* "utF-32" #vu8(#x00 #x00 #xFE #xFF
+                                       #x00 #x00 #x00 #x61)
+                    (lambda (p)
+                      (let ((c (read-char p)))
+                        (seek p 0 SEEK_SET)
+                        (let ((s (read-string p)))
+                          (list c s))))))
+ 
+   (pass-if-equal "Only one BOM discarded from start of UTF-32 stream (BE)"
+       "\uFEFFa"
+     (bv-read-test "UTF-32" #vu8(#x00 #x00 #xFE #xFF
+                                      #x00 #x00 #xFE #xFF
+                                      #x00 #x00 #x00 #x61)))
+ 
+   (pass-if-equal "BOM not discarded from UTF-32 stream (BE) after seek to > 0"
+       "\uFEFFa"
+     (bv-read-test* "UtF-32" #vu8(#x00 #x00 #xFE #xFF
+                                       #x00 #x00 #xFE #xFF
+                                       #x00 #x00 #x00 #x61)
+                    (lambda (p)
+                      (seek p 4 SEEK_SET)
+                      (read-string p))))
+ 
+   (pass-if-equal "BOM discarded within UTF-16 stream (BE) after set-port-encoding!"
+       "ab"
+     (bv-read-test* "UTF-16" #vu8(#x00 #x61 #xFE #xFF #x00 #x62)
+                    (lambda (p)
+                      (let ((a (read-char p)))
+                        (set-port-encoding! p "UTF-16")
+                        (string a (read-char p))))))
+ 
+   (pass-if-equal "BOM discarded within UTF-16 stream (LE,BE) after set-port-encoding!"
+       "ab"
+     (bv-read-test* "utf-16" #vu8(#x00 #x61 #xFF #xFE #x62 #x00)
+                    (lambda (p)
+                      (let ((a (read-char p)))
+                        (set-port-encoding! p "UTF-16")
+                        (string a (read-char p))))))
+ 
+   (pass-if-equal "BOM discarded within UTF-32 stream (BE) after set-port-encoding!"
+       "ab"
+     (bv-read-test* "UTF-32" #vu8(#x00 #x00 #x00 #x61
+                                       #x00 #x00 #xFE #xFF
+                                       #x00 #x00 #x00 #x62)
+                    (lambda (p)
+                      (let ((a (read-char p)))
+                        (set-port-encoding! p "UTF-32")
+                        (string a (read-char p))))))
+ 
+   (pass-if-equal "BOM discarded within UTF-32 stream (LE,BE) after set-port-encoding!"
+       "ab"
+     (bv-read-test* "UTF-32" #vu8(#x00 #x00 #x00 #x61
+                                       #xFF #xFE #x00 #x00
+                                       #x62 #x00 #x00 #x00)
+                    (lambda (p)
+                      (let ((a (read-char p)))
+                        (set-port-encoding! p "UTF-32")
+                        (string a (read-char p))))))
+ 
+   (pass-if-equal "BOM not discarded unless at start of UTF-32 stream"
+       "a\uFEFFb"
+     (bv-read-test "UTF-32" #vu8(#x00 #x00 #x00 #x61
++                                     #x00 #x00 #xFE #xFF
++                                     #x00 #x00 #x00 #x62)))
+ 
+   (pass-if-equal "BOM discarded from start of UTF-32 stream (LE)"
+       "a"
+     (bv-read-test "UTF-32" #vu8(#xFF #xFE #x00 #x00
+                                      #x61 #x00 #x00 #x00)))
+ 
+   (pass-if-equal "BOM discarded from start of UTF-32 stream (LE) after seek to 0"
+       '(#\a "a")
+     (bv-read-test* "UTf-32" #vu8(#xFF #xFE #x00 #x00
+                                       #x61 #x00 #x00 #x00)
+                    (lambda (p)
+                      (let ((c (read-char p)))
+                        (seek p 0 SEEK_SET)
+                        (let ((s (read-string p)))
+                          (list c s))))))
+ 
+   (pass-if-equal "Only one BOM discarded from start of UTF-32 stream (LE)"
+       "\uFEFFa"
+     (bv-read-test "UTF-32" #vu8(#xFF #xFE #x00 #x00
+                                      #xFF #xFE #x00 #x00
+                                      #x61 #x00 #x00 #x00))))
+ 
+ \f
+ 
   (define-syntax-rule (with-load-path path body ...)
     (let ((new path)
           (old %load-path))
diff --cc test-suite/tests/r6rs-ports.test
Simple merge
author	Mark H Weaver <mhw@netris.org>
	Sun, 14 Apr 2013 06:48:33 +0000 (02:48 -0400)
committer	Mark H Weaver <mhw@netris.org>
	Sun, 14 Apr 2013 06:48:33 +0000 (02:48 -0400)
		1	2
configure.ac	patch \|	diff1 \|	diff2 \|	blob \| history
doc/ref/api-control.texi	patch \|	diff1 \|	diff2 \|	blob \| history
doc/ref/api-data.texi	patch \|	diff1 \|	diff2 \|	blob \| history
doc/ref/guile-invoke.texi	patch \|	diff1 \|	diff2 \|	blob \| history
doc/ref/posix.texi	patch \|	diff1 \|	diff2 \|	blob \| history
libguile/Makefile.am	patch \|	diff1 \|	diff2 \|	blob \| history
libguile/array-map.c	patch \|	diff1 \|	diff2 \|	blob \| history
libguile/filesys.c	patch \|	diff1 \|	diff2 \|	blob \| history
libguile/fports.c	patch \|	diff1 \|	diff2 \|	blob \| history
libguile/fports.h	patch \|	diff1 \|	diff2 \|	blob \| history
libguile/gc.h	patch \|	diff1 \|	diff2 \|	blob \| history
libguile/gen-scmconfig.c	patch \|	diff1 \|	diff2 \|	blob \| history
libguile/init.c	patch \|	diff1 \|	diff2 \|	blob \| history
libguile/keywords.c	patch \|	diff1 \|	diff2 \|	blob \| history
libguile/ports-internal.h	patch \|	\|	diff2 \|	blob \| history
libguile/ports.c	patch \|	diff1 \|	diff2 \|	blob \| history
libguile/ports.h	patch \|	diff1 \|	diff2 \|	blob \| history
libguile/print.c	patch \|	diff1 \|	diff2 \|	blob \| history
libguile/r6rs-ports.c	patch \|	diff1 \|	diff2 \|	blob \| history
libguile/read.c	patch \|	diff1 \|	diff2 \|	blob \| history
libguile/strings.c	patch \|	diff1 \|	diff2 \|	blob \| history
libguile/vports.c	patch \|	diff1 \|	diff2 \|	blob \| history
m4/gnulib-cache.m4	patch \|	diff1 \|	diff2 \|	blob \| history
meta/Makefile.am	patch \|	diff1 \|	diff2 \|	blob \| history
module/ice-9/boot-9.scm	patch \|	diff1 \|	diff2 \|	blob \| history
module/ice-9/psyntax-pp.scm	patch \|	diff1 \|	diff2 \|	blob \| history
module/ice-9/psyntax.scm	patch \|	diff1 \|	diff2 \|	blob \| history
module/language/tree-il/peval.scm	patch \|	diff1 \|	diff2 \|	blob \| history
test-suite/standalone/Makefile.am	patch \|	diff1 \|	diff2 \|	blob \| history
test-suite/tests/00-socket.test	patch \|	diff1 \|	diff2 \|	blob \| history
test-suite/tests/foreign.test	patch \|	diff1 \|	diff2 \|	blob \| history
test-suite/tests/ports.test	patch \|	diff1 \|	diff2 \|	blob \| history
test-suite/tests/r6rs-ports.test	patch \|	diff1 \|	diff2 \|	blob \| history