-/* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004,
- * 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+/* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2006,
+ * 2007, 2008, 2009, 2010, 2011, 2012, 2013,
+ * 2014 Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
#include "libguile/mallocs.h"
#include "libguile/validate.h"
#include "libguile/ports.h"
+#include "libguile/ports-internal.h"
#include "libguile/vectors.h"
#include "libguile/weak-set.h"
#include "libguile/fluids.h"
#include "libguile/eq.h"
+#include "libguile/alist.h"
#ifdef HAVE_STRING_H
#include <string.h>
#include <io.h>
#endif
-#ifdef HAVE_UNISTD_H
#include <unistd.h>
-#endif
#ifdef HAVE_SYS_IOCTL_H
#include <sys/ioctl.h>
#endif
\f
+/* Port encodings are case-insensitive ASCII strings. */
+static char
+ascii_toupper (char c)
+{
+ return (c < 'a' || c > 'z') ? c : ('A' + (c - 'a'));
+}
+
+/* It is only necessary to use this function on encodings that come from
+ the user and have not been canonicalized yet. Encodings that are set
+ on ports or in the default encoding fluid are in upper-case, and can
+ be compared with strcmp. */
+static int
+encoding_matches (const char *enc, const char *upper)
+{
+ if (!enc)
+ enc = "ISO-8859-1";
+
+ while (*enc)
+ if (ascii_toupper (*enc++) != *upper++)
+ return 0;
+
+ return !*upper;
+}
+
+static char*
+canonicalize_encoding (const char *enc)
+{
+ char *ret;
+ int i;
+
+ if (!enc)
+ return "ISO-8859-1";
+
+ ret = scm_gc_strdup (enc, "port");
+
+ for (i = 0; ret[i]; i++)
+ {
+ if (ret[i] > 127)
+ /* Restrict to ASCII. */
+ scm_misc_error (NULL, "invalid character encoding ~s",
+ scm_list_1 (scm_from_latin1_string (enc)));
+ else
+ ret[i] = ascii_toupper (ret[i]);
+ }
+
+ return ret;
+}
+
+
+\f
/* The port kind table --- a dynamically resized array of port types. */
scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->input_waiting = input_waiting;
}
+void
+scm_set_port_setvbuf (scm_t_bits tc, void (*setvbuf) (SCM, long, long))
+{
+ scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->setvbuf = setvbuf;
+}
+
+static void
+scm_i_set_pending_eof (SCM port)
+{
+ SCM_PORT_GET_INTERNAL (port)->pending_eof = 1;
+}
+
+static void
+scm_i_clear_pending_eof (SCM port)
+{
+ SCM_PORT_GET_INTERNAL (port)->pending_eof = 0;
+}
+
+SCM_DEFINE (scm_i_port_property, "%port-property", 2, 0, 0,
+ (SCM port, SCM key),
+ "Return the property of @var{port} associated with @var{key}.")
+#define FUNC_NAME s_scm_i_port_property
+{
+ scm_i_pthread_mutex_t *lock;
+ SCM result;
+
+ SCM_VALIDATE_OPPORT (1, port);
+ scm_c_lock_port (port, &lock);
+ result = scm_assq_ref (SCM_PORT_GET_INTERNAL (port)->alist, key);
+ if (lock)
+ scm_i_pthread_mutex_unlock (lock);
+ return result;
+}
+#undef FUNC_NAME
+
+SCM_DEFINE (scm_i_set_port_property_x, "%set-port-property!", 3, 0, 0,
+ (SCM port, SCM key, SCM value),
+ "Set the property of @var{port} associated with @var{key} to @var{value}.")
+#define FUNC_NAME s_scm_i_set_port_property_x
+{
+ scm_i_pthread_mutex_t *lock;
+ scm_t_port_internal *pti;
+
+ SCM_VALIDATE_OPPORT (1, port);
+ scm_c_lock_port (port, &lock);
+ pti = SCM_PORT_GET_INTERNAL (port);
+ pti->alist = scm_assq_set_x (pti->alist, key, value);
+ if (lock)
+ scm_i_pthread_mutex_unlock (lock);
+ return SCM_UNSPECIFIED;
+}
+#undef FUNC_NAME
+
\f
/* Standard ports --- current input, output, error, and more(!). */
static SCM cur_inport_fluid = SCM_BOOL_F;
static SCM cur_outport_fluid = SCM_BOOL_F;
static SCM cur_errport_fluid = SCM_BOOL_F;
+static SCM cur_warnport_fluid = SCM_BOOL_F;
static SCM cur_loadport_fluid = SCM_BOOL_F;
SCM_DEFINE (scm_current_input_port, "current-input-port", 0, 0, 0,
- (),
+ (void),
"Return the current input port. This is the default port used\n"
"by many input procedures. Initially, @code{current-input-port}\n"
"returns the @dfn{standard input} in Unix and C terminology.")
#undef FUNC_NAME
SCM_DEFINE (scm_current_output_port, "current-output-port", 0, 0, 0,
- (),
+ (void),
"Return the current output port. This is the default port used\n"
"by many output procedures. Initially,\n"
"@code{current-output-port} returns the @dfn{standard output} in\n"
#undef FUNC_NAME
SCM_DEFINE (scm_current_error_port, "current-error-port", 0, 0, 0,
- (),
+ (void),
"Return the port to which errors and warnings should be sent (the\n"
"@dfn{standard error} in Unix and C terminology).")
#define FUNC_NAME s_scm_current_error_port
}
#undef FUNC_NAME
-SCM
-scm_current_warning_port (void)
+SCM_DEFINE (scm_current_warning_port, "current-warning-port", 0, 0, 0,
+ (void),
+ "Return the port to which diagnostic warnings should be sent.")
+#define FUNC_NAME s_scm_current_warning_port
{
- static SCM cwp_var = SCM_BOOL_F;
-
- if (scm_is_false (cwp_var))
- cwp_var = scm_c_private_lookup ("guile", "current-warning-port");
-
- return scm_call_0 (scm_variable_ref (cwp_var));
+ if (scm_is_true (cur_warnport_fluid))
+ return scm_fluid_ref (cur_warnport_fluid);
+ else
+ return SCM_BOOL_F;
}
+#undef FUNC_NAME
SCM_DEFINE (scm_current_load_port, "current-load-port", 0, 0, 0,
(),
SCM
scm_set_current_warning_port (SCM port)
+#define FUNC_NAME "set-current-warning-port"
{
- static SCM cwp_var = SCM_BOOL_F;
-
- if (scm_is_false (cwp_var))
- cwp_var = scm_c_private_lookup ("guile", "current-warning-port");
-
- return scm_call_1 (scm_variable_ref (cwp_var), port);
+ SCM owarnp = scm_fluid_ref (cur_warnport_fluid);
+ port = SCM_COERCE_OUTPORT (port);
+ SCM_VALIDATE_OPOUTPORT (1, port);
+ scm_fluid_set_x (cur_warnport_fluid, port);
+ return owarnp;
}
+#undef FUNC_NAME
void
}
#undef FUNC_NAME
-
\f
/* The port table --- a weak set of all ports.
{
SCM ret;
scm_t_port *entry;
+ scm_t_port_internal *pti;
scm_t_ptob_descriptor *ptob;
- entry = (scm_t_port *) scm_gc_calloc (sizeof (scm_t_port), "port");
+ entry = scm_gc_typed_calloc (scm_t_port);
+ pti = scm_gc_typed_calloc (scm_t_port_internal);
ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tag));
ret = scm_words (tag | mode_bits, 3);
entry->lock = scm_gc_malloc_pointerless (sizeof (*entry->lock), "port lock");
scm_i_pthread_mutex_init (entry->lock, scm_i_pthread_mutexattr_recursive);
+ entry->internal = pti;
entry->file_name = SCM_BOOL_F;
entry->rw_active = SCM_PORT_NEITHER;
entry->port = ret;
entry->stream = stream;
- entry->encoding = encoding ? scm_gc_strdup (encoding, "port") : NULL;
- if (encoding && strcmp (encoding, "UTF-8") == 0)
- entry->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
- else if (!encoding || strcmp (encoding, "ISO-8859-1") == 0)
- entry->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
+
+ if (encoding_matches (encoding, "UTF-8"))
+ {
+ pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
+ entry->encoding = "UTF-8";
+ }
+ else if (encoding_matches (encoding, "ISO-8859-1"))
+ {
+ pti->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
+ entry->encoding = "ISO-8859-1";
+ }
else
- entry->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
+ {
+ pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
+ entry->encoding = canonicalize_encoding (encoding);
+ }
+
entry->ilseq_handler = handler;
- entry->iconv_descriptors = NULL;
+ pti->iconv_descriptors = NULL;
+
+ pti->at_stream_start_for_bom_read = 1;
+ pti->at_stream_start_for_bom_write = 1;
- entry->alist = SCM_EOL;
+ pti->pending_eof = 0;
+ pti->alist = SCM_EOL;
if (SCM_PORT_DESCRIPTOR (ret)->free)
scm_i_set_finalizer (SCM2PTR (ret), finalize_port, NULL);
"descriptors.")
#define FUNC_NAME s_scm_close_port
{
- scm_t_port *p;
+ scm_t_port_internal *pti;
int rv;
port = SCM_COERCE_OUTPORT (port);
if (SCM_CLOSEDP (port))
return SCM_BOOL_F;
- p = SCM_PTAB_ENTRY (port);
+ pti = SCM_PORT_GET_INTERNAL (port);
SCM_CLR_PORT_OPEN_FLAG (port);
if (SCM_PORT_DESCRIPTOR (port)->flags & SCM_PORT_TYPE_HAS_FLUSH)
else
rv = 0;
- if (p->iconv_descriptors)
+ if (pti->iconv_descriptors)
{
/* If we don't get here, the iconv_descriptors finalizer will
clean up. */
- close_iconv_descriptors (p->iconv_descriptors);
- p->iconv_descriptors = NULL;
+ close_iconv_descriptors (pti->iconv_descriptors);
+ pti->iconv_descriptors = NULL;
}
return scm_from_bool (rv >= 0);
scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized",
SCM_EOL);
- if (encoding == NULL
- || !strcmp (encoding, "ASCII")
- || !strcmp (encoding, "ANSI_X3.4-1968")
- || !strcmp (encoding, "ISO-8859-1"))
+ if (encoding_matches (encoding, "ISO-8859-1"))
scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F);
else
- {
- SCM str;
- size_t i;
-
- str = scm_from_latin1_string (encoding);
-
- /* Restrict to ASCII. */
- for (i = 0; encoding[i]; i++)
- if (encoding[i] > 127)
- scm_misc_error ("scm_i_set_default_port_encoding",
- "invalid character encoding ~s", scm_list_1 (str));
-
- scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), str);
- }
+ scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var),
+ scm_from_latin1_string (canonicalize_encoding (encoding)));
}
-/* Return the name of the default encoding for newly created ports; a
- return value of NULL means "ISO-8859-1". */
+/* Return the name of the default encoding for newly created ports. */
const char *
scm_i_default_port_encoding (void)
{
if (!scm_port_encoding_init)
- return NULL;
+ return "ISO-8859-1";
else if (!scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
- return NULL;
+ return "ISO-8859-1";
else
{
SCM encoding;
encoding = scm_fluid_ref (SCM_VARIABLE_REF (default_port_encoding_var));
if (!scm_is_string (encoding))
- return NULL;
+ return "ISO-8859-1";
else
return scm_i_string_chars (encoding);
}
strategy);
}
+static void
+scm_i_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port);
+
+/* If the next LEN bytes from PORT are equal to those in BYTES, then
+ return 1, else return 0. Leave the port position unchanged. */
+static int
+looking_at_bytes (SCM port, const unsigned char *bytes, int len)
+{
+ scm_t_port *pt = SCM_PTAB_ENTRY (port);
+ int i = 0;
+
+ while (i < len && scm_peek_byte_or_eof_unlocked (port) == bytes[i])
+ {
+ pt->read_pos++;
+ i++;
+ }
+ scm_i_unget_bytes_unlocked (bytes, i, port);
+ return (i == len);
+}
+
+static const unsigned char scm_utf8_bom[3] = {0xEF, 0xBB, 0xBF};
+static const unsigned char scm_utf16be_bom[2] = {0xFE, 0xFF};
+static const unsigned char scm_utf16le_bom[2] = {0xFF, 0xFE};
+static const unsigned char scm_utf32be_bom[4] = {0x00, 0x00, 0xFE, 0xFF};
+static const unsigned char scm_utf32le_bom[4] = {0xFF, 0xFE, 0x00, 0x00};
+
+/* Decide what byte order to use for a UTF-16 port. Return "UTF-16BE"
+ or "UTF-16LE". MODE must be either SCM_PORT_READ or SCM_PORT_WRITE,
+ and specifies which operation is about to be done. The MODE
+ determines how we will decide the byte order. We deliberately avoid
+ reading from the port unless the user is about to do so. If the user
+ is about to read, then we look for a BOM, and if present, we use it
+ to determine the byte order. Otherwise we choose big endian, as
+ recommended by the Unicode Standard. Note that the BOM (if any) is
+ not consumed here. */
+static const char *
+decide_utf16_encoding (SCM port, scm_t_port_rw_active mode)
+{
+ if (mode == SCM_PORT_READ
+ && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
+ && looking_at_bytes (port, scm_utf16le_bom, sizeof scm_utf16le_bom))
+ return "UTF-16LE";
+ else
+ return "UTF-16BE";
+}
+
+/* Decide what byte order to use for a UTF-32 port. Return "UTF-32BE"
+ or "UTF-32LE". See the comment above 'decide_utf16_encoding' for
+ details. */
+static const char *
+decide_utf32_encoding (SCM port, scm_t_port_rw_active mode)
+{
+ if (mode == SCM_PORT_READ
+ && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
+ && looking_at_bytes (port, scm_utf32le_bom, sizeof scm_utf32le_bom))
+ return "UTF-32LE";
+ else
+ return "UTF-32BE";
+}
+
static void
finalize_iconv_descriptors (void *ptr, void *data)
{
}
scm_t_iconv_descriptors *
-scm_i_port_iconv_descriptors (SCM port)
+scm_i_port_iconv_descriptors (SCM port, scm_t_port_rw_active mode)
{
- scm_t_port *pt;
+ scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
- pt = SCM_PTAB_ENTRY (port);
-
- assert (pt->encoding_mode == SCM_PORT_ENCODING_MODE_ICONV);
+ assert (pti->encoding_mode == SCM_PORT_ENCODING_MODE_ICONV);
- if (!pt->iconv_descriptors)
+ if (!pti->iconv_descriptors)
{
+ scm_t_port *pt = SCM_PTAB_ENTRY (port);
+ const char *precise_encoding;
+
if (!pt->encoding)
pt->encoding = "ISO-8859-1";
- pt->iconv_descriptors =
- open_iconv_descriptors (pt->encoding,
+
+ /* If the specified encoding is UTF-16 or UTF-32, then make
+ that more precise by deciding what byte order to use. */
+ if (strcmp (pt->encoding, "UTF-16") == 0)
+ precise_encoding = decide_utf16_encoding (port, mode);
+ else if (strcmp (pt->encoding, "UTF-32") == 0)
+ precise_encoding = decide_utf32_encoding (port, mode);
+ else
+ precise_encoding = pt->encoding;
+
+ pti->iconv_descriptors =
+ open_iconv_descriptors (precise_encoding,
SCM_INPUT_PORT_P (port),
SCM_OUTPUT_PORT_P (port));
}
- return pt->iconv_descriptors;
+ return pti->iconv_descriptors;
}
/* The name of the encoding is itself encoded in ASCII. */
scm_i_set_port_encoding_x (SCM port, const char *encoding)
{
scm_t_port *pt;
+ scm_t_port_internal *pti;
scm_t_iconv_descriptors *prev;
/* Set the character encoding for this port. */
pt = SCM_PTAB_ENTRY (port);
- prev = pt->iconv_descriptors;
-
- if (encoding && strcmp (encoding, "UTF-8") == 0)
+ pti = SCM_PORT_GET_INTERNAL (port);
+ prev = pti->iconv_descriptors;
+
+ /* In order to handle cases where the encoding changes mid-stream
+ (e.g. within an HTTP stream, or within a file that is composed of
+ segments with different encodings), we consider this to be "stream
+ start" for purposes of BOM handling, regardless of our actual file
+ position. */
+ pti->at_stream_start_for_bom_read = 1;
+ pti->at_stream_start_for_bom_write = 1;
+
+ if (encoding_matches (encoding, "UTF-8"))
{
pt->encoding = "UTF-8";
- pt->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
- pt->iconv_descriptors = NULL;
+ pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
}
- else if (!encoding || strcmp (encoding, "ISO-8859-1") == 0)
+ else if (encoding_matches (encoding, "ISO-8859-1"))
{
pt->encoding = "ISO-8859-1";
- pt->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
- pt->iconv_descriptors = NULL;
+ pti->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
}
else
{
- /* Open descriptors before mutating the port. */
- pt->iconv_descriptors =
- open_iconv_descriptors (encoding,
- SCM_INPUT_PORT_P (port),
- SCM_OUTPUT_PORT_P (port));
- pt->encoding = scm_gc_strdup (encoding, "port");
- pt->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
+ pt->encoding = canonicalize_encoding (encoding);
+ pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
}
+ pti->iconv_descriptors = NULL;
if (prev)
close_iconv_descriptors (prev);
}
"uses to interpret its input and output.\n")
#define FUNC_NAME s_scm_port_encoding
{
- scm_t_port *pt;
- const char *enc;
-
SCM_VALIDATE_PORT (1, port);
- pt = SCM_PTAB_ENTRY (port);
- enc = pt->encoding;
- if (enc)
- return scm_from_latin1_string (pt->encoding);
- else
- return SCM_BOOL_F;
+ return scm_from_latin1_string (SCM_PTAB_ENTRY (port)->encoding);
}
#undef FUNC_NAME
{
scm_t_string_failed_conversion_handler h;
- SCM_VALIDATE_OPPORT (1, port);
-
if (scm_is_false (port))
h = scm_i_default_port_conversion_handler ();
else
static void
lock_port (void *mutex)
{
- scm_i_pthread_mutex_lock (mutex);
+ scm_i_pthread_mutex_lock ((scm_i_pthread_mutex_t *) mutex);
}
static void
unlock_port (void *mutex)
{
- scm_i_pthread_mutex_unlock (mutex);
+ scm_i_pthread_mutex_unlock ((scm_i_pthread_mutex_t *) mutex);
}
void
psb->size = old_size;
}
+static int scm_i_fill_input_unlocked (SCM port);
+
size_t
scm_c_read_unlocked (SCM port, void *buffer, size_t size)
#define FUNC_NAME "scm_c_read"
{
scm_t_port *pt;
+ scm_t_port_internal *pti;
size_t n_read = 0, n_available;
struct port_and_swap_buffer psb;
SCM_VALIDATE_OPINPORT (1, port);
pt = SCM_PTAB_ENTRY (port);
+ pti = SCM_PORT_GET_INTERNAL (port);
if (pt->rw_active == SCM_PORT_WRITE)
SCM_PORT_DESCRIPTOR (port)->flush (port);
if (size == 0)
return n_read;
- /* Now we will call scm_fill_input repeatedly until we have read the
- requested number of bytes. (Note that a single scm_fill_input
- call does not guarantee to fill the whole of the port's read
- buffer.) */
- if (pt->read_buf_size <= 1 && pt->encoding == NULL)
+ /* Now we will call scm_i_fill_input_unlocked repeatedly until we have
+ read the requested number of bytes. (Note that a single
+ scm_i_fill_input_unlocked call does not guarantee to fill the whole
+ of the port's read buffer.) */
+ if (pt->read_buf_size <= 1
+ && pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
{
- /* The port that we are reading from is unbuffered - i.e. does
- not have its own persistent buffer - but we have a buffer,
- provided by our caller, that is the right size for the data
- that is wanted. For the following scm_fill_input calls,
- therefore, we use the buffer in hand as the port's read
- buffer.
-
- We need to make sure that the port's normal (1 byte) buffer
- is reinstated in case one of the scm_fill_input () calls
- throws an exception; we use the scm_dynwind_* API to achieve
- that.
+ /* The port that we are reading from is unbuffered - i.e. does not
+ have its own persistent buffer - but we have a buffer, provided
+ by our caller, that is the right size for the data that is
+ wanted. For the following scm_i_fill_input_unlocked calls,
+ therefore, we use the buffer in hand as the port's read buffer.
+
+ We need to make sure that the port's normal (1 byte) buffer is
+ reinstated in case one of the scm_i_fill_input_unlocked ()
+ calls throws an exception; we use the scm_dynwind_* API to
+ achieve that.
A consequence of this optimization is that the fill_input
functions can't unget characters. That'll push data to the
scm_dynwind_rewind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
scm_dynwind_unwind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
- /* Call scm_fill_input until we have all the bytes that we need,
- or we hit EOF. */
- while (pt->read_buf_size && (scm_fill_input_unlocked (port) != EOF))
+ /* Call scm_i_fill_input_unlocked until we have all the bytes that
+ we need, or we hit EOF. */
+ while (pt->read_buf_size && (scm_i_fill_input_unlocked (port) != EOF))
{
pt->read_buf_size -= (pt->read_end - pt->read_pos);
pt->read_pos = pt->read_buf = pt->read_end;
that a custom port implementation's entry points (in
particular, fill_input) can rely on the buffer always being
the same as they first set up. */
- while (size && (scm_fill_input_unlocked (port) != EOF))
+ while (size && (scm_i_fill_input_unlocked (port) != EOF))
{
n_available = min (size, pt->read_end - pt->read_pos);
memcpy (buffer, pt->read_pos, n_available);
char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
{
scm_t_iconv_descriptors *id;
- int err, byte_read;
- size_t bytes_consumed, output_size;
- char *output;
scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
+ size_t input_size = 0;
- id = scm_i_port_iconv_descriptors (port);
+ id = scm_i_port_iconv_descriptors (port, SCM_PORT_READ);
- for (output_size = 0, output = (char *) utf8_buf,
- bytes_consumed = 0, err = 0;
- err == 0 && output_size == 0
- && (bytes_consumed == 0 || byte_read != EOF);
- bytes_consumed++)
+ for (;;)
{
- char *input;
+ int byte_read;
+ char *input, *output;
size_t input_left, output_left, done;
byte_read = scm_get_byte_or_eof_unlocked (port);
- if (byte_read == EOF)
+ if (SCM_UNLIKELY (byte_read == EOF))
{
- if (bytes_consumed == 0)
- {
- *codepoint = (scm_t_wchar) EOF;
- *len = 0;
- return 0;
- }
- else
- continue;
+ if (SCM_LIKELY (input_size == 0))
+ {
+ *codepoint = (scm_t_wchar) EOF;
+ *len = input_size;
+ return 0;
+ }
+ else
+ {
+ /* EOF found in the middle of a multibyte character. */
+ scm_i_set_pending_eof (port);
+ return EILSEQ;
+ }
}
- buf[bytes_consumed] = byte_read;
+ buf[input_size++] = byte_read;
input = buf;
- input_left = bytes_consumed + 1;
+ input_left = input_size;
+ output = (char *) utf8_buf;
output_left = sizeof (utf8_buf);
done = iconv (id->input_cd, &input, &input_left, &output, &output_left);
+
if (done == (size_t) -1)
{
- err = errno;
- if (err == EINVAL)
- /* Missing input: keep trying. */
- err = 0;
+ int err = errno;
+ if (SCM_LIKELY (err == EINVAL))
+ /* The input byte sequence did not form a complete
+ character. Read another byte and try again. */
+ continue;
+ else
+ return err;
}
else
- output_size = sizeof (utf8_buf) - output_left;
- }
-
- if (SCM_UNLIKELY (output_size == 0))
- /* An unterminated sequence. */
- err = EILSEQ;
- else if (SCM_LIKELY (err == 0))
- {
- /* Convert the UTF8_BUF sequence to a Unicode code point. */
- *codepoint = utf8_to_codepoint (utf8_buf, output_size);
- *len = bytes_consumed;
+ {
+ size_t output_size = sizeof (utf8_buf) - output_left;
+ if (SCM_LIKELY (output_size > 0))
+ {
+ /* iconv generated output. Convert the UTF8_BUF sequence
+ to a Unicode code point. */
+ *codepoint = utf8_to_codepoint (utf8_buf, output_size);
+ *len = input_size;
+ return 0;
+ }
+ else
+ {
+ /* iconv consumed some bytes without producing any output.
+ Most likely this means that a Unicode byte-order mark
+ (BOM) was consumed, which should not be included in the
+ returned buf. Shift any remaining bytes to the beginning
+ of buf, and continue the loop. */
+ memmove (buf, input, input_left);
+ input_size = input_left;
+ continue;
+ }
+ }
}
-
- return err;
}
/* Read a codepoint from PORT and return it in *CODEPOINT. Fill BUF
{
int err;
scm_t_port *pt = SCM_PTAB_ENTRY (port);
+ scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
- if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
+ if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len);
- else if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
+ else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
err = get_latin1_codepoint (port, codepoint, buf, len);
else
err = get_iconv_codepoint (port, codepoint, buf, len);
if (SCM_LIKELY (err == 0))
- update_port_lf (*codepoint, port);
+ {
+ if (SCM_UNLIKELY (pti->at_stream_start_for_bom_read))
+ {
+ /* Record that we're no longer at stream start. */
+ pti->at_stream_start_for_bom_read = 0;
+ if (pt->rw_random)
+ pti->at_stream_start_for_bom_write = 0;
+
+ /* If we just read a BOM in an encoding that recognizes them,
+ then silently consume it and read another code point. */
+ if (SCM_UNLIKELY
+ (*codepoint == SCM_UNICODE_BOM
+ && (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8
+ || strcmp (pt->encoding, "UTF-16") == 0
+ || strcmp (pt->encoding, "UTF-32") == 0)))
+ return get_codepoint (port, codepoint, buf, len);
+ }
+ update_port_lf (*codepoint, port);
+ }
else if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
{
*codepoint = '?';
\f
/* Pushback. */
+\f
-void
-scm_unget_byte_unlocked (int c, SCM port)
-#define FUNC_NAME "scm_unget_byte"
+
+static void
+scm_i_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port)
+#define FUNC_NAME "scm_unget_bytes"
{
scm_t_port *pt = SCM_PTAB_ENTRY (port);
+ size_t old_len, new_len;
- if (pt->read_buf == pt->putback_buf)
- /* already using the put-back buffer. */
- {
- /* enlarge putback_buf if necessary. */
- if (pt->read_end == pt->read_buf + pt->read_buf_size
- && pt->read_buf == pt->read_pos)
- {
- size_t new_size = pt->read_buf_size * 2;
- unsigned char *tmp = (unsigned char *)
- scm_gc_realloc (pt->putback_buf, pt->read_buf_size, new_size,
- "putback buffer");
-
- pt->read_pos = pt->read_buf = pt->putback_buf = tmp;
- pt->read_end = pt->read_buf + pt->read_buf_size;
- pt->read_buf_size = pt->putback_buf_size = new_size;
- }
-
- /* shift any existing bytes to buffer + 1. */
- if (pt->read_pos == pt->read_end)
- pt->read_end = pt->read_buf + 1;
- else if (pt->read_pos != pt->read_buf + 1)
- {
- int count = pt->read_end - pt->read_pos;
+ scm_i_clear_pending_eof (port);
- memmove (pt->read_buf + 1, pt->read_pos, count);
- pt->read_end = pt->read_buf + 1 + count;
- }
-
- pt->read_pos = pt->read_buf;
- }
- else
+ if (pt->read_buf != pt->putback_buf)
/* switch to the put-back buffer. */
{
if (pt->putback_buf == NULL)
{
+ pt->putback_buf_size = (len > SCM_INITIAL_PUTBACK_BUF_SIZE
+ ? len : SCM_INITIAL_PUTBACK_BUF_SIZE);
pt->putback_buf
= (unsigned char *) scm_gc_malloc_pointerless
- (SCM_INITIAL_PUTBACK_BUF_SIZE, "putback buffer");
- pt->putback_buf_size = SCM_INITIAL_PUTBACK_BUF_SIZE;
+ (pt->putback_buf_size, "putback buffer");
}
pt->saved_read_buf = pt->read_buf;
pt->saved_read_end = pt->read_end;
pt->saved_read_buf_size = pt->read_buf_size;
- pt->read_pos = pt->read_buf = pt->putback_buf;
- pt->read_end = pt->read_buf + 1;
+ /* Put read_pos at the end of the buffer, so that ungets will not
+ have to shift the buffer contents each time. */
+ pt->read_buf = pt->putback_buf;
+ pt->read_pos = pt->read_end = pt->putback_buf + pt->putback_buf_size;
pt->read_buf_size = pt->putback_buf_size;
}
- *pt->read_buf = c;
+ old_len = pt->read_end - pt->read_pos;
+ new_len = old_len + len;
+
+ if (new_len > pt->read_buf_size)
+ /* The putback buffer needs to be enlarged. */
+ {
+ size_t new_buf_size;
+ unsigned char *new_buf, *new_end, *new_pos;
+
+ new_buf_size = pt->read_buf_size * 2;
+ if (new_buf_size < new_len)
+ new_buf_size = new_len;
+
+ new_buf = (unsigned char *)
+ scm_gc_malloc_pointerless (new_buf_size, "putback buffer");
+
+ /* Put the bytes at the end of the buffer, so that future
+ ungets won't need to shift the buffer. */
+ new_end = new_buf + new_buf_size;
+ new_pos = new_end - old_len;
+ memcpy (new_pos, pt->read_pos, old_len);
+
+ pt->read_buf = pt->putback_buf = new_buf;
+ pt->read_pos = new_pos;
+ pt->read_end = new_end;
+ pt->read_buf_size = pt->putback_buf_size = new_buf_size;
+ }
+ else if (pt->read_buf + len < pt->read_pos)
+ /* If needed, shift the existing buffer contents up.
+ This should not happen unless some external code
+ manipulates the putback buffer pointers. */
+ {
+ unsigned char *new_end = pt->read_buf + pt->read_buf_size;
+ unsigned char *new_pos = new_end - old_len;
+
+ memmove (new_pos, pt->read_pos, old_len);
+ pt->read_pos = new_pos;
+ pt->read_end = new_end;
+ }
+
+ /* Move read_pos back and copy the bytes there. */
+ pt->read_pos -= len;
+ memcpy (pt->read_buf + (pt->read_pos - pt->read_buf), buf, len);
+
+ if (pt->rw_active == SCM_PORT_WRITE)
+ scm_flush (port);
if (pt->rw_random)
pt->rw_active = SCM_PORT_READ;
}
#undef FUNC_NAME
+void
+scm_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port)
+{
+ scm_i_unget_bytes_unlocked (buf, len, port);
+}
+
+void
+scm_unget_byte_unlocked (int c, SCM port)
+{
+ unsigned char byte = c;
+ scm_i_unget_bytes_unlocked (&byte, 1, port);
+}
+
+void
+scm_unget_bytes (const unsigned char *buf, size_t len, SCM port)
+{
+ scm_i_pthread_mutex_t *lock;
+ scm_c_lock_port (port, &lock);
+ scm_i_unget_bytes_unlocked (buf, len, port);
+ if (lock)
+ scm_i_pthread_mutex_unlock (lock);
+}
+
void
scm_unget_byte (int c, SCM port)
{
+ unsigned char byte = c;
scm_i_pthread_mutex_t *lock;
scm_c_lock_port (port, &lock);
- scm_unget_byte_unlocked (c, port);
+ scm_i_unget_bytes_unlocked (&byte, 1, port);
if (lock)
scm_i_pthread_mutex_unlock (lock);
-
}
void
#define FUNC_NAME "scm_ungetc"
{
scm_t_port *pt = SCM_PTAB_ENTRY (port);
+ scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
char *result;
char result_buf[10];
- const char *encoding;
size_t len;
- int i;
-
- if (pt->encoding != NULL)
- encoding = pt->encoding;
- else
- encoding = "ISO-8859-1";
len = sizeof (result_buf);
- result = u32_conv_to_encoding (encoding,
- (enum iconv_ilseq_handler) pt->ilseq_handler,
- (uint32_t *) &c, 1, NULL,
- result_buf, &len);
+
+ if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
+ {
+ if (c < 0xf0)
+ {
+ result_buf[0] = (char) c;
+ result = result_buf;
+ len = 1;
+ }
+ else
+ result =
+ (char *) u32_to_u8 ((uint32_t *) &c, 1, (uint8_t *) result_buf, &len);
+ }
+ else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1 && c <= 0xff)
+ {
+ result_buf[0] = (char) c;
+ result = result_buf;
+ len = 1;
+ }
+ else
+ result = u32_conv_to_encoding (pt->encoding,
+ (enum iconv_ilseq_handler) pt->ilseq_handler,
+ (uint32_t *) &c, 1, NULL,
+ result_buf, &len);
if (SCM_UNLIKELY (result == NULL || len == 0))
scm_encoding_error (FUNC_NAME, errno,
"conversion to port encoding failed",
SCM_BOOL_F, SCM_MAKE_CHAR (c));
- for (i = len - 1; i >= 0; i--)
- scm_unget_byte_unlocked (result[i], port);
+ scm_i_unget_bytes_unlocked ((unsigned char *) result, len, port);
if (SCM_UNLIKELY (result != result_buf))
free (result);
SCM result;
scm_t_wchar c;
char bytes[SCM_MBCHAR_BUF_SIZE];
- long column, line, i;
- size_t len;
+ long column, line;
+ size_t len = 0;
if (SCM_UNBNDP (port))
port = scm_current_input_port ();
err = get_codepoint (port, &c, bytes, &len);
- for (i = len - 1; i >= 0; i--)
- scm_unget_byte_unlocked (bytes[i], port);
+ scm_i_unget_bytes_unlocked ((unsigned char *) bytes, len, port);
SCM_COL (port) = column;
SCM_LINUM (port) = line;
result = SCM_BOOL_F;
}
else if (c == EOF)
- result = SCM_EOF_VAL;
+ {
+ scm_i_set_pending_eof (port);
+ result = SCM_EOF_VAL;
+ }
else
result = SCM_MAKE_CHAR (c);
/* this should only be called when the read buffer is empty. it
tries to refill the read buffer. it returns the first char from
the port, which is either EOF or *(pt->read_pos). */
-int
-scm_fill_input_unlocked (SCM port)
+static int
+scm_i_fill_input_unlocked (SCM port)
{
scm_t_port *pt = SCM_PTAB_ENTRY (port);
+ scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
assert (pt->read_pos == pt->read_end);
+ if (pti->pending_eof)
+ {
+ pti->pending_eof = 0;
+ return EOF;
+ }
+
if (pt->read_buf == pt->putback_buf)
{
/* finished reading put-back chars. */
return ret;
}
+/* Slow-path fallback for 'scm_get_byte_or_eof_unlocked' */
+int
+scm_slow_get_byte_or_eof_unlocked (SCM port)
+{
+ scm_t_port *pt = SCM_PTAB_ENTRY (port);
+
+ if (pt->rw_active == SCM_PORT_WRITE)
+ scm_flush_unlocked (port);
+
+ if (pt->rw_random)
+ pt->rw_active = SCM_PORT_READ;
+
+ if (pt->read_pos >= pt->read_end)
+ {
+ if (SCM_UNLIKELY (scm_i_fill_input_unlocked (port) == EOF))
+ return EOF;
+ }
+
+ return *pt->read_pos++;
+}
+
+/* Slow-path fallback for 'scm_peek_byte_or_eof_unlocked' */
+int
+scm_slow_peek_byte_or_eof_unlocked (SCM port)
+{
+ scm_t_port *pt = SCM_PTAB_ENTRY (port);
+
+ if (pt->rw_active == SCM_PORT_WRITE)
+ scm_flush_unlocked (port);
+
+ if (pt->rw_random)
+ pt->rw_active = SCM_PORT_READ;
+
+ if (pt->read_pos >= pt->read_end)
+ {
+ if (SCM_UNLIKELY (scm_i_fill_input_unlocked (port) == EOF))
+ {
+ scm_i_set_pending_eof (port);
+ return EOF;
+ }
+ }
+
+ return *pt->read_pos;
+}
+
/* Move up to READ_LEN bytes from PORT's putback and/or read buffers
into memory starting at DEST. Return the number of bytes moved.
PORT's line/column numbers are left unchanged. */
long offset;
scm_t_port *pt = SCM_PTAB_ENTRY (port);
+ scm_i_clear_pending_eof (port);
if (pt->read_buf == pt->putback_buf)
{
offset = pt->read_end - pt->read_pos;
}
+int
+scm_fill_input_unlocked (SCM port)
+{
+ return scm_i_fill_input_unlocked (port);
+}
+
\f
if (SCM_OPPORTP (fd_port))
{
+ scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (fd_port);
scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (fd_port);
off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
off_t_or_off64_t rv;
SCM_MISC_ERROR ("port is not seekable",
scm_cons (fd_port, SCM_EOL));
else
- rv = ptob->seek (fd_port, off, how);
+ rv = ptob->seek (fd_port, off, how);
+
+ /* Set stream-start flags according to new position. */
+ pti->at_stream_start_for_bom_read = (rv == 0);
+ pti->at_stream_start_for_bom_write = (rv == 0);
+
+ scm_i_clear_pending_eof (fd_port);
+
return scm_from_off_t_or_off64_t (rv);
}
else /* file descriptor?. */
off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
scm_t_port *pt = SCM_PTAB_ENTRY (object);
scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (object);
-
+
if (!ptob->truncate)
SCM_MISC_ERROR ("port is not truncatable", SCM_EOL);
+
+ scm_i_clear_pending_eof (object);
if (pt->rw_active == SCM_PORT_READ)
scm_end_input_unlocked (object);
else if (pt->rw_active == SCM_PORT_WRITE)
ptob->flush (object);
-
+
ptob->truncate (object, c_length);
rv = 0;
}
cur_inport_fluid = scm_make_fluid ();
cur_outport_fluid = scm_make_fluid ();
cur_errport_fluid = scm_make_fluid ();
+ cur_warnport_fluid = scm_make_fluid ();
cur_loadport_fluid = scm_make_fluid ();
scm_i_port_weak_set = scm_c_make_weak_set (31);
scm_c_define ("%current-input-port-fluid", cur_inport_fluid);
scm_c_define ("%current-output-port-fluid", cur_outport_fluid);
scm_c_define ("%current-error-port-fluid", cur_errport_fluid);
+ scm_c_define ("%current-warning-port-fluid", cur_warnport_fluid);
}
/*