-/* Copyright (C) 1995,1996,1997,1998,1999,2000,2001, 2003, 2004, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
- *
+/* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2006,
+ * 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
+ *
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 3 of
#include <errno.h>
#include <fcntl.h> /* for chsize on mingw */
#include <assert.h>
+#include <iconv.h>
#include <uniconv.h>
#include <unistr.h>
#include <striconveh.h>
+#include <c-strcase.h>
+
+#include <assert.h>
#include "libguile/_scm.h"
#include "libguile/async.h"
+#include "libguile/deprecation.h"
#include "libguile/eval.h"
#include "libguile/fports.h" /* direct access for seek and truncate */
-#include "libguile/objects.h"
#include "libguile/goops.h"
#include "libguile/smob.h"
#include "libguile/chars.h"
#include "libguile/mallocs.h"
#include "libguile/validate.h"
#include "libguile/ports.h"
+#include "libguile/ports-internal.h"
#include "libguile/vectors.h"
#include "libguile/weaks.h"
#include "libguile/fluids.h"
#include <string.h>
#endif
-#ifdef HAVE_MALLOC_H
-#include <malloc.h>
-#endif
-
#ifdef HAVE_IO_H
#include <io.h>
#endif
gnulib ftruncate.c has code using fcntl F_CHSIZE and F_FREESP, which
might be possibilities if we've got other systems without ftruncate. */
-#if HAVE_CHSIZE && ! HAVE_FTRUNCATE
+#if defined HAVE_CHSIZE && ! defined HAVE_FTRUNCATE
#define ftruncate(fd, size) chsize (fd, size)
#undef HAVE_FTRUNCATE
#define HAVE_FTRUNCATE 1
* Indexes into this table are used when generating type
* tags for smobjects (if you know a tag you can get an index and conversely).
*/
-scm_t_ptob_descriptor *scm_ptobs;
-long scm_numptob;
+scm_t_ptob_descriptor *scm_ptobs = NULL;
+long scm_numptob = 0;
/* GC marker for a port with stream of SCM type. */
SCM
{
}
-static size_t
-scm_port_free0 (SCM port)
-{
- return 0;
-}
-
scm_t_bits
scm_make_port_type (char *name,
int (*fill_input) (SCM port),
if (SCM_I_MAX_PORT_TYPE_COUNT - 1 <= scm_numptob)
goto ptoberr;
SCM_CRITICAL_SECTION_START;
- SCM_SYSCALL (tmp = (char *) realloc ((char *) scm_ptobs,
- (1 + scm_numptob)
- * sizeof (scm_t_ptob_descriptor)));
+ tmp = (char *) scm_gc_realloc ((char *) scm_ptobs,
+ scm_numptob * sizeof (scm_t_ptob_descriptor),
+ (1 + scm_numptob)
+ * sizeof (scm_t_ptob_descriptor),
+ "port-type");
if (tmp)
{
scm_ptobs = (scm_t_ptob_descriptor *) tmp;
scm_ptobs[scm_numptob].name = name;
scm_ptobs[scm_numptob].mark = 0;
- scm_ptobs[scm_numptob].free = scm_port_free0;
+ scm_ptobs[scm_numptob].free = NULL;
scm_ptobs[scm_numptob].print = scm_port_print;
scm_ptobs[scm_numptob].equalp = 0;
scm_ptobs[scm_numptob].close = 0;
scm_ptobs[SCM_TC2PTOBNUM (tc)].input_waiting = input_waiting;
}
+static void
+scm_i_set_pending_eof (SCM port)
+{
+ SCM_PORT_GET_INTERNAL (port)->pending_eof = 1;
+}
+
+static void
+scm_i_clear_pending_eof (SCM port)
+{
+ SCM_PORT_GET_INTERNAL (port)->pending_eof = 0;
+}
+
+SCM
+scm_i_port_alist (SCM port)
+{
+ return SCM_PORT_GET_INTERNAL (port)->alist;
+}
+
+void
+scm_i_set_port_alist_x (SCM port, SCM alist)
+{
+ SCM_PORT_GET_INTERNAL (port)->alist = alist;
+}
+
\f
SCM_DEFINE (scm_char_ready_p, "char-ready?", 0, 1, 0,
if (SCM_UNBNDP (port))
port = scm_current_input_port ();
- else
- SCM_VALIDATE_OPINPORT (1, port);
+ /* It's possible to close the current input port, so validate even in
+ this case. */
+ SCM_VALIDATE_OPINPORT (1, port);
pt = SCM_PTAB_ENTRY (port);
}
#undef FUNC_NAME
-/* move up to read_len chars from port's putback and/or read buffers
- into memory starting at dest. returns the number of chars moved. */
-size_t scm_take_from_input_buffers (SCM port, char *dest, size_t read_len)
+/* Move up to READ_LEN bytes from PORT's putback and/or read buffers
+ into memory starting at DEST. Return the number of bytes moved.
+ PORT's line/column numbers are left unchanged. */
+size_t
+scm_take_from_input_buffers (SCM port, char *dest, size_t read_len)
{
scm_t_port *pt = SCM_PTAB_ENTRY (port);
- size_t chars_read = 0;
+ size_t bytes_read = 0;
size_t from_buf = min (pt->read_end - pt->read_pos, read_len);
if (from_buf > 0)
{
memcpy (dest, pt->read_pos, from_buf);
pt->read_pos += from_buf;
- chars_read += from_buf;
+ bytes_read += from_buf;
read_len -= from_buf;
dest += from_buf;
}
{
memcpy (dest, pt->saved_read_pos, from_buf);
pt->saved_read_pos += from_buf;
- chars_read += from_buf;
+ bytes_read += from_buf;
}
}
- return chars_read;
+
+ return bytes_read;
}
/* Clear a port's read buffers, returning the contents. */
if (pt->read_buf == pt->putback_buf)
count += pt->saved_read_end - pt->saved_read_pos;
- result = scm_i_make_string (count, &data);
- scm_take_from_input_buffers (port, data, count);
+ if (count)
+ {
+ result = scm_i_make_string (count, &data, 0);
+ scm_take_from_input_buffers (port, data, count);
+ }
+ else
+ result = scm_nullstr;
+
return result;
}
#undef FUNC_NAME
\f
/* Standard ports --- current input, output, error, and more(!). */
-static SCM cur_inport_fluid = 0;
-static SCM cur_outport_fluid = 0;
-static SCM cur_errport_fluid = 0;
-static SCM cur_loadport_fluid = 0;
+static SCM cur_inport_fluid = SCM_BOOL_F;
+static SCM cur_outport_fluid = SCM_BOOL_F;
+static SCM cur_errport_fluid = SCM_BOOL_F;
+static SCM cur_loadport_fluid = SCM_BOOL_F;
SCM_DEFINE (scm_current_input_port, "current-input-port", 0, 0, 0,
(),
"returns the @dfn{standard input} in Unix and C terminology.")
#define FUNC_NAME s_scm_current_input_port
{
- if (cur_inport_fluid)
+ if (scm_is_true (cur_inport_fluid))
return scm_fluid_ref (cur_inport_fluid);
else
return SCM_BOOL_F;
"Unix and C terminology.")
#define FUNC_NAME s_scm_current_output_port
{
- if (cur_outport_fluid)
+ if (scm_is_true (cur_outport_fluid))
return scm_fluid_ref (cur_outport_fluid);
else
return SCM_BOOL_F;
"@dfn{standard error} in Unix and C terminology).")
#define FUNC_NAME s_scm_current_error_port
{
- if (cur_errport_fluid)
+ if (scm_is_true (cur_errport_fluid))
return scm_fluid_ref (cur_errport_fluid);
else
return SCM_BOOL_F;
}
#undef FUNC_NAME
+SCM
+scm_current_warning_port (void)
+{
+ static SCM cwp_var = SCM_UNDEFINED;
+ static scm_i_pthread_mutex_t cwp_var_mutex
+ = SCM_I_PTHREAD_MUTEX_INITIALIZER;
+
+ scm_i_scm_pthread_mutex_lock (&cwp_var_mutex);
+ if (SCM_UNBNDP (cwp_var))
+ cwp_var = scm_c_private_variable ("guile", "current-warning-port");
+ scm_i_pthread_mutex_unlock (&cwp_var_mutex);
+
+ return scm_call_0 (scm_variable_ref (cwp_var));
+}
+
SCM_DEFINE (scm_current_load_port, "current-load-port", 0, 0, 0,
(),
"Return the current-load-port.\n"
}
#undef FUNC_NAME
+
+SCM
+scm_set_current_warning_port (SCM port)
+{
+ static SCM cwp_var = SCM_BOOL_F;
+
+ if (scm_is_false (cwp_var))
+ cwp_var = scm_c_private_lookup ("guile", "current-warning-port");
+
+ return scm_call_1 (scm_variable_ref (cwp_var), port);
+}
+
+
void
scm_dynwind_current_input_port (SCM port)
#define FUNC_NAME NULL
scm_i_pthread_mutex_t scm_i_port_table_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
-/* This function is not and should not be thread safe. */
+\f
+/* Port finalization. */
+
+
+static void finalize_port (void *, void *);
+
+/* Register a finalizer for PORT. */
+static SCM_C_INLINE_KEYWORD void
+register_finalizer_for_port (SCM port)
+{
+ /* Register a finalizer for PORT so that its
+ type's `free' function gets called. */
+ scm_i_set_finalizer (SCM2PTR (port), finalize_port, NULL);
+}
+
+/* Finalize the object (a port) pointed to by PTR. */
+static void
+finalize_port (void *ptr, void *data)
+{
+ long port_type;
+ SCM port = PTR2SCM (ptr);
+
+ if (!SCM_PORTP (port))
+ abort ();
+
+ if (SCM_OPENP (port))
+ {
+ if (SCM_REVEALED (port) > 0)
+ /* Keep "revealed" ports alive and re-register a finalizer. */
+ register_finalizer_for_port (port);
+ else
+ {
+ port_type = SCM_TC2PTOBNUM (SCM_CELL_TYPE (port));
+ if (port_type >= scm_numptob)
+ abort ();
+
+ if (scm_ptobs[port_type].free)
+ /* Yes, I really do mean `.free' rather than `.close'. `.close'
+ is for explicit `close-port' by user. */
+ scm_ptobs[port_type].free (port);
+ SCM_SETSTREAM (port, 0);
+ SCM_CLR_PORT_OPEN_FLAG (port);
+
+ scm_gc_ports_collected++;
+ }
+ }
+}
+
+
+
+\f
+
+/* This function is not and should not be thread safe. */
SCM
scm_new_port_table_entry (scm_t_bits tag)
#define FUNC_NAME "scm_new_port_table_entry"
*/
SCM z = scm_cons (SCM_EOL, SCM_EOL);
- scm_t_port *entry = (scm_t_port *) scm_gc_calloc (sizeof (scm_t_port), "port");
- const char *enc;
+ scm_t_port *entry = scm_gc_typed_calloc (scm_t_port);
+ scm_t_port_internal *pti = scm_gc_typed_calloc (scm_t_port_internal);
+ const char *encoding;
entry->file_name = SCM_BOOL_F;
entry->rw_active = SCM_PORT_NEITHER;
entry->port = z;
+
/* Initialize this port with the thread's current default
encoding. */
- if ((enc = scm_i_get_port_encoding (SCM_BOOL_F)) == NULL)
- entry->encoding = NULL;
+ encoding = scm_i_default_port_encoding ();
+ entry->ilseq_handler = scm_i_default_port_conversion_handler ();
+ entry->encoding = encoding ? scm_gc_strdup (encoding, "port") : NULL;
+ if (encoding && c_strcasecmp (encoding, "UTF-8") == 0)
+ pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
else
- entry->encoding = strdup (enc);
- entry->ilseq_handler = scm_i_get_conversion_strategy (SCM_BOOL_F);
+ pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
+ pti->iconv_descriptors = NULL;
+
+ pti->at_stream_start_for_bom_read = 1;
+ pti->at_stream_start_for_bom_write = 1;
+
+ /* XXX These fields are not what they seem. They have been
+ repurposed, but cannot safely be renamed in 2.0 without breaking
+ ABI compatibility. This will be cleaned up in 2.2. */
+ entry->input_cd = pti; /* XXX pointer to the internal port structure */
+ entry->output_cd = NULL; /* XXX unused */
+
+ pti->pending_eof = 0;
+ pti->alist = SCM_EOL;
SCM_SET_CELL_TYPE (z, tag);
SCM_SETPTAB_ENTRY (z, entry);
scm_hashq_set_x (scm_i_port_weak_hash, z, SCM_BOOL_F);
+ /* For each new port, register a finalizer so that it port type's free
+ function can be invoked eventually. */
+ register_finalizer_for_port (z);
+
return z;
}
#undef FUNC_NAME
#if SCM_ENABLE_DEPRECATED==1
-SCM_API scm_t_port *
+scm_t_port *
scm_add_to_port_table (SCM port)
{
- SCM z = scm_new_port_table_entry (scm_tc7_port);
- scm_t_port * pt = SCM_PTAB_ENTRY(z);
+ SCM z;
+ scm_t_port * pt;
+ scm_c_issue_deprecation_warning ("scm_add_to_port_table is deprecated.");
+
+ scm_i_pthread_mutex_lock (&scm_i_port_table_mutex);
+ z = scm_new_port_table_entry (scm_tc7_port);
+ pt = SCM_PTAB_ENTRY(z);
pt->port = port;
SCM_SETCAR (z, SCM_EOL);
SCM_SETCDR (z, SCM_EOL);
SCM_SETPTAB_ENTRY (port, pt);
+ scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex);
+
return pt;
}
#endif
/* Remove a port from the table and destroy it. */
-/* This function is not and should not be thread safe. */
-void
+static void close_iconv_descriptors (scm_t_iconv_descriptors *id);
+
+static void
scm_i_remove_port (SCM port)
#define FUNC_NAME "scm_remove_port"
{
- scm_t_port *p = SCM_PTAB_ENTRY (port);
- if (p->putback_buf)
- scm_gc_free (p->putback_buf, p->putback_buf_size, "putback buffer");
- if (p->encoding)
+ scm_t_port *p;
+ scm_t_port_internal *pti;
+
+ scm_i_scm_pthread_mutex_lock (&scm_i_port_table_mutex);
+
+ p = SCM_PTAB_ENTRY (port);
+ pti = SCM_PORT_GET_INTERNAL (port);
+ scm_port_non_buffer (p);
+ p->putback_buf = NULL;
+ p->putback_buf_size = 0;
+
+ if (pti->iconv_descriptors)
{
- free (p->encoding);
- p->encoding = NULL;
+ close_iconv_descriptors (pti->iconv_descriptors);
+ pti->iconv_descriptors = NULL;
}
- scm_gc_free (p, sizeof (scm_t_port), "port");
SCM_SETPTAB_ENTRY (port, 0);
+
scm_hashq_remove_x (scm_i_port_weak_hash, port);
+
+ scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex);
}
#undef FUNC_NAME
rv = (scm_ptobs[i].close) (port);
else
rv = 0;
- scm_i_scm_pthread_mutex_lock (&scm_i_port_table_mutex);
scm_i_remove_port (port);
- scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex);
SCM_CLR_PORT_OPEN_FLAG (port);
return scm_from_bool (rv >= 0);
}
#undef FUNC_NAME
static SCM
-scm_i_collect_keys_in_vector (void *closure, SCM key, SCM value, SCM result)
+collect_keys (void *unused, SCM key, SCM value, SCM result)
{
- int *i = (int*) closure;
- scm_c_vector_set_x (result, *i, key);
- (*i)++;
-
- return result;
+ return scm_cons (key, result);
}
void
scm_c_port_for_each (void (*proc)(void *data, SCM p), void *data)
{
- int i = 0;
- size_t n;
SCM ports;
- /* Even without pre-emptive multithreading, running arbitrary code
- while scanning the port table is unsafe because the port table
- can change arbitrarily (from a GC, for example). So we first
- collect the ports into a vector. -mvo */
-
- scm_i_scm_pthread_mutex_lock (&scm_i_port_table_mutex);
- n = SCM_HASHTABLE_N_ITEMS (scm_i_port_weak_hash);
- scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex);
- ports = scm_c_make_vector (n, SCM_BOOL_F);
-
+ /* Copy out the port table as a list so that we get strong references
+ to all the values. */
scm_i_pthread_mutex_lock (&scm_i_port_table_mutex);
- ports = scm_internal_hash_fold (scm_i_collect_keys_in_vector, &i,
- ports, scm_i_port_weak_hash);
+ ports = scm_internal_hash_fold (collect_keys, NULL,
+ SCM_EOL, scm_i_port_weak_hash);
scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex);
- for (i = 0; i < n; i++) {
- SCM p = SCM_SIMPLE_VECTOR_REF (ports, i);
- if (SCM_PORTP (p))
- proc (data, p);
- }
-
- scm_remember_upto_here_1 (ports);
+ for (; scm_is_pair (ports); ports = scm_cdr (ports))
+ {
+ SCM p = scm_car (ports);
+ if (SCM_PORTP (p))
+ proc (data, p);
+ }
}
SCM_DEFINE (scm_port_for_each, "port-for-each", 1, 0, 0,
"Apply @var{proc} to each port in the Guile port table\n"
"in turn. The return value is unspecified. More specifically,\n"
"@var{proc} is applied exactly once to every port that exists\n"
- "in the system at the time @var{port-for-each} is invoked.\n"
- "Changes to the port table while @var{port-for-each} is running\n"
- "have no effect as far as @var{port-for-each} is concerned.")
+ "in the system at the time @code{port-for-each} is invoked.\n"
+ "Changes to the port table while @code{port-for-each} is running\n"
+ "have no effect as far as @code{port-for-each} is concerned.")
#define FUNC_NAME s_scm_port_for_each
{
+ SCM ports;
+
SCM_VALIDATE_PROC (1, proc);
- scm_c_port_for_each ((void (*)(void*,SCM))scm_call_1, proc);
+ /* Copy out the port table as a list so that we get strong references
+ to all the values. */
+ scm_i_pthread_mutex_lock (&scm_i_port_table_mutex);
+ ports = scm_internal_hash_fold (collect_keys, NULL,
+ SCM_EOL, scm_i_port_weak_hash);
+ scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex);
+
+ for (; scm_is_pair (ports); ports = scm_cdr (ports))
+ if (SCM_PORTP (SCM_CAR (ports)))
+ scm_call_1 (proc, SCM_CAR (ports));
+
return SCM_UNSPECIFIED;
}
#undef FUNC_NAME
(SCM port),
"Return the next character available from @var{port}, updating\n"
"@var{port} to point to the following character. If no more\n"
- "characters are available, the end-of-file object is returned.")
+ "characters are available, the end-of-file object is returned.\n"
+ "\n"
+ "When @var{port}'s data cannot be decoded according to its\n"
+ "character encoding, a @code{decoding-error} is raised and\n"
+ "@var{port} points past the erroneous byte sequence.\n")
#define FUNC_NAME s_scm_read_char
{
scm_t_wchar c;
}
#undef FUNC_NAME
-#define SCM_MBCHAR_BUF_SIZE (4)
-
-/* Get one codepoint from a file, using the port's encoding. */
-scm_t_wchar
-scm_getc (SCM port)
+/* Update the line and column number of PORT after consumption of C. */
+static inline void
+update_port_lf (scm_t_wchar c, SCM port)
{
- int c;
- unsigned int bufcount = 0;
- char buf[SCM_MBCHAR_BUF_SIZE];
- scm_t_wchar codepoint = 0;
- scm_t_uint32 *u32;
- size_t u32len;
- scm_t_port *pt = SCM_PTAB_ENTRY (port);
-
- c = scm_get_byte_or_eof (port);
- if (c == EOF)
- return (scm_t_wchar) EOF;
-
- buf[0] = c;
- bufcount++;
-
- if (pt->encoding == NULL)
- {
- /* The encoding is Latin-1: bytes are characters. */
- codepoint = (unsigned char) buf[0];
- goto success;
- }
-
- for (;;)
- {
- u32 = u32_conv_from_encoding (pt->encoding,
- (enum iconv_ilseq_handler) pt->ilseq_handler,
- buf, bufcount, NULL, NULL, &u32len);
- if (u32 == NULL || u32len == 0)
- {
- if (errno == ENOMEM)
- scm_memory_error ("Input decoding");
-
- /* Otherwise errno is EILSEQ or EINVAL, so perhaps more
- bytes are needed. Keep looping. */
- }
- else
- {
- /* Complete codepoint found. */
- codepoint = u32[0];
- free (u32);
- goto success;
- }
-
- if (bufcount == SCM_MBCHAR_BUF_SIZE)
- {
- /* We've read several bytes and didn't find a good
- codepoint. Give up. */
- goto failure;
- }
-
- c = scm_get_byte_or_eof (port);
-
- if (c == EOF)
- {
- /* EOF before a complete character was read. Push it all
- back and return EOF. */
- while (bufcount > 0)
- {
- /* FIXME: this will probably cause errors in the port column. */
- scm_unget_byte (buf[bufcount-1], port);
- bufcount --;
- }
- return EOF;
- }
-
- if (c == '\n')
- {
- /* It is always invalid to have EOL in the middle of a
- multibyte character. */
- scm_unget_byte ('\n', port);
- goto failure;
- }
-
- buf[bufcount++] = c;
- }
-
- success:
- switch (codepoint)
+ switch (c)
{
case '\a':
+ case EOF:
break;
case '\b':
SCM_DECCOL (port);
break;
case '\n':
SCM_INCLINE (port);
- break;
+ break;
case '\r':
SCM_ZEROCOL (port);
break;
SCM_INCCOL (port);
break;
}
+}
- return codepoint;
+#define SCM_MBCHAR_BUF_SIZE (4)
- failure:
- {
- char *err_buf;
- SCM err_str = scm_i_make_string (bufcount, &err_buf);
- memcpy (err_buf, buf, bufcount);
-
- if (errno == EILSEQ)
- scm_misc_error (NULL, "input encoding error for ~s: ~s",
- scm_list_2 (scm_from_locale_string (scm_i_get_port_encoding (port)),
- err_str));
- else
- scm_misc_error (NULL, "input encoding error (invalid) for ~s: ~s\n",
- scm_list_2 (scm_from_locale_string (scm_i_get_port_encoding (port)),
- err_str));
- }
+/* Convert the SIZE-byte UTF-8 sequence in UTF8_BUF to a codepoint.
+ UTF8_BUF is assumed to contain a valid UTF-8 sequence. */
+static scm_t_wchar
+utf8_to_codepoint (const scm_t_uint8 *utf8_buf, size_t size)
+{
+ scm_t_wchar codepoint;
- /* Never gets here. */
- return 0;
-}
+ if (utf8_buf[0] <= 0x7f)
+ {
+ assert (size == 1);
+ codepoint = utf8_buf[0];
+ }
+ else if ((utf8_buf[0] & 0xe0) == 0xc0)
+ {
+ assert (size == 2);
+ codepoint = ((scm_t_wchar) utf8_buf[0] & 0x1f) << 6UL
+ | (utf8_buf[1] & 0x3f);
+ }
+ else if ((utf8_buf[0] & 0xf0) == 0xe0)
+ {
+ assert (size == 3);
+ codepoint = ((scm_t_wchar) utf8_buf[0] & 0x0f) << 12UL
+ | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 6UL
+ | (utf8_buf[2] & 0x3f);
+ }
+ else
+ {
+ assert (size == 4);
+ codepoint = ((scm_t_wchar) utf8_buf[0] & 0x07) << 18UL
+ | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 12UL
+ | ((scm_t_wchar) utf8_buf[2] & 0x3f) << 6UL
+ | (utf8_buf[3] & 0x3f);
+ }
+ return codepoint;
+}
-/* this should only be called when the read buffer is empty. it
- tries to refill the read buffer. it returns the first char from
- the port, which is either EOF or *(pt->read_pos). */
-int
-scm_fill_input (SCM port)
+/* Read a UTF-8 sequence from PORT. On success, return 0 and set
+ *CODEPOINT to the codepoint that was read, fill BUF with its UTF-8
+ representation, and set *LEN to the length in bytes. Return
+ `EILSEQ' on error. */
+static int
+get_utf8_codepoint (SCM port, scm_t_wchar *codepoint,
+ scm_t_uint8 buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
{
- scm_t_port *pt = SCM_PTAB_ENTRY (port);
+#define ASSERT_NOT_EOF(b) \
+ if (SCM_UNLIKELY ((b) == EOF)) \
+ goto invalid_seq
+#define CONSUME_PEEKED_BYTE() \
+ pt->read_pos++
- assert (pt->read_pos == pt->read_end);
+ int byte;
+ scm_t_port *pt;
- if (pt->read_buf == pt->putback_buf)
+ *len = 0;
+ pt = SCM_PTAB_ENTRY (port);
+
+ byte = scm_get_byte_or_eof (port);
+ if (byte == EOF)
{
- /* finished reading put-back chars. */
- pt->read_buf = pt->saved_read_buf;
- pt->read_pos = pt->saved_read_pos;
- pt->read_end = pt->saved_read_end;
- pt->read_buf_size = pt->saved_read_buf_size;
- if (pt->read_pos < pt->read_end)
- return *(pt->read_pos);
+ *codepoint = EOF;
+ return 0;
}
- return scm_ptobs[SCM_PTOBNUM (port)].fill_input (port);
-}
+ buf[0] = (scm_t_uint8) byte;
+ *len = 1;
-/* scm_lfwrite
- *
- * This function differs from scm_c_write; it updates port line and
- * column. */
+ if (buf[0] <= 0x7f)
+ /* 1-byte form. */
+ *codepoint = buf[0];
+ else if (buf[0] >= 0xc2 && buf[0] <= 0xdf)
+ {
+ /* 2-byte form. */
+ byte = scm_peek_byte_or_eof (port);
+ ASSERT_NOT_EOF (byte);
-static void
-update_port_lf (scm_t_wchar c, SCM port)
-{
- if (c == '\a')
- ; /* Do nothing. */
- else if (c == '\b')
- SCM_DECCOL (port);
- else if (c == '\n')
- SCM_INCLINE (port);
- else if (c == '\r')
- SCM_ZEROCOL (port);
- else if (c == '\t')
- SCM_TABCOL (port);
- else
- SCM_INCCOL (port);
-}
+ if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
+ goto invalid_seq;
-void
-scm_lfwrite (const char *ptr, size_t size, SCM port)
-{
- scm_t_port *pt = SCM_PTAB_ENTRY (port);
- scm_t_ptob_descriptor *ptob = &scm_ptobs[SCM_PTOBNUM (port)];
+ CONSUME_PEEKED_BYTE ();
+ buf[1] = (scm_t_uint8) byte;
+ *len = 2;
- if (pt->rw_active == SCM_PORT_READ)
- scm_end_input (port);
+ *codepoint = ((scm_t_wchar) buf[0] & 0x1f) << 6UL
+ | (buf[1] & 0x3f);
+ }
+ else if ((buf[0] & 0xf0) == 0xe0)
+ {
+ /* 3-byte form. */
+ byte = scm_peek_byte_or_eof (port);
+ ASSERT_NOT_EOF (byte);
- ptob->write (port, ptr, size);
+ if (SCM_UNLIKELY ((byte & 0xc0) != 0x80
+ || (buf[0] == 0xe0 && byte < 0xa0)
+ || (buf[0] == 0xed && byte > 0x9f)))
+ goto invalid_seq;
- for (; size; ptr++, size--)
- update_port_lf ((scm_t_wchar) (unsigned char) *ptr, port);
+ CONSUME_PEEKED_BYTE ();
+ buf[1] = (scm_t_uint8) byte;
+ *len = 2;
- if (pt->rw_random)
- pt->rw_active = SCM_PORT_WRITE;
-}
+ byte = scm_peek_byte_or_eof (port);
+ ASSERT_NOT_EOF (byte);
-/* Write a scheme string STR to PORT from START inclusive to END
- exclusive. */
-void
-scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port)
-{
- size_t i, size = scm_i_string_length (str);
- scm_t_port *pt = SCM_PTAB_ENTRY (port);
- scm_t_ptob_descriptor *ptob = &scm_ptobs[SCM_PTOBNUM (port)];
- scm_t_wchar p;
- char *buf;
- size_t len;
+ if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
+ goto invalid_seq;
- if (pt->rw_active == SCM_PORT_READ)
- scm_end_input (port);
+ CONSUME_PEEKED_BYTE ();
+ buf[2] = (scm_t_uint8) byte;
+ *len = 3;
+
+ *codepoint = ((scm_t_wchar) buf[0] & 0x0f) << 12UL
+ | ((scm_t_wchar) buf[1] & 0x3f) << 6UL
+ | (buf[2] & 0x3f);
+ }
+ else if (buf[0] >= 0xf0 && buf[0] <= 0xf4)
+ {
+ /* 4-byte form. */
+ byte = scm_peek_byte_or_eof (port);
+ ASSERT_NOT_EOF (byte);
- if (end == (size_t) (-1))
- end = size;
- size = end - start;
+ if (SCM_UNLIKELY (((byte & 0xc0) != 0x80)
+ || (buf[0] == 0xf0 && byte < 0x90)
+ || (buf[0] == 0xf4 && byte > 0x8f)))
+ goto invalid_seq;
- /* Note that making a substring will likely take the
- stringbuf_write_mutex. So, one shouldn't use scm_lfwrite_substr
- if the stringbuf write mutex may still be held elsewhere. */
- buf = scm_to_stringn (scm_c_substring (str, start, end), &len,
- pt->encoding, pt->ilseq_handler);
- ptob->write (port, buf, len);
- free (buf);
+ CONSUME_PEEKED_BYTE ();
+ buf[1] = (scm_t_uint8) byte;
+ *len = 2;
- for (i = 0; i < size; i++)
- {
- p = scm_i_string_ref (str, i + start);
- update_port_lf (p, port);
- }
+ byte = scm_peek_byte_or_eof (port);
+ ASSERT_NOT_EOF (byte);
- if (pt->rw_random)
- pt->rw_active = SCM_PORT_WRITE;
-}
+ if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
+ goto invalid_seq;
-/* Write a scheme string STR to PORT. */
-void
-scm_lfwrite_str (SCM str, SCM port)
-{
- size_t i, size = scm_i_string_length (str);
- scm_t_port *pt = SCM_PTAB_ENTRY (port);
- scm_t_ptob_descriptor *ptob = &scm_ptobs[SCM_PTOBNUM (port)];
- scm_t_wchar p;
- char *buf;
- size_t len;
+ CONSUME_PEEKED_BYTE ();
+ buf[2] = (scm_t_uint8) byte;
+ *len = 3;
- if (pt->rw_active == SCM_PORT_READ)
- scm_end_input (port);
+ byte = scm_peek_byte_or_eof (port);
+ ASSERT_NOT_EOF (byte);
- buf = scm_to_stringn (str, &len,
- pt->encoding, pt->ilseq_handler);
- ptob->write (port, buf, len);
- free (buf);
+ if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
+ goto invalid_seq;
- for (i = 0; i < size; i++)
- {
- p = scm_i_string_ref (str, i);
- update_port_lf (p, port);
+ CONSUME_PEEKED_BYTE ();
+ buf[3] = (scm_t_uint8) byte;
+ *len = 4;
+
+ *codepoint = ((scm_t_wchar) buf[0] & 0x07) << 18UL
+ | ((scm_t_wchar) buf[1] & 0x3f) << 12UL
+ | ((scm_t_wchar) buf[2] & 0x3f) << 6UL
+ | (buf[3] & 0x3f);
}
+ else
+ goto invalid_seq;
- if (pt->rw_random)
- pt->rw_active = SCM_PORT_WRITE;
+ return 0;
+
+ invalid_seq:
+ /* Here we could choose the consume the faulty byte when it's not a
+ valid starting byte, but it's not a requirement. What Section 3.9
+ of Unicode 6.0.0 mandates, though, is to not consume a byte that
+ would otherwise be a valid starting byte. */
+
+ return EILSEQ;
+
+#undef CONSUME_PEEKED_BYTE
+#undef ASSERT_NOT_EOF
}
-/* scm_c_read
- *
- * Used by an application to read arbitrary number of bytes from an
- * SCM port. Same semantics as libc read, except that scm_c_read only
- * returns less than SIZE bytes if at end-of-file.
- *
- * Warning: Doesn't update port line and column counts! */
+/* Likewise, read a byte sequence from PORT, passing it through its
+ input conversion descriptor. */
+static int
+get_iconv_codepoint (SCM port, scm_t_wchar *codepoint,
+ char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
+{
+ scm_t_iconv_descriptors *id;
+ scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
+ size_t input_size = 0;
-/* This structure, and the following swap_buffer function, are used
+ id = scm_i_port_iconv_descriptors (port, SCM_PORT_READ);
+
+ for (;;)
+ {
+ int byte_read;
+ char *input, *output;
+ size_t input_left, output_left, done;
+
+ byte_read = scm_get_byte_or_eof (port);
+ if (SCM_UNLIKELY (byte_read == EOF))
+ {
+ if (SCM_LIKELY (input_size == 0))
+ {
+ *codepoint = (scm_t_wchar) EOF;
+ *len = input_size;
+ return 0;
+ }
+ else
+ {
+ /* EOF found in the middle of a multibyte character. */
+ scm_i_set_pending_eof (port);
+ return EILSEQ;
+ }
+ }
+
+ buf[input_size++] = byte_read;
+
+ input = buf;
+ input_left = input_size;
+ output = (char *) utf8_buf;
+ output_left = sizeof (utf8_buf);
+
+ done = iconv (id->input_cd, &input, &input_left, &output, &output_left);
+
+ if (done == (size_t) -1)
+ {
+ int err = errno;
+ if (SCM_LIKELY (err == EINVAL))
+ /* The input byte sequence did not form a complete
+ character. Read another byte and try again. */
+ continue;
+ else
+ return err;
+ }
+ else
+ {
+ size_t output_size = sizeof (utf8_buf) - output_left;
+ if (SCM_LIKELY (output_size > 0))
+ {
+ /* iconv generated output. Convert the UTF8_BUF sequence
+ to a Unicode code point. */
+ *codepoint = utf8_to_codepoint (utf8_buf, output_size);
+ *len = input_size;
+ return 0;
+ }
+ else
+ {
+ /* iconv consumed some bytes without producing any output.
+ Most likely this means that a Unicode byte-order mark
+ (BOM) was consumed, which should not be included in the
+ returned buf. Shift any remaining bytes to the beginning
+ of buf, and continue the loop. */
+ memmove (buf, input, input_left);
+ input_size = input_left;
+ continue;
+ }
+ }
+ }
+}
+
+/* Read a codepoint from PORT and return it in *CODEPOINT. Fill BUF
+ with the byte representation of the codepoint in PORT's encoding, and
+ set *LEN to the length in bytes of that representation. Return 0 on
+ success and an errno value on error. */
+static int
+get_codepoint (SCM port, scm_t_wchar *codepoint,
+ char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
+{
+ int err;
+ scm_t_port *pt = SCM_PTAB_ENTRY (port);
+ scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
+
+ if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
+ err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len);
+ else
+ err = get_iconv_codepoint (port, codepoint, buf, len);
+
+ if (SCM_LIKELY (err == 0))
+ {
+ if (SCM_UNLIKELY (pti->at_stream_start_for_bom_read))
+ {
+ /* Record that we're no longer at stream start. */
+ pti->at_stream_start_for_bom_read = 0;
+ if (pt->rw_random)
+ pti->at_stream_start_for_bom_write = 0;
+
+ /* If we just read a BOM in an encoding that recognizes them,
+ then silently consume it and read another code point. */
+ if (SCM_UNLIKELY
+ (*codepoint == SCM_UNICODE_BOM
+ && (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8
+ || c_strcasecmp (pt->encoding, "UTF-16") == 0
+ || c_strcasecmp (pt->encoding, "UTF-32") == 0)))
+ return get_codepoint (port, codepoint, buf, len);
+ }
+ update_port_lf (*codepoint, port);
+ }
+ else if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
+ {
+ *codepoint = '?';
+ err = 0;
+ update_port_lf (*codepoint, port);
+ }
+
+ return err;
+}
+
+/* Read a codepoint from PORT and return it. */
+scm_t_wchar
+scm_getc (SCM port)
+#define FUNC_NAME "scm_getc"
+{
+ int err;
+ size_t len;
+ scm_t_wchar codepoint;
+ char buf[SCM_MBCHAR_BUF_SIZE];
+
+ err = get_codepoint (port, &codepoint, buf, &len);
+ if (SCM_UNLIKELY (err != 0))
+ /* At this point PORT should point past the invalid encoding, as per
+ R6RS-lib Section 8.2.4. */
+ scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
+
+ return codepoint;
+}
+#undef FUNC_NAME
+
+/* this should only be called when the read buffer is empty. it
+ tries to refill the read buffer. it returns the first char from
+ the port, which is either EOF or *(pt->read_pos). */
+static int
+scm_i_fill_input (SCM port)
+{
+ scm_t_port *pt = SCM_PTAB_ENTRY (port);
+ scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
+
+ assert (pt->read_pos == pt->read_end);
+
+ if (pti->pending_eof)
+ {
+ pti->pending_eof = 0;
+ return EOF;
+ }
+
+ if (pt->read_buf == pt->putback_buf)
+ {
+ /* finished reading put-back chars. */
+ pt->read_buf = pt->saved_read_buf;
+ pt->read_pos = pt->saved_read_pos;
+ pt->read_end = pt->saved_read_end;
+ pt->read_buf_size = pt->saved_read_buf_size;
+ if (pt->read_pos < pt->read_end)
+ return *(pt->read_pos);
+ }
+ return scm_ptobs[SCM_PTOBNUM (port)].fill_input (port);
+}
+
+int
+scm_fill_input (SCM port)
+{
+ return scm_i_fill_input (port);
+}
+
+/* Slow-path fallback for 'scm_get_byte_or_eof' in inline.h */
+int
+scm_slow_get_byte_or_eof (SCM port)
+{
+ scm_t_port *pt = SCM_PTAB_ENTRY (port);
+
+ if (pt->rw_active == SCM_PORT_WRITE)
+ scm_flush (port);
+
+ if (pt->rw_random)
+ pt->rw_active = SCM_PORT_READ;
+
+ if (pt->read_pos >= pt->read_end)
+ {
+ if (SCM_UNLIKELY (scm_i_fill_input (port) == EOF))
+ return EOF;
+ }
+
+ return *pt->read_pos++;
+}
+
+/* Slow-path fallback for 'scm_peek_byte_or_eof' in inline.h */
+int
+scm_slow_peek_byte_or_eof (SCM port)
+{
+ scm_t_port *pt = SCM_PTAB_ENTRY (port);
+
+ if (pt->rw_active == SCM_PORT_WRITE)
+ scm_flush (port);
+
+ if (pt->rw_random)
+ pt->rw_active = SCM_PORT_READ;
+
+ if (pt->read_pos >= pt->read_end)
+ {
+ if (SCM_UNLIKELY (scm_i_fill_input (port) == EOF))
+ {
+ scm_i_set_pending_eof (port);
+ return EOF;
+ }
+ }
+
+ return *pt->read_pos;
+}
+
+
+/* scm_lfwrite
+ *
+ * This function differs from scm_c_write; it updates port line and
+ * column. */
+
+void
+scm_lfwrite (const char *ptr, size_t size, SCM port)
+{
+ scm_t_port *pt = SCM_PTAB_ENTRY (port);
+ scm_t_ptob_descriptor *ptob = &scm_ptobs[SCM_PTOBNUM (port)];
+
+ if (pt->rw_active == SCM_PORT_READ)
+ scm_end_input (port);
+
+ ptob->write (port, ptr, size);
+
+ for (; size; ptr++, size--)
+ update_port_lf ((scm_t_wchar) (unsigned char) *ptr, port);
+
+ if (pt->rw_random)
+ pt->rw_active = SCM_PORT_WRITE;
+}
+
+/* Write STR to PORT from START inclusive to END exclusive. */
+void
+scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port)
+{
+ scm_t_port *pt = SCM_PTAB_ENTRY (port);
+
+ if (pt->rw_active == SCM_PORT_READ)
+ scm_end_input (port);
+
+ if (end == (size_t) -1)
+ end = scm_i_string_length (str);
+
+ scm_i_display_substring (str, start, end, port);
+
+ if (pt->rw_random)
+ pt->rw_active = SCM_PORT_WRITE;
+}
+
+/* scm_c_read
+ *
+ * Used by an application to read arbitrary number of bytes from an
+ * SCM port. Same semantics as libc read, except that scm_c_read only
+ * returns less than SIZE bytes if at end-of-file.
+ *
+ * Warning: Doesn't update port line and column counts! */
+
+/* This structure, and the following swap_buffer function, are used
for temporarily swapping a port's own read buffer, and the buffer
that the caller of scm_c_read provides. */
struct port_and_swap_buffer
if (size == 0)
return n_read;
- /* Now we will call scm_fill_input repeatedly until we have read the
- requested number of bytes. (Note that a single scm_fill_input
+ /* Now we will call scm_i_fill_input repeatedly until we have read the
+ requested number of bytes. (Note that a single scm_i_fill_input
call does not guarantee to fill the whole of the port's read
buffer.) */
- if (pt->read_buf_size <= 1)
+ if (pt->read_buf_size <= 1 && pt->encoding == NULL)
{
/* The port that we are reading from is unbuffered - i.e. does
not have its own persistent buffer - but we have a buffer,
provided by our caller, that is the right size for the data
- that is wanted. For the following scm_fill_input calls,
+ that is wanted. For the following scm_i_fill_input calls,
therefore, we use the buffer in hand as the port's read
buffer.
We need to make sure that the port's normal (1 byte) buffer
- is reinstated in case one of the scm_fill_input () calls
+ is reinstated in case one of the scm_i_fill_input () calls
throws an exception; we use the scm_dynwind_* API to achieve
- that. */
+ that.
+
+ A consequence of this optimization is that the fill_input
+ functions can't unget characters. That'll push data to the
+ pushback buffer instead of this psb buffer. */
+#if SCM_DEBUG == 1
+ unsigned char *pback = pt->putback_buf;
+#endif
psb.pt = pt;
psb.buffer = buffer;
psb.size = size;
scm_dynwind_rewind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
scm_dynwind_unwind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
- /* Call scm_fill_input until we have all the bytes that we need,
+ /* Call scm_i_fill_input until we have all the bytes that we need,
or we hit EOF. */
- while (pt->read_buf_size && (scm_fill_input (port) != EOF))
+ while (pt->read_buf_size && (scm_i_fill_input (port) != EOF))
{
pt->read_buf_size -= (pt->read_end - pt->read_pos);
pt->read_pos = pt->read_buf = pt->read_end;
}
+#if SCM_DEBUG == 1
+ if (pback != pt->putback_buf
+ || pt->read_buf - (unsigned char *) buffer < 0)
+ scm_misc_error (FUNC_NAME,
+ "scm_c_read must not call a fill function that pushes "
+ "back characters onto an unbuffered port", SCM_EOL);
+#endif
n_read += pt->read_buf - (unsigned char *) buffer;
-
+
/* Reinstate the port's normal buffer. */
scm_dynwind_end ();
}
that a custom port implementation's entry points (in
particular, fill_input) can rely on the buffer always being
the same as they first set up. */
- while (size && (scm_fill_input (port) != EOF))
+ while (size && (scm_i_fill_input (port) != EOF))
{
n_available = min (size, pt->read_end - pt->read_pos);
memcpy (buffer, pt->read_pos, n_available);
}
#undef FUNC_NAME
-void
+void
scm_flush (SCM port)
{
long i = SCM_PTOBNUM (port);
+ assert (i >= 0);
(scm_ptobs[i].flush) (port);
}
long offset;
scm_t_port *pt = SCM_PTAB_ENTRY (port);
+ scm_i_clear_pending_eof (port);
if (pt->read_buf == pt->putback_buf)
{
offset = pt->read_end - pt->read_pos;
\f
-void
-scm_unget_byte (int c, SCM port)
-#define FUNC_NAME "scm_unget_byte"
+static void
+scm_i_unget_bytes (const unsigned char *buf, size_t len, SCM port)
+#define FUNC_NAME "scm_unget_bytes"
{
scm_t_port *pt = SCM_PTAB_ENTRY (port);
+ size_t old_len, new_len;
- if (pt->read_buf == pt->putback_buf)
- /* already using the put-back buffer. */
- {
- /* enlarge putback_buf if necessary. */
- if (pt->read_end == pt->read_buf + pt->read_buf_size
- && pt->read_buf == pt->read_pos)
- {
- size_t new_size = pt->read_buf_size * 2;
- unsigned char *tmp = (unsigned char *)
- scm_gc_realloc (pt->putback_buf, pt->read_buf_size, new_size,
- "putback buffer");
-
- pt->read_pos = pt->read_buf = pt->putback_buf = tmp;
- pt->read_end = pt->read_buf + pt->read_buf_size;
- pt->read_buf_size = pt->putback_buf_size = new_size;
- }
-
- /* shift any existing bytes to buffer + 1. */
- if (pt->read_pos == pt->read_end)
- pt->read_end = pt->read_buf + 1;
- else if (pt->read_pos != pt->read_buf + 1)
- {
- int count = pt->read_end - pt->read_pos;
-
- memmove (pt->read_buf + 1, pt->read_pos, count);
- pt->read_end = pt->read_buf + 1 + count;
- }
+ scm_i_clear_pending_eof (port);
- pt->read_pos = pt->read_buf;
- }
- else
+ if (pt->read_buf != pt->putback_buf)
/* switch to the put-back buffer. */
{
if (pt->putback_buf == NULL)
{
+ pt->putback_buf_size = (len > SCM_INITIAL_PUTBACK_BUF_SIZE
+ ? len : SCM_INITIAL_PUTBACK_BUF_SIZE);
pt->putback_buf
- = (unsigned char *) scm_gc_malloc (SCM_INITIAL_PUTBACK_BUF_SIZE,
- "putback buffer");
- pt->putback_buf_size = SCM_INITIAL_PUTBACK_BUF_SIZE;
+ = (unsigned char *) scm_gc_malloc_pointerless
+ (pt->putback_buf_size, "putback buffer");
}
pt->saved_read_buf = pt->read_buf;
pt->saved_read_end = pt->read_end;
pt->saved_read_buf_size = pt->read_buf_size;
- pt->read_pos = pt->read_buf = pt->putback_buf;
- pt->read_end = pt->read_buf + 1;
+ /* Put read_pos at the end of the buffer, so that ungets will not
+ have to shift the buffer contents each time. */
+ pt->read_buf = pt->putback_buf;
+ pt->read_pos = pt->read_end = pt->putback_buf + pt->putback_buf_size;
pt->read_buf_size = pt->putback_buf_size;
}
- *pt->read_buf = c;
+ old_len = pt->read_end - pt->read_pos;
+ new_len = old_len + len;
+
+ if (new_len > pt->read_buf_size)
+ /* The putback buffer needs to be enlarged. */
+ {
+ size_t new_buf_size;
+ unsigned char *new_buf, *new_end, *new_pos;
+
+ new_buf_size = pt->read_buf_size * 2;
+ if (new_buf_size < new_len)
+ new_buf_size = new_len;
+
+ new_buf = (unsigned char *)
+ scm_gc_malloc_pointerless (new_buf_size, "putback buffer");
+
+ /* Put the bytes at the end of the buffer, so that future
+ ungets won't need to shift the buffer. */
+ new_end = new_buf + new_buf_size;
+ new_pos = new_end - old_len;
+ memcpy (new_pos, pt->read_pos, old_len);
+
+ pt->read_buf = pt->putback_buf = new_buf;
+ pt->read_pos = new_pos;
+ pt->read_end = new_end;
+ pt->read_buf_size = pt->putback_buf_size = new_buf_size;
+ }
+ else if (pt->read_buf + len < pt->read_pos)
+ /* If needed, shift the existing buffer contents up.
+ This should not happen unless some external code
+ manipulates the putback buffer pointers. */
+ {
+ unsigned char *new_end = pt->read_buf + pt->read_buf_size;
+ unsigned char *new_pos = new_end - old_len;
+
+ memmove (new_pos, pt->read_pos, old_len);
+ pt->read_pos = new_pos;
+ pt->read_end = new_end;
+ }
+
+ /* Move read_pos back and copy the bytes there. */
+ pt->read_pos -= len;
+ memcpy (pt->read_buf + (pt->read_pos - pt->read_buf), buf, len);
+
+ if (pt->rw_active == SCM_PORT_WRITE)
+ scm_flush (port);
if (pt->rw_random)
pt->rw_active = SCM_PORT_READ;
}
#undef FUNC_NAME
-void
+void
+scm_unget_bytes (const unsigned char *buf, size_t len, SCM port)
+{
+ scm_i_unget_bytes (buf, len, port);
+}
+
+void
+scm_unget_byte (int c, SCM port)
+{
+ unsigned char byte;
+
+ byte = c;
+ scm_i_unget_bytes (&byte, 1, port);
+}
+
+void
scm_ungetc (scm_t_wchar c, SCM port)
#define FUNC_NAME "scm_ungetc"
{
scm_t_port *pt = SCM_PTAB_ENTRY (port);
- scm_t_wchar *wbuf;
- SCM str = scm_i_make_wide_string (1, &wbuf);
- char *buf;
+ char *result;
+ char result_buf[10];
+ const char *encoding;
size_t len;
- int i;
- wbuf[0] = c;
- buf = scm_to_stringn (str, &len, pt->encoding, pt->ilseq_handler);
-
- for (i = len - 1; i >= 0; i--)
- scm_unget_byte (buf[i], port);
+ if (pt->encoding != NULL)
+ encoding = pt->encoding;
+ else
+ encoding = "ISO-8859-1";
+
+ len = sizeof (result_buf);
+ result = u32_conv_to_encoding (encoding,
+ (enum iconv_ilseq_handler) pt->ilseq_handler,
+ (uint32_t *) &c, 1, NULL,
+ result_buf, &len);
+
+ if (SCM_UNLIKELY (result == NULL || len == 0))
+ scm_encoding_error (FUNC_NAME, errno,
+ "conversion to port encoding failed",
+ SCM_BOOL_F, SCM_MAKE_CHAR (c));
+
+ scm_i_unget_bytes ((unsigned char *) result, len, port);
+
+ if (SCM_UNLIKELY (result != result_buf))
+ free (result);
if (c == '\n')
{
"return the value returned by the preceding call to\n"
"@code{peek-char}. In particular, a call to @code{peek-char} on\n"
"an interactive port will hang waiting for input whenever a call\n"
- "to @code{read-char} would have hung.")
+ "to @code{read-char} would have hung.\n"
+ "\n"
+ "As for @code{read-char}, a @code{decoding-error} may be raised\n"
+ "if such a situation occurs. However, unlike with @code{read-char},\n"
+ "@var{port} still points at the beginning of the erroneous byte\n"
+ "sequence when the error is raised.\n")
#define FUNC_NAME s_scm_peek_char
{
- scm_t_wchar c, column;
+ int err;
+ SCM result;
+ scm_t_wchar c;
+ char bytes[SCM_MBCHAR_BUF_SIZE];
+ long column, line;
+ size_t len;
+
if (SCM_UNBNDP (port))
port = scm_current_input_port ();
+ SCM_VALIDATE_OPINPORT (1, port);
+
+ column = SCM_COL (port);
+ line = SCM_LINUM (port);
+
+ err = get_codepoint (port, &c, bytes, &len);
+
+ scm_i_unget_bytes ((unsigned char *) bytes, len, port);
+
+ SCM_COL (port) = column;
+ SCM_LINUM (port) = line;
+
+ if (SCM_UNLIKELY (err != 0))
+ {
+ scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
+
+ /* Shouldn't happen since `catch' always aborts to prompt. */
+ result = SCM_BOOL_F;
+ }
+ else if (c == EOF)
+ {
+ scm_i_set_pending_eof (port);
+ result = SCM_EOF_VAL;
+ }
else
- SCM_VALIDATE_OPINPORT (1, port);
- column = SCM_COL(port);
- c = scm_getc (port);
- if (EOF == c)
- return SCM_EOF_VAL;
- scm_ungetc (c, port);
- SCM_COL(port) = column;
- return SCM_MAKE_CHAR (c);
+ result = SCM_MAKE_CHAR (c);
+
+ return result;
}
#undef FUNC_NAME
SCM_DEFINE (scm_unread_char, "unread-char", 1, 1, 0,
(SCM cobj, SCM port),
- "Place @var{char} in @var{port} so that it will be read by the\n"
- "next read operation. If called multiple times, the unread characters\n"
- "will be read again in last-in first-out order. If @var{port} is\n"
- "not supplied, the current input port is used.")
+ "Place character @var{cobj} in @var{port} so that it will be\n"
+ "read by the next read operation. If called multiple times, the\n"
+ "unread characters will be read again in last-in first-out\n"
+ "order. If @var{port} is not supplied, the current input port\n"
+ "is used.")
#define FUNC_NAME s_scm_unread_char
{
int c;
SCM_VALIDATE_CHAR (1, cobj);
if (SCM_UNBNDP (port))
port = scm_current_input_port ();
- else
- SCM_VALIDATE_OPINPORT (2, port);
+ SCM_VALIDATE_OPINPORT (2, port);
c = SCM_CHAR (cobj);
SCM_VALIDATE_STRING (1, str);
if (SCM_UNBNDP (port))
port = scm_current_input_port ();
- else
- SCM_VALIDATE_OPINPORT (2, port);
+ SCM_VALIDATE_OPINPORT (2, port);
n = scm_i_string_length (str);
SCM_DEFINE (scm_seek, "seek", 3, 0, 0,
(SCM fd_port, SCM offset, SCM whence),
- "Sets the current position of @var{fd/port} to the integer\n"
+ "Sets the current position of @var{fd_port} to the integer\n"
"@var{offset}, which is interpreted according to the value of\n"
"@var{whence}.\n"
"\n"
"@defvar SEEK_END\n"
"Seek from the end of the file.\n"
"@end defvar\n"
- "If @var{fd/port} is a file descriptor, the underlying system\n"
+ "If @var{fd_port} is a file descriptor, the underlying system\n"
"call is @code{lseek}. @var{port} may be a string port.\n"
"\n"
"The value returned is the new position in the file. This means\n"
if (SCM_OPPORTP (fd_port))
{
+ scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (fd_port);
scm_t_ptob_descriptor *ptob = scm_ptobs + SCM_PTOBNUM (fd_port);
off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
off_t_or_off64_t rv;
SCM_MISC_ERROR ("port is not seekable",
scm_cons (fd_port, SCM_EOL));
else
- rv = ptob->seek (fd_port, off, how);
+ rv = ptob->seek (fd_port, off, how);
+
+ /* Set stream-start flags according to new position. */
+ pti->at_stream_start_for_bom_read = (rv == 0);
+ pti->at_stream_start_for_bom_write = (rv == 0);
+
+ scm_i_clear_pending_eof (fd_port);
+
return scm_from_off_t_or_off64_t (rv);
}
else /* file descriptor?. */
SCM_DEFINE (scm_truncate_file, "truncate-file", 1, 1, 0,
(SCM object, SCM length),
- "Truncate @var{file} to @var{length} bytes. @var{file} can be a\n"
- "filename string, a port object, or an integer file descriptor.\n"
+ "Truncate file @var{object} to @var{length} bytes. @var{object}\n"
+ "can be a filename string, a port object, or an integer file\n"
+ "descriptor.\n"
"The return value is unspecified.\n"
"\n"
"For a port or file descriptor @var{length} can be omitted, in\n"
off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
scm_t_port *pt = SCM_PTAB_ENTRY (object);
scm_t_ptob_descriptor *ptob = scm_ptobs + SCM_PTOBNUM (object);
-
+
if (!ptob->truncate)
SCM_MISC_ERROR ("port is not truncatable", SCM_EOL);
+
+ scm_i_clear_pending_eof (object);
if (pt->rw_active == SCM_PORT_READ)
scm_end_input (object);
else if (pt->rw_active == SCM_PORT_WRITE)
ptob->flush (object);
-
+
ptob->truncate (object, c_length);
rv = 0;
}
SCM_DEFINE (scm_port_filename, "port-filename", 1, 0, 0,
(SCM port),
- "Return the filename associated with @var{port}. This function returns\n"
- "the strings \"standard input\", \"standard output\" and \"standard error\"\n"
- "when called on the current input, output and error ports respectively.")
+ "Return the filename associated with @var{port}, or @code{#f}\n"
+ "if no filename is associated with the port.")
#define FUNC_NAME s_scm_port_filename
{
port = SCM_COERCE_OUTPORT (port);
}
#undef FUNC_NAME
-/* The default port encoding for this locale. New ports will have this
- encoding. If it is a string, that is the encoding. If it #f, it
- is in the native (Latin-1) encoding. */
-SCM_GLOBAL_VARIABLE (scm_port_encoding_var, "%port-encoding");
+/* A fluid specifying the default encoding for newly created ports. If it is
+ a string, that is the encoding. If it is #f, it is in the "native"
+ (Latin-1) encoding. */
+SCM_VARIABLE (default_port_encoding_var, "%default-port-encoding");
+
static int scm_port_encoding_init = 0;
-/* Return a C string representation of the current encoding. */
+/* Use ENCODING as the default encoding for future ports. */
+void
+scm_i_set_default_port_encoding (const char *encoding)
+{
+ if (!scm_port_encoding_init
+ || !scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
+ scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized",
+ SCM_EOL);
+
+ if (encoding == NULL
+ || c_strcasecmp (encoding, "ASCII") == 0
+ || c_strcasecmp (encoding, "ANSI_X3.4-1968") == 0
+ || c_strcasecmp (encoding, "ISO-8859-1") == 0)
+ scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F);
+ else
+ scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var),
+ scm_from_locale_string (encoding));
+}
+
+/* Return the name of the default encoding for newly created ports; a
+ return value of NULL means "ISO-8859-1". */
const char *
-scm_i_get_port_encoding (SCM port)
+scm_i_default_port_encoding (void)
{
- SCM encoding;
-
- if (scm_is_false (port))
+ if (!scm_port_encoding_init)
+ return NULL;
+ else if (!scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
+ return NULL;
+ else
{
- if (!scm_port_encoding_init)
- return NULL;
- else if (!scm_is_fluid (SCM_VARIABLE_REF (scm_port_encoding_var)))
+ SCM encoding;
+
+ encoding = scm_fluid_ref (SCM_VARIABLE_REF (default_port_encoding_var));
+ if (!scm_is_string (encoding))
return NULL;
else
- {
- encoding = scm_fluid_ref (SCM_VARIABLE_REF (scm_port_encoding_var));
- if (!scm_is_string (encoding))
- return NULL;
- else
- return scm_i_string_chars (encoding);
- }
+ return scm_i_string_chars (encoding);
}
- else
+}
+
+/* If the next LEN bytes from PORT are equal to those in BYTES, then
+ return 1, else return 0. Leave the port position unchanged. */
+static int
+looking_at_bytes (SCM port, const unsigned char *bytes, int len)
+{
+ scm_t_port *pt = SCM_PTAB_ENTRY (port);
+ int i = 0;
+
+ while (i < len && scm_peek_byte_or_eof (port) == bytes[i])
{
- scm_t_port *pt;
- pt = SCM_PTAB_ENTRY (port);
- if (pt->encoding)
- return pt->encoding;
- else
- return NULL;
+ pt->read_pos++;
+ i++;
}
+ scm_i_unget_bytes (bytes, i, port);
+ return (i == len);
+}
+
+static const unsigned char scm_utf8_bom[3] = {0xEF, 0xBB, 0xBF};
+static const unsigned char scm_utf16be_bom[2] = {0xFE, 0xFF};
+static const unsigned char scm_utf16le_bom[2] = {0xFF, 0xFE};
+static const unsigned char scm_utf32be_bom[4] = {0x00, 0x00, 0xFE, 0xFF};
+static const unsigned char scm_utf32le_bom[4] = {0xFF, 0xFE, 0x00, 0x00};
+
+/* Decide what byte order to use for a UTF-16 port. Return "UTF-16BE"
+ or "UTF-16LE". MODE must be either SCM_PORT_READ or SCM_PORT_WRITE,
+ and specifies which operation is about to be done. The MODE
+ determines how we will decide the byte order. We deliberately avoid
+ reading from the port unless the user is about to do so. If the user
+ is about to read, then we look for a BOM, and if present, we use it
+ to determine the byte order. Otherwise we choose big endian, as
+ recommended by the Unicode Standard. Note that the BOM (if any) is
+ not consumed here. */
+static const char *
+decide_utf16_encoding (SCM port, scm_t_port_rw_active mode)
+{
+ if (mode == SCM_PORT_READ
+ && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
+ && looking_at_bytes (port, scm_utf16le_bom, sizeof scm_utf16le_bom))
+ return "UTF-16LE";
+ else
+ return "UTF-16BE";
}
-/* Returns ENC is if is a recognized encoding. If it isn't, it tries
- to find an alias of ENC that is valid. Otherwise, it returns
- NULL. */
+/* Decide what byte order to use for a UTF-32 port. Return "UTF-32BE"
+ or "UTF-32LE". See the comment above 'decide_utf16_encoding' for
+ details. */
static const char *
-find_valid_encoding (const char *enc)
+decide_utf32_encoding (SCM port, scm_t_port_rw_active mode)
{
- int isvalid = 0;
- const char str[] = " ";
- scm_t_uint32 *u32;
- size_t u32len;
-
- u32 = u32_conv_from_encoding (enc, iconveh_error, str, 1,
- NULL, NULL, &u32len);
- isvalid = (u32 != NULL);
- free (u32);
-
- if (isvalid)
- return enc;
+ if (mode == SCM_PORT_READ
+ && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
+ && looking_at_bytes (port, scm_utf32le_bom, sizeof scm_utf32le_bom))
+ return "UTF-32LE";
+ else
+ return "UTF-32BE";
+}
- return NULL;
+static void
+finalize_iconv_descriptors (void *ptr, void *data)
+{
+ close_iconv_descriptors (ptr);
}
-void
-scm_i_set_port_encoding_x (SCM port, const char *enc)
+static scm_t_iconv_descriptors *
+open_iconv_descriptors (const char *encoding, int reading, int writing)
{
- const char *valid_enc;
- scm_t_port *pt;
+ scm_t_iconv_descriptors *id;
+ iconv_t input_cd, output_cd;
- /* Null is shorthand for the native, Latin-1 encoding. */
- if (enc == NULL)
- valid_enc = NULL;
- else
+ input_cd = (iconv_t) -1;
+ output_cd = (iconv_t) -1;
+ if (reading)
+ {
+ /* Open an input iconv conversion descriptor, from ENCODING
+ to UTF-8. We choose UTF-8, not UTF-32, because iconv
+ implementations can typically convert from anything to
+ UTF-8, but not to UTF-32 (see
+ <http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html>). */
+
+ /* Assume opening an iconv descriptor causes about 16 KB of
+ allocation. */
+ scm_gc_register_allocation (16 * 1024);
+
+ input_cd = iconv_open ("UTF-8", encoding);
+ if (input_cd == (iconv_t) -1)
+ goto invalid_encoding;
+ }
+
+ if (writing)
{
- valid_enc = find_valid_encoding (enc);
- if (valid_enc == NULL)
+ /* Assume opening an iconv descriptor causes about 16 KB of
+ allocation. */
+ scm_gc_register_allocation (16 * 1024);
+
+ output_cd = iconv_open (encoding, "UTF-8");
+ if (output_cd == (iconv_t) -1)
{
- SCM err;
- err = scm_from_locale_string (enc);
- scm_misc_error (NULL, "invalid or unknown character encoding ~s",
- scm_list_1 (err));
+ if (input_cd != (iconv_t) -1)
+ iconv_close (input_cd);
+ goto invalid_encoding;
}
}
- if (scm_is_false (port))
+ id = scm_gc_malloc_pointerless (sizeof (*id), "iconv descriptors");
+ id->input_cd = input_cd;
+ id->output_cd = output_cd;
+
+ /* Register a finalizer to close the descriptors. */
+ scm_i_set_finalizer (id, finalize_iconv_descriptors, NULL);
+
+ return id;
+
+ invalid_encoding:
+ {
+ SCM err;
+ err = scm_from_locale_string (encoding);
+ scm_misc_error ("open_iconv_descriptors",
+ "invalid or unknown character encoding ~s",
+ scm_list_1 (err));
+ }
+}
+
+static void
+close_iconv_descriptors (scm_t_iconv_descriptors *id)
+{
+ if (id->input_cd != (iconv_t) -1)
+ iconv_close (id->input_cd);
+ if (id->output_cd != (iconv_t) -1)
+ iconv_close (id->output_cd);
+ id->input_cd = (void *) -1;
+ id->output_cd = (void *) -1;
+}
+
+/* Return the iconv_descriptors, initializing them if necessary. MODE
+ must be either SCM_PORT_READ or SCM_PORT_WRITE, and specifies which
+ operation is about to be done. We deliberately avoid reading from
+ the port unless the user was about to do so. */
+scm_t_iconv_descriptors *
+scm_i_port_iconv_descriptors (SCM port, scm_t_port_rw_active mode)
+{
+ scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
+
+ assert (pti->encoding_mode == SCM_PORT_ENCODING_MODE_ICONV);
+
+ if (!pti->iconv_descriptors)
{
- /* Set the default encoding for future ports. */
- if (!scm_port_encoding_init
- || !scm_is_fluid (SCM_VARIABLE_REF (scm_port_encoding_var)))
- scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized",
- SCM_EOL);
-
- if (valid_enc == NULL
- || !strcmp (valid_enc, "ASCII")
- || !strcmp (valid_enc, "ANSI_X3.4-1968")
- || !strcmp (valid_enc, "ISO-8859-1"))
- scm_fluid_set_x (SCM_VARIABLE_REF (scm_port_encoding_var), SCM_BOOL_F);
+ scm_t_port *pt = SCM_PTAB_ENTRY (port);
+ const char *precise_encoding;
+
+ if (!pt->encoding)
+ pt->encoding = "ISO-8859-1";
+
+ /* If the specified encoding is UTF-16 or UTF-32, then make
+ that more precise by deciding what byte order to use. */
+ if (c_strcasecmp (pt->encoding, "UTF-16") == 0)
+ precise_encoding = decide_utf16_encoding (port, mode);
+ else if (c_strcasecmp (pt->encoding, "UTF-32") == 0)
+ precise_encoding = decide_utf32_encoding (port, mode);
else
- scm_fluid_set_x (SCM_VARIABLE_REF (scm_port_encoding_var),
- scm_from_locale_string (valid_enc));
+ precise_encoding = pt->encoding;
+
+ pti->iconv_descriptors =
+ open_iconv_descriptors (precise_encoding,
+ SCM_INPUT_PORT_P (port),
+ SCM_OUTPUT_PORT_P (port));
}
+
+ return pti->iconv_descriptors;
+}
+
+void
+scm_i_set_port_encoding_x (SCM port, const char *encoding)
+{
+ scm_t_port *pt;
+ scm_t_port_internal *pti;
+ scm_t_iconv_descriptors *prev;
+
+ /* Set the character encoding for this port. */
+ pt = SCM_PTAB_ENTRY (port);
+ pti = SCM_PORT_GET_INTERNAL (port);
+ prev = pti->iconv_descriptors;
+
+ /* In order to handle cases where the encoding changes mid-stream
+ (e.g. within an HTTP stream, or within a file that is composed of
+ segments with different encodings), we consider this to be "stream
+ start" for purposes of BOM handling, regardless of our actual file
+ position. */
+ pti->at_stream_start_for_bom_read = 1;
+ pti->at_stream_start_for_bom_write = 1;
+
+ if (encoding == NULL)
+ encoding = "ISO-8859-1";
+
+ /* If ENCODING is UTF-8, then no conversion descriptor is opened
+ because we do I/O ourselves. This saves 100+ KiB for each
+ descriptor. */
+ pt->encoding = scm_gc_strdup (encoding, "port");
+ if (c_strcasecmp (encoding, "UTF-8") == 0)
+ pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
else
- {
- /* Set the character encoding for this port. */
- pt = SCM_PTAB_ENTRY (port);
- if (pt->encoding)
- free (pt->encoding);
- if (valid_enc == NULL)
- pt->encoding = NULL;
- else
- pt->encoding = strdup (valid_enc);
- }
+ pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
+
+ pti->iconv_descriptors = NULL;
+ if (prev)
+ close_iconv_descriptors (prev);
}
SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0,
SCM_VALIDATE_PORT (1, port);
pt = SCM_PTAB_ENTRY (port);
- enc = scm_i_get_port_encoding (port);
+ enc = pt->encoding;
if (enc)
return scm_from_locale_string (pt->encoding);
else
- return scm_from_locale_string ("NONE");
+ return SCM_BOOL_F;
}
#undef FUNC_NAME
-
+
SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0,
(SCM port, SCM enc),
"Sets the character encoding that will be used to interpret all\n"
"appropriate for the current locale if @code{setlocale} has \n"
"been called or ISO-8859-1 otherwise\n"
"and this procedure can be used to modify that encoding.\n")
-
#define FUNC_NAME s_scm_set_port_encoding_x
{
char *enc_str;
- const char *valid_enc_str;
SCM_VALIDATE_PORT (1, port);
SCM_VALIDATE_STRING (2, enc);
enc_str = scm_to_locale_string (enc);
- valid_enc_str = find_valid_encoding (enc_str);
- if (valid_enc_str == NULL)
- {
- free (enc_str);
- scm_misc_error (FUNC_NAME, "invalid or unknown character encoding ~s",
- scm_list_1 (enc));
- }
- else
- {
- scm_i_set_port_encoding_x (port, valid_enc_str);
- free (enc_str);
- }
+ scm_i_set_port_encoding_x (port, enc_str);
+ free (enc_str);
+
return SCM_UNSPECIFIED;
}
#undef FUNC_NAME
-/* This determines how conversions handle unconvertible characters. */
-SCM_GLOBAL_VARIABLE (scm_conversion_strategy, "%port-conversion-strategy");
+/* A fluid specifying the default conversion handler for newly created
+ ports. Its value should be one of the symbols below. */
+SCM_VARIABLE (default_conversion_strategy_var,
+ "%default-port-conversion-strategy");
+
+/* Whether the above fluid is initialized. */
static int scm_conversion_strategy_init = 0;
+/* The possible conversion strategies. */
+SCM_SYMBOL (sym_error, "error");
+SCM_SYMBOL (sym_substitute, "substitute");
+SCM_SYMBOL (sym_escape, "escape");
+
+/* Return the default failed encoding conversion policy for new created
+ ports. */
scm_t_string_failed_conversion_handler
-scm_i_get_conversion_strategy (SCM port)
+scm_i_default_port_conversion_handler (void)
{
- SCM encoding;
-
- if (scm_is_false (port))
- {
- if (!scm_conversion_strategy_init
- || !scm_is_fluid (SCM_VARIABLE_REF (scm_conversion_strategy)))
- return SCM_FAILED_CONVERSION_QUESTION_MARK;
- else
- {
- encoding = scm_fluid_ref (SCM_VARIABLE_REF (scm_conversion_strategy));
- if (scm_is_false (encoding))
- return SCM_FAILED_CONVERSION_QUESTION_MARK;
- else
- return (scm_t_string_failed_conversion_handler) scm_to_int (encoding);
- }
- }
+ scm_t_string_failed_conversion_handler handler;
+
+ if (!scm_conversion_strategy_init
+ || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
+ handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
else
{
- scm_t_port *pt;
- pt = SCM_PTAB_ENTRY (port);
- return pt->ilseq_handler;
+ SCM fluid, value;
+
+ fluid = SCM_VARIABLE_REF (default_conversion_strategy_var);
+ value = scm_fluid_ref (fluid);
+
+ if (scm_is_eq (sym_substitute, value))
+ handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
+ else if (scm_is_eq (sym_escape, value))
+ handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
+ else
+ /* Default to 'error also when the fluid's value is not one of
+ the valid symbols. */
+ handler = SCM_FAILED_CONVERSION_ERROR;
}
-
+
+ return handler;
}
+/* Use HANDLER as the default conversion strategy for future ports. */
void
-scm_i_set_conversion_strategy_x (SCM port,
- scm_t_string_failed_conversion_handler handler)
+scm_i_set_default_port_conversion_handler (scm_t_string_failed_conversion_handler
+ handler)
{
SCM strategy;
- scm_t_port *pt;
-
- strategy = scm_from_int ((int) handler);
-
- if (scm_is_false (port))
- {
- /* Set the default encoding for future ports. */
- if (!scm_conversion_strategy
- || !scm_is_fluid (SCM_VARIABLE_REF (scm_conversion_strategy)))
- scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized",
- SCM_EOL);
- scm_fluid_set_x (SCM_VARIABLE_REF (scm_conversion_strategy), strategy);
- }
- else
+
+ if (!scm_conversion_strategy_init
+ || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
+ scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized",
+ SCM_EOL);
+
+ switch (handler)
{
- /* Set the character encoding for this port. */
- pt = SCM_PTAB_ENTRY (port);
- pt->ilseq_handler = handler;
+ case SCM_FAILED_CONVERSION_ERROR:
+ strategy = sym_error;
+ break;
+
+ case SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE:
+ strategy = sym_escape;
+ break;
+
+ case SCM_FAILED_CONVERSION_QUESTION_MARK:
+ strategy = sym_substitute;
+ break;
+
+ default:
+ abort ();
}
+
+ scm_fluid_set_x (SCM_VARIABLE_REF (default_conversion_strategy_var),
+ strategy);
}
SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy",
{
scm_t_string_failed_conversion_handler h;
- SCM_VALIDATE_OPPORT (1, port);
-
- if (!scm_is_false (port))
+ if (scm_is_false (port))
+ h = scm_i_default_port_conversion_handler ();
+ else
{
+ scm_t_port *pt;
+
SCM_VALIDATE_OPPORT (1, port);
+ pt = SCM_PTAB_ENTRY (port);
+
+ h = pt->ilseq_handler;
}
- h = scm_i_get_conversion_strategy (port);
if (h == SCM_FAILED_CONVERSION_ERROR)
- return scm_from_locale_symbol ("error");
+ return scm_from_latin1_symbol ("error");
else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK)
- return scm_from_locale_symbol ("substitute");
+ return scm_from_latin1_symbol ("substitute");
else if (h == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
- return scm_from_locale_symbol ("escape");
+ return scm_from_latin1_symbol ("escape");
else
abort ();
"this thread.\n")
#define FUNC_NAME s_scm_set_port_conversion_strategy_x
{
- SCM err;
- SCM qm;
- SCM esc;
+ scm_t_string_failed_conversion_handler handler;
- if (!scm_is_false (port))
- {
- SCM_VALIDATE_OPPORT (1, port);
- }
-
- err = scm_from_locale_symbol ("error");
- if (scm_is_true (scm_eqv_p (sym, err)))
- {
- scm_i_set_conversion_strategy_x (port, SCM_FAILED_CONVERSION_ERROR);
- return SCM_UNSPECIFIED;
- }
-
- qm = scm_from_locale_symbol ("substitute");
- if (scm_is_true (scm_eqv_p (sym, qm)))
- {
- scm_i_set_conversion_strategy_x (port,
- SCM_FAILED_CONVERSION_QUESTION_MARK);
- return SCM_UNSPECIFIED;
- }
+ if (scm_is_eq (sym, sym_error))
+ handler = SCM_FAILED_CONVERSION_ERROR;
+ else if (scm_is_eq (sym, sym_substitute))
+ handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
+ else if (scm_is_eq (sym, sym_escape))
+ handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
+ else
+ SCM_MISC_ERROR ("unknown conversion strategy ~s", scm_list_1 (sym));
- esc = scm_from_locale_symbol ("escape");
- if (scm_is_true (scm_eqv_p (sym, esc)))
+ if (scm_is_false (port))
+ scm_i_set_default_port_conversion_handler (handler);
+ else
{
- scm_i_set_conversion_strategy_x (port,
- SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE);
- return SCM_UNSPECIFIED;
+ SCM_VALIDATE_OPPORT (1, port);
+ SCM_PTAB_ENTRY (port)->ilseq_handler = handler;
}
- SCM_MISC_ERROR ("unknown conversion behavior ~s", scm_list_1 (sym));
-
return SCM_UNSPECIFIED;
}
#undef FUNC_NAME
return 1;
}
-void
-scm_ports_prehistory ()
-{
- scm_numptob = 0;
- scm_ptobs = (scm_t_ptob_descriptor *) scm_malloc (sizeof (scm_t_ptob_descriptor));
-}
-
\f
/* Void ports. */
scm_tc16_void_port = scm_make_port_type ("void", fill_input_void_port,
write_void_port);
- cur_inport_fluid = scm_permanent_object (scm_make_fluid ());
- cur_outport_fluid = scm_permanent_object (scm_make_fluid ());
- cur_errport_fluid = scm_permanent_object (scm_make_fluid ());
- cur_loadport_fluid = scm_permanent_object (scm_make_fluid ());
+ cur_inport_fluid = scm_make_fluid ();
+ cur_outport_fluid = scm_make_fluid ();
+ cur_errport_fluid = scm_make_fluid ();
+ cur_loadport_fluid = scm_make_fluid ();
+
+ scm_i_port_weak_hash = scm_make_weak_key_hash_table (SCM_I_MAKINUM(31));
- scm_i_port_weak_hash = scm_permanent_object (scm_make_weak_key_hash_table (SCM_I_MAKINUM(31)));
#include "libguile/ports.x"
- SCM_VARIABLE_SET (scm_port_encoding_var, scm_make_fluid ());
- scm_fluid_set_x (SCM_VARIABLE_REF (scm_port_encoding_var), SCM_BOOL_F);
+ /* Use Latin-1 as the default port encoding. */
+ SCM_VARIABLE_SET (default_port_encoding_var,
+ scm_make_fluid_with_default (SCM_BOOL_F));
scm_port_encoding_init = 1;
-
- SCM_VARIABLE_SET (scm_conversion_strategy, scm_make_fluid ());
- scm_fluid_set_x (SCM_VARIABLE_REF (scm_conversion_strategy),
- scm_from_int ((int) SCM_FAILED_CONVERSION_QUESTION_MARK));
+
+ SCM_VARIABLE_SET (default_conversion_strategy_var,
+ scm_make_fluid_with_default (sym_substitute));
scm_conversion_strategy_init = 1;
-
+
+ /* These bindings are used when boot-9 turns `current-input-port' et
+ al into parameters. They are then removed from the guile module. */
+ scm_c_define ("%current-input-port-fluid", cur_inport_fluid);
+ scm_c_define ("%current-output-port-fluid", cur_outport_fluid);
+ scm_c_define ("%current-error-port-fluid", cur_errport_fluid);
}
/*