#include "libguile/validate.h"
#include "libguile/ports.h"
#include "libguile/vectors.h"
-#include "libguile/weaks.h"
+#include "libguile/weak-set.h"
#include "libguile/fluids.h"
#include "libguile/eq.h"
\f
/* Standard ports --- current input, output, error, and more(!). */
-static SCM cur_inport_fluid = 0;
-static SCM cur_outport_fluid = 0;
-static SCM cur_errport_fluid = 0;
-static SCM cur_loadport_fluid = 0;
+static SCM cur_inport_fluid = SCM_BOOL_F;
+static SCM cur_outport_fluid = SCM_BOOL_F;
+static SCM cur_errport_fluid = SCM_BOOL_F;
+static SCM cur_loadport_fluid = SCM_BOOL_F;
SCM_DEFINE (scm_current_input_port, "current-input-port", 0, 0, 0,
(),
"returns the @dfn{standard input} in Unix and C terminology.")
#define FUNC_NAME s_scm_current_input_port
{
- if (cur_inport_fluid)
+ if (scm_is_true (cur_inport_fluid))
return scm_fluid_ref (cur_inport_fluid);
else
return SCM_BOOL_F;
"Unix and C terminology.")
#define FUNC_NAME s_scm_current_output_port
{
- if (cur_outport_fluid)
+ if (scm_is_true (cur_outport_fluid))
return scm_fluid_ref (cur_outport_fluid);
else
return SCM_BOOL_F;
"@dfn{standard error} in Unix and C terminology).")
#define FUNC_NAME s_scm_current_error_port
{
- if (cur_errport_fluid)
+ if (scm_is_true (cur_errport_fluid))
return scm_fluid_ref (cur_errport_fluid);
else
return SCM_BOOL_F;
We need a global registry of ports to flush them all at exit, and to
get all the ports matching a file descriptor.
*/
-SCM scm_i_port_weak_hash;
-
-scm_i_pthread_mutex_t scm_i_port_table_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
+SCM scm_i_port_weak_set;
\f
/* Port finalization. */
/* Register a finalizer for PORT so that its iconv CDs get freed and
optionally its type's `free' function gets called. */
- GC_REGISTER_FINALIZER_NO_ORDER (SCM2PTR (port), finalize_port, 0,
+ GC_REGISTER_FINALIZER_NO_ORDER (SCM_HEAP_OBJECT_BASE (port),
+ finalize_port, 0,
&prev_finalizer,
&prev_finalization_data);
}
finalize_port (GC_PTR ptr, GC_PTR data)
{
long port_type;
- SCM port = PTR2SCM (ptr);
+ SCM port = SCM_PACK_POINTER (ptr);
if (!SCM_PORTP (port))
abort ();
\f
-/* This function is not and should not be thread safe. */
SCM
-scm_new_port_table_entry (scm_t_bits tag)
-#define FUNC_NAME "scm_new_port_table_entry"
+scm_c_make_port_with_encoding (scm_t_bits tag, unsigned long mode_bits,
+ const char *encoding,
+ scm_t_string_failed_conversion_handler handler,
+ scm_t_bits stream)
{
- /*
- We initialize the cell to empty, this is in case scm_gc_calloc
- triggers GC ; we don't want the GC to scan a half-finished Z.
- */
-
- SCM z = scm_cons (SCM_EOL, SCM_EOL);
- scm_t_port *entry = (scm_t_port *) scm_gc_calloc (sizeof (scm_t_port), "port");
- const char *enc;
+ SCM ret;
+ scm_t_port *entry;
- entry->file_name = SCM_BOOL_F;
- entry->rw_active = SCM_PORT_NEITHER;
- entry->port = z;
+ entry = (scm_t_port *) scm_gc_calloc (sizeof (scm_t_port), "port");
+ ret = scm_cell (tag | mode_bits, (scm_t_bits)entry);
- /* Initialize this port with the thread's current default
- encoding. */
- enc = scm_i_default_port_encoding ();
- entry->encoding = enc ? scm_gc_strdup (enc, "port") : NULL;
+#if SCM_USE_PTHREAD_THREADS
+ scm_i_pthread_mutex_init (&entry->lock, scm_i_pthread_mutexattr_recursive);
+#endif
+ entry->file_name = SCM_BOOL_F;
+ entry->rw_active = SCM_PORT_NEITHER;
+ entry->port = ret;
+ entry->stream = stream;
+ entry->encoding = encoding ? scm_gc_strdup (encoding, "port") : NULL;
/* The conversion descriptors will be opened lazily. */
entry->input_cd = (iconv_t) -1;
entry->output_cd = (iconv_t) -1;
+ entry->ilseq_handler = handler;
- entry->ilseq_handler = scm_i_get_conversion_strategy (SCM_BOOL_F);
-
- SCM_SET_CELL_TYPE (z, tag);
- SCM_SETPTAB_ENTRY (z, entry);
-
- scm_hashq_set_x (scm_i_port_weak_hash, z, SCM_BOOL_F);
+ scm_weak_set_add_x (scm_i_port_weak_set, ret);
/* For each new port, register a finalizer so that it port type's free
function can be invoked eventually. */
- register_finalizer_for_port (z);
+ register_finalizer_for_port (ret);
- return z;
+ return ret;
}
-#undef FUNC_NAME
-#if SCM_ENABLE_DEPRECATED==1
-scm_t_port *
-scm_add_to_port_table (SCM port)
+SCM
+scm_c_make_port (scm_t_bits tag, unsigned long mode_bits, scm_t_bits stream)
{
- SCM z;
- scm_t_port * pt;
-
- scm_c_issue_deprecation_warning ("scm_add_to_port_table is deprecated.");
-
- scm_i_pthread_mutex_lock (&scm_i_port_table_mutex);
- z = scm_new_port_table_entry (scm_tc7_port);
- pt = SCM_PTAB_ENTRY(z);
- pt->port = port;
- SCM_SETCAR (z, SCM_EOL);
- SCM_SETCDR (z, SCM_EOL);
- SCM_SETPTAB_ENTRY (port, pt);
- scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex);
-
- return pt;
+ return scm_c_make_port_with_encoding (tag, mode_bits,
+ scm_i_default_port_encoding (),
+ scm_i_get_conversion_strategy (SCM_BOOL_F),
+ stream);
}
-#endif
+SCM
+scm_new_port_table_entry (scm_t_bits tag)
+{
+ return scm_c_make_port (tag, 0, 0);
+}
/* Remove a port from the table and destroy it. */
{
scm_t_port *p;
- scm_i_scm_pthread_mutex_lock (&scm_i_port_table_mutex);
-
p = SCM_PTAB_ENTRY (port);
scm_port_non_buffer (p);
+ SCM_SETPTAB_ENTRY (port, 0);
+ scm_weak_set_remove_x (scm_i_port_weak_set, port);
+
p->putback_buf = NULL;
p->putback_buf_size = 0;
iconv_close (p->output_cd);
p->output_cd = (iconv_t) -1;
}
-
- SCM_SETPTAB_ENTRY (port, 0);
-
- scm_hashq_remove_x (scm_i_port_weak_hash, port);
-
- scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex);
}
#undef FUNC_NAME
-/* Functions for debugging. */
-#ifdef GUILE_DEBUG
-SCM_DEFINE (scm_pt_size, "pt-size", 0, 0, 0,
- (),
- "Return the number of ports in the port table. @code{pt-size}\n"
- "is only included in @code{--enable-guile-debug} builds.")
-#define FUNC_NAME s_scm_pt_size
-{
- return scm_from_int (SCM_HASHTABLE_N_ITEMS (scm_i_port_weak_hash));
-}
-#undef FUNC_NAME
-#endif
-
void
scm_port_non_buffer (scm_t_port *pt)
{
}
#undef FUNC_NAME
+struct for_each_data
+{
+ void (*proc) (void *data, SCM p);
+ void *data;
+};
+
static SCM
-collect_keys (void *unused, SCM key, SCM value, SCM result)
+for_each_trampoline (void *data, SCM port, SCM result)
{
- return scm_cons (key, result);
+ struct for_each_data *d = data;
+
+ d->proc (d->data, port);
+
+ return result;
}
void
scm_c_port_for_each (void (*proc)(void *data, SCM p), void *data)
{
- SCM ports;
+ struct for_each_data d;
+
+ d.proc = proc;
+ d.data = data;
- /* Copy out the port table as a list so that we get strong references
- to all the values. */
- scm_i_pthread_mutex_lock (&scm_i_port_table_mutex);
- ports = scm_internal_hash_fold (collect_keys, NULL,
- SCM_EOL, scm_i_port_weak_hash);
- scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex);
+ scm_c_weak_set_fold (for_each_trampoline, &d, SCM_EOL,
+ scm_i_port_weak_set);
+}
- for (; scm_is_pair (ports); ports = scm_cdr (ports))
- {
- SCM p = scm_car (ports);
- if (SCM_PORTP (p))
- proc (data, p);
- }
+static void
+scm_for_each_trampoline (void *data, SCM port)
+{
+ scm_call_1 (SCM_PACK_POINTER (data), port);
}
SCM_DEFINE (scm_port_for_each, "port-for-each", 1, 0, 0,
{
SCM_VALIDATE_PROC (1, proc);
- scm_c_port_for_each ((void (*)(void*,SCM))scm_call_1, proc);
+ scm_c_port_for_each (scm_for_each_trampoline, SCM_UNPACK_POINTER (proc));
+
return SCM_UNSPECIFIED;
}
#undef FUNC_NAME
switch (c)
{
case '\a':
+ case EOF:
break;
case '\b':
SCM_DECCOL (port);
return codepoint;
}
-/* Read a codepoint from PORT and return it in *CODEPOINT. Fill BUF
- with the byte representation of the codepoint in PORT's encoding, and
- set *LEN to the length in bytes of that representation. Return 0 on
- success and an errno value on error. */
+/* Read a UTF-8 sequence from PORT. On success, return 0 and set
+ *CODEPOINT to the codepoint that was read, fill BUF with its UTF-8
+ representation, and set *LEN to the length in bytes. Return
+ `EILSEQ' on error. */
static int
-get_codepoint (SCM port, scm_t_wchar *codepoint,
- char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
+get_utf8_codepoint (SCM port, scm_t_wchar *codepoint,
+ scm_t_uint8 buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
{
+#define ASSERT_NOT_EOF(b) \
+ if (SCM_UNLIKELY ((b) == EOF)) \
+ goto invalid_seq
+#define CONSUME_PEEKED_BYTE() \
+ pt->read_pos++
+
+ int byte;
+ scm_t_port *pt;
+
+ *len = 0;
+ pt = SCM_PTAB_ENTRY (port);
+
+ byte = scm_get_byte_or_eof (port);
+ if (byte == EOF)
+ {
+ *codepoint = EOF;
+ return 0;
+ }
+
+ buf[0] = (scm_t_uint8) byte;
+ *len = 1;
+
+ if (buf[0] <= 0x7f)
+ /* 1-byte form. */
+ *codepoint = buf[0];
+ else if (buf[0] >= 0xc2 && buf[0] <= 0xdf)
+ {
+ /* 2-byte form. */
+ byte = scm_peek_byte_or_eof (port);
+ ASSERT_NOT_EOF (byte);
+
+ if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
+ goto invalid_seq;
+
+ CONSUME_PEEKED_BYTE ();
+ buf[1] = (scm_t_uint8) byte;
+ *len = 2;
+
+ *codepoint = ((scm_t_wchar) buf[0] & 0x1f) << 6UL
+ | (buf[1] & 0x3f);
+ }
+ else if ((buf[0] & 0xf0) == 0xe0)
+ {
+ /* 3-byte form. */
+ byte = scm_peek_byte_or_eof (port);
+ ASSERT_NOT_EOF (byte);
+
+ if (SCM_UNLIKELY ((byte & 0xc0) != 0x80
+ || (buf[0] == 0xe0 && byte < 0xa0)
+ || (buf[0] == 0xed && byte > 0x9f)))
+ goto invalid_seq;
+
+ CONSUME_PEEKED_BYTE ();
+ buf[1] = (scm_t_uint8) byte;
+ *len = 2;
+
+ byte = scm_peek_byte_or_eof (port);
+ ASSERT_NOT_EOF (byte);
+
+ if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
+ goto invalid_seq;
+
+ CONSUME_PEEKED_BYTE ();
+ buf[2] = (scm_t_uint8) byte;
+ *len = 3;
+
+ *codepoint = ((scm_t_wchar) buf[0] & 0x0f) << 12UL
+ | ((scm_t_wchar) buf[1] & 0x3f) << 6UL
+ | (buf[2] & 0x3f);
+ }
+ else if (buf[0] >= 0xf0 && buf[0] <= 0xf4)
+ {
+ /* 4-byte form. */
+ byte = scm_peek_byte_or_eof (port);
+ ASSERT_NOT_EOF (byte);
+
+ if (SCM_UNLIKELY (((byte & 0xc0) != 0x80)
+ || (buf[0] == 0xf0 && byte < 0x90)
+ || (buf[0] == 0xf4 && byte > 0x8f)))
+ goto invalid_seq;
+
+ CONSUME_PEEKED_BYTE ();
+ buf[1] = (scm_t_uint8) byte;
+ *len = 2;
+
+ byte = scm_peek_byte_or_eof (port);
+ ASSERT_NOT_EOF (byte);
+
+ if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
+ goto invalid_seq;
+
+ CONSUME_PEEKED_BYTE ();
+ buf[2] = (scm_t_uint8) byte;
+ *len = 3;
+
+ byte = scm_peek_byte_or_eof (port);
+ ASSERT_NOT_EOF (byte);
+
+ if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
+ goto invalid_seq;
+
+ CONSUME_PEEKED_BYTE ();
+ buf[3] = (scm_t_uint8) byte;
+ *len = 4;
+
+ *codepoint = ((scm_t_wchar) buf[0] & 0x07) << 18UL
+ | ((scm_t_wchar) buf[1] & 0x3f) << 12UL
+ | ((scm_t_wchar) buf[2] & 0x3f) << 6UL
+ | (buf[3] & 0x3f);
+ }
+ else
+ goto invalid_seq;
+
+ return 0;
+
+ invalid_seq:
+ /* Here we could choose the consume the faulty byte when it's not a
+ valid starting byte, but it's not a requirement. What Section 3.9
+ of Unicode 6.0.0 mandates, though, is to not consume a byte that
+ would otherwise be a valid starting byte. */
+
+ return EILSEQ;
+
+#undef CONSUME_PEEKED_BYTE
+#undef ASSERT_NOT_EOF
+}
+
+/* Likewise, read a byte sequence from PORT, passing it through its
+ input conversion descriptor. */
+static int
+get_iconv_codepoint (SCM port, scm_t_wchar *codepoint,
+ char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
+{
+ scm_t_port *pt;
int err, byte_read;
size_t bytes_consumed, output_size;
char *output;
scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
- scm_t_port *pt = SCM_PTAB_ENTRY (port);
- if (SCM_UNLIKELY (pt->input_cd == (iconv_t) -1))
- /* Initialize the conversion descriptors. */
- scm_i_set_port_encoding_x (port, pt->encoding);
+ pt = SCM_PTAB_ENTRY (port);
for (output_size = 0, output = (char *) utf8_buf,
bytes_consumed = 0, err = 0;
if (SCM_UNLIKELY (output_size == 0))
/* An unterminated sequence. */
err = EILSEQ;
-
- if (SCM_UNLIKELY (err != 0))
+ else if (SCM_LIKELY (err == 0))
{
- /* Reset the `iconv' state. */
- iconv (pt->input_cd, NULL, NULL, NULL, NULL);
+ /* Convert the UTF8_BUF sequence to a Unicode code point. */
+ *codepoint = utf8_to_codepoint (utf8_buf, output_size);
+ *len = bytes_consumed;
+ }
- if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
- {
- *codepoint = '?';
- err = 0;
- }
+ return err;
+}
- /* Fail when the strategy is SCM_ICONVEH_ERROR or
- SCM_ICONVEH_ESCAPE_SEQUENCE (the latter doesn't make sense for
- input encoding errors.) */
- }
+/* Read a codepoint from PORT and return it in *CODEPOINT. Fill BUF
+ with the byte representation of the codepoint in PORT's encoding, and
+ set *LEN to the length in bytes of that representation. Return 0 on
+ success and an errno value on error. */
+static int
+get_codepoint (SCM port, scm_t_wchar *codepoint,
+ char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
+{
+ int err;
+ scm_t_port *pt = SCM_PTAB_ENTRY (port);
+
+ if (pt->input_cd == (iconv_t) -1)
+ /* Initialize the conversion descriptors, if needed. */
+ scm_i_set_port_encoding_x (port, pt->encoding);
+
+ /* FIXME: In 2.1, add a flag to determine whether a port is UTF-8. */
+ if (pt->input_cd == (iconv_t) -1)
+ err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len);
else
+ err = get_iconv_codepoint (port, codepoint, buf, len);
+
+ if (SCM_LIKELY (err == 0))
+ update_port_lf (*codepoint, port);
+ else if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
{
- /* Convert the UTF8_BUF sequence to a Unicode code point. */
- *codepoint = utf8_to_codepoint (utf8_buf, output_size);
+ *codepoint = '?';
+ err = 0;
update_port_lf (*codepoint, port);
}
- *len = bytes_consumed;
-
return err;
}
if (encoding == NULL)
encoding = "ISO-8859-1";
- pt->encoding = scm_gc_strdup (encoding, "port");
+ if (pt->encoding != encoding)
+ pt->encoding = scm_gc_strdup (encoding, "port");
- if (SCM_CELL_WORD_0 (port) & SCM_RDNG)
+ /* If ENCODING is UTF-8, then no conversion descriptor is opened
+ because we do I/O ourselves. This saves 100+ KiB for each
+ descriptor. */
+ if (strcmp (encoding, "UTF-8"))
{
- /* Open an input iconv conversion descriptor, from ENCODING
- to UTF-8. We choose UTF-8, not UTF-32, because iconv
- implementations can typically convert from anything to
- UTF-8, but not to UTF-32 (see
- <http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html>). */
- new_input_cd = iconv_open ("UTF-8", encoding);
- if (new_input_cd == (iconv_t) -1)
- goto invalid_encoding;
- }
+ if (SCM_CELL_WORD_0 (port) & SCM_RDNG)
+ {
+ /* Open an input iconv conversion descriptor, from ENCODING
+ to UTF-8. We choose UTF-8, not UTF-32, because iconv
+ implementations can typically convert from anything to
+ UTF-8, but not to UTF-32 (see
+ <http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html>). */
+ new_input_cd = iconv_open ("UTF-8", encoding);
+ if (new_input_cd == (iconv_t) -1)
+ goto invalid_encoding;
+ }
- if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
- {
- new_output_cd = iconv_open (encoding, "UTF-8");
- if (new_output_cd == (iconv_t) -1)
+ if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
{
- if (new_input_cd != (iconv_t) -1)
- iconv_close (new_input_cd);
- goto invalid_encoding;
+ new_output_cd = iconv_open (encoding, "UTF-8");
+ if (new_output_cd == (iconv_t) -1)
+ {
+ if (new_input_cd != (iconv_t) -1)
+ iconv_close (new_input_cd);
+ goto invalid_encoding;
+ }
}
}
if (scm_is_false (port))
{
/* Set the default encoding for future ports. */
- if (!scm_conversion_strategy
+ if (!scm_conversion_strategy_init
|| !scm_is_fluid (SCM_VARIABLE_REF (scm_conversion_strategy)))
scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized",
SCM_EOL);
static SCM
scm_i_void_port (long mode_bits)
{
- scm_i_scm_pthread_mutex_lock (&scm_i_port_table_mutex);
- {
- SCM answer = scm_new_port_table_entry (scm_tc16_void_port);
- scm_t_port * pt = SCM_PTAB_ENTRY(answer);
+ SCM ret;
- scm_port_non_buffer (pt);
+ ret = scm_c_make_port (scm_tc16_void_port, mode_bits, 0);
+
+ scm_port_non_buffer (SCM_PTAB_ENTRY (ret));
- SCM_SETSTREAM (answer, 0);
- SCM_SET_CELL_TYPE (answer, scm_tc16_void_port | mode_bits);
- scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex);
- return answer;
- }
+ return ret;
}
SCM
cur_errport_fluid = scm_make_fluid ();
cur_loadport_fluid = scm_make_fluid ();
- scm_i_port_weak_hash = scm_make_weak_key_hash_table (SCM_I_MAKINUM(31));
+ scm_i_port_weak_set = scm_c_make_weak_set (31);
#include "libguile/ports.x"