-/* Copyright (C) 1995,1996,1997,1998,1999,2000,2001, 2003, 2004, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
- *
+/* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004,
+ * 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+ *
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 3 of
#include <errno.h>
#include <fcntl.h> /* for chsize on mingw */
#include <assert.h>
+#include <iconv.h>
#include <uniconv.h>
#include <unistr.h>
#include <striconveh.h>
+#include <assert.h>
+
#include "libguile/_scm.h"
#include "libguile/async.h"
#include "libguile/eval.h"
#include "libguile/fports.h" /* direct access for seek and truncate */
-#include "libguile/objects.h"
#include "libguile/goops.h"
#include "libguile/smob.h"
#include "libguile/chars.h"
#include <string.h>
#endif
-#ifdef HAVE_MALLOC_H
-#include <malloc.h>
-#endif
-
#ifdef HAVE_IO_H
#include <io.h>
#endif
gnulib ftruncate.c has code using fcntl F_CHSIZE and F_FREESP, which
might be possibilities if we've got other systems without ftruncate. */
-#if HAVE_CHSIZE && ! HAVE_FTRUNCATE
+#if defined HAVE_CHSIZE && ! defined HAVE_FTRUNCATE
#define ftruncate(fd, size) chsize (fd, size)
#undef HAVE_FTRUNCATE
#define HAVE_FTRUNCATE 1
* Indexes into this table are used when generating type
* tags for smobjects (if you know a tag you can get an index and conversely).
*/
-scm_t_ptob_descriptor *scm_ptobs;
-long scm_numptob;
+scm_t_ptob_descriptor *scm_ptobs = NULL;
+long scm_numptob = 0;
/* GC marker for a port with stream of SCM type. */
SCM
{
}
-static size_t
-scm_port_free0 (SCM port)
-{
- return 0;
-}
-
scm_t_bits
scm_make_port_type (char *name,
int (*fill_input) (SCM port),
if (SCM_I_MAX_PORT_TYPE_COUNT - 1 <= scm_numptob)
goto ptoberr;
SCM_CRITICAL_SECTION_START;
- SCM_SYSCALL (tmp = (char *) realloc ((char *) scm_ptobs,
- (1 + scm_numptob)
- * sizeof (scm_t_ptob_descriptor)));
+ tmp = (char *) scm_gc_realloc ((char *) scm_ptobs,
+ scm_numptob * sizeof (scm_t_ptob_descriptor),
+ (1 + scm_numptob)
+ * sizeof (scm_t_ptob_descriptor),
+ "port-type");
if (tmp)
{
scm_ptobs = (scm_t_ptob_descriptor *) tmp;
scm_ptobs[scm_numptob].name = name;
scm_ptobs[scm_numptob].mark = 0;
- scm_ptobs[scm_numptob].free = scm_port_free0;
+ scm_ptobs[scm_numptob].free = NULL;
scm_ptobs[scm_numptob].print = scm_port_print;
scm_ptobs[scm_numptob].equalp = 0;
scm_ptobs[scm_numptob].close = 0;
if (SCM_UNBNDP (port))
port = scm_current_input_port ();
- else
- SCM_VALIDATE_OPINPORT (1, port);
+ /* It's possible to close the current input port, so validate even in
+ this case. */
+ SCM_VALIDATE_OPINPORT (1, port);
pt = SCM_PTAB_ENTRY (port);
if (pt->read_buf == pt->putback_buf)
count += pt->saved_read_end - pt->saved_read_pos;
- result = scm_i_make_string (count, &data);
- scm_take_from_input_buffers (port, data, count);
+ if (count)
+ {
+ result = scm_i_make_string (count, &data);
+ scm_take_from_input_buffers (port, data, count);
+ }
+ else
+ result = scm_nullstr;
+
return result;
}
#undef FUNC_NAME
scm_i_pthread_mutex_t scm_i_port_table_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
-/* This function is not and should not be thread safe. */
+\f
+/* Port finalization. */
+
+
+static void finalize_port (GC_PTR, GC_PTR);
+
+/* Register a finalizer for PORT. */
+static SCM_C_INLINE_KEYWORD void
+register_finalizer_for_port (SCM port)
+{
+ long port_type;
+ GC_finalization_proc prev_finalizer;
+ GC_PTR prev_finalization_data;
+
+ port_type = SCM_TC2PTOBNUM (SCM_CELL_TYPE (port));
+
+ /* Register a finalizer for PORT so that its iconv CDs get freed and
+ optionally its type's `free' function gets called. */
+ GC_REGISTER_FINALIZER_NO_ORDER (SCM2PTR (port), finalize_port, 0,
+ &prev_finalizer,
+ &prev_finalization_data);
+}
+
+/* Finalize the object (a port) pointed to by PTR. */
+static void
+finalize_port (GC_PTR ptr, GC_PTR data)
+{
+ long port_type;
+ SCM port = PTR2SCM (ptr);
+
+ if (!SCM_PORTP (port))
+ abort ();
+
+ if (SCM_OPENP (port))
+ {
+ if (SCM_REVEALED (port) > 0)
+ /* Keep "revealed" ports alive and re-register a finalizer. */
+ register_finalizer_for_port (port);
+ else
+ {
+ scm_t_port *entry;
+ port_type = SCM_TC2PTOBNUM (SCM_CELL_TYPE (port));
+ if (port_type >= scm_numptob)
+ abort ();
+
+ if (scm_ptobs[port_type].free)
+ /* Yes, I really do mean `.free' rather than `.close'. `.close'
+ is for explicit `close-port' by user. */
+ scm_ptobs[port_type].free (port);
+
+ entry = SCM_PTAB_ENTRY (port);
+
+ if (entry->input_cd != (iconv_t) -1)
+ iconv_close (entry->input_cd);
+ if (entry->output_cd != (iconv_t) -1)
+ iconv_close (entry->output_cd);
+
+ SCM_SETSTREAM (port, 0);
+ SCM_CLR_PORT_OPEN_FLAG (port);
+
+ scm_gc_ports_collected++;
+ }
+ }
+}
+
+
+
+\f
+
+/* This function is not and should not be thread safe. */
SCM
scm_new_port_table_entry (scm_t_bits tag)
#define FUNC_NAME "scm_new_port_table_entry"
if ((enc = scm_i_get_port_encoding (SCM_BOOL_F)) == NULL)
entry->encoding = NULL;
else
- entry->encoding = strdup (enc);
+ entry->encoding = scm_gc_strdup (enc, "port");
+
+ /* The conversion descriptors will be opened lazily. */
+ entry->input_cd = (iconv_t) -1;
+ entry->output_cd = (iconv_t) -1;
+
entry->ilseq_handler = scm_i_get_conversion_strategy (SCM_BOOL_F);
SCM_SET_CELL_TYPE (z, tag);
scm_hashq_set_x (scm_i_port_weak_hash, z, SCM_BOOL_F);
+ /* For each new port, register a finalizer so that it port type's free
+ function can be invoked eventually. */
+ register_finalizer_for_port (z);
+
return z;
}
#undef FUNC_NAME
#define FUNC_NAME "scm_remove_port"
{
scm_t_port *p = SCM_PTAB_ENTRY (port);
- if (p->putback_buf)
- scm_gc_free (p->putback_buf, p->putback_buf_size, "putback buffer");
- if (p->encoding)
- {
- free (p->encoding);
- p->encoding = NULL;
- }
- scm_gc_free (p, sizeof (scm_t_port), "port");
+
+ scm_port_non_buffer (p);
+
+ p->putback_buf = NULL;
+ p->putback_buf_size = 0;
SCM_SETPTAB_ENTRY (port, 0);
scm_hashq_remove_x (scm_i_port_weak_hash, port);
(SCM port),
"Return the next character available from @var{port}, updating\n"
"@var{port} to point to the following character. If no more\n"
- "characters are available, the end-of-file object is returned.")
+ "characters are available, the end-of-file object is returned.\n"
+ "\n"
+ "When @var{port}'s data cannot be decoded according to its\n"
+ "character encoding, a @code{decoding-error} is raised and\n"
+ "@var{port} points past the erroneous byte sequence.\n")
#define FUNC_NAME s_scm_read_char
{
scm_t_wchar c;
}
#undef FUNC_NAME
-#define SCM_MBCHAR_BUF_SIZE (4)
-
-/* Get one codepoint from a file, using the port's encoding. */
-scm_t_wchar
-scm_getc (SCM port)
+/* Update the line and column number of PORT after consumption of C. */
+static inline void
+update_port_lf (scm_t_wchar c, SCM port)
{
- int c;
- unsigned int bufcount = 0;
- char buf[SCM_MBCHAR_BUF_SIZE];
- scm_t_wchar codepoint = 0;
- scm_t_uint32 *u32;
- size_t u32len;
- scm_t_port *pt = SCM_PTAB_ENTRY (port);
-
- c = scm_get_byte_or_eof (port);
- if (c == EOF)
- return (scm_t_wchar) EOF;
-
- buf[0] = c;
- bufcount++;
-
- if (pt->encoding == NULL)
- {
- /* The encoding is Latin-1: bytes are characters. */
- codepoint = buf[0];
- goto success;
- }
-
- for (;;)
- {
- u32 = u32_conv_from_encoding (pt->encoding,
- (enum iconv_ilseq_handler) pt->ilseq_handler,
- buf, bufcount, NULL, NULL, &u32len);
- if (u32 == NULL || u32len == 0)
- {
- if (errno == ENOMEM)
- scm_memory_error ("Input decoding");
-
- /* Otherwise errno is EILSEQ or EINVAL, so perhaps more
- bytes are needed. Keep looping. */
- }
- else
- {
- /* Complete codepoint found. */
- codepoint = u32[0];
- free (u32);
- goto success;
- }
-
- if (bufcount == SCM_MBCHAR_BUF_SIZE)
- {
- /* We've read several bytes and didn't find a good
- codepoint. Give up. */
- goto failure;
- }
-
- c = scm_get_byte_or_eof (port);
-
- if (c == EOF)
- {
- /* EOF before a complete character was read. Push it all
- back and return EOF. */
- while (bufcount > 0)
- {
- /* FIXME: this will probably cause errors in the port column. */
- scm_unget_byte (buf[bufcount-1], port);
- bufcount --;
- }
- return EOF;
- }
-
- if (c == '\n')
- {
- /* It is always invalid to have EOL in the middle of a
- multibyte character. */
- scm_unget_byte ('\n', port);
- goto failure;
- }
-
- buf[bufcount++] = c;
- }
-
- success:
- switch (codepoint)
+ switch (c)
{
case '\a':
break;
break;
case '\n':
SCM_INCLINE (port);
- break;
+ break;
case '\r':
SCM_ZEROCOL (port);
break;
SCM_INCCOL (port);
break;
}
+}
+
+#define SCM_MBCHAR_BUF_SIZE (4)
+
+/* Convert the SIZE-byte UTF-8 sequence in UTF8_BUF to a codepoint.
+ UTF8_BUF is assumed to contain a valid UTF-8 sequence. */
+static scm_t_wchar
+utf8_to_codepoint (const scm_t_uint8 *utf8_buf, size_t size)
+{
+ scm_t_wchar codepoint;
+
+ if (utf8_buf[0] <= 0x7f)
+ {
+ assert (size == 1);
+ codepoint = utf8_buf[0];
+ }
+ else if ((utf8_buf[0] & 0xe0) == 0xc0)
+ {
+ assert (size == 2);
+ codepoint = ((scm_t_wchar) utf8_buf[0] & 0x1f) << 6UL
+ | (utf8_buf[1] & 0x3f);
+ }
+ else if ((utf8_buf[0] & 0xf0) == 0xe0)
+ {
+ assert (size == 3);
+ codepoint = ((scm_t_wchar) utf8_buf[0] & 0x0f) << 12UL
+ | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 6UL
+ | (utf8_buf[2] & 0x3f);
+ }
+ else
+ {
+ assert (size == 4);
+ codepoint = ((scm_t_wchar) utf8_buf[0] & 0x07) << 18UL
+ | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 12UL
+ | ((scm_t_wchar) utf8_buf[2] & 0x3f) << 6UL
+ | (utf8_buf[3] & 0x3f);
+ }
return codepoint;
+}
- failure:
- {
- char *err_buf;
- SCM err_str = scm_i_make_string (bufcount, &err_buf);
- memcpy (err_buf, buf, bufcount);
-
- if (errno == EILSEQ)
- scm_misc_error (NULL, "input encoding error for ~s: ~s",
- scm_list_2 (scm_from_locale_string (scm_i_get_port_encoding (port)),
- err_str));
- else
- scm_misc_error (NULL, "input encoding error (invalid) for ~s: ~s\n",
- scm_list_2 (scm_from_locale_string (scm_i_get_port_encoding (port)),
- err_str));
- }
+/* Read a codepoint from PORT and return it in *CODEPOINT. Fill BUF
+ with the byte representation of the codepoint in PORT's encoding, and
+ set *LEN to the length in bytes of that representation. Return 0 on
+ success and an errno value on error. */
+static int
+get_codepoint (SCM port, scm_t_wchar *codepoint,
+ char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
+{
+ int err, byte_read;
+ size_t bytes_consumed, output_size;
+ char *output;
+ scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
+ scm_t_port *pt = SCM_PTAB_ENTRY (port);
+
+ if (SCM_UNLIKELY (pt->input_cd == (iconv_t) -1))
+ /* Initialize the conversion descriptors. */
+ scm_i_set_port_encoding_x (port, pt->encoding);
- /* Never gets here. */
- return 0;
+ for (output_size = 0, output = (char *) utf8_buf,
+ bytes_consumed = 0, err = 0;
+ err == 0 && output_size == 0
+ && (bytes_consumed == 0 || byte_read != EOF);
+ bytes_consumed++)
+ {
+ char *input;
+ size_t input_left, output_left, done;
+
+ byte_read = scm_get_byte_or_eof (port);
+ if (byte_read == EOF)
+ {
+ if (bytes_consumed == 0)
+ {
+ *codepoint = (scm_t_wchar) EOF;
+ *len = 0;
+ return 0;
+ }
+ else
+ continue;
+ }
+
+ buf[bytes_consumed] = byte_read;
+
+ input = buf;
+ input_left = bytes_consumed + 1;
+ output_left = sizeof (utf8_buf);
+
+ done = iconv (pt->input_cd, &input, &input_left,
+ &output, &output_left);
+ if (done == (size_t) -1)
+ {
+ err = errno;
+ if (err == EINVAL)
+ /* Missing input: keep trying. */
+ err = 0;
+ }
+ else
+ output_size = sizeof (utf8_buf) - output_left;
+ }
+
+ if (SCM_UNLIKELY (err != 0))
+ {
+ /* Reset the `iconv' state. */
+ iconv (pt->input_cd, NULL, NULL, NULL, NULL);
+
+ if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
+ {
+ *codepoint = '?';
+ err = 0;
+ }
+
+ /* Fail when the strategy is SCM_ICONVEH_ERROR or
+ SCM_ICONVEH_ESCAPE_SEQUENCE (the latter doesn't make sense for
+ input encoding errors.) */
+ }
+ else
+ /* Convert the UTF8_BUF sequence to a Unicode code point. */
+ *codepoint = utf8_to_codepoint (utf8_buf, output_size);
+
+ if (SCM_LIKELY (err == 0))
+ update_port_lf (*codepoint, port);
+
+ *len = bytes_consumed;
+
+ return err;
}
+/* Read a codepoint from PORT and return it. */
+scm_t_wchar
+scm_getc (SCM port)
+#define FUNC_NAME "scm_getc"
+{
+ int err;
+ size_t len;
+ scm_t_wchar codepoint;
+ char buf[SCM_MBCHAR_BUF_SIZE];
+
+ err = get_codepoint (port, &codepoint, buf, &len);
+ if (SCM_UNLIKELY (err != 0))
+ /* At this point PORT should point past the invalid encoding, as per
+ R6RS-lib Section 8.2.4. */
+ scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
+
+ return codepoint;
+}
+#undef FUNC_NAME
/* this should only be called when the read buffer is empty. it
tries to refill the read buffer. it returns the first char from
* This function differs from scm_c_write; it updates port line and
* column. */
-static void
-update_port_lf (scm_t_wchar c, SCM port)
-{
- if (c == '\a')
- ; /* Do nothing. */
- else if (c == '\b')
- SCM_DECCOL (port);
- else if (c == '\n')
- SCM_INCLINE (port);
- else if (c == '\r')
- SCM_ZEROCOL (port);
- else if (c == '\t')
- SCM_TABCOL (port);
- else
- SCM_INCCOL (port);
-}
-
void
scm_lfwrite (const char *ptr, size_t size, SCM port)
{
pt->rw_active = SCM_PORT_WRITE;
}
-/* Write a scheme string STR to PORT from START inclusive to END
- exclusive. */
+/* Write STR to PORT from START inclusive to END exclusive. */
void
scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port)
{
- size_t i, size = scm_i_string_length (str);
- scm_t_port *pt = SCM_PTAB_ENTRY (port);
- scm_t_ptob_descriptor *ptob = &scm_ptobs[SCM_PTOBNUM (port)];
- scm_t_wchar p;
- char *buf;
- size_t len;
-
- if (pt->rw_active == SCM_PORT_READ)
- scm_end_input (port);
-
- if (end == (size_t) (-1))
- end = size;
- size = end - start;
-
- /* Note that making a substring will likely take the
- stringbuf_write_mutex. So, one shouldn't use scm_lfwrite_substr
- if the stringbuf write mutex may still be held elsewhere. */
- buf = scm_to_stringn (scm_c_substring (str, start, end), &len,
- pt->encoding, pt->ilseq_handler);
- ptob->write (port, buf, len);
- free (buf);
-
- for (i = 0; i < size; i++)
- {
- p = scm_i_string_ref (str, i + start);
- update_port_lf (p, port);
- }
-
- if (pt->rw_random)
- pt->rw_active = SCM_PORT_WRITE;
-}
-
-/* Write a scheme string STR to PORT. */
-void
-scm_lfwrite_str (SCM str, SCM port)
-{
- size_t i, size = scm_i_string_length (str);
scm_t_port *pt = SCM_PTAB_ENTRY (port);
- scm_t_ptob_descriptor *ptob = &scm_ptobs[SCM_PTOBNUM (port)];
- scm_t_wchar p;
- char *buf;
- size_t len;
if (pt->rw_active == SCM_PORT_READ)
scm_end_input (port);
- buf = scm_to_stringn (str, &len,
- pt->encoding, pt->ilseq_handler);
- ptob->write (port, buf, len);
- free (buf);
+ if (end == (size_t) -1)
+ end = scm_i_string_length (str);
- for (i = 0; i < size; i++)
- {
- p = scm_i_string_ref (str, i);
- update_port_lf (p, port);
- }
+ scm_display (scm_c_substring (str, start, end), port);
if (pt->rw_random)
pt->rw_active = SCM_PORT_WRITE;
requested number of bytes. (Note that a single scm_fill_input
call does not guarantee to fill the whole of the port's read
buffer.) */
- if (pt->read_buf_size <= 1)
+ if (pt->read_buf_size <= 1 && pt->encoding == NULL)
{
/* The port that we are reading from is unbuffered - i.e. does
not have its own persistent buffer - but we have a buffer,
We need to make sure that the port's normal (1 byte) buffer
is reinstated in case one of the scm_fill_input () calls
throws an exception; we use the scm_dynwind_* API to achieve
- that. */
+ that.
+
+ A consequence of this optimization is that the fill_input
+ functions can't unget characters. That'll push data to the
+ pushback buffer instead of this psb buffer. */
+#if SCM_DEBUG == 1
+ unsigned char *pback = pt->putback_buf;
+#endif
psb.pt = pt;
psb.buffer = buffer;
psb.size = size;
pt->read_buf_size -= (pt->read_end - pt->read_pos);
pt->read_pos = pt->read_buf = pt->read_end;
}
+#if SCM_DEBUG == 1
+ if (pback != pt->putback_buf
+ || pt->read_buf - (unsigned char *) buffer < 0)
+ scm_misc_error (FUNC_NAME,
+ "scm_c_read must not call a fill function that pushes "
+ "back characters onto an unbuffered port", SCM_EOL);
+#endif
n_read += pt->read_buf - (unsigned char *) buffer;
-
+
/* Reinstate the port's normal buffer. */
scm_dynwind_end ();
}
}
#undef FUNC_NAME
-void
+void
scm_flush (SCM port)
{
long i = SCM_PTOBNUM (port);
+ assert (i >= 0);
(scm_ptobs[i].flush) (port);
}
if (pt->putback_buf == NULL)
{
pt->putback_buf
- = (unsigned char *) scm_gc_malloc (SCM_INITIAL_PUTBACK_BUF_SIZE,
- "putback buffer");
+ = (unsigned char *) scm_gc_malloc_pointerless
+ (SCM_INITIAL_PUTBACK_BUF_SIZE, "putback buffer");
pt->putback_buf_size = SCM_INITIAL_PUTBACK_BUF_SIZE;
}
}
#undef FUNC_NAME
-void
+void
scm_ungetc (scm_t_wchar c, SCM port)
#define FUNC_NAME "scm_ungetc"
{
scm_t_port *pt = SCM_PTAB_ENTRY (port);
- scm_t_wchar *wbuf;
- SCM str = scm_i_make_wide_string (1, &wbuf);
- char *buf;
+ char *result;
+ char result_buf[10];
+ const char *encoding;
size_t len;
int i;
- wbuf[0] = c;
- buf = scm_to_stringn (str, &len, pt->encoding, pt->ilseq_handler);
-
+ if (pt->encoding != NULL)
+ encoding = pt->encoding;
+ else
+ encoding = "ISO-8859-1";
+
+ len = sizeof (result_buf);
+ result = u32_conv_to_encoding (encoding,
+ (enum iconv_ilseq_handler) pt->ilseq_handler,
+ (uint32_t *) &c, 1, NULL,
+ result_buf, &len);
+
+ if (SCM_UNLIKELY (result == NULL || len == 0))
+ {
+ SCM chr;
+
+ chr = scm_integer_to_char (scm_from_uint32 (c));
+ scm_encoding_error (FUNC_NAME, errno,
+ "conversion to port encoding failed",
+ "UTF-32", encoding,
+ scm_string (scm_list_1 (chr)));
+ }
+
for (i = len - 1; i >= 0; i--)
- scm_unget_byte (buf[i], port);
+ scm_unget_byte (result[i], port);
+
+ if (SCM_UNLIKELY (result != result_buf))
+ free (result);
if (c == '\n')
{
"return the value returned by the preceding call to\n"
"@code{peek-char}. In particular, a call to @code{peek-char} on\n"
"an interactive port will hang waiting for input whenever a call\n"
- "to @code{read-char} would have hung.")
+ "to @code{read-char} would have hung.\n"
+ "\n"
+ "As for @code{read-char}, a @code{decoding-error} may be raised\n"
+ "if such a situation occurs. However, unlike with @code{read-char},\n"
+ "@var{port} still points at the beginning of the erroneous byte\n"
+ "sequence when the error is raised.\n")
#define FUNC_NAME s_scm_peek_char
{
- scm_t_wchar c, column;
+ int err;
+ SCM result;
+ scm_t_wchar c;
+ char bytes[SCM_MBCHAR_BUF_SIZE];
+ long column, line, i;
+ size_t len;
+
if (SCM_UNBNDP (port))
port = scm_current_input_port ();
+ SCM_VALIDATE_OPINPORT (1, port);
+
+ column = SCM_COL (port);
+ line = SCM_LINUM (port);
+
+ err = get_codepoint (port, &c, bytes, &len);
+
+ for (i = len - 1; i >= 0; i--)
+ scm_unget_byte (bytes[i], port);
+
+ SCM_COL (port) = column;
+ SCM_LINUM (port) = line;
+
+ if (SCM_UNLIKELY (err != 0))
+ {
+ scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
+
+ /* Shouldn't happen since `catch' always aborts to prompt. */
+ result = SCM_BOOL_F;
+ }
+ else if (c == EOF)
+ result = SCM_EOF_VAL;
else
- SCM_VALIDATE_OPINPORT (1, port);
- column = SCM_COL(port);
- c = scm_getc (port);
- if (EOF == c)
- return SCM_EOF_VAL;
- scm_ungetc (c, port);
- SCM_COL(port) = column;
- return SCM_MAKE_CHAR (c);
+ result = SCM_MAKE_CHAR (c);
+
+ return result;
}
#undef FUNC_NAME
SCM_VALIDATE_CHAR (1, cobj);
if (SCM_UNBNDP (port))
port = scm_current_input_port ();
- else
- SCM_VALIDATE_OPINPORT (2, port);
+ SCM_VALIDATE_OPINPORT (2, port);
c = SCM_CHAR (cobj);
SCM_VALIDATE_STRING (1, str);
if (SCM_UNBNDP (port))
port = scm_current_input_port ();
- else
- SCM_VALIDATE_OPINPORT (2, port);
+ SCM_VALIDATE_OPINPORT (2, port);
n = scm_i_string_length (str);
}
#undef FUNC_NAME
-/* The default port encoding for this locale. New ports will have this
- encoding. If it is a string, that is the encoding. If it #f, it
- is in the native (Latin-1) encoding. */
-SCM_GLOBAL_VARIABLE (scm_port_encoding_var, "%port-encoding");
+/* A fluid specifying the default encoding for newly created ports. If it is
+ a string, that is the encoding. If it is #f, it is in the "native"
+ (Latin-1) encoding. */
+SCM_VARIABLE (default_port_encoding_var, "%default-port-encoding");
+
static int scm_port_encoding_init = 0;
/* Return a C string representation of the current encoding. */
{
if (!scm_port_encoding_init)
return NULL;
- else if (!scm_is_fluid (SCM_VARIABLE_REF (scm_port_encoding_var)))
+ else if (!scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
return NULL;
else
{
- encoding = scm_fluid_ref (SCM_VARIABLE_REF (scm_port_encoding_var));
+ encoding = scm_fluid_ref (SCM_VARIABLE_REF (default_port_encoding_var));
if (!scm_is_string (encoding))
return NULL;
else
{
scm_t_port *pt;
pt = SCM_PTAB_ENTRY (port);
- if (pt->encoding)
- return pt->encoding;
- else
- return NULL;
+ return pt->encoding;
}
}
-/* Returns ENC is if is a recognized encoding. If it isn't, it tries
+/* Returns ENC if it is a recognized encoding. If it isn't, it tries
to find an alias of ENC that is valid. Otherwise, it returns
NULL. */
static const char *
{
int isvalid = 0;
const char str[] = " ";
+ scm_t_uint32 result_buf;
scm_t_uint32 *u32;
size_t u32len;
-
+
+ u32len = sizeof (result_buf) / sizeof (scm_t_uint32);
u32 = u32_conv_from_encoding (enc, iconveh_error, str, 1,
- NULL, NULL, &u32len);
+ NULL, &result_buf, &u32len);
isvalid = (u32 != NULL);
- free (u32);
-
+
+ if (SCM_UNLIKELY (u32 != &result_buf))
+ free (u32);
+
if (isvalid)
return enc;
{
valid_enc = find_valid_encoding (enc);
if (valid_enc == NULL)
- {
- SCM err;
- err = scm_from_locale_string (enc);
- scm_misc_error (NULL, "invalid or unknown character encoding ~s",
- scm_list_1 (err));
- }
+ goto invalid_encoding;
}
if (scm_is_false (port))
{
/* Set the default encoding for future ports. */
if (!scm_port_encoding_init
- || !scm_is_fluid (SCM_VARIABLE_REF (scm_port_encoding_var)))
+ || !scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized",
SCM_EOL);
|| !strcmp (valid_enc, "ASCII")
|| !strcmp (valid_enc, "ANSI_X3.4-1968")
|| !strcmp (valid_enc, "ISO-8859-1"))
- scm_fluid_set_x (SCM_VARIABLE_REF (scm_port_encoding_var), SCM_BOOL_F);
+ scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F);
else
- scm_fluid_set_x (SCM_VARIABLE_REF (scm_port_encoding_var),
+ scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var),
scm_from_locale_string (valid_enc));
}
else
{
+ iconv_t new_input_cd, new_output_cd;
+
+ new_input_cd = (iconv_t) -1;
+ new_output_cd = (iconv_t) -1;
+
/* Set the character encoding for this port. */
pt = SCM_PTAB_ENTRY (port);
- if (pt->encoding)
- free (pt->encoding);
+
if (valid_enc == NULL)
- pt->encoding = NULL;
- else
- pt->encoding = strdup (valid_enc);
+ valid_enc = "ISO-8859-1";
+
+ pt->encoding = scm_gc_strdup (valid_enc, "port");
+
+ if (SCM_CELL_WORD_0 (port) & SCM_RDNG)
+ {
+ /* Open an input iconv conversion descriptor, from VALID_ENC
+ to UTF-8. We choose UTF-8, not UTF-32, because iconv
+ implementations can typically convert from anything to
+ UTF-8, but not to UTF-32 (see
+ <http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html>). */
+ new_input_cd = iconv_open ("UTF-8", valid_enc);
+ if (new_input_cd == (iconv_t) -1)
+ goto invalid_encoding;
+ }
+
+ if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
+ {
+ new_output_cd = iconv_open (valid_enc, "UTF-8");
+ if (new_output_cd == (iconv_t) -1)
+ {
+ if (new_input_cd != (iconv_t) -1)
+ iconv_close (new_input_cd);
+ goto invalid_encoding;
+ }
+ }
+
+ if (pt->input_cd != (iconv_t) -1)
+ iconv_close (pt->input_cd);
+ if (pt->output_cd != (iconv_t) -1)
+ iconv_close (pt->output_cd);
+
+ pt->input_cd = new_input_cd;
+ pt->output_cd = new_output_cd;
}
+
+ return;
+
+ invalid_encoding:
+ {
+ SCM err;
+ err = scm_from_locale_string (enc);
+ scm_misc_error (NULL, "invalid or unknown character encoding ~s",
+ scm_list_1 (err));
+ }
}
SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0,
if (enc)
return scm_from_locale_string (pt->encoding);
else
- return scm_from_locale_string ("NONE");
+ return SCM_BOOL_F;
}
#undef FUNC_NAME
-
+
SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0,
(SCM port, SCM enc),
"Sets the character encoding that will be used to interpret all\n"
"appropriate for the current locale if @code{setlocale} has \n"
"been called or ISO-8859-1 otherwise\n"
"and this procedure can be used to modify that encoding.\n")
-
#define FUNC_NAME s_scm_set_port_encoding_x
{
char *enc_str;
- const char *valid_enc_str;
SCM_VALIDATE_PORT (1, port);
SCM_VALIDATE_STRING (2, enc);
enc_str = scm_to_locale_string (enc);
- valid_enc_str = find_valid_encoding (enc_str);
- if (valid_enc_str == NULL)
- {
- free (enc_str);
- scm_misc_error (FUNC_NAME, "invalid or unknown character encoding ~s",
- scm_list_1 (enc));
- }
- else
- {
- scm_i_set_port_encoding_x (port, valid_enc_str);
- free (enc_str);
- }
+ scm_i_set_port_encoding_x (port, enc_str);
+
return SCM_UNSPECIFIED;
}
#undef FUNC_NAME
{
scm_t_port *pt;
pt = SCM_PTAB_ENTRY (port);
- return pt->ilseq_handler;
+ return pt->ilseq_handler;
}
}
h = scm_i_get_conversion_strategy (port);
if (h == SCM_FAILED_CONVERSION_ERROR)
- return scm_from_locale_symbol ("error");
+ return scm_from_latin1_symbol ("error");
else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK)
- return scm_from_locale_symbol ("substitute");
+ return scm_from_latin1_symbol ("substitute");
else if (h == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
- return scm_from_locale_symbol ("escape");
+ return scm_from_latin1_symbol ("escape");
else
abort ();
SCM_VALIDATE_OPPORT (1, port);
}
- err = scm_from_locale_symbol ("error");
+ err = scm_from_latin1_symbol ("error");
if (scm_is_true (scm_eqv_p (sym, err)))
{
scm_i_set_conversion_strategy_x (port, SCM_FAILED_CONVERSION_ERROR);
return SCM_UNSPECIFIED;
}
- qm = scm_from_locale_symbol ("substitute");
+ qm = scm_from_latin1_symbol ("substitute");
if (scm_is_true (scm_eqv_p (sym, qm)))
{
scm_i_set_conversion_strategy_x (port,
return SCM_UNSPECIFIED;
}
- esc = scm_from_locale_symbol ("escape");
+ esc = scm_from_latin1_symbol ("escape");
if (scm_is_true (scm_eqv_p (sym, esc)))
{
scm_i_set_conversion_strategy_x (port,
return 1;
}
-void
-scm_ports_prehistory ()
-{
- scm_numptob = 0;
- scm_ptobs = (scm_t_ptob_descriptor *) scm_malloc (sizeof (scm_t_ptob_descriptor));
-}
-
\f
/* Void ports. */
scm_tc16_void_port = scm_make_port_type ("void", fill_input_void_port,
write_void_port);
- cur_inport_fluid = scm_permanent_object (scm_make_fluid ());
- cur_outport_fluid = scm_permanent_object (scm_make_fluid ());
- cur_errport_fluid = scm_permanent_object (scm_make_fluid ());
- cur_loadport_fluid = scm_permanent_object (scm_make_fluid ());
+ cur_inport_fluid = scm_make_fluid ();
+ cur_outport_fluid = scm_make_fluid ();
+ cur_errport_fluid = scm_make_fluid ();
+ cur_loadport_fluid = scm_make_fluid ();
+
+ scm_i_port_weak_hash = scm_make_weak_key_hash_table (SCM_I_MAKINUM(31));
- scm_i_port_weak_hash = scm_permanent_object (scm_make_weak_key_hash_table (SCM_I_MAKINUM(31)));
#include "libguile/ports.x"
- SCM_VARIABLE_SET (scm_port_encoding_var, scm_make_fluid ());
- scm_fluid_set_x (SCM_VARIABLE_REF (scm_port_encoding_var), SCM_BOOL_F);
+ /* Use Latin-1 as the default port encoding. */
+ SCM_VARIABLE_SET (default_port_encoding_var, scm_make_fluid ());
+ scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F);
scm_port_encoding_init = 1;
-
+
SCM_VARIABLE_SET (scm_conversion_strategy, scm_make_fluid ());
scm_fluid_set_x (SCM_VARIABLE_REF (scm_conversion_strategy),
scm_from_int ((int) SCM_FAILED_CONVERSION_QUESTION_MARK));