-/* Copyright (C) 1995,1996,1997,1998,1999,2000,2001, 2003, 2004, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+/* Copyright (C) 1995,1996,1997,1998,1999,2000,2001, 2003, 2004, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
#include "libguile/async.h"
#include "libguile/eval.h"
#include "libguile/fports.h" /* direct access for seek and truncate */
-#include "libguile/objects.h"
#include "libguile/goops.h"
#include "libguile/smob.h"
#include "libguile/chars.h"
gnulib ftruncate.c has code using fcntl F_CHSIZE and F_FREESP, which
might be possibilities if we've got other systems without ftruncate. */
-#if HAVE_CHSIZE && ! HAVE_FTRUNCATE
+#if defined HAVE_CHSIZE && ! defined HAVE_FTRUNCATE
#define ftruncate(fd, size) chsize (fd, size)
#undef HAVE_FTRUNCATE
#define HAVE_FTRUNCATE 1
* Indexes into this table are used when generating type
* tags for smobjects (if you know a tag you can get an index and conversely).
*/
-scm_t_ptob_descriptor *scm_ptobs;
-long scm_numptob;
+scm_t_ptob_descriptor *scm_ptobs = NULL;
+long scm_numptob = 0;
/* GC marker for a port with stream of SCM type. */
SCM
#define SCM_MBCHAR_BUF_SIZE (4)
-/* Get one codepoint from a file, using the port's encoding. */
-scm_t_wchar
-scm_getc (SCM port)
+/* Read a codepoint from PORT and return it. Fill BUF with the byte
+ representation of the codepoint in PORT's encoding, and set *LEN to
+ the length in bytes of that representation. Raise an error on
+ failure. */
+static scm_t_wchar
+get_codepoint (SCM port, char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
{
int c;
- unsigned int bufcount = 0;
- char buf[SCM_MBCHAR_BUF_SIZE];
+ size_t bufcount = 0;
+ scm_t_uint32 result_buf;
scm_t_wchar codepoint = 0;
scm_t_uint32 *u32;
size_t u32len;
bufcount++;
if (pt->encoding == NULL)
- {
+ {
/* The encoding is Latin-1: bytes are characters. */
codepoint = (unsigned char) buf[0];
goto success;
for (;;)
{
- u32 = u32_conv_from_encoding (pt->encoding,
- (enum iconv_ilseq_handler) pt->ilseq_handler,
- buf, bufcount, NULL, NULL, &u32len);
+ u32len = sizeof (result_buf) / sizeof (scm_t_uint32);
+ u32 = u32_conv_from_encoding (pt->encoding,
+ (enum iconv_ilseq_handler) pt->ilseq_handler,
+ buf, bufcount, NULL, &result_buf, &u32len);
if (u32 == NULL || u32len == 0)
{
if (errno == ENOMEM)
scm_memory_error ("Input decoding");
-
+
/* Otherwise errno is EILSEQ or EINVAL, so perhaps more
bytes are needed. Keep looping. */
}
- else
+ else
{
/* Complete codepoint found. */
codepoint = u32[0];
- free (u32);
+
+ if (SCM_UNLIKELY (u32 != &result_buf))
+ /* libunistring up to 0.9.3 (included) would always heap-allocate
+ the result even when a large-enough RESULT_BUF is supplied, see
+ <http://lists.gnu.org/archive/html/bug-libunistring/2010-07/msg00003.html>. */
+ free (u32);
+
goto success;
}
break;
}
+ *len = bufcount;
+
return codepoint;
failure:
return 0;
}
+/* Read a codepoint from PORT and return it. */
+scm_t_wchar
+scm_getc (SCM port)
+{
+ size_t len;
+ char buf[SCM_MBCHAR_BUF_SIZE];
+
+ return get_codepoint (port, buf, &len);
+}
/* this should only be called when the read buffer is empty. it
tries to refill the read buffer. it returns the first char from
}
#undef FUNC_NAME
-void
+void
scm_ungetc (scm_t_wchar c, SCM port)
#define FUNC_NAME "scm_ungetc"
{
scm_t_port *pt = SCM_PTAB_ENTRY (port);
- scm_t_wchar *wbuf;
- SCM str = scm_i_make_wide_string (1, &wbuf);
- char *buf;
+ char *result;
+ char result_buf[10];
+ const char *encoding;
size_t len;
int i;
- wbuf[0] = c;
- buf = scm_to_stringn (str, &len, pt->encoding, pt->ilseq_handler);
-
+ if (pt->encoding != NULL)
+ encoding = pt->encoding;
+ else
+ encoding = "ISO-8859-1";
+
+ len = sizeof (result_buf);
+ result = u32_conv_to_encoding (encoding,
+ (enum iconv_ilseq_handler) pt->ilseq_handler,
+ (uint32_t *) &c, 1, NULL,
+ result_buf, &len);
+
+ if (SCM_UNLIKELY (result == NULL || len == 0))
+ {
+ SCM chr;
+
+ chr = scm_integer_to_char (scm_from_uint32 (c));
+ scm_encoding_error (FUNC_NAME, errno,
+ "conversion to port encoding failed",
+ "UTF-32", encoding,
+ scm_string (scm_list_1 (chr)));
+ }
+
for (i = len - 1; i >= 0; i--)
- scm_unget_byte (buf[i], port);
+ scm_unget_byte (result[i], port);
+
+ if (SCM_UNLIKELY (result != result_buf))
+ free (result);
if (c == '\n')
{
"to @code{read-char} would have hung.")
#define FUNC_NAME s_scm_peek_char
{
- scm_t_wchar c, column;
+ SCM result;
+ scm_t_wchar c;
+ char bytes[SCM_MBCHAR_BUF_SIZE];
+ long column, line;
+ size_t len;
+
if (SCM_UNBNDP (port))
port = scm_current_input_port ();
else
SCM_VALIDATE_OPINPORT (1, port);
- column = SCM_COL(port);
- c = scm_getc (port);
- if (EOF == c)
- return SCM_EOF_VAL;
- scm_ungetc (c, port);
- SCM_COL(port) = column;
- return SCM_MAKE_CHAR (c);
+
+ column = SCM_COL (port);
+ line = SCM_LINUM (port);
+
+ c = get_codepoint (port, bytes, &len);
+ if (c == EOF)
+ result = SCM_EOF_VAL;
+ else
+ {
+ long i;
+
+ result = SCM_MAKE_CHAR (c);
+
+ for (i = len - 1; i >= 0; i--)
+ scm_unget_byte (bytes[i], port);
+
+ SCM_COL (port) = column;
+ SCM_LINUM (port) = line;
+ }
+
+ return result;
}
#undef FUNC_NAME
}
#undef FUNC_NAME
-/* The default port encoding for this locale. New ports will have this
- encoding. If it is a string, that is the encoding. If it #f, it
- is in the native (Latin-1) encoding. */
-SCM_GLOBAL_VARIABLE (scm_port_encoding_var, "%port-encoding");
+/* A fluid specifying the default encoding for newly created ports. If it is
+ a string, that is the encoding. If it is #f, it is in the "native"
+ (Latin-1) encoding. */
+SCM_VARIABLE (default_port_encoding_var, "%default-port-encoding");
+
static int scm_port_encoding_init = 0;
/* Return a C string representation of the current encoding. */
{
if (!scm_port_encoding_init)
return NULL;
- else if (!scm_is_fluid (SCM_VARIABLE_REF (scm_port_encoding_var)))
+ else if (!scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
return NULL;
else
{
- encoding = scm_fluid_ref (SCM_VARIABLE_REF (scm_port_encoding_var));
+ encoding = scm_fluid_ref (SCM_VARIABLE_REF (default_port_encoding_var));
if (!scm_is_string (encoding))
return NULL;
else
{
int isvalid = 0;
const char str[] = " ";
+ scm_t_uint32 result_buf;
scm_t_uint32 *u32;
size_t u32len;
-
+
+ u32len = sizeof (result_buf) / sizeof (scm_t_uint32);
u32 = u32_conv_from_encoding (enc, iconveh_error, str, 1,
- NULL, NULL, &u32len);
+ NULL, &result_buf, &u32len);
isvalid = (u32 != NULL);
- free (u32);
-
+
+ if (SCM_UNLIKELY (u32 != &result_buf))
+ free (u32);
+
if (isvalid)
return enc;
{
/* Set the default encoding for future ports. */
if (!scm_port_encoding_init
- || !scm_is_fluid (SCM_VARIABLE_REF (scm_port_encoding_var)))
+ || !scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized",
SCM_EOL);
|| !strcmp (valid_enc, "ASCII")
|| !strcmp (valid_enc, "ANSI_X3.4-1968")
|| !strcmp (valid_enc, "ISO-8859-1"))
- scm_fluid_set_x (SCM_VARIABLE_REF (scm_port_encoding_var), SCM_BOOL_F);
+ scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F);
else
- scm_fluid_set_x (SCM_VARIABLE_REF (scm_port_encoding_var),
+ scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var),
scm_from_locale_string (valid_enc));
}
else
if (enc)
return scm_from_locale_string (pt->encoding);
else
- return scm_from_locale_string ("NONE");
+ return SCM_BOOL_F;
}
#undef FUNC_NAME
-
+
SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0,
(SCM port, SCM enc),
"Sets the character encoding that will be used to interpret all\n"
"appropriate for the current locale if @code{setlocale} has \n"
"been called or ISO-8859-1 otherwise\n"
"and this procedure can be used to modify that encoding.\n")
-
#define FUNC_NAME s_scm_set_port_encoding_x
{
char *enc_str;
return 1;
}
-void
-scm_ports_prehistory ()
-{
- scm_numptob = 0;
- scm_ptobs = NULL;
-}
-
\f
/* Void ports. */
scm_tc16_void_port = scm_make_port_type ("void", fill_input_void_port,
write_void_port);
- cur_inport_fluid = scm_permanent_object (scm_make_fluid ());
- cur_outport_fluid = scm_permanent_object (scm_make_fluid ());
- cur_errport_fluid = scm_permanent_object (scm_make_fluid ());
- cur_loadport_fluid = scm_permanent_object (scm_make_fluid ());
+ cur_inport_fluid = scm_make_fluid ();
+ cur_outport_fluid = scm_make_fluid ();
+ cur_errport_fluid = scm_make_fluid ();
+ cur_loadport_fluid = scm_make_fluid ();
+
+ scm_i_port_weak_hash = scm_make_weak_key_hash_table (SCM_I_MAKINUM(31));
- scm_i_port_weak_hash = scm_permanent_object (scm_make_weak_key_hash_table (SCM_I_MAKINUM(31)));
#include "libguile/ports.x"
- SCM_VARIABLE_SET (scm_port_encoding_var, scm_make_fluid ());
- scm_fluid_set_x (SCM_VARIABLE_REF (scm_port_encoding_var), SCM_BOOL_F);
+ /* Use Latin-1 as the default port encoding. */
+ SCM_VARIABLE_SET (default_port_encoding_var, scm_make_fluid ());
+ scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F);
scm_port_encoding_init = 1;
-
+
SCM_VARIABLE_SET (scm_conversion_strategy, scm_make_fluid ());
scm_fluid_set_x (SCM_VARIABLE_REF (scm_conversion_strategy),
scm_from_int ((int) SCM_FAILED_CONVERSION_QUESTION_MARK));