/* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004,
- * 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+ * 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
#endif
\f
+/* Port encodings are case-insensitive ASCII strings. */
+static char
+ascii_toupper (char c)
+{
+ return (c < 'a' || c > 'z') ? c : ('A' + (c - 'a'));
+}
+
+/* It is only necessary to use this function on encodings that come from
+ the user and have not been canonicalized yet. Encodings that are set
+ on ports or in the default encoding fluid are in upper-case, and can
+ be compared with strcmp. */
+static int
+encoding_matches (const char *enc, const char *upper)
+{
+ if (!enc)
+ enc = "ISO-8859-1";
+
+ while (*enc)
+ if (ascii_toupper (*enc++) != *upper++)
+ return 0;
+
+ return !*upper;
+}
+
+static char*
+canonicalize_encoding (const char *enc)
+{
+ char *ret;
+ int i;
+
+ if (!enc)
+ return "ISO-8859-1";
+
+ ret = scm_gc_strdup (enc, "port");
+
+ for (i = 0; ret[i]; i++)
+ {
+ if (ret[i] > 127)
+ /* Restrict to ASCII. */
+ scm_misc_error (NULL, "invalid character encoding ~s",
+ scm_list_1 (scm_from_latin1_string (enc)));
+ else
+ ret[i] = ascii_toupper (ret[i]);
+ }
+
+ return ret;
+}
+
+
+\f
/* The port kind table --- a dynamically resized array of port types. */
SCM
scm_current_warning_port (void)
{
- static SCM cwp_var = SCM_BOOL_F;
+ static SCM cwp_var = SCM_UNDEFINED;
+ static scm_i_pthread_mutex_t cwp_var_mutex
+ = SCM_I_PTHREAD_MUTEX_INITIALIZER;
- if (scm_is_false (cwp_var))
- cwp_var = scm_c_private_lookup ("guile", "current-warning-port");
+ scm_i_scm_pthread_mutex_lock (&cwp_var_mutex);
+ if (SCM_UNBNDP (cwp_var))
+ cwp_var = scm_c_private_variable ("guile", "current-warning-port");
+ scm_i_pthread_mutex_unlock (&cwp_var_mutex);
return scm_call_0 (scm_variable_ref (cwp_var));
}
long
scm_mode_bits (char *modes)
{
- return scm_i_mode_bits (scm_from_locale_string (modes));
+ /* Valid characters are rw+a0l. So, use latin1. */
+ return scm_i_mode_bits (scm_from_latin1_string (modes));
}
long
/* Finalize the object (a port) pointed to by PTR. */
static void
-finalize_port (GC_PTR ptr, GC_PTR data)
+finalize_port (void *ptr, void *data)
{
SCM port = SCM_PACK_POINTER (ptr);
entry->rw_active = SCM_PORT_NEITHER;
entry->port = ret;
entry->stream = stream;
- entry->encoding = encoding ? scm_gc_strdup (encoding, "port") : NULL;
- if (encoding && strcmp (encoding, "UTF-8") == 0)
- entry->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
- else if (!encoding || strcmp (encoding, "ISO-8859-1") == 0)
- entry->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
+
+ if (encoding_matches (encoding, "UTF-8"))
+ {
+ entry->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
+ entry->encoding = "UTF-8";
+ }
+ else if (encoding_matches (encoding, "ISO-8859-1"))
+ {
+ entry->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
+ entry->encoding = "ISO-8859-1";
+ }
else
- entry->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
+ {
+ entry->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
+ entry->encoding = canonicalize_encoding (encoding);
+ }
+
entry->ilseq_handler = handler;
entry->iconv_descriptors = NULL;
+ entry->alist = SCM_EOL;
+
if (SCM_PORT_DESCRIPTOR (ret)->free)
scm_i_set_finalizer (SCM2PTR (ret), finalize_port, NULL);
{
return scm_c_make_port_with_encoding (tag, mode_bits,
scm_i_default_port_encoding (),
- scm_i_get_conversion_strategy (SCM_BOOL_F),
+ scm_i_default_port_conversion_handler (),
stream);
}
scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized",
SCM_EOL);
- if (encoding == NULL
- || !strcmp (encoding, "ASCII")
- || !strcmp (encoding, "ANSI_X3.4-1968")
- || !strcmp (encoding, "ISO-8859-1"))
+ if (encoding_matches (encoding, "ISO-8859-1"))
scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F);
else
scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var),
- scm_from_locale_string (encoding));
+ scm_from_latin1_string (canonicalize_encoding (encoding)));
}
-/* Return the name of the default encoding for newly created ports; a
- return value of NULL means "ISO-8859-1". */
+/* Return the name of the default encoding for newly created ports. */
const char *
scm_i_default_port_encoding (void)
{
if (!scm_port_encoding_init)
- return NULL;
+ return "ISO-8859-1";
else if (!scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
- return NULL;
+ return "ISO-8859-1";
else
{
SCM encoding;
encoding = scm_fluid_ref (SCM_VARIABLE_REF (default_port_encoding_var));
if (!scm_is_string (encoding))
- return NULL;
+ return "ISO-8859-1";
else
return scm_i_string_chars (encoding);
}
}
+/* A fluid specifying the default conversion handler for newly created
+ ports. Its value should be one of the symbols below. */
+SCM_VARIABLE (default_conversion_strategy_var,
+ "%default-port-conversion-strategy");
+
+/* Whether the above fluid is initialized. */
+static int scm_conversion_strategy_init = 0;
+
+/* The possible conversion strategies. */
+SCM_SYMBOL (sym_error, "error");
+SCM_SYMBOL (sym_substitute, "substitute");
+SCM_SYMBOL (sym_escape, "escape");
+
+/* Return the default failed encoding conversion policy for new created
+ ports. */
+scm_t_string_failed_conversion_handler
+scm_i_default_port_conversion_handler (void)
+{
+ scm_t_string_failed_conversion_handler handler;
+
+ if (!scm_conversion_strategy_init
+ || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
+ handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
+ else
+ {
+ SCM fluid, value;
+
+ fluid = SCM_VARIABLE_REF (default_conversion_strategy_var);
+ value = scm_fluid_ref (fluid);
+
+ if (scm_is_eq (sym_substitute, value))
+ handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
+ else if (scm_is_eq (sym_escape, value))
+ handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
+ else
+ /* Default to 'error also when the fluid's value is not one of
+ the valid symbols. */
+ handler = SCM_FAILED_CONVERSION_ERROR;
+ }
+
+ return handler;
+}
+
+/* Use HANDLER as the default conversion strategy for future ports. */
+void
+scm_i_set_default_port_conversion_handler (scm_t_string_failed_conversion_handler
+ handler)
+{
+ SCM strategy;
+
+ if (!scm_conversion_strategy_init
+ || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
+ scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized",
+ SCM_EOL);
+
+ switch (handler)
+ {
+ case SCM_FAILED_CONVERSION_ERROR:
+ strategy = sym_error;
+ break;
+
+ case SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE:
+ strategy = sym_escape;
+ break;
+
+ case SCM_FAILED_CONVERSION_QUESTION_MARK:
+ strategy = sym_substitute;
+ break;
+
+ default:
+ abort ();
+ }
+
+ scm_fluid_set_x (SCM_VARIABLE_REF (default_conversion_strategy_var),
+ strategy);
+}
+
static void
-finalize_iconv_descriptors (GC_PTR ptr, GC_PTR data)
+finalize_iconv_descriptors (void *ptr, void *data)
{
close_iconv_descriptors (ptr);
}
{
scm_t_iconv_descriptors *id;
iconv_t input_cd, output_cd;
+ size_t i;
input_cd = (iconv_t) -1;
output_cd = (iconv_t) -1;
+ for (i = 0; encoding[i]; i++)
+ if (encoding[i] > 127)
+ goto invalid_encoding;
+
if (reading)
{
/* Open an input iconv conversion descriptor, from ENCODING
invalid_encoding:
{
SCM err;
- err = scm_from_locale_string (encoding);
+ err = scm_from_latin1_string (encoding);
scm_misc_error ("open_iconv_descriptors",
"invalid or unknown character encoding ~s",
scm_list_1 (err));
return pt->iconv_descriptors;
}
+/* The name of the encoding is itself encoded in ASCII. */
void
scm_i_set_port_encoding_x (SCM port, const char *encoding)
{
pt = SCM_PTAB_ENTRY (port);
prev = pt->iconv_descriptors;
- if (encoding && strcmp (encoding, "UTF-8") == 0)
+ if (encoding_matches (encoding, "UTF-8"))
{
pt->encoding = "UTF-8";
pt->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
pt->iconv_descriptors = NULL;
}
- else if (!encoding || strcmp (encoding, "ISO-8859-1") == 0)
+ else if (encoding_matches (encoding, "ISO-8859-1"))
{
pt->encoding = "ISO-8859-1";
pt->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
else
{
/* Open descriptors before mutating the port. */
+ char *gc_encoding = canonicalize_encoding (encoding);
pt->iconv_descriptors =
- open_iconv_descriptors (encoding,
+ open_iconv_descriptors (gc_encoding,
SCM_INPUT_PORT_P (port),
SCM_OUTPUT_PORT_P (port));
- pt->encoding = scm_gc_strdup (encoding, "port");
+ pt->encoding = gc_encoding;
pt->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
}
"uses to interpret its input and output.\n")
#define FUNC_NAME s_scm_port_encoding
{
- scm_t_port *pt;
- const char *enc;
-
SCM_VALIDATE_PORT (1, port);
- pt = SCM_PTAB_ENTRY (port);
- enc = pt->encoding;
- if (enc)
- return scm_from_locale_string (pt->encoding);
- else
- return SCM_BOOL_F;
+ return scm_from_latin1_string (SCM_PTAB_ENTRY (port)->encoding);
}
#undef FUNC_NAME
SCM_VALIDATE_PORT (1, port);
SCM_VALIDATE_STRING (2, enc);
- enc_str = scm_to_locale_string (enc);
+ enc_str = scm_to_latin1_string (enc);
scm_i_set_port_encoding_x (port, enc_str);
free (enc_str);
}
#undef FUNC_NAME
-
-/* This determines how conversions handle unconvertible characters. */
-SCM_GLOBAL_VARIABLE (scm_conversion_strategy, "%port-conversion-strategy");
-static int scm_conversion_strategy_init = 0;
-
-scm_t_string_failed_conversion_handler
-scm_i_get_conversion_strategy (SCM port)
-{
- SCM encoding;
-
- if (scm_is_false (port))
- {
- if (!scm_conversion_strategy_init
- || !scm_is_fluid (SCM_VARIABLE_REF (scm_conversion_strategy)))
- return SCM_FAILED_CONVERSION_QUESTION_MARK;
- else
- {
- encoding = scm_fluid_ref (SCM_VARIABLE_REF (scm_conversion_strategy));
- if (scm_is_false (encoding))
- return SCM_FAILED_CONVERSION_QUESTION_MARK;
- else
- return (scm_t_string_failed_conversion_handler) scm_to_int (encoding);
- }
- }
- else
- {
- scm_t_port *pt;
- pt = SCM_PTAB_ENTRY (port);
- return pt->ilseq_handler;
- }
-
-}
-
-void
-scm_i_set_conversion_strategy_x (SCM port,
- scm_t_string_failed_conversion_handler handler)
-{
- SCM strategy;
- scm_t_port *pt;
-
- strategy = scm_from_int ((int) handler);
-
- if (scm_is_false (port))
- {
- /* Set the default encoding for future ports. */
- if (!scm_conversion_strategy_init
- || !scm_is_fluid (SCM_VARIABLE_REF (scm_conversion_strategy)))
- scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized",
- SCM_EOL);
- scm_fluid_set_x (SCM_VARIABLE_REF (scm_conversion_strategy), strategy);
- }
- else
- {
- /* Set the character encoding for this port. */
- pt = SCM_PTAB_ENTRY (port);
- pt->ilseq_handler = handler;
- }
-}
-
SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy",
1, 0, 0, (SCM port),
"Returns the behavior of the port when handling a character that\n"
SCM_VALIDATE_OPPORT (1, port);
- if (!scm_is_false (port))
+ if (scm_is_false (port))
+ h = scm_i_default_port_conversion_handler ();
+ else
{
+ scm_t_port *pt;
+
SCM_VALIDATE_OPPORT (1, port);
+ pt = SCM_PTAB_ENTRY (port);
+
+ h = pt->ilseq_handler;
}
- h = scm_i_get_conversion_strategy (port);
if (h == SCM_FAILED_CONVERSION_ERROR)
return scm_from_latin1_symbol ("error");
else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK)
"this thread.\n")
#define FUNC_NAME s_scm_set_port_conversion_strategy_x
{
- SCM err;
- SCM qm;
- SCM esc;
-
- if (!scm_is_false (port))
- {
- SCM_VALIDATE_OPPORT (1, port);
- }
+ scm_t_string_failed_conversion_handler handler;
- err = scm_from_latin1_symbol ("error");
- if (scm_is_true (scm_eqv_p (sym, err)))
- {
- scm_i_set_conversion_strategy_x (port, SCM_FAILED_CONVERSION_ERROR);
- return SCM_UNSPECIFIED;
- }
-
- qm = scm_from_latin1_symbol ("substitute");
- if (scm_is_true (scm_eqv_p (sym, qm)))
- {
- scm_i_set_conversion_strategy_x (port,
- SCM_FAILED_CONVERSION_QUESTION_MARK);
- return SCM_UNSPECIFIED;
- }
+ if (scm_is_eq (sym, sym_error))
+ handler = SCM_FAILED_CONVERSION_ERROR;
+ else if (scm_is_eq (sym, sym_substitute))
+ handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
+ else if (scm_is_eq (sym, sym_escape))
+ handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
+ else
+ SCM_MISC_ERROR ("unknown conversion strategy ~s", scm_list_1 (sym));
- esc = scm_from_latin1_symbol ("escape");
- if (scm_is_true (scm_eqv_p (sym, esc)))
+ if (scm_is_false (port))
+ scm_i_set_default_port_conversion_handler (handler);
+ else
{
- scm_i_set_conversion_strategy_x (port,
- SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE);
- return SCM_UNSPECIFIED;
+ SCM_VALIDATE_OPPORT (1, port);
+ SCM_PTAB_ENTRY (port)->ilseq_handler = handler;
}
- SCM_MISC_ERROR ("unknown conversion behavior ~s", scm_list_1 (sym));
-
return SCM_UNSPECIFIED;
}
#undef FUNC_NAME
requested number of bytes. (Note that a single scm_fill_input
call does not guarantee to fill the whole of the port's read
buffer.) */
- if (pt->read_buf_size <= 1 && pt->encoding == NULL)
+ if (pt->read_buf_size <= 1
+ && pt->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
{
/* The port that we are reading from is unbuffered - i.e. does
not have its own persistent buffer - but we have a buffer,
}
/* Read an ISO-8859-1 codepoint (a byte) from PORT. On success, return
- *0 and set CODEPOINT to the codepoint that was read, fill BUF with
- *its UTF-8 representation, and set *LEN to the length in bytes.
- *Return `EILSEQ' on error. */
+ 0 and set *CODEPOINT to the codepoint that was read, fill BUF with
+ its UTF-8 representation, and set *LEN to the length in bytes.
+ Return `EILSEQ' on error. */
static int
get_latin1_codepoint (SCM port, scm_t_wchar *codepoint,
char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
with the byte representation of the codepoint in PORT's encoding, and
set *LEN to the length in bytes of that representation. Return 0 on
success and an errno value on error. */
-static int
+static SCM_C_INLINE int
get_codepoint (SCM port, scm_t_wchar *codepoint,
char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
{
scm_t_port *pt = SCM_PTAB_ENTRY (port);
char *result;
char result_buf[10];
- const char *encoding;
size_t len;
int i;
- if (pt->encoding != NULL)
- encoding = pt->encoding;
- else
- encoding = "ISO-8859-1";
-
len = sizeof (result_buf);
- result = u32_conv_to_encoding (encoding,
+ result = u32_conv_to_encoding (pt->encoding,
(enum iconv_ilseq_handler) pt->ilseq_handler,
(uint32_t *) &c, 1, NULL,
result_buf, &len);
return ret;
}
-/* move up to read_len chars from port's putback and/or read buffers
- into memory starting at dest. returns the number of chars moved. */
+/* Move up to READ_LEN bytes from PORT's putback and/or read buffers
+ into memory starting at DEST. Return the number of bytes moved.
+ PORT's line/column numbers are left unchanged. */
size_t
scm_take_from_input_buffers (SCM port, char *dest, size_t read_len)
{
scm_t_port *pt = SCM_PTAB_ENTRY (port);
- size_t chars_read = 0;
+ size_t bytes_read = 0;
size_t from_buf = min (pt->read_end - pt->read_pos, read_len);
if (from_buf > 0)
{
memcpy (dest, pt->read_pos, from_buf);
pt->read_pos += from_buf;
- chars_read += from_buf;
+ bytes_read += from_buf;
read_len -= from_buf;
dest += from_buf;
}
{
memcpy (dest, pt->saved_read_pos, from_buf);
pt->saved_read_pos += from_buf;
- chars_read += from_buf;
+ bytes_read += from_buf;
}
}
- return chars_read;
+
+ return bytes_read;
}
/* Clear a port's read buffers, returning the contents. */
if (end == (size_t) -1)
end = scm_i_string_length (str);
- scm_display (scm_c_substring (str, start, end), port);
+ scm_i_display_substring (str, start, end, port);
if (pt->rw_random)
pt->rw_active = SCM_PORT_WRITE;
scm_make_fluid_with_default (SCM_BOOL_F));
scm_port_encoding_init = 1;
- SCM_VARIABLE_SET (scm_conversion_strategy,
- scm_make_fluid_with_default
- (scm_from_int ((int) SCM_FAILED_CONVERSION_QUESTION_MARK)));
+ SCM_VARIABLE_SET (default_conversion_strategy_var,
+ scm_make_fluid_with_default (sym_substitute));
scm_conversion_strategy_init = 1;
-
+
/* These bindings are used when boot-9 turns `current-input-port' et
al into parameters. They are then removed from the guile module. */
scm_c_define ("%current-input-port-fluid", cur_inport_fluid);