X-Git-Url: http://git.hcoop.net/bpt/guile.git/blobdiff_plain/4d497b629b73afda35ba409c3dcbfb665fe41dde..26d148066f9cb20e395a7dc4fefdf2e2ef0b2fb0:/libguile/ports.c diff --git a/libguile/ports.c b/libguile/ports.c index f5ab24ed2..31d338efe 100644 --- a/libguile/ports.c +++ b/libguile/ports.c @@ -1,5 +1,5 @@ /* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, - * 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + * 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -90,6 +90,56 @@ #endif +/* Port encodings are case-insensitive ASCII strings. */ +static char +ascii_toupper (char c) +{ + return (c < 'a' || c > 'z') ? c : ('A' + (c - 'a')); +} + +/* It is only necessary to use this function on encodings that come from + the user and have not been canonicalized yet. Encodings that are set + on ports or in the default encoding fluid are in upper-case, and can + be compared with strcmp. */ +static int +encoding_matches (const char *enc, const char *upper) +{ + if (!enc) + enc = "ISO-8859-1"; + + while (*enc) + if (ascii_toupper (*enc++) != *upper++) + return 0; + + return !*upper; +} + +static char* +canonicalize_encoding (const char *enc) +{ + char *ret; + int i; + + if (!enc) + return "ISO-8859-1"; + + ret = scm_gc_strdup (enc, "port"); + + for (i = 0; ret[i]; i++) + { + if (ret[i] > 127) + /* Restrict to ASCII. */ + scm_misc_error (NULL, "invalid character encoding ~s", + scm_list_1 (scm_from_latin1_string (enc))); + else + ret[i] = ascii_toupper (ret[i]); + } + + return ret; +} + + + /* The port kind table --- a dynamically resized array of port types. */ @@ -333,10 +383,14 @@ SCM_DEFINE (scm_current_error_port, "current-error-port", 0, 0, 0, SCM scm_current_warning_port (void) { - static SCM cwp_var = SCM_BOOL_F; + static SCM cwp_var = SCM_UNDEFINED; + static scm_i_pthread_mutex_t cwp_var_mutex + = SCM_I_PTHREAD_MUTEX_INITIALIZER; - if (scm_is_false (cwp_var)) - cwp_var = scm_c_private_lookup ("guile", "current-warning-port"); + scm_i_scm_pthread_mutex_lock (&cwp_var_mutex); + if (SCM_UNBNDP (cwp_var)) + cwp_var = scm_c_private_variable ("guile", "current-warning-port"); + scm_i_pthread_mutex_unlock (&cwp_var_mutex); return scm_call_0 (scm_variable_ref (cwp_var)); } @@ -553,7 +607,7 @@ do_free (void *body_data) /* Finalize the object (a port) pointed to by PTR. */ static void -finalize_port (GC_PTR ptr, GC_PTR data) +finalize_port (void *ptr, void *data) { SCM port = SCM_PACK_POINTER (ptr); @@ -603,16 +657,28 @@ scm_c_make_port_with_encoding (scm_t_bits tag, unsigned long mode_bits, entry->rw_active = SCM_PORT_NEITHER; entry->port = ret; entry->stream = stream; - entry->encoding = encoding ? scm_gc_strdup (encoding, "port") : NULL; - if (encoding && strcmp (encoding, "UTF-8") == 0) - entry->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8; - else if (!encoding || strcmp (encoding, "ISO-8859-1") == 0) - entry->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1; + + if (encoding_matches (encoding, "UTF-8")) + { + entry->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8; + entry->encoding = "UTF-8"; + } + else if (encoding_matches (encoding, "ISO-8859-1")) + { + entry->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1; + entry->encoding = "ISO-8859-1"; + } else - entry->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV; + { + entry->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV; + entry->encoding = canonicalize_encoding (encoding); + } + entry->ilseq_handler = handler; entry->iconv_descriptors = NULL; + entry->alist = SCM_EOL; + if (SCM_PORT_DESCRIPTOR (ret)->free) scm_i_set_finalizer (SCM2PTR (ret), finalize_port, NULL); @@ -627,7 +693,7 @@ scm_c_make_port (scm_t_bits tag, unsigned long mode_bits, scm_t_bits stream) { return scm_c_make_port_with_encoding (tag, mode_bits, scm_i_default_port_encoding (), - scm_i_get_conversion_strategy (SCM_BOOL_F), + scm_i_default_port_conversion_handler (), stream); } @@ -804,51 +870,112 @@ scm_i_set_default_port_encoding (const char *encoding) scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized", SCM_EOL); - if (encoding == NULL - || !strcmp (encoding, "ASCII") - || !strcmp (encoding, "ANSI_X3.4-1968") - || !strcmp (encoding, "ISO-8859-1")) + if (encoding_matches (encoding, "ISO-8859-1")) scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F); else - { - SCM str; - size_t i; - - str = scm_from_latin1_string (encoding); - - /* Restrict to ASCII. */ - for (i = 0; encoding[i]; i++) - if (encoding[i] > 127) - scm_misc_error ("scm_i_set_default_port_encoding", - "invalid character encoding ~s", scm_list_1 (str)); - - scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), str); - } + scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), + scm_from_latin1_string (canonicalize_encoding (encoding))); } -/* Return the name of the default encoding for newly created ports; a - return value of NULL means "ISO-8859-1". */ +/* Return the name of the default encoding for newly created ports. */ const char * scm_i_default_port_encoding (void) { if (!scm_port_encoding_init) - return NULL; + return "ISO-8859-1"; else if (!scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var))) - return NULL; + return "ISO-8859-1"; else { SCM encoding; encoding = scm_fluid_ref (SCM_VARIABLE_REF (default_port_encoding_var)); if (!scm_is_string (encoding)) - return NULL; + return "ISO-8859-1"; else return scm_i_string_chars (encoding); } } +/* A fluid specifying the default conversion handler for newly created + ports. Its value should be one of the symbols below. */ +SCM_VARIABLE (default_conversion_strategy_var, + "%default-port-conversion-strategy"); + +/* Whether the above fluid is initialized. */ +static int scm_conversion_strategy_init = 0; + +/* The possible conversion strategies. */ +SCM_SYMBOL (sym_error, "error"); +SCM_SYMBOL (sym_substitute, "substitute"); +SCM_SYMBOL (sym_escape, "escape"); + +/* Return the default failed encoding conversion policy for new created + ports. */ +scm_t_string_failed_conversion_handler +scm_i_default_port_conversion_handler (void) +{ + scm_t_string_failed_conversion_handler handler; + + if (!scm_conversion_strategy_init + || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var))) + handler = SCM_FAILED_CONVERSION_QUESTION_MARK; + else + { + SCM fluid, value; + + fluid = SCM_VARIABLE_REF (default_conversion_strategy_var); + value = scm_fluid_ref (fluid); + + if (scm_is_eq (sym_substitute, value)) + handler = SCM_FAILED_CONVERSION_QUESTION_MARK; + else if (scm_is_eq (sym_escape, value)) + handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE; + else + /* Default to 'error also when the fluid's value is not one of + the valid symbols. */ + handler = SCM_FAILED_CONVERSION_ERROR; + } + + return handler; +} + +/* Use HANDLER as the default conversion strategy for future ports. */ +void +scm_i_set_default_port_conversion_handler (scm_t_string_failed_conversion_handler + handler) +{ + SCM strategy; + + if (!scm_conversion_strategy_init + || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var))) + scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized", + SCM_EOL); + + switch (handler) + { + case SCM_FAILED_CONVERSION_ERROR: + strategy = sym_error; + break; + + case SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE: + strategy = sym_escape; + break; + + case SCM_FAILED_CONVERSION_QUESTION_MARK: + strategy = sym_substitute; + break; + + default: + abort (); + } + + scm_fluid_set_x (SCM_VARIABLE_REF (default_conversion_strategy_var), + strategy); +} + static void -finalize_iconv_descriptors (GC_PTR ptr, GC_PTR data) +finalize_iconv_descriptors (void *ptr, void *data) { close_iconv_descriptors (ptr); } @@ -962,13 +1089,13 @@ scm_i_set_port_encoding_x (SCM port, const char *encoding) pt = SCM_PTAB_ENTRY (port); prev = pt->iconv_descriptors; - if (encoding && strcmp (encoding, "UTF-8") == 0) + if (encoding_matches (encoding, "UTF-8")) { pt->encoding = "UTF-8"; pt->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8; pt->iconv_descriptors = NULL; } - else if (!encoding || strcmp (encoding, "ISO-8859-1") == 0) + else if (encoding_matches (encoding, "ISO-8859-1")) { pt->encoding = "ISO-8859-1"; pt->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1; @@ -977,11 +1104,12 @@ scm_i_set_port_encoding_x (SCM port, const char *encoding) else { /* Open descriptors before mutating the port. */ + char *gc_encoding = canonicalize_encoding (encoding); pt->iconv_descriptors = - open_iconv_descriptors (encoding, + open_iconv_descriptors (gc_encoding, SCM_INPUT_PORT_P (port), SCM_OUTPUT_PORT_P (port)); - pt->encoding = scm_gc_strdup (encoding, "port"); + pt->encoding = gc_encoding; pt->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV; } @@ -995,17 +1123,9 @@ SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0, "uses to interpret its input and output.\n") #define FUNC_NAME s_scm_port_encoding { - scm_t_port *pt; - const char *enc; - SCM_VALIDATE_PORT (1, port); - pt = SCM_PTAB_ENTRY (port); - enc = pt->encoding; - if (enc) - return scm_from_latin1_string (pt->encoding); - else - return SCM_BOOL_F; + return scm_from_latin1_string (SCM_PTAB_ENTRY (port)->encoding); } #undef FUNC_NAME @@ -1031,65 +1151,6 @@ SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0, } #undef FUNC_NAME - -/* This determines how conversions handle unconvertible characters. */ -SCM_GLOBAL_VARIABLE (scm_conversion_strategy, "%port-conversion-strategy"); -static int scm_conversion_strategy_init = 0; - -scm_t_string_failed_conversion_handler -scm_i_get_conversion_strategy (SCM port) -{ - SCM encoding; - - if (scm_is_false (port)) - { - if (!scm_conversion_strategy_init - || !scm_is_fluid (SCM_VARIABLE_REF (scm_conversion_strategy))) - return SCM_FAILED_CONVERSION_QUESTION_MARK; - else - { - encoding = scm_fluid_ref (SCM_VARIABLE_REF (scm_conversion_strategy)); - if (scm_is_false (encoding)) - return SCM_FAILED_CONVERSION_QUESTION_MARK; - else - return (scm_t_string_failed_conversion_handler) scm_to_int (encoding); - } - } - else - { - scm_t_port *pt; - pt = SCM_PTAB_ENTRY (port); - return pt->ilseq_handler; - } - -} - -void -scm_i_set_conversion_strategy_x (SCM port, - scm_t_string_failed_conversion_handler handler) -{ - SCM strategy; - scm_t_port *pt; - - strategy = scm_from_int ((int) handler); - - if (scm_is_false (port)) - { - /* Set the default encoding for future ports. */ - if (!scm_conversion_strategy_init - || !scm_is_fluid (SCM_VARIABLE_REF (scm_conversion_strategy))) - scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized", - SCM_EOL); - scm_fluid_set_x (SCM_VARIABLE_REF (scm_conversion_strategy), strategy); - } - else - { - /* Set the character encoding for this port. */ - pt = SCM_PTAB_ENTRY (port); - pt->ilseq_handler = handler; - } -} - SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy", 1, 0, 0, (SCM port), "Returns the behavior of the port when handling a character that\n" @@ -1109,12 +1170,18 @@ SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy", SCM_VALIDATE_OPPORT (1, port); - if (!scm_is_false (port)) + if (scm_is_false (port)) + h = scm_i_default_port_conversion_handler (); + else { + scm_t_port *pt; + SCM_VALIDATE_OPPORT (1, port); + pt = SCM_PTAB_ENTRY (port); + + h = pt->ilseq_handler; } - h = scm_i_get_conversion_strategy (port); if (h == SCM_FAILED_CONVERSION_ERROR) return scm_from_latin1_symbol ("error"); else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK) @@ -1149,40 +1216,25 @@ SCM_DEFINE (scm_set_port_conversion_strategy_x, "set-port-conversion-strategy!", "this thread.\n") #define FUNC_NAME s_scm_set_port_conversion_strategy_x { - SCM err; - SCM qm; - SCM esc; + scm_t_string_failed_conversion_handler handler; - if (!scm_is_false (port)) - { - SCM_VALIDATE_OPPORT (1, port); - } - - err = scm_from_latin1_symbol ("error"); - if (scm_is_true (scm_eqv_p (sym, err))) - { - scm_i_set_conversion_strategy_x (port, SCM_FAILED_CONVERSION_ERROR); - return SCM_UNSPECIFIED; - } - - qm = scm_from_latin1_symbol ("substitute"); - if (scm_is_true (scm_eqv_p (sym, qm))) - { - scm_i_set_conversion_strategy_x (port, - SCM_FAILED_CONVERSION_QUESTION_MARK); - return SCM_UNSPECIFIED; - } + if (scm_is_eq (sym, sym_error)) + handler = SCM_FAILED_CONVERSION_ERROR; + else if (scm_is_eq (sym, sym_substitute)) + handler = SCM_FAILED_CONVERSION_QUESTION_MARK; + else if (scm_is_eq (sym, sym_escape)) + handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE; + else + SCM_MISC_ERROR ("unknown conversion strategy ~s", scm_list_1 (sym)); - esc = scm_from_latin1_symbol ("escape"); - if (scm_is_true (scm_eqv_p (sym, esc))) + if (scm_is_false (port)) + scm_i_set_default_port_conversion_handler (handler); + else { - scm_i_set_conversion_strategy_x (port, - SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE); - return SCM_UNSPECIFIED; + SCM_VALIDATE_OPPORT (1, port); + SCM_PTAB_ENTRY (port)->ilseq_handler = handler; } - SCM_MISC_ERROR ("unknown conversion behavior ~s", scm_list_1 (sym)); - return SCM_UNSPECIFIED; } #undef FUNC_NAME @@ -1322,7 +1374,8 @@ scm_c_read_unlocked (SCM port, void *buffer, size_t size) requested number of bytes. (Note that a single scm_fill_input call does not guarantee to fill the whole of the port's read buffer.) */ - if (pt->read_buf_size <= 1 && pt->encoding == NULL) + if (pt->read_buf_size <= 1 + && pt->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1) { /* The port that we are reading from is unbuffered - i.e. does not have its own persistent buffer - but we have a buffer, @@ -1867,17 +1920,11 @@ scm_ungetc_unlocked (scm_t_wchar c, SCM port) scm_t_port *pt = SCM_PTAB_ENTRY (port); char *result; char result_buf[10]; - const char *encoding; size_t len; int i; - if (pt->encoding != NULL) - encoding = pt->encoding; - else - encoding = "ISO-8859-1"; - len = sizeof (result_buf); - result = u32_conv_to_encoding (encoding, + result = u32_conv_to_encoding (pt->encoding, (enum iconv_ilseq_handler) pt->ilseq_handler, (uint32_t *) &c, 1, NULL, result_buf, &len); @@ -2101,20 +2148,21 @@ scm_fill_input (SCM port) return ret; } -/* move up to read_len chars from port's putback and/or read buffers - into memory starting at dest. returns the number of chars moved. */ +/* Move up to READ_LEN bytes from PORT's putback and/or read buffers + into memory starting at DEST. Return the number of bytes moved. + PORT's line/column numbers are left unchanged. */ size_t scm_take_from_input_buffers (SCM port, char *dest, size_t read_len) { scm_t_port *pt = SCM_PTAB_ENTRY (port); - size_t chars_read = 0; + size_t bytes_read = 0; size_t from_buf = min (pt->read_end - pt->read_pos, read_len); if (from_buf > 0) { memcpy (dest, pt->read_pos, from_buf); pt->read_pos += from_buf; - chars_read += from_buf; + bytes_read += from_buf; read_len -= from_buf; dest += from_buf; } @@ -2127,10 +2175,11 @@ scm_take_from_input_buffers (SCM port, char *dest, size_t read_len) { memcpy (dest, pt->saved_read_pos, from_buf); pt->saved_read_pos += from_buf; - chars_read += from_buf; + bytes_read += from_buf; } } - return chars_read; + + return bytes_read; } /* Clear a port's read buffers, returning the contents. */ @@ -2359,7 +2408,7 @@ scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port) if (end == (size_t) -1) end = scm_i_string_length (str); - scm_display (scm_c_substring (str, start, end), port); + scm_i_display_substring (str, start, end, port); if (pt->rw_random) pt->rw_active = SCM_PORT_WRITE; @@ -2855,11 +2904,10 @@ scm_init_ports () scm_make_fluid_with_default (SCM_BOOL_F)); scm_port_encoding_init = 1; - SCM_VARIABLE_SET (scm_conversion_strategy, - scm_make_fluid_with_default - (scm_from_int ((int) SCM_FAILED_CONVERSION_QUESTION_MARK))); + SCM_VARIABLE_SET (default_conversion_strategy_var, + scm_make_fluid_with_default (sym_substitute)); scm_conversion_strategy_init = 1; - + /* These bindings are used when boot-9 turns `current-input-port' et al into parameters. They are then removed from the guile module. */ scm_c_define ("%current-input-port-fluid", cur_inport_fluid);