X-Git-Url: http://git.hcoop.net/bpt/guile.git/blobdiff_plain/d489998364e5f20e81cf7cf12998e694626d2f6f..2aed2667fce5ccb115667a36ffd368c4c3b6e9f4:/libguile/ports.c diff --git a/libguile/ports.c b/libguile/ports.c index e17ea069d..11142ba65 100644 --- a/libguile/ports.c +++ b/libguile/ports.c @@ -470,7 +470,8 @@ scm_i_mode_bits_n (SCM modes) long scm_mode_bits (char *modes) { - return scm_i_mode_bits (scm_from_locale_string (modes)); + /* Valid characters are rw+a0l. So, use latin1. */ + return scm_i_mode_bits (scm_from_latin1_string (modes)); } long @@ -552,7 +553,7 @@ do_free (void *body_data) /* Finalize the object (a port) pointed to by PTR. */ static void -finalize_port (GC_PTR ptr, GC_PTR data) +finalize_port (void *ptr, void *data) { SCM port = SCM_PACK_POINTER (ptr); @@ -626,7 +627,7 @@ scm_c_make_port (scm_t_bits tag, unsigned long mode_bits, scm_t_bits stream) { return scm_c_make_port_with_encoding (tag, mode_bits, scm_i_default_port_encoding (), - scm_i_get_conversion_strategy (SCM_BOOL_F), + scm_i_default_port_conversion_handler (), stream); } @@ -809,8 +810,20 @@ scm_i_set_default_port_encoding (const char *encoding) || !strcmp (encoding, "ISO-8859-1")) scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F); else - scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), - scm_from_locale_string (encoding)); + { + SCM str; + size_t i; + + str = scm_from_latin1_string (encoding); + + /* Restrict to ASCII. */ + for (i = 0; encoding[i]; i++) + if (encoding[i] > 127) + scm_misc_error ("scm_i_set_default_port_encoding", + "invalid character encoding ~s", scm_list_1 (str)); + + scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), str); + } } /* Return the name of the default encoding for newly created ports; a @@ -834,8 +847,85 @@ scm_i_default_port_encoding (void) } } +/* A fluid specifying the default conversion handler for newly created + ports. Its value should be one of the symbols below. */ +SCM_VARIABLE (default_conversion_strategy_var, + "%default-port-conversion-strategy"); + +/* Whether the above fluid is initialized. */ +static int scm_conversion_strategy_init = 0; + +/* The possible conversion strategies. */ +SCM_SYMBOL (sym_error, "error"); +SCM_SYMBOL (sym_substitute, "substitute"); +SCM_SYMBOL (sym_escape, "escape"); + +/* Return the default failed encoding conversion policy for new created + ports. */ +scm_t_string_failed_conversion_handler +scm_i_default_port_conversion_handler (void) +{ + scm_t_string_failed_conversion_handler handler; + + if (!scm_conversion_strategy_init + || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var))) + handler = SCM_FAILED_CONVERSION_QUESTION_MARK; + else + { + SCM fluid, value; + + fluid = SCM_VARIABLE_REF (default_conversion_strategy_var); + value = scm_fluid_ref (fluid); + + if (scm_is_eq (sym_substitute, value)) + handler = SCM_FAILED_CONVERSION_QUESTION_MARK; + else if (scm_is_eq (sym_escape, value)) + handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE; + else + /* Default to 'error also when the fluid's value is not one of + the valid symbols. */ + handler = SCM_FAILED_CONVERSION_ERROR; + } + + return handler; +} + +/* Use HANDLER as the default conversion strategy for future ports. */ +void +scm_i_set_default_port_conversion_handler (scm_t_string_failed_conversion_handler + handler) +{ + SCM strategy; + + if (!scm_conversion_strategy_init + || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var))) + scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized", + SCM_EOL); + + switch (handler) + { + case SCM_FAILED_CONVERSION_ERROR: + strategy = sym_error; + break; + + case SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE: + strategy = sym_escape; + break; + + case SCM_FAILED_CONVERSION_QUESTION_MARK: + strategy = sym_substitute; + break; + + default: + abort (); + } + + scm_fluid_set_x (SCM_VARIABLE_REF (default_conversion_strategy_var), + strategy); +} + static void -finalize_iconv_descriptors (GC_PTR ptr, GC_PTR data) +finalize_iconv_descriptors (void *ptr, void *data) { close_iconv_descriptors (ptr); } @@ -845,10 +935,15 @@ open_iconv_descriptors (const char *encoding, int reading, int writing) { scm_t_iconv_descriptors *id; iconv_t input_cd, output_cd; + size_t i; input_cd = (iconv_t) -1; output_cd = (iconv_t) -1; + for (i = 0; encoding[i]; i++) + if (encoding[i] > 127) + goto invalid_encoding; + if (reading) { /* Open an input iconv conversion descriptor, from ENCODING @@ -893,7 +988,7 @@ open_iconv_descriptors (const char *encoding, int reading, int writing) invalid_encoding: { SCM err; - err = scm_from_locale_string (encoding); + err = scm_from_latin1_string (encoding); scm_misc_error ("open_iconv_descriptors", "invalid or unknown character encoding ~s", scm_list_1 (err)); @@ -933,6 +1028,7 @@ scm_i_port_iconv_descriptors (SCM port) return pt->iconv_descriptors; } +/* The name of the encoding is itself encoded in ASCII. */ void scm_i_set_port_encoding_x (SCM port, const char *encoding) { @@ -984,7 +1080,7 @@ SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0, pt = SCM_PTAB_ENTRY (port); enc = pt->encoding; if (enc) - return scm_from_locale_string (pt->encoding); + return scm_from_latin1_string (pt->encoding); else return SCM_BOOL_F; } @@ -1004,7 +1100,7 @@ SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0, SCM_VALIDATE_PORT (1, port); SCM_VALIDATE_STRING (2, enc); - enc_str = scm_to_locale_string (enc); + enc_str = scm_to_latin1_string (enc); scm_i_set_port_encoding_x (port, enc_str); free (enc_str); @@ -1012,65 +1108,6 @@ SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0, } #undef FUNC_NAME - -/* This determines how conversions handle unconvertible characters. */ -SCM_GLOBAL_VARIABLE (scm_conversion_strategy, "%port-conversion-strategy"); -static int scm_conversion_strategy_init = 0; - -scm_t_string_failed_conversion_handler -scm_i_get_conversion_strategy (SCM port) -{ - SCM encoding; - - if (scm_is_false (port)) - { - if (!scm_conversion_strategy_init - || !scm_is_fluid (SCM_VARIABLE_REF (scm_conversion_strategy))) - return SCM_FAILED_CONVERSION_QUESTION_MARK; - else - { - encoding = scm_fluid_ref (SCM_VARIABLE_REF (scm_conversion_strategy)); - if (scm_is_false (encoding)) - return SCM_FAILED_CONVERSION_QUESTION_MARK; - else - return (scm_t_string_failed_conversion_handler) scm_to_int (encoding); - } - } - else - { - scm_t_port *pt; - pt = SCM_PTAB_ENTRY (port); - return pt->ilseq_handler; - } - -} - -void -scm_i_set_conversion_strategy_x (SCM port, - scm_t_string_failed_conversion_handler handler) -{ - SCM strategy; - scm_t_port *pt; - - strategy = scm_from_int ((int) handler); - - if (scm_is_false (port)) - { - /* Set the default encoding for future ports. */ - if (!scm_conversion_strategy_init - || !scm_is_fluid (SCM_VARIABLE_REF (scm_conversion_strategy))) - scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized", - SCM_EOL); - scm_fluid_set_x (SCM_VARIABLE_REF (scm_conversion_strategy), strategy); - } - else - { - /* Set the character encoding for this port. */ - pt = SCM_PTAB_ENTRY (port); - pt->ilseq_handler = handler; - } -} - SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy", 1, 0, 0, (SCM port), "Returns the behavior of the port when handling a character that\n" @@ -1090,12 +1127,18 @@ SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy", SCM_VALIDATE_OPPORT (1, port); - if (!scm_is_false (port)) + if (scm_is_false (port)) + h = scm_i_default_port_conversion_handler (); + else { + scm_t_port *pt; + SCM_VALIDATE_OPPORT (1, port); + pt = SCM_PTAB_ENTRY (port); + + h = pt->ilseq_handler; } - h = scm_i_get_conversion_strategy (port); if (h == SCM_FAILED_CONVERSION_ERROR) return scm_from_latin1_symbol ("error"); else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK) @@ -1130,40 +1173,25 @@ SCM_DEFINE (scm_set_port_conversion_strategy_x, "set-port-conversion-strategy!", "this thread.\n") #define FUNC_NAME s_scm_set_port_conversion_strategy_x { - SCM err; - SCM qm; - SCM esc; - - if (!scm_is_false (port)) - { - SCM_VALIDATE_OPPORT (1, port); - } - - err = scm_from_latin1_symbol ("error"); - if (scm_is_true (scm_eqv_p (sym, err))) - { - scm_i_set_conversion_strategy_x (port, SCM_FAILED_CONVERSION_ERROR); - return SCM_UNSPECIFIED; - } + scm_t_string_failed_conversion_handler handler; - qm = scm_from_latin1_symbol ("substitute"); - if (scm_is_true (scm_eqv_p (sym, qm))) - { - scm_i_set_conversion_strategy_x (port, - SCM_FAILED_CONVERSION_QUESTION_MARK); - return SCM_UNSPECIFIED; - } + if (scm_is_eq (sym, sym_error)) + handler = SCM_FAILED_CONVERSION_ERROR; + else if (scm_is_eq (sym, sym_substitute)) + handler = SCM_FAILED_CONVERSION_QUESTION_MARK; + else if (scm_is_eq (sym, sym_escape)) + handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE; + else + SCM_MISC_ERROR ("unknown conversion strategy ~s", scm_list_1 (sym)); - esc = scm_from_latin1_symbol ("escape"); - if (scm_is_true (scm_eqv_p (sym, esc))) + if (scm_is_false (port)) + scm_i_set_default_port_conversion_handler (handler); + else { - scm_i_set_conversion_strategy_x (port, - SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE); - return SCM_UNSPECIFIED; + SCM_VALIDATE_OPPORT (1, port); + SCM_PTAB_ENTRY (port)->ilseq_handler = handler; } - SCM_MISC_ERROR ("unknown conversion behavior ~s", scm_list_1 (sym)); - return SCM_UNSPECIFIED; } #undef FUNC_NAME @@ -1588,9 +1616,9 @@ get_utf8_codepoint (SCM port, scm_t_wchar *codepoint, } /* Read an ISO-8859-1 codepoint (a byte) from PORT. On success, return - *0 and set CODEPOINT to the codepoint that was read, fill BUF with - *its UTF-8 representation, and set *LEN to the length in bytes. - *Return `EILSEQ' on error. */ + 0 and set *CODEPOINT to the codepoint that was read, fill BUF with + its UTF-8 representation, and set *LEN to the length in bytes. + Return `EILSEQ' on error. */ static int get_latin1_codepoint (SCM port, scm_t_wchar *codepoint, char buf[SCM_MBCHAR_BUF_SIZE], size_t *len) @@ -1678,7 +1706,7 @@ get_iconv_codepoint (SCM port, scm_t_wchar *codepoint, with the byte representation of the codepoint in PORT's encoding, and set *LEN to the length in bytes of that representation. Return 0 on success and an errno value on error. */ -static int +static SCM_C_INLINE int get_codepoint (SCM port, scm_t_wchar *codepoint, char buf[SCM_MBCHAR_BUF_SIZE], size_t *len) { @@ -2082,20 +2110,21 @@ scm_fill_input (SCM port) return ret; } -/* move up to read_len chars from port's putback and/or read buffers - into memory starting at dest. returns the number of chars moved. */ +/* Move up to READ_LEN bytes from PORT's putback and/or read buffers + into memory starting at DEST. Return the number of bytes moved. + PORT's line/column numbers are left unchanged. */ size_t scm_take_from_input_buffers (SCM port, char *dest, size_t read_len) { scm_t_port *pt = SCM_PTAB_ENTRY (port); - size_t chars_read = 0; + size_t bytes_read = 0; size_t from_buf = min (pt->read_end - pt->read_pos, read_len); if (from_buf > 0) { memcpy (dest, pt->read_pos, from_buf); pt->read_pos += from_buf; - chars_read += from_buf; + bytes_read += from_buf; read_len -= from_buf; dest += from_buf; } @@ -2108,10 +2137,11 @@ scm_take_from_input_buffers (SCM port, char *dest, size_t read_len) { memcpy (dest, pt->saved_read_pos, from_buf); pt->saved_read_pos += from_buf; - chars_read += from_buf; + bytes_read += from_buf; } } - return chars_read; + + return bytes_read; } /* Clear a port's read buffers, returning the contents. */ @@ -2836,11 +2866,10 @@ scm_init_ports () scm_make_fluid_with_default (SCM_BOOL_F)); scm_port_encoding_init = 1; - SCM_VARIABLE_SET (scm_conversion_strategy, - scm_make_fluid_with_default - (scm_from_int ((int) SCM_FAILED_CONVERSION_QUESTION_MARK))); + SCM_VARIABLE_SET (default_conversion_strategy_var, + scm_make_fluid_with_default (sym_substitute)); scm_conversion_strategy_init = 1; - + /* These bindings are used when boot-9 turns `current-input-port' et al into parameters. They are then removed from the guile module. */ scm_c_define ("%current-input-port-fluid", cur_inport_fluid);