X-Git-Url: http://git.hcoop.net/bpt/guile.git/blobdiff_plain/14dcb5ccd282622abd71aed3e41235d90f89d39d..26d148066f9cb20e395a7dc4fefdf2e2ef0b2fb0:/libguile/ports.c diff --git a/libguile/ports.c b/libguile/ports.c index bad524e97..31d338efe 100644 --- a/libguile/ports.c +++ b/libguile/ports.c @@ -1,5 +1,5 @@ /* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, - * 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. + * 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -90,6 +90,56 @@ #endif +/* Port encodings are case-insensitive ASCII strings. */ +static char +ascii_toupper (char c) +{ + return (c < 'a' || c > 'z') ? c : ('A' + (c - 'a')); +} + +/* It is only necessary to use this function on encodings that come from + the user and have not been canonicalized yet. Encodings that are set + on ports or in the default encoding fluid are in upper-case, and can + be compared with strcmp. */ +static int +encoding_matches (const char *enc, const char *upper) +{ + if (!enc) + enc = "ISO-8859-1"; + + while (*enc) + if (ascii_toupper (*enc++) != *upper++) + return 0; + + return !*upper; +} + +static char* +canonicalize_encoding (const char *enc) +{ + char *ret; + int i; + + if (!enc) + return "ISO-8859-1"; + + ret = scm_gc_strdup (enc, "port"); + + for (i = 0; ret[i]; i++) + { + if (ret[i] > 127) + /* Restrict to ASCII. */ + scm_misc_error (NULL, "invalid character encoding ~s", + scm_list_1 (scm_from_latin1_string (enc))); + else + ret[i] = ascii_toupper (ret[i]); + } + + return ret; +} + + + /* The port kind table --- a dynamically resized array of port types. */ @@ -250,7 +300,9 @@ scm_set_port_close (scm_t_bits tc, int (*close) (SCM)) void scm_set_port_flush (scm_t_bits tc, void (*flush) (SCM port)) { - scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->flush = flush; + scm_t_ptob_descriptor *ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tc)); + ptob->flush = flush; + ptob->flags |= SCM_PORT_TYPE_HAS_FLUSH; } void @@ -328,6 +380,21 @@ SCM_DEFINE (scm_current_error_port, "current-error-port", 0, 0, 0, } #undef FUNC_NAME +SCM +scm_current_warning_port (void) +{ + static SCM cwp_var = SCM_UNDEFINED; + static scm_i_pthread_mutex_t cwp_var_mutex + = SCM_I_PTHREAD_MUTEX_INITIALIZER; + + scm_i_scm_pthread_mutex_lock (&cwp_var_mutex); + if (SCM_UNBNDP (cwp_var)) + cwp_var = scm_c_private_variable ("guile", "current-warning-port"); + scm_i_pthread_mutex_unlock (&cwp_var_mutex); + + return scm_call_0 (scm_variable_ref (cwp_var)); +} + SCM_DEFINE (scm_current_load_port, "current-load-port", 0, 0, 0, (), "Return the current-load-port.\n" @@ -382,6 +449,19 @@ SCM_DEFINE (scm_set_current_error_port, "set-current-error-port", 1, 0, 0, } #undef FUNC_NAME + +SCM +scm_set_current_warning_port (SCM port) +{ + static SCM cwp_var = SCM_BOOL_F; + + if (scm_is_false (cwp_var)) + cwp_var = scm_c_private_lookup ("guile", "current-warning-port"); + + return scm_call_1 (scm_variable_ref (cwp_var), port); +} + + void scm_dynwind_current_input_port (SCM port) #define FUNC_NAME NULL @@ -444,7 +524,8 @@ scm_i_mode_bits_n (SCM modes) long scm_mode_bits (char *modes) { - return scm_i_mode_bits (scm_from_locale_string (modes)); + /* Valid characters are rw+a0l. So, use latin1. */ + return scm_i_mode_bits (scm_from_latin1_string (modes)); } long @@ -506,26 +587,27 @@ SCM scm_i_port_weak_set; /* Port finalization. */ -static void finalize_port (GC_PTR, GC_PTR); +struct do_free_data +{ + scm_t_ptob_descriptor *ptob; + SCM port; +}; -/* Register a finalizer for PORT. */ -static SCM_C_INLINE_KEYWORD void -register_finalizer_for_port (SCM port) +static SCM +do_free (void *body_data) { - GC_finalization_proc prev_finalizer; - GC_PTR prev_finalization_data; + struct do_free_data *data = body_data; + + /* `close' is for explicit `close-port' by user. `free' is for this + purpose: ports collected by the GC. */ + data->ptob->free (data->port); - /* Register a finalizer for PORT so that its iconv CDs get freed and - optionally its type's `free' function gets called. */ - GC_REGISTER_FINALIZER_NO_ORDER (SCM_HEAP_OBJECT_BASE (port), - finalize_port, 0, - &prev_finalizer, - &prev_finalization_data); + return SCM_BOOL_T; } /* Finalize the object (a port) pointed to by PTR. */ static void -finalize_port (GC_PTR ptr, GC_PTR data) +finalize_port (void *ptr, void *data) { SCM port = SCM_PACK_POINTER (ptr); @@ -534,31 +616,17 @@ finalize_port (GC_PTR ptr, GC_PTR data) if (SCM_OPENP (port)) { - if (SCM_REVEALED (port) > 0) - /* Keep "revealed" ports alive and re-register a finalizer. */ - register_finalizer_for_port (port); - else - { - scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port); - scm_t_port *entry; - - if (ptob->free) - /* Yes, I really do mean `free' rather than `close'. `close' - is for explicit `close-port' by user. */ - ptob->free (port); + struct do_free_data data; - entry = SCM_PTAB_ENTRY (port); + SCM_CLR_PORT_OPEN_FLAG (port); - if (entry->input_cd != (iconv_t) -1) - iconv_close (entry->input_cd); - if (entry->output_cd != (iconv_t) -1) - iconv_close (entry->output_cd); + data.ptob = SCM_PORT_DESCRIPTOR (port); + data.port = port; - SCM_SETSTREAM (port, 0); - SCM_CLR_PORT_OPEN_FLAG (port); + scm_internal_catch (SCM_BOOL_T, do_free, &data, + scm_handle_by_message_noexit, NULL); - scm_gc_ports_collected++; - } + scm_gc_ports_collected++; } } @@ -582,25 +650,40 @@ scm_c_make_port_with_encoding (scm_t_bits tag, unsigned long mode_bits, SCM_SET_CELL_WORD_1 (ret, (scm_t_bits) entry); SCM_SET_CELL_WORD_2 (ret, (scm_t_bits) ptob); -#if SCM_USE_PTHREAD_THREADS - scm_i_pthread_mutex_init (&entry->lock, scm_i_pthread_mutexattr_recursive); -#endif + entry->lock = scm_gc_malloc_pointerless (sizeof (*entry->lock), "port lock"); + scm_i_pthread_mutex_init (entry->lock, scm_i_pthread_mutexattr_recursive); entry->file_name = SCM_BOOL_F; entry->rw_active = SCM_PORT_NEITHER; entry->port = ret; entry->stream = stream; - entry->encoding = encoding ? scm_gc_strdup (encoding, "port") : NULL; - /* The conversion descriptors will be opened lazily. */ - entry->input_cd = (iconv_t) -1; - entry->output_cd = (iconv_t) -1; + + if (encoding_matches (encoding, "UTF-8")) + { + entry->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8; + entry->encoding = "UTF-8"; + } + else if (encoding_matches (encoding, "ISO-8859-1")) + { + entry->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1; + entry->encoding = "ISO-8859-1"; + } + else + { + entry->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV; + entry->encoding = canonicalize_encoding (encoding); + } + entry->ilseq_handler = handler; + entry->iconv_descriptors = NULL; - scm_weak_set_add_x (scm_i_port_weak_set, ret); + entry->alist = SCM_EOL; - /* For each new port, register a finalizer so that it port type's free - function can be invoked eventually. */ - register_finalizer_for_port (ret); + if (SCM_PORT_DESCRIPTOR (ret)->free) + scm_i_set_finalizer (SCM2PTR (ret), finalize_port, NULL); + + if (SCM_PORT_DESCRIPTOR (ret)->flags & SCM_PORT_TYPE_HAS_FLUSH) + scm_weak_set_add_x (scm_i_port_weak_set, ret); return ret; } @@ -610,7 +693,7 @@ scm_c_make_port (scm_t_bits tag, unsigned long mode_bits, scm_t_bits stream) { return scm_c_make_port_with_encoding (tag, mode_bits, scm_i_default_port_encoding (), - scm_i_get_conversion_strategy (SCM_BOOL_F), + scm_i_default_port_conversion_handler (), stream); } @@ -620,37 +703,6 @@ scm_new_port_table_entry (scm_t_bits tag) return scm_c_make_port (tag, 0, 0); } -/* Remove a port from the table and destroy it. */ - -static void -scm_i_remove_port (SCM port) -#define FUNC_NAME "scm_remove_port" -{ - scm_t_port *p; - - p = SCM_PTAB_ENTRY (port); - scm_port_non_buffer (p); - SCM_SETPTAB_ENTRY (port, 0); - scm_weak_set_remove_x (scm_i_port_weak_set, port); - - p->putback_buf = NULL; - p->putback_buf_size = 0; - - if (p->input_cd != (iconv_t) -1) - { - iconv_close (p->input_cd); - p->input_cd = (iconv_t) -1; - } - - if (p->output_cd != (iconv_t) -1) - { - iconv_close (p->output_cd); - p->output_cd = (iconv_t) -1; - } -} -#undef FUNC_NAME - - /* Predicates. */ @@ -715,6 +767,8 @@ SCM_DEFINE (scm_eof_object_p, "eof-object?", 1, 0, 0, /* Closing ports. */ +static void close_iconv_descriptors (scm_t_iconv_descriptors *id); + /* scm_close_port * Call the close operation on a port object. * see also scm_close. @@ -729,6 +783,7 @@ SCM_DEFINE (scm_close_port, "close-port", 1, 0, 0, "descriptors.") #define FUNC_NAME s_scm_close_port { + scm_t_port *p; int rv; port = SCM_COERCE_OUTPORT (port); @@ -736,12 +791,28 @@ SCM_DEFINE (scm_close_port, "close-port", 1, 0, 0, SCM_VALIDATE_PORT (1, port); if (SCM_CLOSEDP (port)) return SCM_BOOL_F; + + p = SCM_PTAB_ENTRY (port); + SCM_CLR_PORT_OPEN_FLAG (port); + + if (SCM_PORT_DESCRIPTOR (port)->flags & SCM_PORT_TYPE_HAS_FLUSH) + scm_weak_set_remove_x (scm_i_port_weak_set, port); + if (SCM_PORT_DESCRIPTOR (port)->close) + /* Note! This may throw an exception. Anything after this point + should be resilient to non-local exits. */ rv = SCM_PORT_DESCRIPTOR (port)->close (port); else rv = 0; - scm_i_remove_port (port); - SCM_CLR_PORT_OPEN_FLAG (port); + + if (p->iconv_descriptors) + { + /* If we don't get here, the iconv_descriptors finalizer will + clean up. */ + close_iconv_descriptors (p->iconv_descriptors); + p->iconv_descriptors = NULL; + } + return scm_from_bool (rv >= 0); } #undef FUNC_NAME @@ -799,121 +870,262 @@ scm_i_set_default_port_encoding (const char *encoding) scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized", SCM_EOL); - if (encoding == NULL - || !strcmp (encoding, "ASCII") - || !strcmp (encoding, "ANSI_X3.4-1968") - || !strcmp (encoding, "ISO-8859-1")) + if (encoding_matches (encoding, "ISO-8859-1")) scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F); else scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), - scm_from_locale_string (encoding)); + scm_from_latin1_string (canonicalize_encoding (encoding))); } -/* Return the name of the default encoding for newly created ports; a - return value of NULL means "ISO-8859-1". */ +/* Return the name of the default encoding for newly created ports. */ const char * scm_i_default_port_encoding (void) { if (!scm_port_encoding_init) - return NULL; + return "ISO-8859-1"; else if (!scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var))) - return NULL; + return "ISO-8859-1"; else { SCM encoding; encoding = scm_fluid_ref (SCM_VARIABLE_REF (default_port_encoding_var)); if (!scm_is_string (encoding)) - return NULL; + return "ISO-8859-1"; else return scm_i_string_chars (encoding); } } +/* A fluid specifying the default conversion handler for newly created + ports. Its value should be one of the symbols below. */ +SCM_VARIABLE (default_conversion_strategy_var, + "%default-port-conversion-strategy"); + +/* Whether the above fluid is initialized. */ +static int scm_conversion_strategy_init = 0; + +/* The possible conversion strategies. */ +SCM_SYMBOL (sym_error, "error"); +SCM_SYMBOL (sym_substitute, "substitute"); +SCM_SYMBOL (sym_escape, "escape"); + +/* Return the default failed encoding conversion policy for new created + ports. */ +scm_t_string_failed_conversion_handler +scm_i_default_port_conversion_handler (void) +{ + scm_t_string_failed_conversion_handler handler; + + if (!scm_conversion_strategy_init + || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var))) + handler = SCM_FAILED_CONVERSION_QUESTION_MARK; + else + { + SCM fluid, value; + + fluid = SCM_VARIABLE_REF (default_conversion_strategy_var); + value = scm_fluid_ref (fluid); + + if (scm_is_eq (sym_substitute, value)) + handler = SCM_FAILED_CONVERSION_QUESTION_MARK; + else if (scm_is_eq (sym_escape, value)) + handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE; + else + /* Default to 'error also when the fluid's value is not one of + the valid symbols. */ + handler = SCM_FAILED_CONVERSION_ERROR; + } + + return handler; +} + +/* Use HANDLER as the default conversion strategy for future ports. */ void -scm_i_set_port_encoding_x (SCM port, const char *encoding) +scm_i_set_default_port_conversion_handler (scm_t_string_failed_conversion_handler + handler) { - scm_t_port *pt; - iconv_t new_input_cd, new_output_cd; + SCM strategy; - new_input_cd = (iconv_t) -1; - new_output_cd = (iconv_t) -1; + if (!scm_conversion_strategy_init + || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var))) + scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized", + SCM_EOL); - /* Set the character encoding for this port. */ - pt = SCM_PTAB_ENTRY (port); + switch (handler) + { + case SCM_FAILED_CONVERSION_ERROR: + strategy = sym_error; + break; + + case SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE: + strategy = sym_escape; + break; + + case SCM_FAILED_CONVERSION_QUESTION_MARK: + strategy = sym_substitute; + break; + + default: + abort (); + } - if (encoding == NULL) - encoding = "ISO-8859-1"; + scm_fluid_set_x (SCM_VARIABLE_REF (default_conversion_strategy_var), + strategy); +} - if (pt->encoding != encoding) - pt->encoding = scm_gc_strdup (encoding, "port"); +static void +finalize_iconv_descriptors (void *ptr, void *data) +{ + close_iconv_descriptors (ptr); +} - /* If ENCODING is UTF-8, then no conversion descriptor is opened - because we do I/O ourselves. This saves 100+ KiB for each - descriptor. */ - if (strcmp (encoding, "UTF-8")) +static scm_t_iconv_descriptors * +open_iconv_descriptors (const char *encoding, int reading, int writing) +{ + scm_t_iconv_descriptors *id; + iconv_t input_cd, output_cd; + size_t i; + + input_cd = (iconv_t) -1; + output_cd = (iconv_t) -1; + + for (i = 0; encoding[i]; i++) + if (encoding[i] > 127) + goto invalid_encoding; + + if (reading) { - if (SCM_CELL_WORD_0 (port) & SCM_RDNG) - { - /* Open an input iconv conversion descriptor, from ENCODING - to UTF-8. We choose UTF-8, not UTF-32, because iconv - implementations can typically convert from anything to - UTF-8, but not to UTF-32 (see - ). */ - new_input_cd = iconv_open ("UTF-8", encoding); - if (new_input_cd == (iconv_t) -1) - goto invalid_encoding; - } + /* Open an input iconv conversion descriptor, from ENCODING + to UTF-8. We choose UTF-8, not UTF-32, because iconv + implementations can typically convert from anything to + UTF-8, but not to UTF-32 (see + ). */ + + /* Assume opening an iconv descriptor causes about 16 KB of + allocation. */ + scm_gc_register_allocation (16 * 1024); + + input_cd = iconv_open ("UTF-8", encoding); + if (input_cd == (iconv_t) -1) + goto invalid_encoding; + } - if (SCM_CELL_WORD_0 (port) & SCM_WRTNG) - { - new_output_cd = iconv_open (encoding, "UTF-8"); - if (new_output_cd == (iconv_t) -1) - { - if (new_input_cd != (iconv_t) -1) - iconv_close (new_input_cd); - goto invalid_encoding; - } - } + if (writing) + { + /* Assume opening an iconv descriptor causes about 16 KB of + allocation. */ + scm_gc_register_allocation (16 * 1024); + + output_cd = iconv_open (encoding, "UTF-8"); + if (output_cd == (iconv_t) -1) + { + if (input_cd != (iconv_t) -1) + iconv_close (input_cd); + goto invalid_encoding; + } } - if (pt->input_cd != (iconv_t) -1) - iconv_close (pt->input_cd); - if (pt->output_cd != (iconv_t) -1) - iconv_close (pt->output_cd); + id = scm_gc_malloc_pointerless (sizeof (*id), "iconv descriptors"); + id->input_cd = input_cd; + id->output_cd = output_cd; - pt->input_cd = new_input_cd; - pt->output_cd = new_output_cd; + /* Register a finalizer to close the descriptors. */ + scm_i_set_finalizer (id, finalize_iconv_descriptors, NULL); - return; + return id; invalid_encoding: { SCM err; - err = scm_from_locale_string (encoding); - scm_misc_error ("scm_i_set_port_encoding_x", + err = scm_from_latin1_string (encoding); + scm_misc_error ("open_iconv_descriptors", "invalid or unknown character encoding ~s", scm_list_1 (err)); } } +static void +close_iconv_descriptors (scm_t_iconv_descriptors *id) +{ + if (id->input_cd != (iconv_t) -1) + iconv_close (id->input_cd); + if (id->output_cd != (iconv_t) -1) + iconv_close (id->output_cd); + id->input_cd = (void *) -1; + id->output_cd = (void *) -1; +} + +scm_t_iconv_descriptors * +scm_i_port_iconv_descriptors (SCM port) +{ + scm_t_port *pt; + + pt = SCM_PTAB_ENTRY (port); + + assert (pt->encoding_mode == SCM_PORT_ENCODING_MODE_ICONV); + + if (!pt->iconv_descriptors) + { + if (!pt->encoding) + pt->encoding = "ISO-8859-1"; + pt->iconv_descriptors = + open_iconv_descriptors (pt->encoding, + SCM_INPUT_PORT_P (port), + SCM_OUTPUT_PORT_P (port)); + } + + return pt->iconv_descriptors; +} + +/* The name of the encoding is itself encoded in ASCII. */ +void +scm_i_set_port_encoding_x (SCM port, const char *encoding) +{ + scm_t_port *pt; + scm_t_iconv_descriptors *prev; + + /* Set the character encoding for this port. */ + pt = SCM_PTAB_ENTRY (port); + prev = pt->iconv_descriptors; + + if (encoding_matches (encoding, "UTF-8")) + { + pt->encoding = "UTF-8"; + pt->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8; + pt->iconv_descriptors = NULL; + } + else if (encoding_matches (encoding, "ISO-8859-1")) + { + pt->encoding = "ISO-8859-1"; + pt->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1; + pt->iconv_descriptors = NULL; + } + else + { + /* Open descriptors before mutating the port. */ + char *gc_encoding = canonicalize_encoding (encoding); + pt->iconv_descriptors = + open_iconv_descriptors (gc_encoding, + SCM_INPUT_PORT_P (port), + SCM_OUTPUT_PORT_P (port)); + pt->encoding = gc_encoding; + pt->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV; + } + + if (prev) + close_iconv_descriptors (prev); +} + SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0, (SCM port), "Returns, as a string, the character encoding that @var{port}\n" "uses to interpret its input and output.\n") #define FUNC_NAME s_scm_port_encoding { - scm_t_port *pt; - const char *enc; - SCM_VALIDATE_PORT (1, port); - pt = SCM_PTAB_ENTRY (port); - enc = pt->encoding; - if (enc) - return scm_from_locale_string (pt->encoding); - else - return SCM_BOOL_F; + return scm_from_latin1_string (SCM_PTAB_ENTRY (port)->encoding); } #undef FUNC_NAME @@ -931,7 +1143,7 @@ SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0, SCM_VALIDATE_PORT (1, port); SCM_VALIDATE_STRING (2, enc); - enc_str = scm_to_locale_string (enc); + enc_str = scm_to_latin1_string (enc); scm_i_set_port_encoding_x (port, enc_str); free (enc_str); @@ -939,65 +1151,6 @@ SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0, } #undef FUNC_NAME - -/* This determines how conversions handle unconvertible characters. */ -SCM_GLOBAL_VARIABLE (scm_conversion_strategy, "%port-conversion-strategy"); -static int scm_conversion_strategy_init = 0; - -scm_t_string_failed_conversion_handler -scm_i_get_conversion_strategy (SCM port) -{ - SCM encoding; - - if (scm_is_false (port)) - { - if (!scm_conversion_strategy_init - || !scm_is_fluid (SCM_VARIABLE_REF (scm_conversion_strategy))) - return SCM_FAILED_CONVERSION_QUESTION_MARK; - else - { - encoding = scm_fluid_ref (SCM_VARIABLE_REF (scm_conversion_strategy)); - if (scm_is_false (encoding)) - return SCM_FAILED_CONVERSION_QUESTION_MARK; - else - return (scm_t_string_failed_conversion_handler) scm_to_int (encoding); - } - } - else - { - scm_t_port *pt; - pt = SCM_PTAB_ENTRY (port); - return pt->ilseq_handler; - } - -} - -void -scm_i_set_conversion_strategy_x (SCM port, - scm_t_string_failed_conversion_handler handler) -{ - SCM strategy; - scm_t_port *pt; - - strategy = scm_from_int ((int) handler); - - if (scm_is_false (port)) - { - /* Set the default encoding for future ports. */ - if (!scm_conversion_strategy_init - || !scm_is_fluid (SCM_VARIABLE_REF (scm_conversion_strategy))) - scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized", - SCM_EOL); - scm_fluid_set_x (SCM_VARIABLE_REF (scm_conversion_strategy), strategy); - } - else - { - /* Set the character encoding for this port. */ - pt = SCM_PTAB_ENTRY (port); - pt->ilseq_handler = handler; - } -} - SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy", 1, 0, 0, (SCM port), "Returns the behavior of the port when handling a character that\n" @@ -1017,12 +1170,18 @@ SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy", SCM_VALIDATE_OPPORT (1, port); - if (!scm_is_false (port)) + if (scm_is_false (port)) + h = scm_i_default_port_conversion_handler (); + else { + scm_t_port *pt; + SCM_VALIDATE_OPPORT (1, port); + pt = SCM_PTAB_ENTRY (port); + + h = pt->ilseq_handler; } - h = scm_i_get_conversion_strategy (port); if (h == SCM_FAILED_CONVERSION_ERROR) return scm_from_latin1_symbol ("error"); else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK) @@ -1057,40 +1216,25 @@ SCM_DEFINE (scm_set_port_conversion_strategy_x, "set-port-conversion-strategy!", "this thread.\n") #define FUNC_NAME s_scm_set_port_conversion_strategy_x { - SCM err; - SCM qm; - SCM esc; - - if (!scm_is_false (port)) - { - SCM_VALIDATE_OPPORT (1, port); - } - - err = scm_from_latin1_symbol ("error"); - if (scm_is_true (scm_eqv_p (sym, err))) - { - scm_i_set_conversion_strategy_x (port, SCM_FAILED_CONVERSION_ERROR); - return SCM_UNSPECIFIED; - } + scm_t_string_failed_conversion_handler handler; - qm = scm_from_latin1_symbol ("substitute"); - if (scm_is_true (scm_eqv_p (sym, qm))) - { - scm_i_set_conversion_strategy_x (port, - SCM_FAILED_CONVERSION_QUESTION_MARK); - return SCM_UNSPECIFIED; - } + if (scm_is_eq (sym, sym_error)) + handler = SCM_FAILED_CONVERSION_ERROR; + else if (scm_is_eq (sym, sym_substitute)) + handler = SCM_FAILED_CONVERSION_QUESTION_MARK; + else if (scm_is_eq (sym, sym_escape)) + handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE; + else + SCM_MISC_ERROR ("unknown conversion strategy ~s", scm_list_1 (sym)); - esc = scm_from_latin1_symbol ("escape"); - if (scm_is_true (scm_eqv_p (sym, esc))) + if (scm_is_false (port)) + scm_i_set_default_port_conversion_handler (handler); + else { - scm_i_set_conversion_strategy_x (port, - SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE); - return SCM_UNSPECIFIED; + SCM_VALIDATE_OPPORT (1, port); + SCM_PTAB_ENTRY (port)->ilseq_handler = handler; } - SCM_MISC_ERROR ("unknown conversion behavior ~s", scm_list_1 (sym)); - return SCM_UNSPECIFIED; } #undef FUNC_NAME @@ -1101,90 +1245,29 @@ SCM_DEFINE (scm_set_port_conversion_strategy_x, "set-port-conversion-strategy!", /* The port lock. */ static void -lock_port (SCM port) +lock_port (void *mutex) { - scm_c_lock_port (port); + scm_i_pthread_mutex_lock (mutex); } static void -unlock_port (SCM port) +unlock_port (void *mutex) { - scm_c_unlock_port (port); + scm_i_pthread_mutex_unlock (mutex); } void scm_dynwind_lock_port (SCM port) +#define FUNC_NAME "dynwind-lock-port" { - scm_dynwind_unwind_handler_with_scm (unlock_port, port, - SCM_F_WIND_EXPLICITLY); - scm_dynwind_rewind_handler_with_scm (lock_port, port, - SCM_F_WIND_EXPLICITLY); -} - - - - -/* Revealed counts --- an oddity inherited from SCSH. */ - -/* Find a port in the table and return its revealed count. - Also used by the garbage collector. - */ -int -scm_revealed_count (SCM port) -{ - int ret; - - scm_c_lock_port (port); - ret = SCM_REVEALED (port); - scm_c_unlock_port (port); - - return ret; -} - -SCM_DEFINE (scm_port_revealed, "port-revealed", 1, 0, 0, - (SCM port), - "Return the revealed count for @var{port}.") -#define FUNC_NAME s_scm_port_revealed -{ - port = SCM_COERCE_OUTPORT (port); - SCM_VALIDATE_OPENPORT (1, port); - return scm_from_int (scm_revealed_count (port)); -} -#undef FUNC_NAME - -/* Set the revealed count for a port. */ -SCM_DEFINE (scm_set_port_revealed_x, "set-port-revealed!", 2, 0, 0, - (SCM port, SCM rcount), - "Sets the revealed count for a port to a given value.\n" - "The return value is unspecified.") -#define FUNC_NAME s_scm_set_port_revealed_x -{ - int r; - port = SCM_COERCE_OUTPORT (port); - SCM_VALIDATE_OPENPORT (1, port); - r = scm_to_int (rcount); - scm_c_lock_port (port); - SCM_REVEALED (port) = r; - scm_c_unlock_port (port); - return SCM_UNSPECIFIED; -} -#undef FUNC_NAME - -/* Set the revealed count for a port. */ -SCM_DEFINE (scm_adjust_port_revealed_x, "adjust-port-revealed!", 2, 0, 0, - (SCM port, SCM addend), - "Add @var{addend} to the revealed count of @var{port}.\n" - "The return value is unspecified.") -#define FUNC_NAME s_scm_set_port_revealed_x -{ - int a; - port = SCM_COERCE_OUTPORT (port); - SCM_VALIDATE_OPENPORT (1, port); - a = scm_to_int (addend); - scm_c_lock_port (port); - SCM_REVEALED (port) += a; - scm_c_unlock_port (port); - return SCM_UNSPECIFIED; + scm_i_pthread_mutex_t *lock; + SCM_VALIDATE_OPPORT (SCM_ARG1, port); + scm_c_lock_port (port, &lock); + if (lock) + { + scm_dynwind_unwind_handler (unlock_port, lock, SCM_F_WIND_EXPLICITLY); + scm_dynwind_rewind_handler (lock_port, lock, 0); + } } #undef FUNC_NAME @@ -1196,11 +1279,13 @@ SCM_DEFINE (scm_adjust_port_revealed_x, "adjust-port-revealed!", 2, 0, 0, int scm_get_byte_or_eof (SCM port) { + scm_i_pthread_mutex_t *lock; int ret; - scm_c_lock_port (port); + scm_c_lock_port (port, &lock); ret = scm_get_byte_or_eof_unlocked (port); - scm_c_unlock_port (port); + if (lock) + scm_i_pthread_mutex_unlock (lock); return ret; } @@ -1208,11 +1293,13 @@ scm_get_byte_or_eof (SCM port) int scm_peek_byte_or_eof (SCM port) { + scm_i_pthread_mutex_t *lock; int ret; - scm_c_lock_port (port); + scm_c_lock_port (port, &lock); ret = scm_peek_byte_or_eof_unlocked (port); - scm_c_unlock_port (port); + if (lock) + scm_i_pthread_mutex_unlock (lock); return ret; } @@ -1287,7 +1374,8 @@ scm_c_read_unlocked (SCM port, void *buffer, size_t size) requested number of bytes. (Note that a single scm_fill_input call does not guarantee to fill the whole of the port's read buffer.) */ - if (pt->read_buf_size <= 1 && pt->encoding == NULL) + if (pt->read_buf_size <= 1 + && pt->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1) { /* The port that we are reading from is unbuffered - i.e. does not have its own persistent buffer - but we have a buffer, @@ -1358,11 +1446,14 @@ scm_c_read_unlocked (SCM port, void *buffer, size_t size) size_t scm_c_read (SCM port, void *buffer, size_t size) { + scm_i_pthread_mutex_t *lock; size_t ret; - scm_c_lock_port (port); + scm_c_lock_port (port, &lock); ret = scm_c_read_unlocked (port, buffer, size); - scm_c_unlock_port (port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + return ret; } @@ -1568,19 +1659,39 @@ get_utf8_codepoint (SCM port, scm_t_wchar *codepoint, #undef ASSERT_NOT_EOF } +/* Read an ISO-8859-1 codepoint (a byte) from PORT. On success, return + 0 and set *CODEPOINT to the codepoint that was read, fill BUF with + its UTF-8 representation, and set *LEN to the length in bytes. + Return `EILSEQ' on error. */ +static int +get_latin1_codepoint (SCM port, scm_t_wchar *codepoint, + char buf[SCM_MBCHAR_BUF_SIZE], size_t *len) +{ + *codepoint = scm_get_byte_or_eof_unlocked (port); + + if (*codepoint == EOF) + *len = 0; + else + { + *len = 1; + buf[0] = *codepoint; + } + return 0; +} + /* Likewise, read a byte sequence from PORT, passing it through its input conversion descriptor. */ static int get_iconv_codepoint (SCM port, scm_t_wchar *codepoint, char buf[SCM_MBCHAR_BUF_SIZE], size_t *len) { - scm_t_port *pt; + scm_t_iconv_descriptors *id; int err, byte_read; size_t bytes_consumed, output_size; char *output; scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE]; - pt = SCM_PTAB_ENTRY (port); + id = scm_i_port_iconv_descriptors (port); for (output_size = 0, output = (char *) utf8_buf, bytes_consumed = 0, err = 0; @@ -1610,8 +1721,7 @@ get_iconv_codepoint (SCM port, scm_t_wchar *codepoint, input_left = bytes_consumed + 1; output_left = sizeof (utf8_buf); - done = iconv (pt->input_cd, &input, &input_left, - &output, &output_left); + done = iconv (id->input_cd, &input, &input_left, &output, &output_left); if (done == (size_t) -1) { err = errno; @@ -1640,20 +1750,17 @@ get_iconv_codepoint (SCM port, scm_t_wchar *codepoint, with the byte representation of the codepoint in PORT's encoding, and set *LEN to the length in bytes of that representation. Return 0 on success and an errno value on error. */ -static int +static SCM_C_INLINE int get_codepoint (SCM port, scm_t_wchar *codepoint, char buf[SCM_MBCHAR_BUF_SIZE], size_t *len) { int err; scm_t_port *pt = SCM_PTAB_ENTRY (port); - if (pt->input_cd == (iconv_t) -1) - /* Initialize the conversion descriptors, if needed. */ - scm_i_set_port_encoding_x (port, pt->encoding); - - /* FIXME: In 2.1, add a flag to determine whether a port is UTF-8. */ - if (pt->input_cd == (iconv_t) -1) + if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8) err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len); + else if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1) + err = get_latin1_codepoint (port, codepoint, buf, len); else err = get_iconv_codepoint (port, codepoint, buf, len); @@ -1692,11 +1799,14 @@ scm_getc_unlocked (SCM port) scm_t_wchar scm_getc (SCM port) { + scm_i_pthread_mutex_t *lock; scm_t_wchar ret; - scm_c_lock_port (port); + scm_c_lock_port (port, &lock); ret = scm_getc_unlocked (port); - scm_c_unlock_port (port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + return ret; } @@ -1795,9 +1905,12 @@ scm_unget_byte_unlocked (int c, SCM port) void scm_unget_byte (int c, SCM port) { - scm_c_lock_port (port); + scm_i_pthread_mutex_t *lock; + scm_c_lock_port (port, &lock); scm_unget_byte_unlocked (c, port); - scm_c_unlock_port (port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + } void @@ -1807,17 +1920,11 @@ scm_ungetc_unlocked (scm_t_wchar c, SCM port) scm_t_port *pt = SCM_PTAB_ENTRY (port); char *result; char result_buf[10]; - const char *encoding; size_t len; int i; - if (pt->encoding != NULL) - encoding = pt->encoding; - else - encoding = "ISO-8859-1"; - len = sizeof (result_buf); - result = u32_conv_to_encoding (encoding, + result = u32_conv_to_encoding (pt->encoding, (enum iconv_ilseq_handler) pt->ilseq_handler, (uint32_t *) &c, 1, NULL, result_buf, &len); @@ -1848,9 +1955,12 @@ scm_ungetc_unlocked (scm_t_wchar c, SCM port) void scm_ungetc (scm_t_wchar c, SCM port) { - scm_c_lock_port (port); + scm_i_pthread_mutex_t *lock; + scm_c_lock_port (port, &lock); scm_ungetc_unlocked (c, port); - scm_c_unlock_port (port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + } void @@ -1869,9 +1979,12 @@ scm_ungets_unlocked (const char *s, int n, SCM port) void scm_ungets (const char *s, int n, SCM port) { - scm_c_lock_port (port); + scm_i_pthread_mutex_t *lock; + scm_c_lock_port (port, &lock); scm_ungets_unlocked (s, n, port); - scm_c_unlock_port (port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + } SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0, @@ -1937,10 +2050,11 @@ SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0, SCM_DEFINE (scm_unread_char, "unread-char", 1, 1, 0, (SCM cobj, SCM port), - "Place @var{char} in @var{port} so that it will be read by the\n" - "next read operation. If called multiple times, the unread characters\n" - "will be read again in last-in first-out order. If @var{port} is\n" - "not supplied, the current input port is used.") + "Place character @var{cobj} in @var{port} so that it will be\n" + "read by the next read operation. If called multiple times, the\n" + "unread characters will be read again in last-in first-out\n" + "order. If @var{port} is not supplied, the current input port\n" + "is used.") #define FUNC_NAME s_scm_unread_char { int c; @@ -2022,29 +2136,33 @@ scm_fill_input_unlocked (SCM port) int scm_fill_input (SCM port) { + scm_i_pthread_mutex_t *lock; int ret; - scm_c_lock_port (port); + scm_c_lock_port (port, &lock); ret = scm_fill_input_unlocked (port); - scm_c_unlock_port (port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + return ret; } -/* move up to read_len chars from port's putback and/or read buffers - into memory starting at dest. returns the number of chars moved. */ +/* Move up to READ_LEN bytes from PORT's putback and/or read buffers + into memory starting at DEST. Return the number of bytes moved. + PORT's line/column numbers are left unchanged. */ size_t scm_take_from_input_buffers (SCM port, char *dest, size_t read_len) { scm_t_port *pt = SCM_PTAB_ENTRY (port); - size_t chars_read = 0; + size_t bytes_read = 0; size_t from_buf = min (pt->read_end - pt->read_pos, read_len); if (from_buf > 0) { memcpy (dest, pt->read_pos, from_buf); pt->read_pos += from_buf; - chars_read += from_buf; + bytes_read += from_buf; read_len -= from_buf; dest += from_buf; } @@ -2057,10 +2175,11 @@ scm_take_from_input_buffers (SCM port, char *dest, size_t read_len) { memcpy (dest, pt->saved_read_pos, from_buf); pt->saved_read_pos += from_buf; - chars_read += from_buf; + bytes_read += from_buf; } } - return chars_read; + + return bytes_read; } /* Clear a port's read buffers, returning the contents. */ @@ -2128,9 +2247,12 @@ scm_end_input_unlocked (SCM port) void scm_end_input (SCM port) { - scm_c_lock_port (port); + scm_i_pthread_mutex_t *lock; + scm_c_lock_port (port, &lock); scm_end_input_unlocked (port); - scm_c_unlock_port (port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + } SCM_DEFINE (scm_force_output, "force-output", 0, 1, 0, @@ -2164,9 +2286,12 @@ scm_flush_unlocked (SCM port) void scm_flush (SCM port) { - scm_c_lock_port (port); + scm_i_pthread_mutex_t *lock; + scm_c_lock_port (port, &lock); scm_flush_unlocked (port); - scm_c_unlock_port (port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + } @@ -2177,17 +2302,23 @@ scm_flush (SCM port) void scm_putc (char c, SCM port) { - scm_c_lock_port (port); + scm_i_pthread_mutex_t *lock; + scm_c_lock_port (port, &lock); scm_putc_unlocked (c, port); - scm_c_unlock_port (port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + } void scm_puts (const char *s, SCM port) { - scm_c_lock_port (port); + scm_i_pthread_mutex_t *lock; + scm_c_lock_port (port, &lock); scm_puts_unlocked (s, port); - scm_c_unlock_port (port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + } /* scm_c_write @@ -2224,9 +2355,12 @@ scm_c_write_unlocked (SCM port, const void *ptr, size_t size) void scm_c_write (SCM port, const void *ptr, size_t size) { - scm_c_lock_port (port); + scm_i_pthread_mutex_t *lock; + scm_c_lock_port (port, &lock); scm_c_write_unlocked (port, ptr, size); - scm_c_unlock_port (port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + } /* scm_lfwrite @@ -2254,9 +2388,12 @@ scm_lfwrite_unlocked (const char *ptr, size_t size, SCM port) void scm_lfwrite (const char *ptr, size_t size, SCM port) { - scm_c_lock_port (port); + scm_i_pthread_mutex_t *lock; + scm_c_lock_port (port, &lock); scm_lfwrite_unlocked (ptr, size, port); - scm_c_unlock_port (port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + } /* Write STR to PORT from START inclusive to END exclusive. */ @@ -2271,7 +2408,7 @@ scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port) if (end == (size_t) -1) end = scm_i_string_length (str); - scm_display (scm_c_substring (str, start, end), port); + scm_i_display_substring (str, start, end, port); if (pt->rw_random) pt->rw_active = SCM_PORT_WRITE; @@ -2331,7 +2468,7 @@ SCM_DEFINE (scm_char_ready_p, "char-ready?", 0, 1, 0, SCM_DEFINE (scm_seek, "seek", 3, 0, 0, (SCM fd_port, SCM offset, SCM whence), - "Sets the current position of @var{fd/port} to the integer\n" + "Sets the current position of @var{fd_port} to the integer\n" "@var{offset}, which is interpreted according to the value of\n" "@var{whence}.\n" "\n" @@ -2346,7 +2483,7 @@ SCM_DEFINE (scm_seek, "seek", 3, 0, 0, "@defvar SEEK_END\n" "Seek from the end of the file.\n" "@end defvar\n" - "If @var{fd/port} is a file descriptor, the underlying system\n" + "If @var{fd_port} is a file descriptor, the underlying system\n" "call is @code{lseek}. @var{port} may be a string port.\n" "\n" "The value returned is the new position in the file. This means\n" @@ -2420,8 +2557,9 @@ truncate (const char *file, off_t length) SCM_DEFINE (scm_truncate_file, "truncate-file", 1, 1, 0, (SCM object, SCM length), - "Truncate @var{file} to @var{length} bytes. @var{file} can be a\n" - "filename string, a port object, or an integer file descriptor.\n" + "Truncate file @var{object} to @var{length} bytes. @var{object}\n" + "can be a filename string, a port object, or an integer file\n" + "descriptor.\n" "The return value is unspecified.\n" "\n" "For a port or file descriptor @var{length} can be omitted, in\n" @@ -2656,9 +2794,9 @@ SCM_DEFINE (scm_port_for_each, "port-for-each", 1, 0, 0, "Apply @var{proc} to each port in the Guile port table\n" "in turn. The return value is unspecified. More specifically,\n" "@var{proc} is applied exactly once to every port that exists\n" - "in the system at the time @var{port-for-each} is invoked.\n" - "Changes to the port table while @var{port-for-each} is running\n" - "have no effect as far as @var{port-for-each} is concerned.") + "in the system at the time @code{port-for-each} is invoked.\n" + "Changes to the port table while @code{port-for-each} is running\n" + "have no effect as far as @code{port-for-each} is concerned.") #define FUNC_NAME s_scm_port_for_each { SCM_VALIDATE_PROC (1, proc); @@ -2762,15 +2900,19 @@ scm_init_ports () #include "libguile/ports.x" /* Use Latin-1 as the default port encoding. */ - SCM_VARIABLE_SET (default_port_encoding_var, scm_make_fluid ()); - scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F); + SCM_VARIABLE_SET (default_port_encoding_var, + scm_make_fluid_with_default (SCM_BOOL_F)); scm_port_encoding_init = 1; - SCM_VARIABLE_SET (scm_conversion_strategy, scm_make_fluid ()); - scm_fluid_set_x (SCM_VARIABLE_REF (scm_conversion_strategy), - scm_from_int ((int) SCM_FAILED_CONVERSION_QUESTION_MARK)); + SCM_VARIABLE_SET (default_conversion_strategy_var, + scm_make_fluid_with_default (sym_substitute)); scm_conversion_strategy_init = 1; - + + /* These bindings are used when boot-9 turns `current-input-port' et + al into parameters. They are then removed from the guile module. */ + scm_c_define ("%current-input-port-fluid", cur_inport_fluid); + scm_c_define ("%current-output-port-fluid", cur_outport_fluid); + scm_c_define ("%current-error-port-fluid", cur_errport_fluid); } /*