X-Git-Url: http://git.hcoop.net/bpt/guile.git/blobdiff_plain/6922d92f966a593e01dded92020a132ab15a71f6..26d148066f9cb20e395a7dc4fefdf2e2ef0b2fb0:/libguile/ports.c diff --git a/libguile/ports.c b/libguile/ports.c index 2f8c79217..31d338efe 100644 --- a/libguile/ports.c +++ b/libguile/ports.c @@ -1,5 +1,5 @@ /* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, - * 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + * 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -56,7 +56,7 @@ #include "libguile/validate.h" #include "libguile/ports.h" #include "libguile/vectors.h" -#include "libguile/weaks.h" +#include "libguile/weak-set.h" #include "libguile/fluids.h" #include "libguile/eq.h" @@ -90,6 +90,56 @@ #endif +/* Port encodings are case-insensitive ASCII strings. */ +static char +ascii_toupper (char c) +{ + return (c < 'a' || c > 'z') ? c : ('A' + (c - 'a')); +} + +/* It is only necessary to use this function on encodings that come from + the user and have not been canonicalized yet. Encodings that are set + on ports or in the default encoding fluid are in upper-case, and can + be compared with strcmp. */ +static int +encoding_matches (const char *enc, const char *upper) +{ + if (!enc) + enc = "ISO-8859-1"; + + while (*enc) + if (ascii_toupper (*enc++) != *upper++) + return 0; + + return !*upper; +} + +static char* +canonicalize_encoding (const char *enc) +{ + char *ret; + int i; + + if (!enc) + return "ISO-8859-1"; + + ret = scm_gc_strdup (enc, "port"); + + for (i = 0; ret[i]; i++) + { + if (ret[i] > 127) + /* Restrict to ASCII. */ + scm_misc_error (NULL, "invalid character encoding ~s", + scm_list_1 (scm_from_latin1_string (enc))); + else + ret[i] = ascii_toupper (ret[i]); + } + + return ret; +} + + + /* The port kind table --- a dynamically resized array of port types. */ @@ -98,19 +148,79 @@ * Indexes into this table are used when generating type * tags for smobjects (if you know a tag you can get an index and conversely). */ -scm_t_ptob_descriptor *scm_ptobs = NULL; -long scm_numptob = 0; +static scm_t_ptob_descriptor **scm_ptobs = NULL; +static long scm_numptob = 0; /* Number of port types. */ +static long scm_ptobs_size = 0; /* Number of slots in the port type + table. */ +static scm_i_pthread_mutex_t scm_ptobs_lock = SCM_I_PTHREAD_MUTEX_INITIALIZER; + +long +scm_c_num_port_types (void) +{ + long ret; + + scm_i_pthread_mutex_lock (&scm_ptobs_lock); + ret = scm_numptob; + scm_i_pthread_mutex_unlock (&scm_ptobs_lock); + + return ret; +} -/* GC marker for a port with stream of SCM type. */ -SCM -scm_markstream (SCM ptr) +scm_t_ptob_descriptor* +scm_c_port_type_ref (long ptobnum) { - int openp; - openp = SCM_CELL_WORD_0 (ptr) & SCM_OPN; - if (openp) - return SCM_PACK (SCM_STREAM (ptr)); - else - return SCM_BOOL_F; + scm_t_ptob_descriptor *ret = NULL; + + scm_i_pthread_mutex_lock (&scm_ptobs_lock); + + if (0 <= ptobnum && ptobnum < scm_numptob) + ret = scm_ptobs[ptobnum]; + + scm_i_pthread_mutex_unlock (&scm_ptobs_lock); + + if (!ret) + scm_out_of_range ("scm_c_port_type_ref", scm_from_long (ptobnum)); + + return ret; +} + +long +scm_c_port_type_add_x (scm_t_ptob_descriptor *desc) +{ + long ret = -1; + + scm_i_pthread_mutex_lock (&scm_ptobs_lock); + + if (scm_numptob + 1 < SCM_I_MAX_PORT_TYPE_COUNT) + { + if (scm_numptob == scm_ptobs_size) + { + unsigned long old_size = scm_ptobs_size; + scm_t_ptob_descriptor **old_ptobs = scm_ptobs; + + /* Currently there are only 9 predefined port types, so one + resize will cover it. */ + scm_ptobs_size = old_size + 10; + + if (scm_ptobs_size >= SCM_I_MAX_PORT_TYPE_COUNT) + scm_ptobs_size = SCM_I_MAX_PORT_TYPE_COUNT; + + scm_ptobs = scm_gc_malloc (sizeof (*scm_ptobs) * scm_ptobs_size, + "scm_ptobs"); + + memcpy (scm_ptobs, old_ptobs, sizeof (*scm_ptobs) * scm_numptob); + } + + ret = scm_numptob++; + scm_ptobs[ret] = desc; + } + + scm_i_pthread_mutex_unlock (&scm_ptobs_lock); + + if (ret < 0) + scm_out_of_range ("scm_c_port_type_add_x", scm_from_long (scm_numptob)); + + return ret; } /* @@ -134,238 +244,93 @@ scm_make_port_type (char *name, int (*fill_input) (SCM port), void (*write) (SCM port, const void *data, size_t size)) { - char *tmp; - if (SCM_I_MAX_PORT_TYPE_COUNT - 1 <= scm_numptob) - goto ptoberr; - SCM_CRITICAL_SECTION_START; - tmp = (char *) scm_gc_realloc ((char *) scm_ptobs, - scm_numptob * sizeof (scm_t_ptob_descriptor), - (1 + scm_numptob) - * sizeof (scm_t_ptob_descriptor), - "port-type"); - if (tmp) - { - scm_ptobs = (scm_t_ptob_descriptor *) tmp; - - scm_ptobs[scm_numptob].name = name; - scm_ptobs[scm_numptob].mark = 0; - scm_ptobs[scm_numptob].free = NULL; - scm_ptobs[scm_numptob].print = scm_port_print; - scm_ptobs[scm_numptob].equalp = 0; - scm_ptobs[scm_numptob].close = 0; + scm_t_ptob_descriptor *desc; + long ptobnum; - scm_ptobs[scm_numptob].write = write; - scm_ptobs[scm_numptob].flush = flush_port_default; + desc = scm_gc_malloc_pointerless (sizeof (*desc), "port-type"); + memset (desc, 0, sizeof (*desc)); - scm_ptobs[scm_numptob].end_input = end_input_default; - scm_ptobs[scm_numptob].fill_input = fill_input; - scm_ptobs[scm_numptob].input_waiting = 0; + desc->name = name; + desc->print = scm_port_print; + desc->write = write; + desc->flush = flush_port_default; + desc->end_input = end_input_default; + desc->fill_input = fill_input; - scm_ptobs[scm_numptob].seek = 0; - scm_ptobs[scm_numptob].truncate = 0; + ptobnum = scm_c_port_type_add_x (desc); - scm_numptob++; - } - SCM_CRITICAL_SECTION_END; - if (!tmp) - { - ptoberr: - scm_memory_error ("scm_make_port_type"); - } - /* Make a class object if Goops is present */ + /* Make a class object if GOOPS is present. */ if (SCM_UNPACK (scm_port_class[0]) != 0) - scm_make_port_classes (scm_numptob - 1, SCM_PTOBNAME (scm_numptob - 1)); - return scm_tc7_port + (scm_numptob - 1) * 256; + scm_make_port_classes (ptobnum, name); + + return scm_tc7_port + ptobnum * 256; } void scm_set_port_mark (scm_t_bits tc, SCM (*mark) (SCM)) { - scm_ptobs[SCM_TC2PTOBNUM (tc)].mark = mark; + scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->mark = mark; } void scm_set_port_free (scm_t_bits tc, size_t (*free) (SCM)) { - scm_ptobs[SCM_TC2PTOBNUM (tc)].free = free; + scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->free = free; } void scm_set_port_print (scm_t_bits tc, int (*print) (SCM exp, SCM port, - scm_print_state *pstate)) + scm_print_state *pstate)) { - scm_ptobs[SCM_TC2PTOBNUM (tc)].print = print; + scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->print = print; } void scm_set_port_equalp (scm_t_bits tc, SCM (*equalp) (SCM, SCM)) { - scm_ptobs[SCM_TC2PTOBNUM (tc)].equalp = equalp; + scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->equalp = equalp; } void -scm_set_port_flush (scm_t_bits tc, void (*flush) (SCM port)) +scm_set_port_close (scm_t_bits tc, int (*close) (SCM)) { - scm_ptobs[SCM_TC2PTOBNUM (tc)].flush = flush; + scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->close = close; } void -scm_set_port_end_input (scm_t_bits tc, void (*end_input) (SCM port, int offset)) +scm_set_port_flush (scm_t_bits tc, void (*flush) (SCM port)) { - scm_ptobs[SCM_TC2PTOBNUM (tc)].end_input = end_input; + scm_t_ptob_descriptor *ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tc)); + ptob->flush = flush; + ptob->flags |= SCM_PORT_TYPE_HAS_FLUSH; } void -scm_set_port_close (scm_t_bits tc, int (*close) (SCM)) +scm_set_port_end_input (scm_t_bits tc, void (*end_input) (SCM port, int offset)) { - scm_ptobs[SCM_TC2PTOBNUM (tc)].close = close; + scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->end_input = end_input; } void -scm_set_port_seek (scm_t_bits tc, - scm_t_off (*seek) (SCM, scm_t_off, int)) +scm_set_port_seek (scm_t_bits tc, scm_t_off (*seek) (SCM, scm_t_off, int)) { - scm_ptobs[SCM_TC2PTOBNUM (tc)].seek = seek; + scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->seek = seek; } void scm_set_port_truncate (scm_t_bits tc, void (*truncate) (SCM, scm_t_off)) { - scm_ptobs[SCM_TC2PTOBNUM (tc)].truncate = truncate; + scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->truncate = truncate; } void scm_set_port_input_waiting (scm_t_bits tc, int (*input_waiting) (SCM)) { - scm_ptobs[SCM_TC2PTOBNUM (tc)].input_waiting = input_waiting; + scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->input_waiting = input_waiting; } -SCM_DEFINE (scm_char_ready_p, "char-ready?", 0, 1, 0, - (SCM port), - "Return @code{#t} if a character is ready on input @var{port}\n" - "and return @code{#f} otherwise. If @code{char-ready?} returns\n" - "@code{#t} then the next @code{read-char} operation on\n" - "@var{port} is guaranteed not to hang. If @var{port} is a file\n" - "port at end of file then @code{char-ready?} returns @code{#t}.\n" - "\n" - "@code{char-ready?} exists to make it possible for a\n" - "program to accept characters from interactive ports without\n" - "getting stuck waiting for input. Any input editors associated\n" - "with such ports must make sure that characters whose existence\n" - "has been asserted by @code{char-ready?} cannot be rubbed out.\n" - "If @code{char-ready?} were to return @code{#f} at end of file,\n" - "a port at end of file would be indistinguishable from an\n" - "interactive port that has no ready characters.") -#define FUNC_NAME s_scm_char_ready_p -{ - scm_t_port *pt; - - if (SCM_UNBNDP (port)) - port = scm_current_input_port (); - /* It's possible to close the current input port, so validate even in - this case. */ - SCM_VALIDATE_OPINPORT (1, port); - - pt = SCM_PTAB_ENTRY (port); - - /* if the current read buffer is filled, or the - last pushed-back char has been read and the saved buffer is - filled, result is true. */ - if (pt->read_pos < pt->read_end - || (pt->read_buf == pt->putback_buf - && pt->saved_read_pos < pt->saved_read_end)) - return SCM_BOOL_T; - else - { - scm_t_ptob_descriptor *ptob = &scm_ptobs[SCM_PTOBNUM (port)]; - - if (ptob->input_waiting) - return scm_from_bool(ptob->input_waiting (port)); - else - return SCM_BOOL_T; - } -} -#undef FUNC_NAME - -/* Move up to READ_LEN bytes from PORT's putback and/or read buffers - into memory starting at DEST. Return the number of bytes moved. - PORT's line/column numbers are left unchanged. */ -size_t -scm_take_from_input_buffers (SCM port, char *dest, size_t read_len) -{ - scm_t_port *pt = SCM_PTAB_ENTRY (port); - size_t bytes_read = 0; - size_t from_buf = min (pt->read_end - pt->read_pos, read_len); - - if (from_buf > 0) - { - memcpy (dest, pt->read_pos, from_buf); - pt->read_pos += from_buf; - bytes_read += from_buf; - read_len -= from_buf; - dest += from_buf; - } - - /* if putback was active, try the real input buffer too. */ - if (pt->read_buf == pt->putback_buf) - { - from_buf = min (pt->saved_read_end - pt->saved_read_pos, read_len); - if (from_buf > 0) - { - memcpy (dest, pt->saved_read_pos, from_buf); - pt->saved_read_pos += from_buf; - bytes_read += from_buf; - } - } - - return bytes_read; -} - -/* Clear a port's read buffers, returning the contents. */ -SCM_DEFINE (scm_drain_input, "drain-input", 1, 0, 0, - (SCM port), - "This procedure clears a port's input buffers, similar\n" - "to the way that force-output clears the output buffer. The\n" - "contents of the buffers are returned as a single string, e.g.,\n" - "\n" - "@lisp\n" - "(define p (open-input-file ...))\n" - "(drain-input p) => empty string, nothing buffered yet.\n" - "(unread-char (read-char p) p)\n" - "(drain-input p) => initial chars from p, up to the buffer size.\n" - "@end lisp\n\n" - "Draining the buffers may be useful for cleanly finishing\n" - "buffered I/O so that the file descriptor can be used directly\n" - "for further input.") -#define FUNC_NAME s_scm_drain_input -{ - SCM result; - char *data; - scm_t_port *pt; - long count; - - SCM_VALIDATE_OPINPORT (1, port); - pt = SCM_PTAB_ENTRY (port); - - count = pt->read_end - pt->read_pos; - if (pt->read_buf == pt->putback_buf) - count += pt->saved_read_end - pt->saved_read_pos; - - if (count) - { - result = scm_i_make_string (count, &data, 0); - scm_take_from_input_buffers (port, data, count); - } - else - result = scm_nullstr; - - return result; -} -#undef FUNC_NAME - - /* Standard ports --- current input, output, error, and more(!). */ static SCM cur_inport_fluid = SCM_BOOL_F; @@ -418,10 +383,14 @@ SCM_DEFINE (scm_current_error_port, "current-error-port", 0, 0, 0, SCM scm_current_warning_port (void) { - static SCM cwp_var = SCM_BOOL_F; + static SCM cwp_var = SCM_UNDEFINED; + static scm_i_pthread_mutex_t cwp_var_mutex + = SCM_I_PTHREAD_MUTEX_INITIALIZER; - if (scm_is_false (cwp_var)) - cwp_var = scm_c_private_lookup ("guile", "current-warning-port"); + scm_i_scm_pthread_mutex_lock (&cwp_var_mutex); + if (SCM_UNBNDP (cwp_var)) + cwp_var = scm_c_private_variable ("guile", "current-warning-port"); + scm_i_pthread_mutex_unlock (&cwp_var_mutex); return scm_call_0 (scm_variable_ref (cwp_var)); } @@ -528,321 +497,278 @@ scm_i_dynwind_current_load_port (SCM port) scm_dynwind_fluid (cur_loadport_fluid, port); } - -/* The port table --- an array of pointers to ports. */ - -/* - We need a global registry of ports to flush them all at exit, and to - get all the ports matching a file descriptor. - */ -SCM scm_i_port_weak_hash; - -scm_i_pthread_mutex_t scm_i_port_table_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER; -/* Port finalization. */ +/* Retrieving a port's mode. */ -static void finalize_port (void *, void *); +/* Return the flags that characterize a port based on the mode + * string used to open a file for that port. + * + * See PORT FLAGS in scm.h + */ -/* Register a finalizer for PORT. */ -static SCM_C_INLINE_KEYWORD void -register_finalizer_for_port (SCM port) +static long +scm_i_mode_bits_n (SCM modes) { - /* Register a finalizer for PORT so that its iconv CDs get freed and - optionally its type's `free' function gets called. */ - scm_i_set_finalizer (SCM2PTR (port), finalize_port, NULL); + return (SCM_OPN + | (scm_i_string_contains_char (modes, 'r') + || scm_i_string_contains_char (modes, '+') ? SCM_RDNG : 0) + | (scm_i_string_contains_char (modes, 'w') + || scm_i_string_contains_char (modes, 'a') + || scm_i_string_contains_char (modes, '+') ? SCM_WRTNG : 0) + | (scm_i_string_contains_char (modes, '0') ? SCM_BUF0 : 0) + | (scm_i_string_contains_char (modes, 'l') ? SCM_BUFLINE : 0)); } -/* Finalize the object (a port) pointed to by PTR. */ -static void -finalize_port (void *ptr, void *data) +long +scm_mode_bits (char *modes) { - long port_type; - SCM port = PTR2SCM (ptr); - - if (!SCM_PORTP (port)) - abort (); - - if (SCM_OPENP (port)) - { - if (SCM_REVEALED (port) > 0) - /* Keep "revealed" ports alive and re-register a finalizer. */ - register_finalizer_for_port (port); - else - { - scm_t_port *entry; - - port_type = SCM_TC2PTOBNUM (SCM_CELL_TYPE (port)); - if (port_type >= scm_numptob) - abort (); - - if (scm_ptobs[port_type].free) - /* Yes, I really do mean `.free' rather than `.close'. `.close' - is for explicit `close-port' by user. */ - scm_ptobs[port_type].free (port); - - entry = SCM_PTAB_ENTRY (port); - - if (entry->input_cd != (iconv_t) -1) - iconv_close (entry->input_cd); - if (entry->output_cd != (iconv_t) -1) - iconv_close (entry->output_cd); - - SCM_SETSTREAM (port, 0); - SCM_CLR_PORT_OPEN_FLAG (port); - - scm_gc_ports_collected++; - } - } + /* Valid characters are rw+a0l. So, use latin1. */ + return scm_i_mode_bits (scm_from_latin1_string (modes)); } - - - - -/* This function is not and should not be thread safe. */ -SCM -scm_new_port_table_entry (scm_t_bits tag) -#define FUNC_NAME "scm_new_port_table_entry" +long +scm_i_mode_bits (SCM modes) { - /* - We initialize the cell to empty, this is in case scm_gc_calloc - triggers GC ; we don't want the GC to scan a half-finished Z. - */ - - SCM z = scm_cons (SCM_EOL, SCM_EOL); - scm_t_port *entry = (scm_t_port *) scm_gc_calloc (sizeof (scm_t_port), "port"); - const char *enc; - - entry->file_name = SCM_BOOL_F; - entry->rw_active = SCM_PORT_NEITHER; - entry->port = z; + long bits; - /* Initialize this port with the thread's current default - encoding. */ - enc = scm_i_default_port_encoding (); - entry->encoding = enc ? scm_gc_strdup (enc, "port") : NULL; + if (!scm_is_string (modes)) + scm_wrong_type_arg_msg (NULL, 0, modes, "string"); - /* The conversion descriptors will be opened lazily. */ - entry->input_cd = (iconv_t) -1; - entry->output_cd = (iconv_t) -1; + bits = scm_i_mode_bits_n (modes); + scm_remember_upto_here_1 (modes); + return bits; +} - entry->ilseq_handler = scm_i_default_port_conversion_handler (); +/* Return the mode flags from an open port. + * Some modes such as "append" are only used when opening + * a file and are not returned here. */ - SCM_SET_CELL_TYPE (z, tag); - SCM_SETPTAB_ENTRY (z, entry); - - scm_hashq_set_x (scm_i_port_weak_hash, z, SCM_BOOL_F); +SCM_DEFINE (scm_port_mode, "port-mode", 1, 0, 0, + (SCM port), + "Return the port modes associated with the open port @var{port}.\n" + "These will not necessarily be identical to the modes used when\n" + "the port was opened, since modes such as \"append\" which are\n" + "used only during port creation are not retained.") +#define FUNC_NAME s_scm_port_mode +{ + char modes[4]; + modes[0] = '\0'; - /* For each new port, register a finalizer so that it port type's free - function can be invoked eventually. */ - register_finalizer_for_port (z); + port = SCM_COERCE_OUTPORT (port); + SCM_VALIDATE_OPPORT (1, port); + if (SCM_CELL_WORD_0 (port) & SCM_RDNG) { + if (SCM_CELL_WORD_0 (port) & SCM_WRTNG) + strcpy (modes, "r+"); + else + strcpy (modes, "r"); + } + else if (SCM_CELL_WORD_0 (port) & SCM_WRTNG) + strcpy (modes, "w"); + if (SCM_CELL_WORD_0 (port) & SCM_BUF0) + strcat (modes, "0"); - return z; + return scm_from_latin1_string (modes); } #undef FUNC_NAME -#if SCM_ENABLE_DEPRECATED==1 -scm_t_port * -scm_add_to_port_table (SCM port) -{ - SCM z; - scm_t_port * pt; - scm_c_issue_deprecation_warning ("scm_add_to_port_table is deprecated."); + - scm_i_pthread_mutex_lock (&scm_i_port_table_mutex); - z = scm_new_port_table_entry (scm_tc7_port); - pt = SCM_PTAB_ENTRY(z); - pt->port = port; - SCM_SETCAR (z, SCM_EOL); - SCM_SETCDR (z, SCM_EOL); - SCM_SETPTAB_ENTRY (port, pt); - scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex); +/* The port table --- a weak set of all ports. + + We need a global registry of ports to flush them all at exit, and to + get all the ports matching a file descriptor. */ +SCM scm_i_port_weak_set; - return pt; -} -#endif + -/* Remove a port from the table and destroy it. */ +/* Port finalization. */ -static void -scm_i_remove_port (SCM port) -#define FUNC_NAME "scm_remove_port" +struct do_free_data { - scm_t_port *p; + scm_t_ptob_descriptor *ptob; + SCM port; +}; - scm_i_scm_pthread_mutex_lock (&scm_i_port_table_mutex); +static SCM +do_free (void *body_data) +{ + struct do_free_data *data = body_data; - p = SCM_PTAB_ENTRY (port); - scm_port_non_buffer (p); - p->putback_buf = NULL; - p->putback_buf_size = 0; + /* `close' is for explicit `close-port' by user. `free' is for this + purpose: ports collected by the GC. */ + data->ptob->free (data->port); - if (p->input_cd != (iconv_t) -1) - { - iconv_close (p->input_cd); - p->input_cd = (iconv_t) -1; - } - - if (p->output_cd != (iconv_t) -1) + return SCM_BOOL_T; +} + +/* Finalize the object (a port) pointed to by PTR. */ +static void +finalize_port (void *ptr, void *data) +{ + SCM port = SCM_PACK_POINTER (ptr); + + if (!SCM_PORTP (port)) + abort (); + + if (SCM_OPENP (port)) { - iconv_close (p->output_cd); - p->output_cd = (iconv_t) -1; - } + struct do_free_data data; - SCM_SETPTAB_ENTRY (port, 0); + SCM_CLR_PORT_OPEN_FLAG (port); - scm_hashq_remove_x (scm_i_port_weak_hash, port); + data.ptob = SCM_PORT_DESCRIPTOR (port); + data.port = port; - scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex); + scm_internal_catch (SCM_BOOL_T, do_free, &data, + scm_handle_by_message_noexit, NULL); + + scm_gc_ports_collected++; + } } -#undef FUNC_NAME -/* Functions for debugging. */ -#ifdef GUILE_DEBUG -SCM_DEFINE (scm_pt_size, "pt-size", 0, 0, 0, - (), - "Return the number of ports in the port table. @code{pt-size}\n" - "is only included in @code{--enable-guile-debug} builds.") -#define FUNC_NAME s_scm_pt_size -{ - return scm_from_int (SCM_HASHTABLE_N_ITEMS (scm_i_port_weak_hash)); -} -#undef FUNC_NAME -#endif + -void -scm_port_non_buffer (scm_t_port *pt) +SCM +scm_c_make_port_with_encoding (scm_t_bits tag, unsigned long mode_bits, + const char *encoding, + scm_t_string_failed_conversion_handler handler, + scm_t_bits stream) { - pt->read_pos = pt->read_buf = pt->read_end = &pt->shortbuf; - pt->write_buf = pt->write_pos = &pt->shortbuf; - pt->read_buf_size = pt->write_buf_size = 1; - pt->write_end = pt->write_buf + pt->write_buf_size; -} + SCM ret; + scm_t_port *entry; + scm_t_ptob_descriptor *ptob; - -/* Revealed counts --- an oddity inherited from SCSH. */ + entry = (scm_t_port *) scm_gc_calloc (sizeof (scm_t_port), "port"); + ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tag)); -/* Find a port in the table and return its revealed count. - Also used by the garbage collector. - */ + ret = scm_words (tag | mode_bits, 3); + SCM_SET_CELL_WORD_1 (ret, (scm_t_bits) entry); + SCM_SET_CELL_WORD_2 (ret, (scm_t_bits) ptob); -int -scm_revealed_count (SCM port) -{ - return SCM_REVEALED(port); -} + entry->lock = scm_gc_malloc_pointerless (sizeof (*entry->lock), "port lock"); + scm_i_pthread_mutex_init (entry->lock, scm_i_pthread_mutexattr_recursive); + + entry->file_name = SCM_BOOL_F; + entry->rw_active = SCM_PORT_NEITHER; + entry->port = ret; + entry->stream = stream; + + if (encoding_matches (encoding, "UTF-8")) + { + entry->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8; + entry->encoding = "UTF-8"; + } + else if (encoding_matches (encoding, "ISO-8859-1")) + { + entry->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1; + entry->encoding = "ISO-8859-1"; + } + else + { + entry->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV; + entry->encoding = canonicalize_encoding (encoding); + } + entry->ilseq_handler = handler; + entry->iconv_descriptors = NULL; + entry->alist = SCM_EOL; -/* Return the revealed count for a port. */ + if (SCM_PORT_DESCRIPTOR (ret)->free) + scm_i_set_finalizer (SCM2PTR (ret), finalize_port, NULL); -SCM_DEFINE (scm_port_revealed, "port-revealed", 1, 0, 0, - (SCM port), - "Return the revealed count for @var{port}.") -#define FUNC_NAME s_scm_port_revealed -{ - port = SCM_COERCE_OUTPORT (port); - SCM_VALIDATE_OPENPORT (1, port); - return scm_from_int (scm_revealed_count (port)); + if (SCM_PORT_DESCRIPTOR (ret)->flags & SCM_PORT_TYPE_HAS_FLUSH) + scm_weak_set_add_x (scm_i_port_weak_set, ret); + + return ret; } -#undef FUNC_NAME -/* Set the revealed count for a port. */ -SCM_DEFINE (scm_set_port_revealed_x, "set-port-revealed!", 2, 0, 0, - (SCM port, SCM rcount), - "Sets the revealed count for a port to a given value.\n" - "The return value is unspecified.") -#define FUNC_NAME s_scm_set_port_revealed_x +SCM +scm_c_make_port (scm_t_bits tag, unsigned long mode_bits, scm_t_bits stream) { - port = SCM_COERCE_OUTPORT (port); - SCM_VALIDATE_OPENPORT (1, port); - SCM_REVEALED (port) = scm_to_int (rcount); - return SCM_UNSPECIFIED; + return scm_c_make_port_with_encoding (tag, mode_bits, + scm_i_default_port_encoding (), + scm_i_default_port_conversion_handler (), + stream); } -#undef FUNC_NAME +SCM +scm_new_port_table_entry (scm_t_bits tag) +{ + return scm_c_make_port (tag, 0, 0); +} -/* Retrieving a port's mode. */ -/* Return the flags that characterize a port based on the mode - * string used to open a file for that port. - * - * See PORT FLAGS in scm.h - */ +/* Predicates. */ -static long -scm_i_mode_bits_n (SCM modes) +SCM_DEFINE (scm_port_p, "port?", 1, 0, 0, + (SCM x), + "Return a boolean indicating whether @var{x} is a port.\n" + "Equivalent to @code{(or (input-port? @var{x}) (output-port?\n" + "@var{x}))}.") +#define FUNC_NAME s_scm_port_p { - return (SCM_OPN - | (scm_i_string_contains_char (modes, 'r') - || scm_i_string_contains_char (modes, '+') ? SCM_RDNG : 0) - | (scm_i_string_contains_char (modes, 'w') - || scm_i_string_contains_char (modes, 'a') - || scm_i_string_contains_char (modes, '+') ? SCM_WRTNG : 0) - | (scm_i_string_contains_char (modes, '0') ? SCM_BUF0 : 0) - | (scm_i_string_contains_char (modes, 'l') ? SCM_BUFLINE : 0)); + return scm_from_bool (SCM_PORTP (x)); } +#undef FUNC_NAME -long -scm_mode_bits (char *modes) +SCM_DEFINE (scm_input_port_p, "input-port?", 1, 0, 0, + (SCM x), + "Return @code{#t} if @var{x} is an input port, otherwise return\n" + "@code{#f}. Any object satisfying this predicate also satisfies\n" + "@code{port?}.") +#define FUNC_NAME s_scm_input_port_p { - return scm_i_mode_bits (scm_from_locale_string (modes)); + return scm_from_bool (SCM_INPUT_PORT_P (x)); } +#undef FUNC_NAME -long -scm_i_mode_bits (SCM modes) +SCM_DEFINE (scm_output_port_p, "output-port?", 1, 0, 0, + (SCM x), + "Return @code{#t} if @var{x} is an output port, otherwise return\n" + "@code{#f}. Any object satisfying this predicate also satisfies\n" + "@code{port?}.") +#define FUNC_NAME s_scm_output_port_p { - long bits; - - if (!scm_is_string (modes)) - scm_wrong_type_arg_msg (NULL, 0, modes, "string"); - - bits = scm_i_mode_bits_n (modes); - scm_remember_upto_here_1 (modes); - return bits; + x = SCM_COERCE_OUTPORT (x); + return scm_from_bool (SCM_OUTPUT_PORT_P (x)); } +#undef FUNC_NAME -/* Return the mode flags from an open port. - * Some modes such as "append" are only used when opening - * a file and are not returned here. */ - -SCM_DEFINE (scm_port_mode, "port-mode", 1, 0, 0, +SCM_DEFINE (scm_port_closed_p, "port-closed?", 1, 0, 0, (SCM port), - "Return the port modes associated with the open port @var{port}.\n" - "These will not necessarily be identical to the modes used when\n" - "the port was opened, since modes such as \"append\" which are\n" - "used only during port creation are not retained.") -#define FUNC_NAME s_scm_port_mode + "Return @code{#t} if @var{port} is closed or @code{#f} if it is\n" + "open.") +#define FUNC_NAME s_scm_port_closed_p { - char modes[4]; - modes[0] = '\0'; + SCM_VALIDATE_PORT (1, port); + return scm_from_bool (!SCM_OPPORTP (port)); +} +#undef FUNC_NAME - port = SCM_COERCE_OUTPORT (port); - SCM_VALIDATE_OPPORT (1, port); - if (SCM_CELL_WORD_0 (port) & SCM_RDNG) { - if (SCM_CELL_WORD_0 (port) & SCM_WRTNG) - strcpy (modes, "r+"); - else - strcpy (modes, "r"); - } - else if (SCM_CELL_WORD_0 (port) & SCM_WRTNG) - strcpy (modes, "w"); - if (SCM_CELL_WORD_0 (port) & SCM_BUF0) - strcat (modes, "0"); - return scm_from_locale_string (modes); +SCM_DEFINE (scm_eof_object_p, "eof-object?", 1, 0, 0, + (SCM x), + "Return @code{#t} if @var{x} is an end-of-file object; otherwise\n" + "return @code{#f}.") +#define FUNC_NAME s_scm_eof_object_p +{ + return scm_from_bool (SCM_EOF_OBJECT_P (x)); } #undef FUNC_NAME + /* Closing ports. */ +static void close_iconv_descriptors (scm_t_iconv_descriptors *id); + /* scm_close_port * Call the close operation on a port object. * see also scm_close. @@ -857,7 +783,7 @@ SCM_DEFINE (scm_close_port, "close-port", 1, 0, 0, "descriptors.") #define FUNC_NAME s_scm_close_port { - size_t i; + scm_t_port *p; int rv; port = SCM_COERCE_OUTPORT (port); @@ -865,13 +791,28 @@ SCM_DEFINE (scm_close_port, "close-port", 1, 0, 0, SCM_VALIDATE_PORT (1, port); if (SCM_CLOSEDP (port)) return SCM_BOOL_F; - i = SCM_PTOBNUM (port); - if (scm_ptobs[i].close) - rv = (scm_ptobs[i].close) (port); + + p = SCM_PTAB_ENTRY (port); + SCM_CLR_PORT_OPEN_FLAG (port); + + if (SCM_PORT_DESCRIPTOR (port)->flags & SCM_PORT_TYPE_HAS_FLUSH) + scm_weak_set_remove_x (scm_i_port_weak_set, port); + + if (SCM_PORT_DESCRIPTOR (port)->close) + /* Note! This may throw an exception. Anything after this point + should be resilient to non-local exits. */ + rv = SCM_PORT_DESCRIPTOR (port)->close (port); else rv = 0; - scm_i_remove_port (port); - SCM_CLR_PORT_OPEN_FLAG (port); + + if (p->iconv_descriptors) + { + /* If we don't get here, the iconv_descriptors finalizer will + clean up. */ + close_iconv_descriptors (p->iconv_descriptors); + p->iconv_descriptors = NULL; + } + return scm_from_bool (rv >= 0); } #undef FUNC_NAME @@ -907,287 +848,720 @@ SCM_DEFINE (scm_close_output_port, "close-output-port", 1, 0, 0, } #undef FUNC_NAME -static SCM -collect_keys (void *unused, SCM key, SCM value, SCM result) -{ - return scm_cons (key, result); -} - -void -scm_c_port_for_each (void (*proc)(void *data, SCM p), void *data) -{ - SCM ports; - - /* Copy out the port table as a list so that we get strong references - to all the values. */ - scm_i_pthread_mutex_lock (&scm_i_port_table_mutex); - ports = scm_internal_hash_fold (collect_keys, NULL, - SCM_EOL, scm_i_port_weak_hash); - scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex); - for (; scm_is_pair (ports); ports = scm_cdr (ports)) - { - SCM p = scm_car (ports); - if (SCM_PORTP (p)) - proc (data, p); - } -} + -SCM_DEFINE (scm_port_for_each, "port-for-each", 1, 0, 0, - (SCM proc), - "Apply @var{proc} to each port in the Guile port table\n" - "in turn. The return value is unspecified. More specifically,\n" - "@var{proc} is applied exactly once to every port that exists\n" - "in the system at the time @code{port-for-each} is invoked.\n" - "Changes to the port table while @code{port-for-each} is running\n" - "have no effect as far as @code{port-for-each} is concerned.") -#define FUNC_NAME s_scm_port_for_each -{ - SCM ports; +/* Encoding characters to byte streams, and decoding byte streams to + characters. */ - SCM_VALIDATE_PROC (1, proc); +/* A fluid specifying the default encoding for newly created ports. If it is + a string, that is the encoding. If it is #f, it is in the "native" + (Latin-1) encoding. */ +SCM_VARIABLE (default_port_encoding_var, "%default-port-encoding"); - /* Copy out the port table as a list so that we get strong references - to all the values. */ - scm_i_pthread_mutex_lock (&scm_i_port_table_mutex); - ports = scm_internal_hash_fold (collect_keys, NULL, - SCM_EOL, scm_i_port_weak_hash); - scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex); +static int scm_port_encoding_init = 0; - for (; scm_is_pair (ports); ports = scm_cdr (ports)) - if (SCM_PORTP (SCM_CAR (ports))) - scm_call_1 (proc, SCM_CAR (ports)); +/* Use ENCODING as the default encoding for future ports. */ +void +scm_i_set_default_port_encoding (const char *encoding) +{ + if (!scm_port_encoding_init + || !scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var))) + scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized", + SCM_EOL); - return SCM_UNSPECIFIED; + if (encoding_matches (encoding, "ISO-8859-1")) + scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F); + else + scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), + scm_from_latin1_string (canonicalize_encoding (encoding))); } -#undef FUNC_NAME +/* Return the name of the default encoding for newly created ports. */ +const char * +scm_i_default_port_encoding (void) +{ + if (!scm_port_encoding_init) + return "ISO-8859-1"; + else if (!scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var))) + return "ISO-8859-1"; + else + { + SCM encoding; - -/* Utter miscellany. Gosh, we should clean this up some time. */ - -SCM_DEFINE (scm_input_port_p, "input-port?", 1, 0, 0, - (SCM x), - "Return @code{#t} if @var{x} is an input port, otherwise return\n" - "@code{#f}. Any object satisfying this predicate also satisfies\n" - "@code{port?}.") -#define FUNC_NAME s_scm_input_port_p -{ - return scm_from_bool (SCM_INPUT_PORT_P (x)); + encoding = scm_fluid_ref (SCM_VARIABLE_REF (default_port_encoding_var)); + if (!scm_is_string (encoding)) + return "ISO-8859-1"; + else + return scm_i_string_chars (encoding); + } } -#undef FUNC_NAME -SCM_DEFINE (scm_output_port_p, "output-port?", 1, 0, 0, - (SCM x), - "Return @code{#t} if @var{x} is an output port, otherwise return\n" - "@code{#f}. Any object satisfying this predicate also satisfies\n" - "@code{port?}.") -#define FUNC_NAME s_scm_output_port_p -{ - x = SCM_COERCE_OUTPORT (x); - return scm_from_bool (SCM_OUTPUT_PORT_P (x)); -} -#undef FUNC_NAME +/* A fluid specifying the default conversion handler for newly created + ports. Its value should be one of the symbols below. */ +SCM_VARIABLE (default_conversion_strategy_var, + "%default-port-conversion-strategy"); -SCM_DEFINE (scm_port_p, "port?", 1, 0, 0, - (SCM x), - "Return a boolean indicating whether @var{x} is a port.\n" - "Equivalent to @code{(or (input-port? @var{x}) (output-port?\n" - "@var{x}))}.") -#define FUNC_NAME s_scm_port_p -{ - return scm_from_bool (SCM_PORTP (x)); -} -#undef FUNC_NAME +/* Whether the above fluid is initialized. */ +static int scm_conversion_strategy_init = 0; -SCM_DEFINE (scm_port_closed_p, "port-closed?", 1, 0, 0, - (SCM port), - "Return @code{#t} if @var{port} is closed or @code{#f} if it is\n" - "open.") -#define FUNC_NAME s_scm_port_closed_p -{ - SCM_VALIDATE_PORT (1, port); - return scm_from_bool (!SCM_OPPORTP (port)); -} -#undef FUNC_NAME +/* The possible conversion strategies. */ +SCM_SYMBOL (sym_error, "error"); +SCM_SYMBOL (sym_substitute, "substitute"); +SCM_SYMBOL (sym_escape, "escape"); -SCM_DEFINE (scm_eof_object_p, "eof-object?", 1, 0, 0, - (SCM x), - "Return @code{#t} if @var{x} is an end-of-file object; otherwise\n" - "return @code{#f}.") -#define FUNC_NAME s_scm_eof_object_p +/* Return the default failed encoding conversion policy for new created + ports. */ +scm_t_string_failed_conversion_handler +scm_i_default_port_conversion_handler (void) { - return scm_from_bool(SCM_EOF_OBJECT_P (x)); -} -#undef FUNC_NAME + scm_t_string_failed_conversion_handler handler; -SCM_DEFINE (scm_force_output, "force-output", 0, 1, 0, - (SCM port), - "Flush the specified output port, or the current output port if @var{port}\n" - "is omitted. The current output buffer contents are passed to the\n" - "underlying port implementation (e.g., in the case of fports, the\n" - "data will be written to the file and the output buffer will be cleared.)\n" - "It has no effect on an unbuffered port.\n\n" - "The return value is unspecified.") -#define FUNC_NAME s_scm_force_output -{ - if (SCM_UNBNDP (port)) - port = scm_current_output_port (); + if (!scm_conversion_strategy_init + || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var))) + handler = SCM_FAILED_CONVERSION_QUESTION_MARK; else { - port = SCM_COERCE_OUTPORT (port); - SCM_VALIDATE_OPOUTPORT (1, port); - } - scm_flush (port); - return SCM_UNSPECIFIED; -} -#undef FUNC_NAME + SCM fluid, value; + fluid = SCM_VARIABLE_REF (default_conversion_strategy_var); + value = scm_fluid_ref (fluid); -static void -flush_output_port (void *closure, SCM port) -{ - if (SCM_OPOUTPORTP (port)) - scm_flush (port); -} + if (scm_is_eq (sym_substitute, value)) + handler = SCM_FAILED_CONVERSION_QUESTION_MARK; + else if (scm_is_eq (sym_escape, value)) + handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE; + else + /* Default to 'error also when the fluid's value is not one of + the valid symbols. */ + handler = SCM_FAILED_CONVERSION_ERROR; + } -SCM_DEFINE (scm_flush_all_ports, "flush-all-ports", 0, 0, 0, - (), - "Equivalent to calling @code{force-output} on\n" - "all open output ports. The return value is unspecified.") -#define FUNC_NAME s_scm_flush_all_ports -{ - scm_c_port_for_each (&flush_output_port, NULL); - return SCM_UNSPECIFIED; + return handler; } -#undef FUNC_NAME -SCM_DEFINE (scm_read_char, "read-char", 0, 1, 0, - (SCM port), - "Return the next character available from @var{port}, updating\n" - "@var{port} to point to the following character. If no more\n" - "characters are available, the end-of-file object is returned.\n" - "\n" - "When @var{port}'s data cannot be decoded according to its\n" - "character encoding, a @code{decoding-error} is raised and\n" - "@var{port} points past the erroneous byte sequence.\n") -#define FUNC_NAME s_scm_read_char +/* Use HANDLER as the default conversion strategy for future ports. */ +void +scm_i_set_default_port_conversion_handler (scm_t_string_failed_conversion_handler + handler) { - scm_t_wchar c; - if (SCM_UNBNDP (port)) - port = scm_current_input_port (); - SCM_VALIDATE_OPINPORT (1, port); - c = scm_getc (port); - if (EOF == c) - return SCM_EOF_VAL; - return SCM_MAKE_CHAR (c); -} -#undef FUNC_NAME + SCM strategy; -/* Update the line and column number of PORT after consumption of C. */ -static inline void -update_port_lf (scm_t_wchar c, SCM port) -{ - switch (c) + if (!scm_conversion_strategy_init + || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var))) + scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized", + SCM_EOL); + + switch (handler) { - case '\a': - case EOF: - break; - case '\b': - SCM_DECCOL (port); - break; - case '\n': - SCM_INCLINE (port); + case SCM_FAILED_CONVERSION_ERROR: + strategy = sym_error; break; - case '\r': - SCM_ZEROCOL (port); + + case SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE: + strategy = sym_escape; break; - case '\t': - SCM_TABCOL (port); + + case SCM_FAILED_CONVERSION_QUESTION_MARK: + strategy = sym_substitute; break; + default: - SCM_INCCOL (port); - break; + abort (); } + + scm_fluid_set_x (SCM_VARIABLE_REF (default_conversion_strategy_var), + strategy); } -#define SCM_MBCHAR_BUF_SIZE (4) +static void +finalize_iconv_descriptors (void *ptr, void *data) +{ + close_iconv_descriptors (ptr); +} -/* Convert the SIZE-byte UTF-8 sequence in UTF8_BUF to a codepoint. - UTF8_BUF is assumed to contain a valid UTF-8 sequence. */ -static scm_t_wchar -utf8_to_codepoint (const scm_t_uint8 *utf8_buf, size_t size) +static scm_t_iconv_descriptors * +open_iconv_descriptors (const char *encoding, int reading, int writing) { - scm_t_wchar codepoint; + scm_t_iconv_descriptors *id; + iconv_t input_cd, output_cd; + size_t i; - if (utf8_buf[0] <= 0x7f) - { - assert (size == 1); - codepoint = utf8_buf[0]; - } - else if ((utf8_buf[0] & 0xe0) == 0xc0) - { - assert (size == 2); - codepoint = ((scm_t_wchar) utf8_buf[0] & 0x1f) << 6UL - | (utf8_buf[1] & 0x3f); - } - else if ((utf8_buf[0] & 0xf0) == 0xe0) + input_cd = (iconv_t) -1; + output_cd = (iconv_t) -1; + + for (i = 0; encoding[i]; i++) + if (encoding[i] > 127) + goto invalid_encoding; + + if (reading) { - assert (size == 3); - codepoint = ((scm_t_wchar) utf8_buf[0] & 0x0f) << 12UL - | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 6UL - | (utf8_buf[2] & 0x3f); + /* Open an input iconv conversion descriptor, from ENCODING + to UTF-8. We choose UTF-8, not UTF-32, because iconv + implementations can typically convert from anything to + UTF-8, but not to UTF-32 (see + ). */ + + /* Assume opening an iconv descriptor causes about 16 KB of + allocation. */ + scm_gc_register_allocation (16 * 1024); + + input_cd = iconv_open ("UTF-8", encoding); + if (input_cd == (iconv_t) -1) + goto invalid_encoding; } - else + + if (writing) { - assert (size == 4); - codepoint = ((scm_t_wchar) utf8_buf[0] & 0x07) << 18UL - | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 12UL - | ((scm_t_wchar) utf8_buf[2] & 0x3f) << 6UL - | (utf8_buf[3] & 0x3f); + /* Assume opening an iconv descriptor causes about 16 KB of + allocation. */ + scm_gc_register_allocation (16 * 1024); + + output_cd = iconv_open (encoding, "UTF-8"); + if (output_cd == (iconv_t) -1) + { + if (input_cd != (iconv_t) -1) + iconv_close (input_cd); + goto invalid_encoding; + } } - return codepoint; + id = scm_gc_malloc_pointerless (sizeof (*id), "iconv descriptors"); + id->input_cd = input_cd; + id->output_cd = output_cd; + + /* Register a finalizer to close the descriptors. */ + scm_i_set_finalizer (id, finalize_iconv_descriptors, NULL); + + return id; + + invalid_encoding: + { + SCM err; + err = scm_from_latin1_string (encoding); + scm_misc_error ("open_iconv_descriptors", + "invalid or unknown character encoding ~s", + scm_list_1 (err)); + } } -/* Read a UTF-8 sequence from PORT. On success, return 0 and set - *CODEPOINT to the codepoint that was read, fill BUF with its UTF-8 - representation, and set *LEN to the length in bytes. Return - `EILSEQ' on error. */ -static int -get_utf8_codepoint (SCM port, scm_t_wchar *codepoint, - scm_t_uint8 buf[SCM_MBCHAR_BUF_SIZE], size_t *len) +static void +close_iconv_descriptors (scm_t_iconv_descriptors *id) { -#define ASSERT_NOT_EOF(b) \ - if (SCM_UNLIKELY ((b) == EOF)) \ - goto invalid_seq -#define CONSUME_PEEKED_BYTE() \ - pt->read_pos++ + if (id->input_cd != (iconv_t) -1) + iconv_close (id->input_cd); + if (id->output_cd != (iconv_t) -1) + iconv_close (id->output_cd); + id->input_cd = (void *) -1; + id->output_cd = (void *) -1; +} - int byte; +scm_t_iconv_descriptors * +scm_i_port_iconv_descriptors (SCM port) +{ scm_t_port *pt; - *len = 0; pt = SCM_PTAB_ENTRY (port); - byte = scm_get_byte_or_eof (port); - if (byte == EOF) + assert (pt->encoding_mode == SCM_PORT_ENCODING_MODE_ICONV); + + if (!pt->iconv_descriptors) { - *codepoint = EOF; - return 0; + if (!pt->encoding) + pt->encoding = "ISO-8859-1"; + pt->iconv_descriptors = + open_iconv_descriptors (pt->encoding, + SCM_INPUT_PORT_P (port), + SCM_OUTPUT_PORT_P (port)); } - buf[0] = (scm_t_uint8) byte; - *len = 1; - - if (buf[0] <= 0x7f) - /* 1-byte form. */ - *codepoint = buf[0]; - else if (buf[0] >= 0xc2 && buf[0] <= 0xdf) - { - /* 2-byte form. */ - byte = scm_peek_byte_or_eof (port); - ASSERT_NOT_EOF (byte); + return pt->iconv_descriptors; +} + +/* The name of the encoding is itself encoded in ASCII. */ +void +scm_i_set_port_encoding_x (SCM port, const char *encoding) +{ + scm_t_port *pt; + scm_t_iconv_descriptors *prev; + + /* Set the character encoding for this port. */ + pt = SCM_PTAB_ENTRY (port); + prev = pt->iconv_descriptors; + + if (encoding_matches (encoding, "UTF-8")) + { + pt->encoding = "UTF-8"; + pt->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8; + pt->iconv_descriptors = NULL; + } + else if (encoding_matches (encoding, "ISO-8859-1")) + { + pt->encoding = "ISO-8859-1"; + pt->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1; + pt->iconv_descriptors = NULL; + } + else + { + /* Open descriptors before mutating the port. */ + char *gc_encoding = canonicalize_encoding (encoding); + pt->iconv_descriptors = + open_iconv_descriptors (gc_encoding, + SCM_INPUT_PORT_P (port), + SCM_OUTPUT_PORT_P (port)); + pt->encoding = gc_encoding; + pt->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV; + } + + if (prev) + close_iconv_descriptors (prev); +} + +SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0, + (SCM port), + "Returns, as a string, the character encoding that @var{port}\n" + "uses to interpret its input and output.\n") +#define FUNC_NAME s_scm_port_encoding +{ + SCM_VALIDATE_PORT (1, port); + + return scm_from_latin1_string (SCM_PTAB_ENTRY (port)->encoding); +} +#undef FUNC_NAME + +SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0, + (SCM port, SCM enc), + "Sets the character encoding that will be used to interpret all\n" + "port I/O. New ports are created with the encoding\n" + "appropriate for the current locale if @code{setlocale} has \n" + "been called or ISO-8859-1 otherwise\n" + "and this procedure can be used to modify that encoding.\n") +#define FUNC_NAME s_scm_set_port_encoding_x +{ + char *enc_str; + + SCM_VALIDATE_PORT (1, port); + SCM_VALIDATE_STRING (2, enc); + + enc_str = scm_to_latin1_string (enc); + scm_i_set_port_encoding_x (port, enc_str); + free (enc_str); + + return SCM_UNSPECIFIED; +} +#undef FUNC_NAME + +SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy", + 1, 0, 0, (SCM port), + "Returns the behavior of the port when handling a character that\n" + "is not representable in the port's current encoding.\n" + "It returns the symbol @code{error} if unrepresentable characters\n" + "should cause exceptions, @code{substitute} if the port should\n" + "try to replace unrepresentable characters with question marks or\n" + "approximate characters, or @code{escape} if unrepresentable\n" + "characters should be converted to string escapes.\n" + "\n" + "If @var{port} is @code{#f}, then the current default behavior\n" + "will be returned. New ports will have this default behavior\n" + "when they are created.\n") +#define FUNC_NAME s_scm_port_conversion_strategy +{ + scm_t_string_failed_conversion_handler h; + + SCM_VALIDATE_OPPORT (1, port); + + if (scm_is_false (port)) + h = scm_i_default_port_conversion_handler (); + else + { + scm_t_port *pt; + + SCM_VALIDATE_OPPORT (1, port); + pt = SCM_PTAB_ENTRY (port); + + h = pt->ilseq_handler; + } + + if (h == SCM_FAILED_CONVERSION_ERROR) + return scm_from_latin1_symbol ("error"); + else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK) + return scm_from_latin1_symbol ("substitute"); + else if (h == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE) + return scm_from_latin1_symbol ("escape"); + else + abort (); + + /* Never gets here. */ + return SCM_UNDEFINED; +} +#undef FUNC_NAME + +SCM_DEFINE (scm_set_port_conversion_strategy_x, "set-port-conversion-strategy!", + 2, 0, 0, + (SCM port, SCM sym), + "Sets the behavior of the interpreter when outputting a character\n" + "that is not representable in the port's current encoding.\n" + "@var{sym} can be either @code{'error}, @code{'substitute}, or\n" + "@code{'escape}. If it is @code{'error}, an error will be thrown\n" + "when an unconvertible character is encountered. If it is\n" + "@code{'substitute}, then unconvertible characters will \n" + "be replaced with approximate characters, or with question marks\n" + "if no approximately correct character is available.\n" + "If it is @code{'escape},\n" + "it will appear as a hex escape when output.\n" + "\n" + "If @var{port} is an open port, the conversion error behavior\n" + "is set for that port. If it is @code{#f}, it is set as the\n" + "default behavior for any future ports that get created in\n" + "this thread.\n") +#define FUNC_NAME s_scm_set_port_conversion_strategy_x +{ + scm_t_string_failed_conversion_handler handler; + + if (scm_is_eq (sym, sym_error)) + handler = SCM_FAILED_CONVERSION_ERROR; + else if (scm_is_eq (sym, sym_substitute)) + handler = SCM_FAILED_CONVERSION_QUESTION_MARK; + else if (scm_is_eq (sym, sym_escape)) + handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE; + else + SCM_MISC_ERROR ("unknown conversion strategy ~s", scm_list_1 (sym)); + + if (scm_is_false (port)) + scm_i_set_default_port_conversion_handler (handler); + else + { + SCM_VALIDATE_OPPORT (1, port); + SCM_PTAB_ENTRY (port)->ilseq_handler = handler; + } + + return SCM_UNSPECIFIED; +} +#undef FUNC_NAME + + + + +/* The port lock. */ + +static void +lock_port (void *mutex) +{ + scm_i_pthread_mutex_lock (mutex); +} + +static void +unlock_port (void *mutex) +{ + scm_i_pthread_mutex_unlock (mutex); +} + +void +scm_dynwind_lock_port (SCM port) +#define FUNC_NAME "dynwind-lock-port" +{ + scm_i_pthread_mutex_t *lock; + SCM_VALIDATE_OPPORT (SCM_ARG1, port); + scm_c_lock_port (port, &lock); + if (lock) + { + scm_dynwind_unwind_handler (unlock_port, lock, SCM_F_WIND_EXPLICITLY); + scm_dynwind_rewind_handler (lock_port, lock, 0); + } +} +#undef FUNC_NAME + + + + +/* Input. */ + +int +scm_get_byte_or_eof (SCM port) +{ + scm_i_pthread_mutex_t *lock; + int ret; + + scm_c_lock_port (port, &lock); + ret = scm_get_byte_or_eof_unlocked (port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + + return ret; +} + +int +scm_peek_byte_or_eof (SCM port) +{ + scm_i_pthread_mutex_t *lock; + int ret; + + scm_c_lock_port (port, &lock); + ret = scm_peek_byte_or_eof_unlocked (port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + + return ret; +} + +/* scm_c_read + * + * Used by an application to read arbitrary number of bytes from an + * SCM port. Same semantics as libc read, except that scm_c_read only + * returns less than SIZE bytes if at end-of-file. + * + * Warning: Doesn't update port line and column counts! */ + +/* This structure, and the following swap_buffer function, are used + for temporarily swapping a port's own read buffer, and the buffer + that the caller of scm_c_read provides. */ +struct port_and_swap_buffer +{ + scm_t_port *pt; + unsigned char *buffer; + size_t size; +}; + +static void +swap_buffer (void *data) +{ + struct port_and_swap_buffer *psb = (struct port_and_swap_buffer *) data; + unsigned char *old_buf = psb->pt->read_buf; + size_t old_size = psb->pt->read_buf_size; + + /* Make the port use (buffer, size) from the struct. */ + psb->pt->read_pos = psb->pt->read_buf = psb->pt->read_end = psb->buffer; + psb->pt->read_buf_size = psb->size; + + /* Save the port's old (buffer, size) in the struct. */ + psb->buffer = old_buf; + psb->size = old_size; +} + +size_t +scm_c_read_unlocked (SCM port, void *buffer, size_t size) +#define FUNC_NAME "scm_c_read" +{ + scm_t_port *pt; + size_t n_read = 0, n_available; + struct port_and_swap_buffer psb; + + SCM_VALIDATE_OPINPORT (1, port); + + pt = SCM_PTAB_ENTRY (port); + if (pt->rw_active == SCM_PORT_WRITE) + SCM_PORT_DESCRIPTOR (port)->flush (port); + + if (pt->rw_random) + pt->rw_active = SCM_PORT_READ; + + /* Take bytes first from the port's read buffer. */ + if (pt->read_pos < pt->read_end) + { + n_available = min (size, pt->read_end - pt->read_pos); + memcpy (buffer, pt->read_pos, n_available); + buffer = (char *) buffer + n_available; + pt->read_pos += n_available; + n_read += n_available; + size -= n_available; + } + + /* Avoid the scm_dynwind_* costs if we now have enough data. */ + if (size == 0) + return n_read; + + /* Now we will call scm_fill_input repeatedly until we have read the + requested number of bytes. (Note that a single scm_fill_input + call does not guarantee to fill the whole of the port's read + buffer.) */ + if (pt->read_buf_size <= 1 + && pt->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1) + { + /* The port that we are reading from is unbuffered - i.e. does + not have its own persistent buffer - but we have a buffer, + provided by our caller, that is the right size for the data + that is wanted. For the following scm_fill_input calls, + therefore, we use the buffer in hand as the port's read + buffer. + + We need to make sure that the port's normal (1 byte) buffer + is reinstated in case one of the scm_fill_input () calls + throws an exception; we use the scm_dynwind_* API to achieve + that. + + A consequence of this optimization is that the fill_input + functions can't unget characters. That'll push data to the + pushback buffer instead of this psb buffer. */ +#if SCM_DEBUG == 1 + unsigned char *pback = pt->putback_buf; +#endif + psb.pt = pt; + psb.buffer = buffer; + psb.size = size; + scm_dynwind_begin (SCM_F_DYNWIND_REWINDABLE); + scm_dynwind_rewind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY); + scm_dynwind_unwind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY); + + /* Call scm_fill_input until we have all the bytes that we need, + or we hit EOF. */ + while (pt->read_buf_size && (scm_fill_input_unlocked (port) != EOF)) + { + pt->read_buf_size -= (pt->read_end - pt->read_pos); + pt->read_pos = pt->read_buf = pt->read_end; + } +#if SCM_DEBUG == 1 + if (pback != pt->putback_buf + || pt->read_buf - (unsigned char *) buffer < 0) + scm_misc_error (FUNC_NAME, + "scm_c_read must not call a fill function that pushes " + "back characters onto an unbuffered port", SCM_EOL); +#endif + n_read += pt->read_buf - (unsigned char *) buffer; + + /* Reinstate the port's normal buffer. */ + scm_dynwind_end (); + } + else + { + /* The port has its own buffer. It is important that we use it, + even if it happens to be smaller than our caller's buffer, so + that a custom port implementation's entry points (in + particular, fill_input) can rely on the buffer always being + the same as they first set up. */ + while (size && (scm_fill_input_unlocked (port) != EOF)) + { + n_available = min (size, pt->read_end - pt->read_pos); + memcpy (buffer, pt->read_pos, n_available); + buffer = (char *) buffer + n_available; + pt->read_pos += n_available; + n_read += n_available; + size -= n_available; + } + } + + return n_read; +} +#undef FUNC_NAME + +size_t +scm_c_read (SCM port, void *buffer, size_t size) +{ + scm_i_pthread_mutex_t *lock; + size_t ret; + + scm_c_lock_port (port, &lock); + ret = scm_c_read_unlocked (port, buffer, size); + if (lock) + scm_i_pthread_mutex_unlock (lock); + + + return ret; +} + +/* Update the line and column number of PORT after consumption of C. */ +static inline void +update_port_lf (scm_t_wchar c, SCM port) +{ + switch (c) + { + case '\a': + case EOF: + break; + case '\b': + SCM_DECCOL (port); + break; + case '\n': + SCM_INCLINE (port); + break; + case '\r': + SCM_ZEROCOL (port); + break; + case '\t': + SCM_TABCOL (port); + break; + default: + SCM_INCCOL (port); + break; + } +} + +#define SCM_MBCHAR_BUF_SIZE (4) + +/* Convert the SIZE-byte UTF-8 sequence in UTF8_BUF to a codepoint. + UTF8_BUF is assumed to contain a valid UTF-8 sequence. */ +static scm_t_wchar +utf8_to_codepoint (const scm_t_uint8 *utf8_buf, size_t size) +{ + scm_t_wchar codepoint; + + if (utf8_buf[0] <= 0x7f) + { + assert (size == 1); + codepoint = utf8_buf[0]; + } + else if ((utf8_buf[0] & 0xe0) == 0xc0) + { + assert (size == 2); + codepoint = ((scm_t_wchar) utf8_buf[0] & 0x1f) << 6UL + | (utf8_buf[1] & 0x3f); + } + else if ((utf8_buf[0] & 0xf0) == 0xe0) + { + assert (size == 3); + codepoint = ((scm_t_wchar) utf8_buf[0] & 0x0f) << 12UL + | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 6UL + | (utf8_buf[2] & 0x3f); + } + else + { + assert (size == 4); + codepoint = ((scm_t_wchar) utf8_buf[0] & 0x07) << 18UL + | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 12UL + | ((scm_t_wchar) utf8_buf[2] & 0x3f) << 6UL + | (utf8_buf[3] & 0x3f); + } + + return codepoint; +} + +/* Read a UTF-8 sequence from PORT. On success, return 0 and set + *CODEPOINT to the codepoint that was read, fill BUF with its UTF-8 + representation, and set *LEN to the length in bytes. Return + `EILSEQ' on error. */ +static int +get_utf8_codepoint (SCM port, scm_t_wchar *codepoint, + scm_t_uint8 buf[SCM_MBCHAR_BUF_SIZE], size_t *len) +{ +#define ASSERT_NOT_EOF(b) \ + if (SCM_UNLIKELY ((b) == EOF)) \ + goto invalid_seq +#define CONSUME_PEEKED_BYTE() \ + pt->read_pos++ + + int byte; + scm_t_port *pt; + + *len = 0; + pt = SCM_PTAB_ENTRY (port); + + byte = scm_get_byte_or_eof_unlocked (port); + if (byte == EOF) + { + *codepoint = EOF; + return 0; + } + + buf[0] = (scm_t_uint8) byte; + *len = 1; + + if (buf[0] <= 0x7f) + /* 1-byte form. */ + *codepoint = buf[0]; + else if (buf[0] >= 0xc2 && buf[0] <= 0xdf) + { + /* 2-byte form. */ + byte = scm_peek_byte_or_eof_unlocked (port); + ASSERT_NOT_EOF (byte); if (SCM_UNLIKELY ((byte & 0xc0) != 0x80)) goto invalid_seq; @@ -1202,7 +1576,7 @@ get_utf8_codepoint (SCM port, scm_t_wchar *codepoint, else if ((buf[0] & 0xf0) == 0xe0) { /* 3-byte form. */ - byte = scm_peek_byte_or_eof (port); + byte = scm_peek_byte_or_eof_unlocked (port); ASSERT_NOT_EOF (byte); if (SCM_UNLIKELY ((byte & 0xc0) != 0x80 @@ -1214,7 +1588,7 @@ get_utf8_codepoint (SCM port, scm_t_wchar *codepoint, buf[1] = (scm_t_uint8) byte; *len = 2; - byte = scm_peek_byte_or_eof (port); + byte = scm_peek_byte_or_eof_unlocked (port); ASSERT_NOT_EOF (byte); if (SCM_UNLIKELY ((byte & 0xc0) != 0x80)) @@ -1231,7 +1605,7 @@ get_utf8_codepoint (SCM port, scm_t_wchar *codepoint, else if (buf[0] >= 0xf0 && buf[0] <= 0xf4) { /* 4-byte form. */ - byte = scm_peek_byte_or_eof (port); + byte = scm_peek_byte_or_eof_unlocked (port); ASSERT_NOT_EOF (byte); if (SCM_UNLIKELY (((byte & 0xc0) != 0x80) @@ -1243,7 +1617,7 @@ get_utf8_codepoint (SCM port, scm_t_wchar *codepoint, buf[1] = (scm_t_uint8) byte; *len = 2; - byte = scm_peek_byte_or_eof (port); + byte = scm_peek_byte_or_eof_unlocked (port); ASSERT_NOT_EOF (byte); if (SCM_UNLIKELY ((byte & 0xc0) != 0x80)) @@ -1253,7 +1627,7 @@ get_utf8_codepoint (SCM port, scm_t_wchar *codepoint, buf[2] = (scm_t_uint8) byte; *len = 3; - byte = scm_peek_byte_or_eof (port); + byte = scm_peek_byte_or_eof_unlocked (port); ASSERT_NOT_EOF (byte); if (SCM_UNLIKELY ((byte & 0xc0) != 0x80)) @@ -1285,19 +1659,39 @@ get_utf8_codepoint (SCM port, scm_t_wchar *codepoint, #undef ASSERT_NOT_EOF } +/* Read an ISO-8859-1 codepoint (a byte) from PORT. On success, return + 0 and set *CODEPOINT to the codepoint that was read, fill BUF with + its UTF-8 representation, and set *LEN to the length in bytes. + Return `EILSEQ' on error. */ +static int +get_latin1_codepoint (SCM port, scm_t_wchar *codepoint, + char buf[SCM_MBCHAR_BUF_SIZE], size_t *len) +{ + *codepoint = scm_get_byte_or_eof_unlocked (port); + + if (*codepoint == EOF) + *len = 0; + else + { + *len = 1; + buf[0] = *codepoint; + } + return 0; +} + /* Likewise, read a byte sequence from PORT, passing it through its input conversion descriptor. */ static int get_iconv_codepoint (SCM port, scm_t_wchar *codepoint, char buf[SCM_MBCHAR_BUF_SIZE], size_t *len) { - scm_t_port *pt; + scm_t_iconv_descriptors *id; int err, byte_read; size_t bytes_consumed, output_size; char *output; scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE]; - pt = SCM_PTAB_ENTRY (port); + id = scm_i_port_iconv_descriptors (port); for (output_size = 0, output = (char *) utf8_buf, bytes_consumed = 0, err = 0; @@ -1308,7 +1702,7 @@ get_iconv_codepoint (SCM port, scm_t_wchar *codepoint, char *input; size_t input_left, output_left, done; - byte_read = scm_get_byte_or_eof (port); + byte_read = scm_get_byte_or_eof_unlocked (port); if (byte_read == EOF) { if (bytes_consumed == 0) @@ -1327,8 +1721,7 @@ get_iconv_codepoint (SCM port, scm_t_wchar *codepoint, input_left = bytes_consumed + 1; output_left = sizeof (utf8_buf); - done = iconv (pt->input_cd, &input, &input_left, - &output, &output_left); + done = iconv (id->input_cd, &input, &input_left, &output, &output_left); if (done == (size_t) -1) { err = errno; @@ -1357,323 +1750,96 @@ get_iconv_codepoint (SCM port, scm_t_wchar *codepoint, with the byte representation of the codepoint in PORT's encoding, and set *LEN to the length in bytes of that representation. Return 0 on success and an errno value on error. */ -static int +static SCM_C_INLINE int get_codepoint (SCM port, scm_t_wchar *codepoint, char buf[SCM_MBCHAR_BUF_SIZE], size_t *len) { int err; scm_t_port *pt = SCM_PTAB_ENTRY (port); - if (pt->input_cd == (iconv_t) -1) - /* Initialize the conversion descriptors, if needed. */ - scm_i_set_port_encoding_x (port, pt->encoding); - - /* FIXME: In 2.1, add a flag to determine whether a port is UTF-8. */ - if (pt->input_cd == (iconv_t) -1) + if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8) err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len); + else if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1) + err = get_latin1_codepoint (port, codepoint, buf, len); else err = get_iconv_codepoint (port, codepoint, buf, len); if (SCM_LIKELY (err == 0)) update_port_lf (*codepoint, port); else if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK) - { - *codepoint = '?'; - err = 0; - update_port_lf (*codepoint, port); - } - - return err; -} - -/* Read a codepoint from PORT and return it. */ -scm_t_wchar -scm_getc (SCM port) -#define FUNC_NAME "scm_getc" -{ - int err; - size_t len; - scm_t_wchar codepoint; - char buf[SCM_MBCHAR_BUF_SIZE]; - - err = get_codepoint (port, &codepoint, buf, &len); - if (SCM_UNLIKELY (err != 0)) - /* At this point PORT should point past the invalid encoding, as per - R6RS-lib Section 8.2.4. */ - scm_decoding_error (FUNC_NAME, err, "input decoding error", port); - - return codepoint; -} -#undef FUNC_NAME - -/* this should only be called when the read buffer is empty. it - tries to refill the read buffer. it returns the first char from - the port, which is either EOF or *(pt->read_pos). */ -int -scm_fill_input (SCM port) -{ - scm_t_port *pt = SCM_PTAB_ENTRY (port); - - assert (pt->read_pos == pt->read_end); - - if (pt->read_buf == pt->putback_buf) - { - /* finished reading put-back chars. */ - pt->read_buf = pt->saved_read_buf; - pt->read_pos = pt->saved_read_pos; - pt->read_end = pt->saved_read_end; - pt->read_buf_size = pt->saved_read_buf_size; - if (pt->read_pos < pt->read_end) - return *(pt->read_pos); - } - return scm_ptobs[SCM_PTOBNUM (port)].fill_input (port); -} - - -/* scm_lfwrite - * - * This function differs from scm_c_write; it updates port line and - * column. */ - -void -scm_lfwrite (const char *ptr, size_t size, SCM port) -{ - scm_t_port *pt = SCM_PTAB_ENTRY (port); - scm_t_ptob_descriptor *ptob = &scm_ptobs[SCM_PTOBNUM (port)]; - - if (pt->rw_active == SCM_PORT_READ) - scm_end_input (port); - - ptob->write (port, ptr, size); - - for (; size; ptr++, size--) - update_port_lf ((scm_t_wchar) (unsigned char) *ptr, port); - - if (pt->rw_random) - pt->rw_active = SCM_PORT_WRITE; -} - -/* Write STR to PORT from START inclusive to END exclusive. */ -void -scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port) -{ - scm_t_port *pt = SCM_PTAB_ENTRY (port); - - if (pt->rw_active == SCM_PORT_READ) - scm_end_input (port); - - if (end == (size_t) -1) - end = scm_i_string_length (str); - - scm_display (scm_c_substring (str, start, end), port); - - if (pt->rw_random) - pt->rw_active = SCM_PORT_WRITE; -} - -/* scm_c_read - * - * Used by an application to read arbitrary number of bytes from an - * SCM port. Same semantics as libc read, except that scm_c_read only - * returns less than SIZE bytes if at end-of-file. - * - * Warning: Doesn't update port line and column counts! */ - -/* This structure, and the following swap_buffer function, are used - for temporarily swapping a port's own read buffer, and the buffer - that the caller of scm_c_read provides. */ -struct port_and_swap_buffer -{ - scm_t_port *pt; - unsigned char *buffer; - size_t size; -}; - -static void -swap_buffer (void *data) -{ - struct port_and_swap_buffer *psb = (struct port_and_swap_buffer *) data; - unsigned char *old_buf = psb->pt->read_buf; - size_t old_size = psb->pt->read_buf_size; - - /* Make the port use (buffer, size) from the struct. */ - psb->pt->read_pos = psb->pt->read_buf = psb->pt->read_end = psb->buffer; - psb->pt->read_buf_size = psb->size; - - /* Save the port's old (buffer, size) in the struct. */ - psb->buffer = old_buf; - psb->size = old_size; -} - -size_t -scm_c_read (SCM port, void *buffer, size_t size) -#define FUNC_NAME "scm_c_read" -{ - scm_t_port *pt; - size_t n_read = 0, n_available; - struct port_and_swap_buffer psb; - - SCM_VALIDATE_OPINPORT (1, port); - - pt = SCM_PTAB_ENTRY (port); - if (pt->rw_active == SCM_PORT_WRITE) - scm_ptobs[SCM_PTOBNUM (port)].flush (port); - - if (pt->rw_random) - pt->rw_active = SCM_PORT_READ; - - /* Take bytes first from the port's read buffer. */ - if (pt->read_pos < pt->read_end) - { - n_available = min (size, pt->read_end - pt->read_pos); - memcpy (buffer, pt->read_pos, n_available); - buffer = (char *) buffer + n_available; - pt->read_pos += n_available; - n_read += n_available; - size -= n_available; - } - - /* Avoid the scm_dynwind_* costs if we now have enough data. */ - if (size == 0) - return n_read; - - /* Now we will call scm_fill_input repeatedly until we have read the - requested number of bytes. (Note that a single scm_fill_input - call does not guarantee to fill the whole of the port's read - buffer.) */ - if (pt->read_buf_size <= 1 && pt->encoding == NULL) - { - /* The port that we are reading from is unbuffered - i.e. does - not have its own persistent buffer - but we have a buffer, - provided by our caller, that is the right size for the data - that is wanted. For the following scm_fill_input calls, - therefore, we use the buffer in hand as the port's read - buffer. - - We need to make sure that the port's normal (1 byte) buffer - is reinstated in case one of the scm_fill_input () calls - throws an exception; we use the scm_dynwind_* API to achieve - that. - - A consequence of this optimization is that the fill_input - functions can't unget characters. That'll push data to the - pushback buffer instead of this psb buffer. */ -#if SCM_DEBUG == 1 - unsigned char *pback = pt->putback_buf; -#endif - psb.pt = pt; - psb.buffer = buffer; - psb.size = size; - scm_dynwind_begin (SCM_F_DYNWIND_REWINDABLE); - scm_dynwind_rewind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY); - scm_dynwind_unwind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY); - - /* Call scm_fill_input until we have all the bytes that we need, - or we hit EOF. */ - while (pt->read_buf_size && (scm_fill_input (port) != EOF)) - { - pt->read_buf_size -= (pt->read_end - pt->read_pos); - pt->read_pos = pt->read_buf = pt->read_end; - } -#if SCM_DEBUG == 1 - if (pback != pt->putback_buf - || pt->read_buf - (unsigned char *) buffer < 0) - scm_misc_error (FUNC_NAME, - "scm_c_read must not call a fill function that pushes " - "back characters onto an unbuffered port", SCM_EOL); -#endif - n_read += pt->read_buf - (unsigned char *) buffer; - - /* Reinstate the port's normal buffer. */ - scm_dynwind_end (); - } - else - { - /* The port has its own buffer. It is important that we use it, - even if it happens to be smaller than our caller's buffer, so - that a custom port implementation's entry points (in - particular, fill_input) can rely on the buffer always being - the same as they first set up. */ - while (size && (scm_fill_input (port) != EOF)) - { - n_available = min (size, pt->read_end - pt->read_pos); - memcpy (buffer, pt->read_pos, n_available); - buffer = (char *) buffer + n_available; - pt->read_pos += n_available; - n_read += n_available; - size -= n_available; - } + { + *codepoint = '?'; + err = 0; + update_port_lf (*codepoint, port); } - return n_read; + return err; } -#undef FUNC_NAME - -/* scm_c_write - * - * Used by an application to write arbitrary number of bytes to an SCM - * port. Similar semantics as libc write. However, unlike libc - * write, scm_c_write writes the requested number of bytes and has no - * return value. - * - * Warning: Doesn't update port line and column counts! - */ -void -scm_c_write (SCM port, const void *ptr, size_t size) -#define FUNC_NAME "scm_c_write" +/* Read a codepoint from PORT and return it. */ +scm_t_wchar +scm_getc_unlocked (SCM port) +#define FUNC_NAME "scm_getc" { - scm_t_port *pt; - scm_t_ptob_descriptor *ptob; - - SCM_VALIDATE_OPOUTPORT (1, port); - - pt = SCM_PTAB_ENTRY (port); - ptob = &scm_ptobs[SCM_PTOBNUM (port)]; - - if (pt->rw_active == SCM_PORT_READ) - scm_end_input (port); + int err; + size_t len; + scm_t_wchar codepoint; + char buf[SCM_MBCHAR_BUF_SIZE]; - ptob->write (port, ptr, size); + err = get_codepoint (port, &codepoint, buf, &len); + if (SCM_UNLIKELY (err != 0)) + /* At this point PORT should point past the invalid encoding, as per + R6RS-lib Section 8.2.4. */ + scm_decoding_error (FUNC_NAME, err, "input decoding error", port); - if (pt->rw_random) - pt->rw_active = SCM_PORT_WRITE; + return codepoint; } #undef FUNC_NAME -void -scm_flush (SCM port) +scm_t_wchar +scm_getc (SCM port) { - long i = SCM_PTOBNUM (port); - assert (i >= 0); - (scm_ptobs[i].flush) (port); -} + scm_i_pthread_mutex_t *lock; + scm_t_wchar ret; -void -scm_end_input (SCM port) -{ - long offset; - scm_t_port *pt = SCM_PTAB_ENTRY (port); + scm_c_lock_port (port, &lock); + ret = scm_getc_unlocked (port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + - if (pt->read_buf == pt->putback_buf) - { - offset = pt->read_end - pt->read_pos; - pt->read_buf = pt->saved_read_buf; - pt->read_pos = pt->saved_read_pos; - pt->read_end = pt->saved_read_end; - pt->read_buf_size = pt->saved_read_buf_size; - } - else - offset = 0; + return ret; +} - scm_ptobs[SCM_PTOBNUM (port)].end_input (port, offset); +SCM_DEFINE (scm_read_char, "read-char", 0, 1, 0, + (SCM port), + "Return the next character available from @var{port}, updating\n" + "@var{port} to point to the following character. If no more\n" + "characters are available, the end-of-file object is returned.\n" + "\n" + "When @var{port}'s data cannot be decoded according to its\n" + "character encoding, a @code{decoding-error} is raised and\n" + "@var{port} points past the erroneous byte sequence.\n") +#define FUNC_NAME s_scm_read_char +{ + scm_t_wchar c; + if (SCM_UNBNDP (port)) + port = scm_current_input_port (); + SCM_VALIDATE_OPINPORT (1, port); + c = scm_getc_unlocked (port); + if (EOF == c) + return SCM_EOF_VAL; + return SCM_MAKE_CHAR (c); } +#undef FUNC_NAME + +/* Pushback. */ void -scm_unget_byte (int c, SCM port) +scm_unget_byte_unlocked (int c, SCM port) #define FUNC_NAME "scm_unget_byte" { scm_t_port *pt = SCM_PTAB_ENTRY (port); @@ -1736,24 +1902,29 @@ scm_unget_byte (int c, SCM port) } #undef FUNC_NAME +void +scm_unget_byte (int c, SCM port) +{ + scm_i_pthread_mutex_t *lock; + scm_c_lock_port (port, &lock); + scm_unget_byte_unlocked (c, port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + +} + void -scm_ungetc (scm_t_wchar c, SCM port) +scm_ungetc_unlocked (scm_t_wchar c, SCM port) #define FUNC_NAME "scm_ungetc" { scm_t_port *pt = SCM_PTAB_ENTRY (port); char *result; char result_buf[10]; - const char *encoding; size_t len; int i; - if (pt->encoding != NULL) - encoding = pt->encoding; - else - encoding = "ISO-8859-1"; - len = sizeof (result_buf); - result = u32_conv_to_encoding (encoding, + result = u32_conv_to_encoding (pt->encoding, (enum iconv_ilseq_handler) pt->ilseq_handler, (uint32_t *) &c, 1, NULL, result_buf, &len); @@ -1764,7 +1935,7 @@ scm_ungetc (scm_t_wchar c, SCM port) SCM_BOOL_F, SCM_MAKE_CHAR (c)); for (i = len - 1; i >= 0; i--) - scm_unget_byte (result[i], port); + scm_unget_byte_unlocked (result[i], port); if (SCM_UNLIKELY (result != result_buf)) free (result); @@ -1781,9 +1952,19 @@ scm_ungetc (scm_t_wchar c, SCM port) } #undef FUNC_NAME +void +scm_ungetc (scm_t_wchar c, SCM port) +{ + scm_i_pthread_mutex_t *lock; + scm_c_lock_port (port, &lock); + scm_ungetc_unlocked (c, port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + +} void -scm_ungets (const char *s, int n, SCM port) +scm_ungets_unlocked (const char *s, int n, SCM port) { /* This is simple minded and inefficient, but unreading strings is * probably not a common operation, and remember that line and @@ -1792,9 +1973,19 @@ scm_ungets (const char *s, int n, SCM port) * Please feel free to write an optimized version! */ while (n--) - scm_ungetc (s[n], port); + scm_ungetc_unlocked (s[n], port); } +void +scm_ungets (const char *s, int n, SCM port) +{ + scm_i_pthread_mutex_t *lock; + scm_c_lock_port (port, &lock); + scm_ungets_unlocked (s, n, port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + +} SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0, (SCM port), @@ -1836,7 +2027,7 @@ SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0, err = get_codepoint (port, &c, bytes, &len); for (i = len - 1; i >= 0; i--) - scm_unget_byte (bytes[i], port); + scm_unget_byte_unlocked (bytes[i], port); SCM_COL (port) = column; SCM_LINUM (port) = line; @@ -1875,7 +2066,7 @@ SCM_DEFINE (scm_unread_char, "unread-char", 1, 1, 0, c = SCM_CHAR (cobj); - scm_ungetc (c, port); + scm_ungetc_unlocked (c, port); return cobj; } #undef FUNC_NAME @@ -1897,587 +2088,642 @@ SCM_DEFINE (scm_unread_string, "unread-string", 2, 0, 0, n = scm_i_string_length (str); while (n--) - scm_ungetc (scm_i_string_ref (str, n), port); + scm_ungetc_unlocked (scm_i_string_ref (str, n), port); return str; } #undef FUNC_NAME -SCM_DEFINE (scm_seek, "seek", 3, 0, 0, - (SCM fd_port, SCM offset, SCM whence), - "Sets the current position of @var{fd_port} to the integer\n" - "@var{offset}, which is interpreted according to the value of\n" - "@var{whence}.\n" - "\n" - "One of the following variables should be supplied for\n" - "@var{whence}:\n" - "@defvar SEEK_SET\n" - "Seek from the beginning of the file.\n" - "@end defvar\n" - "@defvar SEEK_CUR\n" - "Seek from the current position.\n" - "@end defvar\n" - "@defvar SEEK_END\n" - "Seek from the end of the file.\n" - "@end defvar\n" - "If @var{fd_port} is a file descriptor, the underlying system\n" - "call is @code{lseek}. @var{port} may be a string port.\n" - "\n" - "The value returned is the new position in the file. This means\n" - "that the current position of a port can be obtained using:\n" - "@lisp\n" - "(seek port 0 SEEK_CUR)\n" - "@end lisp") -#define FUNC_NAME s_scm_seek + + + +/* Manipulating the buffers. */ + +/* This routine does not take any locks, as it is usually called as part + of a port implementation. */ +void +scm_port_non_buffer (scm_t_port *pt) { - int how; + pt->read_pos = pt->read_buf = pt->read_end = &pt->shortbuf; + pt->write_buf = pt->write_pos = &pt->shortbuf; + pt->read_buf_size = pt->write_buf_size = 1; + pt->write_end = pt->write_buf + pt->write_buf_size; +} - fd_port = SCM_COERCE_OUTPORT (fd_port); +/* this should only be called when the read buffer is empty. it + tries to refill the read buffer. it returns the first char from + the port, which is either EOF or *(pt->read_pos). */ +int +scm_fill_input_unlocked (SCM port) +{ + scm_t_port *pt = SCM_PTAB_ENTRY (port); - how = scm_to_int (whence); - if (how != SEEK_SET && how != SEEK_CUR && how != SEEK_END) - SCM_OUT_OF_RANGE (3, whence); + assert (pt->read_pos == pt->read_end); - if (SCM_OPPORTP (fd_port)) + if (pt->read_buf == pt->putback_buf) { - scm_t_ptob_descriptor *ptob = scm_ptobs + SCM_PTOBNUM (fd_port); - off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset); - off_t_or_off64_t rv; + /* finished reading put-back chars. */ + pt->read_buf = pt->saved_read_buf; + pt->read_pos = pt->saved_read_pos; + pt->read_end = pt->saved_read_end; + pt->read_buf_size = pt->saved_read_buf_size; + if (pt->read_pos < pt->read_end) + return *(pt->read_pos); + } + return SCM_PORT_DESCRIPTOR (port)->fill_input (port); +} - if (!ptob->seek) - SCM_MISC_ERROR ("port is not seekable", - scm_cons (fd_port, SCM_EOL)); - else - rv = ptob->seek (fd_port, off, how); - return scm_from_off_t_or_off64_t (rv); +int +scm_fill_input (SCM port) +{ + scm_i_pthread_mutex_t *lock; + int ret; + + scm_c_lock_port (port, &lock); + ret = scm_fill_input_unlocked (port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + + + return ret; +} + +/* Move up to READ_LEN bytes from PORT's putback and/or read buffers + into memory starting at DEST. Return the number of bytes moved. + PORT's line/column numbers are left unchanged. */ +size_t +scm_take_from_input_buffers (SCM port, char *dest, size_t read_len) +{ + scm_t_port *pt = SCM_PTAB_ENTRY (port); + size_t bytes_read = 0; + size_t from_buf = min (pt->read_end - pt->read_pos, read_len); + + if (from_buf > 0) + { + memcpy (dest, pt->read_pos, from_buf); + pt->read_pos += from_buf; + bytes_read += from_buf; + read_len -= from_buf; + dest += from_buf; } - else /* file descriptor?. */ + + /* if putback was active, try the real input buffer too. */ + if (pt->read_buf == pt->putback_buf) { - off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset); - off_t_or_off64_t rv; - rv = lseek_or_lseek64 (scm_to_int (fd_port), off, how); - if (rv == -1) - SCM_SYSERROR; - return scm_from_off_t_or_off64_t (rv); + from_buf = min (pt->saved_read_end - pt->saved_read_pos, read_len); + if (from_buf > 0) + { + memcpy (dest, pt->saved_read_pos, from_buf); + pt->saved_read_pos += from_buf; + bytes_read += from_buf; + } + } + + return bytes_read; +} + +/* Clear a port's read buffers, returning the contents. */ +SCM_DEFINE (scm_drain_input, "drain-input", 1, 0, 0, + (SCM port), + "This procedure clears a port's input buffers, similar\n" + "to the way that force-output clears the output buffer. The\n" + "contents of the buffers are returned as a single string, e.g.,\n" + "\n" + "@lisp\n" + "(define p (open-input-file ...))\n" + "(drain-input p) => empty string, nothing buffered yet.\n" + "(unread-char (read-char p) p)\n" + "(drain-input p) => initial chars from p, up to the buffer size.\n" + "@end lisp\n\n" + "Draining the buffers may be useful for cleanly finishing\n" + "buffered I/O so that the file descriptor can be used directly\n" + "for further input.") +#define FUNC_NAME s_scm_drain_input +{ + SCM result; + char *data; + scm_t_port *pt; + long count; + + SCM_VALIDATE_OPINPORT (1, port); + pt = SCM_PTAB_ENTRY (port); + + count = pt->read_end - pt->read_pos; + if (pt->read_buf == pt->putback_buf) + count += pt->saved_read_end - pt->saved_read_pos; + + if (count) + { + result = scm_i_make_string (count, &data, 0); + scm_take_from_input_buffers (port, data, count); } + else + result = scm_nullstr; + + return result; } #undef FUNC_NAME -#ifndef O_BINARY -#define O_BINARY 0 -#endif - -/* Mingw has ftruncate(), perhaps implemented above using chsize, but - doesn't have the filename version truncate(), hence this code. */ -#if HAVE_FTRUNCATE && ! HAVE_TRUNCATE -static int -truncate (const char *file, off_t length) +void +scm_end_input_unlocked (SCM port) { - int ret, fdes; - - fdes = open (file, O_BINARY | O_WRONLY); - if (fdes == -1) - return -1; + long offset; + scm_t_port *pt = SCM_PTAB_ENTRY (port); - ret = ftruncate (fdes, length); - if (ret == -1) + if (pt->read_buf == pt->putback_buf) { - int save_errno = errno; - close (fdes); - errno = save_errno; - return -1; + offset = pt->read_end - pt->read_pos; + pt->read_buf = pt->saved_read_buf; + pt->read_pos = pt->saved_read_pos; + pt->read_end = pt->saved_read_end; + pt->read_buf_size = pt->saved_read_buf_size; } + else + offset = 0; - return close (fdes); + SCM_PORT_DESCRIPTOR (port)->end_input (port, offset); } -#endif /* HAVE_FTRUNCATE && ! HAVE_TRUNCATE */ -SCM_DEFINE (scm_truncate_file, "truncate-file", 1, 1, 0, - (SCM object, SCM length), - "Truncate file @var{object} to @var{length} bytes. @var{object}\n" - "can be a filename string, a port object, or an integer file\n" - "descriptor.\n" - "The return value is unspecified.\n" - "\n" - "For a port or file descriptor @var{length} can be omitted, in\n" - "which case the file is truncated at the current position (per\n" - "@code{ftell} above).\n" - "\n" - "On most systems a file can be extended by giving a length\n" - "greater than the current size, but this is not mandatory in the\n" - "POSIX standard.") -#define FUNC_NAME s_scm_truncate_file +void +scm_end_input (SCM port) { - int rv; - - /* "object" can be a port, fdes or filename. - - Negative "length" makes no sense, but it's left to truncate() or - ftruncate() to give back an error for that (normally EINVAL). - */ - - if (SCM_UNBNDP (length)) - { - /* must supply length if object is a filename. */ - if (scm_is_string (object)) - SCM_MISC_ERROR("must supply length if OBJECT is a filename", SCM_EOL); - - length = scm_seek (object, SCM_INUM0, scm_from_int (SEEK_CUR)); - } + scm_i_pthread_mutex_t *lock; + scm_c_lock_port (port, &lock); + scm_end_input_unlocked (port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + +} - object = SCM_COERCE_OUTPORT (object); - if (scm_is_integer (object)) - { - off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length); - SCM_SYSCALL (rv = ftruncate_or_ftruncate64 (scm_to_int (object), - c_length)); - } - else if (SCM_OPOUTPORTP (object)) - { - off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length); - scm_t_port *pt = SCM_PTAB_ENTRY (object); - scm_t_ptob_descriptor *ptob = scm_ptobs + SCM_PTOBNUM (object); - - if (!ptob->truncate) - SCM_MISC_ERROR ("port is not truncatable", SCM_EOL); - if (pt->rw_active == SCM_PORT_READ) - scm_end_input (object); - else if (pt->rw_active == SCM_PORT_WRITE) - ptob->flush (object); - - ptob->truncate (object, c_length); - rv = 0; - } +SCM_DEFINE (scm_force_output, "force-output", 0, 1, 0, + (SCM port), + "Flush the specified output port, or the current output port if @var{port}\n" + "is omitted. The current output buffer contents are passed to the\n" + "underlying port implementation (e.g., in the case of fports, the\n" + "data will be written to the file and the output buffer will be cleared.)\n" + "It has no effect on an unbuffered port.\n\n" + "The return value is unspecified.") +#define FUNC_NAME s_scm_force_output +{ + if (SCM_UNBNDP (port)) + port = scm_current_output_port (); else { - off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length); - char *str = scm_to_locale_string (object); - int eno; - SCM_SYSCALL (rv = truncate_or_truncate64 (str, c_length)); - eno = errno; - free (str); - errno = eno; + port = SCM_COERCE_OUTPORT (port); + SCM_VALIDATE_OPOUTPORT (1, port); } - if (rv == -1) - SCM_SYSERROR; + scm_flush_unlocked (port); return SCM_UNSPECIFIED; } #undef FUNC_NAME -SCM_DEFINE (scm_port_line, "port-line", 1, 0, 0, - (SCM port), - "Return the current line number for @var{port}.\n" - "\n" - "The first line of a file is 0. But you might want to add 1\n" - "when printing line numbers, since starting from 1 is\n" - "traditional in error messages, and likely to be more natural to\n" - "non-programmers.") -#define FUNC_NAME s_scm_port_line +void +scm_flush_unlocked (SCM port) { - port = SCM_COERCE_OUTPORT (port); - SCM_VALIDATE_OPENPORT (1, port); - return scm_from_long (SCM_LINUM (port)); + SCM_PORT_DESCRIPTOR (port)->flush (port); } -#undef FUNC_NAME -SCM_DEFINE (scm_set_port_line_x, "set-port-line!", 2, 0, 0, - (SCM port, SCM line), - "Set the current line number for @var{port} to @var{line}. The\n" - "first line of a file is 0.") -#define FUNC_NAME s_scm_set_port_line_x +void +scm_flush (SCM port) { - port = SCM_COERCE_OUTPORT (port); - SCM_VALIDATE_OPENPORT (1, port); - SCM_PTAB_ENTRY (port)->line_number = scm_to_long (line); - return SCM_UNSPECIFIED; + scm_i_pthread_mutex_t *lock; + scm_c_lock_port (port, &lock); + scm_flush_unlocked (port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + } -#undef FUNC_NAME -SCM_DEFINE (scm_port_column, "port-column", 1, 0, 0, - (SCM port), - "Return the current column number of @var{port}.\n" - "If the number is\n" - "unknown, the result is #f. Otherwise, the result is a 0-origin integer\n" - "- i.e. the first character of the first line is line 0, column 0.\n" - "(However, when you display a file position, for example in an error\n" - "message, we recommend you add 1 to get 1-origin integers. This is\n" - "because lines and column numbers traditionally start with 1, and that is\n" - "what non-programmers will find most natural.)") -#define FUNC_NAME s_scm_port_column + + + +/* Output. */ + +void +scm_putc (char c, SCM port) { - port = SCM_COERCE_OUTPORT (port); - SCM_VALIDATE_OPENPORT (1, port); - return scm_from_int (SCM_COL (port)); + scm_i_pthread_mutex_t *lock; + scm_c_lock_port (port, &lock); + scm_putc_unlocked (c, port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + } -#undef FUNC_NAME -SCM_DEFINE (scm_set_port_column_x, "set-port-column!", 2, 0, 0, - (SCM port, SCM column), - "Set the current column of @var{port}. Before reading the first\n" - "character on a line the column should be 0.") -#define FUNC_NAME s_scm_set_port_column_x +void +scm_puts (const char *s, SCM port) { - port = SCM_COERCE_OUTPORT (port); - SCM_VALIDATE_OPENPORT (1, port); - SCM_PTAB_ENTRY (port)->column_number = scm_to_int (column); - return SCM_UNSPECIFIED; + scm_i_pthread_mutex_t *lock; + scm_c_lock_port (port, &lock); + scm_puts_unlocked (s, port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + } -#undef FUNC_NAME - -SCM_DEFINE (scm_port_filename, "port-filename", 1, 0, 0, - (SCM port), - "Return the filename associated with @var{port}, or @code{#f}\n" - "if no filename is associated with the port.") -#define FUNC_NAME s_scm_port_filename + +/* scm_c_write + * + * Used by an application to write arbitrary number of bytes to an SCM + * port. Similar semantics as libc write. However, unlike libc + * write, scm_c_write writes the requested number of bytes and has no + * return value. + * + * Warning: Doesn't update port line and column counts! + */ +void +scm_c_write_unlocked (SCM port, const void *ptr, size_t size) +#define FUNC_NAME "scm_c_write" { - port = SCM_COERCE_OUTPORT (port); - SCM_VALIDATE_OPENPORT (1, port); - return SCM_FILENAME (port); + scm_t_port *pt; + scm_t_ptob_descriptor *ptob; + + SCM_VALIDATE_OPOUTPORT (1, port); + + pt = SCM_PTAB_ENTRY (port); + ptob = SCM_PORT_DESCRIPTOR (port); + + if (pt->rw_active == SCM_PORT_READ) + scm_end_input_unlocked (port); + + ptob->write (port, ptr, size); + + if (pt->rw_random) + pt->rw_active = SCM_PORT_WRITE; } #undef FUNC_NAME -SCM_DEFINE (scm_set_port_filename_x, "set-port-filename!", 2, 0, 0, - (SCM port, SCM filename), - "Change the filename associated with @var{port}, using the current input\n" - "port if none is specified. Note that this does not change the port's\n" - "source of data, but only the value that is returned by\n" - "@code{port-filename} and reported in diagnostic output.") -#define FUNC_NAME s_scm_set_port_filename_x +void +scm_c_write (SCM port, const void *ptr, size_t size) { - port = SCM_COERCE_OUTPORT (port); - SCM_VALIDATE_OPENPORT (1, port); - /* We allow the user to set the filename to whatever he likes. */ - SCM_SET_FILENAME (port, filename); - return SCM_UNSPECIFIED; + scm_i_pthread_mutex_t *lock; + scm_c_lock_port (port, &lock); + scm_c_write_unlocked (port, ptr, size); + if (lock) + scm_i_pthread_mutex_unlock (lock); + } -#undef FUNC_NAME - -/* A fluid specifying the default encoding for newly created ports. If it is - a string, that is the encoding. If it is #f, it is in the "native" - (Latin-1) encoding. */ -SCM_VARIABLE (default_port_encoding_var, "%default-port-encoding"); - -static int scm_port_encoding_init = 0; -/* Use ENCODING as the default encoding for future ports. */ +/* scm_lfwrite + * + * This function differs from scm_c_write; it updates port line and + * column. */ void -scm_i_set_default_port_encoding (const char *encoding) +scm_lfwrite_unlocked (const char *ptr, size_t size, SCM port) { - if (!scm_port_encoding_init - || !scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var))) - scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized", - SCM_EOL); + scm_t_port *pt = SCM_PTAB_ENTRY (port); + scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port); - if (encoding == NULL - || !strcmp (encoding, "ASCII") - || !strcmp (encoding, "ANSI_X3.4-1968") - || !strcmp (encoding, "ISO-8859-1")) - scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F); - else - scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), - scm_from_locale_string (encoding)); + if (pt->rw_active == SCM_PORT_READ) + scm_end_input_unlocked (port); + + ptob->write (port, ptr, size); + + for (; size; ptr++, size--) + update_port_lf ((scm_t_wchar) (unsigned char) *ptr, port); + + if (pt->rw_random) + pt->rw_active = SCM_PORT_WRITE; } -/* Return the name of the default encoding for newly created ports; a - return value of NULL means "ISO-8859-1". */ -const char * -scm_i_default_port_encoding (void) +void +scm_lfwrite (const char *ptr, size_t size, SCM port) { - if (!scm_port_encoding_init) - return NULL; - else if (!scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var))) - return NULL; - else - { - SCM encoding; - - encoding = scm_fluid_ref (SCM_VARIABLE_REF (default_port_encoding_var)); - if (!scm_is_string (encoding)) - return NULL; - else - return scm_i_string_chars (encoding); - } + scm_i_pthread_mutex_t *lock; + scm_c_lock_port (port, &lock); + scm_lfwrite_unlocked (ptr, size, port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + } +/* Write STR to PORT from START inclusive to END exclusive. */ void -scm_i_set_port_encoding_x (SCM port, const char *encoding) +scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port) { - scm_t_port *pt; - iconv_t new_input_cd, new_output_cd; - - new_input_cd = (iconv_t) -1; - new_output_cd = (iconv_t) -1; - - /* Set the character encoding for this port. */ - pt = SCM_PTAB_ENTRY (port); - - if (encoding == NULL) - encoding = "ISO-8859-1"; + scm_t_port *pt = SCM_PTAB_ENTRY (port); - if (pt->encoding != encoding) - pt->encoding = scm_gc_strdup (encoding, "port"); + if (pt->rw_active == SCM_PORT_READ) + scm_end_input_unlocked (port); - /* If ENCODING is UTF-8, then no conversion descriptor is opened - because we do I/O ourselves. This saves 100+ KiB for each - descriptor. */ - if (strcmp (encoding, "UTF-8")) - { - if (SCM_CELL_WORD_0 (port) & SCM_RDNG) - { - /* Open an input iconv conversion descriptor, from ENCODING - to UTF-8. We choose UTF-8, not UTF-32, because iconv - implementations can typically convert from anything to - UTF-8, but not to UTF-32 (see - ). */ - new_input_cd = iconv_open ("UTF-8", encoding); - if (new_input_cd == (iconv_t) -1) - goto invalid_encoding; - } + if (end == (size_t) -1) + end = scm_i_string_length (str); - if (SCM_CELL_WORD_0 (port) & SCM_WRTNG) - { - new_output_cd = iconv_open (encoding, "UTF-8"); - if (new_output_cd == (iconv_t) -1) - { - if (new_input_cd != (iconv_t) -1) - iconv_close (new_input_cd); - goto invalid_encoding; - } - } - } + scm_i_display_substring (str, start, end, port); - if (pt->input_cd != (iconv_t) -1) - iconv_close (pt->input_cd); - if (pt->output_cd != (iconv_t) -1) - iconv_close (pt->output_cd); + if (pt->rw_random) + pt->rw_active = SCM_PORT_WRITE; +} - pt->input_cd = new_input_cd; - pt->output_cd = new_output_cd; - return; + - invalid_encoding: - { - SCM err; - err = scm_from_locale_string (encoding); - scm_misc_error ("scm_i_set_port_encoding_x", - "invalid or unknown character encoding ~s", - scm_list_1 (err)); - } -} +/* Querying and setting positions, and character availability. */ -SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0, +SCM_DEFINE (scm_char_ready_p, "char-ready?", 0, 1, 0, (SCM port), - "Returns, as a string, the character encoding that @var{port}\n" - "uses to interpret its input and output.\n") -#define FUNC_NAME s_scm_port_encoding + "Return @code{#t} if a character is ready on input @var{port}\n" + "and return @code{#f} otherwise. If @code{char-ready?} returns\n" + "@code{#t} then the next @code{read-char} operation on\n" + "@var{port} is guaranteed not to hang. If @var{port} is a file\n" + "port at end of file then @code{char-ready?} returns @code{#t}.\n" + "\n" + "@code{char-ready?} exists to make it possible for a\n" + "program to accept characters from interactive ports without\n" + "getting stuck waiting for input. Any input editors associated\n" + "with such ports must make sure that characters whose existence\n" + "has been asserted by @code{char-ready?} cannot be rubbed out.\n" + "If @code{char-ready?} were to return @code{#f} at end of file,\n" + "a port at end of file would be indistinguishable from an\n" + "interactive port that has no ready characters.") +#define FUNC_NAME s_scm_char_ready_p { scm_t_port *pt; - const char *enc; - SCM_VALIDATE_PORT (1, port); + if (SCM_UNBNDP (port)) + port = scm_current_input_port (); + /* It's possible to close the current input port, so validate even in + this case. */ + SCM_VALIDATE_OPINPORT (1, port); pt = SCM_PTAB_ENTRY (port); - enc = pt->encoding; - if (enc) - return scm_from_locale_string (pt->encoding); + + /* if the current read buffer is filled, or the + last pushed-back char has been read and the saved buffer is + filled, result is true. */ + if (pt->read_pos < pt->read_end + || (pt->read_buf == pt->putback_buf + && pt->saved_read_pos < pt->saved_read_end)) + return SCM_BOOL_T; else - return SCM_BOOL_F; + { + scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port); + + if (ptob->input_waiting) + return scm_from_bool(ptob->input_waiting (port)); + else + return SCM_BOOL_T; + } } #undef FUNC_NAME -SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0, - (SCM port, SCM enc), - "Sets the character encoding that will be used to interpret all\n" - "port I/O. New ports are created with the encoding\n" - "appropriate for the current locale if @code{setlocale} has \n" - "been called or ISO-8859-1 otherwise\n" - "and this procedure can be used to modify that encoding.\n") -#define FUNC_NAME s_scm_set_port_encoding_x +SCM_DEFINE (scm_seek, "seek", 3, 0, 0, + (SCM fd_port, SCM offset, SCM whence), + "Sets the current position of @var{fd_port} to the integer\n" + "@var{offset}, which is interpreted according to the value of\n" + "@var{whence}.\n" + "\n" + "One of the following variables should be supplied for\n" + "@var{whence}:\n" + "@defvar SEEK_SET\n" + "Seek from the beginning of the file.\n" + "@end defvar\n" + "@defvar SEEK_CUR\n" + "Seek from the current position.\n" + "@end defvar\n" + "@defvar SEEK_END\n" + "Seek from the end of the file.\n" + "@end defvar\n" + "If @var{fd_port} is a file descriptor, the underlying system\n" + "call is @code{lseek}. @var{port} may be a string port.\n" + "\n" + "The value returned is the new position in the file. This means\n" + "that the current position of a port can be obtained using:\n" + "@lisp\n" + "(seek port 0 SEEK_CUR)\n" + "@end lisp") +#define FUNC_NAME s_scm_seek { - char *enc_str; - - SCM_VALIDATE_PORT (1, port); - SCM_VALIDATE_STRING (2, enc); - - enc_str = scm_to_locale_string (enc); - scm_i_set_port_encoding_x (port, enc_str); - free (enc_str); - - return SCM_UNSPECIFIED; -} -#undef FUNC_NAME - - -/* A fluid specifying the default conversion handler for newly created - ports. Its value should be one of the symbols below. */ -SCM_VARIABLE (default_conversion_strategy_var, - "%default-port-conversion-strategy"); - -/* Whether the above fluid is initialized. */ -static int scm_conversion_strategy_init = 0; + int how; -/* The possible conversion strategies. */ -SCM_SYMBOL (sym_error, "error"); -SCM_SYMBOL (sym_substitute, "substitute"); -SCM_SYMBOL (sym_escape, "escape"); + fd_port = SCM_COERCE_OUTPORT (fd_port); -/* Return the default failed encoding conversion policy for new created - ports. */ -scm_t_string_failed_conversion_handler -scm_i_default_port_conversion_handler (void) -{ - scm_t_string_failed_conversion_handler handler; + how = scm_to_int (whence); + if (how != SEEK_SET && how != SEEK_CUR && how != SEEK_END) + SCM_OUT_OF_RANGE (3, whence); - if (!scm_conversion_strategy_init - || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var))) - handler = SCM_FAILED_CONVERSION_QUESTION_MARK; - else + if (SCM_OPPORTP (fd_port)) { - SCM fluid, value; - - fluid = SCM_VARIABLE_REF (default_conversion_strategy_var); - value = scm_fluid_ref (fluid); + scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (fd_port); + off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset); + off_t_or_off64_t rv; - if (scm_is_eq (sym_substitute, value)) - handler = SCM_FAILED_CONVERSION_QUESTION_MARK; - else if (scm_is_eq (sym_escape, value)) - handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE; + if (!ptob->seek) + SCM_MISC_ERROR ("port is not seekable", + scm_cons (fd_port, SCM_EOL)); else - /* Default to 'error also when the fluid's value is not one of - the valid symbols. */ - handler = SCM_FAILED_CONVERSION_ERROR; + rv = ptob->seek (fd_port, off, how); + return scm_from_off_t_or_off64_t (rv); + } + else /* file descriptor?. */ + { + off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset); + off_t_or_off64_t rv; + rv = lseek_or_lseek64 (scm_to_int (fd_port), off, how); + if (rv == -1) + SCM_SYSERROR; + return scm_from_off_t_or_off64_t (rv); } - - return handler; } +#undef FUNC_NAME -/* Use HANDLER as the default conversion strategy for future ports. */ -void -scm_i_set_default_port_conversion_handler (scm_t_string_failed_conversion_handler - handler) +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +/* Mingw has ftruncate(), perhaps implemented above using chsize, but + doesn't have the filename version truncate(), hence this code. */ +#if HAVE_FTRUNCATE && ! HAVE_TRUNCATE +static int +truncate (const char *file, off_t length) { - SCM strategy; + int ret, fdes; - if (!scm_conversion_strategy_init - || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var))) - scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized", - SCM_EOL); + fdes = open (file, O_BINARY | O_WRONLY); + if (fdes == -1) + return -1; - switch (handler) + ret = ftruncate (fdes, length); + if (ret == -1) { - case SCM_FAILED_CONVERSION_ERROR: - strategy = sym_error; - break; - - case SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE: - strategy = sym_escape; - break; - - case SCM_FAILED_CONVERSION_QUESTION_MARK: - strategy = sym_substitute; - break; - - default: - abort (); + int save_errno = errno; + close (fdes); + errno = save_errno; + return -1; } - scm_fluid_set_x (SCM_VARIABLE_REF (default_conversion_strategy_var), - strategy); + return close (fdes); } +#endif /* HAVE_FTRUNCATE && ! HAVE_TRUNCATE */ -SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy", - 1, 0, 0, (SCM port), - "Returns the behavior of the port when handling a character that\n" - "is not representable in the port's current encoding.\n" - "It returns the symbol @code{error} if unrepresentable characters\n" - "should cause exceptions, @code{substitute} if the port should\n" - "try to replace unrepresentable characters with question marks or\n" - "approximate characters, or @code{escape} if unrepresentable\n" - "characters should be converted to string escapes.\n" +SCM_DEFINE (scm_truncate_file, "truncate-file", 1, 1, 0, + (SCM object, SCM length), + "Truncate file @var{object} to @var{length} bytes. @var{object}\n" + "can be a filename string, a port object, or an integer file\n" + "descriptor.\n" + "The return value is unspecified.\n" "\n" - "If @var{port} is @code{#f}, then the current default behavior\n" - "will be returned. New ports will have this default behavior\n" - "when they are created.\n") -#define FUNC_NAME s_scm_port_conversion_strategy + "For a port or file descriptor @var{length} can be omitted, in\n" + "which case the file is truncated at the current position (per\n" + "@code{ftell} above).\n" + "\n" + "On most systems a file can be extended by giving a length\n" + "greater than the current size, but this is not mandatory in the\n" + "POSIX standard.") +#define FUNC_NAME s_scm_truncate_file { - scm_t_string_failed_conversion_handler h; + int rv; - if (scm_is_false (port)) - h = scm_i_default_port_conversion_handler (); - else - { - scm_t_port *pt; + /* "object" can be a port, fdes or filename. - SCM_VALIDATE_OPPORT (1, port); - pt = SCM_PTAB_ENTRY (port); + Negative "length" makes no sense, but it's left to truncate() or + ftruncate() to give back an error for that (normally EINVAL). + */ - h = pt->ilseq_handler; + if (SCM_UNBNDP (length)) + { + /* must supply length if object is a filename. */ + if (scm_is_string (object)) + SCM_MISC_ERROR("must supply length if OBJECT is a filename", SCM_EOL); + + length = scm_seek (object, SCM_INUM0, scm_from_int (SEEK_CUR)); } - if (h == SCM_FAILED_CONVERSION_ERROR) - return scm_from_latin1_symbol ("error"); - else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK) - return scm_from_latin1_symbol ("substitute"); - else if (h == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE) - return scm_from_latin1_symbol ("escape"); + object = SCM_COERCE_OUTPORT (object); + if (scm_is_integer (object)) + { + off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length); + SCM_SYSCALL (rv = ftruncate_or_ftruncate64 (scm_to_int (object), + c_length)); + } + else if (SCM_OPOUTPORTP (object)) + { + off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length); + scm_t_port *pt = SCM_PTAB_ENTRY (object); + scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (object); + + if (!ptob->truncate) + SCM_MISC_ERROR ("port is not truncatable", SCM_EOL); + if (pt->rw_active == SCM_PORT_READ) + scm_end_input_unlocked (object); + else if (pt->rw_active == SCM_PORT_WRITE) + ptob->flush (object); + + ptob->truncate (object, c_length); + rv = 0; + } else - abort (); - - /* Never gets here. */ - return SCM_UNDEFINED; + { + off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length); + char *str = scm_to_locale_string (object); + int eno; + SCM_SYSCALL (rv = truncate_or_truncate64 (str, c_length)); + eno = errno; + free (str); + errno = eno; + } + if (rv == -1) + SCM_SYSERROR; + return SCM_UNSPECIFIED; } #undef FUNC_NAME -SCM_DEFINE (scm_set_port_conversion_strategy_x, "set-port-conversion-strategy!", - 2, 0, 0, - (SCM port, SCM sym), - "Sets the behavior of the interpreter when outputting a character\n" - "that is not representable in the port's current encoding.\n" - "@var{sym} can be either @code{'error}, @code{'substitute}, or\n" - "@code{'escape}. If it is @code{'error}, an error will be thrown\n" - "when an unconvertible character is encountered. If it is\n" - "@code{'substitute}, then unconvertible characters will \n" - "be replaced with approximate characters, or with question marks\n" - "if no approximately correct character is available.\n" - "If it is @code{'escape},\n" - "it will appear as a hex escape when output.\n" +SCM_DEFINE (scm_port_line, "port-line", 1, 0, 0, + (SCM port), + "Return the current line number for @var{port}.\n" "\n" - "If @var{port} is an open port, the conversion error behavior\n" - "is set for that port. If it is @code{#f}, it is set as the\n" - "default behavior for any future ports that get created in\n" - "this thread.\n") -#define FUNC_NAME s_scm_set_port_conversion_strategy_x + "The first line of a file is 0. But you might want to add 1\n" + "when printing line numbers, since starting from 1 is\n" + "traditional in error messages, and likely to be more natural to\n" + "non-programmers.") +#define FUNC_NAME s_scm_port_line { - scm_t_string_failed_conversion_handler handler; + port = SCM_COERCE_OUTPORT (port); + SCM_VALIDATE_OPENPORT (1, port); + return scm_from_long (SCM_LINUM (port)); +} +#undef FUNC_NAME - if (scm_is_eq (sym, sym_error)) - handler = SCM_FAILED_CONVERSION_ERROR; - else if (scm_is_eq (sym, sym_substitute)) - handler = SCM_FAILED_CONVERSION_QUESTION_MARK; - else if (scm_is_eq (sym, sym_escape)) - handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE; - else - SCM_MISC_ERROR ("unknown conversion strategy ~s", scm_list_1 (sym)); +SCM_DEFINE (scm_set_port_line_x, "set-port-line!", 2, 0, 0, + (SCM port, SCM line), + "Set the current line number for @var{port} to @var{line}. The\n" + "first line of a file is 0.") +#define FUNC_NAME s_scm_set_port_line_x +{ + port = SCM_COERCE_OUTPORT (port); + SCM_VALIDATE_OPENPORT (1, port); + SCM_PTAB_ENTRY (port)->line_number = scm_to_long (line); + return SCM_UNSPECIFIED; +} +#undef FUNC_NAME - if (scm_is_false (port)) - scm_i_set_default_port_conversion_handler (handler); - else - { - SCM_VALIDATE_OPPORT (1, port); - SCM_PTAB_ENTRY (port)->ilseq_handler = handler; - } +SCM_DEFINE (scm_port_column, "port-column", 1, 0, 0, + (SCM port), + "Return the current column number of @var{port}.\n" + "If the number is\n" + "unknown, the result is #f. Otherwise, the result is a 0-origin integer\n" + "- i.e. the first character of the first line is line 0, column 0.\n" + "(However, when you display a file position, for example in an error\n" + "message, we recommend you add 1 to get 1-origin integers. This is\n" + "because lines and column numbers traditionally start with 1, and that is\n" + "what non-programmers will find most natural.)") +#define FUNC_NAME s_scm_port_column +{ + port = SCM_COERCE_OUTPORT (port); + SCM_VALIDATE_OPENPORT (1, port); + return scm_from_int (SCM_COL (port)); +} +#undef FUNC_NAME + +SCM_DEFINE (scm_set_port_column_x, "set-port-column!", 2, 0, 0, + (SCM port, SCM column), + "Set the current column of @var{port}. Before reading the first\n" + "character on a line the column should be 0.") +#define FUNC_NAME s_scm_set_port_column_x +{ + port = SCM_COERCE_OUTPORT (port); + SCM_VALIDATE_OPENPORT (1, port); + SCM_PTAB_ENTRY (port)->column_number = scm_to_int (column); + return SCM_UNSPECIFIED; +} +#undef FUNC_NAME + +SCM_DEFINE (scm_port_filename, "port-filename", 1, 0, 0, + (SCM port), + "Return the filename associated with @var{port}, or @code{#f}\n" + "if no filename is associated with the port.") +#define FUNC_NAME s_scm_port_filename +{ + port = SCM_COERCE_OUTPORT (port); + SCM_VALIDATE_OPENPORT (1, port); + return SCM_FILENAME (port); +} +#undef FUNC_NAME +SCM_DEFINE (scm_set_port_filename_x, "set-port-filename!", 2, 0, 0, + (SCM port, SCM filename), + "Change the filename associated with @var{port}, using the current input\n" + "port if none is specified. Note that this does not change the port's\n" + "source of data, but only the value that is returned by\n" + "@code{port-filename} and reported in diagnostic output.") +#define FUNC_NAME s_scm_set_port_filename_x +{ + port = SCM_COERCE_OUTPORT (port); + SCM_VALIDATE_OPENPORT (1, port); + /* We allow the user to set the filename to whatever he likes. */ + SCM_SET_FILENAME (port, filename); return SCM_UNSPECIFIED; } #undef FUNC_NAME + + +/* Implementation helpers for port printing functions. */ void scm_print_port_mode (SCM exp, SCM port) { - scm_puts (SCM_CLOSEDP (exp) + scm_puts_unlocked (SCM_CLOSEDP (exp) ? "closed: " : (SCM_RDNG & SCM_CELL_WORD_0 (exp) ? (SCM_WRTNG & SCM_CELL_WORD_0 (exp) @@ -2495,15 +2741,91 @@ scm_port_print (SCM exp, SCM port, scm_print_state *pstate SCM_UNUSED) char *type = SCM_PTOBNAME (SCM_PTOBNUM (exp)); if (!type) type = "port"; - scm_puts ("#<", port); + scm_puts_unlocked ("#<", port); scm_print_port_mode (exp, port); - scm_puts (type, port); - scm_putc (' ', port); + scm_puts_unlocked (type, port); + scm_putc_unlocked (' ', port); scm_uintprint (SCM_CELL_WORD_1 (exp), 16, port); - scm_putc ('>', port); + scm_putc_unlocked ('>', port); return 1; } + + + +/* Iterating over all ports. */ + +struct for_each_data +{ + void (*proc) (void *data, SCM p); + void *data; +}; + +static SCM +for_each_trampoline (void *data, SCM port, SCM result) +{ + struct for_each_data *d = data; + + d->proc (d->data, port); + + return result; +} + +void +scm_c_port_for_each (void (*proc)(void *data, SCM p), void *data) +{ + struct for_each_data d; + + d.proc = proc; + d.data = data; + + scm_c_weak_set_fold (for_each_trampoline, &d, SCM_EOL, + scm_i_port_weak_set); +} + +static void +scm_for_each_trampoline (void *data, SCM port) +{ + scm_call_1 (SCM_PACK_POINTER (data), port); +} + +SCM_DEFINE (scm_port_for_each, "port-for-each", 1, 0, 0, + (SCM proc), + "Apply @var{proc} to each port in the Guile port table\n" + "in turn. The return value is unspecified. More specifically,\n" + "@var{proc} is applied exactly once to every port that exists\n" + "in the system at the time @code{port-for-each} is invoked.\n" + "Changes to the port table while @code{port-for-each} is running\n" + "have no effect as far as @code{port-for-each} is concerned.") +#define FUNC_NAME s_scm_port_for_each +{ + SCM_VALIDATE_PROC (1, proc); + + scm_c_port_for_each (scm_for_each_trampoline, SCM_UNPACK_POINTER (proc)); + + return SCM_UNSPECIFIED; +} +#undef FUNC_NAME + +static void +flush_output_port (void *closure, SCM port) +{ + if (SCM_OPOUTPORTP (port)) + scm_flush_unlocked (port); +} + +SCM_DEFINE (scm_flush_all_ports, "flush-all-ports", 0, 0, 0, + (), + "Equivalent to calling @code{force-output} on\n" + "all open output ports. The return value is unspecified.") +#define FUNC_NAME s_scm_flush_all_ports +{ + scm_c_port_for_each (&flush_output_port, NULL); + return SCM_UNSPECIFIED; +} +#undef FUNC_NAME + + /* Void ports. */ @@ -2525,18 +2847,13 @@ write_void_port (SCM port SCM_UNUSED, static SCM scm_i_void_port (long mode_bits) { - scm_i_scm_pthread_mutex_lock (&scm_i_port_table_mutex); - { - SCM answer = scm_new_port_table_entry (scm_tc16_void_port); - scm_t_port * pt = SCM_PTAB_ENTRY(answer); + SCM ret; - scm_port_non_buffer (pt); + ret = scm_c_make_port (scm_tc16_void_port, mode_bits, 0); + + scm_port_non_buffer (SCM_PTAB_ENTRY (ret)); - SCM_SETSTREAM (answer, 0); - SCM_SET_CELL_TYPE (answer, scm_tc16_void_port | mode_bits); - scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex); - return answer; - } + return ret; } SCM @@ -2557,7 +2874,9 @@ SCM_DEFINE (scm_sys_make_void_port, "%make-void-port", 1, 0, 0, } #undef FUNC_NAME + + /* Initialization. */ void @@ -2576,7 +2895,7 @@ scm_init_ports () cur_errport_fluid = scm_make_fluid (); cur_loadport_fluid = scm_make_fluid (); - scm_i_port_weak_hash = scm_make_weak_key_hash_table (SCM_I_MAKINUM(31)); + scm_i_port_weak_set = scm_c_make_weak_set (31); #include "libguile/ports.x"