X-Git-Url: https://git.hcoop.net/bpt/guile.git/blobdiff_plain/f5b2eea6a39507ecf6a8ecc62cc1c796c45c2d1d..f6f4feb0a2222efcb297e634603621126542e63f:/libguile/ports.c diff --git a/libguile/ports.c b/libguile/ports.c index 9068c5cfa..20c90810d 100644 --- a/libguile/ports.c +++ b/libguile/ports.c @@ -1,5 +1,5 @@ -/* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2006, - * 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. +/* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, + * 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -35,7 +35,6 @@ #include #include #include -#include #include @@ -58,7 +57,7 @@ #include "libguile/ports.h" #include "libguile/ports-internal.h" #include "libguile/vectors.h" -#include "libguile/weaks.h" +#include "libguile/weak-set.h" #include "libguile/fluids.h" #include "libguile/eq.h" @@ -92,6 +91,56 @@ #endif +/* Port encodings are case-insensitive ASCII strings. */ +static char +ascii_toupper (char c) +{ + return (c < 'a' || c > 'z') ? c : ('A' + (c - 'a')); +} + +/* It is only necessary to use this function on encodings that come from + the user and have not been canonicalized yet. Encodings that are set + on ports or in the default encoding fluid are in upper-case, and can + be compared with strcmp. */ +static int +encoding_matches (const char *enc, const char *upper) +{ + if (!enc) + enc = "ISO-8859-1"; + + while (*enc) + if (ascii_toupper (*enc++) != *upper++) + return 0; + + return !*upper; +} + +static char* +canonicalize_encoding (const char *enc) +{ + char *ret; + int i; + + if (!enc) + return "ISO-8859-1"; + + ret = scm_gc_strdup (enc, "port"); + + for (i = 0; ret[i]; i++) + { + if (ret[i] > 127) + /* Restrict to ASCII. */ + scm_misc_error (NULL, "invalid character encoding ~s", + scm_list_1 (scm_from_latin1_string (enc))); + else + ret[i] = ascii_toupper (ret[i]); + } + + return ret; +} + + + /* The port kind table --- a dynamically resized array of port types. */ @@ -100,19 +149,79 @@ * Indexes into this table are used when generating type * tags for smobjects (if you know a tag you can get an index and conversely). */ -scm_t_ptob_descriptor *scm_ptobs = NULL; -long scm_numptob = 0; +static scm_t_ptob_descriptor **scm_ptobs = NULL; +static long scm_numptob = 0; /* Number of port types. */ +static long scm_ptobs_size = 0; /* Number of slots in the port type + table. */ +static scm_i_pthread_mutex_t scm_ptobs_lock = SCM_I_PTHREAD_MUTEX_INITIALIZER; -/* GC marker for a port with stream of SCM type. */ -SCM -scm_markstream (SCM ptr) +long +scm_c_num_port_types (void) { - int openp; - openp = SCM_CELL_WORD_0 (ptr) & SCM_OPN; - if (openp) - return SCM_PACK (SCM_STREAM (ptr)); - else - return SCM_BOOL_F; + long ret; + + scm_i_pthread_mutex_lock (&scm_ptobs_lock); + ret = scm_numptob; + scm_i_pthread_mutex_unlock (&scm_ptobs_lock); + + return ret; +} + +scm_t_ptob_descriptor* +scm_c_port_type_ref (long ptobnum) +{ + scm_t_ptob_descriptor *ret = NULL; + + scm_i_pthread_mutex_lock (&scm_ptobs_lock); + + if (0 <= ptobnum && ptobnum < scm_numptob) + ret = scm_ptobs[ptobnum]; + + scm_i_pthread_mutex_unlock (&scm_ptobs_lock); + + if (!ret) + scm_out_of_range ("scm_c_port_type_ref", scm_from_long (ptobnum)); + + return ret; +} + +long +scm_c_port_type_add_x (scm_t_ptob_descriptor *desc) +{ + long ret = -1; + + scm_i_pthread_mutex_lock (&scm_ptobs_lock); + + if (scm_numptob + 1 < SCM_I_MAX_PORT_TYPE_COUNT) + { + if (scm_numptob == scm_ptobs_size) + { + unsigned long old_size = scm_ptobs_size; + scm_t_ptob_descriptor **old_ptobs = scm_ptobs; + + /* Currently there are only 9 predefined port types, so one + resize will cover it. */ + scm_ptobs_size = old_size + 10; + + if (scm_ptobs_size >= SCM_I_MAX_PORT_TYPE_COUNT) + scm_ptobs_size = SCM_I_MAX_PORT_TYPE_COUNT; + + scm_ptobs = scm_gc_malloc (sizeof (*scm_ptobs) * scm_ptobs_size, + "scm_ptobs"); + + memcpy (scm_ptobs, old_ptobs, sizeof (*scm_ptobs) * scm_numptob); + } + + ret = scm_numptob++; + scm_ptobs[ret] = desc; + } + + scm_i_pthread_mutex_unlock (&scm_ptobs_lock); + + if (ret < 0) + scm_out_of_range ("scm_c_port_type_add_x", scm_from_long (scm_numptob)); + + return ret; } /* @@ -136,110 +245,89 @@ scm_make_port_type (char *name, int (*fill_input) (SCM port), void (*write) (SCM port, const void *data, size_t size)) { - char *tmp; - if (SCM_I_MAX_PORT_TYPE_COUNT - 1 <= scm_numptob) - goto ptoberr; - SCM_CRITICAL_SECTION_START; - tmp = (char *) scm_gc_realloc ((char *) scm_ptobs, - scm_numptob * sizeof (scm_t_ptob_descriptor), - (1 + scm_numptob) - * sizeof (scm_t_ptob_descriptor), - "port-type"); - if (tmp) - { - scm_ptobs = (scm_t_ptob_descriptor *) tmp; + scm_t_ptob_descriptor *desc; + long ptobnum; - scm_ptobs[scm_numptob].name = name; - scm_ptobs[scm_numptob].mark = 0; - scm_ptobs[scm_numptob].free = NULL; - scm_ptobs[scm_numptob].print = scm_port_print; - scm_ptobs[scm_numptob].equalp = 0; - scm_ptobs[scm_numptob].close = 0; + desc = scm_gc_malloc_pointerless (sizeof (*desc), "port-type"); + memset (desc, 0, sizeof (*desc)); - scm_ptobs[scm_numptob].write = write; - scm_ptobs[scm_numptob].flush = flush_port_default; + desc->name = name; + desc->print = scm_port_print; + desc->write = write; + desc->flush = flush_port_default; + desc->end_input = end_input_default; + desc->fill_input = fill_input; - scm_ptobs[scm_numptob].end_input = end_input_default; - scm_ptobs[scm_numptob].fill_input = fill_input; - scm_ptobs[scm_numptob].input_waiting = 0; + ptobnum = scm_c_port_type_add_x (desc); - scm_ptobs[scm_numptob].seek = 0; - scm_ptobs[scm_numptob].truncate = 0; - - scm_numptob++; - } - SCM_CRITICAL_SECTION_END; - if (!tmp) - { - ptoberr: - scm_memory_error ("scm_make_port_type"); - } - /* Make a class object if Goops is present */ + /* Make a class object if GOOPS is present. */ if (SCM_UNPACK (scm_port_class[0]) != 0) - scm_make_port_classes (scm_numptob - 1, SCM_PTOBNAME (scm_numptob - 1)); - return scm_tc7_port + (scm_numptob - 1) * 256; + scm_make_port_classes (ptobnum, name); + + return scm_tc7_port + ptobnum * 256; } void scm_set_port_mark (scm_t_bits tc, SCM (*mark) (SCM)) { - scm_ptobs[SCM_TC2PTOBNUM (tc)].mark = mark; + scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->mark = mark; } void scm_set_port_free (scm_t_bits tc, size_t (*free) (SCM)) { - scm_ptobs[SCM_TC2PTOBNUM (tc)].free = free; + scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->free = free; } void scm_set_port_print (scm_t_bits tc, int (*print) (SCM exp, SCM port, - scm_print_state *pstate)) + scm_print_state *pstate)) { - scm_ptobs[SCM_TC2PTOBNUM (tc)].print = print; + scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->print = print; } void scm_set_port_equalp (scm_t_bits tc, SCM (*equalp) (SCM, SCM)) { - scm_ptobs[SCM_TC2PTOBNUM (tc)].equalp = equalp; + scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->equalp = equalp; } void -scm_set_port_flush (scm_t_bits tc, void (*flush) (SCM port)) +scm_set_port_close (scm_t_bits tc, int (*close) (SCM)) { - scm_ptobs[SCM_TC2PTOBNUM (tc)].flush = flush; + scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->close = close; } void -scm_set_port_end_input (scm_t_bits tc, void (*end_input) (SCM port, int offset)) +scm_set_port_flush (scm_t_bits tc, void (*flush) (SCM port)) { - scm_ptobs[SCM_TC2PTOBNUM (tc)].end_input = end_input; + scm_t_ptob_descriptor *ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tc)); + ptob->flush = flush; + ptob->flags |= SCM_PORT_TYPE_HAS_FLUSH; } void -scm_set_port_close (scm_t_bits tc, int (*close) (SCM)) +scm_set_port_end_input (scm_t_bits tc, void (*end_input) (SCM port, int offset)) { - scm_ptobs[SCM_TC2PTOBNUM (tc)].close = close; + scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->end_input = end_input; } void -scm_set_port_seek (scm_t_bits tc, - scm_t_off (*seek) (SCM, scm_t_off, int)) +scm_set_port_seek (scm_t_bits tc, scm_t_off (*seek) (SCM, scm_t_off, int)) { - scm_ptobs[SCM_TC2PTOBNUM (tc)].seek = seek; + scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->seek = seek; } void scm_set_port_truncate (scm_t_bits tc, void (*truncate) (SCM, scm_t_off)) { - scm_ptobs[SCM_TC2PTOBNUM (tc)].truncate = truncate; + scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->truncate = truncate; } void scm_set_port_input_waiting (scm_t_bits tc, int (*input_waiting) (SCM)) { - scm_ptobs[SCM_TC2PTOBNUM (tc)].input_waiting = input_waiting; + scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->input_waiting = input_waiting; } static void @@ -268,130 +356,6 @@ scm_i_set_port_alist_x (SCM port, SCM alist) -SCM_DEFINE (scm_char_ready_p, "char-ready?", 0, 1, 0, - (SCM port), - "Return @code{#t} if a character is ready on input @var{port}\n" - "and return @code{#f} otherwise. If @code{char-ready?} returns\n" - "@code{#t} then the next @code{read-char} operation on\n" - "@var{port} is guaranteed not to hang. If @var{port} is a file\n" - "port at end of file then @code{char-ready?} returns @code{#t}.\n" - "\n" - "@code{char-ready?} exists to make it possible for a\n" - "program to accept characters from interactive ports without\n" - "getting stuck waiting for input. Any input editors associated\n" - "with such ports must make sure that characters whose existence\n" - "has been asserted by @code{char-ready?} cannot be rubbed out.\n" - "If @code{char-ready?} were to return @code{#f} at end of file,\n" - "a port at end of file would be indistinguishable from an\n" - "interactive port that has no ready characters.") -#define FUNC_NAME s_scm_char_ready_p -{ - scm_t_port *pt; - - if (SCM_UNBNDP (port)) - port = scm_current_input_port (); - /* It's possible to close the current input port, so validate even in - this case. */ - SCM_VALIDATE_OPINPORT (1, port); - - pt = SCM_PTAB_ENTRY (port); - - /* if the current read buffer is filled, or the - last pushed-back char has been read and the saved buffer is - filled, result is true. */ - if (pt->read_pos < pt->read_end - || (pt->read_buf == pt->putback_buf - && pt->saved_read_pos < pt->saved_read_end)) - return SCM_BOOL_T; - else - { - scm_t_ptob_descriptor *ptob = &scm_ptobs[SCM_PTOBNUM (port)]; - - if (ptob->input_waiting) - return scm_from_bool(ptob->input_waiting (port)); - else - return SCM_BOOL_T; - } -} -#undef FUNC_NAME - -/* Move up to READ_LEN bytes from PORT's putback and/or read buffers - into memory starting at DEST. Return the number of bytes moved. - PORT's line/column numbers are left unchanged. */ -size_t -scm_take_from_input_buffers (SCM port, char *dest, size_t read_len) -{ - scm_t_port *pt = SCM_PTAB_ENTRY (port); - size_t bytes_read = 0; - size_t from_buf = min (pt->read_end - pt->read_pos, read_len); - - if (from_buf > 0) - { - memcpy (dest, pt->read_pos, from_buf); - pt->read_pos += from_buf; - bytes_read += from_buf; - read_len -= from_buf; - dest += from_buf; - } - - /* if putback was active, try the real input buffer too. */ - if (pt->read_buf == pt->putback_buf) - { - from_buf = min (pt->saved_read_end - pt->saved_read_pos, read_len); - if (from_buf > 0) - { - memcpy (dest, pt->saved_read_pos, from_buf); - pt->saved_read_pos += from_buf; - bytes_read += from_buf; - } - } - - return bytes_read; -} - -/* Clear a port's read buffers, returning the contents. */ -SCM_DEFINE (scm_drain_input, "drain-input", 1, 0, 0, - (SCM port), - "This procedure clears a port's input buffers, similar\n" - "to the way that force-output clears the output buffer. The\n" - "contents of the buffers are returned as a single string, e.g.,\n" - "\n" - "@lisp\n" - "(define p (open-input-file ...))\n" - "(drain-input p) => empty string, nothing buffered yet.\n" - "(unread-char (read-char p) p)\n" - "(drain-input p) => initial chars from p, up to the buffer size.\n" - "@end lisp\n\n" - "Draining the buffers may be useful for cleanly finishing\n" - "buffered I/O so that the file descriptor can be used directly\n" - "for further input.") -#define FUNC_NAME s_scm_drain_input -{ - SCM result; - char *data; - scm_t_port *pt; - long count; - - SCM_VALIDATE_OPINPORT (1, port); - pt = SCM_PTAB_ENTRY (port); - - count = pt->read_end - pt->read_pos; - if (pt->read_buf == pt->putback_buf) - count += pt->saved_read_end - pt->saved_read_pos; - - if (count) - { - result = scm_i_make_string (count, &data, 0); - scm_take_from_input_buffers (port, data, count); - } - else - result = scm_nullstr; - - return result; -} -#undef FUNC_NAME - - /* Standard ports --- current input, output, error, and more(!). */ static SCM cur_inport_fluid = SCM_BOOL_F; @@ -558,323 +522,285 @@ scm_i_dynwind_current_load_port (SCM port) scm_dynwind_fluid (cur_loadport_fluid, port); } - -/* The port table --- an array of pointers to ports. */ - -/* - We need a global registry of ports to flush them all at exit, and to - get all the ports matching a file descriptor. - */ -SCM scm_i_port_weak_hash; - -scm_i_pthread_mutex_t scm_i_port_table_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER; -/* Port finalization. */ +/* Retrieving a port's mode. */ -static void finalize_port (void *, void *); +/* Return the flags that characterize a port based on the mode + * string used to open a file for that port. + * + * See PORT FLAGS in scm.h + */ -/* Register a finalizer for PORT. */ -static SCM_C_INLINE_KEYWORD void -register_finalizer_for_port (SCM port) +static long +scm_i_mode_bits_n (SCM modes) { - /* Register a finalizer for PORT so that its - type's `free' function gets called. */ - scm_i_set_finalizer (SCM2PTR (port), finalize_port, NULL); + return (SCM_OPN + | (scm_i_string_contains_char (modes, 'r') + || scm_i_string_contains_char (modes, '+') ? SCM_RDNG : 0) + | (scm_i_string_contains_char (modes, 'w') + || scm_i_string_contains_char (modes, 'a') + || scm_i_string_contains_char (modes, '+') ? SCM_WRTNG : 0) + | (scm_i_string_contains_char (modes, '0') ? SCM_BUF0 : 0) + | (scm_i_string_contains_char (modes, 'l') ? SCM_BUFLINE : 0)); } -/* Finalize the object (a port) pointed to by PTR. */ -static void -finalize_port (void *ptr, void *data) +long +scm_mode_bits (char *modes) { - long port_type; - SCM port = PTR2SCM (ptr); - - if (!SCM_PORTP (port)) - abort (); - - if (SCM_OPENP (port)) - { - if (SCM_REVEALED (port) > 0) - /* Keep "revealed" ports alive and re-register a finalizer. */ - register_finalizer_for_port (port); - else - { - port_type = SCM_TC2PTOBNUM (SCM_CELL_TYPE (port)); - if (port_type >= scm_numptob) - abort (); + /* Valid characters are rw+a0l. So, use latin1. */ + return scm_i_mode_bits (scm_from_latin1_string (modes)); +} - if (scm_ptobs[port_type].free) - /* Yes, I really do mean `.free' rather than `.close'. `.close' - is for explicit `close-port' by user. */ - scm_ptobs[port_type].free (port); +long +scm_i_mode_bits (SCM modes) +{ + long bits; - SCM_SETSTREAM (port, 0); - SCM_CLR_PORT_OPEN_FLAG (port); + if (!scm_is_string (modes)) + scm_wrong_type_arg_msg (NULL, 0, modes, "string"); - scm_gc_ports_collected++; - } - } + bits = scm_i_mode_bits_n (modes); + scm_remember_upto_here_1 (modes); + return bits; } +/* Return the mode flags from an open port. + * Some modes such as "append" are only used when opening + * a file and are not returned here. */ - - - -/* This function is not and should not be thread safe. */ -SCM -scm_new_port_table_entry (scm_t_bits tag) -#define FUNC_NAME "scm_new_port_table_entry" +SCM_DEFINE (scm_port_mode, "port-mode", 1, 0, 0, + (SCM port), + "Return the port modes associated with the open port @var{port}.\n" + "These will not necessarily be identical to the modes used when\n" + "the port was opened, since modes such as \"append\" which are\n" + "used only during port creation are not retained.") +#define FUNC_NAME s_scm_port_mode { - /* - We initialize the cell to empty, this is in case scm_gc_calloc - triggers GC ; we don't want the GC to scan a half-finished Z. - */ - - SCM z = scm_cons (SCM_EOL, SCM_EOL); - scm_t_port *entry = scm_gc_typed_calloc (scm_t_port); - scm_t_port_internal *pti = scm_gc_typed_calloc (scm_t_port_internal); - const char *encoding; + char modes[4]; + modes[0] = '\0'; - entry->file_name = SCM_BOOL_F; - entry->rw_active = SCM_PORT_NEITHER; - entry->port = z; - - /* Initialize this port with the thread's current default - encoding. */ - encoding = scm_i_default_port_encoding (); - entry->ilseq_handler = scm_i_default_port_conversion_handler (); - entry->encoding = encoding ? scm_gc_strdup (encoding, "port") : NULL; - if (encoding && c_strcasecmp (encoding, "UTF-8") == 0) - pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8; - else - pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV; - pti->iconv_descriptors = NULL; + port = SCM_COERCE_OUTPORT (port); + SCM_VALIDATE_OPPORT (1, port); + if (SCM_CELL_WORD_0 (port) & SCM_RDNG) { + if (SCM_CELL_WORD_0 (port) & SCM_WRTNG) + strcpy (modes, "r+"); + else + strcpy (modes, "r"); + } + else if (SCM_CELL_WORD_0 (port) & SCM_WRTNG) + strcpy (modes, "w"); + if (SCM_CELL_WORD_0 (port) & SCM_BUF0) + strcat (modes, "0"); - pti->at_stream_start_for_bom_read = 1; - pti->at_stream_start_for_bom_write = 1; + return scm_from_latin1_string (modes); +} +#undef FUNC_NAME - /* XXX These fields are not what they seem. They have been - repurposed, but cannot safely be renamed in 2.0 without breaking - ABI compatibility. This will be cleaned up in 2.2. */ - entry->input_cd = pti; /* XXX pointer to the internal port structure */ - entry->output_cd = NULL; /* XXX unused */ - pti->pending_eof = 0; - pti->alist = SCM_EOL; + - SCM_SET_CELL_TYPE (z, tag); - SCM_SETPTAB_ENTRY (z, entry); +/* The port table --- a weak set of all ports. - scm_hashq_set_x (scm_i_port_weak_hash, z, SCM_BOOL_F); + We need a global registry of ports to flush them all at exit, and to + get all the ports matching a file descriptor. */ +SCM scm_i_port_weak_set; - /* For each new port, register a finalizer so that it port type's free - function can be invoked eventually. */ - register_finalizer_for_port (z); - return z; -} -#undef FUNC_NAME + -#if SCM_ENABLE_DEPRECATED==1 -scm_t_port * -scm_add_to_port_table (SCM port) +/* Port finalization. */ + +struct do_free_data { - SCM z; - scm_t_port * pt; + scm_t_ptob_descriptor *ptob; + SCM port; +}; - scm_c_issue_deprecation_warning ("scm_add_to_port_table is deprecated."); +static SCM +do_free (void *body_data) +{ + struct do_free_data *data = body_data; - scm_i_pthread_mutex_lock (&scm_i_port_table_mutex); - z = scm_new_port_table_entry (scm_tc7_port); - pt = SCM_PTAB_ENTRY(z); - pt->port = port; - SCM_SETCAR (z, SCM_EOL); - SCM_SETCDR (z, SCM_EOL); - SCM_SETPTAB_ENTRY (port, pt); - scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex); + /* `close' is for explicit `close-port' by user. `free' is for this + purpose: ports collected by the GC. */ + data->ptob->free (data->port); - return pt; + return SCM_BOOL_T; } -#endif - - -/* Remove a port from the table and destroy it. */ - -static void close_iconv_descriptors (scm_t_iconv_descriptors *id); +/* Finalize the object (a port) pointed to by PTR. */ static void -scm_i_remove_port (SCM port) -#define FUNC_NAME "scm_remove_port" +finalize_port (void *ptr, void *data) { - scm_t_port *p; - scm_t_port_internal *pti; + SCM port = SCM_PACK_POINTER (ptr); - scm_i_scm_pthread_mutex_lock (&scm_i_port_table_mutex); - - p = SCM_PTAB_ENTRY (port); - pti = SCM_PORT_GET_INTERNAL (port); - scm_port_non_buffer (p); - p->putback_buf = NULL; - p->putback_buf_size = 0; + if (!SCM_PORTP (port)) + abort (); - if (pti->iconv_descriptors) + if (SCM_OPENP (port)) { - close_iconv_descriptors (pti->iconv_descriptors); - pti->iconv_descriptors = NULL; - } + struct do_free_data data; + + SCM_CLR_PORT_OPEN_FLAG (port); - SCM_SETPTAB_ENTRY (port, 0); + data.ptob = SCM_PORT_DESCRIPTOR (port); + data.port = port; - scm_hashq_remove_x (scm_i_port_weak_hash, port); + scm_internal_catch (SCM_BOOL_T, do_free, &data, + scm_handle_by_message_noexit, NULL); - scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex); + scm_gc_ports_collected++; + } } -#undef FUNC_NAME -/* Functions for debugging. */ -#ifdef GUILE_DEBUG -SCM_DEFINE (scm_pt_size, "pt-size", 0, 0, 0, - (), - "Return the number of ports in the port table. @code{pt-size}\n" - "is only included in @code{--enable-guile-debug} builds.") -#define FUNC_NAME s_scm_pt_size -{ - return scm_from_int (SCM_HASHTABLE_N_ITEMS (scm_i_port_weak_hash)); -} -#undef FUNC_NAME -#endif + -void -scm_port_non_buffer (scm_t_port *pt) +SCM +scm_c_make_port_with_encoding (scm_t_bits tag, unsigned long mode_bits, + const char *encoding, + scm_t_string_failed_conversion_handler handler, + scm_t_bits stream) { - pt->read_pos = pt->read_buf = pt->read_end = &pt->shortbuf; - pt->write_buf = pt->write_pos = &pt->shortbuf; - pt->read_buf_size = pt->write_buf_size = 1; - pt->write_end = pt->write_buf + pt->write_buf_size; -} + SCM ret; + scm_t_port *entry; + scm_t_port_internal *pti; + scm_t_ptob_descriptor *ptob; - -/* Revealed counts --- an oddity inherited from SCSH. */ + entry = scm_gc_typed_calloc (scm_t_port); + pti = scm_gc_typed_calloc (scm_t_port_internal); + ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tag)); -/* Find a port in the table and return its revealed count. - Also used by the garbage collector. - */ + ret = scm_words (tag | mode_bits, 3); + SCM_SET_CELL_WORD_1 (ret, (scm_t_bits) entry); + SCM_SET_CELL_WORD_2 (ret, (scm_t_bits) ptob); -int -scm_revealed_count (SCM port) -{ - return SCM_REVEALED(port); -} + entry->lock = scm_gc_malloc_pointerless (sizeof (*entry->lock), "port lock"); + scm_i_pthread_mutex_init (entry->lock, scm_i_pthread_mutexattr_recursive); + + entry->internal = pti; + entry->file_name = SCM_BOOL_F; + entry->rw_active = SCM_PORT_NEITHER; + entry->port = ret; + entry->stream = stream; + + if (encoding_matches (encoding, "UTF-8")) + { + pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8; + entry->encoding = "UTF-8"; + } + else if (encoding_matches (encoding, "ISO-8859-1")) + { + pti->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1; + entry->encoding = "ISO-8859-1"; + } + else + { + pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV; + entry->encoding = canonicalize_encoding (encoding); + } + entry->ilseq_handler = handler; + pti->iconv_descriptors = NULL; + pti->at_stream_start_for_bom_read = 1; + pti->at_stream_start_for_bom_write = 1; -/* Return the revealed count for a port. */ + pti->pending_eof = 0; + pti->alist = SCM_EOL; -SCM_DEFINE (scm_port_revealed, "port-revealed", 1, 0, 0, - (SCM port), - "Return the revealed count for @var{port}.") -#define FUNC_NAME s_scm_port_revealed -{ - port = SCM_COERCE_OUTPORT (port); - SCM_VALIDATE_OPENPORT (1, port); - return scm_from_int (scm_revealed_count (port)); + if (SCM_PORT_DESCRIPTOR (ret)->free) + scm_i_set_finalizer (SCM2PTR (ret), finalize_port, NULL); + + if (SCM_PORT_DESCRIPTOR (ret)->flags & SCM_PORT_TYPE_HAS_FLUSH) + scm_weak_set_add_x (scm_i_port_weak_set, ret); + + return ret; } -#undef FUNC_NAME -/* Set the revealed count for a port. */ -SCM_DEFINE (scm_set_port_revealed_x, "set-port-revealed!", 2, 0, 0, - (SCM port, SCM rcount), - "Sets the revealed count for a port to a given value.\n" - "The return value is unspecified.") -#define FUNC_NAME s_scm_set_port_revealed_x +SCM +scm_c_make_port (scm_t_bits tag, unsigned long mode_bits, scm_t_bits stream) { - port = SCM_COERCE_OUTPORT (port); - SCM_VALIDATE_OPENPORT (1, port); - SCM_REVEALED (port) = scm_to_int (rcount); - return SCM_UNSPECIFIED; + return scm_c_make_port_with_encoding (tag, mode_bits, + scm_i_default_port_encoding (), + scm_i_default_port_conversion_handler (), + stream); } -#undef FUNC_NAME +SCM +scm_new_port_table_entry (scm_t_bits tag) +{ + return scm_c_make_port (tag, 0, 0); +} -/* Retrieving a port's mode. */ -/* Return the flags that characterize a port based on the mode - * string used to open a file for that port. - * - * See PORT FLAGS in scm.h - */ +/* Predicates. */ -static long -scm_i_mode_bits_n (SCM modes) +SCM_DEFINE (scm_port_p, "port?", 1, 0, 0, + (SCM x), + "Return a boolean indicating whether @var{x} is a port.\n" + "Equivalent to @code{(or (input-port? @var{x}) (output-port?\n" + "@var{x}))}.") +#define FUNC_NAME s_scm_port_p { - return (SCM_OPN - | (scm_i_string_contains_char (modes, 'r') - || scm_i_string_contains_char (modes, '+') ? SCM_RDNG : 0) - | (scm_i_string_contains_char (modes, 'w') - || scm_i_string_contains_char (modes, 'a') - || scm_i_string_contains_char (modes, '+') ? SCM_WRTNG : 0) - | (scm_i_string_contains_char (modes, '0') ? SCM_BUF0 : 0) - | (scm_i_string_contains_char (modes, 'l') ? SCM_BUFLINE : 0)); + return scm_from_bool (SCM_PORTP (x)); } +#undef FUNC_NAME -long -scm_mode_bits (char *modes) +SCM_DEFINE (scm_input_port_p, "input-port?", 1, 0, 0, + (SCM x), + "Return @code{#t} if @var{x} is an input port, otherwise return\n" + "@code{#f}. Any object satisfying this predicate also satisfies\n" + "@code{port?}.") +#define FUNC_NAME s_scm_input_port_p { - return scm_i_mode_bits (scm_from_locale_string (modes)); + return scm_from_bool (SCM_INPUT_PORT_P (x)); } +#undef FUNC_NAME -long -scm_i_mode_bits (SCM modes) +SCM_DEFINE (scm_output_port_p, "output-port?", 1, 0, 0, + (SCM x), + "Return @code{#t} if @var{x} is an output port, otherwise return\n" + "@code{#f}. Any object satisfying this predicate also satisfies\n" + "@code{port?}.") +#define FUNC_NAME s_scm_output_port_p { - long bits; - - if (!scm_is_string (modes)) - scm_wrong_type_arg_msg (NULL, 0, modes, "string"); - - bits = scm_i_mode_bits_n (modes); - scm_remember_upto_here_1 (modes); - return bits; + x = SCM_COERCE_OUTPORT (x); + return scm_from_bool (SCM_OUTPUT_PORT_P (x)); } +#undef FUNC_NAME -/* Return the mode flags from an open port. - * Some modes such as "append" are only used when opening - * a file and are not returned here. */ - -SCM_DEFINE (scm_port_mode, "port-mode", 1, 0, 0, +SCM_DEFINE (scm_port_closed_p, "port-closed?", 1, 0, 0, (SCM port), - "Return the port modes associated with the open port @var{port}.\n" - "These will not necessarily be identical to the modes used when\n" - "the port was opened, since modes such as \"append\" which are\n" - "used only during port creation are not retained.") -#define FUNC_NAME s_scm_port_mode + "Return @code{#t} if @var{port} is closed or @code{#f} if it is\n" + "open.") +#define FUNC_NAME s_scm_port_closed_p { - char modes[4]; - modes[0] = '\0'; + SCM_VALIDATE_PORT (1, port); + return scm_from_bool (!SCM_OPPORTP (port)); +} +#undef FUNC_NAME - port = SCM_COERCE_OUTPORT (port); - SCM_VALIDATE_OPPORT (1, port); - if (SCM_CELL_WORD_0 (port) & SCM_RDNG) { - if (SCM_CELL_WORD_0 (port) & SCM_WRTNG) - strcpy (modes, "r+"); - else - strcpy (modes, "r"); - } - else if (SCM_CELL_WORD_0 (port) & SCM_WRTNG) - strcpy (modes, "w"); - if (SCM_CELL_WORD_0 (port) & SCM_BUF0) - strcat (modes, "0"); - return scm_from_locale_string (modes); +SCM_DEFINE (scm_eof_object_p, "eof-object?", 1, 0, 0, + (SCM x), + "Return @code{#t} if @var{x} is an end-of-file object; otherwise\n" + "return @code{#f}.") +#define FUNC_NAME s_scm_eof_object_p +{ + return scm_from_bool (SCM_EOF_OBJECT_P (x)); } #undef FUNC_NAME + /* Closing ports. */ +static void close_iconv_descriptors (scm_t_iconv_descriptors *id); + /* scm_close_port * Call the close operation on a port object. * see also scm_close. @@ -889,7 +815,7 @@ SCM_DEFINE (scm_close_port, "close-port", 1, 0, 0, "descriptors.") #define FUNC_NAME s_scm_close_port { - size_t i; + scm_t_port_internal *pti; int rv; port = SCM_COERCE_OUTPORT (port); @@ -897,13 +823,28 @@ SCM_DEFINE (scm_close_port, "close-port", 1, 0, 0, SCM_VALIDATE_PORT (1, port); if (SCM_CLOSEDP (port)) return SCM_BOOL_F; - i = SCM_PTOBNUM (port); - if (scm_ptobs[i].close) - rv = (scm_ptobs[i].close) (port); + + pti = SCM_PORT_GET_INTERNAL (port); + SCM_CLR_PORT_OPEN_FLAG (port); + + if (SCM_PORT_DESCRIPTOR (port)->flags & SCM_PORT_TYPE_HAS_FLUSH) + scm_weak_set_remove_x (scm_i_port_weak_set, port); + + if (SCM_PORT_DESCRIPTOR (port)->close) + /* Note! This may throw an exception. Anything after this point + should be resilient to non-local exits. */ + rv = SCM_PORT_DESCRIPTOR (port)->close (port); else rv = 0; - scm_i_remove_port (port); - SCM_CLR_PORT_OPEN_FLAG (port); + + if (pti->iconv_descriptors) + { + /* If we don't get here, the iconv_descriptors finalizer will + clean up. */ + close_iconv_descriptors (pti->iconv_descriptors); + pti->iconv_descriptors = NULL; + } + return scm_from_bool (rv >= 0); } #undef FUNC_NAME @@ -939,661 +880,543 @@ SCM_DEFINE (scm_close_output_port, "close-output-port", 1, 0, 0, } #undef FUNC_NAME -static SCM -collect_keys (void *unused, SCM key, SCM value, SCM result) -{ - return scm_cons (key, result); -} + + +/* Encoding characters to byte streams, and decoding byte streams to + characters. */ + +/* A fluid specifying the default encoding for newly created ports. If it is + a string, that is the encoding. If it is #f, it is in the "native" + (Latin-1) encoding. */ +SCM_VARIABLE (default_port_encoding_var, "%default-port-encoding"); + +static int scm_port_encoding_init = 0; + +/* Use ENCODING as the default encoding for future ports. */ void -scm_c_port_for_each (void (*proc)(void *data, SCM p), void *data) +scm_i_set_default_port_encoding (const char *encoding) { - SCM ports; + if (!scm_port_encoding_init + || !scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var))) + scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized", + SCM_EOL); - /* Copy out the port table as a list so that we get strong references - to all the values. */ - scm_i_pthread_mutex_lock (&scm_i_port_table_mutex); - ports = scm_internal_hash_fold (collect_keys, NULL, - SCM_EOL, scm_i_port_weak_hash); - scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex); + if (encoding_matches (encoding, "ISO-8859-1")) + scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F); + else + scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), + scm_from_latin1_string (canonicalize_encoding (encoding))); +} - for (; scm_is_pair (ports); ports = scm_cdr (ports)) +/* Return the name of the default encoding for newly created ports. */ +const char * +scm_i_default_port_encoding (void) +{ + if (!scm_port_encoding_init) + return "ISO-8859-1"; + else if (!scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var))) + return "ISO-8859-1"; + else { - SCM p = scm_car (ports); - if (SCM_PORTP (p)) - proc (data, p); + SCM encoding; + + encoding = scm_fluid_ref (SCM_VARIABLE_REF (default_port_encoding_var)); + if (!scm_is_string (encoding)) + return "ISO-8859-1"; + else + return scm_i_string_chars (encoding); } } -SCM_DEFINE (scm_port_for_each, "port-for-each", 1, 0, 0, - (SCM proc), - "Apply @var{proc} to each port in the Guile port table\n" - "in turn. The return value is unspecified. More specifically,\n" - "@var{proc} is applied exactly once to every port that exists\n" - "in the system at the time @code{port-for-each} is invoked.\n" - "Changes to the port table while @code{port-for-each} is running\n" - "have no effect as far as @code{port-for-each} is concerned.") -#define FUNC_NAME s_scm_port_for_each -{ - SCM ports; +/* A fluid specifying the default conversion handler for newly created + ports. Its value should be one of the symbols below. */ +SCM_VARIABLE (default_conversion_strategy_var, + "%default-port-conversion-strategy"); - SCM_VALIDATE_PROC (1, proc); +/* Whether the above fluid is initialized. */ +static int scm_conversion_strategy_init = 0; - /* Copy out the port table as a list so that we get strong references - to all the values. */ - scm_i_pthread_mutex_lock (&scm_i_port_table_mutex); - ports = scm_internal_hash_fold (collect_keys, NULL, - SCM_EOL, scm_i_port_weak_hash); - scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex); +/* The possible conversion strategies. */ +SCM_SYMBOL (sym_error, "error"); +SCM_SYMBOL (sym_substitute, "substitute"); +SCM_SYMBOL (sym_escape, "escape"); - for (; scm_is_pair (ports); ports = scm_cdr (ports)) - if (SCM_PORTP (SCM_CAR (ports))) - scm_call_1 (proc, SCM_CAR (ports)); +/* Return the default failed encoding conversion policy for new created + ports. */ +scm_t_string_failed_conversion_handler +scm_i_default_port_conversion_handler (void) +{ + scm_t_string_failed_conversion_handler handler; - return SCM_UNSPECIFIED; -} -#undef FUNC_NAME + if (!scm_conversion_strategy_init + || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var))) + handler = SCM_FAILED_CONVERSION_QUESTION_MARK; + else + { + SCM fluid, value; + fluid = SCM_VARIABLE_REF (default_conversion_strategy_var); + value = scm_fluid_ref (fluid); - -/* Utter miscellany. Gosh, we should clean this up some time. */ + if (scm_is_eq (sym_substitute, value)) + handler = SCM_FAILED_CONVERSION_QUESTION_MARK; + else if (scm_is_eq (sym_escape, value)) + handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE; + else + /* Default to 'error also when the fluid's value is not one of + the valid symbols. */ + handler = SCM_FAILED_CONVERSION_ERROR; + } -SCM_DEFINE (scm_input_port_p, "input-port?", 1, 0, 0, - (SCM x), - "Return @code{#t} if @var{x} is an input port, otherwise return\n" - "@code{#f}. Any object satisfying this predicate also satisfies\n" - "@code{port?}.") -#define FUNC_NAME s_scm_input_port_p -{ - return scm_from_bool (SCM_INPUT_PORT_P (x)); + return handler; } -#undef FUNC_NAME -SCM_DEFINE (scm_output_port_p, "output-port?", 1, 0, 0, - (SCM x), - "Return @code{#t} if @var{x} is an output port, otherwise return\n" - "@code{#f}. Any object satisfying this predicate also satisfies\n" - "@code{port?}.") -#define FUNC_NAME s_scm_output_port_p +/* Use HANDLER as the default conversion strategy for future ports. */ +void +scm_i_set_default_port_conversion_handler (scm_t_string_failed_conversion_handler + handler) { - x = SCM_COERCE_OUTPORT (x); - return scm_from_bool (SCM_OUTPUT_PORT_P (x)); -} -#undef FUNC_NAME + SCM strategy; -SCM_DEFINE (scm_port_p, "port?", 1, 0, 0, - (SCM x), - "Return a boolean indicating whether @var{x} is a port.\n" - "Equivalent to @code{(or (input-port? @var{x}) (output-port?\n" - "@var{x}))}.") -#define FUNC_NAME s_scm_port_p -{ - return scm_from_bool (SCM_PORTP (x)); -} -#undef FUNC_NAME + if (!scm_conversion_strategy_init + || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var))) + scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized", + SCM_EOL); -SCM_DEFINE (scm_port_closed_p, "port-closed?", 1, 0, 0, - (SCM port), - "Return @code{#t} if @var{port} is closed or @code{#f} if it is\n" - "open.") -#define FUNC_NAME s_scm_port_closed_p -{ - SCM_VALIDATE_PORT (1, port); - return scm_from_bool (!SCM_OPPORTP (port)); -} -#undef FUNC_NAME + switch (handler) + { + case SCM_FAILED_CONVERSION_ERROR: + strategy = sym_error; + break; -SCM_DEFINE (scm_eof_object_p, "eof-object?", 1, 0, 0, - (SCM x), - "Return @code{#t} if @var{x} is an end-of-file object; otherwise\n" - "return @code{#f}.") -#define FUNC_NAME s_scm_eof_object_p -{ - return scm_from_bool(SCM_EOF_OBJECT_P (x)); + case SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE: + strategy = sym_escape; + break; + + case SCM_FAILED_CONVERSION_QUESTION_MARK: + strategy = sym_substitute; + break; + + default: + abort (); + } + + scm_fluid_set_x (SCM_VARIABLE_REF (default_conversion_strategy_var), + strategy); } -#undef FUNC_NAME -SCM_DEFINE (scm_force_output, "force-output", 0, 1, 0, - (SCM port), - "Flush the specified output port, or the current output port if @var{port}\n" - "is omitted. The current output buffer contents are passed to the\n" - "underlying port implementation (e.g., in the case of fports, the\n" - "data will be written to the file and the output buffer will be cleared.)\n" - "It has no effect on an unbuffered port.\n\n" - "The return value is unspecified.") -#define FUNC_NAME s_scm_force_output +static void +scm_i_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port); + +/* If the next LEN bytes from PORT are equal to those in BYTES, then + return 1, else return 0. Leave the port position unchanged. */ +static int +looking_at_bytes (SCM port, const unsigned char *bytes, int len) { - if (SCM_UNBNDP (port)) - port = scm_current_output_port (); - else + scm_t_port *pt = SCM_PTAB_ENTRY (port); + int i = 0; + + while (i < len && scm_peek_byte_or_eof_unlocked (port) == bytes[i]) { - port = SCM_COERCE_OUTPORT (port); - SCM_VALIDATE_OPOUTPORT (1, port); + pt->read_pos++; + i++; } - scm_flush (port); - return SCM_UNSPECIFIED; + scm_i_unget_bytes_unlocked (bytes, i, port); + return (i == len); } -#undef FUNC_NAME +static const unsigned char scm_utf8_bom[3] = {0xEF, 0xBB, 0xBF}; +static const unsigned char scm_utf16be_bom[2] = {0xFE, 0xFF}; +static const unsigned char scm_utf16le_bom[2] = {0xFF, 0xFE}; +static const unsigned char scm_utf32be_bom[4] = {0x00, 0x00, 0xFE, 0xFF}; +static const unsigned char scm_utf32le_bom[4] = {0xFF, 0xFE, 0x00, 0x00}; -static void -flush_output_port (void *closure, SCM port) +/* Decide what byte order to use for a UTF-16 port. Return "UTF-16BE" + or "UTF-16LE". MODE must be either SCM_PORT_READ or SCM_PORT_WRITE, + and specifies which operation is about to be done. The MODE + determines how we will decide the byte order. We deliberately avoid + reading from the port unless the user is about to do so. If the user + is about to read, then we look for a BOM, and if present, we use it + to determine the byte order. Otherwise we choose big endian, as + recommended by the Unicode Standard. Note that the BOM (if any) is + not consumed here. */ +static const char * +decide_utf16_encoding (SCM port, scm_t_port_rw_active mode) { - if (SCM_OPOUTPORTP (port)) - scm_flush (port); + if (mode == SCM_PORT_READ + && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read + && looking_at_bytes (port, scm_utf16le_bom, sizeof scm_utf16le_bom)) + return "UTF-16LE"; + else + return "UTF-16BE"; } -SCM_DEFINE (scm_flush_all_ports, "flush-all-ports", 0, 0, 0, - (), - "Equivalent to calling @code{force-output} on\n" - "all open output ports. The return value is unspecified.") -#define FUNC_NAME s_scm_flush_all_ports +/* Decide what byte order to use for a UTF-32 port. Return "UTF-32BE" + or "UTF-32LE". See the comment above 'decide_utf16_encoding' for + details. */ +static const char * +decide_utf32_encoding (SCM port, scm_t_port_rw_active mode) { - scm_c_port_for_each (&flush_output_port, NULL); - return SCM_UNSPECIFIED; + if (mode == SCM_PORT_READ + && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read + && looking_at_bytes (port, scm_utf32le_bom, sizeof scm_utf32le_bom)) + return "UTF-32LE"; + else + return "UTF-32BE"; } -#undef FUNC_NAME -SCM_DEFINE (scm_read_char, "read-char", 0, 1, 0, - (SCM port), - "Return the next character available from @var{port}, updating\n" - "@var{port} to point to the following character. If no more\n" - "characters are available, the end-of-file object is returned.\n" - "\n" - "When @var{port}'s data cannot be decoded according to its\n" - "character encoding, a @code{decoding-error} is raised and\n" - "@var{port} points past the erroneous byte sequence.\n") -#define FUNC_NAME s_scm_read_char +static void +finalize_iconv_descriptors (void *ptr, void *data) { - scm_t_wchar c; - if (SCM_UNBNDP (port)) - port = scm_current_input_port (); - SCM_VALIDATE_OPINPORT (1, port); - c = scm_getc (port); - if (EOF == c) - return SCM_EOF_VAL; - return SCM_MAKE_CHAR (c); + close_iconv_descriptors (ptr); } -#undef FUNC_NAME -/* Update the line and column number of PORT after consumption of C. */ -static inline void -update_port_lf (scm_t_wchar c, SCM port) +static scm_t_iconv_descriptors * +open_iconv_descriptors (const char *encoding, int reading, int writing) { - switch (c) - { - case '\a': - case EOF: - break; - case '\b': - SCM_DECCOL (port); - break; - case '\n': - SCM_INCLINE (port); - break; - case '\r': - SCM_ZEROCOL (port); - break; - case '\t': - SCM_TABCOL (port); - break; - default: - SCM_INCCOL (port); - break; - } -} + scm_t_iconv_descriptors *id; + iconv_t input_cd, output_cd; + size_t i; -#define SCM_MBCHAR_BUF_SIZE (4) + input_cd = (iconv_t) -1; + output_cd = (iconv_t) -1; -/* Convert the SIZE-byte UTF-8 sequence in UTF8_BUF to a codepoint. - UTF8_BUF is assumed to contain a valid UTF-8 sequence. */ -static scm_t_wchar -utf8_to_codepoint (const scm_t_uint8 *utf8_buf, size_t size) -{ - scm_t_wchar codepoint; + for (i = 0; encoding[i]; i++) + if (encoding[i] > 127) + goto invalid_encoding; - if (utf8_buf[0] <= 0x7f) - { - assert (size == 1); - codepoint = utf8_buf[0]; - } - else if ((utf8_buf[0] & 0xe0) == 0xc0) + if (reading) { - assert (size == 2); - codepoint = ((scm_t_wchar) utf8_buf[0] & 0x1f) << 6UL - | (utf8_buf[1] & 0x3f); - } - else if ((utf8_buf[0] & 0xf0) == 0xe0) - { - assert (size == 3); - codepoint = ((scm_t_wchar) utf8_buf[0] & 0x0f) << 12UL - | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 6UL - | (utf8_buf[2] & 0x3f); - } - else - { - assert (size == 4); - codepoint = ((scm_t_wchar) utf8_buf[0] & 0x07) << 18UL - | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 12UL - | ((scm_t_wchar) utf8_buf[2] & 0x3f) << 6UL - | (utf8_buf[3] & 0x3f); - } - - return codepoint; -} - -/* Read a UTF-8 sequence from PORT. On success, return 0 and set - *CODEPOINT to the codepoint that was read, fill BUF with its UTF-8 - representation, and set *LEN to the length in bytes. Return - `EILSEQ' on error. */ -static int -get_utf8_codepoint (SCM port, scm_t_wchar *codepoint, - scm_t_uint8 buf[SCM_MBCHAR_BUF_SIZE], size_t *len) -{ -#define ASSERT_NOT_EOF(b) \ - if (SCM_UNLIKELY ((b) == EOF)) \ - goto invalid_seq -#define CONSUME_PEEKED_BYTE() \ - pt->read_pos++ - - int byte; - scm_t_port *pt; + /* Open an input iconv conversion descriptor, from ENCODING + to UTF-8. We choose UTF-8, not UTF-32, because iconv + implementations can typically convert from anything to + UTF-8, but not to UTF-32 (see + ). */ - *len = 0; - pt = SCM_PTAB_ENTRY (port); + /* Assume opening an iconv descriptor causes about 16 KB of + allocation. */ + scm_gc_register_allocation (16 * 1024); - byte = scm_get_byte_or_eof (port); - if (byte == EOF) - { - *codepoint = EOF; - return 0; + input_cd = iconv_open ("UTF-8", encoding); + if (input_cd == (iconv_t) -1) + goto invalid_encoding; } - buf[0] = (scm_t_uint8) byte; - *len = 1; - - if (buf[0] <= 0x7f) - /* 1-byte form. */ - *codepoint = buf[0]; - else if (buf[0] >= 0xc2 && buf[0] <= 0xdf) + if (writing) { - /* 2-byte form. */ - byte = scm_peek_byte_or_eof (port); - ASSERT_NOT_EOF (byte); + /* Assume opening an iconv descriptor causes about 16 KB of + allocation. */ + scm_gc_register_allocation (16 * 1024); - if (SCM_UNLIKELY ((byte & 0xc0) != 0x80)) - goto invalid_seq; + output_cd = iconv_open (encoding, "UTF-8"); + if (output_cd == (iconv_t) -1) + { + if (input_cd != (iconv_t) -1) + iconv_close (input_cd); + goto invalid_encoding; + } + } - CONSUME_PEEKED_BYTE (); - buf[1] = (scm_t_uint8) byte; - *len = 2; + id = scm_gc_malloc_pointerless (sizeof (*id), "iconv descriptors"); + id->input_cd = input_cd; + id->output_cd = output_cd; - *codepoint = ((scm_t_wchar) buf[0] & 0x1f) << 6UL - | (buf[1] & 0x3f); - } - else if ((buf[0] & 0xf0) == 0xe0) - { - /* 3-byte form. */ - byte = scm_peek_byte_or_eof (port); - ASSERT_NOT_EOF (byte); + /* Register a finalizer to close the descriptors. */ + scm_i_set_finalizer (id, finalize_iconv_descriptors, NULL); - if (SCM_UNLIKELY ((byte & 0xc0) != 0x80 - || (buf[0] == 0xe0 && byte < 0xa0) - || (buf[0] == 0xed && byte > 0x9f))) - goto invalid_seq; + return id; - CONSUME_PEEKED_BYTE (); - buf[1] = (scm_t_uint8) byte; - *len = 2; + invalid_encoding: + { + SCM err; + err = scm_from_latin1_string (encoding); + scm_misc_error ("open_iconv_descriptors", + "invalid or unknown character encoding ~s", + scm_list_1 (err)); + } +} - byte = scm_peek_byte_or_eof (port); - ASSERT_NOT_EOF (byte); +static void +close_iconv_descriptors (scm_t_iconv_descriptors *id) +{ + if (id->input_cd != (iconv_t) -1) + iconv_close (id->input_cd); + if (id->output_cd != (iconv_t) -1) + iconv_close (id->output_cd); + id->input_cd = (void *) -1; + id->output_cd = (void *) -1; +} - if (SCM_UNLIKELY ((byte & 0xc0) != 0x80)) - goto invalid_seq; +scm_t_iconv_descriptors * +scm_i_port_iconv_descriptors (SCM port, scm_t_port_rw_active mode) +{ + scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port); - CONSUME_PEEKED_BYTE (); - buf[2] = (scm_t_uint8) byte; - *len = 3; + assert (pti->encoding_mode == SCM_PORT_ENCODING_MODE_ICONV); - *codepoint = ((scm_t_wchar) buf[0] & 0x0f) << 12UL - | ((scm_t_wchar) buf[1] & 0x3f) << 6UL - | (buf[2] & 0x3f); - } - else if (buf[0] >= 0xf0 && buf[0] <= 0xf4) + if (!pti->iconv_descriptors) { - /* 4-byte form. */ - byte = scm_peek_byte_or_eof (port); - ASSERT_NOT_EOF (byte); - - if (SCM_UNLIKELY (((byte & 0xc0) != 0x80) - || (buf[0] == 0xf0 && byte < 0x90) - || (buf[0] == 0xf4 && byte > 0x8f))) - goto invalid_seq; + scm_t_port *pt = SCM_PTAB_ENTRY (port); + const char *precise_encoding; - CONSUME_PEEKED_BYTE (); - buf[1] = (scm_t_uint8) byte; - *len = 2; + if (!pt->encoding) + pt->encoding = "ISO-8859-1"; - byte = scm_peek_byte_or_eof (port); - ASSERT_NOT_EOF (byte); + /* If the specified encoding is UTF-16 or UTF-32, then make + that more precise by deciding what byte order to use. */ + if (strcmp (pt->encoding, "UTF-16") == 0) + precise_encoding = decide_utf16_encoding (port, mode); + else if (strcmp (pt->encoding, "UTF-32") == 0) + precise_encoding = decide_utf32_encoding (port, mode); + else + precise_encoding = pt->encoding; - if (SCM_UNLIKELY ((byte & 0xc0) != 0x80)) - goto invalid_seq; + pti->iconv_descriptors = + open_iconv_descriptors (precise_encoding, + SCM_INPUT_PORT_P (port), + SCM_OUTPUT_PORT_P (port)); + } - CONSUME_PEEKED_BYTE (); - buf[2] = (scm_t_uint8) byte; - *len = 3; + return pti->iconv_descriptors; +} - byte = scm_peek_byte_or_eof (port); - ASSERT_NOT_EOF (byte); +/* The name of the encoding is itself encoded in ASCII. */ +void +scm_i_set_port_encoding_x (SCM port, const char *encoding) +{ + scm_t_port *pt; + scm_t_port_internal *pti; + scm_t_iconv_descriptors *prev; - if (SCM_UNLIKELY ((byte & 0xc0) != 0x80)) - goto invalid_seq; + /* Set the character encoding for this port. */ + pt = SCM_PTAB_ENTRY (port); + pti = SCM_PORT_GET_INTERNAL (port); + prev = pti->iconv_descriptors; - CONSUME_PEEKED_BYTE (); - buf[3] = (scm_t_uint8) byte; - *len = 4; + /* In order to handle cases where the encoding changes mid-stream + (e.g. within an HTTP stream, or within a file that is composed of + segments with different encodings), we consider this to be "stream + start" for purposes of BOM handling, regardless of our actual file + position. */ + pti->at_stream_start_for_bom_read = 1; + pti->at_stream_start_for_bom_write = 1; - *codepoint = ((scm_t_wchar) buf[0] & 0x07) << 18UL - | ((scm_t_wchar) buf[1] & 0x3f) << 12UL - | ((scm_t_wchar) buf[2] & 0x3f) << 6UL - | (buf[3] & 0x3f); + if (encoding_matches (encoding, "UTF-8")) + { + pt->encoding = "UTF-8"; + pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8; + } + else if (encoding_matches (encoding, "ISO-8859-1")) + { + pt->encoding = "ISO-8859-1"; + pti->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1; } else - goto invalid_seq; - - return 0; + { + pt->encoding = canonicalize_encoding (encoding); + pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV; + } - invalid_seq: - /* Here we could choose the consume the faulty byte when it's not a - valid starting byte, but it's not a requirement. What Section 3.9 - of Unicode 6.0.0 mandates, though, is to not consume a byte that - would otherwise be a valid starting byte. */ + pti->iconv_descriptors = NULL; + if (prev) + close_iconv_descriptors (prev); +} - return EILSEQ; +SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0, + (SCM port), + "Returns, as a string, the character encoding that @var{port}\n" + "uses to interpret its input and output.\n") +#define FUNC_NAME s_scm_port_encoding +{ + SCM_VALIDATE_PORT (1, port); -#undef CONSUME_PEEKED_BYTE -#undef ASSERT_NOT_EOF + return scm_from_latin1_string (SCM_PTAB_ENTRY (port)->encoding); } +#undef FUNC_NAME -/* Likewise, read a byte sequence from PORT, passing it through its - input conversion descriptor. */ -static int -get_iconv_codepoint (SCM port, scm_t_wchar *codepoint, - char buf[SCM_MBCHAR_BUF_SIZE], size_t *len) +SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0, + (SCM port, SCM enc), + "Sets the character encoding that will be used to interpret all\n" + "port I/O. New ports are created with the encoding\n" + "appropriate for the current locale if @code{setlocale} has \n" + "been called or ISO-8859-1 otherwise\n" + "and this procedure can be used to modify that encoding.\n") +#define FUNC_NAME s_scm_set_port_encoding_x { - scm_t_iconv_descriptors *id; - scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE]; - size_t input_size = 0; + char *enc_str; - id = scm_i_port_iconv_descriptors (port, SCM_PORT_READ); + SCM_VALIDATE_PORT (1, port); + SCM_VALIDATE_STRING (2, enc); - for (;;) - { - int byte_read; - char *input, *output; - size_t input_left, output_left, done; + enc_str = scm_to_latin1_string (enc); + scm_i_set_port_encoding_x (port, enc_str); + free (enc_str); - byte_read = scm_get_byte_or_eof (port); - if (SCM_UNLIKELY (byte_read == EOF)) - { - if (SCM_LIKELY (input_size == 0)) - { - *codepoint = (scm_t_wchar) EOF; - *len = input_size; - return 0; - } - else - { - /* EOF found in the middle of a multibyte character. */ - scm_i_set_pending_eof (port); - return EILSEQ; - } - } + return SCM_UNSPECIFIED; +} +#undef FUNC_NAME - buf[input_size++] = byte_read; +SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy", + 1, 0, 0, (SCM port), + "Returns the behavior of the port when handling a character that\n" + "is not representable in the port's current encoding.\n" + "It returns the symbol @code{error} if unrepresentable characters\n" + "should cause exceptions, @code{substitute} if the port should\n" + "try to replace unrepresentable characters with question marks or\n" + "approximate characters, or @code{escape} if unrepresentable\n" + "characters should be converted to string escapes.\n" + "\n" + "If @var{port} is @code{#f}, then the current default behavior\n" + "will be returned. New ports will have this default behavior\n" + "when they are created.\n") +#define FUNC_NAME s_scm_port_conversion_strategy +{ + scm_t_string_failed_conversion_handler h; - input = buf; - input_left = input_size; - output = (char *) utf8_buf; - output_left = sizeof (utf8_buf); + SCM_VALIDATE_OPPORT (1, port); - done = iconv (id->input_cd, &input, &input_left, &output, &output_left); + if (scm_is_false (port)) + h = scm_i_default_port_conversion_handler (); + else + { + scm_t_port *pt; - if (done == (size_t) -1) - { - int err = errno; - if (SCM_LIKELY (err == EINVAL)) - /* The input byte sequence did not form a complete - character. Read another byte and try again. */ - continue; - else - return err; - } - else - { - size_t output_size = sizeof (utf8_buf) - output_left; - if (SCM_LIKELY (output_size > 0)) - { - /* iconv generated output. Convert the UTF8_BUF sequence - to a Unicode code point. */ - *codepoint = utf8_to_codepoint (utf8_buf, output_size); - *len = input_size; - return 0; - } - else - { - /* iconv consumed some bytes without producing any output. - Most likely this means that a Unicode byte-order mark - (BOM) was consumed, which should not be included in the - returned buf. Shift any remaining bytes to the beginning - of buf, and continue the loop. */ - memmove (buf, input, input_left); - input_size = input_left; - continue; - } - } + SCM_VALIDATE_OPPORT (1, port); + pt = SCM_PTAB_ENTRY (port); + + h = pt->ilseq_handler; } + + if (h == SCM_FAILED_CONVERSION_ERROR) + return scm_from_latin1_symbol ("error"); + else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK) + return scm_from_latin1_symbol ("substitute"); + else if (h == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE) + return scm_from_latin1_symbol ("escape"); + else + abort (); + + /* Never gets here. */ + return SCM_UNDEFINED; } +#undef FUNC_NAME -/* Read a codepoint from PORT and return it in *CODEPOINT. Fill BUF - with the byte representation of the codepoint in PORT's encoding, and - set *LEN to the length in bytes of that representation. Return 0 on - success and an errno value on error. */ -static int -get_codepoint (SCM port, scm_t_wchar *codepoint, - char buf[SCM_MBCHAR_BUF_SIZE], size_t *len) +SCM_DEFINE (scm_set_port_conversion_strategy_x, "set-port-conversion-strategy!", + 2, 0, 0, + (SCM port, SCM sym), + "Sets the behavior of the interpreter when outputting a character\n" + "that is not representable in the port's current encoding.\n" + "@var{sym} can be either @code{'error}, @code{'substitute}, or\n" + "@code{'escape}. If it is @code{'error}, an error will be thrown\n" + "when an unconvertible character is encountered. If it is\n" + "@code{'substitute}, then unconvertible characters will \n" + "be replaced with approximate characters, or with question marks\n" + "if no approximately correct character is available.\n" + "If it is @code{'escape},\n" + "it will appear as a hex escape when output.\n" + "\n" + "If @var{port} is an open port, the conversion error behavior\n" + "is set for that port. If it is @code{#f}, it is set as the\n" + "default behavior for any future ports that get created in\n" + "this thread.\n") +#define FUNC_NAME s_scm_set_port_conversion_strategy_x { - int err; - scm_t_port *pt = SCM_PTAB_ENTRY (port); - scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port); + scm_t_string_failed_conversion_handler handler; - if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8) - err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len); + if (scm_is_eq (sym, sym_error)) + handler = SCM_FAILED_CONVERSION_ERROR; + else if (scm_is_eq (sym, sym_substitute)) + handler = SCM_FAILED_CONVERSION_QUESTION_MARK; + else if (scm_is_eq (sym, sym_escape)) + handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE; else - err = get_iconv_codepoint (port, codepoint, buf, len); - - if (SCM_LIKELY (err == 0)) - { - if (SCM_UNLIKELY (pti->at_stream_start_for_bom_read)) - { - /* Record that we're no longer at stream start. */ - pti->at_stream_start_for_bom_read = 0; - if (pt->rw_random) - pti->at_stream_start_for_bom_write = 0; + SCM_MISC_ERROR ("unknown conversion strategy ~s", scm_list_1 (sym)); - /* If we just read a BOM in an encoding that recognizes them, - then silently consume it and read another code point. */ - if (SCM_UNLIKELY - (*codepoint == SCM_UNICODE_BOM - && (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8 - || c_strcasecmp (pt->encoding, "UTF-16") == 0 - || c_strcasecmp (pt->encoding, "UTF-32") == 0))) - return get_codepoint (port, codepoint, buf, len); - } - update_port_lf (*codepoint, port); - } - else if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK) + if (scm_is_false (port)) + scm_i_set_default_port_conversion_handler (handler); + else { - *codepoint = '?'; - err = 0; - update_port_lf (*codepoint, port); + SCM_VALIDATE_OPPORT (1, port); + SCM_PTAB_ENTRY (port)->ilseq_handler = handler; } - return err; + return SCM_UNSPECIFIED; } +#undef FUNC_NAME -/* Read a codepoint from PORT and return it. */ -scm_t_wchar -scm_getc (SCM port) -#define FUNC_NAME "scm_getc" -{ - int err; - size_t len; - scm_t_wchar codepoint; - char buf[SCM_MBCHAR_BUF_SIZE]; - err = get_codepoint (port, &codepoint, buf, &len); - if (SCM_UNLIKELY (err != 0)) - /* At this point PORT should point past the invalid encoding, as per - R6RS-lib Section 8.2.4. */ - scm_decoding_error (FUNC_NAME, err, "input decoding error", port); + - return codepoint; -} -#undef FUNC_NAME +/* The port lock. */ -/* this should only be called when the read buffer is empty. it - tries to refill the read buffer. it returns the first char from - the port, which is either EOF or *(pt->read_pos). */ -static int -scm_i_fill_input (SCM port) +static void +lock_port (void *mutex) { - scm_t_port *pt = SCM_PTAB_ENTRY (port); - scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port); - - assert (pt->read_pos == pt->read_end); + scm_i_pthread_mutex_lock (mutex); +} - if (pti->pending_eof) - { - pti->pending_eof = 0; - return EOF; - } +static void +unlock_port (void *mutex) +{ + scm_i_pthread_mutex_unlock (mutex); +} - if (pt->read_buf == pt->putback_buf) +void +scm_dynwind_lock_port (SCM port) +#define FUNC_NAME "dynwind-lock-port" +{ + scm_i_pthread_mutex_t *lock; + SCM_VALIDATE_OPPORT (SCM_ARG1, port); + scm_c_lock_port (port, &lock); + if (lock) { - /* finished reading put-back chars. */ - pt->read_buf = pt->saved_read_buf; - pt->read_pos = pt->saved_read_pos; - pt->read_end = pt->saved_read_end; - pt->read_buf_size = pt->saved_read_buf_size; - if (pt->read_pos < pt->read_end) - return *(pt->read_pos); + scm_dynwind_unwind_handler (unlock_port, lock, SCM_F_WIND_EXPLICITLY); + scm_dynwind_rewind_handler (lock_port, lock, 0); } - return scm_ptobs[SCM_PTOBNUM (port)].fill_input (port); } +#undef FUNC_NAME -int -scm_fill_input (SCM port) -{ - return scm_i_fill_input (port); -} -/* Slow-path fallback for 'scm_get_byte_or_eof' in inline.h */ -int -scm_slow_get_byte_or_eof (SCM port) -{ - scm_t_port *pt = SCM_PTAB_ENTRY (port); + - if (pt->rw_active == SCM_PORT_WRITE) - scm_flush (port); +/* Input. */ - if (pt->rw_random) - pt->rw_active = SCM_PORT_READ; +int +scm_get_byte_or_eof (SCM port) +{ + scm_i_pthread_mutex_t *lock; + int ret; - if (pt->read_pos >= pt->read_end) - { - if (SCM_UNLIKELY (scm_i_fill_input (port) == EOF)) - return EOF; - } + scm_c_lock_port (port, &lock); + ret = scm_get_byte_or_eof_unlocked (port); + if (lock) + scm_i_pthread_mutex_unlock (lock); - return *pt->read_pos++; + return ret; } -/* Slow-path fallback for 'scm_peek_byte_or_eof' in inline.h */ int -scm_slow_peek_byte_or_eof (SCM port) +scm_peek_byte_or_eof (SCM port) { - scm_t_port *pt = SCM_PTAB_ENTRY (port); - - if (pt->rw_active == SCM_PORT_WRITE) - scm_flush (port); - - if (pt->rw_random) - pt->rw_active = SCM_PORT_READ; + scm_i_pthread_mutex_t *lock; + int ret; - if (pt->read_pos >= pt->read_end) - { - if (SCM_UNLIKELY (scm_i_fill_input (port) == EOF)) - { - scm_i_set_pending_eof (port); - return EOF; - } - } + scm_c_lock_port (port, &lock); + ret = scm_peek_byte_or_eof_unlocked (port); + if (lock) + scm_i_pthread_mutex_unlock (lock); - return *pt->read_pos; + return ret; } - -/* scm_lfwrite +/* scm_c_read * - * This function differs from scm_c_write; it updates port line and - * column. */ - -void -scm_lfwrite (const char *ptr, size_t size, SCM port) -{ - scm_t_port *pt = SCM_PTAB_ENTRY (port); - scm_t_ptob_descriptor *ptob = &scm_ptobs[SCM_PTOBNUM (port)]; - - if (pt->rw_active == SCM_PORT_READ) - scm_end_input (port); - - ptob->write (port, ptr, size); - - for (; size; ptr++, size--) - update_port_lf ((scm_t_wchar) (unsigned char) *ptr, port); - - if (pt->rw_random) - pt->rw_active = SCM_PORT_WRITE; -} - -/* Write STR to PORT from START inclusive to END exclusive. */ -void -scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port) -{ - scm_t_port *pt = SCM_PTAB_ENTRY (port); - - if (pt->rw_active == SCM_PORT_READ) - scm_end_input (port); - - if (end == (size_t) -1) - end = scm_i_string_length (str); - - scm_i_display_substring (str, start, end, port); - - if (pt->rw_random) - pt->rw_active = SCM_PORT_WRITE; -} - -/* scm_c_read - * - * Used by an application to read arbitrary number of bytes from an - * SCM port. Same semantics as libc read, except that scm_c_read only - * returns less than SIZE bytes if at end-of-file. - * - * Warning: Doesn't update port line and column counts! */ + * Used by an application to read arbitrary number of bytes from an + * SCM port. Same semantics as libc read, except that scm_c_read only + * returns less than SIZE bytes if at end-of-file. + * + * Warning: Doesn't update port line and column counts! */ /* This structure, and the following swap_buffer function, are used for temporarily swapping a port's own read buffer, and the buffer @@ -1621,19 +1444,23 @@ swap_buffer (void *data) psb->size = old_size; } +static int scm_i_fill_input_unlocked (SCM port); + size_t -scm_c_read (SCM port, void *buffer, size_t size) +scm_c_read_unlocked (SCM port, void *buffer, size_t size) #define FUNC_NAME "scm_c_read" { scm_t_port *pt; + scm_t_port_internal *pti; size_t n_read = 0, n_available; struct port_and_swap_buffer psb; SCM_VALIDATE_OPINPORT (1, port); pt = SCM_PTAB_ENTRY (port); + pti = SCM_PORT_GET_INTERNAL (port); if (pt->rw_active == SCM_PORT_WRITE) - scm_ptobs[SCM_PTOBNUM (port)].flush (port); + SCM_PORT_DESCRIPTOR (port)->flush (port); if (pt->rw_random) pt->rw_active = SCM_PORT_READ; @@ -1653,23 +1480,23 @@ scm_c_read (SCM port, void *buffer, size_t size) if (size == 0) return n_read; - /* Now we will call scm_i_fill_input repeatedly until we have read the - requested number of bytes. (Note that a single scm_i_fill_input - call does not guarantee to fill the whole of the port's read - buffer.) */ - if (pt->read_buf_size <= 1 && pt->encoding == NULL) + /* Now we will call scm_i_fill_input_unlocked repeatedly until we have + read the requested number of bytes. (Note that a single + scm_i_fill_input_unlocked call does not guarantee to fill the whole + of the port's read buffer.) */ + if (pt->read_buf_size <= 1 + && pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1) { - /* The port that we are reading from is unbuffered - i.e. does - not have its own persistent buffer - but we have a buffer, - provided by our caller, that is the right size for the data - that is wanted. For the following scm_i_fill_input calls, - therefore, we use the buffer in hand as the port's read - buffer. - - We need to make sure that the port's normal (1 byte) buffer - is reinstated in case one of the scm_i_fill_input () calls - throws an exception; we use the scm_dynwind_* API to achieve - that. + /* The port that we are reading from is unbuffered - i.e. does not + have its own persistent buffer - but we have a buffer, provided + by our caller, that is the right size for the data that is + wanted. For the following scm_i_fill_input_unlocked calls, + therefore, we use the buffer in hand as the port's read buffer. + + We need to make sure that the port's normal (1 byte) buffer is + reinstated in case one of the scm_i_fill_input_unlocked () + calls throws an exception; we use the scm_dynwind_* API to + achieve that. A consequence of this optimization is that the fill_input functions can't unget characters. That'll push data to the @@ -1684,9 +1511,9 @@ scm_c_read (SCM port, void *buffer, size_t size) scm_dynwind_rewind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY); scm_dynwind_unwind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY); - /* Call scm_i_fill_input until we have all the bytes that we need, - or we hit EOF. */ - while (pt->read_buf_size && (scm_i_fill_input (port) != EOF)) + /* Call scm_i_fill_input_unlocked until we have all the bytes that + we need, or we hit EOF. */ + while (pt->read_buf_size && (scm_i_fill_input_unlocked (port) != EOF)) { pt->read_buf_size -= (pt->read_end - pt->read_pos); pt->read_pos = pt->read_buf = pt->read_end; @@ -1710,7 +1537,7 @@ scm_c_read (SCM port, void *buffer, size_t size) that a custom port implementation's entry points (in particular, fill_input) can rely on the buffer always being the same as they first set up. */ - while (size && (scm_i_fill_input (port) != EOF)) + while (size && (scm_i_fill_input_unlocked (port) != EOF)) { n_available = min (size, pt->read_end - pt->read_pos); memcpy (buffer, pt->read_pos, n_available); @@ -1725,187 +1552,565 @@ scm_c_read (SCM port, void *buffer, size_t size) } #undef FUNC_NAME -/* scm_c_write - * - * Used by an application to write arbitrary number of bytes to an SCM - * port. Similar semantics as libc write. However, unlike libc - * write, scm_c_write writes the requested number of bytes and has no - * return value. - * - * Warning: Doesn't update port line and column counts! - */ - -void -scm_c_write (SCM port, const void *ptr, size_t size) -#define FUNC_NAME "scm_c_write" +size_t +scm_c_read (SCM port, void *buffer, size_t size) { - scm_t_port *pt; - scm_t_ptob_descriptor *ptob; - - SCM_VALIDATE_OPOUTPORT (1, port); - - pt = SCM_PTAB_ENTRY (port); - ptob = &scm_ptobs[SCM_PTOBNUM (port)]; - - if (pt->rw_active == SCM_PORT_READ) - scm_end_input (port); + scm_i_pthread_mutex_t *lock; + size_t ret; - ptob->write (port, ptr, size); + scm_c_lock_port (port, &lock); + ret = scm_c_read_unlocked (port, buffer, size); + if (lock) + scm_i_pthread_mutex_unlock (lock); + - if (pt->rw_random) - pt->rw_active = SCM_PORT_WRITE; + return ret; } -#undef FUNC_NAME -void -scm_flush (SCM port) +/* Update the line and column number of PORT after consumption of C. */ +static inline void +update_port_lf (scm_t_wchar c, SCM port) { - long i = SCM_PTOBNUM (port); - assert (i >= 0); - (scm_ptobs[i].flush) (port); + switch (c) + { + case '\a': + case EOF: + break; + case '\b': + SCM_DECCOL (port); + break; + case '\n': + SCM_INCLINE (port); + break; + case '\r': + SCM_ZEROCOL (port); + break; + case '\t': + SCM_TABCOL (port); + break; + default: + SCM_INCCOL (port); + break; + } } -void -scm_end_input (SCM port) +#define SCM_MBCHAR_BUF_SIZE (4) + +/* Convert the SIZE-byte UTF-8 sequence in UTF8_BUF to a codepoint. + UTF8_BUF is assumed to contain a valid UTF-8 sequence. */ +static scm_t_wchar +utf8_to_codepoint (const scm_t_uint8 *utf8_buf, size_t size) { - long offset; - scm_t_port *pt = SCM_PTAB_ENTRY (port); + scm_t_wchar codepoint; - scm_i_clear_pending_eof (port); - if (pt->read_buf == pt->putback_buf) + if (utf8_buf[0] <= 0x7f) { - offset = pt->read_end - pt->read_pos; - pt->read_buf = pt->saved_read_buf; - pt->read_pos = pt->saved_read_pos; - pt->read_end = pt->saved_read_end; - pt->read_buf_size = pt->saved_read_buf_size; + assert (size == 1); + codepoint = utf8_buf[0]; + } + else if ((utf8_buf[0] & 0xe0) == 0xc0) + { + assert (size == 2); + codepoint = ((scm_t_wchar) utf8_buf[0] & 0x1f) << 6UL + | (utf8_buf[1] & 0x3f); + } + else if ((utf8_buf[0] & 0xf0) == 0xe0) + { + assert (size == 3); + codepoint = ((scm_t_wchar) utf8_buf[0] & 0x0f) << 12UL + | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 6UL + | (utf8_buf[2] & 0x3f); } else - offset = 0; + { + assert (size == 4); + codepoint = ((scm_t_wchar) utf8_buf[0] & 0x07) << 18UL + | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 12UL + | ((scm_t_wchar) utf8_buf[2] & 0x3f) << 6UL + | (utf8_buf[3] & 0x3f); + } - scm_ptobs[SCM_PTOBNUM (port)].end_input (port, offset); + return codepoint; } - - - -static void -scm_i_unget_bytes (const unsigned char *buf, size_t len, SCM port) -#define FUNC_NAME "scm_unget_bytes" +/* Read a UTF-8 sequence from PORT. On success, return 0 and set + *CODEPOINT to the codepoint that was read, fill BUF with its UTF-8 + representation, and set *LEN to the length in bytes. Return + `EILSEQ' on error. */ +static int +get_utf8_codepoint (SCM port, scm_t_wchar *codepoint, + scm_t_uint8 buf[SCM_MBCHAR_BUF_SIZE], size_t *len) { - scm_t_port *pt = SCM_PTAB_ENTRY (port); - size_t old_len, new_len; - - scm_i_clear_pending_eof (port); +#define ASSERT_NOT_EOF(b) \ + if (SCM_UNLIKELY ((b) == EOF)) \ + goto invalid_seq +#define CONSUME_PEEKED_BYTE() \ + pt->read_pos++ - if (pt->read_buf != pt->putback_buf) - /* switch to the put-back buffer. */ - { - if (pt->putback_buf == NULL) - { - pt->putback_buf_size = (len > SCM_INITIAL_PUTBACK_BUF_SIZE - ? len : SCM_INITIAL_PUTBACK_BUF_SIZE); - pt->putback_buf - = (unsigned char *) scm_gc_malloc_pointerless - (pt->putback_buf_size, "putback buffer"); - } + int byte; + scm_t_port *pt; - pt->saved_read_buf = pt->read_buf; - pt->saved_read_pos = pt->read_pos; - pt->saved_read_end = pt->read_end; - pt->saved_read_buf_size = pt->read_buf_size; + *len = 0; + pt = SCM_PTAB_ENTRY (port); - /* Put read_pos at the end of the buffer, so that ungets will not - have to shift the buffer contents each time. */ - pt->read_buf = pt->putback_buf; - pt->read_pos = pt->read_end = pt->putback_buf + pt->putback_buf_size; - pt->read_buf_size = pt->putback_buf_size; + byte = scm_get_byte_or_eof_unlocked (port); + if (byte == EOF) + { + *codepoint = EOF; + return 0; } - old_len = pt->read_end - pt->read_pos; - new_len = old_len + len; + buf[0] = (scm_t_uint8) byte; + *len = 1; - if (new_len > pt->read_buf_size) - /* The putback buffer needs to be enlarged. */ + if (buf[0] <= 0x7f) + /* 1-byte form. */ + *codepoint = buf[0]; + else if (buf[0] >= 0xc2 && buf[0] <= 0xdf) { - size_t new_buf_size; - unsigned char *new_buf, *new_end, *new_pos; - - new_buf_size = pt->read_buf_size * 2; - if (new_buf_size < new_len) - new_buf_size = new_len; + /* 2-byte form. */ + byte = scm_peek_byte_or_eof_unlocked (port); + ASSERT_NOT_EOF (byte); - new_buf = (unsigned char *) - scm_gc_malloc_pointerless (new_buf_size, "putback buffer"); + if (SCM_UNLIKELY ((byte & 0xc0) != 0x80)) + goto invalid_seq; - /* Put the bytes at the end of the buffer, so that future - ungets won't need to shift the buffer. */ - new_end = new_buf + new_buf_size; - new_pos = new_end - old_len; - memcpy (new_pos, pt->read_pos, old_len); + CONSUME_PEEKED_BYTE (); + buf[1] = (scm_t_uint8) byte; + *len = 2; - pt->read_buf = pt->putback_buf = new_buf; - pt->read_pos = new_pos; - pt->read_end = new_end; - pt->read_buf_size = pt->putback_buf_size = new_buf_size; + *codepoint = ((scm_t_wchar) buf[0] & 0x1f) << 6UL + | (buf[1] & 0x3f); } - else if (pt->read_buf + len < pt->read_pos) - /* If needed, shift the existing buffer contents up. - This should not happen unless some external code - manipulates the putback buffer pointers. */ + else if ((buf[0] & 0xf0) == 0xe0) { - unsigned char *new_end = pt->read_buf + pt->read_buf_size; - unsigned char *new_pos = new_end - old_len; + /* 3-byte form. */ + byte = scm_peek_byte_or_eof_unlocked (port); + ASSERT_NOT_EOF (byte); - memmove (new_pos, pt->read_pos, old_len); - pt->read_pos = new_pos; - pt->read_end = new_end; - } + if (SCM_UNLIKELY ((byte & 0xc0) != 0x80 + || (buf[0] == 0xe0 && byte < 0xa0) + || (buf[0] == 0xed && byte > 0x9f))) + goto invalid_seq; - /* Move read_pos back and copy the bytes there. */ - pt->read_pos -= len; - memcpy (pt->read_buf + (pt->read_pos - pt->read_buf), buf, len); + CONSUME_PEEKED_BYTE (); + buf[1] = (scm_t_uint8) byte; + *len = 2; - if (pt->rw_active == SCM_PORT_WRITE) - scm_flush (port); + byte = scm_peek_byte_or_eof_unlocked (port); + ASSERT_NOT_EOF (byte); - if (pt->rw_random) - pt->rw_active = SCM_PORT_READ; -} -#undef FUNC_NAME + if (SCM_UNLIKELY ((byte & 0xc0) != 0x80)) + goto invalid_seq; -void -scm_unget_bytes (const unsigned char *buf, size_t len, SCM port) + CONSUME_PEEKED_BYTE (); + buf[2] = (scm_t_uint8) byte; + *len = 3; + + *codepoint = ((scm_t_wchar) buf[0] & 0x0f) << 12UL + | ((scm_t_wchar) buf[1] & 0x3f) << 6UL + | (buf[2] & 0x3f); + } + else if (buf[0] >= 0xf0 && buf[0] <= 0xf4) + { + /* 4-byte form. */ + byte = scm_peek_byte_or_eof_unlocked (port); + ASSERT_NOT_EOF (byte); + + if (SCM_UNLIKELY (((byte & 0xc0) != 0x80) + || (buf[0] == 0xf0 && byte < 0x90) + || (buf[0] == 0xf4 && byte > 0x8f))) + goto invalid_seq; + + CONSUME_PEEKED_BYTE (); + buf[1] = (scm_t_uint8) byte; + *len = 2; + + byte = scm_peek_byte_or_eof_unlocked (port); + ASSERT_NOT_EOF (byte); + + if (SCM_UNLIKELY ((byte & 0xc0) != 0x80)) + goto invalid_seq; + + CONSUME_PEEKED_BYTE (); + buf[2] = (scm_t_uint8) byte; + *len = 3; + + byte = scm_peek_byte_or_eof_unlocked (port); + ASSERT_NOT_EOF (byte); + + if (SCM_UNLIKELY ((byte & 0xc0) != 0x80)) + goto invalid_seq; + + CONSUME_PEEKED_BYTE (); + buf[3] = (scm_t_uint8) byte; + *len = 4; + + *codepoint = ((scm_t_wchar) buf[0] & 0x07) << 18UL + | ((scm_t_wchar) buf[1] & 0x3f) << 12UL + | ((scm_t_wchar) buf[2] & 0x3f) << 6UL + | (buf[3] & 0x3f); + } + else + goto invalid_seq; + + return 0; + + invalid_seq: + /* Here we could choose the consume the faulty byte when it's not a + valid starting byte, but it's not a requirement. What Section 3.9 + of Unicode 6.0.0 mandates, though, is to not consume a byte that + would otherwise be a valid starting byte. */ + + return EILSEQ; + +#undef CONSUME_PEEKED_BYTE +#undef ASSERT_NOT_EOF +} + +/* Read an ISO-8859-1 codepoint (a byte) from PORT. On success, return + 0 and set *CODEPOINT to the codepoint that was read, fill BUF with + its UTF-8 representation, and set *LEN to the length in bytes. + Return `EILSEQ' on error. */ +static int +get_latin1_codepoint (SCM port, scm_t_wchar *codepoint, + char buf[SCM_MBCHAR_BUF_SIZE], size_t *len) +{ + *codepoint = scm_get_byte_or_eof_unlocked (port); + + if (*codepoint == EOF) + *len = 0; + else + { + *len = 1; + buf[0] = *codepoint; + } + return 0; +} + +/* Likewise, read a byte sequence from PORT, passing it through its + input conversion descriptor. */ +static int +get_iconv_codepoint (SCM port, scm_t_wchar *codepoint, + char buf[SCM_MBCHAR_BUF_SIZE], size_t *len) +{ + scm_t_iconv_descriptors *id; + scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE]; + size_t input_size = 0; + + id = scm_i_port_iconv_descriptors (port, SCM_PORT_READ); + + for (;;) + { + int byte_read; + char *input, *output; + size_t input_left, output_left, done; + + byte_read = scm_get_byte_or_eof_unlocked (port); + if (SCM_UNLIKELY (byte_read == EOF)) + { + if (SCM_LIKELY (input_size == 0)) + { + *codepoint = (scm_t_wchar) EOF; + *len = input_size; + return 0; + } + else + { + /* EOF found in the middle of a multibyte character. */ + scm_i_set_pending_eof (port); + return EILSEQ; + } + } + + buf[input_size++] = byte_read; + + input = buf; + input_left = input_size; + output = (char *) utf8_buf; + output_left = sizeof (utf8_buf); + + done = iconv (id->input_cd, &input, &input_left, &output, &output_left); + + if (done == (size_t) -1) + { + int err = errno; + if (SCM_LIKELY (err == EINVAL)) + /* The input byte sequence did not form a complete + character. Read another byte and try again. */ + continue; + else + return err; + } + else + { + size_t output_size = sizeof (utf8_buf) - output_left; + if (SCM_LIKELY (output_size > 0)) + { + /* iconv generated output. Convert the UTF8_BUF sequence + to a Unicode code point. */ + *codepoint = utf8_to_codepoint (utf8_buf, output_size); + *len = input_size; + return 0; + } + else + { + /* iconv consumed some bytes without producing any output. + Most likely this means that a Unicode byte-order mark + (BOM) was consumed, which should not be included in the + returned buf. Shift any remaining bytes to the beginning + of buf, and continue the loop. */ + memmove (buf, input, input_left); + input_size = input_left; + continue; + } + } + } +} + +/* Read a codepoint from PORT and return it in *CODEPOINT. Fill BUF + with the byte representation of the codepoint in PORT's encoding, and + set *LEN to the length in bytes of that representation. Return 0 on + success and an errno value on error. */ +static SCM_C_INLINE int +get_codepoint (SCM port, scm_t_wchar *codepoint, + char buf[SCM_MBCHAR_BUF_SIZE], size_t *len) +{ + int err; + scm_t_port *pt = SCM_PTAB_ENTRY (port); + scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port); + + if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8) + err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len); + else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1) + err = get_latin1_codepoint (port, codepoint, buf, len); + else + err = get_iconv_codepoint (port, codepoint, buf, len); + + if (SCM_LIKELY (err == 0)) + { + if (SCM_UNLIKELY (pti->at_stream_start_for_bom_read)) + { + /* Record that we're no longer at stream start. */ + pti->at_stream_start_for_bom_read = 0; + if (pt->rw_random) + pti->at_stream_start_for_bom_write = 0; + + /* If we just read a BOM in an encoding that recognizes them, + then silently consume it and read another code point. */ + if (SCM_UNLIKELY + (*codepoint == SCM_UNICODE_BOM + && (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8 + || strcmp (pt->encoding, "UTF-16") == 0 + || strcmp (pt->encoding, "UTF-32") == 0))) + return get_codepoint (port, codepoint, buf, len); + } + update_port_lf (*codepoint, port); + } + else if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK) + { + *codepoint = '?'; + err = 0; + update_port_lf (*codepoint, port); + } + + return err; +} + +/* Read a codepoint from PORT and return it. */ +scm_t_wchar +scm_getc_unlocked (SCM port) +#define FUNC_NAME "scm_getc" +{ + int err; + size_t len; + scm_t_wchar codepoint; + char buf[SCM_MBCHAR_BUF_SIZE]; + + err = get_codepoint (port, &codepoint, buf, &len); + if (SCM_UNLIKELY (err != 0)) + /* At this point PORT should point past the invalid encoding, as per + R6RS-lib Section 8.2.4. */ + scm_decoding_error (FUNC_NAME, err, "input decoding error", port); + + return codepoint; +} +#undef FUNC_NAME + +scm_t_wchar +scm_getc (SCM port) +{ + scm_i_pthread_mutex_t *lock; + scm_t_wchar ret; + + scm_c_lock_port (port, &lock); + ret = scm_getc_unlocked (port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + + + return ret; +} + +SCM_DEFINE (scm_read_char, "read-char", 0, 1, 0, + (SCM port), + "Return the next character available from @var{port}, updating\n" + "@var{port} to point to the following character. If no more\n" + "characters are available, the end-of-file object is returned.\n" + "\n" + "When @var{port}'s data cannot be decoded according to its\n" + "character encoding, a @code{decoding-error} is raised and\n" + "@var{port} points past the erroneous byte sequence.\n") +#define FUNC_NAME s_scm_read_char +{ + scm_t_wchar c; + if (SCM_UNBNDP (port)) + port = scm_current_input_port (); + SCM_VALIDATE_OPINPORT (1, port); + c = scm_getc_unlocked (port); + if (EOF == c) + return SCM_EOF_VAL; + return SCM_MAKE_CHAR (c); +} +#undef FUNC_NAME + + + + +/* Pushback. */ + + + +static void +scm_i_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port) +#define FUNC_NAME "scm_unget_bytes" +{ + scm_t_port *pt = SCM_PTAB_ENTRY (port); + size_t old_len, new_len; + + scm_i_clear_pending_eof (port); + + if (pt->read_buf != pt->putback_buf) + /* switch to the put-back buffer. */ + { + if (pt->putback_buf == NULL) + { + pt->putback_buf_size = (len > SCM_INITIAL_PUTBACK_BUF_SIZE + ? len : SCM_INITIAL_PUTBACK_BUF_SIZE); + pt->putback_buf + = (unsigned char *) scm_gc_malloc_pointerless + (pt->putback_buf_size, "putback buffer"); + } + + pt->saved_read_buf = pt->read_buf; + pt->saved_read_pos = pt->read_pos; + pt->saved_read_end = pt->read_end; + pt->saved_read_buf_size = pt->read_buf_size; + + /* Put read_pos at the end of the buffer, so that ungets will not + have to shift the buffer contents each time. */ + pt->read_buf = pt->putback_buf; + pt->read_pos = pt->read_end = pt->putback_buf + pt->putback_buf_size; + pt->read_buf_size = pt->putback_buf_size; + } + + old_len = pt->read_end - pt->read_pos; + new_len = old_len + len; + + if (new_len > pt->read_buf_size) + /* The putback buffer needs to be enlarged. */ + { + size_t new_buf_size; + unsigned char *new_buf, *new_end, *new_pos; + + new_buf_size = pt->read_buf_size * 2; + if (new_buf_size < new_len) + new_buf_size = new_len; + + new_buf = (unsigned char *) + scm_gc_malloc_pointerless (new_buf_size, "putback buffer"); + + /* Put the bytes at the end of the buffer, so that future + ungets won't need to shift the buffer. */ + new_end = new_buf + new_buf_size; + new_pos = new_end - old_len; + memcpy (new_pos, pt->read_pos, old_len); + + pt->read_buf = pt->putback_buf = new_buf; + pt->read_pos = new_pos; + pt->read_end = new_end; + pt->read_buf_size = pt->putback_buf_size = new_buf_size; + } + else if (pt->read_buf + len < pt->read_pos) + /* If needed, shift the existing buffer contents up. + This should not happen unless some external code + manipulates the putback buffer pointers. */ + { + unsigned char *new_end = pt->read_buf + pt->read_buf_size; + unsigned char *new_pos = new_end - old_len; + + memmove (new_pos, pt->read_pos, old_len); + pt->read_pos = new_pos; + pt->read_end = new_end; + } + + /* Move read_pos back and copy the bytes there. */ + pt->read_pos -= len; + memcpy (pt->read_buf + (pt->read_pos - pt->read_buf), buf, len); + + if (pt->rw_active == SCM_PORT_WRITE) + scm_flush (port); + + if (pt->rw_random) + pt->rw_active = SCM_PORT_READ; +} +#undef FUNC_NAME + +void +scm_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port) { - scm_i_unget_bytes (buf, len, port); + scm_i_unget_bytes_unlocked (buf, len, port); } void -scm_unget_byte (int c, SCM port) +scm_unget_byte_unlocked (int c, SCM port) +{ + unsigned char byte = c; + scm_i_unget_bytes_unlocked (&byte, 1, port); +} + +void +scm_unget_bytes (const unsigned char *buf, size_t len, SCM port) { - unsigned char byte; + scm_i_pthread_mutex_t *lock; + scm_c_lock_port (port, &lock); + scm_i_unget_bytes_unlocked (buf, len, port); + if (lock) + scm_i_pthread_mutex_unlock (lock); +} - byte = c; - scm_i_unget_bytes (&byte, 1, port); +void +scm_unget_byte (int c, SCM port) +{ + unsigned char byte = c; + scm_i_pthread_mutex_t *lock; + scm_c_lock_port (port, &lock); + scm_i_unget_bytes_unlocked (&byte, 1, port); + if (lock) + scm_i_pthread_mutex_unlock (lock); } void -scm_ungetc (scm_t_wchar c, SCM port) +scm_ungetc_unlocked (scm_t_wchar c, SCM port) #define FUNC_NAME "scm_ungetc" { scm_t_port *pt = SCM_PTAB_ENTRY (port); char *result; char result_buf[10]; - const char *encoding; size_t len; - if (pt->encoding != NULL) - encoding = pt->encoding; - else - encoding = "ISO-8859-1"; - len = sizeof (result_buf); - result = u32_conv_to_encoding (encoding, + result = u32_conv_to_encoding (pt->encoding, (enum iconv_ilseq_handler) pt->ilseq_handler, (uint32_t *) &c, 1, NULL, result_buf, &len); @@ -1915,7 +2120,7 @@ scm_ungetc (scm_t_wchar c, SCM port) "conversion to port encoding failed", SCM_BOOL_F, SCM_MAKE_CHAR (c)); - scm_i_unget_bytes ((unsigned char *) result, len, port); + scm_i_unget_bytes_unlocked ((unsigned char *) result, len, port); if (SCM_UNLIKELY (result != result_buf)) free (result); @@ -1932,9 +2137,19 @@ scm_ungetc (scm_t_wchar c, SCM port) } #undef FUNC_NAME +void +scm_ungetc (scm_t_wchar c, SCM port) +{ + scm_i_pthread_mutex_t *lock; + scm_c_lock_port (port, &lock); + scm_ungetc_unlocked (c, port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + +} void -scm_ungets (const char *s, int n, SCM port) +scm_ungets_unlocked (const char *s, int n, SCM port) { /* This is simple minded and inefficient, but unreading strings is * probably not a common operation, and remember that line and @@ -1943,9 +2158,19 @@ scm_ungets (const char *s, int n, SCM port) * Please feel free to write an optimized version! */ while (n--) - scm_ungetc (s[n], port); + scm_ungetc_unlocked (s[n], port); } +void +scm_ungets (const char *s, int n, SCM port) +{ + scm_i_pthread_mutex_t *lock; + scm_c_lock_port (port, &lock); + scm_ungets_unlocked (s, n, port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + +} SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0, (SCM port), @@ -1977,810 +2202,784 @@ SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0, long column, line; size_t len; - if (SCM_UNBNDP (port)) - port = scm_current_input_port (); - SCM_VALIDATE_OPINPORT (1, port); - - column = SCM_COL (port); - line = SCM_LINUM (port); - - err = get_codepoint (port, &c, bytes, &len); - - scm_i_unget_bytes ((unsigned char *) bytes, len, port); - - SCM_COL (port) = column; - SCM_LINUM (port) = line; - - if (SCM_UNLIKELY (err != 0)) - { - scm_decoding_error (FUNC_NAME, err, "input decoding error", port); - - /* Shouldn't happen since `catch' always aborts to prompt. */ - result = SCM_BOOL_F; - } - else if (c == EOF) - { - scm_i_set_pending_eof (port); - result = SCM_EOF_VAL; - } - else - result = SCM_MAKE_CHAR (c); - - return result; -} -#undef FUNC_NAME - -SCM_DEFINE (scm_unread_char, "unread-char", 1, 1, 0, - (SCM cobj, SCM port), - "Place character @var{cobj} in @var{port} so that it will be\n" - "read by the next read operation. If called multiple times, the\n" - "unread characters will be read again in last-in first-out\n" - "order. If @var{port} is not supplied, the current input port\n" - "is used.") -#define FUNC_NAME s_scm_unread_char -{ - int c; - - SCM_VALIDATE_CHAR (1, cobj); - if (SCM_UNBNDP (port)) - port = scm_current_input_port (); - SCM_VALIDATE_OPINPORT (2, port); - - c = SCM_CHAR (cobj); - - scm_ungetc (c, port); - return cobj; -} -#undef FUNC_NAME - -SCM_DEFINE (scm_unread_string, "unread-string", 2, 0, 0, - (SCM str, SCM port), - "Place the string @var{str} in @var{port} so that its characters will be\n" - "read in subsequent read operations. If called multiple times, the\n" - "unread characters will be read again in last-in first-out order. If\n" - "@var{port} is not supplied, the current-input-port is used.") -#define FUNC_NAME s_scm_unread_string -{ - int n; - SCM_VALIDATE_STRING (1, str); - if (SCM_UNBNDP (port)) - port = scm_current_input_port (); - SCM_VALIDATE_OPINPORT (2, port); - - n = scm_i_string_length (str); - - while (n--) - scm_ungetc (scm_i_string_ref (str, n), port); - - return str; -} -#undef FUNC_NAME - -SCM_DEFINE (scm_seek, "seek", 3, 0, 0, - (SCM fd_port, SCM offset, SCM whence), - "Sets the current position of @var{fd_port} to the integer\n" - "@var{offset}, which is interpreted according to the value of\n" - "@var{whence}.\n" - "\n" - "One of the following variables should be supplied for\n" - "@var{whence}:\n" - "@defvar SEEK_SET\n" - "Seek from the beginning of the file.\n" - "@end defvar\n" - "@defvar SEEK_CUR\n" - "Seek from the current position.\n" - "@end defvar\n" - "@defvar SEEK_END\n" - "Seek from the end of the file.\n" - "@end defvar\n" - "If @var{fd_port} is a file descriptor, the underlying system\n" - "call is @code{lseek}. @var{port} may be a string port.\n" - "\n" - "The value returned is the new position in the file. This means\n" - "that the current position of a port can be obtained using:\n" - "@lisp\n" - "(seek port 0 SEEK_CUR)\n" - "@end lisp") -#define FUNC_NAME s_scm_seek -{ - int how; - - fd_port = SCM_COERCE_OUTPORT (fd_port); - - how = scm_to_int (whence); - if (how != SEEK_SET && how != SEEK_CUR && how != SEEK_END) - SCM_OUT_OF_RANGE (3, whence); - - if (SCM_OPPORTP (fd_port)) - { - scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (fd_port); - scm_t_ptob_descriptor *ptob = scm_ptobs + SCM_PTOBNUM (fd_port); - off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset); - off_t_or_off64_t rv; - - if (!ptob->seek) - SCM_MISC_ERROR ("port is not seekable", - scm_cons (fd_port, SCM_EOL)); - else - rv = ptob->seek (fd_port, off, how); - - /* Set stream-start flags according to new position. */ - pti->at_stream_start_for_bom_read = (rv == 0); - pti->at_stream_start_for_bom_write = (rv == 0); - - scm_i_clear_pending_eof (fd_port); - - return scm_from_off_t_or_off64_t (rv); - } - else /* file descriptor?. */ - { - off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset); - off_t_or_off64_t rv; - rv = lseek_or_lseek64 (scm_to_int (fd_port), off, how); - if (rv == -1) - SCM_SYSERROR; - return scm_from_off_t_or_off64_t (rv); - } -} -#undef FUNC_NAME - -#ifndef O_BINARY -#define O_BINARY 0 -#endif - -/* Mingw has ftruncate(), perhaps implemented above using chsize, but - doesn't have the filename version truncate(), hence this code. */ -#if HAVE_FTRUNCATE && ! HAVE_TRUNCATE -static int -truncate (const char *file, off_t length) -{ - int ret, fdes; - - fdes = open (file, O_BINARY | O_WRONLY); - if (fdes == -1) - return -1; - - ret = ftruncate (fdes, length); - if (ret == -1) - { - int save_errno = errno; - close (fdes); - errno = save_errno; - return -1; - } - - return close (fdes); -} -#endif /* HAVE_FTRUNCATE && ! HAVE_TRUNCATE */ - -SCM_DEFINE (scm_truncate_file, "truncate-file", 1, 1, 0, - (SCM object, SCM length), - "Truncate file @var{object} to @var{length} bytes. @var{object}\n" - "can be a filename string, a port object, or an integer file\n" - "descriptor.\n" - "The return value is unspecified.\n" - "\n" - "For a port or file descriptor @var{length} can be omitted, in\n" - "which case the file is truncated at the current position (per\n" - "@code{ftell} above).\n" - "\n" - "On most systems a file can be extended by giving a length\n" - "greater than the current size, but this is not mandatory in the\n" - "POSIX standard.") -#define FUNC_NAME s_scm_truncate_file -{ - int rv; - - /* "object" can be a port, fdes or filename. - - Negative "length" makes no sense, but it's left to truncate() or - ftruncate() to give back an error for that (normally EINVAL). - */ - - if (SCM_UNBNDP (length)) - { - /* must supply length if object is a filename. */ - if (scm_is_string (object)) - SCM_MISC_ERROR("must supply length if OBJECT is a filename", SCM_EOL); - - length = scm_seek (object, SCM_INUM0, scm_from_int (SEEK_CUR)); - } - - object = SCM_COERCE_OUTPORT (object); - if (scm_is_integer (object)) - { - off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length); - SCM_SYSCALL (rv = ftruncate_or_ftruncate64 (scm_to_int (object), - c_length)); - } - else if (SCM_OPOUTPORTP (object)) - { - off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length); - scm_t_port *pt = SCM_PTAB_ENTRY (object); - scm_t_ptob_descriptor *ptob = scm_ptobs + SCM_PTOBNUM (object); + if (SCM_UNBNDP (port)) + port = scm_current_input_port (); + SCM_VALIDATE_OPINPORT (1, port); - if (!ptob->truncate) - SCM_MISC_ERROR ("port is not truncatable", SCM_EOL); + column = SCM_COL (port); + line = SCM_LINUM (port); - scm_i_clear_pending_eof (object); - if (pt->rw_active == SCM_PORT_READ) - scm_end_input (object); - else if (pt->rw_active == SCM_PORT_WRITE) - ptob->flush (object); + err = get_codepoint (port, &c, bytes, &len); - ptob->truncate (object, c_length); - rv = 0; + scm_i_unget_bytes_unlocked ((unsigned char *) bytes, len, port); + + SCM_COL (port) = column; + SCM_LINUM (port) = line; + + if (SCM_UNLIKELY (err != 0)) + { + scm_decoding_error (FUNC_NAME, err, "input decoding error", port); + + /* Shouldn't happen since `catch' always aborts to prompt. */ + result = SCM_BOOL_F; } - else + else if (c == EOF) { - off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length); - char *str = scm_to_locale_string (object); - int eno; - SCM_SYSCALL (rv = truncate_or_truncate64 (str, c_length)); - eno = errno; - free (str); - errno = eno; + scm_i_set_pending_eof (port); + result = SCM_EOF_VAL; } - if (rv == -1) - SCM_SYSERROR; - return SCM_UNSPECIFIED; + else + result = SCM_MAKE_CHAR (c); + + return result; } #undef FUNC_NAME -SCM_DEFINE (scm_port_line, "port-line", 1, 0, 0, - (SCM port), - "Return the current line number for @var{port}.\n" - "\n" - "The first line of a file is 0. But you might want to add 1\n" - "when printing line numbers, since starting from 1 is\n" - "traditional in error messages, and likely to be more natural to\n" - "non-programmers.") -#define FUNC_NAME s_scm_port_line +SCM_DEFINE (scm_unread_char, "unread-char", 1, 1, 0, + (SCM cobj, SCM port), + "Place character @var{cobj} in @var{port} so that it will be\n" + "read by the next read operation. If called multiple times, the\n" + "unread characters will be read again in last-in first-out\n" + "order. If @var{port} is not supplied, the current input port\n" + "is used.") +#define FUNC_NAME s_scm_unread_char { - port = SCM_COERCE_OUTPORT (port); - SCM_VALIDATE_OPENPORT (1, port); - return scm_from_long (SCM_LINUM (port)); + int c; + + SCM_VALIDATE_CHAR (1, cobj); + if (SCM_UNBNDP (port)) + port = scm_current_input_port (); + SCM_VALIDATE_OPINPORT (2, port); + + c = SCM_CHAR (cobj); + + scm_ungetc_unlocked (c, port); + return cobj; } #undef FUNC_NAME -SCM_DEFINE (scm_set_port_line_x, "set-port-line!", 2, 0, 0, - (SCM port, SCM line), - "Set the current line number for @var{port} to @var{line}. The\n" - "first line of a file is 0.") -#define FUNC_NAME s_scm_set_port_line_x +SCM_DEFINE (scm_unread_string, "unread-string", 2, 0, 0, + (SCM str, SCM port), + "Place the string @var{str} in @var{port} so that its characters will be\n" + "read in subsequent read operations. If called multiple times, the\n" + "unread characters will be read again in last-in first-out order. If\n" + "@var{port} is not supplied, the current-input-port is used.") +#define FUNC_NAME s_scm_unread_string { - port = SCM_COERCE_OUTPORT (port); - SCM_VALIDATE_OPENPORT (1, port); - SCM_PTAB_ENTRY (port)->line_number = scm_to_long (line); - return SCM_UNSPECIFIED; + int n; + SCM_VALIDATE_STRING (1, str); + if (SCM_UNBNDP (port)) + port = scm_current_input_port (); + SCM_VALIDATE_OPINPORT (2, port); + + n = scm_i_string_length (str); + + while (n--) + scm_ungetc_unlocked (scm_i_string_ref (str, n), port); + + return str; } #undef FUNC_NAME -SCM_DEFINE (scm_port_column, "port-column", 1, 0, 0, - (SCM port), - "Return the current column number of @var{port}.\n" - "If the number is\n" - "unknown, the result is #f. Otherwise, the result is a 0-origin integer\n" - "- i.e. the first character of the first line is line 0, column 0.\n" - "(However, when you display a file position, for example in an error\n" - "message, we recommend you add 1 to get 1-origin integers. This is\n" - "because lines and column numbers traditionally start with 1, and that is\n" - "what non-programmers will find most natural.)") -#define FUNC_NAME s_scm_port_column + + + +/* Manipulating the buffers. */ + +/* This routine does not take any locks, as it is usually called as part + of a port implementation. */ +void +scm_port_non_buffer (scm_t_port *pt) { - port = SCM_COERCE_OUTPORT (port); - SCM_VALIDATE_OPENPORT (1, port); - return scm_from_int (SCM_COL (port)); + pt->read_pos = pt->read_buf = pt->read_end = &pt->shortbuf; + pt->write_buf = pt->write_pos = &pt->shortbuf; + pt->read_buf_size = pt->write_buf_size = 1; + pt->write_end = pt->write_buf + pt->write_buf_size; } -#undef FUNC_NAME -SCM_DEFINE (scm_set_port_column_x, "set-port-column!", 2, 0, 0, - (SCM port, SCM column), - "Set the current column of @var{port}. Before reading the first\n" - "character on a line the column should be 0.") -#define FUNC_NAME s_scm_set_port_column_x +/* this should only be called when the read buffer is empty. it + tries to refill the read buffer. it returns the first char from + the port, which is either EOF or *(pt->read_pos). */ +static int +scm_i_fill_input_unlocked (SCM port) { - port = SCM_COERCE_OUTPORT (port); - SCM_VALIDATE_OPENPORT (1, port); - SCM_PTAB_ENTRY (port)->column_number = scm_to_int (column); - return SCM_UNSPECIFIED; + scm_t_port *pt = SCM_PTAB_ENTRY (port); + scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port); + + assert (pt->read_pos == pt->read_end); + + if (pti->pending_eof) + { + pti->pending_eof = 0; + return EOF; + } + + if (pt->read_buf == pt->putback_buf) + { + /* finished reading put-back chars. */ + pt->read_buf = pt->saved_read_buf; + pt->read_pos = pt->saved_read_pos; + pt->read_end = pt->saved_read_end; + pt->read_buf_size = pt->saved_read_buf_size; + if (pt->read_pos < pt->read_end) + return *(pt->read_pos); + } + return SCM_PORT_DESCRIPTOR (port)->fill_input (port); } -#undef FUNC_NAME -SCM_DEFINE (scm_port_filename, "port-filename", 1, 0, 0, - (SCM port), - "Return the filename associated with @var{port}, or @code{#f}\n" - "if no filename is associated with the port.") -#define FUNC_NAME s_scm_port_filename +int +scm_fill_input (SCM port) { - port = SCM_COERCE_OUTPORT (port); - SCM_VALIDATE_OPENPORT (1, port); - return SCM_FILENAME (port); + scm_i_pthread_mutex_t *lock; + int ret; + + scm_c_lock_port (port, &lock); + ret = scm_fill_input_unlocked (port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + + + return ret; } -#undef FUNC_NAME -SCM_DEFINE (scm_set_port_filename_x, "set-port-filename!", 2, 0, 0, - (SCM port, SCM filename), - "Change the filename associated with @var{port}, using the current input\n" - "port if none is specified. Note that this does not change the port's\n" - "source of data, but only the value that is returned by\n" - "@code{port-filename} and reported in diagnostic output.") -#define FUNC_NAME s_scm_set_port_filename_x +/* Slow-path fallback for 'scm_get_byte_or_eof_unlocked' */ +int +scm_slow_get_byte_or_eof_unlocked (SCM port) { - port = SCM_COERCE_OUTPORT (port); - SCM_VALIDATE_OPENPORT (1, port); - /* We allow the user to set the filename to whatever he likes. */ - SCM_SET_FILENAME (port, filename); - return SCM_UNSPECIFIED; + scm_t_port *pt = SCM_PTAB_ENTRY (port); + + if (pt->rw_active == SCM_PORT_WRITE) + scm_flush_unlocked (port); + + if (pt->rw_random) + pt->rw_active = SCM_PORT_READ; + + if (pt->read_pos >= pt->read_end) + { + if (SCM_UNLIKELY (scm_i_fill_input_unlocked (port) == EOF)) + return EOF; + } + + return *pt->read_pos++; } -#undef FUNC_NAME -/* A fluid specifying the default encoding for newly created ports. If it is - a string, that is the encoding. If it is #f, it is in the "native" - (Latin-1) encoding. */ -SCM_VARIABLE (default_port_encoding_var, "%default-port-encoding"); +/* Slow-path fallback for 'scm_peek_byte_or_eof_unlocked' */ +int +scm_slow_peek_byte_or_eof_unlocked (SCM port) +{ + scm_t_port *pt = SCM_PTAB_ENTRY (port); -static int scm_port_encoding_init = 0; + if (pt->rw_active == SCM_PORT_WRITE) + scm_flush_unlocked (port); -/* Use ENCODING as the default encoding for future ports. */ -void -scm_i_set_default_port_encoding (const char *encoding) + if (pt->rw_random) + pt->rw_active = SCM_PORT_READ; + + if (pt->read_pos >= pt->read_end) + { + if (SCM_UNLIKELY (scm_i_fill_input_unlocked (port) == EOF)) + { + scm_i_set_pending_eof (port); + return EOF; + } + } + + return *pt->read_pos; +} + +/* Move up to READ_LEN bytes from PORT's putback and/or read buffers + into memory starting at DEST. Return the number of bytes moved. + PORT's line/column numbers are left unchanged. */ +size_t +scm_take_from_input_buffers (SCM port, char *dest, size_t read_len) { - if (!scm_port_encoding_init - || !scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var))) - scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized", - SCM_EOL); + scm_t_port *pt = SCM_PTAB_ENTRY (port); + size_t bytes_read = 0; + size_t from_buf = min (pt->read_end - pt->read_pos, read_len); + + if (from_buf > 0) + { + memcpy (dest, pt->read_pos, from_buf); + pt->read_pos += from_buf; + bytes_read += from_buf; + read_len -= from_buf; + dest += from_buf; + } + + /* if putback was active, try the real input buffer too. */ + if (pt->read_buf == pt->putback_buf) + { + from_buf = min (pt->saved_read_end - pt->saved_read_pos, read_len); + if (from_buf > 0) + { + memcpy (dest, pt->saved_read_pos, from_buf); + pt->saved_read_pos += from_buf; + bytes_read += from_buf; + } + } - if (encoding == NULL - || c_strcasecmp (encoding, "ASCII") == 0 - || c_strcasecmp (encoding, "ANSI_X3.4-1968") == 0 - || c_strcasecmp (encoding, "ISO-8859-1") == 0) - scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F); - else - scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), - scm_from_locale_string (encoding)); + return bytes_read; } -/* Return the name of the default encoding for newly created ports; a - return value of NULL means "ISO-8859-1". */ -const char * -scm_i_default_port_encoding (void) +/* Clear a port's read buffers, returning the contents. */ +SCM_DEFINE (scm_drain_input, "drain-input", 1, 0, 0, + (SCM port), + "This procedure clears a port's input buffers, similar\n" + "to the way that force-output clears the output buffer. The\n" + "contents of the buffers are returned as a single string, e.g.,\n" + "\n" + "@lisp\n" + "(define p (open-input-file ...))\n" + "(drain-input p) => empty string, nothing buffered yet.\n" + "(unread-char (read-char p) p)\n" + "(drain-input p) => initial chars from p, up to the buffer size.\n" + "@end lisp\n\n" + "Draining the buffers may be useful for cleanly finishing\n" + "buffered I/O so that the file descriptor can be used directly\n" + "for further input.") +#define FUNC_NAME s_scm_drain_input { - if (!scm_port_encoding_init) - return NULL; - else if (!scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var))) - return NULL; - else - { - SCM encoding; + SCM result; + char *data; + scm_t_port *pt; + long count; - encoding = scm_fluid_ref (SCM_VARIABLE_REF (default_port_encoding_var)); - if (!scm_is_string (encoding)) - return NULL; - else - return scm_i_string_chars (encoding); + SCM_VALIDATE_OPINPORT (1, port); + pt = SCM_PTAB_ENTRY (port); + + count = pt->read_end - pt->read_pos; + if (pt->read_buf == pt->putback_buf) + count += pt->saved_read_end - pt->saved_read_pos; + + if (count) + { + result = scm_i_make_string (count, &data, 0); + scm_take_from_input_buffers (port, data, count); } + else + result = scm_nullstr; + + return result; } +#undef FUNC_NAME -/* If the next LEN bytes from PORT are equal to those in BYTES, then - return 1, else return 0. Leave the port position unchanged. */ -static int -looking_at_bytes (SCM port, const unsigned char *bytes, int len) +void +scm_end_input_unlocked (SCM port) { + long offset; scm_t_port *pt = SCM_PTAB_ENTRY (port); - int i = 0; - while (i < len && scm_peek_byte_or_eof (port) == bytes[i]) + scm_i_clear_pending_eof (port); + if (pt->read_buf == pt->putback_buf) { - pt->read_pos++; - i++; + offset = pt->read_end - pt->read_pos; + pt->read_buf = pt->saved_read_buf; + pt->read_pos = pt->saved_read_pos; + pt->read_end = pt->saved_read_end; + pt->read_buf_size = pt->saved_read_buf_size; } - scm_i_unget_bytes (bytes, i, port); - return (i == len); + else + offset = 0; + + SCM_PORT_DESCRIPTOR (port)->end_input (port, offset); } -static const unsigned char scm_utf8_bom[3] = {0xEF, 0xBB, 0xBF}; -static const unsigned char scm_utf16be_bom[2] = {0xFE, 0xFF}; -static const unsigned char scm_utf16le_bom[2] = {0xFF, 0xFE}; -static const unsigned char scm_utf32be_bom[4] = {0x00, 0x00, 0xFE, 0xFF}; -static const unsigned char scm_utf32le_bom[4] = {0xFF, 0xFE, 0x00, 0x00}; +void +scm_end_input (SCM port) +{ + scm_i_pthread_mutex_t *lock; + scm_c_lock_port (port, &lock); + scm_end_input_unlocked (port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + +} -/* Decide what byte order to use for a UTF-16 port. Return "UTF-16BE" - or "UTF-16LE". MODE must be either SCM_PORT_READ or SCM_PORT_WRITE, - and specifies which operation is about to be done. The MODE - determines how we will decide the byte order. We deliberately avoid - reading from the port unless the user is about to do so. If the user - is about to read, then we look for a BOM, and if present, we use it - to determine the byte order. Otherwise we choose big endian, as - recommended by the Unicode Standard. Note that the BOM (if any) is - not consumed here. */ -static const char * -decide_utf16_encoding (SCM port, scm_t_port_rw_active mode) +SCM_DEFINE (scm_force_output, "force-output", 0, 1, 0, + (SCM port), + "Flush the specified output port, or the current output port if @var{port}\n" + "is omitted. The current output buffer contents are passed to the\n" + "underlying port implementation (e.g., in the case of fports, the\n" + "data will be written to the file and the output buffer will be cleared.)\n" + "It has no effect on an unbuffered port.\n\n" + "The return value is unspecified.") +#define FUNC_NAME s_scm_force_output { - if (mode == SCM_PORT_READ - && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read - && looking_at_bytes (port, scm_utf16le_bom, sizeof scm_utf16le_bom)) - return "UTF-16LE"; + if (SCM_UNBNDP (port)) + port = scm_current_output_port (); else - return "UTF-16BE"; + { + port = SCM_COERCE_OUTPORT (port); + SCM_VALIDATE_OPOUTPORT (1, port); + } + scm_flush_unlocked (port); + return SCM_UNSPECIFIED; } +#undef FUNC_NAME -/* Decide what byte order to use for a UTF-32 port. Return "UTF-32BE" - or "UTF-32LE". See the comment above 'decide_utf16_encoding' for - details. */ -static const char * -decide_utf32_encoding (SCM port, scm_t_port_rw_active mode) +void +scm_flush_unlocked (SCM port) { - if (mode == SCM_PORT_READ - && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read - && looking_at_bytes (port, scm_utf32le_bom, sizeof scm_utf32le_bom)) - return "UTF-32LE"; - else - return "UTF-32BE"; + SCM_PORT_DESCRIPTOR (port)->flush (port); } -static void -finalize_iconv_descriptors (void *ptr, void *data) +void +scm_flush (SCM port) { - close_iconv_descriptors (ptr); + scm_i_pthread_mutex_t *lock; + scm_c_lock_port (port, &lock); + scm_flush_unlocked (port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + } -static scm_t_iconv_descriptors * -open_iconv_descriptors (const char *encoding, int reading, int writing) +int +scm_fill_input_unlocked (SCM port) { - scm_t_iconv_descriptors *id; - iconv_t input_cd, output_cd; + return scm_i_fill_input_unlocked (port); +} - input_cd = (iconv_t) -1; - output_cd = (iconv_t) -1; - if (reading) - { - /* Open an input iconv conversion descriptor, from ENCODING - to UTF-8. We choose UTF-8, not UTF-32, because iconv - implementations can typically convert from anything to - UTF-8, but not to UTF-32 (see - ). */ - /* Assume opening an iconv descriptor causes about 16 KB of - allocation. */ - scm_gc_register_allocation (16 * 1024); + - input_cd = iconv_open ("UTF-8", encoding); - if (input_cd == (iconv_t) -1) - goto invalid_encoding; - } +/* Output. */ - if (writing) - { - /* Assume opening an iconv descriptor causes about 16 KB of - allocation. */ - scm_gc_register_allocation (16 * 1024); +void +scm_putc (char c, SCM port) +{ + scm_i_pthread_mutex_t *lock; + scm_c_lock_port (port, &lock); + scm_putc_unlocked (c, port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + +} - output_cd = iconv_open (encoding, "UTF-8"); - if (output_cd == (iconv_t) -1) - { - if (input_cd != (iconv_t) -1) - iconv_close (input_cd); - goto invalid_encoding; - } - } +void +scm_puts (const char *s, SCM port) +{ + scm_i_pthread_mutex_t *lock; + scm_c_lock_port (port, &lock); + scm_puts_unlocked (s, port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + +} + +/* scm_c_write + * + * Used by an application to write arbitrary number of bytes to an SCM + * port. Similar semantics as libc write. However, unlike libc + * write, scm_c_write writes the requested number of bytes and has no + * return value. + * + * Warning: Doesn't update port line and column counts! + */ +void +scm_c_write_unlocked (SCM port, const void *ptr, size_t size) +#define FUNC_NAME "scm_c_write" +{ + scm_t_port *pt; + scm_t_ptob_descriptor *ptob; - id = scm_gc_malloc_pointerless (sizeof (*id), "iconv descriptors"); - id->input_cd = input_cd; - id->output_cd = output_cd; + SCM_VALIDATE_OPOUTPORT (1, port); - /* Register a finalizer to close the descriptors. */ - scm_i_set_finalizer (id, finalize_iconv_descriptors, NULL); + pt = SCM_PTAB_ENTRY (port); + ptob = SCM_PORT_DESCRIPTOR (port); - return id; + if (pt->rw_active == SCM_PORT_READ) + scm_end_input_unlocked (port); - invalid_encoding: - { - SCM err; - err = scm_from_locale_string (encoding); - scm_misc_error ("open_iconv_descriptors", - "invalid or unknown character encoding ~s", - scm_list_1 (err)); - } + ptob->write (port, ptr, size); + + if (pt->rw_random) + pt->rw_active = SCM_PORT_WRITE; } +#undef FUNC_NAME -static void -close_iconv_descriptors (scm_t_iconv_descriptors *id) +void +scm_c_write (SCM port, const void *ptr, size_t size) { - if (id->input_cd != (iconv_t) -1) - iconv_close (id->input_cd); - if (id->output_cd != (iconv_t) -1) - iconv_close (id->output_cd); - id->input_cd = (void *) -1; - id->output_cd = (void *) -1; + scm_i_pthread_mutex_t *lock; + scm_c_lock_port (port, &lock); + scm_c_write_unlocked (port, ptr, size); + if (lock) + scm_i_pthread_mutex_unlock (lock); + } -/* Return the iconv_descriptors, initializing them if necessary. MODE - must be either SCM_PORT_READ or SCM_PORT_WRITE, and specifies which - operation is about to be done. We deliberately avoid reading from - the port unless the user was about to do so. */ -scm_t_iconv_descriptors * -scm_i_port_iconv_descriptors (SCM port, scm_t_port_rw_active mode) +/* scm_lfwrite + * + * This function differs from scm_c_write; it updates port line and + * column. */ +void +scm_lfwrite_unlocked (const char *ptr, size_t size, SCM port) { - scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port); - - assert (pti->encoding_mode == SCM_PORT_ENCODING_MODE_ICONV); - - if (!pti->iconv_descriptors) - { - scm_t_port *pt = SCM_PTAB_ENTRY (port); - const char *precise_encoding; - - if (!pt->encoding) - pt->encoding = "ISO-8859-1"; + scm_t_port *pt = SCM_PTAB_ENTRY (port); + scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port); - /* If the specified encoding is UTF-16 or UTF-32, then make - that more precise by deciding what byte order to use. */ - if (c_strcasecmp (pt->encoding, "UTF-16") == 0) - precise_encoding = decide_utf16_encoding (port, mode); - else if (c_strcasecmp (pt->encoding, "UTF-32") == 0) - precise_encoding = decide_utf32_encoding (port, mode); - else - precise_encoding = pt->encoding; + if (pt->rw_active == SCM_PORT_READ) + scm_end_input_unlocked (port); - pti->iconv_descriptors = - open_iconv_descriptors (precise_encoding, - SCM_INPUT_PORT_P (port), - SCM_OUTPUT_PORT_P (port)); - } + ptob->write (port, ptr, size); - return pti->iconv_descriptors; + for (; size; ptr++, size--) + update_port_lf ((scm_t_wchar) (unsigned char) *ptr, port); + + if (pt->rw_random) + pt->rw_active = SCM_PORT_WRITE; } void -scm_i_set_port_encoding_x (SCM port, const char *encoding) +scm_lfwrite (const char *ptr, size_t size, SCM port) { - scm_t_port *pt; - scm_t_port_internal *pti; - scm_t_iconv_descriptors *prev; + scm_i_pthread_mutex_t *lock; + scm_c_lock_port (port, &lock); + scm_lfwrite_unlocked (ptr, size, port); + if (lock) + scm_i_pthread_mutex_unlock (lock); + +} - /* Set the character encoding for this port. */ - pt = SCM_PTAB_ENTRY (port); - pti = SCM_PORT_GET_INTERNAL (port); - prev = pti->iconv_descriptors; +/* Write STR to PORT from START inclusive to END exclusive. */ +void +scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port) +{ + scm_t_port *pt = SCM_PTAB_ENTRY (port); - /* In order to handle cases where the encoding changes mid-stream - (e.g. within an HTTP stream, or within a file that is composed of - segments with different encodings), we consider this to be "stream - start" for purposes of BOM handling, regardless of our actual file - position. */ - pti->at_stream_start_for_bom_read = 1; - pti->at_stream_start_for_bom_write = 1; + if (pt->rw_active == SCM_PORT_READ) + scm_end_input_unlocked (port); - if (encoding == NULL) - encoding = "ISO-8859-1"; + if (end == (size_t) -1) + end = scm_i_string_length (str); - /* If ENCODING is UTF-8, then no conversion descriptor is opened - because we do I/O ourselves. This saves 100+ KiB for each - descriptor. */ - pt->encoding = scm_gc_strdup (encoding, "port"); - if (c_strcasecmp (encoding, "UTF-8") == 0) - pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8; - else - pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV; + scm_i_display_substring (str, start, end, port); - pti->iconv_descriptors = NULL; - if (prev) - close_iconv_descriptors (prev); + if (pt->rw_random) + pt->rw_active = SCM_PORT_WRITE; } -SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0, + + + +/* Querying and setting positions, and character availability. */ + +SCM_DEFINE (scm_char_ready_p, "char-ready?", 0, 1, 0, (SCM port), - "Returns, as a string, the character encoding that @var{port}\n" - "uses to interpret its input and output.\n") -#define FUNC_NAME s_scm_port_encoding + "Return @code{#t} if a character is ready on input @var{port}\n" + "and return @code{#f} otherwise. If @code{char-ready?} returns\n" + "@code{#t} then the next @code{read-char} operation on\n" + "@var{port} is guaranteed not to hang. If @var{port} is a file\n" + "port at end of file then @code{char-ready?} returns @code{#t}.\n" + "\n" + "@code{char-ready?} exists to make it possible for a\n" + "program to accept characters from interactive ports without\n" + "getting stuck waiting for input. Any input editors associated\n" + "with such ports must make sure that characters whose existence\n" + "has been asserted by @code{char-ready?} cannot be rubbed out.\n" + "If @code{char-ready?} were to return @code{#f} at end of file,\n" + "a port at end of file would be indistinguishable from an\n" + "interactive port that has no ready characters.") +#define FUNC_NAME s_scm_char_ready_p { scm_t_port *pt; - const char *enc; - SCM_VALIDATE_PORT (1, port); + if (SCM_UNBNDP (port)) + port = scm_current_input_port (); + /* It's possible to close the current input port, so validate even in + this case. */ + SCM_VALIDATE_OPINPORT (1, port); pt = SCM_PTAB_ENTRY (port); - enc = pt->encoding; - if (enc) - return scm_from_locale_string (pt->encoding); + + /* if the current read buffer is filled, or the + last pushed-back char has been read and the saved buffer is + filled, result is true. */ + if (pt->read_pos < pt->read_end + || (pt->read_buf == pt->putback_buf + && pt->saved_read_pos < pt->saved_read_end)) + return SCM_BOOL_T; else - return SCM_BOOL_F; + { + scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port); + + if (ptob->input_waiting) + return scm_from_bool(ptob->input_waiting (port)); + else + return SCM_BOOL_T; + } } #undef FUNC_NAME -SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0, - (SCM port, SCM enc), - "Sets the character encoding that will be used to interpret all\n" - "port I/O. New ports are created with the encoding\n" - "appropriate for the current locale if @code{setlocale} has \n" - "been called or ISO-8859-1 otherwise\n" - "and this procedure can be used to modify that encoding.\n") -#define FUNC_NAME s_scm_set_port_encoding_x +SCM_DEFINE (scm_seek, "seek", 3, 0, 0, + (SCM fd_port, SCM offset, SCM whence), + "Sets the current position of @var{fd_port} to the integer\n" + "@var{offset}, which is interpreted according to the value of\n" + "@var{whence}.\n" + "\n" + "One of the following variables should be supplied for\n" + "@var{whence}:\n" + "@defvar SEEK_SET\n" + "Seek from the beginning of the file.\n" + "@end defvar\n" + "@defvar SEEK_CUR\n" + "Seek from the current position.\n" + "@end defvar\n" + "@defvar SEEK_END\n" + "Seek from the end of the file.\n" + "@end defvar\n" + "If @var{fd_port} is a file descriptor, the underlying system\n" + "call is @code{lseek}. @var{port} may be a string port.\n" + "\n" + "The value returned is the new position in the file. This means\n" + "that the current position of a port can be obtained using:\n" + "@lisp\n" + "(seek port 0 SEEK_CUR)\n" + "@end lisp") +#define FUNC_NAME s_scm_seek { - char *enc_str; + int how; - SCM_VALIDATE_PORT (1, port); - SCM_VALIDATE_STRING (2, enc); + fd_port = SCM_COERCE_OUTPORT (fd_port); - enc_str = scm_to_locale_string (enc); - scm_i_set_port_encoding_x (port, enc_str); - free (enc_str); + how = scm_to_int (whence); + if (how != SEEK_SET && how != SEEK_CUR && how != SEEK_END) + SCM_OUT_OF_RANGE (3, whence); - return SCM_UNSPECIFIED; -} -#undef FUNC_NAME + if (SCM_OPPORTP (fd_port)) + { + scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (fd_port); + scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (fd_port); + off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset); + off_t_or_off64_t rv; + if (!ptob->seek) + SCM_MISC_ERROR ("port is not seekable", + scm_cons (fd_port, SCM_EOL)); + else + rv = ptob->seek (fd_port, off, how); -/* A fluid specifying the default conversion handler for newly created - ports. Its value should be one of the symbols below. */ -SCM_VARIABLE (default_conversion_strategy_var, - "%default-port-conversion-strategy"); + /* Set stream-start flags according to new position. */ + pti->at_stream_start_for_bom_read = (rv == 0); + pti->at_stream_start_for_bom_write = (rv == 0); -/* Whether the above fluid is initialized. */ -static int scm_conversion_strategy_init = 0; + scm_i_clear_pending_eof (fd_port); -/* The possible conversion strategies. */ -SCM_SYMBOL (sym_error, "error"); -SCM_SYMBOL (sym_substitute, "substitute"); -SCM_SYMBOL (sym_escape, "escape"); + return scm_from_off_t_or_off64_t (rv); + } + else /* file descriptor?. */ + { + off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset); + off_t_or_off64_t rv; + rv = lseek_or_lseek64 (scm_to_int (fd_port), off, how); + if (rv == -1) + SCM_SYSERROR; + return scm_from_off_t_or_off64_t (rv); + } +} +#undef FUNC_NAME -/* Return the default failed encoding conversion policy for new created - ports. */ -scm_t_string_failed_conversion_handler -scm_i_default_port_conversion_handler (void) -{ - scm_t_string_failed_conversion_handler handler; +#ifndef O_BINARY +#define O_BINARY 0 +#endif - if (!scm_conversion_strategy_init - || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var))) - handler = SCM_FAILED_CONVERSION_QUESTION_MARK; - else - { - SCM fluid, value; +/* Mingw has ftruncate(), perhaps implemented above using chsize, but + doesn't have the filename version truncate(), hence this code. */ +#if HAVE_FTRUNCATE && ! HAVE_TRUNCATE +static int +truncate (const char *file, off_t length) +{ + int ret, fdes; - fluid = SCM_VARIABLE_REF (default_conversion_strategy_var); - value = scm_fluid_ref (fluid); + fdes = open (file, O_BINARY | O_WRONLY); + if (fdes == -1) + return -1; - if (scm_is_eq (sym_substitute, value)) - handler = SCM_FAILED_CONVERSION_QUESTION_MARK; - else if (scm_is_eq (sym_escape, value)) - handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE; - else - /* Default to 'error also when the fluid's value is not one of - the valid symbols. */ - handler = SCM_FAILED_CONVERSION_ERROR; + ret = ftruncate (fdes, length); + if (ret == -1) + { + int save_errno = errno; + close (fdes); + errno = save_errno; + return -1; } - return handler; + return close (fdes); } +#endif /* HAVE_FTRUNCATE && ! HAVE_TRUNCATE */ -/* Use HANDLER as the default conversion strategy for future ports. */ -void -scm_i_set_default_port_conversion_handler (scm_t_string_failed_conversion_handler - handler) +SCM_DEFINE (scm_truncate_file, "truncate-file", 1, 1, 0, + (SCM object, SCM length), + "Truncate file @var{object} to @var{length} bytes. @var{object}\n" + "can be a filename string, a port object, or an integer file\n" + "descriptor.\n" + "The return value is unspecified.\n" + "\n" + "For a port or file descriptor @var{length} can be omitted, in\n" + "which case the file is truncated at the current position (per\n" + "@code{ftell} above).\n" + "\n" + "On most systems a file can be extended by giving a length\n" + "greater than the current size, but this is not mandatory in the\n" + "POSIX standard.") +#define FUNC_NAME s_scm_truncate_file { - SCM strategy; - - if (!scm_conversion_strategy_init - || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var))) - scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized", - SCM_EOL); + int rv; - switch (handler) - { - case SCM_FAILED_CONVERSION_ERROR: - strategy = sym_error; - break; + /* "object" can be a port, fdes or filename. - case SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE: - strategy = sym_escape; - break; + Negative "length" makes no sense, but it's left to truncate() or + ftruncate() to give back an error for that (normally EINVAL). + */ - case SCM_FAILED_CONVERSION_QUESTION_MARK: - strategy = sym_substitute; - break; + if (SCM_UNBNDP (length)) + { + /* must supply length if object is a filename. */ + if (scm_is_string (object)) + SCM_MISC_ERROR("must supply length if OBJECT is a filename", SCM_EOL); + + length = scm_seek (object, SCM_INUM0, scm_from_int (SEEK_CUR)); + } - default: - abort (); + object = SCM_COERCE_OUTPORT (object); + if (scm_is_integer (object)) + { + off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length); + SCM_SYSCALL (rv = ftruncate_or_ftruncate64 (scm_to_int (object), + c_length)); } + else if (SCM_OPOUTPORTP (object)) + { + off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length); + scm_t_port *pt = SCM_PTAB_ENTRY (object); + scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (object); - scm_fluid_set_x (SCM_VARIABLE_REF (default_conversion_strategy_var), - strategy); -} + if (!ptob->truncate) + SCM_MISC_ERROR ("port is not truncatable", SCM_EOL); -SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy", - 1, 0, 0, (SCM port), - "Returns the behavior of the port when handling a character that\n" - "is not representable in the port's current encoding.\n" - "It returns the symbol @code{error} if unrepresentable characters\n" - "should cause exceptions, @code{substitute} if the port should\n" - "try to replace unrepresentable characters with question marks or\n" - "approximate characters, or @code{escape} if unrepresentable\n" - "characters should be converted to string escapes.\n" - "\n" - "If @var{port} is @code{#f}, then the current default behavior\n" - "will be returned. New ports will have this default behavior\n" - "when they are created.\n") -#define FUNC_NAME s_scm_port_conversion_strategy -{ - scm_t_string_failed_conversion_handler h; + scm_i_clear_pending_eof (object); + if (pt->rw_active == SCM_PORT_READ) + scm_end_input_unlocked (object); + else if (pt->rw_active == SCM_PORT_WRITE) + ptob->flush (object); - if (scm_is_false (port)) - h = scm_i_default_port_conversion_handler (); + ptob->truncate (object, c_length); + rv = 0; + } else { - scm_t_port *pt; - - SCM_VALIDATE_OPPORT (1, port); - pt = SCM_PTAB_ENTRY (port); - - h = pt->ilseq_handler; + off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length); + char *str = scm_to_locale_string (object); + int eno; + SCM_SYSCALL (rv = truncate_or_truncate64 (str, c_length)); + eno = errno; + free (str); + errno = eno; } + if (rv == -1) + SCM_SYSERROR; + return SCM_UNSPECIFIED; +} +#undef FUNC_NAME - if (h == SCM_FAILED_CONVERSION_ERROR) - return scm_from_latin1_symbol ("error"); - else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK) - return scm_from_latin1_symbol ("substitute"); - else if (h == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE) - return scm_from_latin1_symbol ("escape"); - else - abort (); +SCM_DEFINE (scm_port_line, "port-line", 1, 0, 0, + (SCM port), + "Return the current line number for @var{port}.\n" + "\n" + "The first line of a file is 0. But you might want to add 1\n" + "when printing line numbers, since starting from 1 is\n" + "traditional in error messages, and likely to be more natural to\n" + "non-programmers.") +#define FUNC_NAME s_scm_port_line +{ + port = SCM_COERCE_OUTPORT (port); + SCM_VALIDATE_OPENPORT (1, port); + return scm_from_long (SCM_LINUM (port)); +} +#undef FUNC_NAME - /* Never gets here. */ - return SCM_UNDEFINED; +SCM_DEFINE (scm_set_port_line_x, "set-port-line!", 2, 0, 0, + (SCM port, SCM line), + "Set the current line number for @var{port} to @var{line}. The\n" + "first line of a file is 0.") +#define FUNC_NAME s_scm_set_port_line_x +{ + port = SCM_COERCE_OUTPORT (port); + SCM_VALIDATE_OPENPORT (1, port); + SCM_PTAB_ENTRY (port)->line_number = scm_to_long (line); + return SCM_UNSPECIFIED; } #undef FUNC_NAME -SCM_DEFINE (scm_set_port_conversion_strategy_x, "set-port-conversion-strategy!", - 2, 0, 0, - (SCM port, SCM sym), - "Sets the behavior of the interpreter when outputting a character\n" - "that is not representable in the port's current encoding.\n" - "@var{sym} can be either @code{'error}, @code{'substitute}, or\n" - "@code{'escape}. If it is @code{'error}, an error will be thrown\n" - "when an unconvertible character is encountered. If it is\n" - "@code{'substitute}, then unconvertible characters will \n" - "be replaced with approximate characters, or with question marks\n" - "if no approximately correct character is available.\n" - "If it is @code{'escape},\n" - "it will appear as a hex escape when output.\n" - "\n" - "If @var{port} is an open port, the conversion error behavior\n" - "is set for that port. If it is @code{#f}, it is set as the\n" - "default behavior for any future ports that get created in\n" - "this thread.\n") -#define FUNC_NAME s_scm_set_port_conversion_strategy_x +SCM_DEFINE (scm_port_column, "port-column", 1, 0, 0, + (SCM port), + "Return the current column number of @var{port}.\n" + "If the number is\n" + "unknown, the result is #f. Otherwise, the result is a 0-origin integer\n" + "- i.e. the first character of the first line is line 0, column 0.\n" + "(However, when you display a file position, for example in an error\n" + "message, we recommend you add 1 to get 1-origin integers. This is\n" + "because lines and column numbers traditionally start with 1, and that is\n" + "what non-programmers will find most natural.)") +#define FUNC_NAME s_scm_port_column { - scm_t_string_failed_conversion_handler handler; + port = SCM_COERCE_OUTPORT (port); + SCM_VALIDATE_OPENPORT (1, port); + return scm_from_int (SCM_COL (port)); +} +#undef FUNC_NAME - if (scm_is_eq (sym, sym_error)) - handler = SCM_FAILED_CONVERSION_ERROR; - else if (scm_is_eq (sym, sym_substitute)) - handler = SCM_FAILED_CONVERSION_QUESTION_MARK; - else if (scm_is_eq (sym, sym_escape)) - handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE; - else - SCM_MISC_ERROR ("unknown conversion strategy ~s", scm_list_1 (sym)); +SCM_DEFINE (scm_set_port_column_x, "set-port-column!", 2, 0, 0, + (SCM port, SCM column), + "Set the current column of @var{port}. Before reading the first\n" + "character on a line the column should be 0.") +#define FUNC_NAME s_scm_set_port_column_x +{ + port = SCM_COERCE_OUTPORT (port); + SCM_VALIDATE_OPENPORT (1, port); + SCM_PTAB_ENTRY (port)->column_number = scm_to_int (column); + return SCM_UNSPECIFIED; +} +#undef FUNC_NAME - if (scm_is_false (port)) - scm_i_set_default_port_conversion_handler (handler); - else - { - SCM_VALIDATE_OPPORT (1, port); - SCM_PTAB_ENTRY (port)->ilseq_handler = handler; - } +SCM_DEFINE (scm_port_filename, "port-filename", 1, 0, 0, + (SCM port), + "Return the filename associated with @var{port}, or @code{#f}\n" + "if no filename is associated with the port.") +#define FUNC_NAME s_scm_port_filename +{ + port = SCM_COERCE_OUTPORT (port); + SCM_VALIDATE_OPENPORT (1, port); + return SCM_FILENAME (port); +} +#undef FUNC_NAME +SCM_DEFINE (scm_set_port_filename_x, "set-port-filename!", 2, 0, 0, + (SCM port, SCM filename), + "Change the filename associated with @var{port}, using the current input\n" + "port if none is specified. Note that this does not change the port's\n" + "source of data, but only the value that is returned by\n" + "@code{port-filename} and reported in diagnostic output.") +#define FUNC_NAME s_scm_set_port_filename_x +{ + port = SCM_COERCE_OUTPORT (port); + SCM_VALIDATE_OPENPORT (1, port); + /* We allow the user to set the filename to whatever he likes. */ + SCM_SET_FILENAME (port, filename); return SCM_UNSPECIFIED; } #undef FUNC_NAME + + +/* Implementation helpers for port printing functions. */ void scm_print_port_mode (SCM exp, SCM port) { - scm_puts (SCM_CLOSEDP (exp) + scm_puts_unlocked (SCM_CLOSEDP (exp) ? "closed: " : (SCM_RDNG & SCM_CELL_WORD_0 (exp) ? (SCM_WRTNG & SCM_CELL_WORD_0 (exp) @@ -2798,15 +2997,91 @@ scm_port_print (SCM exp, SCM port, scm_print_state *pstate SCM_UNUSED) char *type = SCM_PTOBNAME (SCM_PTOBNUM (exp)); if (!type) type = "port"; - scm_puts ("#<", port); + scm_puts_unlocked ("#<", port); scm_print_port_mode (exp, port); - scm_puts (type, port); - scm_putc (' ', port); + scm_puts_unlocked (type, port); + scm_putc_unlocked (' ', port); scm_uintprint (SCM_CELL_WORD_1 (exp), 16, port); - scm_putc ('>', port); + scm_putc_unlocked ('>', port); return 1; } + + + +/* Iterating over all ports. */ + +struct for_each_data +{ + void (*proc) (void *data, SCM p); + void *data; +}; + +static SCM +for_each_trampoline (void *data, SCM port, SCM result) +{ + struct for_each_data *d = data; + + d->proc (d->data, port); + + return result; +} + +void +scm_c_port_for_each (void (*proc)(void *data, SCM p), void *data) +{ + struct for_each_data d; + + d.proc = proc; + d.data = data; + + scm_c_weak_set_fold (for_each_trampoline, &d, SCM_EOL, + scm_i_port_weak_set); +} + +static void +scm_for_each_trampoline (void *data, SCM port) +{ + scm_call_1 (SCM_PACK_POINTER (data), port); +} + +SCM_DEFINE (scm_port_for_each, "port-for-each", 1, 0, 0, + (SCM proc), + "Apply @var{proc} to each port in the Guile port table\n" + "in turn. The return value is unspecified. More specifically,\n" + "@var{proc} is applied exactly once to every port that exists\n" + "in the system at the time @code{port-for-each} is invoked.\n" + "Changes to the port table while @code{port-for-each} is running\n" + "have no effect as far as @code{port-for-each} is concerned.") +#define FUNC_NAME s_scm_port_for_each +{ + SCM_VALIDATE_PROC (1, proc); + + scm_c_port_for_each (scm_for_each_trampoline, SCM_UNPACK_POINTER (proc)); + + return SCM_UNSPECIFIED; +} +#undef FUNC_NAME + +static void +flush_output_port (void *closure, SCM port) +{ + if (SCM_OPOUTPORTP (port)) + scm_flush_unlocked (port); +} + +SCM_DEFINE (scm_flush_all_ports, "flush-all-ports", 0, 0, 0, + (), + "Equivalent to calling @code{force-output} on\n" + "all open output ports. The return value is unspecified.") +#define FUNC_NAME s_scm_flush_all_ports +{ + scm_c_port_for_each (&flush_output_port, NULL); + return SCM_UNSPECIFIED; +} +#undef FUNC_NAME + + /* Void ports. */ @@ -2828,18 +3103,13 @@ write_void_port (SCM port SCM_UNUSED, static SCM scm_i_void_port (long mode_bits) { - scm_i_scm_pthread_mutex_lock (&scm_i_port_table_mutex); - { - SCM answer = scm_new_port_table_entry (scm_tc16_void_port); - scm_t_port * pt = SCM_PTAB_ENTRY(answer); + SCM ret; - scm_port_non_buffer (pt); + ret = scm_c_make_port (scm_tc16_void_port, mode_bits, 0); + + scm_port_non_buffer (SCM_PTAB_ENTRY (ret)); - SCM_SETSTREAM (answer, 0); - SCM_SET_CELL_TYPE (answer, scm_tc16_void_port | mode_bits); - scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex); - return answer; - } + return ret; } SCM @@ -2860,7 +3130,9 @@ SCM_DEFINE (scm_sys_make_void_port, "%make-void-port", 1, 0, 0, } #undef FUNC_NAME + + /* Initialization. */ void @@ -2879,7 +3151,7 @@ scm_init_ports () cur_errport_fluid = scm_make_fluid (); cur_loadport_fluid = scm_make_fluid (); - scm_i_port_weak_hash = scm_make_weak_key_hash_table (SCM_I_MAKINUM(31)); + scm_i_port_weak_set = scm_c_make_weak_set (31); #include "libguile/ports.x"