X-Git-Url: http://git.hcoop.net/bpt/guile.git/blobdiff_plain/228a24ef30e635e58af0e4fe5fc9b9db738abeff..8c6206f319971fc61df9a7362ad0253bb47349bd:/libguile/strports.c diff --git a/libguile/strports.c b/libguile/strports.c index 0a75a4068..a6a03b4eb 100644 --- a/libguile/strports.c +++ b/libguile/strports.c @@ -1,55 +1,35 @@ -/* Copyright (C) 1995,1996,1998,1999,2000,2001 Free Software Foundation, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this software; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 59 Temple Place, Suite 330, - * Boston, MA 02111-1307 USA +/* Copyright (C) 1995, 1996, 1998, 1999, 2000, 2001, 2002, 2003, 2005, 2006, + * 2009, 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc. * - * As a special exception, the Free Software Foundation gives permission - * for additional uses of the text contained in its release of GUILE. + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 3 of + * the License, or (at your option) any later version. * - * The exception is that, if you link the GUILE library with other files - * to produce an executable, this does not by itself cause the - * resulting executable to be covered by the GNU General Public License. - * Your use of that executable is in no way restricted on account of - * linking the GUILE library code into it. + * This library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. * - * This exception does not however invalidate any other reasons why - * the executable file might be covered by the GNU General Public License. - * - * This exception applies only to the code released by the - * Free Software Foundation under the name GUILE. If you copy - * code from other Free Software Foundation releases into a copy of - * GUILE, as the General Public License permits, the exception does - * not apply to the code that you add in this way. To avoid misleading - * anyone as to the status of such modified files, you must delete - * this exception notice from them. - * - * If you write modifications of your own for GUILE, it is your choice - * whether to permit this exception to apply to your modifications. - * If you do not wish that, delete this exception notice. */ + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA + */ +#ifdef HAVE_CONFIG_H +# include +#endif + #include "libguile/_scm.h" #include -#ifdef HAVE_UNISTD_H #include -#endif -#include "libguile/unif.h" +#include "libguile/bytevectors.h" #include "libguile/eval.h" #include "libguile/ports.h" #include "libguile/read.h" @@ -58,6 +38,7 @@ #include "libguile/modules.h" #include "libguile/validate.h" #include "libguile/deprecation.h" +#include "libguile/srfi-4.h" #include "libguile/strports.h" @@ -72,96 +53,96 @@ */ /* NOTES: - write_buf/write_end point to the ends of the allocated string. - read_buf/read_end in principle point to the part of the string which - has been written to, but this is only updated after a flush. - read_pos and write_pos in principle should be equal, but this is only true - when rw_active is SCM_PORT_NEITHER. -*/ + + write_buf/write_end point to the ends of the allocated bytevector. + read_buf/read_end point to the part of the bytevector which has been + written to. read_pos and write_pos are always equal. + + ENHANCE-ME - output blocks: + + The current code keeps an output string as a single block. That means + when the size is increased the entire old contents must be copied. It'd + be more efficient to begin a new block when the old one is full, so + there's no re-copying of previous data. + + To make seeking efficient, keeping the pieces in a vector might be best, + though appending is probably the most common operation. The size of each + block could be progressively increased, so the bigger the string the + bigger the blocks. + + When `get-output-string' is called the blocks have to be coalesced into a + string, the result could be kept as a single big block. If blocks were + strings then `get-output-string' could notice when there's just one and + return that with a copy-on-write (though repeated calls to + `get-output-string' are probably unlikely). + + Another possibility would be to extend the port mechanism to let SCM + strings come through directly from `display' and friends. That way if a + big string is written it can be kept as a copy-on-write, saving time + copying and maybe saving some space. */ + scm_t_bits scm_tc16_strport; static int -stfill_buffer (SCM port) +st_fill_input (SCM port) { scm_t_port *pt = SCM_PTAB_ENTRY (port); if (pt->read_pos >= pt->read_end) return EOF; else - return scm_return_first_int (*pt->read_pos, port); + return *pt->read_pos; } -/* change the size of a port's string to new_size. this doesn't - change read_buf_size. */ -static void -st_resize_port (scm_t_port *pt, off_t new_size) +/* Change the size of a port's bytevector to NEW_SIZE. This doesn't + change `read_buf_size'. */ +static void +st_resize_port (scm_t_port *pt, scm_t_off new_size) { SCM old_stream = SCM_PACK (pt->stream); - SCM new_stream = scm_allocate_string (new_size); - unsigned long int old_size = SCM_STRING_LENGTH (old_stream); + const signed char *src = SCM_BYTEVECTOR_CONTENTS (old_stream); + SCM new_stream = scm_c_make_bytevector (new_size); + signed char *dst = SCM_BYTEVECTOR_CONTENTS (new_stream); + unsigned long int old_size = SCM_BYTEVECTOR_LENGTH (old_stream); unsigned long int min_size = min (old_size, new_size); - unsigned long int i; - off_t index = pt->write_pos - pt->write_buf; + scm_t_off offset = pt->write_pos - pt->write_buf; pt->write_buf_size = new_size; - for (i = 0; i != min_size; ++i) - SCM_STRING_CHARS (new_stream) [i] = SCM_STRING_CHARS (old_stream) [i]; + memcpy (dst, src, min_size); + + scm_remember_upto_here_1 (old_stream); /* reset buffer. */ { pt->stream = SCM_UNPACK (new_stream); - pt->read_buf = pt->write_buf = SCM_STRING_UCHARS (new_stream); - pt->read_pos = pt->write_pos = pt->write_buf + index; + pt->read_buf = pt->write_buf = (unsigned char *)dst; + pt->read_pos = pt->write_pos = pt->write_buf + offset; pt->write_end = pt->write_buf + pt->write_buf_size; pt->read_end = pt->read_buf + pt->read_buf_size; } } -/* amount by which write_buf is expanded. */ -#define SCM_WRITE_BLOCK 80 - -/* ensure that write_pos < write_end by enlarging the buffer when - necessary. update read_buf to account for written chars. */ static void -st_flush (SCM port) +st_write (SCM port, const void *data, size_t size) { scm_t_port *pt = SCM_PTAB_ENTRY (port); - if (pt->write_pos == pt->write_end) - { - st_resize_port (pt, pt->write_buf_size + SCM_WRITE_BLOCK); - } - pt->read_pos = pt->write_pos; + if (size > pt->write_end - pt->write_pos) + st_resize_port (pt, max (pt->write_buf_size * 2, + pt->write_end - pt->write_pos + size)); + + memcpy ((char *) pt->write_pos, data, size); + pt->read_pos = (pt->write_pos += size); + if (pt->read_pos > pt->read_end) { pt->read_end = (unsigned char *) pt->read_pos; pt->read_buf_size = pt->read_end - pt->read_buf; } - pt->rw_active = SCM_PORT_NEITHER; -} - -static void -st_write (SCM port, const void *data, size_t size) -{ - scm_t_port *pt = SCM_PTAB_ENTRY (port); - const char *input = (char *) data; - - while (size > 0) - { - int space = pt->write_end - pt->write_pos; - int write_len = (size > space) ? space : size; - - memcpy ((char *) pt->write_pos, input, write_len); - pt->write_pos += write_len; - size -= write_len; - input += write_len; - if (write_len == space) - st_flush (port); - } } static void @@ -176,11 +157,11 @@ st_end_input (SCM port, int offset) pt->rw_active = SCM_PORT_NEITHER; } -static off_t -st_seek (SCM port, off_t offset, int whence) +static scm_t_off +st_seek (SCM port, scm_t_off offset, int whence) { scm_t_port *pt = SCM_PTAB_ENTRY (port); - off_t target; + scm_t_off target; if (pt->rw_active == SCM_PORT_READ && offset == 0 && whence == SEEK_CUR) /* special case to avoid disturbing the unread-char buffer. */ @@ -198,11 +179,10 @@ st_seek (SCM port, off_t offset, int whence) else /* all other cases. */ { - if (pt->rw_active == SCM_PORT_WRITE) - st_flush (port); - if (pt->rw_active == SCM_PORT_READ) - scm_end_input (port); + scm_end_input_unlocked (port); + + pt->rw_active = SCM_PORT_NEITHER; switch (whence) { @@ -231,12 +211,8 @@ st_seek (SCM port, off_t offset, int whence) SCM_EOL); } } - else - { - st_resize_port (pt, target + (target == pt->write_buf_size - ? SCM_WRITE_BLOCK - : 0)); - } + else if (target == pt->write_buf_size) + st_resize_port (pt, target * 2); } pt->read_pos = pt->write_pos = pt->read_buf + target; if (pt->read_pos > pt->read_end) @@ -249,7 +225,7 @@ st_seek (SCM port, off_t offset, int whence) } static void -st_truncate (SCM port, off_t length) +st_truncate (SCM port, scm_t_off length) { scm_t_port *pt = SCM_PTAB_ENTRY (port); @@ -259,59 +235,91 @@ st_truncate (SCM port, off_t length) pt->read_buf_size = length; pt->read_end = pt->read_buf + length; if (pt->read_pos > pt->read_end) - pt->read_pos = pt->read_end; - - if (pt->write_pos > pt->read_end) - pt->write_pos = pt->read_end; + pt->read_pos = pt->write_pos = pt->read_end; } -SCM +/* The initial size in bytes of a string port's buffer. */ +#define INITIAL_BUFFER_SIZE 128 + +/* Return a new string port with MODES. If STR is #f, a new backing + buffer is allocated; otherwise STR must be a string and a copy of it + serves as the buffer for the new port. */ +SCM scm_mkstrport (SCM pos, SCM str, long modes, const char *caller) { - SCM z; + SCM z, buf; scm_t_port *pt; - size_t str_len; + size_t read_buf_size, num_bytes, c_byte_pos; + char *c_buf; - SCM_ASSERT (SCM_INUMP(pos) && SCM_INUM(pos) >= 0, pos, SCM_ARG1, caller); - SCM_ASSERT (SCM_STRINGP (str), str, SCM_ARG1, caller); - str_len = SCM_STRING_LENGTH (str); - if (SCM_INUM (pos) > str_len) - scm_out_of_range (caller, pos); if (!((modes & SCM_WRTNG) || (modes & SCM_RDNG))) scm_misc_error ("scm_mkstrport", "port must read or write", SCM_EOL); - z = scm_cell (scm_tc16_strport, 0); - SCM_DEFER_INTS; - pt = scm_add_to_port_table (z); - SCM_SET_CELL_TYPE (z, scm_tc16_strport | modes); - SCM_SETPTAB_ENTRY (z, pt); - SCM_SETSTREAM (z, SCM_UNPACK (str)); - pt->write_buf = pt->read_buf = SCM_STRING_UCHARS (str); - pt->read_pos = pt->write_pos = pt->read_buf + SCM_INUM (pos); - pt->write_buf_size = pt->read_buf_size = str_len; - pt->write_end = pt->read_end = pt->read_buf + pt->read_buf_size; - pt->rw_random = 1; + if (scm_is_false (str)) + { + /* Allocate a new buffer to write to. */ + num_bytes = INITIAL_BUFFER_SIZE; + buf = scm_c_make_bytevector (num_bytes); + c_buf = (char *) SCM_BYTEVECTOR_CONTENTS (buf); + + /* Reset `read_buf_size'. It will contain the actual number of + bytes written to the port. */ + read_buf_size = 0; + c_byte_pos = 0; + } + else + { + char *copy; + + SCM_ASSERT (scm_is_string (str), str, SCM_ARG1, caller); + + /* STR is a string. */ + /* Create a copy of STR in UTF-8. */ + copy = scm_to_utf8_stringn (str, &num_bytes); + buf = scm_c_make_bytevector (num_bytes); + c_buf = (char *) SCM_BYTEVECTOR_CONTENTS (buf); + memcpy (c_buf, copy, num_bytes); + free (copy); + + read_buf_size = num_bytes; + + if (scm_is_eq (pos, SCM_INUM0)) + c_byte_pos = 0; + else + /* Inefficient but simple way to convert the character position + POS into a byte position C_BYTE_POS. */ + free (scm_to_utf8_stringn (scm_substring (str, SCM_INUM0, pos), + &c_byte_pos)); + } + + z = scm_c_make_port_with_encoding (scm_tc16_strport, modes, + "UTF-8", + scm_i_default_port_conversion_handler (), + SCM_UNPACK (buf)); - SCM_ALLOW_INTS; + pt = SCM_PTAB_ENTRY (z); + + pt->write_buf = pt->read_buf = (unsigned char *) c_buf; + pt->read_pos = pt->write_pos = pt->read_buf + c_byte_pos; + pt->read_buf_size = read_buf_size; + pt->write_buf_size = num_bytes; + pt->write_end = pt->read_end = pt->read_buf + pt->read_buf_size; + pt->rw_random = 1; - /* ensure write_pos is writable. */ - if ((modes & SCM_WRTNG) && pt->write_pos == pt->write_end) - st_flush (z); return z; } -/* create a new string from a string port's buffer. */ -SCM scm_strport_to_string (SCM port) +/* Create a new string from the buffer of PORT, a string port, converting from + PORT's encoding to the standard string representation. */ +SCM +scm_strport_to_string (SCM port) { scm_t_port *pt = SCM_PTAB_ENTRY (port); - SCM str; - if (pt->rw_active == SCM_PORT_WRITE) - st_flush (port); + if (pt->read_buf_size == 0) + return scm_nullstr; - str = scm_mem2string ((char *) pt->read_buf, pt->read_buf_size); - scm_remember_upto_here_1 (port); - return str; + return scm_from_port_stringn ((char *)pt->read_buf, pt->read_buf_size, port); } SCM_DEFINE (scm_object_to_string, "object->string", 1, 1, 0, @@ -321,53 +329,54 @@ SCM_DEFINE (scm_object_to_string, "object->string", 1, 1, 0, "argument @var{printer} (default: @code{write}).") #define FUNC_NAME s_scm_object_to_string { - SCM str, port; + SCM port, result; if (!SCM_UNBNDP (printer)) SCM_VALIDATE_PROC (2, printer); - str = scm_allocate_string (0); - port = scm_mkstrport (SCM_INUM0, str, SCM_OPN | SCM_WRTNG, FUNC_NAME); + port = scm_mkstrport (SCM_INUM0, SCM_BOOL_F, + SCM_OPN | SCM_WRTNG, FUNC_NAME); if (SCM_UNBNDP (printer)) scm_write (obj, port); else scm_call_2 (printer, obj, port); - return scm_strport_to_string (port); + result = scm_strport_to_string (port); + + /* Explicitly close PORT so that the iconv CDs associated with it are + deallocated right away. This is important because CDs use a lot of + memory that's not visible to the GC, so not freeing them can lead + to almost large heap usage. See + + for details. */ + scm_close_port (port); + + return result; } #undef FUNC_NAME -SCM_DEFINE (scm_call_with_output_string, "call-with-output-string", 1, 0, 0, - (SCM proc), - "Calls the one-argument procedure @var{proc} with a newly created output\n" - "port. When the function returns, the string composed of the characters\n" - "written into the port is returned.") -#define FUNC_NAME s_scm_call_with_output_string +SCM +scm_call_with_output_string (SCM proc) { - SCM p; + static SCM var = SCM_BOOL_F; - p = scm_mkstrport (SCM_INUM0, - scm_make_string (SCM_INUM0, SCM_UNDEFINED), - SCM_OPN | SCM_WRTNG, - FUNC_NAME); - scm_call_1 (proc, p); + if (scm_is_false (var)) + var = scm_c_private_lookup ("guile", "call-with-output-string"); - return scm_strport_to_string (p); + return scm_call_1 (scm_variable_ref (var), proc); } -#undef FUNC_NAME -SCM_DEFINE (scm_call_with_input_string, "call-with-input-string", 2, 0, 0, - (SCM string, SCM proc), - "Calls the one-argument procedure @var{proc} with a newly\n" - "created input port from which @var{string}'s contents may be\n" - "read. The value yielded by the @var{proc} is returned.") -#define FUNC_NAME s_scm_call_with_input_string +SCM +scm_call_with_input_string (SCM string, SCM proc) { - SCM p = scm_mkstrport(SCM_INUM0, string, SCM_OPN | SCM_RDNG, FUNC_NAME); - return scm_call_1 (proc, p); + static SCM var = SCM_BOOL_F; + + if (scm_is_false (var)) + var = scm_c_private_lookup ("guile", "call-with-input-string"); + + return scm_call_2 (scm_variable_ref (var), string, proc); } -#undef FUNC_NAME SCM_DEFINE (scm_open_input_string, "open-input-string", 1, 0, 0, (SCM str), @@ -393,8 +402,7 @@ SCM_DEFINE (scm_open_output_string, "open-output-string", 0, 0, 0, { SCM p; - p = scm_mkstrport (SCM_INUM0, - scm_make_string (SCM_INUM0, SCM_UNDEFINED), + p = scm_mkstrport (SCM_INUM0, SCM_BOOL_F, SCM_OPN | SCM_WRTNG, FUNC_NAME); return p; @@ -420,12 +428,11 @@ SCM scm_c_read_string (const char *expr) { SCM port = scm_mkstrport (SCM_INUM0, - scm_makfrom0str (expr), + scm_from_locale_string (expr), SCM_OPN | SCM_RDNG, "scm_c_read_string"); SCM form; - /* Read expressions from that port; ignore the values. */ form = scm_read (port); scm_close_port (port); @@ -437,51 +444,62 @@ scm_c_read_string (const char *expr) SCM scm_c_eval_string (const char *expr) { - return scm_eval_string (scm_makfrom0str (expr)); + return scm_eval_string (scm_from_locale_string (expr)); } -static SCM -inner_eval_string (void *data) +SCM +scm_c_eval_string_in_module (const char *expr, SCM module) { - SCM port = (SCM)data; - SCM form; - SCM ans = SCM_UNSPECIFIED; + return scm_eval_string_in_module (scm_from_locale_string (expr), module); +} - /* Read expressions from that port; ignore the values. */ - while (!SCM_EOF_OBJECT_P (form = scm_read (port))) - ans = scm_primitive_eval_x (form); - /* Don't close the port here; if we re-enter this function via a - continuation, then the next time we enter it, we'll get an error. - It's a string port anyway, so there's no advantage to closing it - early. */ +static SCM eval_string_var; +static SCM k_module; - return ans; +static void +init_eval_string_var_and_k_module (void) +{ + eval_string_var = scm_c_public_variable ("ice-9 eval-string", "eval-string"); + k_module = scm_from_locale_keyword ("module"); } -SCM_DEFINE (scm_eval_string, "eval-string", 1, 0, 0, - (SCM string), +SCM_DEFINE (scm_eval_string_in_module, "eval-string", 1, 1, 0, + (SCM string, SCM module), "Evaluate @var{string} as the text representation of a Scheme\n" "form or forms, and return whatever value they produce.\n" - "Evaluation takes place in the environment returned by the\n" - "procedure @code{interaction-environment}.") -#define FUNC_NAME s_scm_eval_string + "Evaluation takes place in the given module, or the current\n" + "module when no module is given.\n" + "While the code is evaluated, the given module is made the\n" + "current one. The current module is restored when this\n" + "procedure returns.") +#define FUNC_NAME s_scm_eval_string_in_module { - SCM port = scm_mkstrport (SCM_INUM0, string, SCM_OPN | SCM_RDNG, - "eval-string"); - return scm_c_call_with_current_module (scm_interaction_environment (), - inner_eval_string, (void *)port); + static scm_i_pthread_once_t once = SCM_I_PTHREAD_ONCE_INIT; + scm_i_pthread_once (&once, init_eval_string_var_and_k_module); + + if (SCM_UNBNDP (module)) + module = scm_current_module (); + else + SCM_VALIDATE_MODULE (2, module); + + return scm_call_3 (scm_variable_ref (eval_string_var), + string, k_module, module); } #undef FUNC_NAME +SCM +scm_eval_string (SCM string) +{ + return scm_eval_string_in_module (string, SCM_UNDEFINED); +} + static scm_t_bits scm_make_stptob () { - scm_t_bits tc = scm_make_port_type ("string", stfill_buffer, st_write); + scm_t_bits tc = scm_make_port_type ("string", st_fill_input, st_write); - scm_set_port_mark (tc, scm_markstream); scm_set_port_end_input (tc, st_end_input); - scm_set_port_flush (tc, st_flush); scm_set_port_seek (tc, st_seek); scm_set_port_truncate (tc, st_truncate); @@ -493,9 +511,7 @@ scm_init_strports () { scm_tc16_strport = scm_make_stptob (); -#ifndef SCM_MAGIC_SNARFER #include "libguile/strports.x" -#endif }