X-Git-Url: https://git.hcoop.net/bpt/guile.git/blobdiff_plain/574b7be0ba5dbbecfacf172ed81a5f22d1d5566e..7f3be1db9949b0566d3a2cb6bd9d0e84287bbb0a:/libguile/strports.c diff --git a/libguile/strports.c b/libguile/strports.c index 64987fabc..d1b293c21 100644 --- a/libguile/strports.c +++ b/libguile/strports.c @@ -1,5 +1,6 @@ -/* Copyright (C) 1995,1996,1998,1999,2000,2001,2002, 2003, 2005, 2006, 2009, 2010, 2011 Free Software Foundation, Inc. - * +/* Copyright (C) 1995, 1996, 1998, 1999, 2000, 2001, 2002, 2003, 2005, 2006, + * 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 3 of @@ -30,7 +31,7 @@ #include #endif -#include "libguile/arrays.h" +#include "libguile/bytevectors.h" #include "libguile/eval.h" #include "libguile/ports.h" #include "libguile/read.h" @@ -55,15 +56,8 @@ /* NOTES: - We break the rules set forth by strings.h about accessing the - internals of strings here. We can do this since we can guarantee - that the string used as pt->stream is not in use by anyone else. - Thus, it's representation will not change asynchronously. - - (Ports aren't thread-safe yet anyway...) - - write_buf/write_end point to the ends of the allocated string. - read_buf/read_end in principle point to the part of the string which + write_buf/write_end point to the ends of the allocated bytevector. + read_buf/read_end in principle point to the part of the bytevector which has been written to, but this is only updated after a flush. read_pos and write_pos in principle should be equal, but this is only true when rw_active is SCM_PORT_NEITHER. @@ -106,25 +100,23 @@ stfill_buffer (SCM port) return scm_return_first_int (*pt->read_pos, port); } -/* change the size of a port's string to new_size. this doesn't - change read_buf_size. */ -static void +/* Change the size of a port's bytevector to NEW_SIZE. This doesn't + change `read_buf_size'. */ +static void st_resize_port (scm_t_port *pt, scm_t_off new_size) { SCM old_stream = SCM_PACK (pt->stream); - const char *src = scm_i_string_chars (old_stream); - char *dst; - SCM new_stream = scm_i_make_string (new_size, &dst); - unsigned long int old_size = scm_i_string_length (old_stream); + const signed char *src = SCM_BYTEVECTOR_CONTENTS (old_stream); + SCM new_stream = scm_c_make_bytevector (new_size); + signed char *dst = SCM_BYTEVECTOR_CONTENTS (new_stream); + unsigned long int old_size = SCM_BYTEVECTOR_LENGTH (old_stream); unsigned long int min_size = min (old_size, new_size); - unsigned long int i; scm_t_off index = pt->write_pos - pt->write_buf; pt->write_buf_size = new_size; - for (i = 0; i != min_size; ++i) - dst[i] = src[i]; + memcpy (dst, src, min_size); scm_remember_upto_here_1 (old_stream); @@ -138,27 +130,17 @@ st_resize_port (scm_t_port *pt, scm_t_off new_size) } } -/* amount by which write_buf is expanded. */ -#define SCM_WRITE_BLOCK 80 - -/* ensure that write_pos < write_end by enlarging the buffer when - necessary. update read_buf to account for written chars. - - The buffer is enlarged by 1.5 times, plus SCM_WRITE_BLOCK. Adding just a - fixed amount is no good, because there's a block copy for each increment, - and that copying would take quadratic time. In the past it was found to - be very slow just adding 80 bytes each time (eg. about 10 seconds for - writing a 100kbyte string). */ - +/* Ensure that `write_pos' < `write_end' by enlarging the buffer when + necessary. Update `read_buf' to account for written chars. The + buffer is enlarged geometrically. */ static void st_flush (SCM port) { scm_t_port *pt = SCM_PTAB_ENTRY (port); if (pt->write_pos == pt->write_end) - { - st_resize_port (pt, pt->write_buf_size * 3 / 2 + SCM_WRITE_BLOCK); - } + st_resize_port (pt, pt->write_buf_size * 2); + pt->read_pos = pt->write_pos; if (pt->read_pos > pt->read_end) { @@ -255,12 +237,8 @@ st_seek (SCM port, scm_t_off offset, int whence) SCM_EOL); } } - else - { - st_resize_port (pt, target + (target == pt->write_buf_size - ? SCM_WRITE_BLOCK - : 0)); - } + else if (target == pt->write_buf_size) + st_resize_port (pt, target * 2); } pt->read_pos = pt->write_pos = pt->read_buf + target; if (pt->read_pos > pt->read_end) @@ -289,16 +267,19 @@ st_truncate (SCM port, scm_t_off length) pt->write_pos = pt->read_end; } +/* The initial size in bytes of a string port's buffer. */ +#define INITIAL_BUFFER_SIZE 128 + +/* Return a new string port with MODES. If STR is #f, a new backing + buffer is allocated; otherwise STR must be a string and a copy of it + serves as the buffer for the new port. */ SCM scm_mkstrport (SCM pos, SCM str, long modes, const char *caller) { - SCM z; + SCM z, buf; scm_t_port *pt; size_t str_len, c_pos; - char *buf, *c_str; - - SCM_ASSERT (scm_is_string (str), str, SCM_ARG1, caller); - c_pos = scm_to_unsigned_integer (pos, 0, scm_i_string_length (str)); + char *c_buf; if (!((modes & SCM_WRTNG) || (modes & SCM_RDNG))) scm_misc_error ("scm_mkstrport", "port must read or write", SCM_EOL); @@ -307,31 +288,71 @@ scm_mkstrport (SCM pos, SCM str, long modes, const char *caller) scm_i_dynwind_pthread_mutex_lock (&scm_i_port_table_mutex); z = scm_new_port_table_entry (scm_tc16_strport); - pt = SCM_PTAB_ENTRY(z); - SCM_SETSTREAM (z, SCM_UNPACK (str)); + SCM_SET_CELL_TYPE (z, scm_tc16_strport); + pt = SCM_PTAB_ENTRY (z); + + /* Make PT initially empty, and release the port-table mutex + immediately. This is so that if one of the function calls below + raises an exception, a pre-unwind catch handler can still create + new ports; for instance, `display-backtrace' needs to be able to + allocate a new string port. See . */ + scm_port_non_buffer (pt); + SCM_SETSTREAM (z, SCM_UNPACK (scm_null_bytevector)); + + scm_dynwind_end (); + + if (scm_is_false (str)) + { + /* Allocate a new buffer to write to. */ + str_len = INITIAL_BUFFER_SIZE; + buf = scm_c_make_bytevector (str_len); + c_buf = (char *) SCM_BYTEVECTOR_CONTENTS (buf); + c_pos = 0; + } + else + { + /* STR is a string. */ + char *copy; + + SCM_ASSERT (scm_is_string (str), str, SCM_ARG1, caller); + + /* Create a copy of STR in the encoding of PT. */ + copy = scm_to_stringn (str, &str_len, pt->encoding, + SCM_FAILED_CONVERSION_ERROR); + buf = scm_c_make_bytevector (str_len); + c_buf = (char *) SCM_BYTEVECTOR_CONTENTS (buf); + memcpy (c_buf, copy, str_len); + free (copy); + + c_pos = scm_to_unsigned_integer (pos, 0, str_len); + } + + /* Now, finish up the port. */ + scm_i_pthread_mutex_lock (&scm_i_port_table_mutex); + + SCM_SETSTREAM (z, SCM_UNPACK (buf)); SCM_SET_CELL_TYPE (z, scm_tc16_strport | modes); - /* Create a copy of STR in the encoding of Z. */ - buf = scm_to_stringn (str, &str_len, pt->encoding, - SCM_FAILED_CONVERSION_ERROR); - c_str = scm_gc_malloc_pointerless (str_len, "strport"); - memcpy (c_str, buf, str_len); - free (buf); + if (scm_is_false (str)) + /* Reset `read_buf_size'. It will contain the actual number of + bytes written to PT. */ + pt->read_buf_size = 0; + else + pt->read_buf_size = str_len; - pt->write_buf = pt->read_buf = (unsigned char *) c_str; + pt->write_buf = pt->read_buf = (unsigned char *) c_buf; pt->read_pos = pt->write_pos = pt->read_buf + c_pos; - pt->write_buf_size = pt->read_buf_size = str_len; + pt->write_buf_size = str_len; pt->write_end = pt->read_end = pt->read_buf + pt->read_buf_size; pt->rw_random = 1; - scm_dynwind_end (); + scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex); /* Ensure WRITE_POS is writable. */ if ((modes & SCM_WRTNG) && pt->write_pos == pt->write_end) st_flush (z); - scm_i_set_conversion_strategy_x (z, SCM_FAILED_CONVERSION_ERROR); return z; } @@ -352,7 +373,7 @@ scm_strport_to_string (SCM port) if (pt->encoding == NULL) { char *buf; - str = scm_i_make_string (pt->read_buf_size, &buf); + str = scm_i_make_string (pt->read_buf_size, &buf, 0); memcpy (buf, pt->read_buf, pt->read_buf_size); } else @@ -369,20 +390,30 @@ SCM_DEFINE (scm_object_to_string, "object->string", 1, 1, 0, "argument @var{printer} (default: @code{write}).") #define FUNC_NAME s_scm_object_to_string { - SCM str, port; + SCM port, result; if (!SCM_UNBNDP (printer)) SCM_VALIDATE_PROC (2, printer); - str = scm_c_make_string (0, SCM_UNDEFINED); - port = scm_mkstrport (SCM_INUM0, str, SCM_OPN | SCM_WRTNG, FUNC_NAME); + port = scm_mkstrport (SCM_INUM0, SCM_BOOL_F, + SCM_OPN | SCM_WRTNG, FUNC_NAME); if (SCM_UNBNDP (printer)) scm_write (obj, port); else scm_call_2 (printer, obj, port); - return scm_strport_to_string (port); + result = scm_strport_to_string (port); + + /* Explicitly close PORT so that the iconv CDs associated with it are + deallocated right away. This is important because CDs use a lot of + memory that's not visible to the GC, so not freeing them can lead + to almost large heap usage. See + + for details. */ + scm_close_port (port); + + return result; } #undef FUNC_NAME @@ -395,8 +426,7 @@ SCM_DEFINE (scm_call_with_output_string, "call-with-output-string", 1, 0, 0, { SCM p; - p = scm_mkstrport (SCM_INUM0, - scm_make_string (SCM_INUM0, SCM_UNDEFINED), + p = scm_mkstrport (SCM_INUM0, SCM_BOOL_F, SCM_OPN | SCM_WRTNG, FUNC_NAME); scm_call_1 (proc, p); @@ -441,8 +471,7 @@ SCM_DEFINE (scm_open_output_string, "open-output-string", 0, 0, 0, { SCM p; - p = scm_mkstrport (SCM_INUM0, - scm_make_string (SCM_INUM0, SCM_UNDEFINED), + p = scm_mkstrport (SCM_INUM0, SCM_BOOL_F, SCM_OPN | SCM_WRTNG, FUNC_NAME); return p; @@ -467,15 +496,12 @@ SCM_DEFINE (scm_get_output_string, "get-output-string", 1, 0, 0, SCM scm_c_read_string (const char *expr) { - /* FIXME: the c string gets packed into a string, only to get - immediately unpacked in scm_mkstrport. */ SCM port = scm_mkstrport (SCM_INUM0, scm_from_locale_string (expr), SCM_OPN | SCM_RDNG, "scm_c_read_string"); SCM form; - /* Read expressions from that port; ignore the values. */ form = scm_read (port); scm_close_port (port); @@ -497,25 +523,6 @@ scm_c_eval_string_in_module (const char *expr, SCM module) } -static SCM -inner_eval_string (void *data) -{ - SCM port = (SCM)data; - SCM form; - SCM ans = SCM_UNSPECIFIED; - - /* Read expressions from that port; ignore the values. */ - while (!SCM_EOF_OBJECT_P (form = scm_read (port))) - ans = scm_primitive_eval_x (form); - - /* Don't close the port here; if we re-enter this function via a - continuation, then the next time we enter it, we'll get an error. - It's a string port anyway, so there's no advantage to closing it - early. */ - - return ans; -} - SCM_DEFINE (scm_eval_string_in_module, "eval-string", 1, 1, 0, (SCM string, SCM module), "Evaluate @var{string} as the text representation of a Scheme\n" @@ -527,14 +534,23 @@ SCM_DEFINE (scm_eval_string_in_module, "eval-string", 1, 1, 0, "procedure returns.") #define FUNC_NAME s_scm_eval_string_in_module { - SCM port = scm_mkstrport (SCM_INUM0, string, SCM_OPN | SCM_RDNG, - FUNC_NAME); + static SCM eval_string = SCM_UNDEFINED, k_module = SCM_UNDEFINED; + static scm_i_pthread_mutex_t init_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER; + + scm_i_scm_pthread_mutex_lock (&init_mutex); + if (SCM_UNBNDP (eval_string)) + { + eval_string = scm_c_public_variable ("ice-9 eval-string", "eval-string"); + k_module = scm_from_locale_keyword ("module"); + } + scm_i_pthread_mutex_unlock (&init_mutex); + if (SCM_UNBNDP (module)) module = scm_current_module (); else SCM_VALIDATE_MODULE (2, module); - return scm_c_call_with_current_module (module, - inner_eval_string, (void *)port); + + return scm_call_3 (scm_variable_ref (eval_string), string, k_module, module); } #undef FUNC_NAME