X-Git-Url: http://git.hcoop.net/bpt/guile.git/blobdiff_plain/6869328b3b6b6cdd12df86ca517ecf2aa7544940..f45eccffa73c043466a4cc0f5037132ee5795eee:/libguile/strings.c?ds=sidebyside diff --git a/libguile/strings.c b/libguile/strings.c index 6feb017de..4d71fc74d 100644 --- a/libguile/strings.c +++ b/libguile/strings.c @@ -1,22 +1,26 @@ -/* Copyright (C) 1995,1996,1998,2000,2001, 2004 Free Software Foundation, Inc. +/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 3 of + * the License, or (at your option) any later version. * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of + * This library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA */ +#ifdef HAVE_CONFIG_H +# include +#endif #include #include @@ -25,6 +29,7 @@ #include "libguile/chars.h" #include "libguile/root.h" #include "libguile/strings.h" +#include "libguile/generalized-vectors.h" #include "libguile/deprecation.h" #include "libguile/validate.h" #include "libguile/dynwind.h" @@ -122,6 +127,17 @@ make_stringbuf (size_t len) } } +/* Return a new stringbuf whose underlying storage consists of the LEN+1 + octets pointed to by STR (the last octet is zero). */ +SCM +scm_i_take_stringbufn (char *str, size_t len) +{ + scm_gc_register_collectable_memory (str, len + 1, "stringbuf"); + + return scm_double_cell (STRINGBUF_TAG, (scm_t_bits) str, + (scm_t_bits) len, (scm_t_bits) 0); +} + SCM scm_i_stringbuf_mark (SCM buf) { @@ -136,7 +152,7 @@ scm_i_stringbuf_free (SCM buf) STRINGBUF_OUTLINE_LENGTH (buf) + 1, "string"); } -SCM_MUTEX (stringbuf_write_mutex); +scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER; /* Copy-on-write strings. */ @@ -152,6 +168,12 @@ SCM_MUTEX (stringbuf_write_mutex); #define IS_STRING(str) (SCM_NIMP(str) && SCM_TYP7(str) == STRING_TAG) +/* Read-only strings. + */ + +#define RO_STRING_TAG (scm_tc7_string + 0x200) +#define IS_RO_STRING(str) (SCM_CELL_TYPE(str)==RO_STRING_TAG) + /* Mutation-sharing substrings */ @@ -203,22 +225,36 @@ scm_i_substring (SCM str, size_t start, size_t end) SCM buf; size_t str_start; get_str_buf_start (&str, &buf, &str_start); - scm_i_plugin_mutex_lock (&stringbuf_write_mutex); + scm_i_pthread_mutex_lock (&stringbuf_write_mutex); SET_STRINGBUF_SHARED (buf); - scm_i_plugin_mutex_unlock (&stringbuf_write_mutex); + scm_i_pthread_mutex_unlock (&stringbuf_write_mutex); return scm_double_cell (STRING_TAG, SCM_UNPACK(buf), (scm_t_bits)str_start + start, (scm_t_bits) end - start); } +SCM +scm_i_substring_read_only (SCM str, size_t start, size_t end) +{ + SCM buf; + size_t str_start; + get_str_buf_start (&str, &buf, &str_start); + scm_i_pthread_mutex_lock (&stringbuf_write_mutex); + SET_STRINGBUF_SHARED (buf); + scm_i_pthread_mutex_unlock (&stringbuf_write_mutex); + return scm_double_cell (RO_STRING_TAG, SCM_UNPACK(buf), + (scm_t_bits)str_start + start, + (scm_t_bits) end - start); +} + SCM scm_i_substring_copy (SCM str, size_t start, size_t end) { size_t len = end - start; - SCM buf; + SCM buf, my_buf; size_t str_start; get_str_buf_start (&str, &buf, &str_start); - SCM my_buf = make_stringbuf (len); + my_buf = make_stringbuf (len); memcpy (STRINGBUF_CHARS (my_buf), STRINGBUF_CHARS (buf) + str_start + start, len); scm_remember_upto_here_1 (buf); @@ -251,6 +287,13 @@ scm_c_substring (SCM str, size_t start, size_t end) return scm_i_substring (str, start, end); } +SCM +scm_c_substring_read_only (SCM str, size_t start, size_t end) +{ + validate_substring_args (str, start, end); + return scm_i_substring_read_only (str, start, end); +} + SCM scm_c_substring_copy (SCM str, size_t start, size_t end) { @@ -298,12 +341,16 @@ scm_i_string_chars (SCM str) } char * -scm_i_string_writable_chars (SCM str) +scm_i_string_writable_chars (SCM orig_str) { - SCM buf; + SCM buf, str = orig_str; size_t start; + get_str_buf_start (&str, &buf, &start); - scm_i_plugin_mutex_lock (&stringbuf_write_mutex); + if (IS_RO_STRING (str)) + scm_misc_error (NULL, "string is read-only: ~s", scm_list_1 (orig_str)); + + scm_i_pthread_mutex_lock (&stringbuf_write_mutex); if (STRINGBUF_SHARED (buf)) { /* Clone stringbuf. For this, we put all threads to sleep. @@ -312,7 +359,7 @@ scm_i_string_writable_chars (SCM str) size_t len = STRING_LENGTH (str); SCM new_buf; - scm_i_plugin_mutex_unlock (&stringbuf_write_mutex); + scm_i_pthread_mutex_unlock (&stringbuf_write_mutex); new_buf = make_stringbuf (len); memcpy (STRINGBUF_CHARS (new_buf), @@ -326,7 +373,7 @@ scm_i_string_writable_chars (SCM str) buf = new_buf; - scm_i_plugin_mutex_lock (&stringbuf_write_mutex); + scm_i_pthread_mutex_lock (&stringbuf_write_mutex); } return STRINGBUF_CHARS (buf) + start; @@ -335,7 +382,7 @@ scm_i_string_writable_chars (SCM str) void scm_i_string_stop_writing (void) { - scm_i_plugin_mutex_unlock (&stringbuf_write_mutex); + scm_i_pthread_mutex_unlock (&stringbuf_write_mutex); } /* Symbols. @@ -365,9 +412,9 @@ scm_i_make_symbol (SCM name, scm_t_bits flags, if (start == 0 && length == STRINGBUF_LENGTH (buf)) { /* reuse buf. */ - scm_i_plugin_mutex_lock (&stringbuf_write_mutex); + scm_i_pthread_mutex_lock (&stringbuf_write_mutex); SET_STRINGBUF_SHARED (buf); - scm_i_plugin_mutex_unlock (&stringbuf_write_mutex); + scm_i_pthread_mutex_unlock (&stringbuf_write_mutex); } else { @@ -381,12 +428,45 @@ scm_i_make_symbol (SCM name, scm_t_bits flags, (scm_t_bits) hash, SCM_UNPACK (props)); } +SCM +scm_i_c_make_symbol (const char *name, size_t len, + scm_t_bits flags, unsigned long hash, SCM props) +{ + SCM buf = make_stringbuf (len); + memcpy (STRINGBUF_CHARS (buf), name, len); + + return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf), + (scm_t_bits) hash, SCM_UNPACK (props)); +} + +/* Return a new symbol that uses the LEN bytes pointed to by NAME as its + underlying storage. */ +SCM +scm_i_c_take_symbol (char *name, size_t len, + scm_t_bits flags, unsigned long hash, SCM props) +{ + SCM buf = scm_i_take_stringbufn (name, len); + + return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf), + (scm_t_bits) hash, SCM_UNPACK (props)); +} + size_t scm_i_symbol_length (SCM sym) { return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym)); } +size_t +scm_c_symbol_length (SCM sym) +#define FUNC_NAME "scm_c_symbol_length" +{ + SCM_VALIDATE_SYMBOL (1, sym); + + return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym)); +} +#undef FUNC_NAME + const char * scm_i_symbol_chars (SCM sym) { @@ -410,10 +490,10 @@ SCM scm_i_symbol_substring (SCM sym, size_t start, size_t end) { SCM buf = SYMBOL_STRINGBUF (sym); - scm_i_plugin_mutex_lock (&stringbuf_write_mutex); + scm_i_pthread_mutex_lock (&stringbuf_write_mutex); SET_STRINGBUF_SHARED (buf); - scm_i_plugin_mutex_unlock (&stringbuf_write_mutex); - return scm_double_cell (STRING_TAG, SCM_UNPACK(buf), + scm_i_pthread_mutex_unlock (&stringbuf_write_mutex); + return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf), (scm_t_bits)start, (scm_t_bits) end - start); } @@ -521,7 +601,7 @@ SCM_DEFINE (scm_string, "string", 0, 0, 1, } result = scm_i_make_string (len, &data); - while (len > 0 && SCM_CONSP (chrs)) + while (len > 0 && scm_is_pair (chrs)) { SCM elt = SCM_CAR (chrs); @@ -532,7 +612,7 @@ SCM_DEFINE (scm_string, "string", 0, 0, 1, } if (len > 0) scm_misc_error (NULL, "list changed while constructing string", SCM_EOL); - if (!SCM_NULLP (chrs)) + if (!scm_is_null (chrs)) scm_wrong_type_arg_msg (NULL, 0, chrs, "proper list"); return result; @@ -592,10 +672,17 @@ SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0, "indexing. @var{k} must be a valid index of @var{str}.") #define FUNC_NAME s_scm_string_ref { + size_t len; unsigned long idx; SCM_VALIDATE_STRING (1, str); - idx = scm_to_unsigned_integer (k, 0, scm_i_string_length (str)-1); + + len = scm_i_string_length (str); + if (SCM_LIKELY (len > 0)) + idx = scm_to_unsigned_integer (k, 0, len - 1); + else + scm_out_of_range (NULL, k); + return SCM_MAKE_CHAR (scm_i_string_chars (str)[idx]); } #undef FUNC_NAME @@ -615,10 +702,17 @@ SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0, "@var{str}.") #define FUNC_NAME s_scm_string_set_x { + size_t len; unsigned long idx; SCM_VALIDATE_STRING (1, str); - idx = scm_to_unsigned_integer (k, 0, scm_i_string_length(str)-1); + + len = scm_i_string_length (str); + if (SCM_LIKELY (len > 0)) + idx = scm_to_unsigned_integer (k, 0, len - 1); + else + scm_out_of_range (NULL, k); + SCM_VALIDATE_CHAR (3, chr); { char *dst = scm_i_string_writable_chars (str); @@ -664,6 +758,32 @@ SCM_DEFINE (scm_substring, "substring", 2, 1, 0, } #undef FUNC_NAME +SCM_DEFINE (scm_substring_read_only, "substring/read-only", 2, 1, 0, + (SCM str, SCM start, SCM end), + "Return a newly allocated string formed from the characters\n" + "of @var{str} beginning with index @var{start} (inclusive) and\n" + "ending with index @var{end} (exclusive).\n" + "@var{str} must be a string, @var{start} and @var{end} must be\n" + "exact integers satisfying:\n" + "\n" + "0 <= @var{start} <= @var{end} <= (string-length @var{str}).\n" + "\n" + "The returned string is read-only.\n") +#define FUNC_NAME s_scm_substring_read_only +{ + size_t len, from, to; + + SCM_VALIDATE_STRING (1, str); + len = scm_i_string_length (str); + from = scm_to_unsigned_integer (start, 0, len); + if (SCM_UNBNDP (end)) + to = len; + else + to = scm_to_unsigned_integer (end, from, len); + return scm_i_substring_read_only (str, from, to); +} +#undef FUNC_NAME + SCM_DEFINE (scm_substring_copy, "substring/copy", 2, 1, 0, (SCM str, SCM start, SCM end), "Return a newly allocated string formed from the characters\n" @@ -723,18 +843,19 @@ SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1, char *data; SCM_VALIDATE_REST_ARGUMENT (args); - for (l = args; !SCM_NULLP (l); l = SCM_CDR (l)) + for (l = args; !scm_is_null (l); l = SCM_CDR (l)) { s = SCM_CAR (l); SCM_VALIDATE_STRING (SCM_ARGn, s); i += scm_i_string_length (s); } res = scm_i_make_string (i, &data); - for (l = args; !SCM_NULLP (l); l = SCM_CDR (l)) + for (l = args; !scm_is_null (l); l = SCM_CDR (l)) { + size_t len; s = SCM_CAR (l); SCM_VALIDATE_STRING (SCM_ARGn, s); - size_t len = scm_i_string_length (s); + len = scm_i_string_length (s); memcpy (data, scm_i_string_chars (s), len); data += len; scm_remember_upto_here_1 (s); @@ -771,32 +892,30 @@ scm_from_locale_string (const char *str) SCM scm_take_locale_stringn (char *str, size_t len) { + SCM buf, res; + if (len == (size_t)-1) - return scm_take_locale_string (str); + len = strlen (str); else { - /* STR might not be zero terminated and we are not allowed to - look at str[len], so we have to make a new one... - */ - SCM res = scm_from_locale_stringn (str, len); - free (str); - return res; + /* Ensure STR is null terminated. A realloc for 1 extra byte should + often be satisfied from the alignment padding after the block, with + no actual data movement. */ + str = scm_realloc (str, len+1); + str[len] = '\0'; } + + buf = scm_i_take_stringbufn (str, len); + res = scm_double_cell (STRING_TAG, + SCM_UNPACK (buf), + (scm_t_bits) 0, (scm_t_bits) len); + return res; } SCM scm_take_locale_string (char *str) { - size_t len = strlen (str); - SCM buf, res; - - buf = scm_double_cell (STRINGBUF_TAG, (scm_t_bits) str, - (scm_t_bits) len, (scm_t_bits) 0); - res = scm_double_cell (STRING_TAG, - SCM_UNPACK (buf), - (scm_t_bits) 0, (scm_t_bits) len); - scm_gc_register_collectable_memory (str, len+1, "string"); - return res; + return scm_take_locale_stringn (str, -1); } char * @@ -874,22 +993,22 @@ scm_i_allocate_string_pointers (SCM list) if (len < 0) scm_wrong_type_arg_msg (NULL, 0, list, "proper list"); - scm_frame_begin (0); + scm_dynwind_begin (0); result = (char **) scm_malloc ((len + 1) * sizeof (char *)); result[len] = NULL; - scm_frame_unwind_handler (free, result, 0); + scm_dynwind_unwind_handler (free, result, 0); /* The list might be have been modified in another thread, so we check LIST before each access. */ - for (i = 0; i < len && SCM_CONSP (list); i++) + for (i = 0; i < len && scm_is_pair (list); i++) { result[i] = scm_to_locale_string (SCM_CAR (list)); list = SCM_CDR (list); } - scm_frame_end (); + scm_dynwind_end (); return result; } @@ -921,28 +1040,47 @@ scm_i_get_substring_spec (size_t len, #if SCM_ENABLE_DEPRECATED +/* When these definitions are removed, it becomes reasonable to use + read-only strings for string literals. For that, change the reader + to create string literals with scm_c_substring_read_only instead of + with scm_c_substring_copy. +*/ + int -SCM_STRINGP (SCM str) +scm_i_deprecated_stringp (SCM str) { scm_c_issue_deprecation_warning ("SCM_STRINGP is deprecated. Use scm_is_string instead."); - /* We don't accept shared substrings here since they are not - null-terminated. - */ - - return IS_STRING (str) && !IS_SH_STRING (str); + return scm_is_string (str); } char * -SCM_STRING_CHARS (SCM str) +scm_i_deprecated_string_chars (SCM str) { char *chars; scm_c_issue_deprecation_warning ("SCM_STRING_CHARS is deprecated. See the manual for alternatives."); - /* The following is wrong, of course... + /* We don't accept shared substrings here since they are not + null-terminated. + */ + if (IS_SH_STRING (str)) + scm_misc_error (NULL, + "SCM_STRING_CHARS does not work with shared substrings.", + SCM_EOL); + + /* We explicitly test for read-only strings to produce a better + error message. + */ + + if (IS_RO_STRING (str)) + scm_misc_error (NULL, + "SCM_STRING_CHARS does not work with read-only strings.", + SCM_EOL); + + /* The following is still wrong, of course... */ chars = scm_i_string_writable_chars (str); scm_i_string_stop_writing (); @@ -950,7 +1088,7 @@ SCM_STRING_CHARS (SCM str) } size_t -SCM_STRING_LENGTH (SCM str) +scm_i_deprecated_string_length (SCM str) { scm_c_issue_deprecation_warning ("SCM_STRING_LENGTH is deprecated. Use scm_c_string_length instead."); @@ -959,6 +1097,36 @@ SCM_STRING_LENGTH (SCM str) #endif +static SCM +string_handle_ref (scm_t_array_handle *h, size_t index) +{ + return scm_c_string_ref (h->array, index); +} + +static void +string_handle_set (scm_t_array_handle *h, size_t index, SCM val) +{ + scm_c_string_set_x (h->array, index, val); +} + +static void +string_get_handle (SCM v, scm_t_array_handle *h) +{ + h->array = v; + h->ndims = 1; + h->dims = &h->dim0; + h->dim0.lbnd = 0; + h->dim0.ubnd = scm_c_string_length (v) - 1; + h->dim0.inc = 1; + h->element_type = SCM_ARRAY_ELEMENT_TYPE_CHAR; + h->elements = h->writable_elements = NULL; +} + +SCM_ARRAY_IMPLEMENTATION (scm_tc7_string, 0x7f & ~2, + string_handle_ref, string_handle_set, + string_get_handle); +SCM_VECTOR_IMPLEMENTATION (SCM_ARRAY_ELEMENT_TYPE_CHAR, scm_make_string); + void scm_init_strings () {