X-Git-Url: https://git.hcoop.net/bpt/guile.git/blobdiff_plain/29553c54b597880d329013c6b4b435e2949a9872..574b7be0ba5dbbecfacf172ed81a5f22d1d5566e:/libguile/bytevectors.c diff --git a/libguile/bytevectors.c b/libguile/bytevectors.c index e0998197c..f01469774 100644 --- a/libguile/bytevectors.c +++ b/libguile/bytevectors.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2009 Free Software Foundation, Inc. +/* Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -40,6 +40,7 @@ #include #include #include +#include #ifdef HAVE_LIMITS_H # include @@ -130,7 +131,7 @@ SCM_VALIDATE_SYMBOL (3, endianness); \ \ { \ - _sign long c_value; \ + scm_t_signed_bits c_value; \ INT_TYPE (_len, _sign) c_value_short; \ \ if (SCM_UNLIKELY (!SCM_I_INUMP (value))) \ @@ -155,7 +156,7 @@ INTEGER_ACCESSOR_PROLOGUE (_len, _sign); \ \ { \ - _sign long c_value; \ + scm_t_signed_bits c_value; \ INT_TYPE (_len, _sign) c_value_short; \ \ if (SCM_UNLIKELY (!SCM_I_INUMP (value))) \ @@ -181,13 +182,21 @@ #define SCM_BYTEVECTOR_SET_LENGTH(_bv, _len) \ SCM_SET_CELL_WORD_1 ((_bv), (scm_t_bits) (_len)) - -#define SCM_BYTEVECTOR_SET_ELEMENT_TYPE(bv, hint) \ - SCM_SET_BYTEVECTOR_FLAGS ((bv), (hint)) +#define SCM_BYTEVECTOR_SET_CONTENTS(_bv, _contents) \ + SCM_SET_CELL_WORD_2 ((_bv), (scm_t_bits) (_contents)) +#define SCM_BYTEVECTOR_SET_CONTIGUOUS_P(bv, contiguous_p) \ + SCM_SET_BYTEVECTOR_FLAGS ((bv), \ + SCM_BYTEVECTOR_ELEMENT_TYPE (bv) \ + | ((contiguous_p) << 8UL)) + +#define SCM_BYTEVECTOR_SET_ELEMENT_TYPE(bv, hint) \ + SCM_SET_BYTEVECTOR_FLAGS ((bv), \ + (hint) \ + | (SCM_BYTEVECTOR_CONTIGUOUS_P (bv) << 8UL)) #define SCM_BYTEVECTOR_TYPE_SIZE(var) \ (scm_i_array_element_type_sizes[SCM_BYTEVECTOR_ELEMENT_TYPE (var)]/8) #define SCM_BYTEVECTOR_TYPED_LENGTH(var) \ - SCM_BYTEVECTOR_LENGTH (var) / SCM_BYTEVECTOR_TYPE_SIZE (var) + (SCM_BYTEVECTOR_LENGTH (var) / SCM_BYTEVECTOR_TYPE_SIZE (var)) /* The empty bytevector. */ SCM scm_null_bytevector = SCM_UNSPECIFIED; @@ -211,13 +220,18 @@ make_bytevector (size_t len, scm_t_array_element_type element_type) ret = scm_null_bytevector; else { + signed char *contents; + c_len = len * (scm_i_array_element_type_sizes[element_type] / 8); - ret = PTR2SCM (scm_gc_malloc_pointerless (SCM_BYTEVECTOR_HEADER_BYTES + c_len, - SCM_GC_BYTEVECTOR)); + contents = scm_gc_malloc_pointerless (SCM_BYTEVECTOR_HEADER_BYTES + c_len, + SCM_GC_BYTEVECTOR); + ret = PTR2SCM (contents); + contents += SCM_BYTEVECTOR_HEADER_BYTES; - SCM_SET_CELL_TYPE (ret, scm_tc7_bytevector); SCM_BYTEVECTOR_SET_LENGTH (ret, c_len); + SCM_BYTEVECTOR_SET_CONTENTS (ret, contents); + SCM_BYTEVECTOR_SET_CONTIGUOUS_P (ret, 1); SCM_BYTEVECTOR_SET_ELEMENT_TYPE (ret, element_type); } @@ -225,28 +239,29 @@ make_bytevector (size_t len, scm_t_array_element_type element_type) } /* Return a bytevector of LEN elements of type ELEMENT_TYPE, with element - values taken from CONTENTS. */ + values taken from CONTENTS. Assume that the storage for CONTENTS will be + automatically reclaimed when it becomes unreachable. */ static inline SCM make_bytevector_from_buffer (size_t len, void *contents, scm_t_array_element_type element_type) { SCM ret; - /* We actually never reuse storage from CONTENTS. Hans Boehm says in - that realloc(3) "shouldn't have been invented" and he may well - be right. */ - ret = make_bytevector (len, element_type); - - if (len > 0) + if (SCM_UNLIKELY (len == 0)) + ret = make_bytevector (len, element_type); + else { size_t c_len; + ret = PTR2SCM (scm_gc_malloc (SCM_BYTEVECTOR_HEADER_BYTES, + SCM_GC_BYTEVECTOR)); + c_len = len * (scm_i_array_element_type_sizes[element_type] / 8); - memcpy (SCM_BYTEVECTOR_CONTENTS (ret), - contents, - c_len); - scm_gc_free (contents, c_len, SCM_GC_BYTEVECTOR); + SCM_BYTEVECTOR_SET_LENGTH (ret, c_len); + SCM_BYTEVECTOR_SET_CONTENTS (ret, contents); + SCM_BYTEVECTOR_SET_CONTIGUOUS_P (ret, 0); + SCM_BYTEVECTOR_SET_ELEMENT_TYPE (ret, element_type); } return ret; @@ -300,11 +315,21 @@ scm_c_shrink_bytevector (SCM bv, size_t c_new_len) SCM_BYTEVECTOR_SET_LENGTH (bv, c_new_len); - /* Resize the existing buffer. */ - new_bv = PTR2SCM (scm_gc_realloc (SCM2PTR (bv), - c_len + SCM_BYTEVECTOR_HEADER_BYTES, - c_new_len + SCM_BYTEVECTOR_HEADER_BYTES, - SCM_GC_BYTEVECTOR)); + if (SCM_BYTEVECTOR_CONTIGUOUS_P (bv)) + new_bv = PTR2SCM (scm_gc_realloc (SCM2PTR (bv), + c_len + SCM_BYTEVECTOR_HEADER_BYTES, + c_new_len + SCM_BYTEVECTOR_HEADER_BYTES, + SCM_GC_BYTEVECTOR)); + else + { + signed char *c_bv; + + c_bv = scm_gc_realloc (SCM_BYTEVECTOR_CONTENTS (bv), + c_len, c_new_len, SCM_GC_BYTEVECTOR); + SCM_BYTEVECTOR_SET_CONTENTS (bv, c_bv); + + new_bv = bv; + } return new_bv; } @@ -457,6 +482,8 @@ SCM_DEFINE (scm_make_bytevector, "make-bytevector", 1, 1, 0, for (i = 0; i < c_len; i++) contents[i] = c_fill; } + else + memset (SCM_BYTEVECTOR_CONTENTS (bv), 0, c_len); return bv; } @@ -486,7 +513,8 @@ SCM_DEFINE (scm_bytevector_eq_p, "bytevector=?", 2, 0, 0, c_len1 = SCM_BYTEVECTOR_LENGTH (bv1); c_len2 = SCM_BYTEVECTOR_LENGTH (bv2); - if (c_len1 == c_len2) + if (c_len1 == c_len2 && (SCM_BYTEVECTOR_ELEMENT_TYPE (bv1) + == SCM_BYTEVECTOR_ELEMENT_TYPE (bv2))) { signed char *c_bv1, *c_bv2; @@ -604,9 +632,12 @@ SCM_DEFINE (scm_uniform_array_to_bytevector, "uniform-array->bytevector", sz = scm_array_handle_uniform_element_bit_size (&h); if (sz >= 8 && ((sz % 8) == 0)) byte_len = len * (sz / 8); - else + else if (sz < 8) /* byte_len = ceil (len * sz / 8) */ byte_len = (len * sz + 7) / 8; + else + /* an internal guile error, really */ + SCM_MISC_ERROR ("uniform elements larger than 8 bits must fill whole bytes", SCM_EOL); ret = make_bytevector (byte_len, SCM_ARRAY_ELEMENT_TYPE_VU8); memcpy (SCM_BYTEVECTOR_CONTENTS (ret), elts, byte_len); @@ -706,7 +737,7 @@ SCM_DEFINE (scm_u8_list_to_bytevector, "u8-list->bytevector", 1, 0, 0, if (SCM_LIKELY (SCM_I_INUMP (item))) { - long c_item; + scm_t_signed_bits c_item; c_item = SCM_I_INUM (item); if (SCM_LIKELY ((c_item >= 0) && (c_item < 256))) @@ -922,7 +953,7 @@ bytevector_unsigned_ref (const char *c_bv, size_t c_size, SCM endianness) #define GENERIC_INTEGER_SET(_sign) \ if (c_size < 3) \ { \ - _sign int c_value; \ + scm_t_signed_bits c_value; \ \ if (SCM_UNLIKELY (!SCM_I_INUMP (value))) \ goto range_error; \ @@ -1868,58 +1899,50 @@ utf_encoding_name (char *name, size_t utf_width, SCM endianness) #define MAX_UTF_ENCODING_NAME_LEN 16 /* Produce the body of a `string->utf' function. */ -#define STRING_TO_UTF(_utf_width) \ - SCM utf; \ - int err; \ - char *c_str; \ - char c_utf_name[MAX_UTF_ENCODING_NAME_LEN]; \ - char *c_utf = NULL, *c_locale; \ - size_t c_strlen, c_raw_strlen, c_utf_len = 0; \ - \ - SCM_VALIDATE_STRING (1, str); \ - if (endianness == SCM_UNDEFINED) \ - endianness = scm_sym_big; \ - else \ - SCM_VALIDATE_SYMBOL (2, endianness); \ - \ - c_strlen = scm_c_string_length (str); \ - c_raw_strlen = c_strlen * ((_utf_width) / 8); \ - do \ - { \ - c_str = (char *) alloca (c_raw_strlen + 1); \ - c_raw_strlen = scm_to_locale_stringbuf (str, c_str, c_strlen); \ - } \ - while (c_raw_strlen > c_strlen); \ - c_str[c_raw_strlen] = '\0'; \ - \ - utf_encoding_name (c_utf_name, (_utf_width), endianness); \ - \ - c_locale = (char *) alloca (strlen (locale_charset ()) + 1); \ - strcpy (c_locale, locale_charset ()); \ - \ - err = mem_iconveh (c_str, c_raw_strlen, \ - c_locale, c_utf_name, \ - iconveh_question_mark, NULL, \ - &c_utf, &c_utf_len); \ - if (SCM_UNLIKELY (err)) \ - scm_syserror_msg (FUNC_NAME, "failed to convert string: ~A", \ - scm_list_1 (str), err); \ - else \ - { \ - /* C_UTF is null-terminated. It is malloc(3)-allocated, so we cannot \ - use `scm_c_take_bytevector ()'. */ \ - scm_dynwind_begin (0); \ - scm_dynwind_free (c_utf); \ - \ - utf = make_bytevector (c_utf_len, \ - SCM_ARRAY_ELEMENT_TYPE_VU8); \ - memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf, \ - c_utf_len); \ - \ - scm_dynwind_end (); \ - } \ - \ - return (utf); +#define STRING_TO_UTF(_utf_width) \ + SCM utf; \ + int err; \ + char c_utf_name[MAX_UTF_ENCODING_NAME_LEN]; \ + char *c_utf = NULL; \ + size_t c_strlen, c_utf_len = 0; \ + \ + SCM_VALIDATE_STRING (1, str); \ + if (endianness == SCM_UNDEFINED) \ + endianness = scm_sym_big; \ + else \ + SCM_VALIDATE_SYMBOL (2, endianness); \ + \ + utf_encoding_name (c_utf_name, (_utf_width), endianness); \ + \ + c_strlen = scm_i_string_length (str); \ + if (scm_i_is_narrow_string (str)) \ + { \ + err = mem_iconveh (scm_i_string_chars (str), c_strlen, \ + "ISO-8859-1", c_utf_name, \ + iconveh_question_mark, NULL, \ + &c_utf, &c_utf_len); \ + if (SCM_UNLIKELY (err)) \ + scm_syserror_msg (FUNC_NAME, "failed to convert string: ~A", \ + scm_list_1 (str), err); \ + } \ + else \ + { \ + const scm_t_wchar *wbuf = scm_i_string_wide_chars (str); \ + c_utf = u32_conv_to_encoding (c_utf_name, \ + iconveh_question_mark, \ + (scm_t_uint32 *) wbuf, \ + c_strlen, NULL, NULL, &c_utf_len); \ + if (SCM_UNLIKELY (c_utf == NULL)) \ + scm_syserror_msg (FUNC_NAME, "failed to convert string: ~A", \ + scm_list_1 (str), errno); \ + } \ + scm_dynwind_begin (0); \ + scm_dynwind_free (c_utf); \ + utf = make_bytevector (c_utf_len, SCM_ARRAY_ELEMENT_TYPE_VU8); \ + memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf, c_utf_len); \ + scm_dynwind_end (); \ + \ + return (utf); @@ -1931,36 +1954,30 @@ SCM_DEFINE (scm_string_to_utf8, "string->utf8", #define FUNC_NAME s_scm_string_to_utf8 { SCM utf; - char *c_str; uint8_t *c_utf; - size_t c_strlen, c_raw_strlen; + size_t c_strlen, c_utf_len = 0; SCM_VALIDATE_STRING (1, str); - c_strlen = scm_c_string_length (str); - c_raw_strlen = c_strlen; - do + c_strlen = scm_i_string_length (str); + if (scm_i_is_narrow_string (str)) + c_utf = u8_conv_from_encoding ("ISO-8859-1", iconveh_question_mark, + scm_i_string_chars (str), c_strlen, + NULL, NULL, &c_utf_len); + else { - c_str = (char *) alloca (c_raw_strlen + 1); - c_raw_strlen = scm_to_locale_stringbuf (str, c_str, c_strlen); + const scm_t_wchar *wbuf = scm_i_string_wide_chars (str); + c_utf = u32_to_u8 ((const uint32_t *) wbuf, c_strlen, NULL, &c_utf_len); } - while (c_raw_strlen > c_strlen); - c_str[c_raw_strlen] = '\0'; - - c_utf = u8_strconv_from_locale (c_str); if (SCM_UNLIKELY (c_utf == NULL)) scm_syserror (FUNC_NAME); else { - /* C_UTF is null-terminated. It is malloc(3)-allocated, so we cannot - use `scm_c_take_bytevector ()'. */ scm_dynwind_begin (0); scm_dynwind_free (c_utf); - utf = make_bytevector (UTF_STRLEN (8, c_utf), - SCM_ARRAY_ELEMENT_TYPE_VU8); - memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf, - UTF_STRLEN (8, c_utf)); + utf = make_bytevector (c_utf_len, SCM_ARRAY_ELEMENT_TYPE_VU8); + memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf, c_utf_len); scm_dynwind_end (); } @@ -1997,10 +2014,10 @@ SCM_DEFINE (scm_string_to_utf32, "string->utf32", #define UTF_TO_STRING(_utf_width) \ SCM str = SCM_BOOL_F; \ int err; \ - char *c_str = NULL, *c_locale; \ + char *c_str = NULL; \ char c_utf_name[MAX_UTF_ENCODING_NAME_LEN]; \ - const char *c_utf; \ - size_t c_strlen = 0, c_utf_len; \ + char *c_utf; \ + size_t c_strlen = 0, c_utf_len = 0; \ \ SCM_VALIDATE_BYTEVECTOR (1, utf); \ if (endianness == SCM_UNDEFINED) \ @@ -2012,20 +2029,19 @@ SCM_DEFINE (scm_string_to_utf32, "string->utf32", c_utf = (char *) SCM_BYTEVECTOR_CONTENTS (utf); \ utf_encoding_name (c_utf_name, (_utf_width), endianness); \ \ - c_locale = (char *) alloca (strlen (locale_charset ()) + 1); \ - strcpy (c_locale, locale_charset ()); \ - \ err = mem_iconveh (c_utf, c_utf_len, \ - c_utf_name, c_locale, \ + c_utf_name, "UTF-8", \ iconveh_question_mark, NULL, \ &c_str, &c_strlen); \ if (SCM_UNLIKELY (err)) \ scm_syserror_msg (FUNC_NAME, "failed to convert to string: ~A", \ scm_list_1 (utf), err); \ else \ - /* C_STR is null-terminated. */ \ - str = scm_take_locale_stringn (c_str, c_strlen); \ - \ + { \ + str = scm_from_stringn (c_str, c_strlen, "UTF-8", \ + SCM_FAILED_CONVERSION_ERROR); \ + free (c_str); \ + } \ return (str); @@ -2037,29 +2053,15 @@ SCM_DEFINE (scm_utf8_to_string, "utf8->string", #define FUNC_NAME s_scm_utf8_to_string { SCM str; - int err; - char *c_str = NULL, *c_locale; const char *c_utf; - size_t c_utf_len, c_strlen = 0; + size_t c_utf_len = 0; SCM_VALIDATE_BYTEVECTOR (1, utf); c_utf_len = SCM_BYTEVECTOR_LENGTH (utf); - - c_locale = (char *) alloca (strlen (locale_charset ()) + 1); - strcpy (c_locale, locale_charset ()); - c_utf = (char *) SCM_BYTEVECTOR_CONTENTS (utf); - err = mem_iconveh (c_utf, c_utf_len, - "UTF-8", c_locale, - iconveh_question_mark, NULL, - &c_str, &c_strlen); - if (SCM_UNLIKELY (err)) - scm_syserror_msg (FUNC_NAME, "failed to convert to string: ~A", - scm_list_1 (utf), err); - else - /* C_STR is null-terminated. */ - str = scm_take_locale_stringn (c_str, c_strlen); + str = scm_from_stringn (c_utf, c_utf_len, "UTF-8", + SCM_FAILED_CONVERSION_ERROR); return (str); } @@ -2087,7 +2089,6 @@ SCM_DEFINE (scm_utf32_to_string, "utf32->string", } #undef FUNC_NAME - /* Bytevectors as generalized vectors & arrays. */ @@ -2097,7 +2098,7 @@ bytevector_ref_c32 (SCM bv, SCM idx) { /* FIXME add some checks */ const float *contents = (const float*)SCM_BYTEVECTOR_CONTENTS (bv); size_t i = scm_to_size_t (idx); - return scm_c_make_rectangular (contents[i/8], contents[i/8 + 1]); + return scm_c_make_rectangular (contents[i/4], contents[i/4 + 1]); } static SCM @@ -2105,7 +2106,7 @@ bytevector_ref_c64 (SCM bv, SCM idx) { /* FIXME add some checks */ const double *contents = (const double*)SCM_BYTEVECTOR_CONTENTS (bv); size_t i = scm_to_size_t (idx); - return scm_c_make_rectangular (contents[i/16], contents[i/16 + 1]); + return scm_c_make_rectangular (contents[i/8], contents[i/8 + 1]); } typedef SCM (*scm_t_bytevector_ref_fn)(SCM, SCM); @@ -2142,23 +2143,22 @@ bv_handle_ref (scm_t_array_handle *h, size_t index) return ref_fn (h->array, byte_index); } +/* FIXME add checks!!! */ static SCM bytevector_set_c32 (SCM bv, SCM idx, SCM val) -{ /* checks are unnecessary here */ - float *contents = (float*)SCM_BYTEVECTOR_CONTENTS (bv); +{ float *contents = (float*)SCM_BYTEVECTOR_CONTENTS (bv); size_t i = scm_to_size_t (idx); - contents[i/8] = scm_c_real_part (val); - contents[i/8 + 1] = scm_c_imag_part (val); + contents[i/4] = scm_c_real_part (val); + contents[i/4 + 1] = scm_c_imag_part (val); return SCM_UNSPECIFIED; } static SCM bytevector_set_c64 (SCM bv, SCM idx, SCM val) -{ /* checks are unnecessary here */ - double *contents = (double*)SCM_BYTEVECTOR_CONTENTS (bv); +{ double *contents = (double*)SCM_BYTEVECTOR_CONTENTS (bv); size_t i = scm_to_size_t (idx); - contents[i/16] = scm_c_real_part (val); - contents[i/16 + 1] = scm_c_imag_part (val); + contents[i/8] = scm_c_real_part (val); + contents[i/8 + 1] = scm_c_imag_part (val); return SCM_UNSPECIFIED; } @@ -2216,18 +2216,18 @@ void scm_bootstrap_bytevectors (void) { /* This must be instantiated here because the generalized-vector API may - want to access bytevectors even though `(rnrs bytevector)' hasn't been + want to access bytevectors even though `(rnrs bytevectors)' hasn't been loaded. */ - scm_null_bytevector = - scm_gc_protect_object (make_bytevector (0, SCM_ARRAY_ELEMENT_TYPE_VU8)); + scm_null_bytevector = make_bytevector (0, SCM_ARRAY_ELEMENT_TYPE_VU8); #ifdef WORDS_BIGENDIAN - scm_i_native_endianness = scm_permanent_object (scm_from_locale_symbol ("big")); + scm_i_native_endianness = scm_from_latin1_symbol ("big"); #else - scm_i_native_endianness = scm_permanent_object (scm_from_locale_symbol ("little")); + scm_i_native_endianness = scm_from_latin1_symbol ("little"); #endif - scm_c_register_extension ("libguile", "scm_init_bytevectors", + scm_c_register_extension ("libguile-" SCM_EFFECTIVE_VERSION, + "scm_init_bytevectors", (scm_t_extension_init_func) scm_init_bytevectors, NULL);