From c829a4274f35bae62cae06312335a0596504a333 Mon Sep 17 00:00:00 2001 From: Marius Vollmer Date: Tue, 10 Aug 2004 13:20:59 +0000 Subject: [PATCH] * strings.h, deprecated.h (SCM_STRING_COERCE_0TERMINATION_X): Moved from strings.h to deprecated.h. * deprecated.c, deprecated.h (SCM_CHARS, SCM_LENGTH): Removed. * strings.h, strings.c (SCM_MAKE_STRING_TAG): Renamed to SCM_I_MAKE_STRING_TAG, changed all uses. (SCM_STRING_CHARS, SCM_STRING_UCHARS, SCM_STRING_LENGTH): Renamed to SCM_I_STRING_CHARS, SCM_I_STRING_UCHARS, and SCM_I_STRING_LENGTH respectively. For a short time, the old names are still there as aliases. Not all uses have been changed yet, but the ones in strings.c have. (SCM_STRING_MAX_LENGTH): Do not hardcode to 24 bits, compute from SCM_T_BITS_MAX. (scm_is_string, scm_from_locale_string, scm_from_locale_stringn, scm_take_locale_string, scm_take_locale_stringn, scm_to_locale_string, scm_to_locale_stringn, scm_to_locale_stringbuf): New. (scm_c_string2str, scm_c_substring2str): Deprecated by moving to deprecated.[hc]. Implemented in terms of the new functions above. (scm_take_str, scm_take0str, scm_mem2string, scm_str2string, scm_makfrom0str): Reimplemented in terms of the new functions from above. They will be discouraged shortly. (scm_substring): Do not use scm_mem2string. (scm_i_allocate_string_pointers, scm_i_free_string_pointers): New, to replace similar code from posix.c, simpos.c, and dynl.c. (scm_string_append): Use memcpy instead of explicit loop. Do not use register keyword. Use plain 'char' instead of 'unsigned char'. 
--- libguile/deprecated.c | 73 +++++++------ libguile/deprecated.h | 40 +++++-- libguile/strings.c | 247 ++++++++++++++++++++++++++++-------------- libguile/strings.h | 52 +++++---- 4 files changed, 271 insertions(+), 141 deletions(-) diff --git a/libguile/deprecated.c b/libguile/deprecated.c index 846dc7cc2..99b6e70cd 100644 --- a/libguile/deprecated.c +++ b/libguile/deprecated.c @@ -649,33 +649,6 @@ scm_strprint_obj (SCM obj) return scm_object_to_string (obj, SCM_UNDEFINED); } -char * -scm_i_object_chars (SCM obj) -{ - scm_c_issue_deprecation_warning - ("SCM_CHARS is deprecated. Use SCM_STRING_CHARS or " - "SCM_SYMBOL_CHARS instead."); - if (SCM_STRINGP (obj)) - return SCM_STRING_CHARS (obj); - if (SCM_SYMBOLP (obj)) - return SCM_SYMBOL_CHARS (obj); - abort (); -} - -long -scm_i_object_length (SCM obj) -{ - scm_c_issue_deprecation_warning - ("SCM_LENGTH is deprecated. Use SCM_STRING_LENGTH instead, for example."); - if (SCM_STRINGP (obj)) - return SCM_STRING_LENGTH (obj); - if (SCM_SYMBOLP (obj)) - return SCM_SYMBOL_LENGTH (obj); - if (SCM_VECTORP (obj)) - return SCM_VECTOR_LENGTH (obj); - abort (); -} - SCM scm_sym2ovcell_soft (SCM sym, SCM obarray) { @@ -841,8 +814,8 @@ SCM_DEFINE (scm_string_to_obarray_symbol, "string->obarray-symbol", 2, 1, 0, else if (scm_is_eq (o, SCM_BOOL_T)) o = SCM_BOOL_F; - vcell = scm_intern_obarray_soft (SCM_STRING_CHARS(s), - SCM_STRING_LENGTH (s), + vcell = scm_intern_obarray_soft (SCM_I_STRING_CHARS(s), + SCM_I_STRING_LENGTH (s), o, softness); if (scm_is_false (vcell)) @@ -1074,10 +1047,10 @@ SCM_DEFINE (scm_gentemp, "gentemp", 0, 2, 0, else { SCM_VALIDATE_STRING (1, prefix); - len = SCM_STRING_LENGTH (prefix); + len = SCM_I_STRING_LENGTH (prefix); if (len > MAX_PREFIX_LENGTH) name = SCM_MUST_MALLOC (MAX_PREFIX_LENGTH + SCM_INTBUFLEN); - strncpy (name, SCM_STRING_CHARS (prefix), len); + strncpy (name, SCM_I_STRING_CHARS (prefix), len); } if (SCM_UNBNDP (obarray)) @@ -1129,6 +1102,44 @@ SCM_INUM (SCM obj) return scm_to_intmax (obj); 
} +char * +scm_c_string2str (SCM obj, char *str, size_t *lenp) +{ + scm_c_issue_deprecation_warning + ("scm_c_string2str is deprecated. Use scm_to_locale_stringbuf or similar instead."); + + if (str == NULL) + { + char *result = scm_to_locale_string (obj); + if (lenp) + *lenp = SCM_I_STRING_LENGTH (obj); + return result; + } + else + { + /* Pray that STR is large enough. + */ + size_t len = scm_to_locale_stringbuf (obj, str, SCM_I_SIZE_MAX); + str[len] = '\0'; + if (lenp) + *lenp = len; + return str; + } +} + +char * +scm_c_substring2str (SCM obj, char *str, size_t start, size_t len) +{ + scm_c_issue_deprecation_warning + ("scm_c_substring2str is deprecated. Use scm_substring plus scm_to_locale_stringbuf instead."); + + if (start) + obj = scm_substring (obj, scm_from_size_t (start), SCM_UNDEFINED); + + scm_to_locale_stringbuf (obj, str, len); + return str; +} + double scm_truncate (double x) { diff --git a/libguile/deprecated.h b/libguile/deprecated.h index f9f500e6f..4336dd472 100644 --- a/libguile/deprecated.h +++ b/libguile/deprecated.h @@ -226,15 +226,6 @@ SCM_API SCM scm_strprint_obj (SCM obj); SCM_API SCM scm_read_0str (char *expr); SCM_API SCM scm_eval_0str (const char *expr); -SCM_API char *scm_i_object_chars (SCM); - -#define SCM_CHARS(x) scm_i_object_chars(x) -#define SCM_UCHARS(x) ((unsigned char *)SCM_CHARS(x)) - -SCM_API long scm_i_object_length (SCM); - -#define SCM_LENGTH(x) scm_i_object_length(x) - #define scm_strhash(str, len, n) (scm_string_hash ((str), (len)) % (n)) SCM_API SCM scm_sym2ovcell_soft (SCM sym, SCM obarray); @@ -381,6 +372,37 @@ SCM_API scm_t_signed_bits SCM_INUM (SCM obj); cvar = SCM_INUM (k); \ } while (0) +#define SCM_STRING_COERCE_0TERMINATION_X(x) (x) + +/* XXX - buggy interface, STR might not be large enough. + + Converts the given Scheme string OBJ into a C string, containing a copy + of OBJ's content with a trailing null byte. If LENP is non-NULL, set + *LENP to the string's length. 
+ + When STR is non-NULL it receives the copy and is returned by the function, + otherwise new memory is allocated and the caller is responsible for + freeing it via free(). If out of memory, NULL is returned. + + Note that Scheme strings may contain arbitrary data, including null + characters. This means that null termination is not a reliable way to + determine the length of the returned value. However, the function always + copies the complete contents of OBJ, and sets *LENP to the length of the + scheme string (if LENP is non-null). +*/ +SCM_API char *scm_c_string2str (SCM obj, char *str, size_t *lenp); + +/* XXX - buggy interface, you don't know how many bytes have been copied. + + Copy LEN characters at START from the Scheme string OBJ to memory + at STR. START is an index into OBJ; zero means the beginning of + the string. STR has already been allocated by the caller. + + If START + LEN is off the end of OBJ, silently truncate the source + region to fit the string. If truncation occurs, the corresponding + area of STR is left unchanged. +*/ +SCM_API char *scm_c_substring2str (SCM obj, char *str, size_t start, size_t len); /* Deprecated because the names belong to what is now scm_truncate_number and scm_round_number. 
diff --git a/libguile/strings.c b/libguile/strings.c index 9c45d7396..1308753aa 100644 --- a/libguile/strings.c +++ b/libguile/strings.c @@ -26,6 +26,7 @@ #include "libguile/strings.h" #include "libguile/deprecation.h" #include "libguile/validate.h" +#include "libguile/dynwind.h" @@ -37,7 +38,7 @@ SCM_DEFINE (scm_string_p, "string?", 1, 0, 0, "Return @code{#t} if @var{obj} is a string, else @code{#f}.") #define FUNC_NAME s_scm_string_p { - return scm_from_bool (SCM_STRINGP (obj)); + return scm_from_bool (SCM_I_STRINGP (obj)); } #undef FUNC_NAME @@ -61,7 +62,7 @@ SCM_DEFINE (scm_string, "string", 0, 0, 1, } { - unsigned char *data = SCM_STRING_UCHARS (result); + unsigned char *data = SCM_I_STRING_UCHARS (result); while (!SCM_NULLP (chrs)) { @@ -108,7 +109,7 @@ scm_take_str (char *s, size_t len) SCM_ASSERT_RANGE (2, scm_from_ulong (len), len <= SCM_STRING_MAX_LENGTH); - answer = scm_cell (SCM_MAKE_STRING_TAG (len), (scm_t_bits) s); + answer = scm_cell (SCM_I_MAKE_STRING_TAG (len), (scm_t_bits) s); scm_gc_register_collectable_memory (s, len+1, "string"); return answer; @@ -120,24 +121,21 @@ scm_take_str (char *s, size_t len) SCM scm_take0str (char *s) { - return scm_take_str (s, strlen (s)); + return scm_take_locale_string (s); } SCM scm_mem2string (const char *src, size_t len) { - SCM s = scm_allocate_string (len); - char *dst = SCM_STRING_CHARS (s); - memcpy (dst, src, len); - return s; + return scm_from_locale_stringn (src, len); } SCM scm_str2string (const char *src) { - return scm_mem2string (src, strlen (src)); + return scm_from_locale_string (src); } @@ -145,7 +143,7 @@ SCM scm_makfrom0str (const char *src) { if (!src) return SCM_BOOL_F; - return scm_mem2string (src, strlen (src)); + return scm_from_locale_string (src); } @@ -168,7 +166,7 @@ scm_allocate_string (size_t len) mem = (char *) scm_gc_malloc (len + 1, "string"); mem[len] = 0; - s = scm_cell (SCM_MAKE_STRING_TAG (len), (scm_t_bits) mem); + s = scm_cell (SCM_I_MAKE_STRING_TAG (len), (scm_t_bits) mem); 
return s; } @@ -192,7 +190,7 @@ SCM_DEFINE (scm_make_string, "make-string", 1, 1, 0, SCM_VALIDATE_CHAR (2, chr); - dst = SCM_STRING_UCHARS (res); + dst = SCM_I_STRING_UCHARS (res); memset (dst, SCM_CHAR (chr), i); } @@ -207,7 +205,7 @@ SCM_DEFINE (scm_string_length, "string-length", 1, 0, 0, #define FUNC_NAME s_scm_string_length { SCM_VALIDATE_STRING (1, string); - return scm_from_size_t (SCM_STRING_LENGTH (string)); + return scm_from_size_t (SCM_I_STRING_LENGTH (string)); } #undef FUNC_NAME @@ -220,8 +218,8 @@ SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0, unsigned long idx; SCM_VALIDATE_STRING (1, str); - idx = scm_to_unsigned_integer (k, 0, SCM_STRING_LENGTH(str)-1); - return SCM_MAKE_CHAR (SCM_STRING_UCHARS (str)[idx]); + idx = scm_to_unsigned_integer (k, 0, SCM_I_STRING_LENGTH(str)-1); + return SCM_MAKE_CHAR (SCM_I_STRING_UCHARS (str)[idx]); } #undef FUNC_NAME @@ -236,9 +234,9 @@ SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0, unsigned long idx; SCM_VALIDATE_STRING (1, str); - idx = scm_to_unsigned_integer (k, 0, SCM_STRING_LENGTH(str)-1); + idx = scm_to_unsigned_integer (k, 0, SCM_I_STRING_LENGTH(str)-1); SCM_VALIDATE_CHAR (3, chr); - SCM_STRING_UCHARS (str)[idx] = SCM_CHAR (chr); + SCM_I_STRING_UCHARS (str)[idx] = SCM_CHAR (chr); return SCM_UNSPECIFIED; } #undef FUNC_NAME @@ -259,12 +257,14 @@ SCM_DEFINE (scm_substring, "substring", 2, 1, 0, SCM substr; SCM_VALIDATE_STRING (1, str); - from = scm_to_unsigned_integer (start, 0, SCM_STRING_LENGTH(str)); + from = scm_to_unsigned_integer (start, 0, SCM_I_STRING_LENGTH(str)); if (SCM_UNBNDP (end)) - to = SCM_STRING_LENGTH(str); + to = SCM_I_STRING_LENGTH(str); else - to = scm_to_unsigned_integer (end, from, SCM_STRING_LENGTH(str)); - substr = scm_mem2string (&SCM_STRING_CHARS (str)[from], to - from); + to = scm_to_unsigned_integer (end, from, SCM_I_STRING_LENGTH(str)); + substr = scm_allocate_string (to - from); + memcpy (SCM_I_STRING_CHARS (substr), SCM_I_STRING_CHARS (str) + from, + to - from); 
scm_remember_upto_here_1 (str); return substr; } @@ -279,91 +279,178 @@ SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1, { SCM res; size_t i = 0; - register SCM l, s; - register unsigned char *data; + SCM l, s; + char *data; SCM_VALIDATE_REST_ARGUMENT (args); - for (l = args; !SCM_NULLP (l); l = SCM_CDR (l)) { - s = SCM_CAR (l); - SCM_VALIDATE_STRING (SCM_ARGn, s); - i += SCM_STRING_LENGTH (s); - } + for (l = args; !SCM_NULLP (l); l = SCM_CDR (l)) + { + s = SCM_CAR (l); + SCM_VALIDATE_STRING (SCM_ARGn, s); + i += SCM_I_STRING_LENGTH (s); + } res = scm_allocate_string (i); - data = SCM_STRING_UCHARS (res); - for (l = args; !SCM_NULLP (l);l = SCM_CDR (l)) { - s = SCM_CAR (l); - for (i = 0;i SCM_STRING_MAX_LENGTH) { - /* FIXME: Should we use exported wrappers for malloc (and free), which - * allow windows DLLs to call the correct freeing function? */ - str = (char *) scm_malloc ((len + 1) * sizeof (char)); - if (str == NULL) - return NULL; + free (str); + scm_out_of_range (NULL, scm_from_size_t (len)); } - memcpy (str, SCM_STRING_CHARS (obj), len); - scm_remember_upto_here_1 (obj); - str[len] = '\0'; + res = scm_cell (SCM_I_MAKE_STRING_TAG (len), (scm_t_bits) str); + scm_gc_register_collectable_memory (str, len+1, "string"); + + return res; +} + +char * +scm_to_locale_stringn (SCM str, size_t *lenp) +{ + char *res; + size_t len; - if (lenp != NULL) + if (!SCM_I_STRINGP (str)) + scm_wrong_type_arg_msg (NULL, 0, str, "string"); + len = SCM_I_STRING_LENGTH (str); + res = scm_malloc (len + ((lenp==NULL)? 
1 : 0)); + memcpy (res, SCM_I_STRING_CHARS (str), len); + if (lenp == NULL) + { + res[len] = '\0'; + if (strlen (res) != len) + { + free (res); + scm_misc_error (NULL, + "string contains #\\nul character: ~S", + scm_list_1 (str)); + } + } + else *lenp = len; - return str; + scm_remember_upto_here_1 (str); + return res; } -#undef FUNC_NAME +char * +scm_to_locale_string (SCM str) +{ + return scm_to_locale_stringn (str, NULL); +} -/* Copy LEN characters at START from the Scheme string OBJ to memory - at STR. START is an index into OBJ; zero means the beginning of - the string. STR has already been allocated by the caller. +size_t +scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len) +{ + size_t len; + + if (!SCM_I_STRINGP (str)) + scm_wrong_type_arg_msg (NULL, 0, str, "string"); + len = SCM_I_STRING_LENGTH (str); + memcpy (buf, SCM_I_STRING_CHARS (str), (len > max_len)? max_len : len); + scm_remember_upto_here_1 (str); + return len; +} - If START + LEN is off the end of OBJ, silently truncate the source - region to fit the string. If truncation occurs, the corresponding - area of STR is left unchanged. */ -#define FUNC_NAME "scm_c_substring2str" -char * -scm_c_substring2str (SCM obj, char *str, size_t start, size_t len) +/* Return a newly allocated array of char pointers to each of the strings + in args, with a terminating NULL pointer. */ + +char ** +scm_i_allocate_string_pointers (SCM list) { - size_t src_length, effective_length; - - SCM_ASSERT (SCM_STRINGP (obj), obj, SCM_ARG2, FUNC_NAME); - src_length = SCM_STRING_LENGTH (obj); - effective_length = (len + start <= src_length) ? 
len : src_length - start; - memcpy (str, SCM_STRING_CHARS (obj) + start, effective_length); - scm_remember_upto_here_1 (obj); - return str; + char **result; + int len = scm_ilength (list); + int i; + + if (len < 0) + scm_wrong_type_arg_msg (NULL, 0, list, "proper list"); + + scm_frame_begin (0); + + result = (char **) scm_malloc ((len + 1) * sizeof (char *)); + result[len] = NULL; + scm_frame_unwind_handler (free, result, 0); + + /* The list might be have been modified in another thread, so + we check LIST before each access. + */ + for (i = 0; i < len && SCM_CONSP (list); i++) + { + result[i] = scm_to_locale_string (SCM_CAR (list)); + list = SCM_CDR (list); + } + + scm_frame_end (); + return result; } -#undef FUNC_NAME +void +scm_i_free_string_pointers (char **pointers) +{ + int i; + + for (i = 0; pointers[i]; i++) + free (pointers[i]); + free (pointers); +} void scm_init_strings () diff --git a/libguile/strings.h b/libguile/strings.h index 8e915d470..09edabbcb 100644 --- a/libguile/strings.h +++ b/libguile/strings.h @@ -26,19 +26,30 @@ -#define SCM_STRINGP(x) (!SCM_IMP (x) && (SCM_TYP7 (x) == scm_tc7_string)) -#define SCM_STRING_UCHARS(x) ((unsigned char *) (SCM_CELL_WORD_1 (x))) -#define SCM_STRING_CHARS(x) ((char *) (SCM_CELL_WORD_1 (x))) -#define SCM_SET_STRING_CHARS(s, c) (SCM_SET_CELL_WORD_1 ((s), (c))) -#define SCM_STRING_MAX_LENGTH ((1UL << 24) - 1UL) -#define SCM_STRING_LENGTH(x) ((size_t) (SCM_CELL_WORD_0 (x) >> 8)) -#define SCM_MAKE_STRING_TAG(l) ((((scm_t_bits) (l)) << 8) + scm_tc7_string) -#define SCM_SET_STRING_LENGTH(s, l) (SCM_SET_CELL_WORD_0 ((s), SCM_MAKE_STRING_TAG (l))) +#define SCM_STRING_MAX_LENGTH ((SCM_T_BITS_MAX-255)/256) + +#define SCM_I_MAKE_STRING_TAG(l) ((((scm_t_bits) (l)) << 8) + scm_tc7_string) +#define SCM_I_STRINGP(x) (!SCM_IMP (x) && (SCM_TYP7 (x) == scm_tc7_string)) +#define SCM_I_STRING_UCHARS(x) ((unsigned char *) (SCM_CELL_WORD_1 (x))) +#define SCM_I_STRING_CHARS(x) ((char *) (SCM_CELL_WORD_1 (x))) +#define 
SCM_I_STRING_LENGTH(x) ((size_t) (SCM_CELL_WORD_0 (x) >> 8)) + +#define SCM_STRINGP SCM_I_STRINGP +#define SCM_STRING_CHARS SCM_I_STRING_CHARS +#define SCM_STRING_UCHARS SCM_I_STRING_UCHARS +#define SCM_STRING_LENGTH SCM_I_STRING_LENGTH SCM_API SCM scm_string_p (SCM x); SCM_API SCM scm_string (SCM chrs); +SCM_API SCM scm_make_string (SCM k, SCM chr); +SCM_API SCM scm_string_length (SCM str); +SCM_API SCM scm_string_ref (SCM str, SCM k); +SCM_API SCM scm_string_set_x (SCM str, SCM k, SCM chr); +SCM_API SCM scm_substring (SCM str, SCM start, SCM end); +SCM_API SCM scm_string_append (SCM args); + SCM_API SCM scm_makfromstrs (int argc, char **argv); SCM_API SCM scm_take_str (char *s, size_t len); SCM_API SCM scm_take0str (char *s); @@ -47,23 +58,22 @@ SCM_API SCM scm_str2string (const char *src); SCM_API SCM scm_makfrom0str (const char *src); SCM_API SCM scm_makfrom0str_opt (const char *src); SCM_API SCM scm_allocate_string (size_t len); -SCM_API SCM scm_make_string (SCM k, SCM chr); -SCM_API SCM scm_string_length (SCM str); -SCM_API SCM scm_string_ref (SCM str, SCM k); -SCM_API SCM scm_string_set_x (SCM str, SCM k, SCM chr); -SCM_API SCM scm_substring (SCM str, SCM start, SCM end); -SCM_API SCM scm_string_append (SCM args); -SCM_API void scm_init_strings (void); -SCM_API char *scm_c_string2str (SCM obj, char *str, size_t *lenp); -SCM_API char *scm_c_substring2str (SCM obj, char *str, size_t start, size_t len); - +SCM_API int scm_is_string (SCM x); +SCM_API SCM scm_from_locale_string (const char *str); +SCM_API SCM scm_from_locale_stringn (const char *str, size_t len); +SCM_API SCM scm_take_locale_string (char *str); +SCM_API SCM scm_take_locale_stringn (char *str, size_t len); +SCM_API char *scm_to_locale_string (SCM str); +SCM_API char *scm_to_locale_stringn (SCM str, size_t *lenp); +SCM_API size_t scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len); -#if (SCM_ENABLE_DEPRECATED == 1) +/* internal utility functions. 
*/ -#define SCM_STRING_COERCE_0TERMINATION_X(x) (x) +SCM_API char **scm_i_allocate_string_pointers (SCM list); +SCM_API void scm_i_free_string_pointers (char **pointers); -#endif +SCM_API void scm_init_strings (void); #endif /* SCM_STRINGS_H */ -- 2.20.1