-/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
}
scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
+SCM_PTHREAD_ATFORK_LOCK_STATIC_MUTEX (stringbuf_write_mutex);
\f
/* Copy-on-write strings.
SCM
scm_i_make_string (size_t len, char **charsp, int read_only_p)
{
- SCM buf = make_stringbuf (len);
+ static SCM null_stringbuf = SCM_BOOL_F;
+ SCM buf;
SCM res;
+
+ if (len == 0)
+ {
+ if (SCM_UNLIKELY (scm_is_false (null_stringbuf)))
+ {
+ null_stringbuf = make_stringbuf (0);
+ SET_STRINGBUF_SHARED (null_stringbuf);
+ }
+ buf = null_stringbuf;
+ }
+ else
+ buf = make_stringbuf (len);
+
if (charsp)
*charsp = (char *) STRINGBUF_CHARS (buf);
res = scm_double_cell (read_only_p ? RO_STRING_TAG : STRING_TAG,
SCM
scm_i_substring (SCM str, size_t start, size_t end)
{
- SCM buf;
- size_t str_start;
- get_str_buf_start (&str, &buf, &str_start);
- scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
- SET_STRINGBUF_SHARED (buf);
- scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
- return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
- (scm_t_bits)str_start + start,
- (scm_t_bits) end - start);
+  /* Build a STRING_TAG string covering [START, END) of STR that reuses
+     STR's stringbuf instead of copying characters.  */
+  SCM stringbuf;
+  size_t base;
+
+  /* Empty range: return a zero-length string from scm_i_make_string
+     and leave STR's stringbuf untouched.  */
+  if (start == end)
+    return scm_i_make_string (0, NULL, 0);
+
+  get_str_buf_start (&str, &stringbuf, &base);
+
+  /* The shared flag is only set while holding stringbuf_write_mutex.  */
+  scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
+  SET_STRINGBUF_SHARED (stringbuf);
+  scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
+
+  return scm_double_cell (STRING_TAG, SCM_UNPACK (stringbuf),
+                          (scm_t_bits) base + start,
+                          (scm_t_bits) end - start);
}
SCM
scm_i_substring_read_only (SCM str, size_t start, size_t end)
{
- SCM buf;
- size_t str_start;
- get_str_buf_start (&str, &buf, &str_start);
- scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
- SET_STRINGBUF_SHARED (buf);
- scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
- return scm_double_cell (RO_STRING_TAG, SCM_UNPACK(buf),
- (scm_t_bits)str_start + start,
- (scm_t_bits) end - start);
+  /* Build an RO_STRING_TAG string covering [START, END) of STR that
+     reuses STR's stringbuf instead of copying characters.  */
+  SCM stringbuf;
+  size_t base;
+
+  /* Empty range: return a zero-length string created with
+     read_only_p set, and leave STR's stringbuf untouched.  */
+  if (start == end)
+    return scm_i_make_string (0, NULL, 1);
+
+  get_str_buf_start (&str, &stringbuf, &base);
+
+  /* The shared flag is only set while holding stringbuf_write_mutex.  */
+  scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
+  SET_STRINGBUF_SHARED (stringbuf);
+  scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
+
+  return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (stringbuf),
+                          (scm_t_bits) base + start,
+                          (scm_t_bits) end - start);
}
SCM
scm_i_substring_copy (SCM str, size_t start, size_t end)
{
- size_t len = end - start;
- SCM buf, my_buf;
- size_t str_start;
- get_str_buf_start (&str, &buf, &str_start);
- if (scm_i_is_narrow_string (str))
- {
- my_buf = make_stringbuf (len);
- memcpy (STRINGBUF_CHARS (my_buf),
- STRINGBUF_CHARS (buf) + str_start + start, len);
- }
+  /* Return a string holding a private copy of characters [START, END)
+     of STR.  An empty range needs no copy at all.  */
+  if (start == end)
+    return scm_i_make_string (0, NULL, 0);
else
{
- my_buf = make_wide_stringbuf (len);
- u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (my_buf),
- (scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf) + str_start
- + start), len);
- /* Even though this string is wide, the substring may be narrow.
- Consider adding code to narrow the string. */
+      const size_t count = end - start;
+      SCM src_buf, copy_buf, result;
+      size_t base;
+      int narrow;
+
+      get_str_buf_start (&str, &src_buf, &base);
+      narrow = scm_i_is_narrow_string (str);
+
+      if (narrow)
+        {
+          copy_buf = make_stringbuf (count);
+          memcpy (STRINGBUF_CHARS (copy_buf),
+                  STRINGBUF_CHARS (src_buf) + base + start, count);
+        }
+      else
+        {
+          copy_buf = make_wide_stringbuf (count);
+          u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (copy_buf),
+                   (scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (src_buf)
+                                     + base + start),
+                   count);
+        }
+
+      /* Keep the source stringbuf referenced until the copy is done.  */
+      scm_remember_upto_here_1 (src_buf);
+      result = scm_double_cell (STRING_TAG, SCM_UNPACK (copy_buf),
+                                (scm_t_bits) 0, (scm_t_bits) count);
+
+      /* A slice of a wide string may contain only characters that fit
+         a narrow buffer, so give the copy a chance to shrink.  */
+      if (!narrow)
+        scm_i_try_narrow_string (result);
+      return result;
}
- scm_remember_upto_here_1 (buf);
- return scm_double_cell (STRING_TAG, SCM_UNPACK (my_buf),
- (scm_t_bits) 0, (scm_t_bits) len);
}
SCM
{
if (start == 0 && end == STRING_LENGTH (str))
return str;
- else
+ else if (start == end)
+ return scm_i_make_string (0, NULL, 0);
+ else
{
size_t len = end - start;
if (IS_SH_STRING (str))
int
scm_i_is_narrow_string (SCM str)
{
+  /* Return non-zero when the stringbuf behind STR is not wide.  When
+     STR satisfies IS_SH_STRING, inspect the string it refers to
+     (SH_STRING_STRING) rather than STR itself.  */
+  str = IS_SH_STRING (str) ? SH_STRING_STRING (str) : str;
+
return !STRINGBUF_WIDE (STRING_STRINGBUF (str));
}
int
scm_i_try_narrow_string (SCM str)
{
+ if (IS_SH_STRING (str))
+ str = SH_STRING_STRING (str);
+
SET_STRING_STRINGBUF (str, narrow_stringbuf (STRING_STRINGBUF (str)));
return scm_i_is_narrow_string (str);
void
scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr)
{
+ if (IS_SH_STRING (str))
+ {
+ p += STRING_START (str);
+ str = SH_STRING_STRING (str);
+ }
+
if (chr > 0xFF && scm_i_is_narrow_string (str))
SET_STRING_STRINGBUF (str, wide_stringbuf (STRING_STRINGBUF (str)));
"Return a newly allocated string of\n"
"length @var{k}. If @var{chr} is given, then all elements of\n"
"the string are initialized to @var{chr}, otherwise the contents\n"
- "of the @var{string} are all set to @var{#\nul}.")
+ "of the string are all set to @code{#\nul}.")
#define FUNC_NAME s_scm_make_string
{
return scm_c_make_string (scm_to_size_t (k), chr);
scm_misc_error ("scm_from_stringn", "NULL string pointer", SCM_EOL);
if (len == (size_t) -1)
len = strlen (str);
- if (len == 0)
- return scm_nullstr;
- if (encoding == NULL)
- {
- /* If encoding is null, use Latin-1. */
- char *buf;
- res = scm_i_make_string (len, &buf, 0);
- memcpy (buf, str, len);
- return res;
- }
+ if (encoding == NULL || len == 0)
+ return scm_from_latin1_stringn (str, len);
+ else if (strcmp (encoding, "UTF-8") == 0)
+ return scm_from_utf8_stringn (str, len);
u32len = 0;
u32 = (scm_t_wchar *) u32_conv_from_encoding (encoding,
res = scm_i_make_string (char_len, &dst, 0);
- for (i = 0, j = 0; i < len; i++, j++)
+ for (i = 0, j = 0; i < len; j++)
{
i += u8_mbtouc_unsafe (&c, ustr + i, len - i);
dst[j] = (signed char) c;
res = scm_i_make_wide_string (char_len, &dst, 0);
- for (i = 0, j = 0; i < len; i++, j++)
+ for (i = 0, j = 0; i < len; j++)
{
i += u8_mbtouc_unsafe (&c, ustr + i, len - i);
dst[j] = c;
return scm_to_utf8_stringn (str, NULL);
}
+/* Return the number of bytes needed to hold the LEN bytes at STR once
+   each one is written as UTF-8: bytes below 128 stay one byte long,
+   all others take two.  */
+static size_t
+latin1_u8_strlen (const scm_t_uint8 *str, size_t len)
+{
+  /* Every input byte produces at least one output byte...  */
+  size_t total = len;
+  size_t k;
+
+  /* ...and each byte outside the ASCII range produces one more.  */
+  for (k = 0; k < len; k++)
+    if (str[k] >= 128)
+      total++;
+
+  return total;
+}
+
+/* Convert the LATIN_LEN bytes at STR to a NUL-terminated UTF-8 buffer.
+
+   The caller's buffer U8_RESULT is used only when it is non-NULL,
+   U8_LENP is non-NULL, and *U8_LENP is strictly greater than the
+   encoded length (leaving room for the terminator); otherwise a buffer
+   is obtained from scm_malloc.  When U8_LENP is non-NULL it receives
+   the encoded length, excluding the terminator.  Returns the buffer
+   that was written.  */
+static scm_t_uint8*
+latin1_to_u8 (const scm_t_uint8 *str, size_t latin_len,
+              scm_t_uint8 *u8_result, size_t *u8_lenp)
+{
+  const size_t needed = latin1_u8_strlen (str, latin_len);
+  size_t src = 0;
+  size_t written = 0;
+  int caller_buf_ok = u8_result && u8_lenp && *u8_lenp > needed;
+
+  if (!caller_buf_ok)
+    u8_result = scm_malloc (needed + 1);
+  if (u8_lenp)
+    *u8_lenp = needed;
+
+  while (src < latin_len)
+    {
+      written += u8_uctomb (u8_result + written, str[src],
+                            needed - written);
+      src++;
+    }
+
+  /* The encoder must have produced exactly the predicted byte count.  */
+  if (written != needed)
+    abort ();
+  u8_result[written] = 0;
+
+  return u8_result;
+}
+
char *
scm_to_utf8_stringn (SCM str, size_t *lenp)
+#define FUNC_NAME "scm_to_utf8_stringn"
{
- return scm_to_stringn (str, lenp, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
+  /* Return a newly allocated, NUL-terminated UTF-8 copy of STR.  When
+     LENP is non-NULL it receives the byte length, excluding the
+     terminator.  LENP may be NULL; scm_to_utf8_string relies on that.  */
+
+  /* The removed scm_to_stringn call did the type check; keep it here
+     so a non-string argument is still rejected up front.  */
+  SCM_VALIDATE_STRING (1, str);
+
+  if (scm_i_is_narrow_string (str))
+    return (char *) latin1_to_u8 ((scm_t_uint8 *) scm_i_string_chars (str),
+                                  scm_i_string_length (str),
+                                  NULL, lenp);
+  else
+    {
+      /* u32_to_u8 always stores the result length through its last
+         argument, so it must never be handed a NULL LENP.  Use a
+         local and copy it out afterwards.  */
+      size_t u8_len = 0;
+      scm_t_uint8 *u8;
+
+      u8 = u32_to_u8 ((scm_t_uint32 *) scm_i_string_wide_chars (str),
+                      scm_i_string_length (str),
+                      NULL, &u8_len);
+      if (u8 == NULL)
+        return NULL;
+
+      /* u32_to_u8 does not append a terminator; add one so both
+         branches return a NUL-terminated buffer.  */
+      u8 = scm_realloc (u8, u8_len + 1);
+      u8[u8_len] = 0;
+
+      if (lenp)
+        *lenp = u8_len;
+      return (char *) u8;
+    }
}
+#undef FUNC_NAME
scm_t_wchar *
SCM_VALIDATE_STRING (1, str);
if (scm_i_is_narrow_string (str))
- result = (scm_t_wchar *)
- scm_to_stringn (str, lenp, "UTF-32",
- SCM_FAILED_CONVERSION_ERROR);
+ {
+ scm_t_uint8 *codepoints;
+ size_t i, len;
+
+ codepoints = (scm_t_uint8*) scm_i_string_chars (str);
+ len = scm_i_string_length (str);
+ if (lenp)
+ *lenp = len;
+
+ result = scm_malloc ((len + 1) * sizeof (scm_t_wchar));
+ for (i = 0; i < len; i++)
+ result[i] = codepoints[i];
+ result[len] = 0;
+ }
else
{
size_t len;
void
scm_init_strings ()
{
- scm_nullstr = scm_i_make_string (0, NULL, 1);
+ scm_nullstr = scm_i_make_string (0, NULL, 0); /* read_only_p is 0, so the
+                                                   empty string is tagged
+                                                   STRING_TAG rather than
+                                                   RO_STRING_TAG.  */
#include "libguile/strings.x"
}