/* srfi-13.c --- SRFI-13 procedures for Guile
*
- * Copyright (C) 2001, 2004, 2005, 2006, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+ * Copyright (C) 2001, 2004, 2005, 2006, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
if (wide)
{
scm_t_wchar *wbuf = NULL;
- res = scm_i_make_wide_string (clen, &wbuf);
+ res = scm_i_make_wide_string (clen, &wbuf, 0);
memcpy (wbuf, buf, clen * sizeof (scm_t_wchar));
free (buf);
}
else
{
char *nbuf = NULL;
- res = scm_i_make_string (clen, &nbuf);
+ res = scm_i_make_string (clen, &nbuf, 0);
for (i = 0; i < clen; i ++)
nbuf[i] = (unsigned char) buf[i];
free (buf);
if (i < 0)
SCM_WRONG_TYPE_ARG (1, chrs);
- result = scm_i_make_string (i, &data);
+ result = scm_i_make_string (i, &data, 0);
{
SCM rest;
SCM_DEFINE (scm_string_join, "string-join", 1, 2, 0,
(SCM ls, SCM delimiter, SCM grammar),
"Append the string in the string list @var{ls}, using the string\n"
- "@var{delim} as a delimiter between the elements of @var{ls}.\n"
+ "@var{delimiter} as a delimiter between the elements of @var{ls}.\n"
"@var{grammar} is a symbol which specifies how the delimiter is\n"
"placed between the strings, and defaults to the symbol\n"
"@code{infix}.\n"
"@item infix\n"
"Insert the separator between list elements. An empty string\n"
"will produce an empty list.\n"
- "@item string-infix\n"
+ "@item strict-infix\n"
"Like @code{infix}, but will raise an error if given the empty\n"
"list.\n"
"@item suffix\n"
"@end table")
#define FUNC_NAME s_scm_string_join
{
-#define GRAM_INFIX 0
-#define GRAM_STRICT_INFIX 1
-#define GRAM_SUFFIX 2
-#define GRAM_PREFIX 3
- SCM tmp;
- SCM result;
- int gram = GRAM_INFIX;
- size_t del_len = 0;
- long strings = scm_ilength (ls);
+ SCM append_list = SCM_EOL;
+ long list_len = scm_ilength (ls);
+ size_t delimiter_len = 0;
/* Validate the string list. */
- if (strings < 0)
+ if (list_len < 0)
SCM_WRONG_TYPE_ARG (1, ls);
/* Validate the delimiter and record its length. */
if (SCM_UNBNDP (delimiter))
{
delimiter = scm_from_locale_string (" ");
- del_len = 1;
+ delimiter_len = 1;
}
else
{
SCM_VALIDATE_STRING (2, delimiter);
- del_len = scm_i_string_length (delimiter);
+ delimiter_len = scm_i_string_length (delimiter);
}
- /* Validate the grammar symbol and remember the grammar. */
+ /* Validate the grammar symbol. */
if (SCM_UNBNDP (grammar))
- gram = GRAM_INFIX;
- else if (scm_is_eq (grammar, scm_sym_infix))
- gram = GRAM_INFIX;
- else if (scm_is_eq (grammar, scm_sym_strict_infix))
- gram = GRAM_STRICT_INFIX;
- else if (scm_is_eq (grammar, scm_sym_suffix))
- gram = GRAM_SUFFIX;
- else if (scm_is_eq (grammar, scm_sym_prefix))
- gram = GRAM_PREFIX;
- else
+ grammar = scm_sym_infix;
+ else if (!(scm_is_eq (grammar, scm_sym_infix)
+ || scm_is_eq (grammar, scm_sym_strict_infix)
+ || scm_is_eq (grammar, scm_sym_suffix)
+ || scm_is_eq (grammar, scm_sym_prefix)))
SCM_WRONG_TYPE_ARG (3, grammar);
- /* Check grammar constraints. */
- if (strings == 0 && gram == GRAM_STRICT_INFIX)
- SCM_MISC_ERROR ("strict-infix grammar requires non-empty list",
- SCM_EOL);
+ if (list_len == 0)
+ {
+ if (scm_is_eq (grammar, scm_sym_strict_infix))
+ SCM_MISC_ERROR ("strict-infix grammar requires non-empty list",
+ SCM_EOL);
+ else
+ /* Handle empty lists specially */
+ append_list = SCM_EOL;
+ }
+ else if (delimiter_len == 0)
+ /* Handle empty delimiters specially */
+ append_list = ls;
+ else
+ {
+ SCM *last_cdr_p = &append_list;
- result = scm_i_make_string (0, NULL);
+#define ADD_TO_APPEND_LIST(x) \
+ ((*last_cdr_p = scm_list_1 (x)), \
+ (last_cdr_p = SCM_CDRLOC (*last_cdr_p)))
- tmp = ls;
- switch (gram)
- {
- case GRAM_INFIX:
- case GRAM_STRICT_INFIX:
- while (scm_is_pair (tmp))
- {
- result = scm_string_append (scm_list_2 (result, SCM_CAR (tmp)));
- if (!scm_is_null (SCM_CDR (tmp)) && del_len > 0)
- result = scm_string_append (scm_list_2 (result, delimiter));
- tmp = SCM_CDR (tmp);
- }
- break;
- case GRAM_SUFFIX:
- while (scm_is_pair (tmp))
- {
- result = scm_string_append (scm_list_2 (result, SCM_CAR (tmp)));
- if (del_len > 0)
- result = scm_string_append (scm_list_2 (result, delimiter));
- tmp = SCM_CDR (tmp);
- }
- break;
- case GRAM_PREFIX:
- while (scm_is_pair (tmp))
- {
- if (del_len > 0)
- result = scm_string_append (scm_list_2 (result, delimiter));
- result = scm_string_append (scm_list_2 (result, SCM_CAR (tmp)));
- tmp = SCM_CDR (tmp);
- }
- break;
+ /* Build a list of strings to pass to 'string-append'.
+ Here we assume that 'ls' has at least one element. */
+
+ /* If using the 'prefix' grammar, start with the delimiter. */
+ if (scm_is_eq (grammar, scm_sym_prefix))
+ ADD_TO_APPEND_LIST (delimiter);
+
+ /* Handle the first element of 'ls' specially, so that in the loop
+ that follows we can unconditionally insert the delimiter before
+ every remaining element. */
+ ADD_TO_APPEND_LIST (SCM_CAR (ls));
+ ls = SCM_CDR (ls);
+
+ /* Insert the delimiter before every remaining element. */
+ while (scm_is_pair (ls))
+ {
+ ADD_TO_APPEND_LIST (delimiter);
+ ADD_TO_APPEND_LIST (SCM_CAR (ls));
+ ls = SCM_CDR (ls);
+ }
+
+ /* If using the 'suffix' grammar, add the delimiter to the end. */
+ if (scm_is_eq (grammar, scm_sym_suffix))
+ ADD_TO_APPEND_LIST (delimiter);
+
+#undef ADD_TO_APPEND_LIST
}
- return result;
-#undef GRAM_INFIX
-#undef GRAM_STRICT_INFIX
-#undef GRAM_SUFFIX
-#undef GRAM_PREFIX
+ /* Construct the final result. */
+ return scm_string_append (append_list);
}
#undef FUNC_NAME
MY_VALIDATE_SUBSTRING_SPEC (3, s,
4, start, cstart,
5, end, cend);
- len = cend - cstart;
- SCM_ASSERT_RANGE (3, s, len <= scm_i_string_length (target) - ctstart);
-
- target = scm_i_string_start_writing (target);
- for (i = 0; i < cend - cstart; i++)
+ if (cstart < cend)
{
- scm_i_string_set_x (target, ctstart + i,
- scm_i_string_ref (s, cstart + i));
+ len = cend - cstart;
+ SCM_ASSERT_RANGE (3, s, len <= scm_i_string_length (target) - ctstart);
+
+ target = scm_i_string_start_writing (target);
+ for (i = 0; i < cend - cstart; i++)
+ {
+ scm_i_string_set_x (target, ctstart + i,
+ scm_i_string_ref (s, cstart + i));
+ }
+ scm_i_string_stop_writing ();
+ scm_remember_upto_here_1 (target);
}
- scm_i_string_stop_writing ();
- scm_remember_upto_here_1 (target);
return SCM_UNSPECIFIED;
}
MY_VALIDATE_SUBSTRING_SPEC (1, s,
3, start, cstart,
4, end, cend);
- if (SCM_UNBNDP (char_pred))
+ if (SCM_UNBNDP (char_pred)
+ || scm_is_eq (char_pred, scm_char_set_whitespace))
{
while (cstart < cend)
{
MY_VALIDATE_SUBSTRING_SPEC (1, s,
3, start, cstart,
4, end, cend);
- if (SCM_UNBNDP (char_pred))
+ if (SCM_UNBNDP (char_pred)
+ || scm_is_eq (char_pred, scm_char_set_whitespace))
{
while (cstart < cend)
{
MY_VALIDATE_SUBSTRING_SPEC (1, s,
3, start, cstart,
4, end, cend);
- if (SCM_UNBNDP (char_pred))
+ if (SCM_UNBNDP (char_pred)
+ || scm_is_eq (char_pred, scm_char_set_whitespace))
{
while (cstart < cend)
{
4, end, cend);
SCM_VALIDATE_CHAR (2, chr);
-
- str = scm_i_string_start_writing (str);
- for (k = cstart; k < cend; k++)
- scm_i_string_set_x (str, k, SCM_CHAR (chr));
- scm_i_string_stop_writing ();
+ if (cstart < cend)
+ {
+ str = scm_i_string_start_writing (str);
+ for (k = cstart; k < cend; k++)
+ scm_i_string_set_x (str, k, SCM_CHAR (chr));
+ scm_i_string_stop_writing ();
+ }
return SCM_UNSPECIFIED;
}
len1 = scm_i_string_length (s1);
len2 = scm_i_string_length (s2);
- if (SCM_LIKELY (len1 == len2))
+ if (len1 != len2)
+ return SCM_BOOL_F;
+ else
{
if (!scm_i_is_narrow_string (s1))
len1 *= 4;
SCM_DEFINE (scm_substring_hash, "string-hash", 1, 3, 0,
(SCM s, SCM bound, SCM start, SCM end),
- "Compute a hash value for @var{S}. the optional argument "
+ "Compute a hash value for @var{s}. the optional argument "
"@var{bound} is a non-negative exact "
"integer specifying the range of the hash function. "
"A positive value restricts the return value to the "
SCM_DEFINE (scm_substring_hash_ci, "string-hash-ci", 1, 3, 0,
(SCM s, SCM bound, SCM start, SCM end),
- "Compute a hash value for @var{S}. the optional argument "
+ "Compute a hash value for @var{s}. the optional argument "
"@var{bound} is a non-negative exact "
"integer specifying the range of the hash function. "
"A positive value restricts the return value to the "
{
size_t k;
- v = scm_i_string_start_writing (v);
- for (k = start; k < end; ++k)
- scm_i_string_set_x (v, k, uc_toupper (scm_i_string_ref (v, k)));
- scm_i_string_stop_writing ();
- scm_remember_upto_here_1 (v);
+ if (start < end)
+ {
+ v = scm_i_string_start_writing (v);
+ for (k = start; k < end; ++k)
+ scm_i_string_set_x (v, k, uc_toupper (scm_i_string_ref (v, k)));
+ scm_i_string_stop_writing ();
+ scm_remember_upto_here_1 (v);
+ }
return v;
}
{
size_t k;
- v = scm_i_string_start_writing (v);
- for (k = start; k < end; ++k)
- scm_i_string_set_x (v, k, uc_tolower (scm_i_string_ref (v, k)));
- scm_i_string_stop_writing ();
- scm_remember_upto_here_1 (v);
+ if (start < end)
+ {
+ v = scm_i_string_start_writing (v);
+ for (k = start; k < end; ++k)
+ scm_i_string_set_x (v, k, uc_tolower (scm_i_string_ref (v, k)));
+ scm_i_string_stop_writing ();
+ scm_remember_upto_here_1 (v);
+ }
return v;
}
size_t i;
int in_word = 0;
- str = scm_i_string_start_writing (str);
- for(i = start; i < end; i++)
+ if (start < end)
{
- ch = SCM_MAKE_CHAR (scm_i_string_ref (str, i));
- if (scm_is_true (scm_char_alphabetic_p (ch)))
- {
- if (!in_word)
- {
- scm_i_string_set_x (str, i, uc_totitle (SCM_CHAR (ch)));
- in_word = 1;
- }
- else
- {
- scm_i_string_set_x (str, i, uc_tolower (SCM_CHAR (ch)));
- }
- }
- else
- in_word = 0;
+ str = scm_i_string_start_writing (str);
+ for(i = start; i < end; i++)
+ {
+ ch = SCM_MAKE_CHAR (scm_i_string_ref (str, i));
+ if (scm_is_true (scm_char_alphabetic_p (ch)))
+ {
+ if (!in_word)
+ {
+ scm_i_string_set_x (str, i, uc_totitle (SCM_CHAR (ch)));
+ in_word = 1;
+ }
+ else
+ {
+ scm_i_string_set_x (str, i, uc_tolower (SCM_CHAR (ch)));
+ }
+ }
+ else
+ in_word = 0;
+ }
+ scm_i_string_stop_writing ();
+ scm_remember_upto_here_1 (str);
}
- scm_i_string_stop_writing ();
- scm_remember_upto_here_1 (str);
return str;
}
static void
string_reverse_x (SCM str, size_t cstart, size_t cend)
{
- SCM tmp;
-
- str = scm_i_string_start_writing (str);
- if (cend > 0)
+ if (cstart < cend)
{
- cend--;
- while (cstart < cend)
- {
- tmp = SCM_MAKE_CHAR (scm_i_string_ref (str, cstart));
- scm_i_string_set_x (str, cstart, scm_i_string_ref (str, cend));
- scm_i_string_set_x (str, cend, SCM_CHAR (tmp));
- cstart++;
- cend--;
- }
+ str = scm_i_string_start_writing (str);
+ if (cend > 0)
+ {
+ SCM tmp;
+
+ cend--;
+ while (cstart < cend)
+ {
+ tmp = SCM_MAKE_CHAR (scm_i_string_ref (str, cstart));
+ scm_i_string_set_x (str, cstart, scm_i_string_ref (str, cend));
+ scm_i_string_set_x (str, cend, SCM_CHAR (tmp));
+ cstart++;
+ cend--;
+ }
+ }
+ scm_i_string_stop_writing ();
}
- scm_i_string_stop_writing ();
}
MY_VALIDATE_SUBSTRING_SPEC (2, s,
3, start, cstart,
4, end, cend);
- result = scm_i_make_string (cend - cstart, NULL);
+ result = scm_i_make_string (cend - cstart, NULL, 0);
p = 0;
while (cstart < cend)
{
ans = base;
}
else
- ans = scm_i_make_string (0, NULL);
+ ans = scm_i_make_string (0, NULL, 0);
if (!SCM_UNBNDP (make_final))
SCM_VALIDATE_PROC (6, make_final);
SCM ch = scm_call_1 (f, seed);
if (!SCM_CHARP (ch))
SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f));
- str = scm_i_make_string (1, NULL);
+ str = scm_i_make_string (1, NULL, 0);
str = scm_i_string_start_writing (str);
scm_i_string_set_x (str, i, SCM_CHAR (ch));
scm_i_string_stop_writing ();
ans = base;
}
else
- ans = scm_i_make_string (0, NULL);
+ ans = scm_i_make_string (0, NULL, 0);
if (!SCM_UNBNDP (make_final))
SCM_VALIDATE_PROC (6, make_final);
SCM ch = scm_call_1 (f, seed);
if (!SCM_CHARP (ch))
SCM_MISC_ERROR ("procedure ~S returned non-char", scm_list_1 (f));
- str = scm_i_make_string (1, NULL);
+ str = scm_i_make_string (1, NULL, 0);
str = scm_i_string_start_writing (str);
scm_i_string_set_x (str, i, SCM_CHAR (ch));
scm_i_string_stop_writing ();
if (cstart == cend && cfrom != cto)
SCM_MISC_ERROR ("start and end indices must not be equal", SCM_EOL);
- result = scm_i_make_string (cto - cfrom, NULL);
+ result = scm_i_make_string (cto - cfrom, NULL, 0);
result = scm_i_string_start_writing (result);
p = 0;
csto = csfrom + (cend - cstart);
else
csto = scm_to_int (sto);
- if (cstart == cend && csfrom != csto)
- SCM_MISC_ERROR ("start and end indices must not be equal", SCM_EOL);
- SCM_ASSERT_RANGE (1, tstart,
- ctstart + (csto - csfrom) <= scm_i_string_length (target));
-
- p = 0;
- target = scm_i_string_start_writing (target);
- while (csfrom < csto)
+ if (csfrom < csto)
{
- size_t t = ((csfrom < 0) ? -csfrom : csfrom) % (cend - cstart);
- if (csfrom < 0)
- scm_i_string_set_x (target, p + cstart, scm_i_string_ref (s, (cend - cstart) - t));
- else
- scm_i_string_set_x (target, p + cstart, scm_i_string_ref (s, t));
- csfrom++;
- p++;
- }
- scm_i_string_stop_writing ();
+ if (cstart == cend)
+ SCM_MISC_ERROR ("start and end indices must not be equal", SCM_EOL);
+ SCM_ASSERT_RANGE (1, tstart,
+ ctstart + (csto - csfrom) <= scm_i_string_length (target));
- scm_remember_upto_here_2 (target, s);
+ p = 0;
+ target = scm_i_string_start_writing (target);
+ while (csfrom < csto)
+ {
+ size_t t = ((csfrom < 0) ? -csfrom : csfrom) % (cend - cstart);
+ if (csfrom < 0)
+ scm_i_string_set_x (target, p + cstart, scm_i_string_ref (s, (cend - cstart) - t));
+ else
+ scm_i_string_set_x (target, p + cstart, scm_i_string_ref (s, t));
+ csfrom++;
+ p++;
+ }
+ scm_i_string_stop_writing ();
+
+ scm_remember_upto_here_2 (target, s);
+ }
return SCM_UNSPECIFIED;
}
#undef FUNC_NAME
#undef FUNC_NAME
SCM_DEFINE (scm_string_split, "string-split", 2, 0, 0,
- (SCM str, SCM chr),
+ (SCM str, SCM char_pred),
"Split the string @var{str} into a list of the substrings delimited\n"
- "by appearances of the character @var{chr}. Note that an empty substring\n"
- "between separator characters will result in an empty string in the\n"
- "result list.\n"
+ "by appearances of characters that\n"
+ "\n"
+ "@itemize @bullet\n"
+ "@item\n"
+ "equal @var{char_pred}, if it is a character,\n"
+ "\n"
+ "@item\n"
+ "satisfy the predicate @var{char_pred}, if it is a procedure,\n"
+ "\n"
+ "@item\n"
+ "are in the set @var{char_pred}, if it is a character set.\n"
+ "@end itemize\n\n"
+ "Note that an empty substring between separator characters\n"
+ "will result in an empty string in the result list.\n"
"\n"
"@lisp\n"
"(string-split \"root:x:0:0:root:/root:/bin/bash\" #\\:)\n"
"@end lisp")
#define FUNC_NAME s_scm_string_split
{
- long idx, last_idx;
- int narrow;
SCM res = SCM_EOL;
SCM_VALIDATE_STRING (1, str);
- SCM_VALIDATE_CHAR (2, chr);
- /* This is explicit wide/narrow logic (instead of using
- scm_i_string_ref) is a speed optimization. */
- idx = scm_i_string_length (str);
- narrow = scm_i_is_narrow_string (str);
- if (narrow)
+ if (SCM_CHARP (char_pred))
{
- const char *buf = scm_i_string_chars (str);
- while (idx >= 0)
+ long idx, last_idx;
+ int narrow;
+
+ /* This is explicit wide/narrow logic (instead of using
+ scm_i_string_ref) is a speed optimization. */
+ idx = scm_i_string_length (str);
+ narrow = scm_i_is_narrow_string (str);
+ if (narrow)
+ {
+ const char *buf = scm_i_string_chars (str);
+ while (idx >= 0)
+ {
+ last_idx = idx;
+ while (idx > 0 && buf[idx-1] != (char) SCM_CHAR(char_pred))
+ idx--;
+ if (idx >= 0)
+ {
+ res = scm_cons (scm_i_substring (str, idx, last_idx), res);
+ idx--;
+ }
+ }
+ }
+ else
{
- last_idx = idx;
- while (idx > 0 && buf[idx-1] != (char) SCM_CHAR(chr))
- idx--;
- if (idx >= 0)
+ const scm_t_wchar *buf = scm_i_string_wide_chars (str);
+ while (idx >= 0)
{
- res = scm_cons (scm_i_substring (str, idx, last_idx), res);
- idx--;
+ last_idx = idx;
+ while (idx > 0 && buf[idx-1] != SCM_CHAR(char_pred))
+ idx--;
+ if (idx >= 0)
+ {
+ res = scm_cons (scm_i_substring (str, idx, last_idx), res);
+ idx--;
+ }
}
}
}
else
{
- const scm_t_wchar *buf = scm_i_string_wide_chars (str);
- while (idx >= 0)
+ SCM sidx, slast_idx;
+
+ if (!SCM_CHARSETP (char_pred))
+ SCM_ASSERT (scm_is_true (scm_procedure_p (char_pred)),
+ char_pred, SCM_ARG2, FUNC_NAME);
+
+ /* Supporting predicates and character sets involves handling SCM
+ values so there is less chance to optimize. */
+ slast_idx = scm_string_length (str);
+ for (;;)
{
- last_idx = idx;
- while (idx > 0 && buf[idx-1] != SCM_CHAR(chr))
- idx--;
- if (idx >= 0)
- {
- res = scm_cons (scm_i_substring (str, idx, last_idx), res);
- idx--;
- }
+ sidx = scm_string_index_right (str, char_pred, SCM_INUM0, slast_idx);
+ if (scm_is_false (sidx))
+ break;
+ res = scm_cons (scm_substring (str, scm_oneplus (sidx), slast_idx), res);
+ slast_idx = sidx;
}
+
+ res = scm_cons (scm_substring (str, SCM_INUM0, slast_idx), res);
}
+
scm_remember_upto_here_1 (str);
return res;
}
else
{
size_t dst = 0;
- result = scm_i_make_string (count, NULL);
+ result = scm_i_make_string (count, NULL, 0);
result = scm_i_string_start_writing (result);
/* decrement "count" in this loop as well as using idx, so that if
{
int i = 0;
/* new string for retained portion */
- result = scm_i_make_string (count, NULL);
+ result = scm_i_make_string (count, NULL, 0);
result = scm_i_string_start_writing (result);
/* decrement "count" in this loop as well as using idx, so that if
another thread is simultaneously changing "s" there's no chance
{
size_t i = 0;
/* new string for retained portion */
- result = scm_i_make_string (count, NULL);
+ result = scm_i_make_string (count, NULL, 0);
result = scm_i_string_start_writing (result);
/* decrement "count" in this loop as well as using idx, so that if