"@item infix\n"
"Insert the separator between list elements. An empty string\n"
"will produce an empty list.\n"
- "@item string-infix\n"
+ "@item strict-infix\n"
"Like @code{infix}, but will raise an error if given the empty\n"
"list.\n"
"@item suffix\n"
"@end table")
#define FUNC_NAME s_scm_string_join
{
-#define GRAM_INFIX 0
-#define GRAM_STRICT_INFIX 1
-#define GRAM_SUFFIX 2
-#define GRAM_PREFIX 3
- SCM tmp;
- SCM result;
- int gram = GRAM_INFIX;
- size_t del_len = 0;
- long strings = scm_ilength (ls);
+ SCM append_list = SCM_EOL;
+ long list_len = scm_ilength (ls);
+ size_t delimiter_len = 0;
/* Validate the string list. */
- if (strings < 0)
+ if (list_len < 0)
SCM_WRONG_TYPE_ARG (1, ls);
/* Validate the delimiter and record its length. */
if (SCM_UNBNDP (delimiter))
{
delimiter = scm_from_locale_string (" ");
- del_len = 1;
+ delimiter_len = 1;
}
else
{
SCM_VALIDATE_STRING (2, delimiter);
- del_len = scm_i_string_length (delimiter);
+ delimiter_len = scm_i_string_length (delimiter);
}
- /* Validate the grammar symbol and remember the grammar. */
+ /* Validate the grammar symbol. */
if (SCM_UNBNDP (grammar))
- gram = GRAM_INFIX;
- else if (scm_is_eq (grammar, scm_sym_infix))
- gram = GRAM_INFIX;
- else if (scm_is_eq (grammar, scm_sym_strict_infix))
- gram = GRAM_STRICT_INFIX;
- else if (scm_is_eq (grammar, scm_sym_suffix))
- gram = GRAM_SUFFIX;
- else if (scm_is_eq (grammar, scm_sym_prefix))
- gram = GRAM_PREFIX;
- else
+ grammar = scm_sym_infix;
+ else if (!(scm_is_eq (grammar, scm_sym_infix)
+ || scm_is_eq (grammar, scm_sym_strict_infix)
+ || scm_is_eq (grammar, scm_sym_suffix)
+ || scm_is_eq (grammar, scm_sym_prefix)))
SCM_WRONG_TYPE_ARG (3, grammar);
- /* Check grammar constraints. */
- if (strings == 0 && gram == GRAM_STRICT_INFIX)
- SCM_MISC_ERROR ("strict-infix grammar requires non-empty list",
- SCM_EOL);
+ if (list_len == 0)
+ {
+ if (scm_is_eq (grammar, scm_sym_strict_infix))
+ SCM_MISC_ERROR ("strict-infix grammar requires non-empty list",
+ SCM_EOL);
+ else
+ /* Handle empty lists specially */
+ append_list = SCM_EOL;
+ }
+ else if (delimiter_len == 0)
+ /* Handle empty delimiters specially */
+ append_list = ls;
+ else
+ {
+ SCM *last_cdr_p = &append_list;
- result = scm_i_make_string (0, NULL, 0);
+#define ADD_TO_APPEND_LIST(x) \
+ ((*last_cdr_p = scm_list_1 (x)), \
+ (last_cdr_p = SCM_CDRLOC (*last_cdr_p)))
- tmp = ls;
- switch (gram)
- {
- case GRAM_INFIX:
- case GRAM_STRICT_INFIX:
- while (scm_is_pair (tmp))
- {
- result = scm_string_append (scm_list_2 (result, SCM_CAR (tmp)));
- if (!scm_is_null (SCM_CDR (tmp)) && del_len > 0)
- result = scm_string_append (scm_list_2 (result, delimiter));
- tmp = SCM_CDR (tmp);
- }
- break;
- case GRAM_SUFFIX:
- while (scm_is_pair (tmp))
- {
- result = scm_string_append (scm_list_2 (result, SCM_CAR (tmp)));
- if (del_len > 0)
- result = scm_string_append (scm_list_2 (result, delimiter));
- tmp = SCM_CDR (tmp);
- }
- break;
- case GRAM_PREFIX:
- while (scm_is_pair (tmp))
- {
- if (del_len > 0)
- result = scm_string_append (scm_list_2 (result, delimiter));
- result = scm_string_append (scm_list_2 (result, SCM_CAR (tmp)));
- tmp = SCM_CDR (tmp);
- }
- break;
+ /* Build a list of strings to pass to 'string-append'.
+ Here we assume that 'ls' has at least one element. */
+
+ /* If using the 'prefix' grammar, start with the delimiter. */
+ if (scm_is_eq (grammar, scm_sym_prefix))
+ ADD_TO_APPEND_LIST (delimiter);
+
+ /* Handle the first element of 'ls' specially, so that in the loop
+ that follows we can unconditionally insert the delimiter before
+ every remaining element. */
+ ADD_TO_APPEND_LIST (SCM_CAR (ls));
+ ls = SCM_CDR (ls);
+
+ /* Insert the delimiter before every remaining element. */
+ while (scm_is_pair (ls))
+ {
+ ADD_TO_APPEND_LIST (delimiter);
+ ADD_TO_APPEND_LIST (SCM_CAR (ls));
+ ls = SCM_CDR (ls);
+ }
+
+ /* If using the 'suffix' grammar, add the delimiter to the end. */
+ if (scm_is_eq (grammar, scm_sym_suffix))
+ ADD_TO_APPEND_LIST (delimiter);
+
+#undef ADD_TO_APPEND_LIST
}
- return result;
-#undef GRAM_INFIX
-#undef GRAM_STRICT_INFIX
-#undef GRAM_SUFFIX
-#undef GRAM_PREFIX
+ /* Construct the final result. */
+ return scm_string_append (append_list);
}
#undef FUNC_NAME
SCM_ASSERT_RANGE (3, s, len <= scm_i_string_length (target) - ctstart);
target = scm_i_string_start_writing (target);
- for (i = 0; i < cend - cstart; i++)
+ if (ctstart < cstart)
+ {
+ for (i = 0; i < len; i++)
+ scm_i_string_set_x (target, ctstart + i,
+ scm_i_string_ref (s, cstart + i));
+ }
+ else
{
- scm_i_string_set_x (target, ctstart + i,
- scm_i_string_ref (s, cstart + i));
+ for (i = len; i--;)
+ scm_i_string_set_x (target, ctstart + i,
+ scm_i_string_ref (s, cstart + i));
}
scm_i_string_stop_writing ();
scm_remember_upto_here_1 (target);
#undef FUNC_NAME
SCM_DEFINE (scm_string_split, "string-split", 2, 0, 0,
- (SCM str, SCM chr),
+ (SCM str, SCM char_pred),
"Split the string @var{str} into a list of the substrings delimited\n"
- "by appearances of the character @var{chr}. Note that an empty substring\n"
- "between separator characters will result in an empty string in the\n"
- "result list.\n"
+ "by appearances of characters that\n"
+ "\n"
+ "@itemize @bullet\n"
+ "@item\n"
+ "equal @var{char_pred}, if it is a character,\n"
+ "\n"
+ "@item\n"
+ "satisfy the predicate @var{char_pred}, if it is a procedure,\n"
+ "\n"
+ "@item\n"
+ "are in the set @var{char_pred}, if it is a character set.\n"
+ "@end itemize\n\n"
+ "Note that an empty substring between separator characters\n"
+ "will result in an empty string in the result list.\n"
"\n"
"@lisp\n"
"(string-split \"root:x:0:0:root:/root:/bin/bash\" #\\:)\n"
"@end lisp")
#define FUNC_NAME s_scm_string_split
{
- long idx, last_idx;
- int narrow;
SCM res = SCM_EOL;
SCM_VALIDATE_STRING (1, str);
- SCM_VALIDATE_CHAR (2, chr);
- /* This is explicit wide/narrow logic (instead of using
- scm_i_string_ref) is a speed optimization. */
- idx = scm_i_string_length (str);
- narrow = scm_i_is_narrow_string (str);
- if (narrow)
+ if (SCM_CHARP (char_pred))
{
- const char *buf = scm_i_string_chars (str);
- while (idx >= 0)
+ long idx, last_idx;
+ int narrow;
+
+ /* This is explicit wide/narrow logic (instead of using
+ scm_i_string_ref) is a speed optimization. */
+ idx = scm_i_string_length (str);
+ narrow = scm_i_is_narrow_string (str);
+ if (narrow)
+ {
+ const char *buf = scm_i_string_chars (str);
+ while (idx >= 0)
+ {
+ last_idx = idx;
+ while (idx > 0 && buf[idx-1] != (char) SCM_CHAR(char_pred))
+ idx--;
+ if (idx >= 0)
+ {
+ res = scm_cons (scm_i_substring (str, idx, last_idx), res);
+ idx--;
+ }
+ }
+ }
+ else
{
- last_idx = idx;
- while (idx > 0 && buf[idx-1] != (char) SCM_CHAR(chr))
- idx--;
- if (idx >= 0)
+ const scm_t_wchar *buf = scm_i_string_wide_chars (str);
+ while (idx >= 0)
{
- res = scm_cons (scm_i_substring (str, idx, last_idx), res);
- idx--;
+ last_idx = idx;
+ while (idx > 0 && buf[idx-1] != SCM_CHAR(char_pred))
+ idx--;
+ if (idx >= 0)
+ {
+ res = scm_cons (scm_i_substring (str, idx, last_idx), res);
+ idx--;
+ }
}
}
}
else
{
- const scm_t_wchar *buf = scm_i_string_wide_chars (str);
- while (idx >= 0)
+ SCM sidx, slast_idx;
+
+ if (!SCM_CHARSETP (char_pred))
+ SCM_ASSERT (scm_is_true (scm_procedure_p (char_pred)),
+ char_pred, SCM_ARG2, FUNC_NAME);
+
+ /* Supporting predicates and character sets involves handling SCM
+ values so there is less chance to optimize. */
+ slast_idx = scm_string_length (str);
+ for (;;)
{
- last_idx = idx;
- while (idx > 0 && buf[idx-1] != SCM_CHAR(chr))
- idx--;
- if (idx >= 0)
- {
- res = scm_cons (scm_i_substring (str, idx, last_idx), res);
- idx--;
- }
+ sidx = scm_string_index_right (str, char_pred, SCM_INUM0, slast_idx);
+ if (scm_is_false (sidx))
+ break;
+ res = scm_cons (scm_substring (str, scm_oneplus (sidx), slast_idx), res);
+ slast_idx = sidx;
}
+
+ res = scm_cons (scm_substring (str, SCM_INUM0, slast_idx), res);
}
+
scm_remember_upto_here_1 (str);
return res;
}