%compute-applicable-methods in Scheme

[bpt/guile.git] / libguile / srfi-13.c
diff --git a/libguile/srfi-13.c b/libguile/srfi-13.c

index 75feae3..5c30dfe 100644 (file)
--- a/libguile/srfi-13.c
+++ b/libguile/srfi-13.c
@@ -1,6 +1,6 @@
  /* srfi-13.c --- SRFI-13 procedures for Guile
   *
- * Copyright (C) 2001, 2004, 2005, 2006, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+ * Copyright (C) 2001, 2004, 2005, 2006, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
   *
   * This library is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public License
@@ -384,7 +384,7 @@ SCM_DEFINE (scm_string_join, "string-join", 1, 2, 0,
             "@item infix\n"
             "Insert the separator between list elements.  An empty string\n"
             "will produce an empty list.\n"
-           "@item string-infix\n"
+           "@item strict-infix\n"
             "Like @code{infix}, but will raise an error if given the empty\n"
             "list.\n"
             "@item suffix\n"
@@ -394,91 +394,85 @@ SCM_DEFINE (scm_string_join, "string-join", 1, 2, 0,
             "@end table")
  #define FUNC_NAME s_scm_string_join
  {
-#define GRAM_INFIX        0
-#define GRAM_STRICT_INFIX 1
-#define GRAM_SUFFIX       2
-#define GRAM_PREFIX       3
-  SCM tmp;
-  SCM result;
-  int gram = GRAM_INFIX;
-  size_t del_len = 0;
-  long strings = scm_ilength (ls);
+  SCM append_list = SCM_EOL;
+  long list_len = scm_ilength (ls);
+  size_t delimiter_len = 0;
  
    /* Validate the string list.  */
-  if (strings < 0)
+  if (list_len < 0)
      SCM_WRONG_TYPE_ARG (1, ls);
  
    /* Validate the delimiter and record its length.  */
    if (SCM_UNBNDP (delimiter))
      {
        delimiter = scm_from_locale_string (" ");
-      del_len = 1;
+      delimiter_len = 1;
      }
    else
      {
        SCM_VALIDATE_STRING (2, delimiter);
-      del_len = scm_i_string_length (delimiter);
+      delimiter_len = scm_i_string_length (delimiter);
      }
  
-  /* Validate the grammar symbol and remember the grammar.  */
+  /* Validate the grammar symbol.  */
    if (SCM_UNBNDP (grammar))
-    gram = GRAM_INFIX;
-  else if (scm_is_eq (grammar, scm_sym_infix))
-    gram = GRAM_INFIX;
-  else if (scm_is_eq (grammar, scm_sym_strict_infix))
-    gram = GRAM_STRICT_INFIX;
-  else if (scm_is_eq (grammar, scm_sym_suffix))
-    gram = GRAM_SUFFIX;
-  else if (scm_is_eq (grammar, scm_sym_prefix))
-    gram = GRAM_PREFIX;
-  else
+    grammar = scm_sym_infix;
+  else if (!(scm_is_eq (grammar, scm_sym_infix)
+             || scm_is_eq (grammar, scm_sym_strict_infix)
+             || scm_is_eq (grammar, scm_sym_suffix)
+             || scm_is_eq (grammar, scm_sym_prefix)))
      SCM_WRONG_TYPE_ARG (3, grammar);
  
-  /* Check grammar constraints.  */
-  if (strings == 0 && gram == GRAM_STRICT_INFIX)
-    SCM_MISC_ERROR ("strict-infix grammar requires non-empty list",
-                   SCM_EOL);
+  if (list_len == 0)
+    {
+      if (scm_is_eq (grammar, scm_sym_strict_infix))
+        SCM_MISC_ERROR ("strict-infix grammar requires non-empty list",
+                        SCM_EOL);
+      else
+        /* Handle empty lists specially */
+        append_list = SCM_EOL;
+    }
+  else if (delimiter_len == 0)
+    /* Handle empty delimiters specially */
+    append_list = ls;
+  else
+    {
+      SCM *last_cdr_p = &append_list;
  
-  result = scm_i_make_string (0, NULL, 0);
+#define ADD_TO_APPEND_LIST(x)                   \
+      ((*last_cdr_p = scm_list_1 (x)),          \
+       (last_cdr_p = SCM_CDRLOC (*last_cdr_p)))
  
-  tmp = ls;
-  switch (gram)
-    {
-    case GRAM_INFIX:
-    case GRAM_STRICT_INFIX:
-      while (scm_is_pair (tmp))
-       {
-         result = scm_string_append (scm_list_2 (result, SCM_CAR (tmp)));
-         if (!scm_is_null (SCM_CDR (tmp)) && del_len > 0)
-           result = scm_string_append (scm_list_2 (result, delimiter));
-         tmp = SCM_CDR (tmp);
-       }
-      break;
-    case GRAM_SUFFIX:
-      while (scm_is_pair (tmp))
-       {
-         result = scm_string_append (scm_list_2 (result, SCM_CAR (tmp)));
-         if (del_len > 0)
-           result = scm_string_append (scm_list_2 (result, delimiter));
-         tmp = SCM_CDR (tmp);
-       }
-      break;
-    case GRAM_PREFIX:
-      while (scm_is_pair (tmp))
-       {
-         if (del_len > 0)
-           result = scm_string_append (scm_list_2 (result, delimiter));
-         result = scm_string_append (scm_list_2 (result, SCM_CAR (tmp)));
-         tmp = SCM_CDR (tmp);
-       }
-      break;
+      /* Build a list of strings to pass to 'string-append'.
+         Here we assume that 'ls' has at least one element. */
+
+      /* If using the 'prefix' grammar, start with the delimiter. */
+      if (scm_is_eq (grammar, scm_sym_prefix))
+        ADD_TO_APPEND_LIST (delimiter);
+
+      /* Handle the first element of 'ls' specially, so that in the loop
+         that follows we can unconditionally insert the delimiter before
+         every remaining element. */
+      ADD_TO_APPEND_LIST (SCM_CAR (ls));
+      ls = SCM_CDR (ls);
+
+      /* Insert the delimiter before every remaining element. */
+      while (scm_is_pair (ls))
+        {
+          ADD_TO_APPEND_LIST (delimiter);
+          ADD_TO_APPEND_LIST (SCM_CAR (ls));
+          ls = SCM_CDR (ls);
+        }
+
+      /* If using the 'suffix' grammar, add the delimiter to the end. */
+      if (scm_is_eq (grammar, scm_sym_suffix))
+        ADD_TO_APPEND_LIST (delimiter);
+
+#undef ADD_TO_APPEND_LIST
      }
  
-  return result;
-#undef GRAM_INFIX
-#undef GRAM_STRICT_INFIX
-#undef GRAM_SUFFIX
-#undef GRAM_PREFIX
+  /* Construct the final result. */
+  return scm_string_append (append_list);
  }
  #undef FUNC_NAME
  
@@ -552,10 +546,17 @@ SCM_DEFINE (scm_string_copy_x, "string-copy!", 3, 2, 0,
        SCM_ASSERT_RANGE (3, s, len <= scm_i_string_length (target) - ctstart);
  
        target = scm_i_string_start_writing (target);
-      for (i = 0; i < cend - cstart; i++)
+      if (ctstart < cstart)
+        {
+          for (i = 0; i < len; i++)
+            scm_i_string_set_x (target, ctstart + i,
+                                scm_i_string_ref (s, cstart + i));
+        }
+      else
          {
-          scm_i_string_set_x (target, ctstart + i, 
-                              scm_i_string_ref (s, cstart + i));
+          for (i = len; i--;)
+            scm_i_string_set_x (target, ctstart + i,
+                                scm_i_string_ref (s, cstart + i));
          }
        scm_i_string_stop_writing ();
        scm_remember_upto_here_1 (target);
@@ -719,7 +720,8 @@ SCM_DEFINE (scm_string_trim, "string-trim", 1, 3, 0,
    MY_VALIDATE_SUBSTRING_SPEC (1, s,
                               3, start, cstart,
                               4, end, cend);
-  if (SCM_UNBNDP (char_pred))
+  if (SCM_UNBNDP (char_pred)
+      || scm_is_eq (char_pred, scm_char_set_whitespace))
      {
        while (cstart < cend)
         {
@@ -794,7 +796,8 @@ SCM_DEFINE (scm_string_trim_right, "string-trim-right", 1, 3, 0,
    MY_VALIDATE_SUBSTRING_SPEC (1, s,
                               3, start, cstart,
                               4, end, cend);
-  if (SCM_UNBNDP (char_pred))
+  if (SCM_UNBNDP (char_pred)
+      || scm_is_eq (char_pred, scm_char_set_whitespace))
      {
        while (cstart < cend)
         {
@@ -869,7 +872,8 @@ SCM_DEFINE (scm_string_trim_both, "string-trim-both", 1, 3, 0,
    MY_VALIDATE_SUBSTRING_SPEC (1, s,
                               3, start, cstart,
                               4, end, cend);
-  if (SCM_UNBNDP (char_pred))
+  if (SCM_UNBNDP (char_pred)
+      || scm_is_eq (char_pred, scm_char_set_whitespace))
      {
        while (cstart < cend)
         {
@@ -2990,11 +2994,22 @@ SCM_DEFINE (scm_string_tokenize, "string-tokenize", 1, 3, 0,
  #undef FUNC_NAME
  
  SCM_DEFINE (scm_string_split, "string-split", 2, 0, 0,
-           (SCM str, SCM chr),
+           (SCM str, SCM char_pred),
             "Split the string @var{str} into a list of the substrings delimited\n"
-           "by appearances of the character @var{chr}.  Note that an empty substring\n"
-           "between separator characters will result in an empty string in the\n"
-           "result list.\n"
+            "by appearances of characters that\n"
+            "\n"
+            "@itemize @bullet\n"
+            "@item\n"
+            "equal @var{char_pred}, if it is a character,\n"
+            "\n"
+            "@item\n"
+            "satisfy the predicate @var{char_pred}, if it is a procedure,\n"
+            "\n"
+            "@item\n"
+            "are in the set @var{char_pred}, if it is a character set.\n"
+            "@end itemize\n\n"
+            "Note that an empty substring between separator characters\n"
+            "will result in an empty string in the result list.\n"
             "\n"
             "@lisp\n"
             "(string-split \"root:x:0:0:root:/root:/bin/bash\" #\\:)\n"
@@ -3011,47 +3026,73 @@ SCM_DEFINE (scm_string_split, "string-split", 2, 0, 0,
             "@end lisp")
  #define FUNC_NAME s_scm_string_split
  {
-  long idx, last_idx;
-  int narrow;
    SCM res = SCM_EOL;
  
    SCM_VALIDATE_STRING (1, str);
-  SCM_VALIDATE_CHAR (2, chr);
    
-  /* This is explicit wide/narrow logic (instead of using
-     scm_i_string_ref) is a speed optimization.  */
-  idx = scm_i_string_length (str);
-  narrow = scm_i_is_narrow_string (str);
-  if (narrow)
+  if (SCM_CHARP (char_pred))
      {
-      const char *buf = scm_i_string_chars (str);
-      while (idx >= 0)
+      long idx, last_idx;
+      int narrow;
+
+      /* This is explicit wide/narrow logic (instead of using
+         scm_i_string_ref) is a speed optimization.  */
+      idx = scm_i_string_length (str);
+      narrow = scm_i_is_narrow_string (str);
+      if (narrow)
+        {
+          const char *buf = scm_i_string_chars (str);
+          while (idx >= 0)
+            {
+              last_idx = idx;
+              while (idx > 0 && buf[idx-1] != (char) SCM_CHAR(char_pred))
+                idx--;
+              if (idx >= 0)
+                {
+                  res = scm_cons (scm_i_substring (str, idx, last_idx), res);
+                  idx--;
+                }
+            }
+        }
+      else
          {
-          last_idx = idx;
-          while (idx > 0 && buf[idx-1] != (char) SCM_CHAR(chr))
-            idx--;
-          if (idx >= 0)
+          const scm_t_wchar *buf = scm_i_string_wide_chars (str);
+          while (idx >= 0)
              {
-              res = scm_cons (scm_i_substring (str, idx, last_idx), res);
-              idx--;
+              last_idx = idx;
+              while (idx > 0 && buf[idx-1] != SCM_CHAR(char_pred))
+                idx--;
+              if (idx >= 0)
+                {
+                  res = scm_cons (scm_i_substring (str, idx, last_idx), res);
+                  idx--;
+                }
              }
          }
      }
    else
      {
-      const scm_t_wchar *buf = scm_i_string_wide_chars (str);
-      while (idx >= 0)
+      SCM sidx, slast_idx;
+
+      if (!SCM_CHARSETP (char_pred))
+        SCM_ASSERT (scm_is_true (scm_procedure_p (char_pred)),
+                    char_pred, SCM_ARG2, FUNC_NAME);
+
+      /* Supporting predicates and character sets involves handling SCM
+         values so there is less chance to optimize. */
+      slast_idx = scm_string_length (str);
+      for (;;)
          {
-          last_idx = idx;
-          while (idx > 0 && buf[idx-1] != SCM_CHAR(chr))
-            idx--;
-          if (idx >= 0)
-            {
-              res = scm_cons (scm_i_substring (str, idx, last_idx), res);
-              idx--;
-            }
+          sidx = scm_string_index_right (str, char_pred, SCM_INUM0, slast_idx);
+          if (scm_is_false (sidx))
+            break;
+          res = scm_cons (scm_substring (str, scm_oneplus (sidx), slast_idx), res);
+          slast_idx = sidx;
          }
+
+      res = scm_cons (scm_substring (str, SCM_INUM0, slast_idx), res);
      }
+
    scm_remember_upto_here_1 (str);
    return res;
  }