Merge remote-tracking branch 'origin/stable-2.0'

[bpt/guile.git] / libguile / strings.c
diff --git a/libguile/strings.c b/libguile/strings.c

index 5130cb3..23a1a70 100644 (file)
--- a/libguile/strings.c
+++ b/libguile/strings.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
   * 
   * This library is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public License
@@ -1401,7 +1401,8 @@ SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
  #define FUNC_NAME s_scm_string_append
  {
    SCM res;
-  size_t len = 0;
+  size_t total = 0;
+  size_t len;
    int wide = 0;
    SCM l, s;
    size_t i;
@@ -1416,15 +1417,18 @@ SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
      {
        s = SCM_CAR (l);
        SCM_VALIDATE_STRING (SCM_ARGn, s);
-      len += scm_i_string_length (s);
+      len = scm_i_string_length (s);
+      if (((size_t) -1) - total < len)
+        scm_num_overflow (s_scm_string_append);
+      total += len;
        if (!scm_i_is_narrow_string (s))
          wide = 1;
      }
    data.narrow = NULL;
    if (!wide)
-    res = scm_i_make_string (len, &data.narrow, 0);
+    res = scm_i_make_string (total, &data.narrow, 0);
    else
-    res = scm_i_make_wide_string (len, &data.wide, 0);
+    res = scm_i_make_wide_string (total, &data.wide, 0);
  
    for (l = args; !scm_is_null (l); l = SCM_CDR (l))
      {
@@ -1432,6 +1436,8 @@ SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
        s = SCM_CAR (l);
        SCM_VALIDATE_STRING (SCM_ARGn, s);
        len = scm_i_string_length (s);
+      if (len > total)
+        SCM_MISC_ERROR ("list changed during string-append", SCM_EOL);
        if (!wide)
          {
            memcpy (data.narrow, scm_i_string_chars (s), len);
@@ -1441,16 +1447,20 @@ SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
          {
            if (scm_i_is_narrow_string (s))
              {
-              for (i = 0; i < scm_i_string_length (s); i++)
-                data.wide[i] = (unsigned char) scm_i_string_chars (s)[i];
+              const char *src = scm_i_string_chars (s);
+              for (i = 0; i < len; i++)
+                data.wide[i] = (unsigned char) src[i];
              }
            else
              u32_cpy ((scm_t_uint32 *) data.wide,
                       (scm_t_uint32 *) scm_i_string_wide_chars (s), len);
            data.wide += len;
          }
+      total -= len;
        scm_remember_upto_here_1 (s);
      }
+  if (total != 0)
+    SCM_MISC_ERROR ("list changed during string-append", SCM_EOL);
    return res;
  }
  #undef FUNC_NAME
@@ -1524,9 +1534,10 @@ scm_from_stringn (const char *str, size_t len, const char *encoding,
    if (len == (size_t) -1)
      len = strlen (str);
  
-  if (encoding == NULL || len == 0)
+  if (strcmp (encoding, "ISO-8859-1") == 0 || len == 0)
      return scm_from_latin1_stringn (str, len);
-  else if (strcmp (encoding, "UTF-8") == 0)
+  else if (strcmp (encoding, "UTF-8") == 0
+           && handler == SCM_FAILED_CONVERSION_ERROR)
      return scm_from_utf8_stringn (str, len);
  
    u32len = 0;
@@ -1639,7 +1650,7 @@ scm_from_utf8_stringn (const char *str, size_t len)
  
            nbytes = u8_mbtouc (&c, ustr + i, len - i);
  
-          if (nbytes < 0)
+          if (c == 0xfffd)
              /* Bad UTF-8.  */
              decoding_error (__func__, errno, str, len);
  
@@ -1711,6 +1722,26 @@ scm_from_utf32_stringn (const scm_t_wchar *str, size_t len)
    return result;
  }
  
+SCM
+scm_from_port_string (const char *str, SCM port)
+{
+  return scm_from_port_stringn (str, -1, port);
+}
+
+SCM
+scm_from_port_stringn (const char *str, size_t len, SCM port)
+{
+  scm_t_port *pt = SCM_PTAB_ENTRY (port);
+
+  if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
+    return scm_from_latin1_stringn (str, len);
+  else if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8
+           && pt->ilseq_handler == SCM_FAILED_CONVERSION_ERROR)
+    return scm_from_utf8_stringn (str, len);
+  else
+    return scm_from_stringn (str, len, pt->encoding, pt->ilseq_handler);
+}
+
  /* Create a new scheme string from the C string STR.  The memory of
     STR may be used directly as storage for the new string.  */
  /* FIXME: GC-wise, the only way to use the memory area pointed to by STR
@@ -2096,6 +2127,26 @@ scm_to_utf32_stringn (SCM str, size_t *lenp)
  }
  #undef FUNC_NAME
  
+char *
+scm_to_port_string (SCM str, SCM port)
+{
+  return scm_to_port_stringn (str, NULL, port);
+}
+
+char *
+scm_to_port_stringn (SCM str, size_t *lenp, SCM port)
+{
+  scm_t_port *pt = SCM_PTAB_ENTRY (port);
+
+  if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1
+      && pt->ilseq_handler == SCM_FAILED_CONVERSION_ERROR)
+    return scm_to_latin1_stringn (str, lenp);
+  else if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
+    return scm_to_utf8_stringn (str, lenp);
+  else
+    return scm_to_stringn (str, lenp, pt->encoding, pt->ilseq_handler);
+}
+
  /* Return a malloc(3)-allocated buffer containing the contents of STR encoded
     according to ENCODING.  If LENP is non-NULL, set it to the size in bytes of
     the returned buffer.  If the conversion to ENCODING fails, apply the strategy
@@ -2129,7 +2180,7 @@ scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
                          "string contains #\\nul character: ~S",
                          scm_list_1 (str));
  
-  if (scm_i_is_narrow_string (str) && (encoding == NULL))
+  if (scm_i_is_narrow_string (str) && strcmp (encoding, "ISO-8859-1") == 0)
      {
        /* If using native Latin-1 encoding, just copy the string
           contents.  */