From: Ludovic Courtès <ludo@gnu.org>
Date: Wed, 15 Sep 2010 21:32:28 +0000 (+0200)
Subject: Fix write-beyond-end-of-string error in the conversion to R6RS string escapes.
X-Git-Url: https://git.hcoop.net/bpt/guile.git/commitdiff_plain/f1ee6d54d219056c62d87a8e4a6b199162c946e8

Fix write-beyond-end-of-string error in the conversion to R6RS string escapes.

Reported by Mike Gran <spk121@yahoo.com>.

* libguile/strings.c (scm_i_unistring_escapes_to_guile_escapes,
  scm_i_unistring_escapes_to_r6rs_escapes): Augment comments.
  (scm_to_stringn): When `handler ==
  SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE && SCM_R6RS_ESCAPES_P', realloc
  BUF so that it's large enough for the worst case.

* libguile/print.c (display_character): When `result != NULL && strategy
  == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE && SCM_R6RS_ESCAPES_P', make
  LOCALE_ENCODED large enough to hold an R6RS escape.
---

diff --git a/libguile/print.c b/libguile/print.c
index 2ffe70ec0..bdc6c9f20 100644
--- a/libguile/print.c
+++ b/libguile/print.c
@@ -768,7 +768,7 @@ display_character (scm_t_wchar ch, SCM port,
   else
     {
       size_t len;
-      char locale_encoded[sizeof (ch)], *result;
+      char locale_encoded[8 * sizeof (ch)], *result;
 
       len = sizeof (locale_encoded);
       result = u32_conv_to_encoding (encoding, strategy,
@@ -782,7 +782,16 @@ display_character (scm_t_wchar ch, SCM port,
 	    {
 	      /* Apply the same escaping syntax as in `write_character'.  */
 	      if (SCM_R6RS_ESCAPES_P)
-		scm_i_unistring_escapes_to_r6rs_escapes (result, &len);
+		{
+		  /* LOCALE_ENCODED is large enough to store an R6RS
+		     `\xNNNN;' escape sequence.  However, libunistring
+		     up to 0.9.3 (included) always returns a
+		     heap-allocated RESULT.  */
+		  if (SCM_UNLIKELY (result != locale_encoded))
+		    result = scm_realloc (result, len * 7);
+
+		  scm_i_unistring_escapes_to_r6rs_escapes (result, &len);
+		}
 	      else
 		scm_i_unistring_escapes_to_guile_escapes (result, &len);
 	    }
diff --git a/libguile/strings.c b/libguile/strings.c
index dbff0660a..e64c37b84 100644
--- a/libguile/strings.c
+++ b/libguile/strings.c
@@ -1575,8 +1575,9 @@ scm_take_locale_string (char *str)
   return scm_take_locale_stringn (str, -1);
 }
 
-/* Change libunistring escapes (\uXXXX and \UXXXXXXXX) to \xXX \uXXXX
-   and \UXXXXXX.  */
+/* Change libunistring escapes (`\uXXXX' and `\UXXXXXXXX') in BUF, a
+   *LENP-byte locale-encoded string, to `\xXX', `\uXXXX', or `\UXXXXXX'.
+   Set *LENP to the size of the resulting string.  */
 void
 scm_i_unistring_escapes_to_guile_escapes (char *buf, size_t *lenp)
 {
@@ -1629,7 +1630,11 @@ scm_i_unistring_escapes_to_guile_escapes (char *buf, size_t *lenp)
   *lenp = j;
 }
 
-/* Change libunistring escapes (\uXXXX and \UXXXXXXXX) to \xXXXX; */
+/* Change libunistring escapes (`\uXXXX' and `\UXXXXXXXX') in BUF, a
+   *LENP-byte locale-encoded string, to `\xXXXX;'.  Set *LENP to the size
+   of the resulting string.  BUF must be large enough to handle the
+   worst case when `\uXXXX' escapes (6 characters) are replaced by
+   `\xXXXX;' (7 characters).  */
 void
 scm_i_unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp)
 {
@@ -1815,7 +1820,14 @@ scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
   if (handler == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
     {
       if (SCM_R6RS_ESCAPES_P)
-        scm_i_unistring_escapes_to_r6rs_escapes (buf, &len);
+	{
+	  /* The worst case is if the input string contains all 4-digit
+	     hex escapes.  "\uXXXX" (six characters) becomes "\xXXXX;"
+	     (seven characters).  Make BUF large enough to hold
+	     that.  */
+	  buf = scm_realloc (buf, (len * 7) / 6 + 1);
+	  scm_i_unistring_escapes_to_r6rs_escapes (buf, &len);
+	}
       else
         scm_i_unistring_escapes_to_guile_escapes (buf, &len);