From 24d23822ee9d6a515aed8baaeff9d363fd7ec813 Mon Sep 17 00:00:00 2001
From: Michael Gran <spk121@yahoo.com>
Date: Fri, 28 Aug 2009 23:47:42 -0700
Subject: [PATCH] Surrogate characters shouldn't be in charsets

* libguile/srfi-14.c (charsets_complement): use surrogate #defines instead
  of hardcoded numbers

* libguile/srfi-14.i.c (cs_full_ranges): remove surrogates from full
  charset

* libguile/unidata_to_charset.pl (full): test for surrogates; also add
  the missing semicolon after the first header print statement
---
 libguile/srfi-14.c             |  5 +++--
 libguile/srfi-14.i.c           |  5 +++--
 libguile/unidata_to_charset.pl | 10 +++++++---
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/libguile/srfi-14.c b/libguile/srfi-14.c
index 7ab65ac97..50229ef0f 100644
--- a/libguile/srfi-14.c
+++ b/libguile/srfi-14.c
@@ -29,6 +29,7 @@
 #include "libguile.h"
 #include "libguile/srfi-14.h"
 #include "libguile/strings.h"
+#include "libguile/chars.h"
 
 /* Include the pre-computed standard charset data.  */
 #include "libguile/srfi-14.i.c"
@@ -386,8 +387,8 @@ charsets_complement (scm_t_char_set *p, scm_t_char_set *q)
       p->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * 2,
                                  "character-set");
       p->ranges[0].lo = 0;
-      p->ranges[0].hi = 0xd7ff;
-      p->ranges[1].lo = 0xe000;
+      p->ranges[0].hi = SCM_CODEPOINT_SURROGATE_START - 1;
+      p->ranges[1].lo = SCM_CODEPOINT_SURROGATE_END + 1;
       p->ranges[1].hi = SCM_CODEPOINT_MAX;
       return;
     }
diff --git a/libguile/srfi-14.i.c b/libguile/srfi-14.i.c
index 5ef21f333..d92b4d73e 100644
--- a/libguile/srfi-14.i.c
+++ b/libguile/srfi-14.i.c
@@ -2,7 +2,8 @@
 
 /* This file is #include'd by srfi-14.c.  */
 
-/* This file was generated from http://unicode.org/Public/UNIDATA/UnicodeData.txt
+/* This file was generated from
+   http://unicode.org/Public/UNIDATA/UnicodeData.txt
    with the unidata_to_charset.pl script.  */
 
 scm_t_char_range cs_lower_case_ranges[] = {
@@ -6925,7 +6926,7 @@ scm_t_char_range cs_full_ranges[] = {
   ,
   {0xac00, 0xd7a3}
   ,
-  {0xd800, 0xfa2d}
+  {0xe000, 0xfa2d}
   ,
   {0xfa30, 0xfa6a}
   ,
diff --git a/libguile/unidata_to_charset.pl b/libguile/unidata_to_charset.pl
index 6871e67ee..61c8d100e 100755
--- a/libguile/unidata_to_charset.pl
+++ b/libguile/unidata_to_charset.pl
@@ -254,10 +254,14 @@ sub empty {
     return 0;
 }
 
-# Full -- All characters.  
+# Full -- All characters except for the surrogates
 sub full {
     my($codepoint, $name, $category, $uppercase, $lowercase)= @_;
-    return 1;
+    if ($category =~ (/Cs/)) {
+        return 0;
+    } else {
+        return 1;
+    }
 }
 
 
@@ -362,7 +366,7 @@ sub compute {
 # Write a bit of a header
 print $out "/* srfi-14.i.c -- standard SRFI-14 character set data */\n\n";
 print $out "/* This file is #include'd by srfi-14.c.  */\n\n";
-print $out "/* This file was generated from\n"
+print $out "/* This file was generated from\n";
 print $out "   http://unicode.org/Public/UNIDATA/UnicodeData.txt\n";
 print $out "   with the unidata_to_charset.pl script.  */\n\n";
 
-- 
2.20.1