#!/usr/bin/perl
# unidata_to_charset.pl --- Compute SRFI-14 charsets from UnicodeData.txt
#
-# Copyright (C) 2009 Free Software Foundation, Inc.
-#
+# Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 3 of the License, or (at your option) any later version.
-#
+#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
-#
+#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
return 0;
}
-# Full -- All characters.
-sub full {
+# Designated -- All characters except for the surrogates, i.e. those whose $category does not match Cs.
+sub designated { # Membership predicate: returns 1 if the character belongs to the set, 0 if not.
my($codepoint, $name, $category, $uppercase, $lowercase)= @_;
- return 1;
+ if ($category =~ (/Cs/)) { # Unanchored match: true whenever $category contains "Cs"; only $category is consulted.
+ return 0; # Surrogate: excluded from the set.
+ } else {
+ return 1; # Every other category is a member.
+ }
}
$rend[$len] = $end;
$len++;
} elsif ($len == 0) {
- $rstart[0] = $start;
- $rend[0] = $end;
+ $rstart[0] = $start;
+ $rend[0] = $end;
+ $len++;
}
}
# Write a bit of a header
print $out "/* srfi-14.i.c -- standard SRFI-14 character set data */\n\n";
print $out "/* This file is #include'd by srfi-14.c. */\n\n";
-print $out "/* This file was generated from\n"
+print $out "/* This file was generated from\n"; # Opens the provenance comment; the next two prints complete and close it.
print $out " http://unicode.org/Public/UNIDATA/UnicodeData.txt\n";
print $out " with the unidata_to_charset.pl script. */\n\n";
compute "blank";
compute "ascii";
compute "empty";
-compute "full";
+compute "designated"; # Every character except the surrogates.
close $in;
close $out;