Imported Upstream version 0.63.0
[hcoop/debian/courier-authlib.git] / unicode / ksx1001.pl
1 #! /usr/bin/perl
2
3 # USAGE: perl ksx1001.pl > ksx1001.h
4
5 # Requires CP949.TXT, found on:
# http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT
7
8 require "cjkcompat.pl";
9
# Name of the input mapping file, and the number of table entries written
# per line of generated C source.
$cp949 = 'CP949.TXT';
$perline = 8;

# Read the mapping file into %k2u (code => Unicode code point).
die "${cp949}: File not found.\n" if (!(-f $cp949));

# Three-argument open on a lexical handle: the file name can never be taken
# for an open mode, and the handle cannot collide with a global bareword.
open(my $set_fh, '<', $cp949) or die "${cp949}: $!\n";

while (my $line = <$set_fh>) {
    chomp $line;

    # Strip everything from the first '#' onwards.
    $line =~ s/\#.*//;

    # Expect "0x<2-4 hex digits> 0x<4 hex digits>"; anything else is skipped.
    # Matching is case-insensitive so lower-case hex digits are accepted too
    # (hex() handles both forms).
    next unless $line =~ /^0x([0-9A-F]{2,4})\s+0x([0-9A-F]{4})\s*$/i;

    my ($code, $ucode) = (hex($1), hex($2));

    # Only codes above 0x8000 are recorded; the rest are ignored.
    if ($code > 0x8000) {
        print STDERR "Warning: duplicated: $code->$k2u{$code},$ucode\n"
            if defined $k2u{$code};
        $k2u{$code} = $ucode;
    }
}

close $set_fh;
33
# Build the reverse (Unicode => code) maps from %k2u.
#
# A code whose high byte and low byte are both >= 0xA1 is stored in
# %u2k_ksx1001; every other code is stored in %u2k_cp949.
#
# Keys are visited in ascending numeric order so that, if two codes in the
# same table share a Unicode value, the winner does not depend on Perl's
# hash ordering (the highest code wins).
foreach my $kcode (sort { $a <=> $b } keys %k2u) {
    my $ucode = $k2u{$kcode};

    # Warn when this Unicode value already has a reverse mapping.  The two
    # tables filled in below are consulted directly: they are the only
    # reverse maps this loop populates, so a check against any other hash
    # could never fire.
    my $previous = defined $u2k_ksx1001{$ucode} ? $u2k_ksx1001{$ucode}
                                                : $u2k_cp949{$ucode};
    print STDERR "Warning: duplicated: $previous,$kcode<-$ucode\n"
        if defined $previous;

    my $high = $kcode >> 8;
    my $low  = $kcode & 0x00FF;
    if ($high >= 0xA1 && $low >= 0xA1) {
        $u2k_ksx1001{$ucode} = $kcode;
    } else {
        $u2k_cp949{$ucode} = $kcode;
    }
}
47
# Extend the reverse maps using the CJK compatibility ideograph tables.
# The %compat_* hashes are not defined in this file (presumably they come
# from cjkcompat.pl -- confirm).  The KS X 1001 table stays disabled.
####&add_cjkcompat(%compat_ksx1001);
foreach my $compat_ref (\%compat_big5, \%compat_ibm32,
                        \%compat_jisx0213, \%compat_cns11643) {
    add_cjkcompat(%{$compat_ref});
}
54
# add_cjkcompat(%compat)
#
# Takes a flattened hash of (key => value) pairs.  For each pair whose value
# already has an entry in %u2k_ksx1001, the key receives the same entry in
# %u2k_ksx1001; likewise, independently, for %u2k_cp949.  Pairs whose value
# has no entry in either table leave both tables untouched.
#
# Works purely by updating the two global tables; callers use it in void
# context.
sub add_cjkcompat {
    # Lexical copy of the arguments.  local() would instead temporarily
    # replace the package global %compat for the duration of the call.
    my %compat = @_;

    foreach my $alias (keys %compat) {
        my $target = $compat{$alias};

        $u2k_ksx1001{$alias} = $u2k_ksx1001{$target}
            if defined $u2k_ksx1001{$target};

        $u2k_cp949{$alias} = $u2k_cp949{$target}
            if defined $u2k_cp949{$target};
    }

    return;
}
66
67
# Emit the fixed preamble of the generated header: include guard, banner
# comment, the unicode.h include and the KS_* constants.
#
# The here-document is non-interpolating so that the "$Id: ... $" line and
# the "@" in the e-mail address are written out literally.  In an
# interpolating here-document "$Id" is expanded as an (unset) Perl variable
# and disappears from the output.
print <<'EOF';
#ifndef _KSX1001_HDR_
#define _KSX1001_HDR_
/*
* KS X 1001 and CP949 (UHC) support
* by Hatuka*nezumi - IKEDA Soji <nezumi@jca.apc.org>
* $Id: ksx1001.pl,v 1.1 2004/02/03 02:00:00 mrsam Exp $
*
*/

#include "unicode.h"

#define KS_CHAR_SO 0x0E
#define KS_CHAR_SI 0x0F
#define KS_CHAR_ESC 0x1B

/* ISOREG #1/#3: US-ASCII (identical to ISO 646 IRV) */
#define KS_STATE_ASCII 0x0
/* ISOREG #149: KS X 1001:1992 Wansung */
#define KS_STATE_KSX1001 0x4
/* Unknown state */
#define KS_STATE_BINARY 0xF

EOF
92
# Emit the CP949 -> Unicode lookup tables.
#
# One array is written for every high byte in 0x81..0xFE that has at least
# one mapped low byte in 0x41..0xFE.  Unmapped slots of such an array are
# filled with 0xFFFD, and that fill value is stored back into %k2u.  A
# pointer table with one slot per high byte follows, holding NULL where no
# array was written.
print "/* map: CP949 to Unicode */\n";
foreach my $high (0x81 .. 0xFE) {
    my @row_codes = map { $high * 256 + $_ } 0x41 .. 0xFE;

    # Nothing mapped under this high byte: no array is generated.
    next unless grep { $k2u{$_} } @row_codes;

    printf "static const unicode_char cp949_to_uni_tbl_%02x[] = {", $high;
    my $slot = 0;
    foreach my $code (@row_codes) {
        print ", " if $slot > 0;
        print "\n " if $slot % $perline == 0;
        $k2u{$code} ||= 0xFFFD;
        printf("0x%04X", $k2u{$code});
        $slot++;
    }
    print "\n};\n";
    $k2uout{$high} = 1;
}

# Pointer table: the array name for generated rows, NULL otherwise.
my @cp949_to_uni_rows = map {
    $k2uout{$_} ? sprintf("cp949_to_uni_tbl_%02x", $_) : "NULL"
} 0x81 .. 0xFE;

print "const unicode_char * cp949_to_uni_tbls[] = {\n";
print " ", join(",\n ", @cp949_to_uni_rows);
print "\n};\n";
print "\n\n";
127
128
# Emit the Unicode -> KS X 1001 lookup tables.
#
# One 256-entry array is written for every high byte (0x00..0xFF) under
# which %u2k_ksx1001 has at least one entry.  Unmapped slots of such an
# array are filled with 0x003F, and that fill value is stored back into
# %u2k_ksx1001.  A 256-slot pointer table follows, holding NULL where no
# array was written.
%u2kout = ();
print "/* map: Unicode to KS X 1001 */\n";
foreach my $high (0x00 .. 0xFF) {
    my @row_points = map { $high * 256 + $_ } 0x00 .. 0xFF;

    # Nothing mapped under this high byte: no array is generated.
    next unless grep { $u2k_ksx1001{$_} } @row_points;

    printf "static const unicode_char uni_to_ksx1001_tbl_%02x[] = {", $high;
    my $slot = 0;
    foreach my $point (@row_points) {
        print ", " if $slot > 0;
        print "\n " if $slot % $perline == 0;
        $u2k_ksx1001{$point} ||= 0x003F;
        printf("0x%04X", $u2k_ksx1001{$point});
        $slot++;
    }
    print "\n};\n";
    $u2kout{$high} = 1;
}

# Pointer table: the array name for generated rows, NULL otherwise.
my @uni_to_ksx1001_rows = map {
    $u2kout{$_} ? sprintf("uni_to_ksx1001_tbl_%02x", $_) : "NULL"
} 0x00 .. 0xFF;

print "const unicode_char * uni_to_ksx1001_tbls[] = {\n";
print " ", join(",\n ", @uni_to_ksx1001_rows);
print "\n};\n";
print "\n\n";
164
# Emit the Unicode -> CP949 extension lookup tables, then close the include
# guard of the generated header.
#
# One 256-entry array is written for every high byte (0x00..0xFF) under
# which %u2k_cp949 has at least one entry.  Unmapped slots of such an array
# are filled with 0x003F, and that fill value is stored back into
# %u2k_cp949.  A 256-slot pointer table follows, holding NULL where no array
# was written.
%u2kout = ();
print "/* map: Unicode to CP949 extension */\n";
foreach my $high (0x00 .. 0xFF) {
    my @row_points = map { $high * 256 + $_ } 0x00 .. 0xFF;

    # Nothing mapped under this high byte: no array is generated.
    next unless grep { $u2k_cp949{$_} } @row_points;

    printf "static const unicode_char uni_to_cp949_tbl_%02x[] = {", $high;
    my $slot = 0;
    foreach my $point (@row_points) {
        print ", " if $slot > 0;
        print "\n " if $slot % $perline == 0;
        $u2k_cp949{$point} ||= 0x003F;
        printf("0x%04X", $u2k_cp949{$point});
        $slot++;
    }
    print "\n};\n";
    $u2kout{$high} = 1;
}

# Pointer table: the array name for generated rows, NULL otherwise.
my @uni_to_cp949_rows = map {
    $u2kout{$_} ? sprintf("uni_to_cp949_tbl_%02x", $_) : "NULL"
} 0x00 .. 0xFF;

print "const unicode_char * uni_to_cp949_tbls[] = {\n";
print " ", join(",\n ", @uni_to_cp949_rows);
print "\n};\n";
print "\n\n";


print "#endif /* _KSX1001_HDR_ */\n";