Commit | Line | Data |
---|---|---|
8d138742 CE |
1 | # Copyright 2000-2002 Double Precision, Inc. |
2 | # Changed 2006 by Serhij Dubyk | |
3 | # | |
4 | # $Id: mkkoi8u.pl,v 1.1 2006/03/25 14:24:43 mrsam Exp $ | |
5 | # | |
6 | # Generate KOI8-U unicode tables | |
7 | ||
# Load the simple case mappings from UnicodeData.txt.
#
# The file is semicolon-separated.  Field 0 is the code point and fields
# 12, 13 and 14 are the upper-, lower- and title-case mappings; all of them
# are hexadecimal numbers without a prefix, and a mapping field is empty
# (or missing) when the character has no such mapping.
#
# Results: %UC, %LC and %TC map a numeric code point to the numeric code
# point of its upper-, lower- and title-case form respectively.

my (%UC, %LC, %TC);

open(my $unicode_data, '<', 'UnicodeData.txt')
    or die "UnicodeData.txt: $!\n";

while (my $line = <$unicode_data>)
{
    chomp $line;

    my @fields = split /;/, $line;

    # The values come from an external file, so convert them with hex()
    # after validating them instead of handing them to a string eval.
    next unless defined $fields[0] && $fields[0] =~ /^[0-9A-Fa-f]+$/;

    my $code = hex($fields[0]);

    foreach my $mapping ([12, \%UC], [13, \%LC], [14, \%TC])
    {
        my ($index, $table) = @$mapping;
        my $target = $fields[$index];

        # split drops trailing empty fields, so the entry may be undef.
        next unless defined $target && $target =~ /^[0-9A-Fa-f]+$/;

        $table->{$code} = hex($target);
    }
}

close($unicode_data);
44 | ||
# Read the KOI8-U mapping table from koi8-u.txt.
#
# Everything from '#' to the end of a line is a comment.  Each remaining
# line holds a byte value followed by the Unicode code point it maps to,
# separated by blanks and/or tabs.
#
# Results: @fwd maps a byte value to its code point, %rev maps a code point
# back to its byte value.

my @fwd;

my %rev;

# Convert one numeric token from the mapping file.  Accepts 0x-prefixed
# hexadecimal or plain decimal digits; returns nothing (undef in scalar
# context) for anything else, including a missing token.
sub _parse_number
{
    my ($token) = @_;

    return unless defined $token;
    return hex($1) if $token =~ /^0[xX]([0-9A-Fa-f]+)$/;
    return $token + 0 if $token =~ /^[0-9]+$/;
    return;
}

open(my $charset_map, '<', 'koi8-u.txt') or die "koi8-u.txt: $!\n";

while (my $line = <$charset_map>)
{
    chomp $line;
    $line =~ s/\#.*//;

    my ($code_token, $unicode_token) = split /[ \t]+/, $line;

    # Blank lines, comment-only lines and lines starting with whitespace
    # carry no byte value.
    next unless defined $code_token && $code_token ne "";

    # The tokens come from an external file: parse them as numbers rather
    # than evaluating them as Perl code.
    my $code = _parse_number($code_token);

    die "koi8-u.txt line " . $. . ": bad byte value '$code_token'\n"
        unless defined $code && $code <= 255;

    my $unicode = _parse_number($unicode_token);

    if (!defined $unicode)
    {
        # A byte without a code point simply stays unmapped; only
        # complain when there was a token we could not understand.
        warn "koi8-u.txt line " . $. . ": ignoring '$unicode_token'\n"
            if defined $unicode_token && $unicode_token ne "";
        next;
    }

    $fwd[$code] = $unicode;
    $rev{$unicode} = $code;
}

close($charset_map);
68 | ||
# Name of the generated C array that maps bytes 128..255 to Unicode.
my $fwdname="koi8u_tab";

# Leading comment and #include of the generated C file.  Single quotes keep
# the RCS keyword from being interpolated by Perl.
print '
/*
** Copyright 2000-2002 Double Precision, Inc.
** Changed 2006 by Serhij Dubyk
**
** $Id: mkkoi8u.pl,v 1.1 2006/03/25 14:24:43 mrsam Exp $
*/

#include "unicode.h"
';


print "static const unicode_char $fwdname [128]={\n";

# Only the upper half is emitted, so the lower half must be the identity
# mapping.  The defined() test matters for byte 0: an unmapped entry is
# undef, and undef == 0 would otherwise pass the comparison.
for my $byte (0 .. 127)
{
    die "Unicode map not US-ASCII: $byte\n"
        unless (defined $fwd[$byte] && $fwd[$byte] == $byte);
}

# Sixteen initializers per output line; unmapped bytes are written as 0.
for my $byte (128 .. 255)
{
    my $n=$fwd[$byte];

    $n=0 unless $n;

    print "$n";
    print "," if $byte < 255;
    print "\n" if ($byte % 16) == 15;
}

print "};\n";
102 | ||
# Print the opening line and the 256 initializers of one byte-to-byte case
# conversion table.  The closing "};" is left to the caller.
#
#   $table_name - name of the C array to emit
#   $case_map   - hash ref: code point -> case-converted code point
#   $fwd_tab    - array ref: byte value -> code point
#   $rev_tab    - hash ref: code point -> byte value
#
# A byte maps to itself unless its code point has an entry in $case_map
# and the converted code point has an entry in $rev_tab.
sub _print_case_table
{
    my ($table_name, $case_map, $fwd_tab, $rev_tab) = @_;

    print "static const char $table_name [256]={\n";

    for my $byte (0 .. 255)
    {
        my $unicode = $fwd_tab->[$byte];
        my $converted;

        $converted = $case_map->{$unicode} if defined $unicode;

        my $target = $byte;

        $target = $rev_tab->{$converted}
            if $converted && $rev_tab->{$converted};

        # Eight initializers per output line.
        printf("(char)0x%02x", $target);
        print "," if $byte < 255;
        print "\n" if ($byte % 8) == 7;
    }
}

my $ucname="koi8u_uc";

_print_case_table($ucname, \%UC, \@fwd, \%rev);

print "};\n";

my $lcname="koi8u_lc";

_print_case_table($lcname, \%LC, \@fwd, \%rev);

print "};\n";

my $tcname="koi8u_tc";

# No closing "};" here: the print statement that follows this section
# starts with it.
_print_case_table($tcname, \%TC, \@fwd, \%rev);
151 | ||
# Name of the exported C structure and the character set name stored in it.
my $structname="unicode_KOI8U";
my $chsetname="KOI8-U";

# The leading "};" closes the array initializer that is still open at this
# point.  The rest is the set of static wrapper functions, which pass the
# generated tables to the unicode_iso8859_* routines, followed by the
# unicode_info structure that references those wrappers.
print <<"END_OF_C_SOURCE";
};


static unicode_char *c2u(const struct unicode_info *u, const char *cp, int *ip)
{
return (unicode_iso8859_c2u(cp, ip, $fwdname));
}

static char *u2c(const struct unicode_info *u, const unicode_char *cp, int *ip)
{
return (unicode_iso8859_u2c(cp, ip, $fwdname));
}

static char *toupper_func(const struct unicode_info *u, const char *cp, int *ip)
{
return (unicode_iso8859_convert(cp, ip, $ucname));
}

static char *tolower_func(const struct unicode_info *u, const char *cp, int *ip)
{
return (unicode_iso8859_convert(cp, ip, $lcname));
}

static char *totitle_func(const struct unicode_info *u, const char *cp, int *ip)
{
return (unicode_iso8859_convert(cp, ip, $tcname));
}

const struct unicode_info $structname = {
"$chsetname",
UNICODE_HEADER_BASE64 | UNICODE_BODY_BASE64,
c2u,
u2c,
toupper_func,
tolower_func,
totitle_func};
END_OF_C_SOURCE