Commit | Line | Data |
---|---|---|
8d138742 CE |
# Copyright 2000-2006 Double Precision, Inc.
# See COPYING for distribution information.
#
# $Id: mkiso8859.pl,v 1.7 2006/03/25 14:24:43 mrsam Exp $
#
# Generate iso-8859* unicode tables

# Path of the 8-bit character set mapping file (first command line argument).
my $set=shift;

# Case mappings collected from UnicodeData.txt, keyed by numeric code point:
# %UC holds field 12, %LC field 13 and %TC field 14 of each record (the
# upper-, lower- and titlecase columns).  They remain package variables
# because the table generators further down the script refer to them by name.
our (%UC, %LC, %TC);

# Field index => destination hash for the three case-mapping columns.
my @case_columns=([12, \%UC], [13, \%LC], [14, \%TC]);

open(my $unidata, "<", "UnicodeData.txt") or die "UnicodeData.txt: $!\n";

while (my $record=<$unidata>)
{
    # Drop the line terminator (and a stray CR from CRLF files) so the last
    # field is a clean hexadecimal token.
    $record =~ s/\s+\z//;

    my @fields=split /;/, $record;

    # Every value of interest is a bare hexadecimal number.  Convert it with
    # hex() instead of a string eval, so that text read from the data file is
    # never executed as Perl code.
    next unless defined $fields[0] && $fields[0] =~ /\A[0-9A-Fa-f]+\z/;

    my $code=hex($fields[0]);

    for my $column (@case_columns)
    {
        my ($index, $map)=@$column;
        my $mapped=$fields[$index];

        # split drops trailing empty fields, so "no mapping" shows up as
        # either undef or the empty string; both are skipped here.
        next unless defined $mapped && $mapped =~ /\A[0-9A-Fa-f]+\z/;

        $map->{$code}=hex($mapped);
    }
}

close($unidata);

# Forward table: $fwd[$byte] is the Unicode code point assigned to that byte.
my @fwd;

# Reverse table: $rev{$codepoint} is the byte that encodes that code point.
my %rev;

# Convert a token written like a Perl numeric literal (0x.. hexadecimal,
# 0b.. binary, leading-zero octal, otherwise decimal) into a number.
# Returns undef when the token is not a well-formed unsigned integer, which
# replaces the former string eval of text taken straight from the input file.
sub _parse_int_literal
{
    my ($token)=@_;

    return unless defined $token
        && $token =~ /\A(?:0[xX][0-9A-Fa-f]+|0[bB][01]+|0[0-7]*|[1-9][0-9]*)\z/;

    # oct() understands the 0x / 0b / leading-0 prefixes; plain decimal must
    # not go through it or "65" would be read as octal.
    return $token =~ /\A0/ ? oct($token) : $token+0;
}

# Three-argument open() with an undefined file name would silently open an
# empty temporary file, so reject a missing argument explicitly.
die "No character set mapping file given\n" unless defined $set;

open(my $mapping, "<", $set) or die "$set: $!\n";

while (my $entry=<$mapping>)
{
    # Strip comments, then the line terminator and any trailing blanks/CR.
    $entry =~ s/\#.*//s;
    $entry =~ s/\s+\z//;

    # Columns are separated by blanks or tabs: byte value, then code point.
    my ($code_text, $unicode_text)=split /[ \t]+/, $entry;

    next unless defined $code_text && $code_text ne "";

    my $code=_parse_int_literal($code_text);

    unless (defined $code)
    {
        warn "$set: line $.: ignoring malformed byte value\n";
        next;
    }

    # The parser only yields non-negative integers, so only the upper bound
    # needs checking.
    die "$set: line $.: byte value $code_text is outside 0-255\n"
        if $code > 255;

    # A byte listed without a code point is unmapped: leave $fwd[$code]
    # undefined and add nothing to the reverse table.
    next unless defined $unicode_text && $unicode_text ne "";

    my $unicode=_parse_int_literal($unicode_text);

    unless (defined $unicode)
    {
        warn "$set: line $.: ignoring malformed code point\n";
        next;
    }

    $fwd[$code]=$unicode;
    $rev{$unicode}=$code;
}

close($mapping);

# C identifier for the generated byte -> Unicode table (next command line
# argument).
my $fwdname=shift;

# Banner and #include at the top of the generated C source.  The heredoc is
# non-interpolating so the RCS $Id$ keyword is emitted literally.
print <<'END_OF_HEADER';

/*
** Copyright 2000-2006 Double Precision, Inc.
** See COPYING for distribution information.
**
** $Id: mkiso8859.pl,v 1.7 2006/03/25 14:24:43 mrsam Exp $
*/

#include "unicode.h"
END_OF_HEADER

print "static const unicode_char $fwdname [128]={\n";

# Only the upper half of the character set is stored in the table, so the
# lower half must map every byte to the code point of the same value.
foreach my $ascii (0 .. 127)
{
    die "Unicode map not US-ASCII: $ascii\n" unless $fwd[$ascii] == $ascii;
}

# Emit bytes 128..255, sixteen initializers per row.  Unmapped bytes are
# written as 0.  Every row except the last ends with a separating comma.
for (my $row_start=128; $row_start < 256; $row_start += 16)
{
    my $row_end=$row_start + 15;
    my $row=join(",", map { $fwd[$_] || 0 } $row_start .. $row_end);

    $row .= "," if $row_end < 255;

    print "$row\n";
}

print "};\n";

# C identifiers for the generated uppercase, lowercase and titlecase tables
# (the next three command line arguments, in that order).
my $ucname=shift;
my $lcname=shift;
my $tcname=shift;

# Print the declaration and the 256 initializers of one case-conversion
# table.  $name is the C identifier; $case_map is a reference to one of the
# code point => code point hashes (%UC, %LC or %TC).
#
# For every byte the table holds the byte that encodes the case-mapped code
# point, or the byte itself when it is unmapped, has no case mapping, or the
# mapped code point does not exist in this character set.
#
# The closing "};" is deliberately NOT printed here; the caller emits it.
sub print_case_table
{
    my ($name, $case_map)=@_;

    print "static const char $name [256]={\n";

    for my $byte (0 .. 255)
    {
        my $unicode=$fwd[$byte];
        my $mapped=defined $unicode ? $case_map->{$unicode} : undef;

        # Test for definedness, not truth, so that a reverse mapping to
        # byte 0x00 is not mistaken for "no mapping".
        my $target=(defined $mapped && defined $rev{$mapped})
            ? $rev{$mapped} : $byte;

        printf("(char)0x%02x", $target);
        print "," if $byte < 255;
        print "\n" if ($byte % 8) == 7;
    }
}

print_case_table($ucname, \%UC);
print "};\n";

print_case_table($lcname, \%LC);
print "};\n";

# The titlecase table is left open on purpose: its closing "};" is the first
# thing printed by the code that follows.
print_case_table($tcname, \%TC);

# Last two command line arguments: the C identifier of the generated
# struct unicode_info, and the character set name stored inside it.
my $structname=shift;
my $chsetname=shift;

# windows-874 gets its own Unicode -> byte converter; every other set uses
# the generic ISO-8859 one.
my $u2c_func=$set eq "windows-874.txt"
    ? "unicode_windows874_u2c" : "unicode_iso8859_u2c";

# Flags initializer of the struct, chosen from the mapping file name.
# The first pattern used to be /^8859*/, which as a regex means "885 followed
# by any number of 9s"; it is now anchored on the literal prefix "8859".
my $flags;

if ($set =~ /^8859/ || $set =~ /^windows-12/)
{
    $flags="UNICODE_USASCII |\n"
        . "UNICODE_HEADER_QUOPRI | UNICODE_BODY_QUOPRI,";
}
elsif ($set eq "windows-874.txt")
{
    $flags="UNICODE_USASCII |\n"
        . "UNICODE_HEADER_BASE64 | UNICODE_BODY_BASE64,";
}
else
{
    $flags="UNICODE_USASCII,";
}

# The leading "};" closes the titlecase table, which the preceding code
# leaves open.  After it come the conversion wrappers and the struct that
# ties them together.
print <<"END_OF_SOURCE";
};


static unicode_char *c2u(const struct unicode_info *u, const char *cp, int *ip)
{
return (unicode_iso8859_c2u(cp, ip, ${fwdname}));
}

static char *u2c(const struct unicode_info *u, const unicode_char *cp, int *ip)
{
return (${u2c_func}(cp, ip, ${fwdname}));
}

static char *toupper_func(const struct unicode_info *u, const char *cp, int *ip)
{
return (unicode_iso8859_convert(cp, ip, ${ucname}));
}

static char *tolower_func(const struct unicode_info *u, const char *cp, int *ip)
{
return (unicode_iso8859_convert(cp, ip, ${lcname}));
}

static char *totitle_func(const struct unicode_info *u, const char *cp, int *ip)
{
return (unicode_iso8859_convert(cp, ip, ${tcname}));
}

const struct unicode_info ${structname} = {
"${chsetname}",
${flags}
c2u,
u2c,
toupper_func,
tolower_func,
totitle_func};
END_OF_SOURCE