Commit | Line | Data |
---|---|---|
8d138742 CE |
1 | #!/usr/bin/perl |
2 | # usage: | |
3 | # % iso2022jp.pl > iso2022jp.h | |
4 | ||
5 | require "cjkcompat.pl"; | |
6 | ||
7 | ||
# NOTE(review): $perline is not used in this part of the file; presumably it
# is the number of table entries emitted per output line -- confirm below.
$perline = 8;
# Gzip-compressed Unihan database, looked up relative to the current directory.
$unihan = "Unihan-3.2.0.txt.gz";

die "${unihan}: File not found.\n" if (!(-f $unihan));
# The piped open only reports a failure to start the command; a failing
# gunzip is detected when the pipe is closed, after the loop.
open (SET, "gunzip -cd < ${unihan} |") or die "${unihan}: $!\n";

# Collect the kIRG_JSource entries of the Unihan database:
#   %j2u           JIS code => Unicode code point for source "0-xxxx"
#   %jisx0212_j2u  JIS code => Unicode code point for source "1-xxxx"
# $line counts input lines and $ls keeps the raw line, both for diagnostics.
$line = 0; $ls = "";
while (<SET>)
{
    $line++;
    $ls = $_;
    chomp;
    s/\#.*//;    # strip comments before matching
    next unless /^U\+(.....?)\s+kIRG_JSource\s+([01])\-(....)/;

    my $jcode = hex $3;
    my $jlevel = $2+0;
    my $ucode = hex $1;

    # Split the JIS code into its two bytes; each must lie in 0x21..0x7e.
    my $jcodeh = int ($jcode / 256);
    my $jcodel = $jcode % 256;

    if ($jcode < 0 || $jcode > 65535 || $ucode == 0 ||
        $jcodeh < 0x21 || $jcodeh > 0x7e || $jcodel < 0x21 || $jcodel > 0x7e) {
        # Report on STDERR: per the usage line STDOUT is redirected into the
        # generated header, where these messages would otherwise be hidden.
        print STDERR "$0: Out of JIS/Unicode code range\n";
        print STDERR sprintf("line %d: JIS 0x%04X, U+%04X\n", $line, $jcode, $ucode);
        print STDERR "> $ls";
        die;
    }

    if ($jlevel == 0) {
        $j2u{$jcode} = $ucode;
    } elsif ($jlevel == 1) {
        $jisx0212_j2u{$jcode} = $ucode;
    }
}

# close() on a pipe waits for gunzip and returns false if it exited with a
# non-zero status, so a truncated or corrupt archive cannot silently produce
# a partial table.
close(SET)
    or die($! ? "${unihan}: error closing gunzip pipe: $!\n"
              : "${unihan}: gunzip exited with status $?\n");
46 | ||
47 | ## | |
48 | ## Map for JIS X 0208:1983/1990/1997 | |
49 | ## | |
50 | ||
51 | # NOTE: Some row-cells of JIS X 0208:1997 and JIS X 0212 are duplicated | |
52 | # with US-ASCII (identical to ISO 646 IRV) and GL of JIS X 0201 (ISO 646 | |
53 | # Japanese version). | |
54 | # They are mapped to Fullwidth Form (U+FFxx) so that round-trip compatibility | |
55 | # will be kept. | |
56 | ||
# DIGIT and LETTER of BASIC LATIN: each cell maps to its code plus a fixed
# offset (0x2330 -> U+FF10, 0x2341 -> U+FF21, 0x2361 -> U+FF41).
foreach my $cell (0x2330 .. 0x2339, 0x2341 .. 0x235a, 0x2361 .. 0x237a) {
    $j2u{$cell} = $ucode = $cell + 0xdbe0;
}

# HIRAGANA and KATAKANA: one contiguous run each, again a fixed offset.
# Note: 0x2474-0x2476 are assigned by JIS X 0213:2000,
# but are found in some vendor codepages for JIS X 0208.
# The package variables $jcode and $ucode are used as the loop state, so
# they keep the values of the final iteration afterwards.
foreach my $kana ([0x2421, 0x2476, 0xc20],      # HIRAGANA
                  [0x2521, 0x2576, 0xb80]) {    # KATAKANA
    my ($first, $last, $shift) = @$kana;
    $jcode = $first;
    while ($jcode <= $last) {
        $j2u{$jcode} = $ucode = $jcode + $shift;
        $jcode++;
    }
}
74 | ||
75 | # Unicode 3.2 does not mention the JIS X 0208 marks | |
76 | # (it gives no clear definition for them), so the conversion | |
77 | # map for them is added manually here. | |
78 | # cf. JIS X 0208:1997 | |
79 | ||
# JIS X 0208 rows 1 and 2 (marks and symbols): hand-written
# "JIS code => Unicode code point" pairs, stored into %j2u in list order.
{
    my @marks = (
        # row 1
        0x2121 => 0x3000,
        0x2122 => 0x3001,
        0x2123 => 0x3002,
        0x2124 => 0xff0c,
        0x2125 => 0xff0e,
        0x2126 => 0x30fb,
        0x2127 => 0xff1a,
        0x2128 => 0xff1b,
        0x2129 => 0xff1f,
        0x212a => 0xff01,
        0x212b => 0x309b,
        0x212c => 0x309c,
        0x212d => 0x00b4,
        0x212e => 0xff40,
        0x212f => 0x00a8,
        0x2130 => 0xff3e,
        0x2131 => 0xffe3,   # OVERLINE vs. JIS X 0201 GL -> FULLWIDTH MACRON
        0x2132 => 0xff3f,
        0x2133 => 0x30fd,
        0x2134 => 0x30fe,
        0x2135 => 0x309d,
        0x2136 => 0x309e,
        0x2137 => 0x3003,
        0x2138 => 0x4edd,
        0x2139 => 0x3005,
        0x213a => 0x3006,
        0x213b => 0x3007,
        0x213c => 0x30fc,
        0x213d => 0x2014,   # HORIZONTAL BAR (JIS X 0208:1990) -> EM DASH (1997)
        0x213e => 0x2010,
        0x213f => 0xff0f,
        0x2140 => 0xff3c,   # REVERSE SOLIDUS vs. ASCII -> FULLWIDTH REVERSE SOLIDUS
        0x2141 => 0x301c,
        0x2142 => 0x2016,
        0x2143 => 0xff5c,
        0x2144 => 0x2026,
        0x2145 => 0x2025,
        0x2146 => 0x2018,
        0x2147 => 0x2019,
        0x2148 => 0x201c,
        0x2149 => 0x201d,
        0x214a => 0xff08,
        0x214b => 0xff09,
        0x214c => 0x3014,
        0x214d => 0x3015,
        0x214e => 0xff3b,
        0x214f => 0xff3d,
        0x2150 => 0xff5b,
        0x2151 => 0xff5d,
        0x2152 => 0x3008,
        0x2153 => 0x3009,
        0x2154 => 0x300a,
        0x2155 => 0x300b,
        0x2156 => 0x300c,
        0x2157 => 0x300d,
        0x2158 => 0x300e,
        0x2159 => 0x300f,
        0x215a => 0x3010,
        0x215b => 0x3011,
        0x215c => 0xff0b,
        0x215d => 0x2212,
        0x215e => 0x00b1,
        0x215f => 0x00d7,
        0x2160 => 0x00f7,
        0x2161 => 0xff1d,
        0x2162 => 0x2260,
        0x2163 => 0xff1c,
        0x2164 => 0xff1e,
        0x2165 => 0x2266,
        0x2166 => 0x2267,
        0x2167 => 0x221e,
        0x2168 => 0x2234,
        0x2169 => 0x2642,
        0x216a => 0x2640,
        0x216b => 0x00b0,
        0x216c => 0x2032,
        0x216d => 0x2033,
        0x216e => 0x2103,
        0x216f => 0xffe5,   # YEN SIGN vs. JIS X 0201 GL -> FULLWIDTH YEN SIGN
        0x2170 => 0xff04,
        0x2171 => 0x00a2,
        0x2172 => 0x00a3,
        0x2173 => 0xff05,
        0x2174 => 0xff03,
        0x2175 => 0xff06,
        0x2176 => 0xff0a,
        0x2177 => 0xff20,
        0x2178 => 0x00a7,
        0x2179 => 0x2606,
        0x217a => 0x2605,
        0x217b => 0x25cb,
        0x217c => 0x25cf,
        0x217d => 0x25ce,
        0x217e => 0x25c7,
        # row 2
        0x2221 => 0x25c6,
        0x2222 => 0x25a1,
        0x2223 => 0x25a0,
        0x2224 => 0x25b3,
        0x2225 => 0x25b2,
        0x2226 => 0x25bd,
        0x2227 => 0x25bc,
        0x2228 => 0x203b,
        0x2229 => 0x3012,
        0x222a => 0x2192,
        0x222b => 0x2190,
        0x222c => 0x2191,
        0x222d => 0x2193,
        0x222e => 0x3013,
        0x223a => 0x2208,
        0x223b => 0x220b,
        0x223c => 0x2286,
        0x223d => 0x2287,
        0x223e => 0x2282,
        0x223f => 0x2283,
        0x2240 => 0x222a,
        0x2241 => 0x2229,
        0x224a => 0x2227,
        0x224b => 0x2228,
        0x224c => 0x00ac,
        0x224d => 0x21d2,
        0x224e => 0x21d4,
        0x224f => 0x2200,
        0x2250 => 0x2203,
        0x225c => 0x2220,
        0x225d => 0x22a5,
        0x225e => 0x2312,
        0x225f => 0x2202,
        0x2260 => 0x2207,
        0x2261 => 0x2261,
        0x2262 => 0x2252,
        0x2263 => 0x226a,
        0x2264 => 0x226b,
        0x2265 => 0x221a,
        0x2266 => 0x223d,
        0x2267 => 0x221d,
        0x2268 => 0x2235,
        0x2269 => 0x222b,
        0x226a => 0x222c,
        0x2272 => 0x212b,
        0x2273 => 0x2030,
        0x2274 => 0x266f,
        0x2275 => 0x266d,
        0x2276 => 0x266a,
        0x2277 => 0x2020,
        0x2278 => 0x2021,
        0x2279 => 0x00b6,
        0x227e => 0x25ef,
    );
    # Consume the flat list two items at a time, in order.
    while (my ($jis, $uni) = splice(@marks, 0, 2)) {
        $j2u{$jis} = $uni;
    }
}
# JIS X 0208 row 6 (Greek).  Each alphabet is two contiguous runs: the
# Unicode side skips one code point between RHO and SIGMA (U+03A2, U+03C2)
# while the JIS cells stay consecutive.
# Each entry is [first JIS code, last JIS code, Unicode value of the first].
foreach my $run (
    [0x2621, 0x2631, 0x0391],   # capital ALPHA .. RHO
    [0x2632, 0x2638, 0x03a3],   # capital SIGMA .. OMEGA
    [0x2641, 0x2651, 0x03b1],   # small alpha .. rho
    [0x2652, 0x2658, 0x03c3],   # small sigma .. omega
) {
    my ($first, $last, $ubase) = @$run;
    foreach my $cell ($first .. $last) {
        $j2u{$cell} = $ubase + ($cell - $first);
    }
}
# JIS X 0208 row 7 (Cyrillic).  The cells are consecutive, but the seventh
# letter of each case (IO / io) sits outside the main Unicode run, so each
# case is split into run / single cell / run.
# Each entry is [first JIS code, last JIS code, Unicode value of the first].
foreach my $run (
    [0x2721, 0x2726, 0x0410],   # capital A .. IE
    [0x2727, 0x2727, 0x0401],   # capital IO
    [0x2728, 0x2741, 0x0416],   # capital ZHE .. YA
    [0x2751, 0x2756, 0x0430],   # small a .. ie
    [0x2757, 0x2757, 0x0451],   # small io
    [0x2758, 0x2771, 0x0436],   # small zhe .. ya
) {
    my ($first, $last, $ubase) = @$run;
    foreach my $cell ($first .. $last) {
        $j2u{$cell} = $ubase + ($cell - $first);
    }
}
# JIS X 0208 row 8 (box drawing): the 32 consecutive cells 0x2821..0x2840,
# assigned positionally from the list below (U+25xx code points).
@j2u{0x2821 .. 0x2840} = (
    # 0x2821 .. 0x282b
    0x2500, 0x2502, 0x250c, 0x2510, 0x2518, 0x2514,
    0x251c, 0x252c, 0x2524, 0x2534, 0x253c,
    # 0x282c .. 0x2836
    0x2501, 0x2503, 0x250f, 0x2513, 0x251b, 0x2517,
    0x2523, 0x2533, 0x252b, 0x253b, 0x254b,
    # 0x2837 .. 0x2840
    0x2520, 0x252f, 0x2528, 0x2537, 0x253f,
    0x251d, 0x2530, 0x2525, 0x2538, 0x2542,
);
373 | ||
# The 73 row-cells below are assigned as "Compatibility characters for
# national implementations" by JIS X 0213:2000, but are also found in some
# vendor codepages for JIS X 0208.
# NOTE: U+2116 NUMERO SIGN is duplicated with JIS X 0212.

# CIRCLED DIGIT ONE .. CIRCLED NUMBER TWENTY
@j2u{0x2d21 .. 0x2d34} = (0x2460 .. 0x2473);
# ROMAN NUMERAL ONE .. ROMAN NUMERAL TEN
@j2u{0x2d35 .. 0x2d3e} = (0x2160 .. 0x2169);

{
    # Remaining cells as "JIS code => Unicode code point" pairs, stored in
    # list order.  Entries commented out here stay disabled.
    my @cells = (
       #0x2d3f => 0x216a,   # ROMAN NUMERAL ELEVEN
        0x2d40 => 0x3349,   # SQUARE MIRI
        0x2d41 => 0x3314,   # SQUARE KIRO
        0x2d42 => 0x3322,   # SQUARE SENTI
        0x2d43 => 0x334d,   # SQUARE MEETORU
        0x2d44 => 0x3318,   # SQUARE GURAMU
        0x2d45 => 0x3327,   # SQUARE TON
        0x2d46 => 0x3303,   # SQUARE AARU
        0x2d47 => 0x3336,   # SQUARE HEKUTAARU
        0x2d48 => 0x3351,   # SQUARE RITTORU
        0x2d49 => 0x3357,   # SQUARE WATTO
        0x2d4a => 0x330d,   # SQUARE KARORII
        0x2d4b => 0x3326,   # SQUARE DORU
        0x2d4c => 0x3323,   # SQUARE SENTO
        0x2d4d => 0x332b,   # SQUARE PAASENTO
        0x2d4e => 0x334a,   # SQUARE MIRIBAARU
        0x2d4f => 0x333b,   # SQUARE PEEZI
        0x2d50 => 0x339c,   # SQUARE MM
        0x2d51 => 0x339d,   # SQUARE CM
        0x2d52 => 0x339e,   # SQUARE KM
        0x2d53 => 0x338e,   # SQUARE MG
        0x2d54 => 0x338f,   # SQUARE KG
        0x2d55 => 0x33c4,   # SQUARE CC
        0x2d56 => 0x33a1,   # SQUARE M SQUARED
       #0x2d57 => 0x216b,   # ROMAN NUMERAL TWELVE
        0x2d5f => 0x337b,   # SQUARE ERA NAME HEISEI
        0x2d60 => 0x301d,   # REVERSED DOUBLE PRIME QUOTATION MARK
        0x2d61 => 0x301f,   # LOW DOUBLE PRIME QUOTATION MARK
        0x2d62 => 0x2116,   # NUMERO SIGN
        0x2d63 => 0x33cd,   # SQUARE KK
        0x2d64 => 0x2121,   # TELEPHONE SIGN
        0x2d65 => 0x32a4,   # CIRCLED IDEOGRAPH HIGH
        0x2d66 => 0x32a5,   # CIRCLED IDEOGRAPH CENTRE
        0x2d67 => 0x32a6,   # CIRCLED IDEOGRAPH LOW
        0x2d68 => 0x32a7,   # CIRCLED IDEOGRAPH LEFT
        0x2d69 => 0x32a8,   # CIRCLED IDEOGRAPH RIGHT
        0x2d6a => 0x3231,   # PARENTHESIZED IDEOGRAPH STOCK
        0x2d6b => 0x3232,   # PARENTHESIZED IDEOGRAPH HAVE
        0x2d6c => 0x3239,   # PARENTHESIZED IDEOGRAPH REPRESENT
        0x2d6d => 0x337e,   # SQUARE ERA NAME MEIZI
        0x2d6e => 0x337d,   # SQUARE ERA NAME TAISYOU
        0x2d6f => 0x337c,   # SQUARE ERA NAME SYOUWA
        0x2d73 => 0x222e,   # CONTOUR INTEGRAL
        0x2d78 => 0x221f,   # RIGHT ANGLE
        0x2d79 => 0x22bf,   # RIGHT TRIANGLE
       #0x2d7d => 0x2756,   # BLACK DIAMOND MINUS WHITE X
       #0x2d7e => 0x261e,   # WHITE RIGHT POINTING INDEX
    );
    while (my ($jis, $uni) = splice(@cells, 0, 2)) {
        $j2u{$jis} = $uni;
    }
}
455 | ||
456 | ||
457 | ## | |
458 | ## Map (upper-)compatible with JIS C 6226:1978 | |
459 | ## | |
460 | ||
461 | # 26 pairs of row-cells in JIS X 0208:1997 are swapped. | |
462 | # cf. JIS X 0208:1997 Annex 2. | |
463 | ||
# Start the 1978 table with every entry of %j2u; keys() and values() walk
# the hash in the same order, so the slice copies each pair intact.
@jisx0208_1978_j2u{keys %j2u} = values %j2u;

# Exchange the two row-cells of each pair in the 1978 table, in list order.
foreach my $pair (
    [0x3033, 0x724D], [0x3229, 0x7274], [0x3342, 0x695a], [0x3349, 0x5978],
    [0x3376, 0x635e], [0x3443, 0x5e75], [0x3452, 0x6b5d], [0x375b, 0x7074],
    [0x395c, 0x6268], [0x3c49, 0x6922], [0x3F59, 0x7057], [0x4128, 0x6c4d],
    [0x445B, 0x5464], [0x4557, 0x626a], [0x456e, 0x5b6d], [0x4573, 0x5e39],
    [0x4676, 0x6d6e], [0x4768, 0x6a24], [0x4930, 0x5B58], [0x4b79, 0x5056],
    [0x4c79, 0x692e], [0x4F36, 0x6446],
    [0x3646, 0x7421], [0x4B6A, 0x7422], [0x4D5A, 0x7423], [0x6076, 0x7424],
) {
    swap_1978(@$pair);
}
493 | ||
# swap_1978(X, Y)
#   Exchange the values stored under JIS codes X and Y in
#   %jisx0208_1978_j2u, then flag the pages involved:
#     %j2u_1978  keyed by the high byte (code / 256) of X and of Y
#     %u2j_1978  keyed by the high byte of the value now stored under X
#                and under Y
#   Works only through those package hashes; returns 1.
#   NOTE(review): if either code has no entry, the swap stores an undefined
#   value and flags page 0 in %u2j_1978 -- confirm all pairs are defined.
sub swap_1978 {
    # Lexical arguments: the original used local() on the package globals
    # $x and $y, which is dynamic scoping and not what is meant here.
    my ($x, $y) = @_;

    # Slice assignment evaluates the right-hand side first, so this swaps.
    @jisx0208_1978_j2u{$x, $y} = @jisx0208_1978_j2u{$y, $x};

    foreach my $jis ($x, $y) {
        $j2u_1978{int($jis / 256)} = 1;
    }
    foreach my $jis ($x, $y) {
        $u2j_1978{int($jisx0208_1978_j2u{$jis} / 256)} = 1;
    }
    return 1;
}
503 | ||
504 | ||
505 | ## | |
506 | ## Map for JIS X 0212:1990 ("Supplementary Kanzi") | |
507 | ## | |
508 | ||
509 | # Unicode 3.2 does not mention the JIS X 0212 symbols, marks, | |
510 | # letters with diacritical marks, etc., so the conversion map is added manually. | |
511 | # cf. ftp://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0212.TXT | |
512 | ||
513 | $jisx0212_j2u{0x222f} = 0x02d8; # BREVE | |
514 | $jisx0212_j2u{0x2230} = 0x02c7; # CARON (Mandarin Chinese third tone) | |
515 | $jisx0212_j2u{0x2231} = 0x00b8; # CEDILLA | |
516 | $jisx0212_j2u{0x2232} = 0x02d9; # DOT ABOVE (Mandarin Chinese light tone) | |
517 | $jisx0212_j2u{0x2233} = 0x02dd; # DOUBLE ACUTE ACCENT | |
518 | $jisx0212_j2u{0x2234} = 0x00af; # MACRON | |
519 | $jisx0212_j2u{0x2235} = 0x02db; # OGONEK | |
520 | $jisx0212_j2u{0x2236} = 0x02da; # RING ABOVE | |
521 | $jisx0212_j2u{0x2237} = 0xff5e; # TILDE vs. ASCII -> FULLWIDTH TILDE | |
522 | $jisx0212_j2u{0x2238} = 0x0384; # GREEK TONOS | |
523 | $jisx0212_j2u{0x2239} = 0x0385; # GREEK DIALYTIKA TONOS | |
524 | $jisx0212_j2u{0x2242} = 0x00a1; # INVERTED EXCLAMATION MARK | |
525 | $jisx0212_j2u{0x2243} = 0x00a6; # BROKEN BAR | |
526 | $jisx0212_j2u{0x2244} = 0x00bf; # INVERTED QUESTION MARK | |
527 | $jisx0212_j2u{0x226b} = 0x00ba; # MASCULINE ORDINAL INDICATOR | |
528 | $jisx0212_j2u{0x226c} = 0x00aa; # FEMININE ORDINAL INDICATOR | |
529 | $jisx0212_j2u{0x226d} = 0x00a9; # COPYRIGHT SIGN | |
530 | $jisx0212_j2u{0x226e} = 0x00ae; # REGISTERED SIGN | |
531 | $jisx0212_j2u{0x226f} = 0x2122; # TRADE MARK SIGN | |
532 | $jisx0212_j2u{0x2270} = 0x00a4; # CURRENCY SIGN | |
533 | $jisx0212_j2u{0x2271} = 0x2116; # NUMERO SIGN | |
534 | $jisx0212_j2u{0x2661} = 0x0386; # GREEK CAPITAL LETTER ALPHA WITH TONOS | |
535 | $jisx0212_j2u{0x2662} = 0x0388; # GREEK CAPITAL LETTER EPSILON WITH TONOS | |
536 | $jisx0212_j2u{0x2663} = 0x0389; # GREEK CAPITAL LETTER ETA WITH TONOS | |
537 | $jisx0212_j2u{0x2664} = 0x038a; # GREEK CAPITAL LETTER IOTA WITH TONOS | |
538 | $jisx0212_j2u{0x2665} = 0x03aa; # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA | |
539 | $jisx0212_j2u{0x2667} = 0x038c; # GREEK CAPITAL LETTER OMICRON WITH TONOS | |
540 | $jisx0212_j2u{0x2669} = 0x038e; # GREEK CAPITAL LETTER UPSILON WITH TONOS | |
541 | $jisx0212_j2u{0x266a} = 0x03ab; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA | |
542 | $jisx0212_j2u{0x266c} = 0x038f; # GREEK CAPITAL LETTER OMEGA WITH TONOS | |
543 | $jisx0212_j2u{0x2671} = 0x03ac; # GREEK SMALL LETTER ALPHA WITH TONOS | |
544 | $jisx0212_j2u{0x2672} = 0x03ad; # GREEK SMALL LETTER EPSILON WITH TONOS | |
545 | $jisx0212_j2u{0x2673} = 0x03ae; # GREEK SMALL LETTER ETA WITH TONOS | |
546 | $jisx0212_j2u{0x2674} = 0x03af; # GREEK SMALL LETTER IOTA WITH TONOS | |
547 | $jisx0212_j2u{0x2675} = 0x03ca; # GREEK SMALL LETTER IOTA WITH DIALYTIKA | |
548 | $jisx0212_j2u{0x2676} = 0x0390; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS | |
549 | $jisx0212_j2u{0x2677} = 0x03cc; # GREEK SMALL LETTER OMICRON WITH TONOS | |
550 | $jisx0212_j2u{0x2678} = 0x03c2; # GREEK SMALL LETTER FINAL SIGMA | |
551 | $jisx0212_j2u{0x2679} = 0x03cd; # GREEK SMALL LETTER UPSILON WITH TONOS | |
552 | $jisx0212_j2u{0x267a} = 0x03cb; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA | |
553 | $jisx0212_j2u{0x267b} = 0x03b0; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS | |
554 | $jisx0212_j2u{0x267c} = 0x03ce; # GREEK SMALL LETTER OMEGA WITH TONOS | |
555 | $jisx0212_j2u{0x2742} = 0x0402; # CYRILLIC CAPITAL LETTER DJE | |
556 | $jisx0212_j2u{0x2743} = 0x0403; # CYRILLIC CAPITAL LETTER GJE | |
557 | $jisx0212_j2u{0x2744} = 0x0404; # CYRILLIC CAPITAL LETTER UKRAINIAN IE | |
558 | $jisx0212_j2u{0x2745} = 0x0405; # CYRILLIC CAPITAL LETTER DZE | |
559 | $jisx0212_j2u{0x2746} = 0x0406; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I | |
560 | $jisx0212_j2u{0x2747} = 0x0407; # CYRILLIC CAPITAL LETTER YI | |
561 | $jisx0212_j2u{0x2748} = 0x0408; # CYRILLIC CAPITAL LETTER JE | |
562 | $jisx0212_j2u{0x2749} = 0x0409; # CYRILLIC CAPITAL LETTER LJE | |
563 | $jisx0212_j2u{0x274a} = 0x040a; # CYRILLIC CAPITAL LETTER NJE | |
564 | $jisx0212_j2u{0x274b} = 0x040b; # CYRILLIC CAPITAL LETTER TSHE | |
565 | $jisx0212_j2u{0x274c} = 0x040c; # CYRILLIC CAPITAL LETTER KJE | |
566 | $jisx0212_j2u{0x274d} = 0x040e; # CYRILLIC CAPITAL LETTER SHORT U | |
567 | $jisx0212_j2u{0x274e} = 0x040f; # CYRILLIC CAPITAL LETTER DZHE | |
568 | $jisx0212_j2u{0x2772} = 0x0452; # CYRILLIC SMALL LETTER DJE | |
569 | $jisx0212_j2u{0x2773} = 0x0453; # CYRILLIC SMALL LETTER GJE | |
570 | $jisx0212_j2u{0x2774} = 0x0454; # CYRILLIC SMALL LETTER UKRAINIAN IE | |
571 | $jisx0212_j2u{0x2775} = 0x0455; # CYRILLIC SMALL LETTER DZE | |
572 | $jisx0212_j2u{0x2776} = 0x0456; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I | |
573 | $jisx0212_j2u{0x2777} = 0x0457; # CYRILLIC SMALL LETTER YI | |
574 | $jisx0212_j2u{0x2778} = 0x0458; # CYRILLIC SMALL LETTER JE | |
575 | $jisx0212_j2u{0x2779} = 0x0459; # CYRILLIC SMALL LETTER LJE | |
576 | $jisx0212_j2u{0x277a} = 0x045a; # CYRILLIC SMALL LETTER NJE | |
577 | $jisx0212_j2u{0x277b} = 0x045b; # CYRILLIC SMALL LETTER TSHE | |
578 | $jisx0212_j2u{0x277c} = 0x045c; # CYRILLIC SMALL LETTER KJE | |
579 | $jisx0212_j2u{0x277d} = 0x045e; # CYRILLIC SMALL LETTER SHORT U | |
580 | $jisx0212_j2u{0x277e} = 0x045f; # CYRILLIC SMALL LETTER DZHE | |
581 | $jisx0212_j2u{0x2921} = 0x00c6; # LATIN CAPITAL LIGATURE AE | |
582 | $jisx0212_j2u{0x2922} = 0x0110; # LATIN CAPITAL LETTER D WITH STROKE | |
583 | $jisx0212_j2u{0x2924} = 0x0126; # LATIN CAPITAL LETTER H WITH STROKE | |
584 | $jisx0212_j2u{0x2926} = 0x0132; # LATIN CAPITAL LIGATURE IJ | |
585 | $jisx0212_j2u{0x2928} = 0x0141; # LATIN CAPITAL LETTER L WITH STROKE | |
586 | $jisx0212_j2u{0x2929} = 0x013f; # LATIN CAPITAL LETTER L WITH MIDDLE DOT | |
587 | $jisx0212_j2u{0x292b} = 0x014a; # LATIN CAPITAL LETTER ENG | |
588 | $jisx0212_j2u{0x292c} = 0x00d8; # LATIN CAPITAL LETTER O WITH STROKE | |
589 | $jisx0212_j2u{0x292d} = 0x0152; # LATIN CAPITAL LIGATURE OE | |
590 | $jisx0212_j2u{0x292f} = 0x0166; # LATIN CAPITAL LETTER T WITH STROKE | |
591 | $jisx0212_j2u{0x2930} = 0x00de; # LATIN CAPITAL LETTER THORN | |
592 | $jisx0212_j2u{0x2941} = 0x00e6; # LATIN SMALL LIGATURE AE | |
593 | $jisx0212_j2u{0x2942} = 0x0111; # LATIN SMALL LETTER D WITH STROKE | |
594 | $jisx0212_j2u{0x2943} = 0x00f0; # LATIN SMALL LETTER ETH | |
595 | $jisx0212_j2u{0x2944} = 0x0127; # LATIN SMALL LETTER H WITH STROKE | |
596 | $jisx0212_j2u{0x2945} = 0x0131; # LATIN SMALL LETTER DOTLESS I | |
597 | $jisx0212_j2u{0x2946} = 0x0133; # LATIN SMALL LIGATURE IJ | |
598 | $jisx0212_j2u{0x2947} = 0x0138; # LATIN SMALL LETTER KRA | |
599 | $jisx0212_j2u{0x2948} = 0x0142; # LATIN SMALL LETTER L WITH STROKE | |
600 | $jisx0212_j2u{0x2949} = 0x0140; # LATIN SMALL LETTER L WITH MIDDLE DOT | |
601 | $jisx0212_j2u{0x294a} = 0x0149; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE | |
602 | $jisx0212_j2u{0x294b} = 0x014b; # LATIN SMALL LETTER ENG | |
603 | $jisx0212_j2u{0x294c} = 0x00f8; # LATIN SMALL LETTER O WITH STROKE | |
604 | $jisx0212_j2u{0x294d} = 0x0153; # LATIN SMALL LIGATURE OE | |
605 | $jisx0212_j2u{0x294e} = 0x00df; # LATIN SMALL LETTER SHARP S | |
606 | $jisx0212_j2u{0x294f} = 0x0167; # LATIN SMALL LETTER T WITH STROKE | |
607 | $jisx0212_j2u{0x2950} = 0x00fe; # LATIN SMALL LETTER THORN | |
608 | $jisx0212_j2u{0x2a21} = 0x00c1; # LATIN CAPITAL LETTER A WITH ACUTE | |
609 | $jisx0212_j2u{0x2a22} = 0x00c0; # LATIN CAPITAL LETTER A WITH GRAVE | |
610 | $jisx0212_j2u{0x2a23} = 0x00c4; # LATIN CAPITAL LETTER A WITH DIAERESIS | |
611 | $jisx0212_j2u{0x2a24} = 0x00c2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX | |
612 | $jisx0212_j2u{0x2a25} = 0x0102; # LATIN CAPITAL LETTER A WITH BREVE | |
613 | $jisx0212_j2u{0x2a26} = 0x01cd; # LATIN CAPITAL LETTER A WITH CARON | |
614 | $jisx0212_j2u{0x2a27} = 0x0100; # LATIN CAPITAL LETTER A WITH MACRON | |
615 | $jisx0212_j2u{0x2a28} = 0x0104; # LATIN CAPITAL LETTER A WITH OGONEK | |
616 | $jisx0212_j2u{0x2a29} = 0x00c5; # LATIN CAPITAL LETTER A WITH RING ABOVE | |
617 | $jisx0212_j2u{0x2a2a} = 0x00c3; # LATIN CAPITAL LETTER A WITH TILDE | |
618 | $jisx0212_j2u{0x2a2b} = 0x0106; # LATIN CAPITAL LETTER C WITH ACUTE | |
619 | $jisx0212_j2u{0x2a2c} = 0x0108; # LATIN CAPITAL LETTER C WITH CIRCUMFLEX | |
620 | $jisx0212_j2u{0x2a2d} = 0x010c; # LATIN CAPITAL LETTER C WITH CARON | |
621 | $jisx0212_j2u{0x2a2e} = 0x00c7; # LATIN CAPITAL LETTER C WITH CEDILLA | |
622 | $jisx0212_j2u{0x2a2f} = 0x010a; # LATIN CAPITAL LETTER C WITH DOT ABOVE | |
623 | $jisx0212_j2u{0x2a30} = 0x010e; # LATIN CAPITAL LETTER D WITH CARON | |
624 | $jisx0212_j2u{0x2a31} = 0x00c9; # LATIN CAPITAL LETTER E WITH ACUTE | |
625 | $jisx0212_j2u{0x2a32} = 0x00c8; # LATIN CAPITAL LETTER E WITH GRAVE | |
626 | $jisx0212_j2u{0x2a33} = 0x00cb; # LATIN CAPITAL LETTER E WITH DIAERESIS | |
627 | $jisx0212_j2u{0x2a34} = 0x00ca; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX | |
628 | $jisx0212_j2u{0x2a35} = 0x011a; # LATIN CAPITAL LETTER E WITH CARON | |
629 | $jisx0212_j2u{0x2a36} = 0x0116; # LATIN CAPITAL LETTER E WITH DOT ABOVE | |
630 | $jisx0212_j2u{0x2a37} = 0x0112; # LATIN CAPITAL LETTER E WITH MACRON | |
631 | $jisx0212_j2u{0x2a38} = 0x0118; # LATIN CAPITAL LETTER E WITH OGONEK | |
632 | $jisx0212_j2u{0x2a3a} = 0x011c; # LATIN CAPITAL LETTER G WITH CIRCUMFLEX | |
633 | $jisx0212_j2u{0x2a3b} = 0x011e; # LATIN CAPITAL LETTER G WITH BREVE | |
634 | $jisx0212_j2u{0x2a3c} = 0x0122; # LATIN CAPITAL LETTER G WITH CEDILLA | |
635 | $jisx0212_j2u{0x2a3d} = 0x0120; # LATIN CAPITAL LETTER G WITH DOT ABOVE | |
636 | $jisx0212_j2u{0x2a3e} = 0x0124; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX | |
637 | $jisx0212_j2u{0x2a3f} = 0x00cd; # LATIN CAPITAL LETTER I WITH ACUTE | |
638 | $jisx0212_j2u{0x2a40} = 0x00cc; # LATIN CAPITAL LETTER I WITH GRAVE | |
639 | $jisx0212_j2u{0x2a41} = 0x00cf; # LATIN CAPITAL LETTER I WITH DIAERESIS | |
640 | $jisx0212_j2u{0x2a42} = 0x00ce; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX | |
641 | $jisx0212_j2u{0x2a43} = 0x01cf; # LATIN CAPITAL LETTER I WITH CARON | |
642 | $jisx0212_j2u{0x2a44} = 0x0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE | |
643 | $jisx0212_j2u{0x2a45} = 0x012a; # LATIN CAPITAL LETTER I WITH MACRON | |
644 | $jisx0212_j2u{0x2a46} = 0x012e; # LATIN CAPITAL LETTER I WITH OGONEK | |
645 | $jisx0212_j2u{0x2a47} = 0x0128; # LATIN CAPITAL LETTER I WITH TILDE | |
646 | $jisx0212_j2u{0x2a48} = 0x0134; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX | |
647 | $jisx0212_j2u{0x2a49} = 0x0136; # LATIN CAPITAL LETTER K WITH CEDILLA | |
648 | $jisx0212_j2u{0x2a4a} = 0x0139; # LATIN CAPITAL LETTER L WITH ACUTE | |
649 | $jisx0212_j2u{0x2a4b} = 0x013d; # LATIN CAPITAL LETTER L WITH CARON | |
650 | $jisx0212_j2u{0x2a4c} = 0x013b; # LATIN CAPITAL LETTER L WITH CEDILLA | |
651 | $jisx0212_j2u{0x2a4d} = 0x0143; # LATIN CAPITAL LETTER N WITH ACUTE | |
652 | $jisx0212_j2u{0x2a4e} = 0x0147; # LATIN CAPITAL LETTER N WITH CARON | |
653 | $jisx0212_j2u{0x2a4f} = 0x0145; # LATIN CAPITAL LETTER N WITH CEDILLA | |
654 | $jisx0212_j2u{0x2a50} = 0x00d1; # LATIN CAPITAL LETTER N WITH TILDE | |
655 | $jisx0212_j2u{0x2a51} = 0x00d3; # LATIN CAPITAL LETTER O WITH ACUTE | |
656 | $jisx0212_j2u{0x2a52} = 0x00d2; # LATIN CAPITAL LETTER O WITH GRAVE | |
657 | $jisx0212_j2u{0x2a53} = 0x00d6; # LATIN CAPITAL LETTER O WITH DIAERESIS | |
658 | $jisx0212_j2u{0x2a54} = 0x00d4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX | |
659 | $jisx0212_j2u{0x2a55} = 0x01d1; # LATIN CAPITAL LETTER O WITH CARON | |
660 | $jisx0212_j2u{0x2a56} = 0x0150; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE | |
661 | $jisx0212_j2u{0x2a57} = 0x014c; # LATIN CAPITAL LETTER O WITH MACRON | |
662 | $jisx0212_j2u{0x2a58} = 0x00d5; # LATIN CAPITAL LETTER O WITH TILDE | |
663 | $jisx0212_j2u{0x2a59} = 0x0154; # LATIN CAPITAL LETTER R WITH ACUTE | |
664 | $jisx0212_j2u{0x2a5a} = 0x0158; # LATIN CAPITAL LETTER R WITH CARON | |
665 | $jisx0212_j2u{0x2a5b} = 0x0156; # LATIN CAPITAL LETTER R WITH CEDILLA | |
666 | $jisx0212_j2u{0x2a5c} = 0x015a; # LATIN CAPITAL LETTER S WITH ACUTE | |
667 | $jisx0212_j2u{0x2a5d} = 0x015c; # LATIN CAPITAL LETTER S WITH CIRCUMFLEX | |
668 | $jisx0212_j2u{0x2a5e} = 0x0160; # LATIN CAPITAL LETTER S WITH CARON | |
669 | $jisx0212_j2u{0x2a5f} = 0x015e; # LATIN CAPITAL LETTER S WITH CEDILLA | |
670 | $jisx0212_j2u{0x2a60} = 0x0164; # LATIN CAPITAL LETTER T WITH CARON | |
671 | $jisx0212_j2u{0x2a61} = 0x0162; # LATIN CAPITAL LETTER T WITH CEDILLA | |
672 | $jisx0212_j2u{0x2a62} = 0x00da; # LATIN CAPITAL LETTER U WITH ACUTE | |
673 | $jisx0212_j2u{0x2a63} = 0x00d9; # LATIN CAPITAL LETTER U WITH GRAVE | |
674 | $jisx0212_j2u{0x2a64} = 0x00dc; # LATIN CAPITAL LETTER U WITH DIAERESIS | |
675 | $jisx0212_j2u{0x2a65} = 0x00db; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX | |
676 | $jisx0212_j2u{0x2a66} = 0x016c; # LATIN CAPITAL LETTER U WITH BREVE | |
677 | $jisx0212_j2u{0x2a67} = 0x01d3; # LATIN CAPITAL LETTER U WITH CARON | |
678 | $jisx0212_j2u{0x2a68} = 0x0170; # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE | |
679 | $jisx0212_j2u{0x2a69} = 0x016a; # LATIN CAPITAL LETTER U WITH MACRON | |
680 | $jisx0212_j2u{0x2a6a} = 0x0172; # LATIN CAPITAL LETTER U WITH OGONEK | |
681 | $jisx0212_j2u{0x2a6b} = 0x016e; # LATIN CAPITAL LETTER U WITH RING ABOVE | |
682 | $jisx0212_j2u{0x2a6c} = 0x0168; # LATIN CAPITAL LETTER U WITH TILDE | |
683 | $jisx0212_j2u{0x2a6d} = 0x01d7; # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE | |
684 | $jisx0212_j2u{0x2a6e} = 0x01db; # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE | |
685 | $jisx0212_j2u{0x2a6f} = 0x01d9; # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON | |
686 | $jisx0212_j2u{0x2a70} = 0x01d5; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON | |
687 | $jisx0212_j2u{0x2a71} = 0x0174; # LATIN CAPITAL LETTER W WITH CIRCUMFLEX | |
688 | $jisx0212_j2u{0x2a72} = 0x00dd; # LATIN CAPITAL LETTER Y WITH ACUTE | |
689 | $jisx0212_j2u{0x2a73} = 0x0178; # LATIN CAPITAL LETTER Y WITH DIAERESIS | |
690 | $jisx0212_j2u{0x2a74} = 0x0176; # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX | |
691 | $jisx0212_j2u{0x2a75} = 0x0179; # LATIN CAPITAL LETTER Z WITH ACUTE | |
692 | $jisx0212_j2u{0x2a76} = 0x017d; # LATIN CAPITAL LETTER Z WITH CARON | |
693 | $jisx0212_j2u{0x2a77} = 0x017b; # LATIN CAPITAL LETTER Z WITH DOT ABOVE | |
694 | $jisx0212_j2u{0x2b21} = 0x00e1; # LATIN SMALL LETTER A WITH ACUTE | |
695 | $jisx0212_j2u{0x2b22} = 0x00e0; # LATIN SMALL LETTER A WITH GRAVE | |
696 | $jisx0212_j2u{0x2b23} = 0x00e4; # LATIN SMALL LETTER A WITH DIAERESIS | |
697 | $jisx0212_j2u{0x2b24} = 0x00e2; # LATIN SMALL LETTER A WITH CIRCUMFLEX | |
698 | $jisx0212_j2u{0x2b25} = 0x0103; # LATIN SMALL LETTER A WITH BREVE | |
699 | $jisx0212_j2u{0x2b26} = 0x01ce; # LATIN SMALL LETTER A WITH CARON | |
700 | $jisx0212_j2u{0x2b27} = 0x0101; # LATIN SMALL LETTER A WITH MACRON | |
701 | $jisx0212_j2u{0x2b28} = 0x0105; # LATIN SMALL LETTER A WITH OGONEK | |
702 | $jisx0212_j2u{0x2b29} = 0x00e5; # LATIN SMALL LETTER A WITH RING ABOVE | |
703 | $jisx0212_j2u{0x2b2a} = 0x00e3; # LATIN SMALL LETTER A WITH TILDE | |
704 | $jisx0212_j2u{0x2b2b} = 0x0107; # LATIN SMALL LETTER C WITH ACUTE | |
705 | $jisx0212_j2u{0x2b2c} = 0x0109; # LATIN SMALL LETTER C WITH CIRCUMFLEX | |
706 | $jisx0212_j2u{0x2b2d} = 0x010d; # LATIN SMALL LETTER C WITH CARON | |
707 | $jisx0212_j2u{0x2b2e} = 0x00e7; # LATIN SMALL LETTER C WITH CEDILLA | |
708 | $jisx0212_j2u{0x2b2f} = 0x010b; # LATIN SMALL LETTER C WITH DOT ABOVE | |
709 | $jisx0212_j2u{0x2b30} = 0x010f; # LATIN SMALL LETTER D WITH CARON | |
710 | $jisx0212_j2u{0x2b31} = 0x00e9; # LATIN SMALL LETTER E WITH ACUTE | |
711 | $jisx0212_j2u{0x2b32} = 0x00e8; # LATIN SMALL LETTER E WITH GRAVE | |
712 | $jisx0212_j2u{0x2b33} = 0x00eb; # LATIN SMALL LETTER E WITH DIAERESIS | |
713 | $jisx0212_j2u{0x2b34} = 0x00ea; # LATIN SMALL LETTER E WITH CIRCUMFLEX | |
714 | $jisx0212_j2u{0x2b35} = 0x011b; # LATIN SMALL LETTER E WITH CARON | |
715 | $jisx0212_j2u{0x2b36} = 0x0117; # LATIN SMALL LETTER E WITH DOT ABOVE | |
716 | $jisx0212_j2u{0x2b37} = 0x0113; # LATIN SMALL LETTER E WITH MACRON | |
717 | $jisx0212_j2u{0x2b38} = 0x0119; # LATIN SMALL LETTER E WITH OGONEK | |
718 | $jisx0212_j2u{0x2b39} = 0x01f5; # LATIN SMALL LETTER G WITH ACUTE | |
719 | $jisx0212_j2u{0x2b3a} = 0x011d; # LATIN SMALL LETTER G WITH CIRCUMFLEX | |
720 | $jisx0212_j2u{0x2b3b} = 0x011f; # LATIN SMALL LETTER G WITH BREVE | |
721 | $jisx0212_j2u{0x2b3d} = 0x0121; # LATIN SMALL LETTER G WITH DOT ABOVE | |
722 | $jisx0212_j2u{0x2b3e} = 0x0125; # LATIN SMALL LETTER H WITH CIRCUMFLEX | |
723 | $jisx0212_j2u{0x2b3f} = 0x00ed; # LATIN SMALL LETTER I WITH ACUTE | |
724 | $jisx0212_j2u{0x2b40} = 0x00ec; # LATIN SMALL LETTER I WITH GRAVE | |
725 | $jisx0212_j2u{0x2b41} = 0x00ef; # LATIN SMALL LETTER I WITH DIAERESIS | |
726 | $jisx0212_j2u{0x2b42} = 0x00ee; # LATIN SMALL LETTER I WITH CIRCUMFLEX | |
727 | $jisx0212_j2u{0x2b43} = 0x01d0; # LATIN SMALL LETTER I WITH CARON | |
728 | $jisx0212_j2u{0x2b45} = 0x012b; # LATIN SMALL LETTER I WITH MACRON | |
729 | $jisx0212_j2u{0x2b46} = 0x012f; # LATIN SMALL LETTER I WITH OGONEK | |
730 | $jisx0212_j2u{0x2b47} = 0x0129; # LATIN SMALL LETTER I WITH TILDE | |
731 | $jisx0212_j2u{0x2b48} = 0x0135; # LATIN SMALL LETTER J WITH CIRCUMFLEX | |
732 | $jisx0212_j2u{0x2b49} = 0x0137; # LATIN SMALL LETTER K WITH CEDILLA | |
733 | $jisx0212_j2u{0x2b4a} = 0x013a; # LATIN SMALL LETTER L WITH ACUTE | |
734 | $jisx0212_j2u{0x2b4b} = 0x013e; # LATIN SMALL LETTER L WITH CARON | |
735 | $jisx0212_j2u{0x2b4c} = 0x013c; # LATIN SMALL LETTER L WITH CEDILLA | |
736 | $jisx0212_j2u{0x2b4d} = 0x0144; # LATIN SMALL LETTER N WITH ACUTE | |
737 | $jisx0212_j2u{0x2b4e} = 0x0148; # LATIN SMALL LETTER N WITH CARON | |
738 | $jisx0212_j2u{0x2b4f} = 0x0146; # LATIN SMALL LETTER N WITH CEDILLA | |
739 | $jisx0212_j2u{0x2b50} = 0x00f1; # LATIN SMALL LETTER N WITH TILDE | |
740 | $jisx0212_j2u{0x2b51} = 0x00f3; # LATIN SMALL LETTER O WITH ACUTE | |
741 | $jisx0212_j2u{0x2b52} = 0x00f2; # LATIN SMALL LETTER O WITH GRAVE | |
742 | $jisx0212_j2u{0x2b53} = 0x00f6; # LATIN SMALL LETTER O WITH DIAERESIS | |
743 | $jisx0212_j2u{0x2b54} = 0x00f4; # LATIN SMALL LETTER O WITH CIRCUMFLEX | |
744 | $jisx0212_j2u{0x2b55} = 0x01d2; # LATIN SMALL LETTER O WITH CARON | |
745 | $jisx0212_j2u{0x2b56} = 0x0151; # LATIN SMALL LETTER O WITH DOUBLE ACUTE | |
746 | $jisx0212_j2u{0x2b57} = 0x014d; # LATIN SMALL LETTER O WITH MACRON | |
747 | $jisx0212_j2u{0x2b58} = 0x00f5; # LATIN SMALL LETTER O WITH TILDE | |
748 | $jisx0212_j2u{0x2b59} = 0x0155; # LATIN SMALL LETTER R WITH ACUTE | |
749 | $jisx0212_j2u{0x2b5a} = 0x0159; # LATIN SMALL LETTER R WITH CARON | |
750 | $jisx0212_j2u{0x2b5b} = 0x0157; # LATIN SMALL LETTER R WITH CEDILLA | |
751 | $jisx0212_j2u{0x2b5c} = 0x015b; # LATIN SMALL LETTER S WITH ACUTE | |
752 | $jisx0212_j2u{0x2b5d} = 0x015d; # LATIN SMALL LETTER S WITH CIRCUMFLEX | |
753 | $jisx0212_j2u{0x2b5e} = 0x0161; # LATIN SMALL LETTER S WITH CARON | |
754 | $jisx0212_j2u{0x2b5f} = 0x015f; # LATIN SMALL LETTER S WITH CEDILLA | |
755 | $jisx0212_j2u{0x2b60} = 0x0165; # LATIN SMALL LETTER T WITH CARON | |
756 | $jisx0212_j2u{0x2b61} = 0x0163; # LATIN SMALL LETTER T WITH CEDILLA | |
757 | $jisx0212_j2u{0x2b62} = 0x00fa; # LATIN SMALL LETTER U WITH ACUTE | |
758 | $jisx0212_j2u{0x2b63} = 0x00f9; # LATIN SMALL LETTER U WITH GRAVE | |
759 | $jisx0212_j2u{0x2b64} = 0x00fc; # LATIN SMALL LETTER U WITH DIAERESIS | |
760 | $jisx0212_j2u{0x2b65} = 0x00fb; # LATIN SMALL LETTER U WITH CIRCUMFLEX | |
761 | $jisx0212_j2u{0x2b66} = 0x016d; # LATIN SMALL LETTER U WITH BREVE | |
762 | $jisx0212_j2u{0x2b67} = 0x01d4; # LATIN SMALL LETTER U WITH CARON | |
763 | $jisx0212_j2u{0x2b68} = 0x0171; # LATIN SMALL LETTER U WITH DOUBLE ACUTE | |
764 | $jisx0212_j2u{0x2b69} = 0x016b; # LATIN SMALL LETTER U WITH MACRON | |
765 | $jisx0212_j2u{0x2b6a} = 0x0173; # LATIN SMALL LETTER U WITH OGONEK | |
766 | $jisx0212_j2u{0x2b6b} = 0x016f; # LATIN SMALL LETTER U WITH RING ABOVE | |
767 | $jisx0212_j2u{0x2b6c} = 0x0169; # LATIN SMALL LETTER U WITH TILDE | |
768 | $jisx0212_j2u{0x2b6d} = 0x01d8; # LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE | |
769 | $jisx0212_j2u{0x2b6e} = 0x01dc; # LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE | |
770 | $jisx0212_j2u{0x2b6f} = 0x01da; # LATIN SMALL LETTER U WITH DIAERESIS AND CARON | |
771 | $jisx0212_j2u{0x2b70} = 0x01d6; # LATIN SMALL LETTER U WITH DIAERESIS AND MACRON | |
772 | $jisx0212_j2u{0x2b71} = 0x0175; # LATIN SMALL LETTER W WITH CIRCUMFLEX | |
773 | $jisx0212_j2u{0x2b72} = 0x00fd; # LATIN SMALL LETTER Y WITH ACUTE | |
774 | $jisx0212_j2u{0x2b73} = 0x00ff; # LATIN SMALL LETTER Y WITH DIAERESIS | |
775 | $jisx0212_j2u{0x2b74} = 0x0177; # LATIN SMALL LETTER Y WITH CIRCUMFLEX | |
776 | $jisx0212_j2u{0x2b75} = 0x017a; # LATIN SMALL LETTER Z WITH ACUTE | |
777 | $jisx0212_j2u{0x2b76} = 0x017e; # LATIN SMALL LETTER Z WITH CARON | |
778 | $jisx0212_j2u{0x2b77} = 0x017c; # LATIN SMALL LETTER Z WITH DOT ABOVE | |
779 | ||
# JIS X 0213:2000 unified the 12 JIS X 0212:1990 row-cells listed below
# with JIS X 0208:1997 characters.
# cf. http://wakaba-web.hp.infoseek.co.jp/0212-0213/jisx0212-0213.ja.html
#
# Each entry is [ JIS X 0212 code, JIS X 0208 code it is unified with ].
for my $unified_pair (
    [0x3031, 0x213a],
    [0x3063, 0x4322],
    [0x3742, 0x4333],
    [0x3c77, 0x5740],
    [0x3d58, 0x5765],
    [0x4039, 0x5954],
    [0x4147, 0x327e],
    [0x4323, 0x3341],
    [0x4344, 0x5b4f],
    [0x4b51, 0x6824],
    [0x4d77, 0x4169],
    [0x545a, 0x3752],
) {
    unify_jisx0212(@$unified_pair);
}
795 | ||
# unify_jisx0212($x, $y)
#
# Re-points JIS X 0212 code $x at the Unicode value currently mapped from
# JIS X 0208 code $y.
#
#   $x  JIS X 0212 code (key of %jisx0212_j2u)
#   $y  JIS X 0208 code (key of %j2u)
#
# Side effects on package globals:
#   $jisx0212_unified{$x}  receives the Unicode value $x mapped to before
#                          the call, so it can still be reverse-mapped later;
#   $jisx0212_j2u{$x}      is overwritten with $j2u{$y}.
#
# The arguments are held in lexicals (my) rather than dynamically scoped
# package variables, so the globals $x and $y are never touched.
sub unify_jisx0212 {
    my ($x, $y) = @_;

    # Single list assignment: the right-hand side is evaluated in full
    # before either element on the left is stored.
    ($jisx0212_unified{$x}, $jisx0212_j2u{$x}) =
        ($jisx0212_j2u{$x}, $j2u{$y});
}
800 | ||
801 | ||
802 | ## | |
803 | ## Make reversal maps. | |
804 | ## | |
805 | ||
# add_reversed_map(\%forward, \%reverse)
#
# Adds value => key entries of %forward to %reverse (existing entries of
# %reverse that are not overwritten are kept).
#
# The keys are visited in descending numeric order, so when several JIS
# codes map to the same Unicode value the LOWEST JIS code is the one that
# ends up in %reverse.  Iterating over a bare "keys" list would make the
# winner depend on Perl's hash ordering, which is randomised per process
# in modern perls and would make the generated header non-reproducible.
# NOTE(review): preferring the lowest code is assumed to be the desired
# tie-break (standard rows before extension rows) -- confirm.
sub add_reversed_map {
    my ($forward, $reverse) = @_;

    for my $jcode (sort { $b <=> $a } keys %$forward) {
        $reverse->{ $forward->{$jcode} } = $jcode;
    }
    return;
}

add_reversed_map(\%j2u,               \%u2j);
add_reversed_map(\%jisx0208_1978_j2u, \%jisx0208_1978_u2j);
add_reversed_map(\%jisx0212_j2u,      \%jisx0212_u2j);
815 | ||
# Remove maps duplicated between JIS X 0208 extension and JIS X 0212:
# U+2116 NUMERO SIGN is dropped from both JIS X 0208 reverse maps.
for my $reverse_map (\%u2j, \%jisx0208_1978_u2j) {
    delete $reverse_map->{0x2116};    # NUMERO SIGN
}
819 | ||
# Add JIS X 0212 maps unified with JIS X 0208:1997.
# %jisx0212_unified holds, per JIS X 0212 code, the Unicode value that code
# had before unification; register it in the reverse map as well, and abort
# if that Unicode value is already claimed by another JIS X 0212 code.
for (keys %jisx0212_unified) {
    die "Duplicated map: $_ : $jisx0212_unified{$_} : $jisx0212_u2j{$jisx0212_unified{$_}}"
        if defined($jisx0212_u2j{$jisx0212_unified{$_}});

    $jisx0212_u2j{$jisx0212_unified{$_}} = $_;
}
828 | ||
# Add maps for CJK compatibility ideographs of Unicode.
# The compatibility tables are applied one after another, in this order.
for my $compat_table (
    \%compat_ksx1001,
    \%compat_big5,
    \%compat_ibm32,
    \%compat_jisx0213,
    \%compat_cns11643,
) {
    add_cjkcompat(%$compat_table);
}
835 | ||
# add_cjkcompat(%compat)
#
# %compat maps a compatibility code point to the code point it is
# compatible with.  For every pair, and for each of the three Unicode->JIS
# reverse maps (%u2j, %jisx0208_1978_u2j, %jisx0212_u2j): if the target
# code point has a JIS code in that map, the compatibility code point is
# given the same JIS code.  A warning is printed when the compatibility
# code point already had an entry in that map; the entry is overwritten
# regardless.
#
# Whenever %compat is non-empty, rows 0xF9 and 0xFA are also flagged in
# %u2j_1978.
#
# The argument hash is held in a lexical (my), so no package variable
# %compat is dynamically overridden.
sub add_cjkcompat {
    my (%compat) = @_;

    for my $compat_ucs (keys %compat) {
        my $unified_ucs = $compat{$compat_ucs};

        # Same treatment for each reverse map, in the original order.
        for my $table (\%u2j, \%jisx0208_1978_u2j, \%jisx0212_u2j) {
            next unless defined $table->{$unified_ucs};

            if (defined $table->{$compat_ucs}) {
                warn sprintf("duplicated: %04X -> %04X / %04X",
                             $compat_ucs,
                             $table->{$compat_ucs},
                             $table->{$unified_ucs});
            }
            $table->{$compat_ucs} = $table->{$unified_ucs};
        }
    }

    # Loop-invariant marks: set once, and only if at least one pair was
    # processed (an empty %compat leaves %u2j_1978 untouched).
    if (%compat) {
        $u2j_1978{0xf9} = 1;
        $u2j_1978{0xfa} = 1;
    }
}
862 | ||
863 | ||
# Fixed prologue of the generated C header: include guard, credits, the
# stand-alone (JIS_BUILD_APP) declarations, control-character and
# character-set type constants, and struct jischar_t.
# The here-document interpolates, hence the escaped "@" signs.
my $header_prologue = <<"_HEADER_";
#ifndef _ISO2022JP_HDR_
#define _ISO2022JP_HDR_
/*
* iso-2022-jp support by Norihisa Washitake <nori\@washitake.com>
* JIS X 0208:1997 update and JIS X 0212:1990 support
* by Hatuka*nezumi - IKEDA Soji <nezumi\@jca.apc.org>
* $Id: iso2022jp.pl,v 1.7 2004/02/03 02:00:00 mrsam Exp $
*
*/

#if (JIS_DEBUG > 0) && defined(JIS_BUILD_APP)
#include <stdlib.h>
#include <stdio.h>
#include <wchar.h>
#include <string.h>

/* Definitions from unicode.h */
typedef wchar_t unicode_char;
struct unicode_info {
const char *chset;
unicode_char *(*c2u)(const char *, int *);
char *(*u2c)(const unicode_char *, int *);
char *(*toupper_func)(const char *, int *);
char *(*tolower_func)(const char *, int *);
char *(*totitle_func)(const char *, int *);
};
#else
#include "unicode.h"
#endif /* JIS_BUILD_APP */

/*
* Some characters are unique in ISO-2022-JP character set,
* so define them specially.
*/

#define JIS_CHAR_ESC 0x1B
#define JIS_CHAR_SO 0x0E
#define JIS_CHAR_SI 0x0F

/* ISOREG #1/#3: US-ASCII (identical to ISO 646 IRV) */
#define JIS_TYPE_ASCII 0x0
/* ISOREG #14: JIS X 0201:1976/1997 GL (ISO 646 Japanese version) */
#define JIS_TYPE_ROMAN 0x1
/* ISOREG #13: JIS X 0201:1976/1997 GR ("Halfwidth katakana") */
#define JIS_TYPE_7BITKANA 0x2
#define JIS_TYPE_8BITKANA 0x3
/* ISOREG #87/#168: JIS X 0208:1983/1990/1997 */
#define JIS_TYPE_KANJI 0x4
#define JIS_TYPE_JISX0208 0x4
/* ISOREG #42: JIS C 6226:1978 ("78JIS" or "Old JIS") */
#define JIS_TYPE_JISX0208_1978 0x5
/* ISOREG #159: JIS X 0212:1990 ("Supplementary kanzi") */
#define JIS_TYPE_JISX0212 0x6
/* Unknown state */
#define JIS_TYPE_BINARY 0xF

struct jischar_t {
int type;
unsigned int value;
};

_HEADER_
print $header_prologue;
927 | ||
928 | ||
# For every character set emit the JIS->Unicode tables first, then the
# Unicode->JIS tables.  Only JIS X 0208:1997 additionally gets the generic
# jis2uni_tbls / uni2jis_tbls aliases, each right after its table set.
#
# Fields: C identifier prefix, display name, "fall back to the JIS X 0208
# tables" flag, forward map, reverse map, "emit generic aliases" flag.
for my $charset (
    ['jisx0208',      'JIS X 0208:1997', 0, \%j2u,               \%u2j,               1],
    ['jisx0208_1978', 'JIS C 6226:1978', 1, \%jisx0208_1978_j2u, \%jisx0208_1978_u2j, 0],
    ['jisx0212',      'JIS X 0212:1990', 0, \%jisx0212_j2u,      \%jisx0212_u2j,      0],
) {
    my ($prefix, $label, $is_1978, $to_uni, $from_uni, $with_alias) = @$charset;

    j2u_map($prefix, $label, $is_1978, %$to_uni);
    print "#define jis2uni_tbls jisx0208_to_uni_tbls\n\n" if $with_alias;

    u2j_map($prefix, $label, $is_1978, %$from_uni);
    print "#define uni2jis_tbls uni_to_jisx0208_tbls\n\n" if $with_alias;
}
938 | ||
# j2u_map($name, $setname, $compat_1978, %j2u)
#
# Prints, to the currently selected output handle, the C tables that map
# a JIS code to Unicode.
#
#   $name         prefix of the generated C identifiers
#   $setname      character set name, used only in the leading C comment
#   $compat_1978  when true, a row gets its own table only if it is
#                 flagged in the global %j2u_1978; other used rows refer to
#                 the corresponding jisx0208_to_uni_tbl_XX table instead
#   %j2u          JIS code => Unicode map (a private copy; the caller's
#                 hash and the global %j2u are not modified)
#
# Output: one "static const unicode_char <name>_to_uni_tbl_XX[]" array of
# 94 cells (low byte 0x21..0x7e) for every high byte 0x21..0x7e that has at
# least one mapping greater than zero, followed by the index array
# "<name>_to_uni_tbls[]" holding a table name or NULL per high byte.
# Cells without a mapping are filled with 0x003F.
#
# The number of cells per output line comes from the global $perline;
# 8 is used when it is unset or zero, to avoid a modulus-by-zero error.
sub j2u_map {
    my ($name, $setname, $compat_1978, %j2u) = @_;
    my $per_line = $perline || 8;
    my %row_used;

    print "/* map: $setname to Unicode */\n";
    for my $hb (0x21 .. 0x7e) {
        # Skip rows that contain no mapping at all.
        next unless grep { ($j2u{$hb * 256 + $_} || 0) > 0 } 0x21 .. 0x7e;
        $row_used{$hb} = 1;

        # In 1978-compat mode unflagged rows reuse the JIS X 0208 table.
        next if $compat_1978 && !$j2u_1978{$hb};

        printf "static const unicode_char ${name}_to_uni_tbl_%02x[] = {", $hb;
        my $items = 0;
        for my $lb (0x21 .. 0x7e) {
            my $ucode = $j2u{$hb * 256 + $lb};
            $ucode = 0x003f if !defined($ucode) || $ucode == 0;

            print ", "  if $items > 0;
            print "\n " if $items % $per_line == 0;
            printf "0x%04X", $ucode;
            $items++;
        }
        print "\n};\n";
    }

    print "const unicode_char * ${name}_to_uni_tbls[] = {\n";
    for my $hb (0x21 .. 0x7e) {
        print(($hb > 0x21) ? ",\n " : " ");
        if (!$row_used{$hb}) {
            print "NULL";
        }
        elsif (!$compat_1978 || $j2u_1978{$hb}) {
            printf "${name}_to_uni_tbl_%02x", $hb;
        }
        else {
            printf "jisx0208_to_uni_tbl_%02x", $hb;
        }
    }

    print "\n};\n";
    print "\n\n";
}
984 | ||
# u2j_map($name, $setname, $compat_1978, %u2j)
#
# Prints, to the currently selected output handle, the C tables that map
# a Unicode value to a JIS code.
#
#   $name         infix of the generated C identifiers
#   $setname      character set name, used only in the leading C comment
#   $compat_1978  when true, a row gets its own table only if it is
#                 flagged in the global %u2j_1978; other used rows refer to
#                 the corresponding uni_to_jisx0208_tbl_XX table instead
#   %u2j          Unicode => JIS code map (a private copy; the caller's
#                 hash and the global %u2j are not modified)
#
# Output: one "static const unsigned uni_to_<name>_tbl_XX[]" array of 256
# cells for every high byte 0x00..0xff that has at least one mapping
# greater than zero, followed by the index array "uni_to_<name>_tbls[]"
# holding a table name or NULL per high byte.  Cells without a mapping are
# filled with 0x003F.
#
# The number of cells per output line comes from the global $perline;
# 8 is used when it is unset or zero, to avoid a modulus-by-zero error.
sub u2j_map {
    my ($name, $setname, $compat_1978, %u2j) = @_;
    my $per_line = $perline || 8;
    my %row_used;

    print "/* map : Unicode to $setname */\n";
    for my $hb (0x00 .. 0xff) {
        # Skip rows that contain no mapping at all.
        next unless grep { ($u2j{$hb * 256 + $_} || 0) > 0 } 0x00 .. 0xff;
        $row_used{$hb} = 1;

        # In 1978-compat mode unflagged rows reuse the JIS X 0208 table.
        next if $compat_1978 && !$u2j_1978{$hb};

        printf "static const unsigned uni_to_${name}_tbl_%02x[] = {", $hb;
        my $items = 0;
        for my $lb (0x00 .. 0xff) {
            my $jcode = $u2j{$hb * 256 + $lb};
            $jcode = 0x003f if !defined($jcode) || $jcode == 0;

            print ", "  if $items > 0;
            print "\n " if $items % $per_line == 0;
            printf "0x%04X", $jcode;
            $items++;
        }
        print "\n};\n";
    }

    print "const unsigned * uni_to_${name}_tbls[] = {\n";
    for my $hb (0x00 .. 0xff) {
        print(($hb > 0x00) ? ",\n " : " ");
        if (!$row_used{$hb}) {
            print "NULL";
        }
        elsif (!$compat_1978 || $u2j_1978{$hb}) {
            printf "uni_to_${name}_tbl_%02x", $hb;
        }
        else {
            printf "uni_to_jisx0208_tbl_%02x", $hb;
        }
    }
    print "\n};\n";
}
1028 | ||
# Close the include guard opened at the top of the generated header.
print("#endif /* _ISO2022JP_HDR_ */\n");
1030 | ||
1031 | __END__ | |
1032 |