# Copyright 2000-2004 Double Precision, Inc.
# See COPYING for distribution information.
#
#
# Generate unicode upper/lower/titlecase translations.
#
# Reads UnicodeData.txt from the current directory and writes C source to
# standard output:
#
#   unicode_case_hash      - number of hash buckets ($tabsize)
#   unicode_case_tab[][4]  - rows of {code, upper, lower, title}, grouped by
#                            bucket (code % $tabsize), terminated by {0,0,0,0}
#   unicode_case_offset[]  - for each bucket, the index of its first row in
#                            unicode_case_tab
#
# Command-line arguments are ignored.

use strict;
use warnings;

print '/*
** Copyright 2000-2004 Double Precision, Inc.
** See COPYING for distribution information.
**
*/

#include "unicode.h"

';

# Case mappings keyed by numeric code point.  %has_mapping records every
# code point for which at least one mapping column was present.
my (%upper_of, %lower_of, %title_of, %has_mapping);

# Semicolon-separated columns of UnicodeData.txt that hold the upper, lower
# and title case mappings, paired with the table each one fills.
my @mapping_columns = (
    [ 12, \%upper_of ],
    [ 13, \%lower_of ],
    [ 14, \%title_of ],
);

open(my $unicode_data, '<', 'UnicodeData.txt') or die "$!\n";

while (my $line = <$unicode_data>) {
    # Strip LF or CRLF, so a stray "\r" cannot masquerade as a non-empty
    # titlecase field on files with DOS line endings.
    $line =~ s/\r?\n\z//;

    # Limit -1 keeps trailing empty fields, so columns 12..14 are defined
    # on every well-formed record.
    my @fields = split /;/, $line, -1;

    # Skip blank or malformed records instead of treating them as U+0000.
    next
        unless defined $fields[0]
        && $fields[0] =~ /\A[0-9A-Fa-f]+\z/;

    my $code = hex $fields[0];

    for my $mapping (@mapping_columns) {
        my ($column, $table) = @$mapping;
        my $target = $fields[$column];

        # An empty column means "no mapping".  Anything that is not plain
        # hex is ignored; it is never evaluated as code.
        next
            unless defined $target
            && $target =~ /\A[0-9A-Fa-f]+\z/;

        $table->{$code}     = hex $target;
        $has_mapping{$code} = 1;
    }
}

close($unicode_data);

my $tabsize = 2048;

# Distribute the mapped code points over $tabsize buckets.  Hash key order
# is not stable from one run to the next, so sort numerically to make the
# generated table reproducible.
my @bucket = map { [] } 1 .. $tabsize;

for my $code (sort { $a <=> $b } keys %has_mapping) {
    push @{ $bucket[ $code % $tabsize ] }, $code;
}

# Size of the fullest bucket; reported below in a C comment.
my $maxcnt = 0;

for my $entries (@bucket) {
    my $cnt = scalar @$entries;

    $maxcnt = $cnt if $cnt > $maxcnt;
}

print "const unsigned unicode_case_hash=$tabsize;\n";
# NOTE(review): the reason for the "+2" margin is not visible in this
# script; it is preserved as-is.
print "/* unicode_case_maxbucket="
    . ($maxcnt + 2) . "*/\n";

print "const unicode_char unicode_case_tab[][4]={\n";

my $idx = 0;    # running row index into unicode_case_tab
my @offset;     # $offset[$i] = index of the first row of bucket $i

for my $i (0 .. $tabsize - 1) {
    $offset[$i] = $idx;

    for my $code (@{ $bucket[$i] }) {
        my $upper = $upper_of{$code} || 0;
        my $lower = $lower_of{$code} || 0;
        my $title = $title_of{$code} || 0;

        # No row is emitted when every mapping is absent or zero.
        next unless $upper || $lower || $title;

        # Missing upper/lower map to the code point itself; missing
        # titlecase falls back to the (already defaulted) uppercase.
        $upper = $code  unless $upper;
        $lower = $code  unless $lower;
        $title = $upper unless $title;

        printf("{0x%04x,0x%04x,0x%04x,0x%04x},",
            $code, $upper, $lower, $title);
        print "\n" if ($idx % 4) == 3;    # four rows per output line
        ++$idx;
    }
}
print "{0,0,0,0}};

const unsigned unicode_case_offset[$tabsize]={
";

for my $i (0 .. $tabsize - 1) {
    printf("%4d", $offset[$i]);
    print "," if $i < $tabsize - 1;

    print "\n" if ($i % 16) == 15;        # sixteen offsets per output line
}
print "};\n";