Commit | Line | Data |
---|---|---|
8d138742 CE |
1 | # Copyright 2000-2004 Double Precision, Inc. |
2 | # See COPYING for distribution information. | |
3 | # | |
4 | # $Id: mkultcase.pl,v 1.3 2004/02/01 06:09:09 mrsam Exp $ | |
5 | # | |
6 | # Generate unicode upper/lower/titlecase translations. | |
7 | ||
# Emit the fixed preamble of the generated C source: the copyright banner
# followed by the include of unicode.h.  The heredoc is single-quoted so
# that the RCS keyword is written out verbatim, without interpolation.
print <<'EOT';
/*
** Copyright 2000-2004 Double Precision, Inc.
** See COPYING for distribution information.
**
** $Id: mkultcase.pl,v 1.3 2004/02/01 06:09:09 mrsam Exp $
*/

#include "unicode.h"

EOT
18 | ||
# First command-line argument.  It is consumed here but never read again
# in the visible part of this script; kept so invocation is unchanged.
my $set=shift;

# Case mapping tables filled in from UnicodeData.txt.  All of them are
# keyed by the numeric code point (first field of each record):
#   %UC, %LC, %TC - upper-, lower- and titlecase mapping of the code point
#   %FLAG         - set to 1 for every code point with at least one mapping
our (%UC, %LC, %TC, %FLAG);

my $unicode_data = "UnicodeData.txt";

open(my $ucd, '<', $unicode_data)
	or die "$unicode_data: $!\n";

while (my $record = <$ucd>)
{
	chomp $record;

	my @fields = split /;/, $record;

	# Ignore anything that does not start with a hexadecimal code point
	# (e.g. a blank line), rather than storing a bogus key.
	next unless defined $fields[0] && $fields[0] =~ /^[0-9A-Fa-f]+$/;

	# hex() converts the value directly; no need to eval file content.
	my $code = hex $fields[0];

	# Fields 12, 13 and 14 are read as the upper-, lower- and titlecase
	# mappings.  split drops trailing empty fields, so any of them may be
	# undefined as well as empty.
	foreach my $mapping ([12, \%UC], [13, \%LC], [14, \%TC])
	{
		my ($fieldno, $table) = @$mapping;
		my $target = $fields[$fieldno];

		next unless defined $target && $target ne "";

		$table->{$code} = hex $target;
		$FLAG{$code} = 1;
	}
}

close($ucd);
60 | ||
# Number of hash buckets.  A code point is placed in bucket
# (code point % $tabsize).
my $tabsize=1024;

# $bucket[N] is a string holding the code points that fall into bucket N,
# each one followed by a newline.  Empty buckets stay undefined.
our @bucket;

# Walk the code points in numeric order so that the contents of every
# bucket, and therefore the generated C table, are the same on every run
# (the order returned by keys() is not stable between runs).
foreach my $code (sort { $a <=> $b } keys %FLAG)
{
	$bucket[ $code % $tabsize ] .= "$code\n";
}

# Find the size of the fullest bucket.
my $maxcnt=0;

foreach my $slot (0 .. $tabsize-1)
{
	next unless defined $bucket[$slot];

	my @members = split(/\n/, $bucket[$slot]);
	my $cnt = scalar @members;

	$maxcnt=$cnt if $cnt > $maxcnt;
}

print "const unsigned unicode_case_hash=$tabsize;\n";
print "/* unicode_case_maxbucket="
	. ($maxcnt+2) . "*/\n";
79 | ||
# Emit unicode_case_tab: one {code, upper, lower, title} row per code point
# that has a case mapping, grouped by hash bucket, four rows per output line.
print "const unicode_char unicode_case_tab[][4]={\n";

# Running count of rows written so far; also the index of the next row.
my $idx=0;

foreach my $slot (0 .. $tabsize-1)
{
	# Remember where this bucket's rows start in the table.
	$offset[$slot] = $idx;

	foreach my $code (split(/\n/, $bucket[$slot]))
	{
		my $upper = $UC{$code} + 0;
		my $lower = $LC{$code} + 0;
		my $title = $TC{$code} + 0;

		# Nothing to write if the code point has no mapping at all.
		next unless $upper || $lower || $title;

		# A missing upper/lower mapping defaults to the code point
		# itself; a missing titlecase mapping defaults to the
		# uppercase one.
		$upper ||= $code;
		$lower ||= $code;
		$title ||= $upper;

		printf("{0x%04x,0x%04x,0x%04x,0x%04x},",
		       $code, $upper, $lower, $title);
		print "\n" if ($idx % 4) == 3;
		++$idx;
	}
}

# Terminating all-zero row, then the opening of the offset array.
print "{0,0,0,0}};\n\n"
	. "const unsigned unicode_case_offset[" . $tabsize . "]={\n";
110 | ||
# Emit the body of unicode_case_offset: the starting row of every bucket,
# right-aligned in four columns, comma separated, sixteen values per line.
foreach my $slot (0 .. $tabsize-1)
{
	my $cell = sprintf("%4d", $offset[$slot]);

	# No comma after the final value.
	$cell .= "," if $slot < $tabsize-1;
	$cell .= "\n" if ($slot % 16) == 15;

	print $cell;
}
print "};\n";