Commit | Line | Data |
---|---|---|
b0322a85 CE |
1 | #! /usr/bin/perl |
2 | # | |
3 | # Compile LineBreak.txt into C array declarations. | |
4 | # | |
5 | # The array's structure is [firstchar, lastchar, class], giving the | |
6 | # linebreaking "class" for unicode character range firstchar-lastchar. | |
7 | # | |
8 | # The ranges are sorted in numerical order. | |
9 | # | |
10 | # An array gets generated for each block of 4096 unicode characters. | |
11 | # | |
12 | # Finally, two arrays get declared: a pointer to an array for each 4096 | |
13 | # unicode character block, and the number of elements in the array. | |
14 | # | |
15 | # The pointer is NULL for each block of 4096 unicode characters that is not | |
16 | # defined in LineBreak.txt | |
17 | # | |
18 | # By definition, a unicode character that is not listed in the array is | |
19 | # class XX. | |
20 | ||
21 | use strict; | |
22 | use warnings; | |
23 | use mkcommon; | |
24 | ||
# Maps a numeric code point to its General_Category abbreviation (field 2
# of UnicodeData.txt).  Used later in this script to resolve line breaking
# class SA according to the LB1 rule.
my %general_category;

{
    # Three-argument open on a lexical handle; on failure report which file
    # could not be read and why.
    open(my $ucd_fh, '<', 'UnicodeData.txt')
        or die "Cannot open UnicodeData.txt: $!\n";

    while (defined(my $record = <$ucd_fh>))
    {
        chomp $record;

        # Field 0 is the code point in hexadecimal, field 2 is the
        # general category.
        my @f = split(/;/, $record);

        # Skip blank or truncated records instead of storing a category
        # under an undefined key.
        next unless @f >= 3;

        # hex() parses the code point directly; no need to evaluate file
        # content as Perl source.
        $general_category{hex($f[0])} = $f[2];
    }

    close($ucd_fh);
}
41 | ||
# Range collector from the project's mkcommon module: every range() call
# below registers one [first, last, class] entry and output() is invoked
# once at the end.  NOTE(review): presumably output() writes the C array
# declarations described in the header comment -- confirm in mkcommon.
my $obj=mkcommon->new;

open(my $lb_fh, '<', 'LineBreak.txt')
    or die "Cannot open LineBreak.txt: $!\n";

while (defined(my $record = <$lb_fh>))
{
    chomp $record;

    # Data lines have the form "FIRST[..LAST];CLASS # comment"; anything
    # else (comments, blank lines) is skipped.  The whole class token is
    # captured: taking exactly two characters would silently truncate a
    # longer class name such as ZWJ to ZW.  Optional whitespace around
    # the semicolon is tolerated.
    next unless $record =~
        /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*([^\s#]+)/;

    my ($first_hex, $last_hex, $class) = ($1, $2, $3);

    # A line without "..LAST" describes a single code point.
    my $first = hex($first_hex);
    my $last  = defined($last_hex) ? hex($last_hex) : $first;

    # Characters that are not listed are class XX by definition, so there
    # is nothing to record for explicit XX entries.
    next if $class eq "XX";

    if ($class eq "SA")
    {
        # LB1 rule: SA resolves per code point to CM when the general
        # category is Mn or Mc, and to AL otherwise.
        for my $cp ($first .. $last)
        {
            die "Cannot find general_category for $cp\n"
                unless exists $general_category{$cp};

            my $gc = $general_category{$cp};

            $obj->range($cp, $cp,
                        ($gc eq "Mn" || $gc eq "Mc")
                        ? "UNICODE_LB_CM" : "UNICODE_LB_AL");
        }
    }
    else
    {
        $class = "AL" if $class eq "AI" || $class eq "SG"; # LB1 rule

        $obj->range($first, $last, "UNICODE_LB_$class");
    }
}

close($lb_fh);

$obj->output;