Commit | Line | Data |
---|---|---|
b0322a85 CE |
1 | #! /usr/bin/perl |
2 | # | |
3 | # Compile GraphemeBreakProperty.txt into C array declarations. | |
4 | # | |
5 | # The array's structure is [firstchar, lastchar, class], giving the | |
6 | # grapheme break "class" for unicode character range firstchar-lastchar. | |
7 | # | |
8 | # The ranges are sorted in numerical order. | |
9 | # | |
10 | # An array gets generated for each block of 4096 unicode characters. | |
11 | # | |
12 | # Finally, two arrays get declared: a pointer to an array for each 4096 | |
13 | # unicode character block, and the number of elements in the array. | |
14 | # | |
15 | # The pointer is NULL for each block of 4096 unicode characters that is not | |
16 | # defined in GraphemeBreakProperty.txt. | |
17 | # | |
18 | # By definition, a unicode character that is not listed in the array is | |
19 | # class XX. | |
20 | ||
21 | use strict; | |
22 | use warnings; | |
23 | use mkcommon; | |
24 | ||
# Read GraphemeBreakProperty.txt.
#
# Every data line has the form "FIRST[..LAST] ; Class", where FIRST and LAST
# are upper-case hexadecimal code points. Each matching line is recorded in
# @breaklist as [first, last, class], with first and last converted to
# numbers. Lines that do not match the pattern (comments, blank lines) are
# skipped.

my $input_file = 'GraphemeBreakProperty.txt';

# Three-argument open with a lexical handle; on failure report which file
# could not be opened and why.
open(my $fh, '<', $input_file)
    or die "Cannot open $input_file: $!";

my @breaklist;

while (defined(my $line = <$fh>))
{
    chomp $line;

    next unless $line =~ /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*(\S+)/;

    my ($first_hex, $last_hex, $class) = ($1, $2, $3);

    # A line naming a single code point is a range of one.
    $last_hex = $first_hex unless defined $last_hex;

    # hex() converts the digits directly; file data is never string-eval'ed.
    push @breaklist, [hex($first_hex), hex($last_hex), $class];
}

close($fh) or die "Cannot close $input_file: $!";

# Hand the ranges to mkcommon in ascending order of first code point, naming
# each class with the UNICODE_GRAPHEMEBREAK_ prefix, then ask it for output.
# NOTE(review): range() and output() are implemented in mkcommon, which is
# not visible here; presumably output() prints the C array declarations
# described in the file header -- confirm against mkcommon.

my $obj = mkcommon->new;

foreach my $range (sort { $a->[0] <=> $b->[0] } @breaklist)
{
    my ($first, $last, $class) = @$range;

    $obj->range($first, $last, "UNICODE_GRAPHEMEBREAK_$class");
}

$obj->output;