Commit | Line | Data |
---|---|---|
b0322a85 CE |
1 | #! /usr/bin/perl |
2 | # | |
3 | # Compile WordBreakProperty.txt into C array declarations. | |
4 | # | |
5 | # The array's structure is [firstchar, lastchar, class], giving the | |
6 | # word-breaking "class" for unicode character range firstchar-lastchar. | |
7 | # | |
8 | # The ranges are sorted in numerical order. | |
9 | # | |
10 | # An array gets generated for each block of 4096 unicode characters. | |
11 | # | |
12 | # Finally, two arrays get declared: a pointer to an array for each 4096 | |
13 | # unicode character block, and the number of elements in the array. | |
14 | # | |
15 | # The pointer is NULL for each block of 4096 unicode characters that is not | |
16 | # defined in WordBreakProperty.txt | |
17 | ||
18 | use strict; | |
19 | use warnings; | |
20 | use mkcommon; | |
21 | ||
# Collector object: receives every [first, last, class] range and finally
# writes out the generated declarations (see mkcommon for the output format).
my $obj=mkcommon->new;

# Name of the Unicode data file to compile, read from the current directory.
my $input="WordBreakProperty.txt";

# Three-argument open with a lexical handle, so the file name can never be
# misread as a mode, and a failure reports which file and why.
open(my $fh, "<", $input) or die "Cannot open $input: $!";

# Parsed entries, each one [first codepoint, last codepoint, class name].
my @table;

while (defined(my $line=<$fh>))
{
    chomp $line;

    # Data lines look like "XXXX ; Class" or "XXXX..YYYY ; Class", where
    # XXXX/YYYY are upper-case hexadecimal code points.  Everything else
    # (comments, blank lines) does not match and is skipped.
    next unless $line =~ /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*\;\s*([^\s]+)\s*/;

    my ($first, $last, $class)=($1, $2, $3);

    # A single code point is a range that starts and ends on itself.
    $last=$first unless defined $last;

    # hex() converts the hexadecimal text directly; no string eval needed.
    push @table, [hex($first), hex($last), $class];
}

close($fh);

# Feed the ranges to the collector in ascending order of first code point.
for my $range (sort { $$a[0] <=> $$b[0] } @table)
{
    $obj->range($$range[0], $$range[1], "UNICODE_WB_$$range[2]");
}

$obj->output;