| 1 | #! /usr/bin/perl |
| 2 | # |
| 3 | # Compile WordBreakProperty.txt into C array declarations. |
| 4 | # |
| 5 | # The array's structure is [firstchar, lastchar, class], giving the |
| 6 | # word-breaking "class" for unicode character range firstchar-lastchar. |
| 7 | # |
| 8 | # The ranges are sorted in numerical order. |
| 9 | # |
| 10 | # An array gets generated for each block of 4096 unicode characters. |
| 11 | # |
| 12 | # Finally, two arrays get declared: a pointer to an array for each 4096 |
| 13 | # unicode character block, and the number of elements in the array. |
| 14 | # |
| 15 | # The pointer is NULL for each block of 4096 unicode characters that is not |
| 16 | # defined in WordBreakProperty.txt |
| 17 | |
| 18 | use strict; |
| 19 | use warnings; |
| 20 | use mkcommon; |
| 21 | |
# Helper object from the mkcommon module (not shown here); it receives the
# parsed ranges further down in this script.
my $obj = mkcommon->new;

# Unicode data file, read from the current working directory.
my $input_file = 'WordBreakProperty.txt';

open(my $fh, '<', $input_file)
    or die "Cannot open $input_file: $!\n";

# Each entry is [first code point, last code point, property name],
# with the code points stored as numbers.
my @table;

while (defined(my $line = <$fh>))
{
    chomp $line;

    # Data lines have the form "XXXX ; Name" or "XXXX..YYYY ; Name"
    # (uppercase hex code points). Everything else, such as comment
    # lines and blank lines, is skipped.
    next unless $line =~ /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*(\S+)/;

    my ($first, $last, $class) = ($1, $2, $3);

    # A line naming a single code point is a range of length one.
    $last = $first unless defined $last;

    # hex() converts the digits directly; no Perl code is compiled from
    # the contents of the data file.
    push @table, [hex($first), hex($last), $class];
}

close($fh);
| 45 | |
# Hand every range to $obj in ascending order of its first code point.
# The property name from the data file is prefixed with "UNICODE_WB_" to
# form the class identifier passed to range().
for my $entry (sort { $a->[0] <=> $b->[0] } @table)
{
    my ($first, $last, $class) = @$entry;

    $obj->range($first, $last, "UNICODE_WB_$class");
}

# Presumably writes out the C array declarations described in the file
# header; the implementation lives in mkcommon and is not visible here.
$obj->output;