Imported Upstream version 0.66.1
[hcoop/debian/courier-authlib.git] / libs / unicode / mkwordbreak.pl
#! /usr/bin/perl
#
# Compile WordBreakProperty.txt into C array declarations.
#
# The array's structure is [firstchar, lastchar, class], giving the
# word-breaking "class" for unicode character range firstchar-lastchar.
#
# The ranges are sorted in numerical order.
#
# An array gets generated for each block of 4096 unicode characters.
#
# Finally, two arrays get declared: a pointer to an array for each 4096
# unicode character block, and the number of elements in the array.
#
# The pointer is NULL for each block of 4096 unicode characters that is not
# defined in WordBreakProperty.txt
#
# NOTE: the actual code generation is delegated to the mkcommon module
# (range() and output()); this script only parses the property file and
# feeds the sorted ranges to it.

use strict;
use warnings;
use mkcommon;

# Unicode data file, read from the current directory.
my $input_file="WordBreakProperty.txt";

my $obj=mkcommon->new;

open(my $fh, "<", $input_file)
    or die "Cannot open $input_file: $!";

# Each element is [first_codepoint, last_codepoint, property_name].
my @table;

while (defined(my $line=<$fh>))
{
    chomp $line;

    # Data lines look like
    #
    #   0041..005A    ; ALetter # ...
    #   0027          ; Single_Quote # ...
    #
    # Anything else (comments, blank lines) is skipped.
    next unless $line =~ /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*(\S+)/;

    my ($first, $last, $class)=($1, $2, $3);

    # A single code point is a range of one character.
    $last=$first unless defined $last;

    # hex() converts the captured hexadecimal digits directly; no need
    # to compile a string with eval.
    push @table, [hex($first), hex($last), $class];
}

close($fh);

# Feed the ranges to mkcommon in ascending order of their first code point.
foreach my $entry (sort { $$a[0] <=> $$b[0] } @table)
{
    $obj->range($$entry[0], $$entry[1], "UNICODE_WB_$$entry[2]");
}

$obj->output;