Imported Upstream version 0.66.1
[hcoop/debian/courier-authlib.git] / libs / unicode / mkgraphemebreak.pl
1 #! /usr/bin/perl
2 #
3 # Compile GraphemeBreakProperty.txt into C array declarations.
4 #
5 # The array's structure is [firstchar, lastchar, class], giving the
6 # grapheme break "class" for unicode character range firstchar-lastchar.
7 #
8 # The ranges are sorted in numerical order.
9 #
10 # An array gets generated for each block of 4096 unicode characters.
11 #
12 # Finally, two arrays get declared: a pointer to an array for each 4096
13 # unicode character block, and the number of elements in the array.
14 #
15 # The pointer is NULL for each block of 4096 unicode characters that is not
16 # defined in GraphemeBreakProperty.txt.
17 #
18 # By definition, a unicode character that is not listed in the array is
19 # class XX.
20
21 use strict;
22 use warnings;
23 use mkcommon;
24
25 open(F, "<GraphemeBreakProperty.txt") || die;
26
27 # Read LineBreak.txt
28 #
29 # Invoke doclass() passing the first unicode char, the last unicode char,
30 # and the linebreaking class for the given unicode char range.
31
32 my @breaklist;
33
34 while (defined($_=<F>))
35 {
36 chomp;
37
38 next unless /^([0-9A-F]+)(\.\.([0-9A-F]+))?\s*\;\s*([^\s]+)\s*/;
39
40 my $f=$1;
41 my $l=$3;
42 my $t=$4;
43
44 $l=$f unless $l;
45
46 eval "\$f=0x$f";
47 eval "\$l=0x$l";
48
49 push @breaklist, [$f, $l, $t];
50 }
51
52
# Hand every collected range to a mkcommon object, in ascending order of the
# range's first code point, then call its output method.

my $obj = mkcommon->new;

for my $entry (sort { $a->[0] <=> $b->[0] } @breaklist)
{
    my ($first, $last, $class) = @$entry;

    $obj->range($first, $last, "UNICODE_GRAPHEMEBREAK_$class");
}

$obj->output;