Imported Upstream version 0.66.1
[hcoop/debian/courier-authlib.git] / libs / unicode / mklinebreak.pl
1 #! /usr/bin/perl
2 #
3 # Compile LineBreak.txt into C array declarations.
4 #
5 # The array's structure is [firstchar, lastchar, class], giving the
6 # linebreaking "class" for unicode character range firstchar-lastchar.
7 #
8 # The ranges are sorted in numerical order.
9 #
10 # An array gets generated for each block of 4096 unicode characters.
11 #
12 # Finally, two arrays get declared: a pointer to an array for each 4096
13 # unicode character block, and the number of elements in the array.
14 #
15 # The pointer is NULL for each block of 4096 unicode characters that is not
16 # defined in LineBreak.txt
17 #
18 # By definition, a unicode character that is not listed in the array is
19 # class XX.
20
21 use strict;
22 use warnings;
23 use mkcommon;
24
# Map of numeric code point => General_Category abbreviation, taken from
# the first and third semicolon-separated fields of UnicodeData.txt.
# It is consulted later to resolve linebreak class SA.
my %general_category;

{
    my $unicode_data = "UnicodeData.txt";

    open(my $uc, "<", $unicode_data)
        or die "Cannot open $unicode_data: $!\n";

    while (defined(my $line = <$uc>))
    {
        chomp $line;

        my @f = split(/;/, $line);

        # Skip blank or malformed records instead of storing an entry
        # under an undefined key.
        next unless @f >= 3 && $f[0] =~ /^[0-9A-Fa-f]+$/;

        # hex() converts the code point field directly; no string eval
        # is needed.
        $general_category{hex($f[0])} = $f[2];
    }

    close($uc);
}
41
# Collects the (first, last, class) ranges and emits them at the end.
my $obj=mkcommon->new;

{
    my $linebreak_file = "LineBreak.txt";

    open(my $lb, "<", $linebreak_file)
        or die "Cannot open $linebreak_file: $!\n";

    while (defined(my $line = <$lb>))
    {
        chomp $line;

        # Data lines look like "XXXX;CC" or "XXXX..YYYY;CC"; anything
        # else (comments, blank lines) is ignored.
        #
        # NOTE(review): the pattern captures exactly two characters of
        # the class.  A class name longer than two characters (newer
        # LineBreak.txt releases appear to have some, e.g. ZWJ) would be
        # truncated here -- confirm against the data file in use.
        next unless $line =~ /^([0-9A-F]+)(\.\.([0-9A-F]+))?\;([^\s][^\s])\s*/;

        # Copy the captures immediately and convert the hexadecimal code
        # points with hex() rather than a string eval.
        my $first = hex($1);
        my $last  = defined($3) ? hex($3) : $first;  # single code point
        my $class = $4;

        # XX is the default class for anything not listed, so it is not
        # recorded.
        next if $class eq "XX";

        if ($class eq "SA")
        {
            # LB1 rule: SA resolves per code point, to CM when the
            # general category is Mn or Mc, and to AL otherwise.
            for my $cp ($first .. $last)
            {
                die "Cannot find general_category for $cp\n"
                    unless exists $general_category{$cp};

                my $gc = $general_category{$cp};

                $obj->range($cp, $cp,
                            ($gc eq "Mn" || $gc eq "Mc")
                            ? "UNICODE_LB_CM" : "UNICODE_LB_AL");
            }
        }
        else
        {
            # LB1 rule: AI and SG resolve to AL.
            $class = "AL" if $class eq "AI" || $class eq "SG";

            $obj->range($first, $last, "UNICODE_LB_$class");
        }
    }

    close($lb);
}

$obj->output;