3 # Compile LineBreak.txt into C array declarations.
5 # The array's structure is [firstchar, lastchar, class], giving the
6 # linebreaking "class" for unicode character range firstchar-lastchar.
8 # The ranges are sorted in numerical order.
10 # An array gets generated for each block of 4096 unicode characters.
12 # Finally, two arrays get declared: one holding a pointer to the array for
13 # each 4096 unicode character block, and one holding the number of elements in that array.
15 # The pointer is NULL for each block of 4096 unicode characters that has no
16 # entries defined in LineBreak.txt.
18 # By definition, a unicode character that is not listed in the array is
# Pass 1: read UnicodeData.txt and record a general category per code point
# in %general_category.  The script aborts (bare die) if the file cannot be
# opened.
27 open(UC
, "<UnicodeData.txt") || die;
# Read the file one line at a time into $_.
29 while (defined($_=<UC
>))
# NOTE(review): the statements that derive $cp and @f are not visible in this
# excerpt -- presumably @f holds the ';'-separated fields of the line, so that
# $f[2] is the General_Category column; confirm against the full script.
39 $general_category{$cp}=$f[2];
# Collector object for the ranges found below; the mkcommon package is
# defined outside this excerpt.
42 my $obj=mkcommon
->new;
# Pass 2: read LineBreak.txt.  The script aborts (bare die) if the file
# cannot be opened.
44 open(F
, "<LineBreak.txt") || die;
# Read the file one line at a time into $_.
46 while (defined($_=<F
>))
# Skip every line that does not start with "HEX;CC" or "HEX..HEX;CC", where
# HEX is an uppercase hexadecimal number and CC is exactly two non-whitespace
# characters.  Captures: $1 = first value, $3 = optional last value,
# $4 = the two-character class.
50 next unless /^([0-9A-F]+)(\.\.([0-9A-F]+))?\;([^\s][^\s])\s*/;
# A general category recorded in pass 1 is required for $f; abort otherwise.
# NOTE(review): the condition guarding this check is not visible here.
67 die "Cannot find general_category for $f\n"
68 unless exists $general_category{$f};
# Select UNICODE_LB_CM when the general category is "Mn" or "Mc", otherwise
# UNICODE_LB_AL.
# NOTE(review): the start of this expression (what receives the selected
# class, and for which line-break class it applies) lies outside this
# excerpt -- confirm against the full script.
71 $general_category{$f} eq "Mn" ||
72 $general_category{$f} eq "Mc" ?
73 "UNICODE_LB_CM":"UNICODE_LB_AL");
# Classes AI and SG are folded into AL.
80 $t="AL" if $t eq "AI" || $t eq "SG"; # LB1 rule
# Hand the range $f..$l to the collector with its class name, built by
# prefixing the class with "UNICODE_LB_".
82 $obj->range($f, $l, "UNICODE_LB_$t");