Merge branch 'debian'
[hcoop/debian/courier-authlib.git] / libs / unicode / mkwordbreak.pl
CommitLineData
b0322a85
CE
1#! /usr/bin/perl
2#
3# Compile WordBreakProperty.txt into C array declarations.
4#
5# The array's structure is [firstchar, lastchar, class], giving the
6# word-breaking "class" for unicode character range firstchar-lastchar.
7#
8# The ranges are sorted in numerical order.
9#
10# An array gets generated for each block of 4096 unicode characters.
11#
12# Finally, two arrays get declared: a pointer to an array for each 4096
13# unicode character block, and the number of elements in the array.
14#
15# The pointer is NULL for each block of 4096 unicode characters that is not
16# defined in WordBreakProperty.txt
17
18use strict;
19use warnings;
20use mkcommon;
21
# Helper object from mkcommon.pm: every parsed range is handed to its
# range() method and output() is called once at the end.
my $obj = mkcommon->new;

my $input_file = 'WordBreakProperty.txt';

# Three-argument open with a lexical handle; on failure report the file
# name and the OS error instead of a bare "Died".
open(my $in, '<', $input_file)
    or die "Cannot open $input_file: $!";

# Each entry is [first_codepoint, last_codepoint, class_name].
my @table;

while (defined(my $line = <$in>))
{
    chomp $line;

    # Data lines look like "XXXX ; Class" or "XXXX..YYYY ; Class".
    # Anything that does not match (comments, blank lines) is skipped.
    next unless $line =~ /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*(\S+)/;

    # Copy the captures right away so later code cannot clobber them.
    my ($first, $last, $class) = ($1, $2, $3);

    # A line with a single code point describes a range of length one.
    $last = $first unless defined $last;

    # hex() converts the hexadecimal text directly; no string eval needed.
    push @table, [hex($first), hex($last), $class];
}

close($in);

# Feed the ranges to mkcommon in ascending order of their first code point.
for my $entry (sort { $$a[0] <=> $$b[0] } @table)
{
    $obj->range($$entry[0], $$entry[1], "UNICODE_WB_$$entry[2]");
}

$obj->output;