Imported Debian patch 0.66.1-1
[hcoop/debian/courier-authlib.git] / libs / unicode / mkgraphemebreak.pl
CommitLineData
b0322a85
CE
1#! /usr/bin/perl
2#
# Compile GraphemeBreakProperty.txt into C array declarations.
4#
# The array's structure is [firstchar, lastchar, class], giving the
# grapheme break "class" for unicode character range firstchar-lastchar.
7#
8# The ranges are sorted in numerical order.
9#
10# An array gets generated for each block of 4096 unicode characters.
11#
12# Finally, two arrays get declared: a pointer to an array for each 4096
13# unicode character block, and the number of elements in the array.
14#
15# The pointer is NULL for each block of 4096 unicode characters that is not
# defined in GraphemeBreakProperty.txt
17#
18# By definition, a unicode character that is not listed in the array is
19# class XX.
20
21use strict;
22use warnings;
23use mkcommon;
24
# Read GraphemeBreakProperty.txt.
#
# Every data line has the form "FIRST[..LAST] ; CLASS", with FIRST and LAST
# written in uppercase hexadecimal.  Each matching line is recorded in
# @breaklist as [first, last, class], with first and last converted to
# numbers.  Lines that do not match this form are skipped.

my $propfile = "GraphemeBreakProperty.txt";

# Three-argument open with a lexical handle; on failure report which file
# could not be opened and why.
open(my $fh, "<", $propfile) || die "Cannot open $propfile: $!\n";

my @breaklist;

while (defined(my $line = <$fh>))
{
    chomp $line;

    next unless $line =~ /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*\;\s*(\S+)\s*/;

    my ($first, $last, $class) = ($1, $2, $3);

    # A line naming a single code point is a range of length one.
    $last = $first unless defined $last;

    # hex() converts the matched digits directly; no string eval is needed.
    push @breaklist, [hex($first), hex($last), $class];
}

close($fh);
51
52
# Hand every collected range to a mkcommon object, in ascending order of the
# range's first code point, then call its output method.
my $obj = mkcommon->new;

my @ordered = sort { $a->[0] <=> $b->[0] } @breaklist;

for my $entry (@ordered)
{
    my ($first, $last, $class) = @$entry;

    $obj->range($first, $last, "UNICODE_GRAPHEMEBREAK_$class");
}

$obj->output;