Imported Upstream version 0.66.1
[hcoop/debian/courier-authlib.git] / libs / unicode / mkcommon.pm
1 package mkcommon;
2
3 use 5.012002;
4 use strict;
5 use warnings;
6
7 require Exporter;
8
9 our @ISA = qw(Exporter);
10
11 # Items to export into callers namespace by default. Note: do not export
12 # names by default without a very good reason. Use EXPORT_OK instead.
13 # Do not simply export all your public functions/methods/constants.
14
15 # This allows declaration use mkcommon ':all';
16 # If you do not need this, moving things directly into @EXPORT or @EXPORT_OK
17 # will save memory.
18 our %EXPORT_TAGS = ( 'all' => [ qw(
19
20 ) ] );
21
22 our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
23
24 our @EXPORT = qw(
25
26 );
27
28 our $VERSION = '0.01';
29
30 my $BLOCK_SIZE=256;
31
32 # Preloaded methods go here.
33
34 sub new {
35
36 my $this=shift;
37
38 my $class = ref($this) || $this;
39 my $self = {};
40 bless $self, $class;
41
42 $$self{'char_array'}=[];
43 $$self{'char_class'}=[];
44 $$self{'char_start'}=[0];
45
46 $$self{'last_block'}=-1;
47 $$self{'last'}="";
48 $$self{'last_f'}=0;
49 $$self{'last_l'}=0;
50
51 return $self;
52 }
53
54 sub _doemit_block {
55 my $this=shift;
56
57 my $f=shift;
58 my $l=shift;
59
60 push @{$$this{'char_array'}}, [$f, $l];
61 push @{$$this{'char_class'}}, $$this{'last'};
62 }
63
64 sub _doemit_endblock {
65
66 my $this=shift;
67
68 push @{$$this{'char_start'}}, $#{$$this{'char_array'}}+1;
69 }
70
71 # _doemit invokes _doemit_block() for each unicode char range with a given
72 # linebreaking class. However, once a unicode char range starts in a different
73 # $BLOCK_SIZE character class, call _doemit_endblock() before calling _doemit_block().
74 #
75 # If a single unicode char range crosses a $BLOCK_SIZE character class boundary,
76 # split it at the boundary; call _doemit_endblock() to finish the current $BLOCK_SIZE
77 # char boundary, call _doemit_endblock(), then call _doemit_block() for the
78 # rest of the char range.
79
80
81 sub _doemit {
82
83 my $this=shift;
84
85 $this->_doemit_endblock()
86 if int($$this{'last_f'} / $BLOCK_SIZE)
87 != $$this{'last_block'} && $$this{'last_block'} != -1;
88
89 if (int($$this{'last_f'} / $BLOCK_SIZE) != int($$this{'last_l'} / $BLOCK_SIZE))
90 {
91 while (int($$this{'last_f'} / $BLOCK_SIZE) != int($$this{'last_l'} / $BLOCK_SIZE))
92 {
93 my $n=int($$this{'last_f'} / $BLOCK_SIZE) * $BLOCK_SIZE + ($BLOCK_SIZE-1);
94
95 $this->_doemit_block($$this{'last_f'}, $n);
96 $this->_doemit_endblock();
97 $$this{'last_f'}=$n+1;
98 }
99 }
100 $this->_doemit_block($$this{'last_f'}, $$this{'last_l'});
101
102 $$this{'last_block'}=int($$this{'last_l'} / $BLOCK_SIZE);
103 }
104
105 #
106 # Coalesce adjacent unicode char blocks that have the same linebreaking
107 # property. Invoke _doemit() for the accumulate unicode char range once
108 # a range with a different linebreaking class is seen.
109
110 sub range {
111
112 my $this=shift;
113
114 my $f=shift;
115 my $l=shift;
116 my $t=shift;
117
118 if ($$this{'last_l'} + 1 == $f && $$this{'last'} eq $t)
119 {
120 $$this{'last_l'}=$l;
121 return;
122 }
123
124 $this->_doemit() if $$this{'last'}; # New linebreaking class
125
126 $$this{'last_f'}=$f;
127 $$this{'last_l'}=$l;
128 $$this{'last'}=$t;
129 }
130
131 sub output {
132 my $this=shift;
133
134 $this->_doemit(); # Emit last linebreaking unicode char range class
135
136 $this->_doemit_endblock(); # End of the most recent $BLOCK_SIZE char range class
137
138 print "static const uint8_t unicode_rangetab[][2]={\n";
139
140 my $comma="\t";
141
142 my $modulo=sprintf("0x%X", $BLOCK_SIZE-1);
143
144 foreach ( @{$$this{'char_array'}} )
145 {
146 print "${comma}{0x" . sprintf("%04x", $$_[0]) . " & $modulo, 0x"
147 . sprintf("%04x", $$_[1]) . " & $modulo}";
148 $comma=",\n\t";
149 }
150
151 print "};\n\n";
152
153 print "static const uint8_t unicode_classtab[]={\n";
154
155 $comma="\t";
156 foreach ( @{$$this{'char_class'}} )
157 {
158 print "${comma}$_";
159 $comma=",\n\t";
160 }
161
162 print "};\n\n";
163
164 print "static const size_t unicode_indextab[]={\n";
165
166 $comma="\t";
167
168 my $prev_block=-1;
169 foreach (@{$$this{'char_start'}})
170 {
171 my $sp=$_;
172 my $cnt=1;
173
174 if ($sp <= $#{$$this{'char_array'}})
175 {
176 my $block=int($$this{'char_array'}->[$sp]->[0] / $BLOCK_SIZE);
177
178 $cnt = $block - $prev_block;
179 $prev_block=$block;
180 }
181
182 foreach (1..$cnt)
183 {
184 print "$comma$sp";
185 $comma=",\n\t";
186 }
187 }
188
189 print "};\n\n";
190 }
191
192 1;