Commit | Line | Data |
---|---|---|
b0322a85 CE |
1 | package mkcommon; |
2 | ||
3 | use 5.012002; | |
4 | use strict; | |
5 | use warnings; | |
6 | ||
7 | require Exporter; | |
8 | ||
9 | our @ISA = qw(Exporter); | |
10 | ||
11 | # Items to export into callers namespace by default. Note: do not export | |
12 | # names by default without a very good reason. Use EXPORT_OK instead. | |
13 | # Do not simply export all your public functions/methods/constants. | |
14 | ||
15 | # This allows declaration use mkcommon ':all'; | |
16 | # If you do not need this, moving things directly into @EXPORT or @EXPORT_OK | |
17 | # will save memory. | |
18 | our %EXPORT_TAGS = ( 'all' => [ qw( | |
19 | ||
20 | ) ] ); | |
21 | ||
22 | our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); | |
23 | ||
24 | our @EXPORT = qw( | |
25 | ||
26 | ); | |
27 | ||
28 | our $VERSION = '0.01'; | |
29 | ||
30 | my $BLOCK_SIZE=256; | |
31 | ||
32 | # Preloaded methods go here. | |
33 | ||
34 | sub new { | |
35 | ||
36 | my $this=shift; | |
37 | ||
38 | my $class = ref($this) || $this; | |
39 | my $self = {}; | |
40 | bless $self, $class; | |
41 | ||
42 | $$self{'char_array'}=[]; | |
43 | $$self{'char_class'}=[]; | |
44 | $$self{'char_start'}=[0]; | |
45 | ||
46 | $$self{'last_block'}=-1; | |
47 | $$self{'last'}=""; | |
48 | $$self{'last_f'}=0; | |
49 | $$self{'last_l'}=0; | |
50 | ||
51 | return $self; | |
52 | } | |
53 | ||
54 | sub _doemit_block { | |
55 | my $this=shift; | |
56 | ||
57 | my $f=shift; | |
58 | my $l=shift; | |
59 | ||
60 | push @{$$this{'char_array'}}, [$f, $l]; | |
61 | push @{$$this{'char_class'}}, $$this{'last'}; | |
62 | } | |
63 | ||
64 | sub _doemit_endblock { | |
65 | ||
66 | my $this=shift; | |
67 | ||
68 | push @{$$this{'char_start'}}, $#{$$this{'char_array'}}+1; | |
69 | } | |
70 | ||
71 | # _doemit invokes _doemit_block() for each unicode char range with a given | |
72 | # linebreaking class. However, once a unicode char range starts in a different | |
73 | # $BLOCK_SIZE character class, call _doemit_endblock() before calling _doemit_block(). | |
74 | # | |
75 | # If a single unicode char range crosses a $BLOCK_SIZE character class boundary, | |
76 | # split it at the boundary; call _doemit_endblock() to finish the current $BLOCK_SIZE | |
77 | # char boundary, call _doemit_endblock(), then call _doemit_block() for the | |
78 | # rest of the char range. | |
79 | ||
80 | ||
81 | sub _doemit { | |
82 | ||
83 | my $this=shift; | |
84 | ||
85 | $this->_doemit_endblock() | |
86 | if int($$this{'last_f'} / $BLOCK_SIZE) | |
87 | != $$this{'last_block'} && $$this{'last_block'} != -1; | |
88 | ||
89 | if (int($$this{'last_f'} / $BLOCK_SIZE) != int($$this{'last_l'} / $BLOCK_SIZE)) | |
90 | { | |
91 | while (int($$this{'last_f'} / $BLOCK_SIZE) != int($$this{'last_l'} / $BLOCK_SIZE)) | |
92 | { | |
93 | my $n=int($$this{'last_f'} / $BLOCK_SIZE) * $BLOCK_SIZE + ($BLOCK_SIZE-1); | |
94 | ||
95 | $this->_doemit_block($$this{'last_f'}, $n); | |
96 | $this->_doemit_endblock(); | |
97 | $$this{'last_f'}=$n+1; | |
98 | } | |
99 | } | |
100 | $this->_doemit_block($$this{'last_f'}, $$this{'last_l'}); | |
101 | ||
102 | $$this{'last_block'}=int($$this{'last_l'} / $BLOCK_SIZE); | |
103 | } | |
104 | ||
105 | # | |
106 | # Coalesce adjacent unicode char blocks that have the same linebreaking | |
107 | # property. Invoke _doemit() for the accumulate unicode char range once | |
108 | # a range with a different linebreaking class is seen. | |
109 | ||
110 | sub range { | |
111 | ||
112 | my $this=shift; | |
113 | ||
114 | my $f=shift; | |
115 | my $l=shift; | |
116 | my $t=shift; | |
117 | ||
118 | if ($$this{'last_l'} + 1 == $f && $$this{'last'} eq $t) | |
119 | { | |
120 | $$this{'last_l'}=$l; | |
121 | return; | |
122 | } | |
123 | ||
124 | $this->_doemit() if $$this{'last'}; # New linebreaking class | |
125 | ||
126 | $$this{'last_f'}=$f; | |
127 | $$this{'last_l'}=$l; | |
128 | $$this{'last'}=$t; | |
129 | } | |
130 | ||
131 | sub output { | |
132 | my $this=shift; | |
133 | ||
134 | $this->_doemit(); # Emit last linebreaking unicode char range class | |
135 | ||
136 | $this->_doemit_endblock(); # End of the most recent $BLOCK_SIZE char range class | |
137 | ||
138 | print "static const uint8_t unicode_rangetab[][2]={\n"; | |
139 | ||
140 | my $comma="\t"; | |
141 | ||
142 | my $modulo=sprintf("0x%X", $BLOCK_SIZE-1); | |
143 | ||
144 | foreach ( @{$$this{'char_array'}} ) | |
145 | { | |
146 | print "${comma}{0x" . sprintf("%04x", $$_[0]) . " & $modulo, 0x" | |
147 | . sprintf("%04x", $$_[1]) . " & $modulo}"; | |
148 | $comma=",\n\t"; | |
149 | } | |
150 | ||
151 | print "};\n\n"; | |
152 | ||
153 | print "static const uint8_t unicode_classtab[]={\n"; | |
154 | ||
155 | $comma="\t"; | |
156 | foreach ( @{$$this{'char_class'}} ) | |
157 | { | |
158 | print "${comma}$_"; | |
159 | $comma=",\n\t"; | |
160 | } | |
161 | ||
162 | print "};\n\n"; | |
163 | ||
164 | print "static const size_t unicode_indextab[]={\n"; | |
165 | ||
166 | $comma="\t"; | |
167 | ||
168 | my $prev_block=-1; | |
169 | foreach (@{$$this{'char_start'}}) | |
170 | { | |
171 | my $sp=$_; | |
172 | my $cnt=1; | |
173 | ||
174 | if ($sp <= $#{$$this{'char_array'}}) | |
175 | { | |
176 | my $block=int($$this{'char_array'}->[$sp]->[0] / $BLOCK_SIZE); | |
177 | ||
178 | $cnt = $block - $prev_block; | |
179 | $prev_block=$block; | |
180 | } | |
181 | ||
182 | foreach (1..$cnt) | |
183 | { | |
184 | print "$comma$sp"; | |
185 | $comma=",\n\t"; | |
186 | } | |
187 | } | |
188 | ||
189 | print "};\n\n"; | |
190 | } | |
191 | ||
192 | 1; |