Commit | Line | Data |
---|---|---|
feec80c3 C |
1 | (* |
2 | * Unzip - inflate format decompression algorithm | |
3 | * Copyright (C) 2004 Nicolas Cannasse | |
4 | * Compliant with RFC 1950 and 1951 | |
5 | * | |
6 | * This library is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License as published by the Free Software Foundation; either | |
9 | * version 2.1 of the License, or (at your option) any later version, | |
10 | * with the special exception on linking described in file LICENSE. | |
11 | * | |
12 | * This library is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Lesser General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Lesser General Public | |
18 | * License along with this library; if not, write to the Free Software | |
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
20 | *) | |
21 | ||
22 | type huffman = | |
23 | | Found of int | |
24 | | NeedBit of huffman * huffman | |
25 | | NeedBits of int * huffman array | |
26 | ||
27 | ||
28 | type adler32 = { | |
29 | mutable a1 : int; | |
30 | mutable a2 : int; | |
31 | } | |
32 | ||
33 | type window = { | |
34 | mutable wbuffer : string; | |
35 | mutable wpos : int; | |
36 | wcrc : adler32; | |
37 | } | |
38 | ||
39 | type state = | |
40 | | Head | |
41 | | Block | |
42 | | CData | |
43 | | Flat | |
44 | | Crc | |
45 | | Dist | |
46 | | DistOne | |
47 | | Done | |
48 | ||
49 | type t = { | |
50 | mutable znbits : int; | |
51 | mutable zbits : int; | |
52 | mutable zstate : state; | |
53 | mutable zfinal : bool; | |
54 | mutable zhuffman : huffman; | |
55 | mutable zhuffdist : huffman option; | |
56 | mutable zlen : int; | |
57 | mutable zdist : int; | |
58 | mutable zneeded : int; | |
59 | mutable zoutput : string; | |
60 | mutable zoutpos : int; | |
61 | zinput : IO.input; | |
62 | zlengths : int array; | |
63 | zwindow : window; | |
64 | } | |
65 | ||
66 | type error_msg = | |
67 | | Invalid_huffman | |
68 | | Invalid_data | |
69 | | Invalid_crc | |
70 | | Truncated_data | |
71 | | Unsupported_dictionary | |
72 | ||
73 | exception Error of error_msg | |
74 | ||
75 | let error msg = raise (Error msg) | |
76 | ||
77 | (* ************************************************************************ *) | |
78 | (* HUFFMAN TREES *) | |
79 | ||
80 | let rec tree_depth = function | |
81 | | Found _ -> 0 | |
82 | | NeedBits _ -> assert false | |
83 | | NeedBit (a,b) -> | |
84 | 1 + min (tree_depth a) (tree_depth b) | |
85 | ||
86 | let rec tree_compress t = | |
87 | match tree_depth t with | |
88 | | 0 -> t | |
89 | | 1 -> | |
90 | (match t with | |
91 | | NeedBit (a,b) -> NeedBit (tree_compress a,tree_compress b) | |
92 | | _ -> assert false) | |
93 | | d -> | |
94 | let size = 1 lsl d in | |
95 | let tbl = Array.make size (Found (-1)) in | |
96 | tree_walk tbl 0 0 d t; | |
97 | NeedBits (d,tbl) | |
98 | ||
99 | and tree_walk tbl p cd d = function | |
100 | | NeedBit (a,b) when d > 0 -> | |
101 | tree_walk tbl p (cd + 1) (d-1) a; | |
102 | tree_walk tbl (p lor (1 lsl cd)) (cd + 1) (d-1) b; | |
103 | | t -> | |
104 | Array.set tbl p (tree_compress t) | |
105 | ||
106 | let make_huffman lengths pos nlengths maxbits = | |
107 | let counts = Array.make maxbits 0 in | |
108 | for i = 0 to nlengths - 1 do | |
109 | let p = Array.unsafe_get lengths (i + pos) in | |
110 | if p >= maxbits then error Invalid_huffman; | |
111 | Array.unsafe_set counts p (Array.unsafe_get counts p + 1); | |
112 | done; | |
113 | let code = ref 0 in | |
114 | let tmp = Array.make maxbits 0 in | |
115 | for i = 1 to maxbits - 2 do | |
116 | code := (!code + Array.unsafe_get counts i) lsl 1; | |
117 | Array.unsafe_set tmp i !code; | |
118 | done; | |
119 | let bits = Hashtbl.create 0 in | |
120 | for i = 0 to nlengths - 1 do | |
121 | let l = Array.unsafe_get lengths (i + pos) in | |
122 | if l <> 0 then begin | |
123 | let n = Array.unsafe_get tmp (l - 1) in | |
124 | Array.unsafe_set tmp (l - 1) (n + 1); | |
125 | Hashtbl.add bits (n,l) i; | |
126 | end; | |
127 | done; | |
128 | let rec tree_make v l = | |
129 | if l > maxbits then error Invalid_huffman; | |
130 | try | |
131 | Found (Hashtbl.find bits (v,l)) | |
132 | with | |
133 | Not_found -> | |
134 | NeedBit (tree_make (v lsl 1) (l + 1) , tree_make (v lsl 1 lor 1) (l + 1)) | |
135 | in | |
136 | tree_compress (NeedBit (tree_make 0 1 , tree_make 1 1)) | |
137 | ||
138 | (* ************************************************************************ *) | |
139 | (* ADLER32 (CRC) *) | |
140 | ||
141 | let adler32_create() = { | |
142 | a1 = 1; | |
143 | a2 = 0; | |
144 | } | |
145 | ||
146 | let adler32_update a s p l = | |
147 | let p = ref p in | |
148 | for i = 0 to l - 1 do | |
149 | let c = int_of_char (String.unsafe_get s !p) in | |
150 | a.a1 <- (a.a1 + c) mod 65521; | |
151 | a.a2 <- (a.a2 + a.a1) mod 65521; | |
152 | incr p; | |
153 | done | |
154 | ||
155 | let adler32_read ch = | |
156 | let a2a = IO.read_byte ch in | |
157 | let a2b = IO.read_byte ch in | |
158 | let a1a = IO.read_byte ch in | |
159 | let a1b = IO.read_byte ch in | |
160 | { | |
161 | a1 = (a1a lsl 8) lor a1b; | |
162 | a2 = (a2a lsl 8) lor a2b; | |
163 | } | |
164 | ||
165 | (* ************************************************************************ *) | |
166 | (* WINDOW *) | |
167 | ||
168 | let window_size = 1 lsl 15 | |
169 | let buffer_size = 1 lsl 16 | |
170 | ||
171 | let window_create size = { | |
172 | wbuffer = String.create buffer_size; | |
173 | wpos = 0; | |
174 | wcrc = adler32_create() | |
175 | } | |
176 | ||
177 | let window_slide w = | |
178 | adler32_update w.wcrc w.wbuffer 0 window_size; | |
179 | let b = String.create buffer_size in | |
180 | w.wpos <- w.wpos - window_size; | |
181 | String.unsafe_blit w.wbuffer window_size b 0 w.wpos; | |
182 | w.wbuffer <- b | |
183 | ||
184 | let window_add_string w s p len = | |
185 | if w.wpos + len > buffer_size then window_slide w; | |
186 | String.unsafe_blit s p w.wbuffer w.wpos len; | |
187 | w.wpos <- w.wpos + len | |
188 | ||
189 | let window_add_char w c = | |
190 | if w.wpos = buffer_size then window_slide w; | |
191 | String.unsafe_set w.wbuffer w.wpos c; | |
192 | w.wpos <- w.wpos + 1 | |
193 | ||
194 | let window_get_last_char w = | |
195 | String.unsafe_get w.wbuffer (w.wpos - 1) | |
196 | ||
197 | let window_available w = | |
198 | w.wpos | |
199 | ||
200 | let window_checksum w = | |
201 | adler32_update w.wcrc w.wbuffer 0 w.wpos; | |
202 | w.wcrc | |
203 | ||
204 | (* ************************************************************************ *) | |
205 | ||
206 | let len_extra_bits_tbl = [|0;0;0;0;0;0;0;0;1;1;1;1;2;2;2;2;3;3;3;3;4;4;4;4;5;5;5;5;0;-1;-1|] | |
207 | let len_base_val_tbl = [|3;4;5;6;7;8;9;10;11;13;15;17;19;23;27;31;35;43;51;59;67;83;99;115;131;163;195;227;258|] | |
208 | let dist_extra_bits_tbl = [|0;0;0;0;1;1;2;2;3;3;4;4;5;5;6;6;7;7;8;8;9;9;10;10;11;11;12;12;13;13;-1;-1|] | |
209 | let dist_base_val_tbl = [|1;2;3;4;5;7;9;13;17;25;33;49;65;97;129;193;257;385;513;769;1025;1537;2049;3073;4097;6145;8193;12289;16385;24577|] | |
210 | let code_lengths_pos = [|16;17;18;0;8;7;9;6;10;5;11;4;12;3;13;2;14;1;15|] | |
211 | ||
212 | let fixed_huffman = make_huffman (Array.init 288 (fun n -> | |
213 | if n <= 143 then 8 | |
214 | else if n <= 255 then 9 | |
215 | else if n <= 279 then 7 | |
216 | else 8 | |
217 | )) 0 288 10 | |
218 | ||
219 | let get_bits z n = | |
220 | while z.znbits < n do | |
221 | z.zbits <- z.zbits lor ((IO.read_byte z.zinput) lsl z.znbits); | |
222 | z.znbits <- z.znbits + 8; | |
223 | done; | |
224 | let b = z.zbits land (1 lsl n - 1) in | |
225 | z.znbits <- z.znbits - n; | |
226 | z.zbits <- z.zbits lsr n; | |
227 | b | |
228 | ||
229 | let get_bit z = | |
230 | if z.znbits = 0 then begin | |
231 | z.znbits <- 8; | |
232 | z.zbits <- IO.read_byte z.zinput; | |
233 | end; | |
234 | let b = z.zbits land 1 = 1 in | |
235 | z.znbits <- z.znbits - 1; | |
236 | z.zbits <- z.zbits lsr 1; | |
237 | b | |
238 | ||
239 | let rec get_rev_bits z n = | |
240 | if n = 0 then | |
241 | 0 | |
242 | else if get_bit z then | |
243 | (1 lsl (n - 1)) lor (get_rev_bits z (n-1)) | |
244 | else | |
245 | get_rev_bits z (n-1) | |
246 | ||
247 | let reset_bits z = | |
248 | z.zbits <- 0; | |
249 | z.znbits <- 0 | |
250 | ||
251 | let add_string z s p l = | |
252 | window_add_string z.zwindow s p l; | |
253 | String.unsafe_blit s p z.zoutput z.zoutpos l; | |
254 | z.zneeded <- z.zneeded - l; | |
255 | z.zoutpos <- z.zoutpos + l | |
256 | ||
257 | let add_char z c = | |
258 | window_add_char z.zwindow c; | |
259 | String.unsafe_set z.zoutput z.zoutpos c; | |
260 | z.zneeded <- z.zneeded - 1; | |
261 | z.zoutpos <- z.zoutpos + 1 | |
262 | ||
263 | let add_dist_one z n = | |
264 | let c = window_get_last_char z.zwindow in | |
265 | let s = String.make n c in | |
266 | add_string z s 0 n | |
267 | ||
268 | let add_dist z d l = | |
269 | add_string z z.zwindow.wbuffer (z.zwindow.wpos - d) l | |
270 | ||
271 | let rec apply_huffman z = function | |
272 | | Found n -> n | |
273 | | NeedBit (a,b) -> apply_huffman z (if get_bit z then b else a) | |
274 | | NeedBits (n,t) -> apply_huffman z (Array.unsafe_get t (get_bits z n)) | |
275 | ||
276 | let inflate_lengths z a max = | |
277 | let i = ref 0 in | |
278 | let prev = ref 0 in | |
279 | while !i < max do | |
280 | match apply_huffman z z.zhuffman with | |
281 | | n when n <= 15 -> | |
282 | prev := n; | |
283 | Array.unsafe_set a !i n; | |
284 | incr i | |
285 | | 16 -> | |
286 | let n = 3 + get_bits z 2 in | |
287 | if !i + n > max then error Invalid_data; | |
288 | for k = 0 to n - 1 do | |
289 | Array.unsafe_set a !i !prev; | |
290 | incr i; | |
291 | done; | |
292 | | 17 -> | |
293 | let n = 3 + get_bits z 3 in | |
294 | i := !i + n; | |
295 | if !i > max then error Invalid_data; | |
296 | | 18 -> | |
297 | let n = 11 + get_bits z 7 in | |
298 | i := !i + n; | |
299 | if !i > max then error Invalid_data; | |
300 | | _ -> | |
301 | error Invalid_data | |
302 | done | |
303 | ||
304 | let rec inflate_loop z = | |
305 | match z.zstate with | |
306 | | Head -> | |
307 | let cmf = IO.read_byte z.zinput in | |
308 | let cm = cmf land 15 in | |
309 | let cinfo = cmf lsr 4 in | |
310 | if cm <> 8 || cinfo <> 7 then error Invalid_data; | |
311 | let flg = IO.read_byte z.zinput in | |
312 | (*let fcheck = flg land 31 in*) | |
313 | let fdict = flg land 32 <> 0 in | |
314 | (*let flevel = flg lsr 6 in*) | |
315 | if (cmf lsl 8 + flg) mod 31 <> 0 then error Invalid_data; | |
316 | if fdict then error Unsupported_dictionary; | |
317 | z.zstate <- Block; | |
318 | inflate_loop z | |
319 | | Crc -> | |
320 | let calc = window_checksum z.zwindow in | |
321 | let crc = adler32_read z.zinput in | |
322 | if calc <> crc then error Invalid_crc; | |
323 | z.zstate <- Done; | |
324 | inflate_loop z | |
325 | | Done -> | |
326 | () | |
327 | | Block -> | |
328 | z.zfinal <- get_bit z; | |
329 | let btype = get_bits z 2 in | |
330 | (match btype with | |
331 | | 0 -> (* no compression *) | |
332 | z.zlen <- IO.read_ui16 z.zinput; | |
333 | let nlen = IO.read_ui16 z.zinput in | |
334 | if nlen <> 0xffff - z.zlen then error Invalid_data; | |
335 | z.zstate <- Flat; | |
336 | inflate_loop z; | |
337 | reset_bits z | |
338 | | 1 -> (* fixed Huffman *) | |
339 | z.zhuffman <- fixed_huffman; | |
340 | z.zhuffdist <- None; | |
341 | z.zstate <- CData; | |
342 | inflate_loop z | |
343 | | 2 -> (* dynamic Huffman *) | |
344 | let hlit = get_bits z 5 + 257 in | |
345 | let hdist = get_bits z 5 + 1 in | |
346 | let hclen = get_bits z 4 + 4 in | |
347 | for i = 0 to hclen - 1 do | |
348 | Array.unsafe_set z.zlengths (Array.unsafe_get code_lengths_pos i) (get_bits z 3); | |
349 | done; | |
350 | for i = hclen to 18 do | |
351 | Array.unsafe_set z.zlengths (Array.unsafe_get code_lengths_pos i) 0; | |
352 | done; | |
353 | z.zhuffman <- make_huffman z.zlengths 0 19 8; | |
354 | let lengths = Array.make (hlit + hdist) 0 in | |
355 | inflate_lengths z lengths (hlit + hdist); | |
356 | z.zhuffdist <- Some (make_huffman lengths hlit hdist 16); | |
357 | z.zhuffman <- make_huffman lengths 0 hlit 16; | |
358 | z.zstate <- CData; | |
359 | inflate_loop z | |
360 | | _ -> | |
361 | error Invalid_data) | |
362 | | Flat -> | |
363 | let rlen = min z.zlen z.zneeded in | |
364 | let str = IO.nread z.zinput rlen in | |
365 | let len = String.length str in | |
366 | z.zlen <- z.zlen - len; | |
367 | add_string z str 0 len; | |
368 | if z.zlen = 0 then z.zstate <- (if z.zfinal then Crc else Block); | |
369 | if z.zneeded > 0 then inflate_loop z | |
370 | | DistOne -> | |
371 | let len = min z.zlen z.zneeded in | |
372 | add_dist_one z len; | |
373 | z.zlen <- z.zlen - len; | |
374 | if z.zlen = 0 then z.zstate <- CData; | |
375 | if z.zneeded > 0 then inflate_loop z | |
376 | | Dist -> | |
377 | while z.zlen > 0 && z.zneeded > 0 do | |
378 | let len = min z.zneeded (min z.zlen z.zdist) in | |
379 | add_dist z z.zdist len; | |
380 | z.zlen <- z.zlen - len; | |
381 | done; | |
382 | if z.zlen = 0 then z.zstate <- CData; | |
383 | if z.zneeded > 0 then inflate_loop z | |
384 | | CData -> | |
385 | match apply_huffman z z.zhuffman with | |
386 | | n when n < 256 -> | |
387 | add_char z (Char.unsafe_chr n); | |
388 | if z.zneeded > 0 then inflate_loop z | |
389 | | 256 -> | |
390 | z.zstate <- if z.zfinal then Crc else Block; | |
391 | inflate_loop z | |
392 | | n -> | |
393 | let n = n - 257 in | |
394 | let extra_bits = Array.unsafe_get len_extra_bits_tbl n in | |
395 | if extra_bits = -1 then error Invalid_data; | |
396 | z.zlen <- (Array.unsafe_get len_base_val_tbl n) + (get_bits z extra_bits); | |
397 | let dist_code = (match z.zhuffdist with None -> get_rev_bits z 5 | Some h -> apply_huffman z h) in | |
398 | let extra_bits = Array.unsafe_get dist_extra_bits_tbl dist_code in | |
399 | if extra_bits = -1 then error Invalid_data; | |
400 | z.zdist <- (Array.unsafe_get dist_base_val_tbl dist_code) + (get_bits z extra_bits); | |
401 | if z.zdist > window_available z.zwindow then error Invalid_data; | |
402 | z.zstate <- (if z.zdist = 1 then DistOne else Dist); | |
403 | inflate_loop z | |
404 | ||
405 | let inflate_data z s pos len = | |
406 | if pos < 0 || len < 0 || pos + len > String.length s then invalid_arg "inflate_data"; | |
407 | z.zneeded <- len; | |
408 | z.zoutpos <- pos; | |
409 | z.zoutput <- s; | |
410 | try | |
411 | if len > 0 then inflate_loop z; | |
412 | len - z.zneeded | |
413 | with | |
414 | IO.No_more_input -> error Truncated_data | |
415 | ||
416 | let inflate_init ?(header=true) ch = | |
417 | { | |
418 | zfinal = false; | |
419 | zhuffman = fixed_huffman; | |
420 | zhuffdist = None; | |
421 | zlen = 0; | |
422 | zdist = 0; | |
423 | zstate = (if header then Head else Block); | |
424 | zinput = ch; | |
425 | zbits = 0; | |
426 | znbits = 0; | |
427 | zneeded = 0; | |
428 | zoutput = ""; | |
429 | zoutpos = 0; | |
430 | zlengths = Array.make 19 (-1); | |
431 | zwindow = window_create (1 lsl 15) | |
432 | } | |
433 | ||
434 | let inflate ?(header=true) ch = | |
435 | let z = inflate_init ~header ch in | |
436 | let s = String.create 1 in | |
437 | IO.create_in | |
438 | ~read:(fun() -> | |
439 | let l = inflate_data z s 0 1 in | |
440 | if l = 1 then String.unsafe_get s 0 else raise IO.No_more_input | |
441 | ) | |
442 | ~input:(fun s p l -> | |
443 | let n = inflate_data z s p l in | |
444 | if n = 0 then raise IO.No_more_input; | |
445 | n | |
446 | ) | |
447 | ~close:(fun () -> | |
448 | IO.close_in ch | |
449 | ) |