Commit | Line | Data |
---|---|---|
31d289a4 LLB |
1 | From 47b3ceae397d21bf822bc2ac73052a4b1daf8e1c Mon Sep 17 00:00:00 2001 |
2 | From: Mark Adler <madler@alumni.caltech.edu> | |
3 | Date: Tue, 11 Jun 2019 22:01:18 -0700 | |
4 | Subject: [PATCH] Detect and reject a zip bomb using overlapped entries. | |
5 | ||
6 | This detects an invalid zip file that has at least one entry that | |
7 | overlaps with another entry or with the central directory to the | |
8 | end of the file. A Fifield zip bomb uses overlapped local entries | |
9 | to vastly increase the potential inflation ratio. Such an invalid | |
10 | zip file is rejected. | |
11 | ||
12 | See https://www.bamsoftware.com/hacks/zipbomb/ for David Fifield's | |
13 | analysis, construction, and examples of such zip bombs. | |
14 | ||
15 | The detection maintains a list of covered spans of the zip file | |
16 | so far, where the central directory to the end of the file and any | |
17 | bytes preceding the first entry at zip file offset zero are | |
18 | considered covered initially. Then as each entry is decompressed | |
19 | or tested, it is considered covered. When a new entry is about to | |
20 | be processed, its initial offset is checked to see if it is | |
21 | contained by a covered span. If so, the zip file is rejected as | |
22 | invalid. | |
23 | ||
24 | This commit depends on a preceding commit: "Fix bug in | |
25 | undefer_input() that misplaced the input state." | |
26 | --- | |
27 | extract.c | 190 +++++++++++++++++++++++++++++++++++++++++++++++++++++- | |
28 | globals.c | 1 + | |
29 | globals.h | 3 + | |
30 | process.c | 11 ++++ | |
31 | unzip.h | 1 + | |
32 | 5 files changed, 205 insertions(+), 1 deletion(-) | |
33 | ||
34 | diff --git a/extract.c b/extract.c | |
35 | index 1acd769..0973a33 100644 | |
36 | --- a/extract.c | |
37 | +++ b/extract.c | |
38 | @@ -319,6 +319,125 @@ static ZCONST char Far UnsupportedExtraField[] = | |
39 | "\nerror: unsupported extra-field compression type (%u)--skipping\n"; | |
40 | static ZCONST char Far BadExtraFieldCRC[] = | |
41 | "error [%s]: bad extra-field CRC %08lx (should be %08lx)\n"; | |
42 | +static ZCONST char Far NotEnoughMemCover[] = | |
43 | + "error: not enough memory for bomb detection\n"; | |
44 | +static ZCONST char Far OverlappedComponents[] = | |
45 | + "error: invalid zip file with overlapped components (possible zip bomb)\n"; | |
46 | + | |
47 | + | |
48 | + | |
49 | + | |
50 | + | |
51 | +/* A growable list of spans. */ | |
52 | +typedef zoff_t bound_t; | |
53 | +typedef struct { | |
54 | + bound_t beg; /* start of the span */ | |
55 | + bound_t end; /* one past the end of the span */ | |
56 | +} span_t; | |
57 | +typedef struct { | |
58 | + span_t *span; /* allocated, distinct, and sorted list of spans */ | |
59 | + size_t num; /* number of spans in the list */ | |
60 | + size_t max; /* allocated number of spans (num <= max) */ | |
61 | +} cover_t; | |
62 | + | |
63 | +/* | |
64 | + * Return the index of the first span in cover whose beg is greater than val. | |
65 | + * If there is no such span, then cover->num is returned. | |
66 | + */ | |
67 | +static size_t cover_find(cover, val) | |
68 | + cover_t *cover; | |
69 | + bound_t val; | |
70 | +{ | |
71 | + size_t lo = 0, hi = cover->num; | |
72 | + while (lo < hi) { | |
73 | + size_t mid = (lo + hi) >> 1; | |
74 | + if (val < cover->span[mid].beg) | |
75 | + hi = mid; | |
76 | + else | |
77 | + lo = mid + 1; | |
78 | + } | |
79 | + return hi; | |
80 | +} | |
81 | + | |
82 | +/* Return true if val lies within any one of the spans in cover. */ | |
83 | +static int cover_within(cover, val) | |
84 | + cover_t *cover; | |
85 | + bound_t val; | |
86 | +{ | |
87 | + size_t pos = cover_find(cover, val); | |
88 | + return pos > 0 && val < cover->span[pos - 1].end; | |
89 | +} | |
90 | + | |
91 | +/* | |
92 | + * Add a new span to the list, but only if the new span does not overlap any | |
93 | + * spans already in the list. The new span covers the values beg..end-1. beg | |
94 | + * must be less than end. | |
95 | + * | |
96 | + * Keep the list sorted and merge adjacent spans. Grow the allocated space for | |
97 | + * the list as needed. On success, 0 is returned. If the new span overlaps any | |
98 | + * existing spans, then 1 is returned and the new span is not added to the | |
99 | + * list. If the new span is invalid because beg is greater than or equal to | |
100 | + * end, then -1 is returned. If the list needs to be grown but the memory | |
101 | + * allocation fails, then -2 is returned. | |
102 | + */ | |
103 | +static int cover_add(cover, beg, end) | |
104 | + cover_t *cover; | |
105 | + bound_t beg; | |
106 | + bound_t end; | |
107 | +{ | |
108 | + size_t pos; | |
109 | + int prec, foll; | |
110 | + | |
111 | + if (beg >= end) | |
112 | + /* The new span is invalid. */ | |
113 | + return -1; | |
114 | + | |
115 | + /* Find where the new span should go, and make sure that it does not | |
116 | + overlap with any existing spans. */ | |
117 | + pos = cover_find(cover, beg); | |
118 | + if ((pos > 0 && beg < cover->span[pos - 1].end) || | |
119 | + (pos < cover->num && end > cover->span[pos].beg)) | |
120 | + return 1; | |
121 | + | |
122 | + /* Check for adjacencies. */ | |
123 | + prec = pos > 0 && beg == cover->span[pos - 1].end; | |
124 | + foll = pos < cover->num && end == cover->span[pos].beg; | |
125 | + if (prec && foll) { | |
126 | + /* The new span connects the preceding and following spans. Merge the | |
127 | + following span into the preceding span, and delete the following | |
128 | + span. */ | |
129 | + cover->span[pos - 1].end = cover->span[pos].end; | |
130 | + cover->num--; | |
131 | + memmove(cover->span + pos, cover->span + pos + 1, | |
132 | + (cover->num - pos) * sizeof(span_t)); | |
133 | + } | |
134 | + else if (prec) | |
135 | + /* The new span is adjacent only to the preceding span. Extend the end | |
136 | + of the preceding span. */ | |
137 | + cover->span[pos - 1].end = end; | |
138 | + else if (foll) | |
139 | + /* The new span is adjacent only to the following span. Extend the | |
140 | + beginning of the following span. */ | |
141 | + cover->span[pos].beg = beg; | |
142 | + else { | |
143 | + /* The new span has gaps between both the preceding and the following | |
144 | + spans. Assure that there is room and insert the span. */ | |
145 | + if (cover->num == cover->max) { | |
146 | + size_t max = cover->max == 0 ? 16 : cover->max << 1; | |
147 | + span_t *span = realloc(cover->span, max * sizeof(span_t)); | |
148 | + if (span == NULL) | |
149 | + return -2; | |
150 | + cover->span = span; | |
151 | + cover->max = max; | |
152 | + } | |
153 | + memmove(cover->span + pos + 1, cover->span + pos, | |
154 | + (cover->num - pos) * sizeof(span_t)); | |
155 | + cover->num++; | |
156 | + cover->span[pos].beg = beg; | |
157 | + cover->span[pos].end = end; | |
158 | + } | |
159 | + return 0; | |
160 | +} | |
161 | ||
162 | ||
163 | ||
164 | @@ -374,6 +493,29 @@ int extract_or_test_files(__G) /* return PK-type error code */ | |
165 | } | |
166 | #endif /* !SFX || SFX_EXDIR */ | |
167 | ||
168 | + /* One more: initialize cover structure for bomb detection. Start with a | |
169 | + span that covers the central directory through the end of the file. */ | |
170 | + if (G.cover == NULL) { | |
171 | + G.cover = malloc(sizeof(cover_t)); | |
172 | + if (G.cover == NULL) { | |
173 | + Info(slide, 0x401, ((char *)slide, | |
174 | + LoadFarString(NotEnoughMemCover))); | |
175 | + return PK_MEM; | |
176 | + } | |
177 | + ((cover_t *)G.cover)->span = NULL; | |
178 | + ((cover_t *)G.cover)->max = 0; | |
179 | + } | |
180 | + ((cover_t *)G.cover)->num = 0; | |
181 | + if ((G.extra_bytes != 0 && | |
182 | + cover_add((cover_t *)G.cover, 0, G.extra_bytes) != 0) || | |
183 | + cover_add((cover_t *)G.cover, | |
184 | + G.extra_bytes + G.ecrec.offset_start_central_directory, | |
185 | + G.ziplen) != 0) { | |
186 | + Info(slide, 0x401, ((char *)slide, | |
187 | + LoadFarString(NotEnoughMemCover))); | |
188 | + return PK_MEM; | |
189 | + } | |
190 | + | |
191 | /*--------------------------------------------------------------------------- | |
192 | The basic idea of this function is as follows. Since the central di- | |
193 | rectory lies at the end of the zipfile and the member files lie at the | |
194 | @@ -591,7 +733,8 @@ int extract_or_test_files(__G) /* return PK-type error code */ | |
195 | if (error > error_in_archive) | |
196 | error_in_archive = error; | |
197 | /* ...and keep going (unless disk full or user break) */ | |
198 | - if (G.disk_full > 1 || error_in_archive == IZ_CTRLC) { | |
199 | + if (G.disk_full > 1 || error_in_archive == IZ_CTRLC || | |
200 | + error == PK_BOMB) { | |
201 | /* clear reached_end to signal premature stop ... */ | |
202 | reached_end = FALSE; | |
203 | /* ... and cancel scanning the central directory */ | |
204 | @@ -1060,6 +1203,11 @@ static int extract_or_test_entrylist(__G__ numchunk, | |
205 | ||
206 | /* seek_zipf(__G__ pInfo->offset); */ | |
207 | request = G.pInfo->offset + G.extra_bytes; | |
208 | + if (cover_within((cover_t *)G.cover, request)) { | |
209 | + Info(slide, 0x401, ((char *)slide, | |
210 | + LoadFarString(OverlappedComponents))); | |
211 | + return PK_BOMB; | |
212 | + } | |
213 | inbuf_offset = request % INBUFSIZ; | |
214 | bufstart = request - inbuf_offset; | |
215 | ||
216 | @@ -1591,6 +1739,18 @@ static int extract_or_test_entrylist(__G__ numchunk, | |
217 | return IZ_CTRLC; /* cancel operation by user request */ | |
218 | } | |
219 | #endif | |
220 | + error = cover_add((cover_t *)G.cover, request, | |
221 | + G.cur_zipfile_bufstart + (G.inptr - G.inbuf)); | |
222 | + if (error < 0) { | |
223 | + Info(slide, 0x401, ((char *)slide, | |
224 | + LoadFarString(NotEnoughMemCover))); | |
225 | + return PK_MEM; | |
226 | + } | |
227 | + if (error != 0) { | |
228 | + Info(slide, 0x401, ((char *)slide, | |
229 | + LoadFarString(OverlappedComponents))); | |
230 | + return PK_BOMB; | |
231 | + } | |
232 | #ifdef MACOS /* MacOS is no preemptive OS, thus call event-handling by hand */ | |
233 | UserStop(); | |
234 | #endif | |
235 | @@ -1992,6 +2152,34 @@ static int extract_or_test_member(__G) /* return PK-type error code */ | |
236 | } | |
237 | ||
238 | undefer_input(__G); | |
239 | + | |
240 | + if ((G.lrec.general_purpose_bit_flag & 8) != 0) { | |
241 | + /* skip over data descriptor (harder than it sounds, due to signature | |
242 | + * ambiguity) | |
243 | + */ | |
244 | +# define SIG 0x08074b50 | |
245 | +# define LOW 0xffffffff | |
246 | + uch buf[12]; | |
247 | + unsigned shy = 12 - readbuf((char *)buf, 12); | |
248 | + ulg crc = shy ? 0 : makelong(buf); | |
249 | + ulg clen = shy ? 0 : makelong(buf + 4); | |
250 | + ulg ulen = shy ? 0 : makelong(buf + 8); /* or high clen if ZIP64 */ | |
251 | + if (crc == SIG && /* if not SIG, no signature */ | |
252 | + (G.lrec.crc32 != SIG || /* if not SIG, have signature */ | |
253 | + (clen == SIG && /* if not SIG, no signature */ | |
254 | + ((G.lrec.csize & LOW) != SIG || /* if not SIG, have signature */ | |
255 | + (ulen == SIG && /* if not SIG, no signature */ | |
256 | + (G.zip64 ? G.lrec.csize >> 32 : G.lrec.ucsize) != SIG | |
257 | + /* if not SIG, have signature */ | |
258 | + ))))) | |
259 | + /* skip four more bytes to account for signature */ | |
260 | + shy += 4 - readbuf((char *)buf, 4); | |
261 | + if (G.zip64) | |
262 | + shy += 8 - readbuf((char *)buf, 8); /* skip eight more for ZIP64 */ | |
263 | + if (shy) | |
264 | + error = PK_ERR; | |
265 | + } | |
266 | + | |
267 | return error; | |
268 | ||
269 | } /* end function extract_or_test_member() */ | |
270 | diff --git a/globals.c b/globals.c | |
271 | index fa8cca5..1e0f608 100644 | |
272 | --- a/globals.c | |
273 | +++ b/globals.c | |
274 | @@ -181,6 +181,7 @@ Uz_Globs *globalsCtor() | |
275 | # if (!defined(NO_TIMESTAMPS)) | |
276 | uO.D_flag=1; /* default to '-D', no restoration of dir timestamps */ | |
277 | # endif | |
278 | + G.cover = NULL; /* not allocated yet */ | |
279 | #endif | |
280 | ||
281 | uO.lflag=(-1); | |
282 | diff --git a/globals.h b/globals.h | |
283 | index 11b7215..2bdcdeb 100644 | |
284 | --- a/globals.h | |
285 | +++ b/globals.h | |
286 | @@ -260,12 +260,15 @@ typedef struct Globals { | |
287 | ecdir_rec ecrec; /* used in unzip.c, extract.c */ | |
288 | z_stat statbuf; /* used by main, mapname, check_for_newer */ | |
289 | ||
290 | + int zip64; /* true if Zip64 info in extra field */ | |
291 | + | |
292 | int mem_mode; | |
293 | uch *outbufptr; /* extract.c static */ | |
294 | ulg outsize; /* extract.c static */ | |
295 | int reported_backslash; /* extract.c static */ | |
296 | int disk_full; | |
297 | int newfile; | |
298 | + void **cover; /* used in extract.c for bomb detection */ | |
299 | ||
300 | int didCRlast; /* fileio static */ | |
301 | ulg numlines; /* fileio static: number of lines printed */ | |
302 | diff --git a/process.c b/process.c | |
303 | index 1e9a1e1..d2e4dc3 100644 | |
304 | --- a/process.c | |
305 | +++ b/process.c | |
306 | @@ -637,6 +637,13 @@ void free_G_buffers(__G) /* releases all memory allocated in global vars */ | |
307 | } | |
308 | #endif | |
309 | ||
310 | + /* Free the cover span list and the cover structure. */ | |
311 | + if (G.cover != NULL) { | |
312 | + free(*(G.cover)); | |
313 | + free(G.cover); | |
314 | + G.cover = NULL; | |
315 | + } | |
316 | + | |
317 | } /* end function free_G_buffers() */ | |
318 | ||
319 | ||
320 | @@ -1890,6 +1897,8 @@ int getZip64Data(__G__ ef_buf, ef_len) | |
321 | #define Z64FLGS 0xffff | |
322 | #define Z64FLGL 0xffffffff | |
323 | ||
324 | + G.zip64 = FALSE; | |
325 | + | |
326 | if (ef_len == 0 || ef_buf == NULL) | |
327 | return PK_COOL; | |
328 | ||
329 | @@ -1927,6 +1936,8 @@ int getZip64Data(__G__ ef_buf, ef_len) | |
330 | #if 0 | |
331 | break; /* Expect only one EF_PKSZ64 block. */ | |
332 | #endif /* 0 */ | |
333 | + | |
334 | + G.zip64 = TRUE; | |
335 | } | |
336 | ||
337 | /* Skip this extra field block. */ | |
338 | diff --git a/unzip.h b/unzip.h | |
339 | index 5b2a326..ed24a5b 100644 | |
340 | --- a/unzip.h | |
341 | +++ b/unzip.h | |
342 | @@ -645,6 +645,7 @@ typedef struct _Uzp_cdir_Rec { | |
343 | #define PK_NOZIP 9 /* zipfile not found */ | |
344 | #define PK_PARAM 10 /* bad or illegal parameters specified */ | |
345 | #define PK_FIND 11 /* no files found */ | |
346 | +#define PK_BOMB 12 /* likely zip bomb */ | |
347 | #define PK_DISK 50 /* disk full */ | |
348 | #define PK_EOF 51 /* unexpected EOF */ | |
349 |