/src/mupdf/source/fitz/unzip.c
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (C) 2004-2024 Artifex Software, Inc. |
2 | | // |
3 | | // This file is part of MuPDF. |
4 | | // |
5 | | // MuPDF is free software: you can redistribute it and/or modify it under the |
6 | | // terms of the GNU Affero General Public License as published by the Free |
7 | | // Software Foundation, either version 3 of the License, or (at your option) |
8 | | // any later version. |
9 | | // |
10 | | // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | | // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
13 | | // details. |
14 | | // |
15 | | // You should have received a copy of the GNU Affero General Public License |
16 | | // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> |
17 | | // |
18 | | // Alternative licensing terms are available from the licensor. |
19 | | // For commercial licensing, see <https://www.artifex.com/> or contact |
20 | | // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
21 | | // CA 94129, USA, for further information. |
22 | | |
23 | | #include "mupdf/fitz.h" |
24 | | |
25 | | #include <string.h> |
26 | | #include <limits.h> |
27 | | |
28 | | #include "z-imp.h" |
29 | | |
30 | | #if !defined (INT32_MAX) |
31 | | #define INT32_MAX 2147483647L |
32 | | #endif |
33 | | |
/* Record signatures: "PK" followed by two type bytes, read little-endian. */
#define ZIP_LOCAL_FILE_SIG 0x04034b50
#define ZIP_DATA_DESC_SIG 0x08074b50
#define ZIP_CENTRAL_DIRECTORY_SIG 0x02014b50
#define ZIP_END_OF_CENTRAL_DIRECTORY_SIG 0x06054b50

/* Extra field id compared against when looking for a replacement (unicode) name. */
#define ZIP_UP_SIG 0x7075

/* ZIP64 record signatures and the ZIP64 extra field id. */
#define ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIG 0x07064b50
#define ZIP64_END_OF_CENTRAL_DIRECTORY_SIG 0x06064b50
#define ZIP64_EXTRA_FIELD_SIG 0x0001

/* Bit tested in the local header's general purpose flags to reject encrypted entries. */
#define ZIP_ENCRYPTED_FLAG 0x1
45 | | |
/* One central directory entry, as collected by read_zip_dir_imp. */
typedef struct
{
	/* Heap-allocated, zero-terminated entry name (freed in drop_zip_archive). */
	char *name;
	/* Offset used to seek to the entry's local file header. */
	uint64_t offset;
	/* Compressed size in bytes. */
	uint64_t csize;
	/* Uncompressed size in bytes. */
	uint64_t usize;
} zip_entry;
51 | | |
52 | | typedef struct |
53 | | { |
54 | | fz_archive super; |
55 | | |
56 | | int count; |
57 | | zip_entry *entries; |
58 | | } fz_zip_archive; |
59 | | |
60 | | static void drop_zip_archive(fz_context *ctx, fz_archive *arch) |
61 | 395 | { |
62 | 395 | fz_zip_archive *zip = (fz_zip_archive *) arch; |
63 | 395 | int i; |
64 | 1.42k | for (i = 0; i < zip->count; ++i) |
65 | 1.02k | fz_free(ctx, zip->entries[i].name); |
66 | 395 | fz_free(ctx, zip->entries); |
67 | 395 | } |
68 | | |
/* Return 1 if c is an ASCII hexadecimal digit (0-9, a-f, A-F), otherwise 0. */
static int ishex(char c)
{
	return (c >= '0' && c <= '9') ||
		(c >= 'a' && c <= 'f') ||
		(c >= 'A' && c <= 'F');
}
79 | | |
/* Convert a hexadecimal digit to its value (0..15).
 * No validation is done: anything that is not 0-9 or a-f is treated as if
 * it were an upper-case digit, so callers check with ishex() first. */
static int unhex(char c)
{
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= '0' && c <= '9')
		return c - '0';
	return c - 'A' + 10;
}
88 | | |
89 | | /* This is to cope with the #Uffff and #Lffffff escaping scheme |
90 | | * used by info-zip when encoding files into zipfiles on |
91 | | * non-utf8-native platforms, like Windows. Although this isn't |
92 | | * strictly part of the zip standard, info-zip has been doing |
93 | | * this since 2008 at least, and it's ubiquitous. We shouldn't |
94 | | * get '#' chars in filenames otherwise, so it's pretty safe. |
95 | | */ |
96 | | static unsigned char *unescape(fz_context *ctx, unsigned char *name) |
97 | 965 | { |
98 | 965 | unsigned char *newname; |
99 | 965 | unsigned char *d; |
100 | 965 | unsigned char *s = name; |
101 | 965 | unsigned char c; |
102 | 965 | size_t z = 1; |
103 | | |
104 | | /* Count the target length */ |
105 | 25.3k | while ((c = *s++) != 0) |
106 | 24.4k | { |
107 | 24.4k | if (c == '#' && s[0] == 'U' && |
108 | 24.4k | ishex(s[1]) && ishex(s[2]) && ishex(s[3]) && ishex(s[4])) |
109 | 0 | { |
110 | 0 | int uni = (unhex(s[1])<<12)+(unhex(s[2])<<8)+(unhex(s[3])<<4)+unhex(s[4]); |
111 | |
|
112 | 0 | if (uni < 0x80) |
113 | 0 | { |
114 | | /* Unlikely, cos why would it have been escaped? */ |
115 | 0 | z++; |
116 | 0 | } |
117 | 0 | else if (uni < (1<<11)) |
118 | 0 | { |
119 | 0 | z += 2; |
120 | 0 | } |
121 | 0 | else |
122 | 0 | { |
123 | 0 | z += 3; |
124 | 0 | } |
125 | 0 | s += 5; |
126 | 0 | } |
127 | 24.4k | else if (c == '#' && s[0] == 'L' && |
128 | 24.4k | ishex(s[1]) && ishex(s[2]) && ishex(s[3]) && ishex(s[4]) && ishex(s[5]) && ishex(s[6])) |
129 | 0 | { |
130 | 0 | int uni = (unhex(s[1])<<20)+(unhex(s[2])<<16)+(unhex(s[3])<<12)+(unhex(s[4])<<8)+(unhex(s[5])<<4)+unhex(s[6]); |
131 | |
|
132 | 0 | if (uni < 0x80) |
133 | 0 | { |
134 | | /* Unlikely, cos why would it have been escaped? */ |
135 | 0 | z++; |
136 | 0 | } |
137 | 0 | else if (uni < (1<<11)) |
138 | 0 | { |
139 | | /* Unlikely, cos why wouldn't it be #U? */ |
140 | 0 | z += 2; |
141 | 0 | } |
142 | 0 | else if (uni < (1<<16)) |
143 | 0 | { |
144 | | /* Unlikely, cos why wouldn't it be #U? */ |
145 | 0 | z += 3; |
146 | 0 | } |
147 | 0 | else if (uni <= 0x10FFFF) |
148 | 0 | { |
149 | 0 | z += 4; |
150 | 0 | } |
151 | 0 | else |
152 | 0 | { |
153 | | /* Illegal char for utf-8 encoding. */ |
154 | | /* Leave escaped! */ |
155 | 0 | z += 8; |
156 | 0 | } |
157 | 0 | s += 7; |
158 | 0 | } |
159 | 24.4k | else if (c >= 0x80) |
160 | 88 | { |
161 | | /* Why wasn't this byte escaped? Encode it to utf-8, best we can do. */ |
162 | 88 | z += 2; |
163 | 88 | } |
164 | 24.3k | else |
165 | 24.3k | z++; |
166 | 24.4k | } |
167 | | |
168 | 965 | newname = Memento_label(fz_malloc(ctx, z), "zip_name"); |
169 | | |
170 | 965 | d = newname; |
171 | 965 | s = name; |
172 | | |
173 | | /* Now rewrite the name */ |
174 | 25.3k | while ((c = *s++) != 0) |
175 | 24.4k | { |
176 | 24.4k | if (c == '#' && s[0] == 'U' && |
177 | 24.4k | ishex(s[1]) && ishex(s[2]) && ishex(s[3]) && ishex(s[4])) |
178 | 0 | { |
179 | 0 | int uni = (unhex(s[1])<<12)+(unhex(s[2])<<8)+(unhex(s[3])<<4)+unhex(s[4]); |
180 | |
|
181 | 0 | if (uni < 0x80) |
182 | 0 | { |
183 | | /* Unlikely, cos why would it have been escaped? */ |
184 | 0 | *d++ = uni; |
185 | 0 | } |
186 | 0 | else if (uni < (1<<11)) |
187 | 0 | { |
188 | 0 | *d++ = 0xC0+(uni>>6); /* 5 bits */ |
189 | 0 | *d++ = 0x80+(uni & 0x3f); /* 6 bits */ |
190 | 0 | } |
191 | 0 | else |
192 | 0 | { |
193 | 0 | *d++ = 0xE0+(uni>>12); /* 4 bits */ |
194 | 0 | *d++ = 0x80+((uni>>6) & 0x3f); /* 6 bits */ |
195 | 0 | *d++ = 0x80+(uni & 0x3f); /* 6 bits */ |
196 | 0 | } |
197 | 0 | s += 5; |
198 | 0 | } |
199 | 24.4k | else if (c == '#' && s[0] == 'L' && |
200 | 24.4k | ishex(s[1]) && ishex(s[2]) && ishex(s[3]) && ishex(s[4]) && ishex(s[5]) && ishex(s[6])) |
201 | 0 | { |
202 | 0 | int uni = (unhex(s[1])<<20)+(unhex(s[2])<<16)+(unhex(s[3])<<12)+(unhex(s[4])<<8)+(unhex(s[5])<<4)+unhex(s[6]); |
203 | |
|
204 | 0 | if (uni < 0x80) |
205 | 0 | { |
206 | | /* Unlikely, cos why would it have been escaped? */ |
207 | 0 | *d++ = uni; |
208 | 0 | } |
209 | 0 | else if (uni < (1<<11)) |
210 | 0 | { |
211 | | /* Unlikely, cos why wouldn't it be #U? */ |
212 | 0 | *d++ = 0xC0+(uni>>6); /* 5 bits */ |
213 | 0 | *d++ = 0x80+(uni & 0x3f); /* 6 bits */ |
214 | 0 | } |
215 | 0 | else if (uni < (1<<16)) |
216 | 0 | { |
217 | | /* Unlikely, cos why wouldn't it be #U? */ |
218 | 0 | *d++ = 0xE0+(uni>>12); /* 4 bits */ |
219 | 0 | *d++ = 0x80+((uni>>6) & 0x3f); /* 6 bits */ |
220 | 0 | *d++ = 0x80+(uni & 0x3f); /* 6 bits */ |
221 | 0 | } |
222 | 0 | else if (uni <= 0x10FFFF) |
223 | 0 | { |
224 | 0 | *d++ = 0xF0+(uni>>18); /* 3 bits */ |
225 | 0 | *d++ = 0x80+((uni>>12) & 0x3f); /* 6 bits */ |
226 | 0 | *d++ = 0x80+((uni>>6) & 0x3f); /* 6 bits */ |
227 | 0 | *d++ = 0x80+(uni & 0x3f); /* 6 bits */ |
228 | 0 | } |
229 | 0 | else |
230 | 0 | { |
231 | | /* Illegal char for utf-8 encoding. */ |
232 | | /* Leave escaped! */ |
233 | 0 | memcpy(d, s-1, 8); |
234 | 0 | d += 8; |
235 | 0 | } |
236 | 0 | s += 7; |
237 | 0 | } |
238 | 24.4k | else if (c >= 0x80) |
239 | 88 | { |
240 | | /* Why wasn't this byte escaped? Encode it to utf-8, best we can do. */ |
241 | 88 | *d++ = 0xC0+(c>>6); /* 5 bits */ |
242 | 88 | *d++ = 0x80+(c & 0x3f); /* 6 bits */ |
243 | 88 | } |
244 | 24.3k | else |
245 | 24.3k | *d++ = c; |
246 | | |
247 | 24.4k | } |
248 | 965 | *d = 0; |
249 | | |
250 | 965 | fz_free(ctx, name); |
251 | | |
252 | 965 | return newname; |
253 | 965 | } |
254 | | |
255 | | static void read_zip_dir_imp(fz_context *ctx, fz_zip_archive *zip, int64_t start_offset) |
256 | 131 | { |
257 | 131 | fz_stream *file = zip->super.file; |
258 | 131 | uint32_t sig; |
259 | 131 | int i; |
260 | 131 | int namesize, metasize, commentsize; |
261 | 131 | uint64_t count, offset; |
262 | 131 | uint64_t csize, usize; |
263 | 131 | char *name = NULL; |
264 | 131 | size_t n; |
265 | 131 | int gp; |
266 | 131 | int utf8 = 0; |
267 | | |
268 | 131 | fz_var(name); |
269 | | |
270 | 131 | zip->count = 0; |
271 | | |
272 | 131 | fz_seek(ctx, file, start_offset, 0); |
273 | | |
274 | 131 | sig = fz_read_uint32_le(ctx, file); |
275 | 131 | if (sig != ZIP_END_OF_CENTRAL_DIRECTORY_SIG) |
276 | 0 | fz_throw(ctx, FZ_ERROR_FORMAT, "wrong zip end of central directory signature (0x%x)", sig); |
277 | | |
278 | 131 | (void) fz_read_uint16_le(ctx, file); /* this disk */ |
279 | 131 | (void) fz_read_uint16_le(ctx, file); /* start disk */ |
280 | 131 | (void) fz_read_uint16_le(ctx, file); /* entries in this disk */ |
281 | 131 | count = fz_read_uint16_le(ctx, file); /* entries in central directory disk */ |
282 | 131 | (void) fz_read_uint32_le(ctx, file); /* size of central directory */ |
283 | 131 | offset = fz_read_uint32_le(ctx, file); /* offset to central directory */ |
284 | | |
285 | | /* ZIP64 */ |
286 | 131 | if (count == 0xFFFF || offset == 0xFFFFFFFF) |
287 | 20 | { |
288 | 20 | int64_t offset64, count64; |
289 | | |
290 | 20 | fz_seek(ctx, file, start_offset - 20, 0); |
291 | | |
292 | 20 | sig = fz_read_uint32_le(ctx, file); |
293 | 20 | if (sig != ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIG) |
294 | 0 | fz_throw(ctx, FZ_ERROR_FORMAT, "wrong zip64 end of central directory locator signature (0x%x)", sig); |
295 | | |
296 | 20 | (void) fz_read_uint32_le(ctx, file); /* start disk */ |
297 | 20 | offset64 = fz_read_uint64_le(ctx, file); /* offset to end of central directory record */ |
298 | | |
299 | 20 | fz_seek(ctx, file, offset64, 0); |
300 | | |
301 | 20 | sig = fz_read_uint32_le(ctx, file); |
302 | 20 | if (sig != ZIP64_END_OF_CENTRAL_DIRECTORY_SIG) |
303 | 8 | fz_throw(ctx, FZ_ERROR_FORMAT, "wrong zip64 end of central directory signature (0x%x)", sig); |
304 | | |
305 | 12 | (void) fz_read_uint64_le(ctx, file); /* size of record */ |
306 | 12 | (void) fz_read_uint16_le(ctx, file); /* version made by */ |
307 | 12 | (void) fz_read_uint16_le(ctx, file); /* version to extract */ |
308 | 12 | (void) fz_read_uint32_le(ctx, file); /* disk number */ |
309 | 12 | (void) fz_read_uint32_le(ctx, file); /* disk number start */ |
310 | 12 | count64 = fz_read_uint64_le(ctx, file); /* entries in central directory disk */ |
311 | 12 | (void) fz_read_uint64_le(ctx, file); /* entries in central directory */ |
312 | 12 | (void) fz_read_uint64_le(ctx, file); /* size of central directory */ |
313 | 12 | offset64 = fz_read_uint64_le(ctx, file); /* offset to central directory */ |
314 | | |
315 | 12 | if (count == 0xFFFF) |
316 | 4 | { |
317 | 4 | count = count64; |
318 | 4 | } |
319 | 12 | if (offset == 0xFFFFFFFF) |
320 | 8 | { |
321 | 8 | offset = offset64; |
322 | 8 | } |
323 | 12 | } |
324 | | |
325 | 123 | fz_seek(ctx, file, offset, 0); |
326 | | |
327 | 230 | fz_try(ctx) |
328 | 230 | { |
329 | 115 | if (count > INT_MAX) |
330 | 4 | count = INT_MAX; |
331 | 1.17k | for (i = 0; i < (int)count; i++) |
332 | 1.08k | { |
333 | 1.08k | sig = fz_read_uint32_le(ctx, file); |
334 | 1.08k | if (sig != ZIP_CENTRAL_DIRECTORY_SIG) |
335 | 16 | fz_throw(ctx, FZ_ERROR_FORMAT, "wrong zip central directory signature (0x%x)", sig); |
336 | | |
337 | 1.06k | (void) fz_read_uint16_le(ctx, file); /* version made by */ |
338 | 1.06k | (void) fz_read_uint16_le(ctx, file); /* version to extract */ |
339 | 1.06k | gp = fz_read_uint16_le(ctx, file); /* general */ |
340 | 1.06k | utf8 = !!(gp & (1<<11)); |
341 | 1.06k | (void) fz_read_uint16_le(ctx, file); /* method */ |
342 | 1.06k | (void) fz_read_uint16_le(ctx, file); /* last mod file time */ |
343 | 1.06k | (void) fz_read_uint16_le(ctx, file); /* last mod file date */ |
344 | 1.06k | (void) fz_read_uint32_le(ctx, file); /* crc-32 */ |
345 | 1.06k | csize = fz_read_uint32_le(ctx, file); |
346 | 1.06k | usize = fz_read_uint32_le(ctx, file); |
347 | 1.06k | namesize = fz_read_uint16_le(ctx, file); |
348 | 1.06k | metasize = fz_read_uint16_le(ctx, file); |
349 | 1.06k | commentsize = fz_read_uint16_le(ctx, file); |
350 | 1.06k | (void) fz_read_uint16_le(ctx, file); /* disk number start */ |
351 | 1.06k | (void) fz_read_uint16_le(ctx, file); /* int file atts */ |
352 | 1.06k | (void) fz_read_uint32_le(ctx, file); /* ext file atts */ |
353 | 1.06k | offset = fz_read_uint32_le(ctx, file); |
354 | | |
355 | 1.06k | name = Memento_label(fz_malloc(ctx, namesize + 1), "zip_name"); |
356 | | |
357 | 1.06k | n = fz_read(ctx, file, (unsigned char*)name, namesize); |
358 | 1.06k | if (n < (size_t)namesize) |
359 | 8 | fz_throw(ctx, FZ_ERROR_FORMAT, "premature end of data in zip entry name"); |
360 | 1.05k | name[namesize] = '\0'; |
361 | | |
362 | 1.05k | if (!utf8) |
363 | 965 | name = (char *)unescape(ctx, (unsigned char *)name); |
364 | | |
365 | 1.21k | while (metasize > 0) |
366 | 162 | { |
367 | 162 | int type = fz_read_uint16_le(ctx, file); |
368 | 162 | int size = fz_read_uint16_le(ctx, file); |
369 | | |
370 | 162 | if (type == ZIP64_EXTRA_FIELD_SIG) |
371 | 0 | { |
372 | 0 | int sizeleft = size; |
373 | 0 | if (usize == 0xFFFFFFFF && sizeleft >= 8) |
374 | 0 | { |
375 | 0 | usize = fz_read_uint64_le(ctx, file); |
376 | 0 | sizeleft -= 8; |
377 | 0 | } |
378 | 0 | if (csize == 0xFFFFFFFF && sizeleft >= 8) |
379 | 0 | { |
380 | 0 | csize = fz_read_uint64_le(ctx, file); |
381 | 0 | sizeleft -= 8; |
382 | 0 | } |
383 | 0 | if (offset == 0xFFFFFFFF && sizeleft >= 8) |
384 | 0 | { |
385 | 0 | offset = fz_read_uint64_le(ctx, file); |
386 | 0 | sizeleft -= 8; |
387 | 0 | } |
388 | 0 | fz_seek(ctx, file, sizeleft - size, 1); |
389 | 0 | } |
390 | 162 | if (type == ZIP_UP_SIG && size > 5) |
391 | 0 | { |
392 | 0 | int sizeleft = size - 1; |
393 | 0 | if (fz_read_byte(ctx, file) == 1) |
394 | 0 | { |
395 | | /* Version 1 */ |
396 | 0 | (void) fz_read_uint32(ctx, file); /* Skip the CRC */ |
397 | 0 | sizeleft -= 4; |
398 | 0 | fz_free(ctx, name); |
399 | 0 | name = NULL; |
400 | 0 | name = Memento_label(fz_malloc(ctx, sizeleft + 1), "zip_name"); |
401 | 0 | fz_read(ctx, file, (unsigned char *)name, sizeleft); |
402 | 0 | name[sizeleft] = 0; |
403 | 0 | sizeleft = 0; |
404 | 0 | } |
405 | 0 | fz_seek(ctx, file, sizeleft - size, 1); |
406 | 0 | } |
407 | 162 | fz_seek(ctx, file, size, 1); |
408 | 162 | metasize -= 4 + size; |
409 | 162 | } |
410 | | |
411 | 1.05k | if (usize > INT32_MAX || csize > INT32_MAX) |
412 | 0 | fz_throw(ctx, FZ_ERROR_FORMAT, "zip archive entry larger than 2 GB"); |
413 | | |
414 | 1.05k | fz_seek(ctx, file, commentsize, 1); |
415 | | |
416 | 1.05k | zip->entries = Memento_label(fz_realloc_array(ctx, zip->entries, zip->count + 1, zip_entry), "zip_entries"); |
417 | | |
418 | 1.05k | zip->entries[zip->count].offset = offset; |
419 | 1.05k | zip->entries[zip->count].csize = csize; |
420 | 1.05k | zip->entries[zip->count].usize = usize; |
421 | 1.05k | zip->entries[zip->count].name = name; |
422 | 1.05k | name = NULL; |
423 | | |
424 | 1.05k | zip->count++; |
425 | 1.05k | } |
426 | 115 | } |
427 | 230 | fz_always(ctx) |
428 | 115 | fz_free(ctx, name); |
429 | 115 | fz_catch(ctx) |
430 | 56 | fz_rethrow(ctx); |
431 | 99 | } |
432 | | |
433 | | static int read_zip_entry_header(fz_context *ctx, fz_zip_archive *zip, zip_entry *ent) |
434 | 29 | { |
435 | 29 | fz_stream *file = zip->super.file; |
436 | 29 | uint32_t sig; |
437 | 29 | int general, method, namelength, extralength; |
438 | | |
439 | 29 | fz_seek(ctx, file, ent->offset, 0); |
440 | | |
441 | 29 | sig = fz_read_uint32_le(ctx, file); |
442 | 29 | if (sig != ZIP_LOCAL_FILE_SIG) |
443 | 2 | fz_throw(ctx, FZ_ERROR_FORMAT, "wrong zip local file signature (0x%x)", sig); |
444 | | |
445 | 27 | (void) fz_read_uint16_le(ctx, file); /* version */ |
446 | 27 | general = fz_read_uint16_le(ctx, file); /* general */ |
447 | 27 | if (general & ZIP_ENCRYPTED_FLAG) |
448 | 0 | fz_throw(ctx, FZ_ERROR_FORMAT, "zip content is encrypted"); |
449 | | |
450 | 27 | method = fz_read_uint16_le(ctx, file); |
451 | 27 | (void) fz_read_uint16_le(ctx, file); /* file time */ |
452 | 27 | (void) fz_read_uint16_le(ctx, file); /* file date */ |
453 | 27 | (void) fz_read_uint32_le(ctx, file); /* crc-32 */ |
454 | 27 | (void) fz_read_uint32_le(ctx, file); /* csize */ |
455 | 27 | (void) fz_read_uint32_le(ctx, file); /* usize */ |
456 | 27 | namelength = fz_read_uint16_le(ctx, file); |
457 | 27 | extralength = fz_read_uint16_le(ctx, file); |
458 | | |
459 | 27 | fz_seek(ctx, file, namelength + extralength, 1); |
460 | | |
461 | 27 | return method; |
462 | 27 | } |
463 | | |
464 | | static void ensure_zip_entries(fz_context *ctx, fz_zip_archive *zip) |
465 | 395 | { |
466 | 395 | fz_stream *file = zip->super.file; |
467 | 395 | unsigned char buf[512]; |
468 | 395 | size_t size, back, maxback; |
469 | 395 | size_t i, n; |
470 | | |
471 | 395 | fz_seek(ctx, file, 0, SEEK_END); |
472 | 395 | size = fz_tell(ctx, file); |
473 | | |
474 | 395 | maxback = fz_minz(size, 0xFFFF + sizeof buf); |
475 | 395 | back = fz_minz(maxback, sizeof buf); |
476 | | |
477 | 3.61k | while (back <= maxback) |
478 | 3.35k | { |
479 | 3.35k | fz_seek(ctx, file, (int64_t)(size - back), 0); |
480 | 3.35k | n = fz_read(ctx, file, buf, sizeof buf); |
481 | 3.35k | if (n < 4) |
482 | 0 | break; |
483 | 1.62M | for (i = n - 4; i > 0; i--) |
484 | 1.62M | if (!memcmp(buf + i, "PK\5\6", 4)) |
485 | 131 | { |
486 | 131 | read_zip_dir_imp(ctx, zip, size - back + i); |
487 | 131 | return; |
488 | 131 | } |
489 | 3.22k | back += sizeof buf - 4; |
490 | 3.22k | } |
491 | | |
492 | 264 | fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find end of central directory"); |
493 | 395 | } |
494 | | |
495 | | static zip_entry *lookup_zip_entry(fz_context *ctx, fz_zip_archive *zip, const char *name) |
496 | 103 | { |
497 | 103 | int i; |
498 | 103 | if (name[0] == '/') |
499 | 33 | ++name; |
500 | 1.32k | for (i = 0; i < zip->count; i++) |
501 | 1.28k | if (!fz_strcasecmp(name, zip->entries[i].name)) |
502 | 58 | return &zip->entries[i]; |
503 | 45 | return NULL; |
504 | 103 | } |
505 | | |
506 | | static fz_stream *open_zip_entry(fz_context *ctx, fz_archive *arch, const char *name) |
507 | 0 | { |
508 | 0 | fz_zip_archive *zip = (fz_zip_archive *) arch; |
509 | 0 | fz_stream *file = zip->super.file; |
510 | 0 | int method; |
511 | 0 | zip_entry *ent; |
512 | |
|
513 | 0 | ent = lookup_zip_entry(ctx, zip, name); |
514 | 0 | if (!ent) |
515 | 0 | return NULL; |
516 | | |
517 | 0 | method = read_zip_entry_header(ctx, zip, ent); |
518 | 0 | if (method == 0) |
519 | 0 | return fz_open_null_filter(ctx, file, ent->usize, fz_tell(ctx, file)); |
520 | 0 | if (method == 8) |
521 | 0 | return fz_open_flated(ctx, file, -15); |
522 | 0 | fz_throw(ctx, FZ_ERROR_FORMAT, "unknown zip method: %d", method); |
523 | 0 | } |
524 | | |
525 | | static fz_buffer *read_zip_entry(fz_context *ctx, fz_archive *arch, const char *name) |
526 | 29 | { |
527 | 29 | fz_zip_archive *zip = (fz_zip_archive *) arch; |
528 | 29 | fz_stream *file = zip->super.file; |
529 | 29 | fz_buffer *ubuf; |
530 | 29 | unsigned char *cbuf = NULL; |
531 | 29 | int method; |
532 | 29 | z_stream z; |
533 | 29 | int code; |
534 | 29 | uint64_t len; |
535 | 29 | zip_entry *ent; |
536 | | |
537 | 29 | fz_var(cbuf); |
538 | | |
539 | 29 | ent = lookup_zip_entry(ctx, zip, name); |
540 | 29 | if (!ent) |
541 | 0 | return NULL; |
542 | | |
543 | 29 | method = read_zip_entry_header(ctx, zip, ent); |
544 | 29 | ubuf = fz_new_buffer(ctx, ent->usize + 1); /* +1 because many callers will add a terminating zero */ |
545 | | |
546 | 29 | if (method == 0) |
547 | 4 | { |
548 | 8 | fz_try(ctx) |
549 | 8 | { |
550 | 4 | ubuf->len = fz_read(ctx, file, ubuf->data, ent->usize); |
551 | 4 | if (ubuf->len < (size_t)ent->usize) |
552 | 0 | fz_warn(ctx, "premature end of data in stored zip archive entry"); |
553 | 4 | } |
554 | 8 | fz_catch(ctx) |
555 | 0 | { |
556 | 0 | fz_drop_buffer(ctx, ubuf); |
557 | 0 | fz_rethrow(ctx); |
558 | 0 | } |
559 | 4 | return ubuf; |
560 | 4 | } |
561 | 25 | else if (method == 8) |
562 | 23 | { |
563 | 46 | fz_try(ctx) |
564 | 46 | { |
565 | 23 | cbuf = fz_malloc(ctx, ent->csize); |
566 | | |
567 | 23 | z.zalloc = fz_zlib_alloc; |
568 | 23 | z.zfree = fz_zlib_free; |
569 | 23 | z.opaque = ctx; |
570 | 23 | z.next_out = ubuf->data; |
571 | 23 | z.avail_out = ent->usize; |
572 | 23 | z.next_in = cbuf; |
573 | 23 | z.avail_in = fz_read(ctx, file, cbuf, ent->csize); |
574 | 23 | if (z.avail_in < ent->csize) |
575 | 2 | fz_warn(ctx, "premature end of compressed data for compressed archive entry"); |
576 | | |
577 | 23 | code = inflateInit2(&z, -15); |
578 | 23 | if (code != Z_OK) |
579 | 0 | { |
580 | 0 | fz_throw(ctx, FZ_ERROR_LIBRARY, "zlib inflateInit2 error: %s", z.msg); |
581 | 0 | } |
582 | 23 | code = inflate(&z, Z_FINISH); |
583 | 23 | if (code != Z_STREAM_END) |
584 | 3 | { |
585 | 3 | inflateEnd(&z); |
586 | 3 | fz_throw(ctx, FZ_ERROR_LIBRARY, "zlib inflate error: %s", z.msg); |
587 | 3 | } |
588 | 20 | code = inflateEnd(&z); |
589 | 20 | if (code != Z_OK) |
590 | 0 | { |
591 | 0 | fz_throw(ctx, FZ_ERROR_LIBRARY, "zlib inflateEnd error: %s", z.msg); |
592 | 0 | } |
593 | | |
594 | 20 | len = ent->usize - z.avail_out; |
595 | 20 | if (len < ent->usize) |
596 | 0 | fz_warn(ctx, "premature end of data in compressed archive entry"); |
597 | 20 | ubuf->len = len; |
598 | 20 | } |
599 | 46 | fz_always(ctx) |
600 | 23 | { |
601 | 23 | fz_free(ctx, cbuf); |
602 | 23 | } |
603 | 23 | fz_catch(ctx) |
604 | 3 | { |
605 | 3 | fz_drop_buffer(ctx, ubuf); |
606 | 3 | fz_rethrow(ctx); |
607 | 3 | } |
608 | 17 | return ubuf; |
609 | 20 | } |
610 | | |
611 | 2 | fz_drop_buffer(ctx, ubuf); |
612 | 2 | fz_throw(ctx, FZ_ERROR_FORMAT, "unknown zip method: %d", method); |
613 | 29 | } |
614 | | |
615 | | static int has_zip_entry(fz_context *ctx, fz_archive *arch, const char *name) |
616 | 74 | { |
617 | 74 | fz_zip_archive *zip = (fz_zip_archive *) arch; |
618 | 74 | zip_entry *ent = lookup_zip_entry(ctx, zip, name); |
619 | 74 | return ent != NULL; |
620 | 74 | } |
621 | | |
622 | | static const char *list_zip_entry(fz_context *ctx, fz_archive *arch, int idx) |
623 | 236 | { |
624 | 236 | fz_zip_archive *zip = (fz_zip_archive *) arch; |
625 | 236 | if (idx < 0 || idx >= zip->count) |
626 | 0 | return NULL; |
627 | 236 | return zip->entries[idx].name; |
628 | 236 | } |
629 | | |
630 | | static int count_zip_entries(fz_context *ctx, fz_archive *arch) |
631 | 14 | { |
632 | 14 | fz_zip_archive *zip = (fz_zip_archive *) arch; |
633 | 14 | return zip->count; |
634 | 14 | } |
635 | | |
636 | | int |
637 | | fz_is_zip_archive(fz_context *ctx, fz_stream *file) |
638 | 54.6k | { |
639 | 54.6k | const unsigned char signature[4] = { 'P', 'K', 0x03, 0x04 }; |
640 | 54.6k | unsigned char data[4]; |
641 | 54.6k | size_t n; |
642 | | |
643 | 54.6k | if (file == NULL) |
644 | 0 | return 0; |
645 | | |
646 | 54.6k | fz_seek(ctx, file, 0, 0); |
647 | 54.6k | n = fz_read(ctx, file, data, nelem(data)); |
648 | 54.6k | if (n != nelem(signature)) |
649 | 112 | return 0; |
650 | 54.5k | if (memcmp(data, signature, nelem(signature))) |
651 | 53.7k | return 0; |
652 | | |
653 | 790 | return 1; |
654 | 54.5k | } |
655 | | |
656 | | fz_archive * |
657 | | fz_open_zip_archive_with_stream(fz_context *ctx, fz_stream *file) |
658 | 395 | { |
659 | 395 | fz_zip_archive *zip; |
660 | | |
661 | 395 | if (!fz_is_zip_archive(ctx, file)) |
662 | 0 | fz_throw(ctx, FZ_ERROR_FORMAT, "cannot recognize zip archive"); |
663 | | |
664 | 395 | zip = fz_new_derived_archive(ctx, file, fz_zip_archive); |
665 | 395 | zip->super.format = "zip"; |
666 | 395 | zip->super.count_entries = count_zip_entries; |
667 | 395 | zip->super.list_entry = list_zip_entry; |
668 | 395 | zip->super.has_entry = has_zip_entry; |
669 | 395 | zip->super.read_entry = read_zip_entry; |
670 | 395 | zip->super.open_entry = open_zip_entry; |
671 | 395 | zip->super.drop_archive = drop_zip_archive; |
672 | | |
673 | 790 | fz_try(ctx) |
674 | 790 | { |
675 | 395 | ensure_zip_entries(ctx, zip); |
676 | 395 | } |
677 | 790 | fz_catch(ctx) |
678 | 336 | { |
679 | 336 | fz_drop_archive(ctx, &zip->super); |
680 | 336 | fz_rethrow(ctx); |
681 | 336 | } |
682 | | |
683 | 59 | return &zip->super; |
684 | 395 | } |
685 | | |
686 | | fz_archive * |
687 | | fz_open_zip_archive(fz_context *ctx, const char *filename) |
688 | 0 | { |
689 | 0 | fz_archive *zip = NULL; |
690 | 0 | fz_stream *file; |
691 | |
|
692 | 0 | file = fz_open_file(ctx, filename); |
693 | |
|
694 | 0 | fz_var(zip); |
695 | |
|
696 | 0 | fz_try(ctx) |
697 | 0 | zip = fz_open_zip_archive_with_stream(ctx, file); |
698 | 0 | fz_always(ctx) |
699 | 0 | fz_drop_stream(ctx, file); |
700 | 0 | fz_catch(ctx) |
701 | 0 | fz_rethrow(ctx); |
702 | | |
703 | 0 | return zip; |
704 | 0 | } |