/src/zstd/contrib/seekable_format/zstdseek_decompress.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | | * All rights reserved. |
4 | | * |
5 | | * This source code is licensed under both the BSD-style license (found in the |
6 | | * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
7 | | * in the COPYING file in the root directory of this source tree). |
8 | | * You may select, at your option, one of the above-listed licenses. |
9 | | */ |
10 | | |
11 | | /* ********************************************************* |
12 | | * Turn on Large Files support (>4GB) for 32-bit Linux/Unix |
13 | | ***********************************************************/ |
14 | | #if !defined(__64BIT__) || defined(__MINGW32__) /* No point defining Large file for 64 bit but MinGW-w64 requires it */ |
15 | | # if !defined(_FILE_OFFSET_BITS) |
16 | | # define _FILE_OFFSET_BITS 64 /* turn off_t into a 64-bit type for ftello, fseeko */ |
17 | | # endif |
18 | | # if !defined(_LARGEFILE_SOURCE) /* obsolete macro, replaced with _FILE_OFFSET_BITS */ |
19 | | # define _LARGEFILE_SOURCE 1 /* Large File Support extension (LFS) - fseeko, ftello */ |
20 | | # endif |
21 | | # if defined(_AIX) || defined(__hpux) |
22 | | # define _LARGE_FILES /* Large file support on 32-bits AIX and HP-UX */ |
23 | | # endif |
24 | | #endif |
25 | | |
26 | | /* ************************************************************ |
27 | | * Detect POSIX version |
28 | | * PLATFORM_POSIX_VERSION = 0 for non-Unix e.g. Windows |
29 | | * PLATFORM_POSIX_VERSION = 1 for Unix-like but non-POSIX |
30 | | * PLATFORM_POSIX_VERSION > 1 is equal to found _POSIX_VERSION |
31 | | * Value of PLATFORM_POSIX_VERSION can be forced on command line |
32 | | ***************************************************************/ |
33 | | #ifndef PLATFORM_POSIX_VERSION |
34 | | |
35 | | # if (defined(__APPLE__) && defined(__MACH__)) || defined(__SVR4) || defined(_AIX) || defined(__hpux) /* POSIX.1-2001 (SUSv3) conformant */ \ |
36 | | || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) /* BSD distros */ |
37 | | /* exception rule : force posix version to 200112L, |
38 | | * note: it's better to use unistd.h's _POSIX_VERSION whenever possible */ |
39 | | # define PLATFORM_POSIX_VERSION 200112L |
40 | | |
41 | | /* try to determine posix version through official unistd.h's _POSIX_VERSION (https://pubs.opengroup.org/onlinepubs/7908799/xsh/unistd.h.html). |
42 | | * note : there is no simple way to know in advance if <unistd.h> is present or not on target system, |
43 | | * Posix specification mandates its presence and its content, but target system must respect this spec. |
44 | | * It's necessary to _not_ #include <unistd.h> whenever target OS is not unix-like |
45 | | * otherwise it will block preprocessing stage. |
46 | | * The following list of build macros tries to "guess" if target OS is likely unix-like, and therefore can #include <unistd.h> |
47 | | */ |
48 | | # elif !defined(_WIN32) \ |
49 | | && ( defined(__unix__) || defined(__unix) \ |
50 | | || defined(__midipix__) || defined(__VMS) || defined(__HAIKU__) ) |
51 | | |
52 | | # if defined(__linux__) || defined(__linux) || defined(__CYGWIN__) |
53 | | # ifndef _POSIX_C_SOURCE |
54 | | # define _POSIX_C_SOURCE 200809L /* feature test macro : https://www.gnu.org/software/libc/manual/html_node/Feature-Test-Macros.html */ |
55 | | # endif |
56 | | # endif |
57 | | # include <unistd.h> /* declares _POSIX_VERSION */ |
58 | | # if defined(_POSIX_VERSION) /* POSIX compliant */ |
59 | | # define PLATFORM_POSIX_VERSION _POSIX_VERSION |
60 | | # else |
61 | | # define PLATFORM_POSIX_VERSION 1 |
62 | | # endif |
63 | | |
64 | | # ifdef __UCLIBC__ |
65 | | # ifndef __USE_MISC |
66 | | # define __USE_MISC /* enable st_mtim on uclibc */ |
67 | | # endif |
68 | | # endif |
69 | | |
70 | | # else /* non-unix target platform (like Windows) */ |
71 | | # define PLATFORM_POSIX_VERSION 0 |
72 | | # endif |
73 | | |
74 | | #endif /* PLATFORM_POSIX_VERSION */ |
75 | | |
76 | | |
77 | | /* ************************************************************ |
78 | | * Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW |
79 | | ***************************************************************/ |
80 | | #if defined(_MSC_VER) && _MSC_VER >= 1400 |
81 | | # define LONG_SEEK _fseeki64 |
82 | | #elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */ |
83 | 0 | # define LONG_SEEK fseeko |
84 | | #elif defined(__MINGW32__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) && defined(__MSVCRT__) |
85 | | # define LONG_SEEK fseeko64 |
86 | | #elif defined(_WIN32) && !defined(__DJGPP__) |
87 | | # include <windows.h> |
88 | | static int LONG_SEEK(FILE* file, __int64 offset, int origin) { |
89 | | LARGE_INTEGER off; |
90 | | DWORD method; |
91 | | off.QuadPart = offset; |
92 | | if (origin == SEEK_END) |
93 | | method = FILE_END; |
94 | | else if (origin == SEEK_CUR) |
95 | | method = FILE_CURRENT; |
96 | | else |
97 | | method = FILE_BEGIN; |
98 | | |
99 | | if (SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, NULL, method)) |
100 | | return 0; |
101 | | else |
102 | | return -1; |
103 | | } |
104 | | #else |
105 | | # define LONG_SEEK fseek |
106 | | #endif |
107 | | |
108 | | #include <stdlib.h> /* malloc, free */ |
109 | | #include <stdio.h> /* FILE* */ |
110 | | #include <limits.h> /* UINT_MAX */ |
111 | | #include <assert.h> |
112 | | |
113 | | #define XXH_STATIC_LINKING_ONLY |
114 | | #include "xxhash.h" |
115 | | |
116 | | #define ZSTD_STATIC_LINKING_ONLY |
117 | | #include "zstd.h" |
118 | | #include "zstd_errors.h" |
119 | | #include "mem.h" |
120 | | #include "zstd_seekable.h" |
121 | | |
/* Drop any earlier definition of ERROR() and map an error name to the
 * negated ZSTD_error_<name> value, converted to size_t. */
#undef ERROR
#define ERROR(name) ((size_t)-ZSTD_error_##name)

/* Evaluates the I/O expression `f` exactly once and makes the enclosing
 * function return ERROR(seekableIO) when the result is negative.
 * Wrapped in do/while(0) so that `CHECK_IO(x);` is a single statement and
 * stays correct inside an unbraced if/else. */
#define CHECK_IO(f) do { int const errcod = (f); if (errcod < 0) return ERROR(seekableIO); } while (0)

#undef MIN
#undef MAX
/* note : both macros evaluate their arguments more than once */
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* Decompression gives up once the count of consecutive
 * ZSTD_decompressStream() calls producing no output exceeds this value. */
#define ZSTD_SEEKABLE_NO_OUTPUT_PROGRESS_MAX 16
133 | | |
134 | | /* Special-case callbacks for FILE* and in-memory modes, so that we can treat |
135 | | * them the same way as the advanced API */ |
/* Read callback for FILE* sources.
 * `opaque` is the FILE* to read from; exactly `n` bytes are requested into `buffer`.
 * @return : 0 when all `n` bytes were read, -1 on a short read */
static int ZSTD_seekable_read_FILE(void* opaque, void* buffer, size_t n)
{
    FILE* const stream = (FILE*)opaque;
    size_t const nbRead = fread(buffer, 1, n, stream);
    return (nbRead == n) ? 0 : -1;
}
144 | | |
/* Seek callback for FILE* sources.
 * Repositions the FILE* passed as `opaque` with LONG_SEEK, then flushes it.
 * @return : the LONG_SEEK result when it is non-zero, otherwise the fflush() result */
static int ZSTD_seekable_seek_FILE(void* opaque, long long offset, int origin)
{
    FILE* const stream = (FILE*)opaque;
    int const seekStatus = LONG_SEEK(stream, offset, origin);
    if (seekStatus != 0) {
        return seekStatus;
    }
    return fflush(stream);
}
151 | | |
/* In-memory source : a read cursor over a caller-provided buffer. */
typedef struct {
    const void *ptr;   /* start of the buffer */
    size_t size;       /* number of bytes available at `ptr` */
    size_t pos;        /* current read position, kept within [0, size] */
} buffWrapper_t;

/* Read callback for in-memory sources.
 * Copies `n` bytes from the current position into `buffer` and advances the position.
 * @return : 0 on success, -1 when fewer than `n` bytes remain (nothing is copied) */
static int ZSTD_seekable_read_buff(void* opaque, void* buffer, size_t n)
{
    buffWrapper_t* const buff = (buffWrapper_t*)opaque;
    assert(buff != NULL);
    assert(buff->pos <= buff->size);
    /* compare against the remaining byte count : `pos + n` could wrap around */
    if (n > buff->size - buff->pos) return -1;
    memcpy(buffer, (const unsigned char*)buff->ptr + buff->pos, n);
    buff->pos += n;
    return 0;
}

/* Seek callback for in-memory sources.
 * `origin` is one of SEEK_SET, SEEK_CUR, SEEK_END; `offset` is relative to it.
 * @return : 0 on success, -1 when `origin` is unknown or the target position
 *           falls outside [0, size] (the position is then left unchanged) */
static int ZSTD_seekable_seek_buff(void* opaque, long long offset, int origin)
{
    buffWrapper_t* const buff = (buffWrapper_t*) opaque;
    size_t base;
    assert(buff != NULL);
    switch (origin) {
    case SEEK_SET:
        base = 0;
        break;
    case SEEK_CUR:
        base = buff->pos;
        break;
    case SEEK_END:
        base = buff->size;
        break;
    default:
        return -1;   /* unknown origin : fail instead of using an unset offset */
    }
    if (base > buff->size) return -1;

    /* all arithmetic is unsigned and range-checked first, so nothing can overflow */
    if (offset < 0) {
        unsigned long long const back = 0ULL - (unsigned long long)offset;
        if (back > base) return -1;
        buff->pos = base - (size_t)back;
    } else {
        if ((unsigned long long)offset > buff->size - base) return -1;
        buff->pos = base + (size_t)offset;
    }
    return 0;
}
193 | | |
194 | | typedef struct { |
195 | | U64 cOffset; |
196 | | U64 dOffset; |
197 | | U32 checksum; |
198 | | } seekEntry_t; |
199 | | |
200 | | struct ZSTD_seekTable_s { |
201 | | seekEntry_t* entries; |
202 | | size_t tableLen; |
203 | | |
204 | | int checksumFlag; |
205 | | }; |
206 | | |
207 | 21.1k | #define SEEKABLE_BUFF_SIZE ZSTD_BLOCKSIZE_MAX |
208 | | |
209 | | struct ZSTD_seekable_s { |
210 | | ZSTD_DStream* dstream; |
211 | | ZSTD_seekTable seekTable; |
212 | | ZSTD_seekable_customFile src; |
213 | | |
214 | | U64 decompressedOffset; |
215 | | U32 curFrame; |
216 | | |
217 | | BYTE inBuff[SEEKABLE_BUFF_SIZE]; /* need to do our own input buffering */ |
218 | | BYTE outBuff[SEEKABLE_BUFF_SIZE]; /* so we can efficiently decompress the |
219 | | starts of chunks before we get to the |
220 | | desired section */ |
221 | | ZSTD_inBuffer in; /* maintain continuity across ZSTD_seekable_decompress operations */ |
222 | | buffWrapper_t buffWrapper; /* for `src.opaque` in in-memory mode */ |
223 | | |
224 | | XXH64_state_t xxhState; |
225 | | }; |
226 | | |
227 | | ZSTD_seekable* ZSTD_seekable_create(void) |
228 | 10.6k | { |
229 | 10.6k | ZSTD_seekable* const zs = (ZSTD_seekable*)malloc(sizeof(ZSTD_seekable)); |
230 | 10.6k | if (zs == NULL) return NULL; |
231 | | |
232 | | /* also initializes stage to zsds_init */ |
233 | 10.6k | memset(zs, 0, sizeof(*zs)); |
234 | | |
235 | 10.6k | zs->dstream = ZSTD_createDStream(); |
236 | 10.6k | if (zs->dstream == NULL) { |
237 | 0 | free(zs); |
238 | 0 | return NULL; |
239 | 0 | } |
240 | | |
241 | 10.6k | return zs; |
242 | 10.6k | } |
243 | | |
244 | | size_t ZSTD_seekable_free(ZSTD_seekable* zs) |
245 | 10.6k | { |
246 | 10.6k | if (zs == NULL) return 0; /* support free on null */ |
247 | 10.6k | ZSTD_freeDStream(zs->dstream); |
248 | 10.6k | free(zs->seekTable.entries); |
249 | 10.6k | free(zs); |
250 | 10.6k | return 0; |
251 | 10.6k | } |
252 | | |
253 | | ZSTD_seekTable* ZSTD_seekTable_create_fromSeekable(const ZSTD_seekable* zs) |
254 | 0 | { |
255 | 0 | ZSTD_seekTable* const st = (ZSTD_seekTable*)malloc(sizeof(ZSTD_seekTable)); |
256 | 0 | if (st==NULL) return NULL; |
257 | | |
258 | 0 | st->checksumFlag = zs->seekTable.checksumFlag; |
259 | 0 | st->tableLen = zs->seekTable.tableLen; |
260 | | |
261 | | /* Allocate an extra entry at the end to match logic of initial allocation */ |
262 | 0 | size_t const entriesSize = sizeof(seekEntry_t) * (zs->seekTable.tableLen + 1); |
263 | 0 | seekEntry_t* const entries = (seekEntry_t*)malloc(entriesSize); |
264 | 0 | if (entries==NULL) { |
265 | 0 | free(st); |
266 | 0 | return NULL; |
267 | 0 | } |
268 | | |
269 | 0 | memcpy(entries, zs->seekTable.entries, entriesSize); |
270 | 0 | st->entries = entries; |
271 | 0 | return st; |
272 | 0 | } |
273 | | |
274 | | size_t ZSTD_seekTable_free(ZSTD_seekTable* st) |
275 | 0 | { |
276 | 0 | if (st == NULL) return 0; /* support free on null */ |
277 | 0 | free(st->entries); |
278 | 0 | free(st); |
279 | 0 | return 0; |
280 | 0 | } |
281 | | |
282 | | /** ZSTD_seekable_offsetToFrameIndex() : |
283 | | * Performs a binary search to find the last frame with a decompressed offset |
284 | | * <= pos |
285 | | * @return : the frame's index */ |
286 | | unsigned ZSTD_seekable_offsetToFrameIndex(const ZSTD_seekable* zs, unsigned long long pos) |
287 | 10.6k | { |
288 | 10.6k | return ZSTD_seekTable_offsetToFrameIndex(&zs->seekTable, pos); |
289 | 10.6k | } |
290 | | |
291 | | unsigned ZSTD_seekTable_offsetToFrameIndex(const ZSTD_seekTable* st, unsigned long long pos) |
292 | 10.6k | { |
293 | 10.6k | U32 lo = 0; |
294 | 10.6k | U32 hi = (U32)st->tableLen; |
295 | 10.6k | assert(st->tableLen <= UINT_MAX); |
296 | | |
297 | 10.6k | if (pos >= st->entries[st->tableLen].dOffset) { |
298 | 51 | return (unsigned)st->tableLen; |
299 | 51 | } |
300 | | |
301 | 21.1k | while (lo + 1 < hi) { |
302 | 10.5k | U32 const mid = lo + ((hi - lo) >> 1); |
303 | 10.5k | if (st->entries[mid].dOffset <= pos) { |
304 | 0 | lo = mid; |
305 | 10.5k | } else { |
306 | 10.5k | hi = mid; |
307 | 10.5k | } |
308 | 10.5k | } |
309 | 10.5k | return lo; |
310 | 10.6k | } |
311 | | |
312 | | unsigned ZSTD_seekable_getNumFrames(const ZSTD_seekable* zs) |
313 | 0 | { |
314 | 0 | return ZSTD_seekTable_getNumFrames(&zs->seekTable); |
315 | 0 | } |
316 | | |
317 | | unsigned ZSTD_seekTable_getNumFrames(const ZSTD_seekTable* st) |
318 | 0 | { |
319 | 0 | assert(st->tableLen <= UINT_MAX); |
320 | 0 | return (unsigned)st->tableLen; |
321 | 0 | } |
322 | | |
323 | | unsigned long long ZSTD_seekable_getFrameCompressedOffset(const ZSTD_seekable* zs, unsigned frameIndex) |
324 | 0 | { |
325 | 0 | return ZSTD_seekTable_getFrameCompressedOffset(&zs->seekTable, frameIndex); |
326 | 0 | } |
327 | | |
328 | | unsigned long long ZSTD_seekTable_getFrameCompressedOffset(const ZSTD_seekTable* st, unsigned frameIndex) |
329 | 0 | { |
330 | 0 | if (frameIndex >= st->tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE; |
331 | 0 | return st->entries[frameIndex].cOffset; |
332 | 0 | } |
333 | | |
334 | | unsigned long long ZSTD_seekable_getFrameDecompressedOffset(const ZSTD_seekable* zs, unsigned frameIndex) |
335 | 0 | { |
336 | 0 | return ZSTD_seekTable_getFrameDecompressedOffset(&zs->seekTable, frameIndex); |
337 | 0 | } |
338 | | |
339 | | unsigned long long ZSTD_seekTable_getFrameDecompressedOffset(const ZSTD_seekTable* st, unsigned frameIndex) |
340 | 0 | { |
341 | 0 | if (frameIndex >= st->tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE; |
342 | 0 | return st->entries[frameIndex].dOffset; |
343 | 0 | } |
344 | | |
345 | | size_t ZSTD_seekable_getFrameCompressedSize(const ZSTD_seekable* zs, unsigned frameIndex) |
346 | 0 | { |
347 | 0 | return ZSTD_seekTable_getFrameCompressedSize(&zs->seekTable, frameIndex); |
348 | 0 | } |
349 | | |
350 | | size_t ZSTD_seekTable_getFrameCompressedSize(const ZSTD_seekTable* st, unsigned frameIndex) |
351 | 0 | { |
352 | 0 | if (frameIndex >= st->tableLen) return ERROR(frameIndex_tooLarge); |
353 | 0 | return st->entries[frameIndex + 1].cOffset - |
354 | 0 | st->entries[frameIndex].cOffset; |
355 | 0 | } |
356 | | |
357 | | size_t ZSTD_seekable_getFrameDecompressedSize(const ZSTD_seekable* zs, unsigned frameIndex) |
358 | 0 | { |
359 | 0 | return ZSTD_seekTable_getFrameDecompressedSize(&zs->seekTable, frameIndex); |
360 | 0 | } |
361 | | |
362 | | size_t ZSTD_seekTable_getFrameDecompressedSize(const ZSTD_seekTable* st, unsigned frameIndex) |
363 | 0 | { |
364 | 0 | if (frameIndex > st->tableLen) return ERROR(frameIndex_tooLarge); |
365 | 0 | return st->entries[frameIndex + 1].dOffset - |
366 | 0 | st->entries[frameIndex].dOffset; |
367 | 0 | } |
368 | | |
369 | | static size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable* zs) |
370 | 10.6k | { |
371 | 10.6k | int checksumFlag; |
372 | 10.6k | ZSTD_seekable_customFile src = zs->src; |
373 | | /* read the footer, fixed size */ |
374 | 10.6k | CHECK_IO(src.seek(src.opaque, -(int)ZSTD_seekTableFooterSize, SEEK_END)); |
375 | 10.6k | CHECK_IO(src.read(src.opaque, zs->inBuff, ZSTD_seekTableFooterSize)); |
376 | | |
377 | 10.6k | if (MEM_readLE32(zs->inBuff + 5) != ZSTD_SEEKABLE_MAGICNUMBER) { |
378 | 0 | return ERROR(prefix_unknown); |
379 | 0 | } |
380 | | |
381 | 10.6k | { BYTE const sfd = zs->inBuff[4]; |
382 | 10.6k | checksumFlag = sfd >> 7; |
383 | | |
384 | | /* check reserved bits */ |
385 | 10.6k | if ((sfd >> 2) & 0x1f) { |
386 | 0 | return ERROR(corruption_detected); |
387 | 0 | } } |
388 | | |
389 | 10.6k | { U32 const numFrames = MEM_readLE32(zs->inBuff); |
390 | 10.6k | U32 const sizePerEntry = 8 + (checksumFlag?4:0); |
391 | 10.6k | U32 const tableSize = sizePerEntry * numFrames; |
392 | 10.6k | U32 const frameSize = tableSize + ZSTD_seekTableFooterSize + ZSTD_SKIPPABLEHEADERSIZE; |
393 | | |
394 | 10.6k | U32 remaining = frameSize - ZSTD_seekTableFooterSize; /* don't need to re-read footer */ |
395 | 10.6k | { U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE); |
396 | 10.6k | CHECK_IO(src.seek(src.opaque, -(S64)frameSize, SEEK_END)); |
397 | 10.6k | CHECK_IO(src.read(src.opaque, zs->inBuff, toRead)); |
398 | 10.6k | remaining -= toRead; |
399 | 10.6k | } |
400 | | |
401 | 10.6k | if (MEM_readLE32(zs->inBuff) != (ZSTD_MAGIC_SKIPPABLE_START | 0xE)) { |
402 | 0 | return ERROR(prefix_unknown); |
403 | 0 | } |
404 | 10.6k | if (MEM_readLE32(zs->inBuff+4) + ZSTD_SKIPPABLEHEADERSIZE != frameSize) { |
405 | 0 | return ERROR(prefix_unknown); |
406 | 0 | } |
407 | | |
408 | 10.6k | { /* Allocate an extra entry at the end so that we can do size |
409 | | * computations on the last element without special case */ |
410 | 10.6k | seekEntry_t* const entries = (seekEntry_t*)malloc(sizeof(seekEntry_t) * (numFrames + 1)); |
411 | | |
412 | 10.6k | U32 idx = 0; |
413 | 10.6k | U32 pos = 8; |
414 | | |
415 | 10.6k | U64 cOffset = 0; |
416 | 10.6k | U64 dOffset = 0; |
417 | | |
418 | 10.6k | if (entries == NULL) return ERROR(memory_allocation); |
419 | | |
420 | | /* compute cumulative positions */ |
421 | 31.8k | for (; idx < numFrames; idx++) { |
422 | 21.1k | if (pos + sizePerEntry > SEEKABLE_BUFF_SIZE) { |
423 | 0 | U32 const offset = SEEKABLE_BUFF_SIZE - pos; |
424 | 0 | U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE - offset); |
425 | 0 | memmove(zs->inBuff, zs->inBuff + pos, offset); /* move any data we haven't read yet */ |
426 | 0 | CHECK_IO(src.read(src.opaque, zs->inBuff+offset, toRead)); |
427 | 0 | remaining -= toRead; |
428 | 0 | pos = 0; |
429 | 0 | } |
430 | 21.1k | entries[idx].cOffset = cOffset; |
431 | 21.1k | entries[idx].dOffset = dOffset; |
432 | | |
433 | 21.1k | cOffset += MEM_readLE32(zs->inBuff + pos); |
434 | 21.1k | pos += 4; |
435 | 21.1k | dOffset += MEM_readLE32(zs->inBuff + pos); |
436 | 21.1k | pos += 4; |
437 | 21.1k | if (checksumFlag) { |
438 | 4.53k | entries[idx].checksum = MEM_readLE32(zs->inBuff + pos); |
439 | 4.53k | pos += 4; |
440 | 4.53k | } |
441 | 21.1k | } |
442 | 10.6k | entries[numFrames].cOffset = cOffset; |
443 | 10.6k | entries[numFrames].dOffset = dOffset; |
444 | | |
445 | 10.6k | zs->seekTable.entries = entries; |
446 | 10.6k | zs->seekTable.tableLen = numFrames; |
447 | 10.6k | zs->seekTable.checksumFlag = checksumFlag; |
448 | 10.6k | return 0; |
449 | 10.6k | } |
450 | 10.6k | } |
451 | 10.6k | } |
452 | | |
453 | | size_t ZSTD_seekable_initBuff(ZSTD_seekable* zs, const void* src, size_t srcSize) |
454 | 10.6k | { |
455 | 10.6k | zs->buffWrapper = (buffWrapper_t){src, srcSize, 0}; |
456 | 10.6k | { ZSTD_seekable_customFile srcFile = {&zs->buffWrapper, |
457 | 10.6k | &ZSTD_seekable_read_buff, |
458 | 10.6k | &ZSTD_seekable_seek_buff}; |
459 | 10.6k | return ZSTD_seekable_initAdvanced(zs, srcFile); } |
460 | 10.6k | } |
461 | | |
462 | | size_t ZSTD_seekable_initFile(ZSTD_seekable* zs, FILE* src) |
463 | 0 | { |
464 | 0 | ZSTD_seekable_customFile srcFile = {src, &ZSTD_seekable_read_FILE, |
465 | 0 | &ZSTD_seekable_seek_FILE}; |
466 | 0 | return ZSTD_seekable_initAdvanced(zs, srcFile); |
467 | 0 | } |
468 | | |
469 | | size_t ZSTD_seekable_initAdvanced(ZSTD_seekable* zs, ZSTD_seekable_customFile src) |
470 | 10.6k | { |
471 | 10.6k | zs->src = src; |
472 | | |
473 | 10.6k | { const size_t seekTableInit = ZSTD_seekable_loadSeekTable(zs); |
474 | 10.6k | if (ZSTD_isError(seekTableInit)) return seekTableInit; } |
475 | | |
476 | 10.6k | zs->decompressedOffset = (U64)-1; |
477 | 10.6k | zs->curFrame = (U32)-1; |
478 | | |
479 | 10.6k | { const size_t dstreamInit = ZSTD_initDStream(zs->dstream); |
480 | 10.6k | if (ZSTD_isError(dstreamInit)) return dstreamInit; } |
481 | 10.6k | return 0; |
482 | 10.6k | } |
483 | | |
484 | | size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t len, unsigned long long offset) |
485 | 10.6k | { |
486 | 10.6k | unsigned long long const eos = zs->seekTable.entries[zs->seekTable.tableLen].dOffset; |
487 | 10.6k | if (offset + len > eos) { |
488 | 0 | len = eos - offset; |
489 | 0 | } |
490 | | |
491 | 10.6k | U32 targetFrame = ZSTD_seekable_offsetToFrameIndex(zs, offset); |
492 | 10.6k | U32 noOutputProgressCount = 0; |
493 | 10.6k | size_t srcBytesRead = 0; |
494 | 10.6k | do { |
495 | | /* check if we can continue from a previous decompress job */ |
496 | 10.6k | if (targetFrame != zs->curFrame || offset < zs->decompressedOffset) { |
497 | 10.6k | zs->decompressedOffset = zs->seekTable.entries[targetFrame].dOffset; |
498 | 10.6k | zs->curFrame = targetFrame; |
499 | | |
500 | 10.6k | assert(zs->seekTable.entries[targetFrame].cOffset < LLONG_MAX); |
501 | 10.6k | CHECK_IO(zs->src.seek(zs->src.opaque, |
502 | 10.6k | (long long)zs->seekTable.entries[targetFrame].cOffset, |
503 | 10.6k | SEEK_SET)); |
504 | 10.6k | zs->in = (ZSTD_inBuffer){zs->inBuff, 0, 0}; |
505 | 10.6k | XXH64_reset(&zs->xxhState, 0); |
506 | 10.6k | ZSTD_DCtx_reset(zs->dstream, ZSTD_reset_session_only); |
507 | 10.6k | if (zs->buffWrapper.size && srcBytesRead > zs->buffWrapper.size) { |
508 | 0 | return ERROR(seekableIO); |
509 | 0 | } |
510 | 10.6k | } |
511 | | |
512 | 36.0k | while (zs->decompressedOffset < offset + len) { |
513 | 25.5k | size_t toRead; |
514 | 25.5k | ZSTD_outBuffer outTmp; |
515 | 25.5k | size_t prevOutPos; |
516 | 25.5k | size_t prevInPos; |
517 | 25.5k | size_t forwardProgress; |
518 | 25.5k | if (zs->decompressedOffset < offset) { |
519 | | /* dummy decompressions until we get to the target offset */ |
520 | 11.5k | outTmp = (ZSTD_outBuffer){zs->outBuff, (size_t) (MIN(SEEKABLE_BUFF_SIZE, offset - zs->decompressedOffset)), 0}; |
521 | 13.9k | } else { |
522 | 13.9k | outTmp = (ZSTD_outBuffer){dst, len, (size_t) (zs->decompressedOffset - offset)}; |
523 | 13.9k | } |
524 | | |
525 | 25.5k | prevOutPos = outTmp.pos; |
526 | 25.5k | prevInPos = zs->in.pos; |
527 | 25.5k | toRead = ZSTD_decompressStream(zs->dstream, &outTmp, &zs->in); |
528 | 25.5k | if (ZSTD_isError(toRead)) { |
529 | 0 | return toRead; |
530 | 0 | } |
531 | | |
532 | 25.5k | if (zs->seekTable.checksumFlag) { |
533 | 12.5k | XXH64_update(&zs->xxhState, (BYTE*)outTmp.dst + prevOutPos, |
534 | 12.5k | outTmp.pos - prevOutPos); |
535 | 12.5k | } |
536 | 25.5k | forwardProgress = outTmp.pos - prevOutPos; |
537 | 25.5k | if (forwardProgress == 0) { |
538 | 9.54k | if (noOutputProgressCount++ > ZSTD_SEEKABLE_NO_OUTPUT_PROGRESS_MAX) { |
539 | 0 | return ERROR(seekableIO); |
540 | 0 | } |
541 | 15.9k | } else { |
542 | 15.9k | noOutputProgressCount = 0; |
543 | 15.9k | } |
544 | 25.5k | zs->decompressedOffset += forwardProgress; |
545 | 25.5k | srcBytesRead += zs->in.pos - prevInPos; |
546 | | |
547 | 25.5k | if (toRead == 0) { |
548 | | /* frame complete */ |
549 | | |
550 | | /* verify checksum */ |
551 | 90 | if (zs->seekTable.checksumFlag && |
552 | 90 | (XXH64_digest(&zs->xxhState) & 0xFFFFFFFFU) != |
553 | 48 | zs->seekTable.entries[targetFrame].checksum) { |
554 | 0 | return ERROR(corruption_detected); |
555 | 0 | } |
556 | | |
557 | 90 | if (zs->decompressedOffset < offset + len) { |
558 | | /* go back to the start and force a reset of the stream */ |
559 | 0 | targetFrame = ZSTD_seekable_offsetToFrameIndex(zs, zs->decompressedOffset); |
560 | | /* in this case it will fail later with corruption_detected, since last block does not have checksum */ |
561 | 0 | assert(targetFrame != zs->seekTable.tableLen); |
562 | 0 | } |
563 | 90 | break; |
564 | 90 | } |
565 | | |
566 | | /* read in more data if we're done with this buffer */ |
567 | 25.4k | if (zs->in.pos == zs->in.size) { |
568 | 17.8k | toRead = MIN(toRead, SEEKABLE_BUFF_SIZE); |
569 | 17.8k | CHECK_IO(zs->src.read(zs->src.opaque, zs->inBuff, toRead)); |
570 | 17.8k | zs->in.size = toRead; |
571 | 17.8k | zs->in.pos = 0; |
572 | 17.8k | } |
573 | 25.4k | } /* while (zs->decompressedOffset < offset + len) */ |
574 | 10.6k | } while (zs->decompressedOffset != offset + len); |
575 | | |
576 | 10.6k | return len; |
577 | 10.6k | } |
578 | | |
579 | | size_t ZSTD_seekable_decompressFrame(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned frameIndex) |
580 | 0 | { |
581 | 0 | if (frameIndex >= zs->seekTable.tableLen) { |
582 | 0 | return ERROR(frameIndex_tooLarge); |
583 | 0 | } |
584 | | |
585 | 0 | { size_t const decompressedSize = |
586 | 0 | zs->seekTable.entries[frameIndex + 1].dOffset - |
587 | 0 | zs->seekTable.entries[frameIndex].dOffset; |
588 | 0 | if (dstSize < decompressedSize) { |
589 | 0 | return ERROR(dstSize_tooSmall); |
590 | 0 | } |
591 | 0 | return ZSTD_seekable_decompress( |
592 | 0 | zs, dst, decompressedSize, |
593 | 0 | zs->seekTable.entries[frameIndex].dOffset); |
594 | 0 | } |
595 | 0 | } |