Coverage Report

Created: 2025-08-26 06:15

/src/zstd/contrib/seekable_format/zstdseek_compress.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) Meta Platforms, Inc. and affiliates.
3
 * All rights reserved.
4
 *
5
 * This source code is licensed under both the BSD-style license (found in the
6
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
 * in the COPYING file in the root directory of this source tree).
8
 */
9
10
#include <stdlib.h>     /* malloc, free */
11
#include <limits.h>     /* UINT_MAX */
12
#include <assert.h>
13
14
#define XXH_STATIC_LINKING_ONLY
15
#include "xxhash.h"
16
17
#define ZSTD_STATIC_LINKING_ONLY
18
#include "zstd.h"
19
#include "zstd_errors.h"
20
#include "mem.h"
21
22
#include "zstd_seekable.h"
23
24
108k
/* Evaluates `f` exactly once; when the result is non-zero, returns it from the
 * enclosing function. The do { } while (0) wrapper makes `CHECK_Z(x);` a single
 * statement, so it remains valid inside an unbraced if/else. */
#define CHECK_Z(f) do { size_t const ret = (f); if (ret != 0) return ret; } while (0)
25
26
/* Discard any previous ERROR definition, then map an error-name suffix to its
 * size_t error value: the negated ZSTD_error_<suffix> constant cast to size_t. */
#undef ERROR
#define ERROR(errName) ((size_t)-ZSTD_error_##errName)
28
29
/* Discard any previous MIN/MAX definitions and provide local ones.
 * Both are plain macros: the selected argument is evaluated twice, so do not
 * pass expressions with side effects. */
#undef MIN
#undef MAX
#define MIN(x, y) ((x) < (y) ? (x) : (y))
#define MAX(x, y) ((x) > (y) ? (x) : (y))
33
34
/* One record of the frame log. ZSTD_seekable_logFrame() fills a record from its
 * compressedSize, decompressedSize and checksum arguments, and
 * ZSTD_seekable_writeSeekTable() later emits the fields in this order. */
typedef struct {
35
    U32 cSize;      /* compressed size recorded for the frame */
36
    U32 dSize;      /* decompressed size recorded for the frame */
37
    U32 checksum;   /* checksum recorded for the frame; only written to the table when checksumFlag is set */
38
} framelogEntry_t;
39
40
struct ZSTD_frameLog_s {
41
    framelogEntry_t* entries;
42
    U32 size;
43
    U32 capacity;
44
45
    int checksumFlag;
46
47
    /* for use when streaming out the seek table */
48
    U32 seekTablePos;
49
    U32 seekTableIndex;
50
} framelog_t;
51
52
/* State of one seekable compression stream: the underlying zstd stream, the log
 * of finished frames, and running counters for the frame in progress. */
struct ZSTD_seekable_CStream_s {
53
    /* underlying compressor, obtained from ZSTD_createCStream() */
    ZSTD_CStream* cstream;
54
    /* log of finished frames (embedded, not a pointer) */
    ZSTD_frameLog framelog;
55
56
    /* bytes produced so far for the current frame; reset by ZSTD_seekable_endFrame() */
    U32 frameCSize;
57
    /* bytes consumed so far for the current frame; reset by ZSTD_seekable_endFrame() */
    U32 frameDSize;
58
59
    /* hash state fed with the current frame's input when framelog.checksumFlag is set */
    XXH64_state_t xxhState;
60
61
    /* a frame is ended as soon as frameDSize reaches this value */
    U32 maxFrameSize;
62
63
    /* set to 1 by ZSTD_seekable_endStream() once seek-table output has started */
    int writingSeekTable;
64
};
65
66
/* Allocates the initial entries array of a frame log.
 *
 * fl->entries always receives the malloc() result (NULL on failure).
 * Returns 0 and records the capacity on success, or ERROR(memory_allocation)
 * when the allocation fails (fl->capacity is then left untouched). */
static size_t ZSTD_seekable_frameLog_allocVec(ZSTD_frameLog* fl)
{
    /* number of records reserved up front */
    enum { initialEntryCount = 16 };

    framelogEntry_t* const vec =
            (framelogEntry_t*)malloc(sizeof(framelogEntry_t) * (size_t)initialEntryCount);

    fl->entries = vec;
    if (vec == NULL) {
        return ERROR(memory_allocation);
    }

    fl->capacity = (U32)initialEntryCount;
    return 0;
}
76
77
/* Releases the entries array owned by `fl`; the frame log object itself is not
 * freed. Accepts NULL. Always returns 0. */
static size_t ZSTD_seekable_frameLog_freeVec(ZSTD_frameLog* fl)
{
    if (fl == NULL) {
        return 0;
    }
    free(fl->entries);
    return 0;
}
82
83
ZSTD_frameLog* ZSTD_seekable_createFrameLog(int checksumFlag)
84
0
{
85
0
    ZSTD_frameLog* const fl = (ZSTD_frameLog*)malloc(sizeof(ZSTD_frameLog));
86
0
    if (fl == NULL) return NULL;
87
88
0
    if (ZSTD_isError(ZSTD_seekable_frameLog_allocVec(fl))) {
89
0
        free(fl);
90
0
        return NULL;
91
0
    }
92
93
0
    fl->checksumFlag = checksumFlag;
94
0
    fl->seekTablePos = 0;
95
0
    fl->seekTableIndex = 0;
96
0
    fl->size = 0;
97
98
0
    return fl;
99
0
}
100
101
/* Frees a frame log together with its entries array. Accepts NULL.
 * Always returns 0. */
size_t ZSTD_seekable_freeFrameLog(ZSTD_frameLog* fl)
{
    if (fl != NULL) {
        free(fl->entries);
        free(fl);
    }
    return 0;
}
107
108
ZSTD_seekable_CStream* ZSTD_seekable_createCStream(void)
109
12.7k
{
110
12.7k
    ZSTD_seekable_CStream* const zcs = (ZSTD_seekable_CStream*)malloc(sizeof(ZSTD_seekable_CStream));
111
12.7k
    if (zcs == NULL) return NULL;
112
113
12.7k
    memset(zcs, 0, sizeof(*zcs));
114
115
12.7k
    zcs->cstream = ZSTD_createCStream();
116
12.7k
    if (zcs->cstream == NULL) goto failed1;
117
118
12.7k
    if (ZSTD_isError(ZSTD_seekable_frameLog_allocVec(&zcs->framelog))) goto failed2;
119
120
12.7k
    return zcs;
121
122
0
failed2:
123
0
    ZSTD_freeCStream(zcs->cstream);
124
0
failed1:
125
0
    free(zcs);
126
0
    return NULL;
127
0
}
128
129
/* Destroys a seekable compression stream: the underlying zstd stream, the
 * frame-log entries and the stream object itself. Accepts NULL.
 * Always returns 0. */
size_t ZSTD_seekable_freeCStream(ZSTD_seekable_CStream* zcs)
{
    if (zcs != NULL) {
        ZSTD_freeCStream(zcs->cstream);
        ZSTD_seekable_frameLog_freeVec(&zcs->framelog);
        free(zcs);
    }
    return 0;
}
137
138
/* Prepares `zcs` for a new compression session.
 *
 * compressionLevel  forwarded to ZSTD_initCStream().
 * checksumFlag      stored in the frame log; when non-zero the hash state is reset.
 * maxFrameSize      frame size limit; 0 selects
 *                   ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE.
 *
 * Returns ERROR(frameParameter_unsupported) when maxFrameSize exceeds
 * ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE (the frame counters have already
 * been cleared at that point); otherwise the result of ZSTD_initCStream(). */
size_t ZSTD_seekable_initCStream(ZSTD_seekable_CStream* zcs,
                                 int compressionLevel,
                                 int checksumFlag,
                                 unsigned maxFrameSize)
{
    ZSTD_frameLog* const frameLog = &zcs->framelog;

    /* forget any frames and partial-frame counters from a previous session */
    frameLog->size = 0;
    zcs->frameCSize = 0;
    zcs->frameDSize = 0;

    /* make sure maxFrameSize has a reasonable value */
    if (maxFrameSize > ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE) {
        return ERROR(frameParameter_unsupported);
    }

    if (maxFrameSize == 0) {
        zcs->maxFrameSize = ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE;
    } else {
        zcs->maxFrameSize = maxFrameSize;
    }

    frameLog->checksumFlag = checksumFlag;
    if (checksumFlag) {
        XXH64_reset(&zcs->xxhState, 0);
    }

    /* rewind the seek-table writer */
    frameLog->seekTablePos = 0;
    frameLog->seekTableIndex = 0;
    zcs->writingSeekTable = 0;

    return ZSTD_initCStream(zcs->cstream, compressionLevel);
}
166
167
/* Appends one frame record to the log, doubling the entries array when full.
 *
 * Returns 0 on success, ERROR(frameIndex_tooLarge) when the log already holds
 * ZSTD_SEEKABLE_MAXFRAMES records, or ERROR(memory_allocation) when the array
 * cannot be grown (the existing array is kept in that case). */
size_t ZSTD_seekable_logFrame(ZSTD_frameLog* fl,
                              unsigned compressedSize,
                              unsigned decompressedSize,
                              unsigned checksum)
{
    if (fl->size == ZSTD_SEEKABLE_MAXFRAMES) {
        return ERROR(frameIndex_tooLarge);
    }

    /* grow the buffer if required */
    if (fl->size == fl->capacity) {
        /* doubling keeps the amortized cost of an append constant */
        size_t const doubled = fl->capacity * 2;
        framelogEntry_t* const grown =
                (framelogEntry_t*)realloc(fl->entries, sizeof(framelogEntry_t) * doubled);

        if (grown == NULL) {
            return ERROR(memory_allocation);
        }

        fl->entries = grown;
        assert(doubled <= UINT_MAX);
        fl->capacity = (U32)doubled;
    }

    {   framelogEntry_t* const slot = &fl->entries[fl->size];
        slot->cSize = compressedSize;
        slot->dSize = decompressedSize;
        slot->checksum = checksum;
    }
    fl->size++;

    return 0;
}
196
197
/* Ends the current frame and, once ZSTD_endStream() reports 0, records it in
 * the frame log and resets the per-frame state.
 *
 * Bytes written to `output` are always added to the frame's compressed size.
 * Returns the non-zero ZSTD_endStream() result unchanged (the frame is not yet
 * logged), a non-zero ZSTD_seekable_logFrame() result, or 0 when the frame has
 * been logged and the stream is ready for the next one. */
size_t ZSTD_seekable_endFrame(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output)
{
    size_t const outPosBefore = output->pos;
    size_t status = ZSTD_endStream(zcs->cstream, output);

    zcs->frameCSize += (U32)(output->pos - outPosBefore);

    /* need to flush before doing the rest */
    if (status != 0) {
        return status;
    }

    /* frame done: store its sizes (and checksum, if enabled) for later */
    {   unsigned frameChecksum = 0;
        if (zcs->framelog.checksumFlag) {
            /* only the low 32 bits of the 64-bit digest are kept */
            frameChecksum = (unsigned)(XXH64_digest(&zcs->xxhState) & 0xFFFFFFFFU);
        }
        status = ZSTD_seekable_logFrame(&zcs->framelog,
                                        zcs->frameCSize, zcs->frameDSize,
                                        frameChecksum);
    }
    if (status != 0) {
        return status;
    }

    /* reset for the next frame */
    zcs->frameCSize = 0;
    zcs->frameDSize = 0;

    ZSTD_CCtx_reset(zcs->cstream, ZSTD_reset_session_only);
    if (zcs->framelog.checksumFlag) {
        XXH64_reset(&zcs->xxhState, 0);
    }

    return 0;
}
227
228
/* Compresses input into the current frame, never consuming more than what is
 * left before the frame reaches maxFrameSize, and ends the frame once that
 * limit is hit.
 *
 * input->pos and output->pos are advanced by the amounts consumed/produced.
 * Returns an error code from ZSTD_compressStream() or ZSTD_seekable_endFrame(),
 * otherwise the number of input bytes that still fit in the current frame
 * (maxFrameSize when a frame has just been ended). */
size_t ZSTD_seekable_compressStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
{
    const BYTE* const srcStart = (const BYTE*) input->src + input->pos;
    size_t const available = input->size - input->pos;
    size_t chunkLen;

    assert(zcs->maxFrameSize < INT_MAX);
    ZSTD_CCtx_setParameter(zcs->cstream, ZSTD_c_srcSizeHint, (int)zcs->maxFrameSize);

    /* never feed more than what is left of the current frame */
    chunkLen = MIN(available, (size_t)(zcs->maxFrameSize - zcs->frameDSize));

    /* if we haven't finished flushing the last frame, don't start writing a new one */
    if (chunkLen > 0) {
        ZSTD_inBuffer chunk = { srcStart, chunkLen, 0 };
        size_t const outPosBefore = output->pos;
        size_t const status = ZSTD_compressStream(zcs->cstream, output, &chunk);

        /* hash exactly the bytes the compressor consumed */
        if (zcs->framelog.checksumFlag) {
            XXH64_update(&zcs->xxhState, srcStart, chunk.pos);
        }

        zcs->frameCSize += (U32)(output->pos - outPosBefore);
        zcs->frameDSize += (U32)chunk.pos;
        input->pos += chunk.pos;

        if (ZSTD_isError(status)) {
            return status;
        }
    }

    if (zcs->maxFrameSize != zcs->frameDSize) {
        /* frame still open: report how much more input it can take */
        return (size_t)(zcs->maxFrameSize - zcs->frameDSize);
    }

    /* frame is full: log it and start over */
    {   size_t const endStatus = ZSTD_seekable_endFrame(zcs, output);
        if (ZSTD_isError(endStatus)) {
            return endStatus;
        }
    }

    /* get the client ready for the next frame */
    return (size_t)zcs->maxFrameSize;
}
267
268
static inline size_t ZSTD_seekable_seekTableSize(const ZSTD_frameLog* fl)
269
12.7k
{
270
12.7k
    size_t const sizePerFrame = 8 + (fl->checksumFlag?4:0);
271
12.7k
    size_t const seekTableLen = ZSTD_SKIPPABLEHEADERSIZE +
272
12.7k
                                sizePerFrame * fl->size +
273
12.7k
                                ZSTD_seekTableFooterSize;
274
275
12.7k
    return seekTableLen;
276
12.7k
}
277
278
static inline size_t ZSTD_stwrite32(ZSTD_frameLog* fl,
279
                                    ZSTD_outBuffer* output, U32 const value,
280
                                    U32 const offset)
281
108k
{
282
108k
    if (fl->seekTablePos < offset + 4) {
283
108k
        BYTE tmp[4]; /* so that we can work with buffers too small to write a whole word to */
284
108k
        size_t const lenWrite =
285
108k
                MIN(output->size - output->pos, offset + 4 - fl->seekTablePos);
286
108k
        MEM_writeLE32(tmp, value);
287
108k
        memcpy((BYTE*)output->dst + output->pos,
288
108k
               tmp + (fl->seekTablePos - offset), lenWrite);
289
108k
        output->pos += lenWrite;
290
108k
        fl->seekTablePos += (U32)lenWrite;
291
292
108k
        if (lenWrite < 4) return ZSTD_seekable_seekTableSize(fl) - fl->seekTablePos;
293
108k
    }
294
108k
    return 0;
295
108k
}
296
297
/* Streams the seek table for `fl` into `output`, resuming where a previous call
 * stopped.
 *
 * Layout written: skippable-frame magic, table length (excluding the skippable
 * header), one entry per logged frame (cSize, dSize and, when checksumFlag is
 * set, checksum), then the footer: frame count, one descriptor byte, and the
 * seekable magic number.
 *
 * Returns 0 once the whole table has been written, the number of bytes still
 * to be written when `output` ran out of room, or ERROR(GENERIC) if the final
 * position does not match the expected table length. */
size_t ZSTD_seekable_writeSeekTable(ZSTD_frameLog* fl, ZSTD_outBuffer* output)
{
    /* seekTableIndex: the current index in the table and
     * seekTablePos: the amount of the table written so far
     *
     * This function is written this way so that if it has to return early
     * because of a small buffer, it can keep going where it left off.
     */

    size_t const sizePerFrame = 8 + (fl->checksumFlag?4:0);
    size_t const seekTableLen = ZSTD_seekable_seekTableSize(fl);

    CHECK_Z(ZSTD_stwrite32(fl, output, ZSTD_MAGIC_SKIPPABLE_START | 0xE, 0));
    assert(seekTableLen <= (size_t)UINT_MAX);
    CHECK_Z(ZSTD_stwrite32(fl, output, (U32)seekTableLen - ZSTD_SKIPPABLEHEADERSIZE, 4));

    while (fl->seekTableIndex < fl->size) {
        unsigned long long const start = ZSTD_SKIPPABLEHEADERSIZE + sizePerFrame * fl->seekTableIndex;
        assert(start + 8 <= UINT_MAX);
        CHECK_Z(ZSTD_stwrite32(fl, output,
                               fl->entries[fl->seekTableIndex].cSize,
                               (U32)start + 0));

        CHECK_Z(ZSTD_stwrite32(fl, output,
                               fl->entries[fl->seekTableIndex].dSize,
                               (U32)start + 4));

        if (fl->checksumFlag) {
            CHECK_Z(ZSTD_stwrite32(
                    fl, output, fl->entries[fl->seekTableIndex].checksum,
                    (U32)start + 8));
        }

        /* only advance once every field of the entry has been fully written */
        fl->seekTableIndex++;
    }

    assert(seekTableLen <= UINT_MAX);
    CHECK_Z(ZSTD_stwrite32(fl, output, fl->size,
                           (U32)seekTableLen - ZSTD_seekTableFooterSize));

    if (output->size - output->pos < 1) return seekTableLen - fl->seekTablePos;
    if (fl->seekTablePos < seekTableLen - 4) {
        /* Descriptor byte: bit 7 is the checksum flag. checksumFlag is an int
         * that the rest of the file treats as a boolean, so normalise it to
         * 0/1 before shifting; shifting the raw value would lose the bit for
         * any non-zero value other than 1 (e.g. 2 << 7 truncates to 0) and is
         * undefined for negative values. */
        BYTE const sfd = (BYTE)((fl->checksumFlag ? 1 : 0) << 7);

        ((BYTE*)output->dst)[output->pos] = sfd;
        output->pos++;
        fl->seekTablePos++;
    }

    CHECK_Z(ZSTD_stwrite32(fl, output, ZSTD_SEEKABLE_MAGICNUMBER,
                           (U32)seekTableLen - 4));

    if (fl->seekTablePos != seekTableLen) return ERROR(GENERIC);
    return 0;
}
352
353
/* Finishes the stream: ends the frame in progress (unless seek-table output
 * has already started) and then writes the seek table.
 *
 * Returns an error code from ZSTD_seekable_endFrame(); or, while the last frame
 * still has data to flush, that amount plus the full seek-table size as a size
 * hint; otherwise the result of ZSTD_seekable_writeSeekTable(). */
size_t ZSTD_seekable_endStream(ZSTD_seekable_CStream* zcs, ZSTD_outBuffer* output)
{
    size_t pending = 0;

    if (!zcs->writingSeekTable) {
        pending = ZSTD_seekable_endFrame(zcs, output);
    }

    if (ZSTD_isError(pending)) {
        return pending;
    }
    if (pending != 0) {
        /* return an accurate size hint */
        return pending + ZSTD_seekable_seekTableSize(&zcs->framelog);
    }

    /* from here on only the seek table remains; never end a frame again */
    zcs->writingSeekTable = 1;

    return ZSTD_seekable_writeSeekTable(&zcs->framelog, output);
}