Coverage Report

Created: 2026-02-24 06:33

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/zstd/lib/compress/zstd_preSplit.c
Line
Count
Source
1
/*
2
 * Copyright (c) Meta Platforms, Inc. and affiliates.
3
 * All rights reserved.
4
 *
5
 * This source code is licensed under both the BSD-style license (found in the
6
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
 * in the COPYING file in the root directory of this source tree).
8
 * You may select, at your option, one of the above-listed licenses.
9
 */
10
11
#include "../common/compiler.h" /* ZSTD_ALIGNOF */
12
#include "../common/mem.h" /* S64 */
13
#include "../common/zstd_deps.h" /* ZSTD_memset */
14
#include "../common/zstd_internal.h" /* ZSTD_STATIC_ASSERT */
15
#include "hist.h" /* HIST_add */
16
#include "zstd_preSplit.h"
17
18
19
#define BLOCKSIZE_MIN 3500
20
33.2k
#define THRESHOLD_PENALTY_RATE 16
21
16.6k
#define THRESHOLD_BASE (THRESHOLD_PENALTY_RATE - 2)
22
2.25k
#define THRESHOLD_PENALTY 3
23
24
15.0k
#define HASHLENGTH 2
25
11.1M
#define HASHLOG_MAX 10
26
11.1M
#define HASHTABLESIZE (1 << HASHLOG_MAX)
27
#define HASHMASK (HASHTABLESIZE - 1)
28
61.0M
#define KNUTH 0x9e3779b9
29
30
/* for hashLog > 8, hash 2 bytes.
31
 * for hashLog == 8, just take the byte, no hashing.
32
 * The speed of this method relies on compile-time constant propagation */
33
FORCE_INLINE_TEMPLATE unsigned hash2(const void *p, unsigned hashLog)
34
61.5M
{
35
61.5M
    assert(hashLog >= 8);
36
61.5M
    if (hashLog == 8) return (U32)((const BYTE*)p)[0];
37
61.5M
    assert(hashLog <= HASHLOG_MAX);
38
61.0M
    return (U32)(MEM_read16(p)) * KNUTH >> (32 - hashLog);
39
61.0M
}
40
41
42
typedef struct {
43
  unsigned events[HASHTABLESIZE];
44
  size_t nbEvents;
45
} Fingerprint;
46
typedef struct {
47
    Fingerprint pastEvents;
48
    Fingerprint newEvents;
49
} FPStats;
50
51
static void initStats(FPStats* fpstats)
52
6.07k
{
53
6.07k
    ZSTD_memset(fpstats, 0, sizeof(FPStats));
54
6.07k
}
55
56
FORCE_INLINE_TEMPLATE void
57
addEvents_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate, unsigned hashLog)
58
15.0k
{
59
15.0k
    const char* p = (const char*)src;
60
15.0k
    size_t limit = srcSize - HASHLENGTH + 1;
61
15.0k
    size_t n;
62
15.0k
    assert(srcSize >= HASHLENGTH);
63
61.5M
    for (n = 0; n < limit; n+=samplingRate) {
64
61.5M
        fp->events[hash2(p+n, hashLog)]++;
65
61.5M
    }
66
15.0k
    fp->nbEvents += limit/samplingRate;
67
15.0k
}
68
69
FORCE_INLINE_TEMPLATE void
70
recordFingerprint_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate, unsigned hashLog)
71
15.0k
{
72
15.0k
    ZSTD_memset(fp, 0, sizeof(unsigned) * ((size_t)1 << hashLog));
73
15.0k
    fp->nbEvents = 0;
74
15.0k
    addEvents_generic(fp, src, srcSize, samplingRate, hashLog);
75
15.0k
}
76
77
typedef void (*RecordEvents_f)(Fingerprint* fp, const void* src, size_t srcSize);
78
79
9.00k
#define FP_RECORD(_rate) ZSTD_recordFingerprint_##_rate
80
81
#define ZSTD_GEN_RECORD_FINGERPRINT(_rate, _hSize)                                 \
82
    static void FP_RECORD(_rate)(Fingerprint* fp, const void* src, size_t srcSize) \
83
15.0k
    {                                                                              \
84
15.0k
        recordFingerprint_generic(fp, src, srcSize, _rate, _hSize);                \
85
15.0k
    }
zstd_preSplit.c:ZSTD_recordFingerprint_43
Line
Count
Source
83
2.16k
    {                                                                              \
84
2.16k
        recordFingerprint_generic(fp, src, srcSize, _rate, _hSize);                \
85
2.16k
    }
zstd_preSplit.c:ZSTD_recordFingerprint_11
Line
Count
Source
83
2.47k
    {                                                                              \
84
2.47k
        recordFingerprint_generic(fp, src, srcSize, _rate, _hSize);                \
85
2.47k
    }
zstd_preSplit.c:ZSTD_recordFingerprint_5
Line
Count
Source
83
3.96k
    {                                                                              \
84
3.96k
        recordFingerprint_generic(fp, src, srcSize, _rate, _hSize);                \
85
3.96k
    }
zstd_preSplit.c:ZSTD_recordFingerprint_1
Line
Count
Source
83
6.43k
    {                                                                              \
84
6.43k
        recordFingerprint_generic(fp, src, srcSize, _rate, _hSize);                \
85
6.43k
    }
86
87
ZSTD_GEN_RECORD_FINGERPRINT(1, 10)
88
ZSTD_GEN_RECORD_FINGERPRINT(5, 10)
89
ZSTD_GEN_RECORD_FINGERPRINT(11, 9)
90
ZSTD_GEN_RECORD_FINGERPRINT(43, 8)
91
92
93
12.8M
static U64 abs64(S64 s64) { return (U64)((s64 < 0) ? -s64 : s64); }
94
95
static U64 fpDistance(const Fingerprint* fp1, const Fingerprint* fp2, unsigned hashLog)
96
21.7k
{
97
21.7k
    U64 distance = 0;
98
21.7k
    size_t n;
99
21.7k
    assert(hashLog <= HASHLOG_MAX);
100
12.9M
    for (n = 0; n < ((size_t)1 << hashLog); n++) {
101
12.8M
        distance +=
102
12.8M
            abs64((S64)fp1->events[n] * (S64)fp2->nbEvents - (S64)fp2->events[n] * (S64)fp1->nbEvents);
103
12.8M
    }
104
21.7k
    return distance;
105
21.7k
}
106
107
/* Compare newEvents with pastEvents
108
 * return 1 when considered "too different"
109
 */
110
static int compareFingerprints(const Fingerprint* ref,
111
                            const Fingerprint* newfp,
112
                            int penalty,
113
                            unsigned hashLog)
114
16.6k
{
115
16.6k
    assert(ref->nbEvents > 0);
116
16.6k
    assert(newfp->nbEvents > 0);
117
16.6k
    {   U64 p50 = (U64)ref->nbEvents * (U64)newfp->nbEvents;
118
16.6k
        U64 deviation = fpDistance(ref, newfp, hashLog);
119
16.6k
        U64 threshold = p50 * (U64)(THRESHOLD_BASE + penalty) / THRESHOLD_PENALTY_RATE;
120
16.6k
        return deviation >= threshold;
121
16.6k
    }
122
16.6k
}
123
124
static void mergeEvents(Fingerprint* acc, const Fingerprint* newfp)
125
10.8k
{
126
10.8k
    size_t n;
127
11.1M
    for (n = 0; n < HASHTABLESIZE; n++) {
128
11.1M
        acc->events[n] += newfp->events[n];
129
11.1M
    }
130
10.8k
    acc->nbEvents += newfp->nbEvents;
131
10.8k
}
132
133
static void flushEvents(FPStats* fpstats)
134
0
{
135
0
    size_t n;
136
0
    for (n = 0; n < HASHTABLESIZE; n++) {
137
0
        fpstats->pastEvents.events[n] = fpstats->newEvents.events[n];
138
0
    }
139
0
    fpstats->pastEvents.nbEvents = fpstats->newEvents.nbEvents;
140
0
    ZSTD_memset(&fpstats->newEvents, 0, sizeof(fpstats->newEvents));
141
0
}
142
143
static void removeEvents(Fingerprint* acc, const Fingerprint* slice)
144
0
{
145
0
    size_t n;
146
0
    for (n = 0; n < HASHTABLESIZE; n++) {
147
0
        assert(acc->events[n] >= slice->events[n]);
148
0
        acc->events[n] -= slice->events[n];
149
0
    }
150
0
    acc->nbEvents -= slice->nbEvents;
151
0
}
152
153
41.3k
#define CHUNKSIZE (8 << 10)
154
static size_t ZSTD_splitBlock_byChunks(const void* blockStart, size_t blockSize,
155
                        int level,
156
                        void* workspace, size_t wkspSize)
157
2.25k
{
158
2.25k
    static const RecordEvents_f records_fs[] = {
159
2.25k
        FP_RECORD(43), FP_RECORD(11), FP_RECORD(5), FP_RECORD(1)
160
2.25k
    };
161
2.25k
    static const unsigned hashParams[] = { 8, 9, 10, 10 };
162
2.25k
    const RecordEvents_f record_f = (assert(0<=level && level<=3), records_fs[level]);
163
2.25k
    FPStats* const fpstats = (FPStats*)workspace;
164
2.25k
    const char* p = (const char*)blockStart;
165
2.25k
    int penalty = THRESHOLD_PENALTY;
166
2.25k
    size_t pos = 0;
167
2.25k
    assert(blockSize == (128 << 10));
168
2.25k
    assert(workspace != NULL);
169
2.25k
    assert((size_t)workspace % ZSTD_ALIGNOF(FPStats) == 0);
170
2.25k
    ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof(FPStats));
171
2.25k
    assert(wkspSize >= sizeof(FPStats)); (void)wkspSize;
172
173
2.25k
    initStats(fpstats);
174
2.25k
    record_f(&fpstats->pastEvents, p, CHUNKSIZE);
175
13.1k
    for (pos = CHUNKSIZE; pos <= blockSize - CHUNKSIZE; pos += CHUNKSIZE) {
176
12.8k
        record_f(&fpstats->newEvents, p + pos, CHUNKSIZE);
177
12.8k
        if (compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, penalty, hashParams[level])) {
178
1.91k
            return pos;
179
10.8k
        } else {
180
10.8k
            mergeEvents(&fpstats->pastEvents, &fpstats->newEvents);
181
10.8k
            if (penalty > 0) penalty--;
182
10.8k
        }
183
12.8k
    }
184
2.25k
    assert(pos == blockSize);
185
333
    return blockSize;
186
0
    (void)flushEvents; (void)removeEvents;
187
0
}
188
189
/* ZSTD_splitBlock_fromBorders(): very fast strategy :
190
 * compare fingerprint from beginning and end of the block,
191
 * derive from their difference if it's preferable to split in the middle,
192
 * repeat the process a second time, for finer grained decision.
193
 * 3 times did not brought improvements, so I stopped at 2.
194
 * Benefits are good enough for a cheap heuristic.
195
 * More accurate splitting saves more, but speed impact is also more perceptible.
196
 * For better accuracy, use more elaborate variant *_byChunks.
197
 */
198
static size_t ZSTD_splitBlock_fromBorders(const void* blockStart, size_t blockSize,
199
                        void* workspace, size_t wkspSize)
200
3.82k
{
201
28.2k
#define SEGMENT_SIZE 512
202
3.82k
    FPStats* const fpstats = (FPStats*)workspace;
203
3.82k
    Fingerprint* middleEvents = (Fingerprint*)(void*)((char*)workspace + 512 * sizeof(unsigned));
204
3.82k
    assert(blockSize == (128 << 10));
205
3.82k
    assert(workspace != NULL);
206
3.82k
    assert((size_t)workspace % ZSTD_ALIGNOF(FPStats) == 0);
207
3.82k
    ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof(FPStats));
208
3.82k
    assert(wkspSize >= sizeof(FPStats)); (void)wkspSize;
209
210
3.82k
    initStats(fpstats);
211
3.82k
    HIST_add(fpstats->pastEvents.events, blockStart, SEGMENT_SIZE);
212
3.82k
    HIST_add(fpstats->newEvents.events, (const char*)blockStart + blockSize - SEGMENT_SIZE, SEGMENT_SIZE);
213
3.82k
    fpstats->pastEvents.nbEvents = fpstats->newEvents.nbEvents = SEGMENT_SIZE;
214
3.82k
    if (!compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, 0, 8))
215
1.23k
        return blockSize;
216
217
2.58k
    HIST_add(middleEvents->events, (const char*)blockStart + blockSize/2 - SEGMENT_SIZE/2, SEGMENT_SIZE);
218
2.58k
    middleEvents->nbEvents = SEGMENT_SIZE;
219
2.58k
    {   U64 const distFromBegin = fpDistance(&fpstats->pastEvents, middleEvents, 8);
220
2.58k
        U64 const distFromEnd = fpDistance(&fpstats->newEvents, middleEvents, 8);
221
2.58k
        U64 const minDistance = SEGMENT_SIZE * SEGMENT_SIZE / 3;
222
2.58k
        if (abs64((S64)distFromBegin - (S64)distFromEnd) < minDistance)
223
847
            return 64 KB;
224
1.74k
        return (distFromBegin > distFromEnd) ? 32 KB : 96 KB;
225
2.58k
    }
226
2.58k
}
227
228
size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,
229
                    int level,
230
                    void* workspace, size_t wkspSize)
231
6.07k
{
232
6.07k
    DEBUGLOG(6, "ZSTD_splitBlock (level=%i)", level);
233
6.07k
    assert(0<=level && level<=4);
234
6.07k
    if (level == 0)
235
3.82k
        return ZSTD_splitBlock_fromBorders(blockStart, blockSize, workspace, wkspSize);
236
    /* level >= 1*/
237
2.25k
    return ZSTD_splitBlock_byChunks(blockStart, blockSize, level-1, workspace, wkspSize);
238
6.07k
}