/src/zstd/lib/compress/zstd_preSplit.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | | * All rights reserved. |
4 | | * |
5 | | * This source code is licensed under both the BSD-style license (found in the |
6 | | * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
7 | | * in the COPYING file in the root directory of this source tree). |
8 | | * You may select, at your option, one of the above-listed licenses. |
9 | | */ |
10 | | |
11 | | #include "../common/compiler.h" /* ZSTD_ALIGNOF */ |
12 | | #include "../common/mem.h" /* S64 */ |
13 | | #include "../common/zstd_deps.h" /* ZSTD_memset */ |
14 | | #include "../common/zstd_internal.h" /* ZSTD_STATIC_ASSERT */ |
15 | | #include "hist.h" /* HIST_add */ |
16 | | #include "zstd_preSplit.h" |
17 | | |
18 | | |
/* minimum block size below which splitting is not attempted */
#define BLOCKSIZE_MIN 3500
/* divisor of the similarity threshold: threshold = p50 * (BASE+penalty) / RATE */
#define THRESHOLD_PENALTY_RATE 16
/* baseline numerator of the similarity threshold */
#define THRESHOLD_BASE (THRESHOLD_PENALTY_RATE - 2)
/* initial extra strictness right after a split; decays by 1 per merged chunk */
#define THRESHOLD_PENALTY 3

/* number of bytes consumed per hash (hash2 reads 2 bytes) */
#define HASHLENGTH 2
/* largest supported hash log; Fingerprint.events is sized for it */
#define HASHLOG_MAX 10
#define HASHTABLESIZE (1 << HASHLOG_MAX)
#define HASHMASK (HASHTABLESIZE - 1)
/* Knuth's multiplicative-hashing constant (2^32 / golden ratio) */
#define KNUTH 0x9e3779b9
29 | | |
30 | | /* for hashLog > 8, hash 2 bytes. |
31 | | * for hashLog == 8, just take the byte, no hashing. |
32 | | * The speed of this method relies on compile-time constant propagation */ |
33 | | FORCE_INLINE_TEMPLATE unsigned hash2(const void *p, unsigned hashLog) |
34 | 0 | { |
35 | 0 | assert(hashLog >= 8); |
36 | 0 | if (hashLog == 8) return (U32)((const BYTE*)p)[0]; |
37 | 0 | assert(hashLog <= HASHLOG_MAX); |
38 | 0 | return (U32)(MEM_read16(p)) * KNUTH >> (32 - hashLog); |
39 | 0 | } |
40 | | |
41 | | |
/* Histogram of hashed events over a data segment.
 * events[] is sized for the largest hash table (hashLog == HASHLOG_MAX);
 * configurations with a smaller hashLog only use a prefix of the array.
 * nbEvents counts how many samples have been accumulated. */
typedef struct {
  unsigned events[HASHTABLESIZE];
  size_t nbEvents;
} Fingerprint;
/* Pair of fingerprints, comparing already-consumed data (pastEvents)
 * against the most recently scanned chunk (newEvents). */
typedef struct {
    Fingerprint pastEvents;
    Fingerprint newEvents;
} FPStats;
50 | | |
51 | | static void initStats(FPStats* fpstats) |
52 | 0 | { |
53 | 0 | ZSTD_memset(fpstats, 0, sizeof(FPStats)); |
54 | 0 | } |
55 | | |
56 | | FORCE_INLINE_TEMPLATE void |
57 | | addEvents_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate, unsigned hashLog) |
58 | 0 | { |
59 | 0 | const char* p = (const char*)src; |
60 | 0 | size_t limit = srcSize - HASHLENGTH + 1; |
61 | 0 | size_t n; |
62 | 0 | assert(srcSize >= HASHLENGTH); |
63 | 0 | for (n = 0; n < limit; n+=samplingRate) { |
64 | 0 | fp->events[hash2(p+n, hashLog)]++; |
65 | 0 | } |
66 | 0 | fp->nbEvents += limit/samplingRate; |
67 | 0 | } |
68 | | |
69 | | FORCE_INLINE_TEMPLATE void |
70 | | recordFingerprint_generic(Fingerprint* fp, const void* src, size_t srcSize, size_t samplingRate, unsigned hashLog) |
71 | 0 | { |
72 | 0 | ZSTD_memset(fp, 0, sizeof(unsigned) * ((size_t)1 << hashLog)); |
73 | 0 | fp->nbEvents = 0; |
74 | 0 | addEvents_generic(fp, src, srcSize, samplingRate, hashLog); |
75 | 0 | } |
76 | | |
/* Function-pointer type shared by all recordFingerprint specializations */
typedef void (*RecordEvents_f)(Fingerprint* fp, const void* src, size_t srcSize);

/* Name of the specialization generated for a given sampling rate */
#define FP_RECORD(_rate) ZSTD_recordFingerprint_##_rate

/* Generate a non-inline wrapper specialized for (_rate, _hSize),
 * so recordFingerprint_generic benefits from compile-time constant
 * propagation of both the sampling rate and the hash log. */
#define ZSTD_GEN_RECORD_FINGERPRINT(_rate, _hSize)                                 \
    static void FP_RECORD(_rate)(Fingerprint* fp, const void* src, size_t srcSize) \
    {                                                                              \
        recordFingerprint_generic(fp, src, srcSize, _rate, _hSize);                \
    }

/* one specialization per splitting level, from fastest (rate 43, hashLog 8)
 * to most accurate (rate 1, hashLog 10) */
ZSTD_GEN_RECORD_FINGERPRINT(1, 10)
ZSTD_GEN_RECORD_FINGERPRINT(5, 10)
ZSTD_GEN_RECORD_FINGERPRINT(11, 9)
ZSTD_GEN_RECORD_FINGERPRINT(43, 8)
91 | | |
92 | | |
93 | 0 | static U64 abs64(S64 s64) { return (U64)((s64 < 0) ? -s64 : s64); } |
94 | | |
95 | | static U64 fpDistance(const Fingerprint* fp1, const Fingerprint* fp2, unsigned hashLog) |
96 | 0 | { |
97 | 0 | U64 distance = 0; |
98 | 0 | size_t n; |
99 | 0 | assert(hashLog <= HASHLOG_MAX); |
100 | 0 | for (n = 0; n < ((size_t)1 << hashLog); n++) { |
101 | 0 | distance += |
102 | 0 | abs64((S64)fp1->events[n] * (S64)fp2->nbEvents - (S64)fp2->events[n] * (S64)fp1->nbEvents); |
103 | 0 | } |
104 | 0 | return distance; |
105 | 0 | } |
106 | | |
107 | | /* Compare newEvents with pastEvents |
108 | | * return 1 when considered "too different" |
109 | | */ |
110 | | static int compareFingerprints(const Fingerprint* ref, |
111 | | const Fingerprint* newfp, |
112 | | int penalty, |
113 | | unsigned hashLog) |
114 | 0 | { |
115 | 0 | assert(ref->nbEvents > 0); |
116 | 0 | assert(newfp->nbEvents > 0); |
117 | 0 | { U64 p50 = (U64)ref->nbEvents * (U64)newfp->nbEvents; |
118 | 0 | U64 deviation = fpDistance(ref, newfp, hashLog); |
119 | 0 | U64 threshold = p50 * (U64)(THRESHOLD_BASE + penalty) / THRESHOLD_PENALTY_RATE; |
120 | 0 | return deviation >= threshold; |
121 | 0 | } |
122 | 0 | } |
123 | | |
124 | | static void mergeEvents(Fingerprint* acc, const Fingerprint* newfp) |
125 | 0 | { |
126 | 0 | size_t n; |
127 | 0 | for (n = 0; n < HASHTABLESIZE; n++) { |
128 | 0 | acc->events[n] += newfp->events[n]; |
129 | 0 | } |
130 | 0 | acc->nbEvents += newfp->nbEvents; |
131 | 0 | } |
132 | | |
133 | | static void flushEvents(FPStats* fpstats) |
134 | 0 | { |
135 | 0 | size_t n; |
136 | 0 | for (n = 0; n < HASHTABLESIZE; n++) { |
137 | 0 | fpstats->pastEvents.events[n] = fpstats->newEvents.events[n]; |
138 | 0 | } |
139 | 0 | fpstats->pastEvents.nbEvents = fpstats->newEvents.nbEvents; |
140 | 0 | ZSTD_memset(&fpstats->newEvents, 0, sizeof(fpstats->newEvents)); |
141 | 0 | } |
142 | | |
143 | | static void removeEvents(Fingerprint* acc, const Fingerprint* slice) |
144 | 0 | { |
145 | 0 | size_t n; |
146 | 0 | for (n = 0; n < HASHTABLESIZE; n++) { |
147 | 0 | assert(acc->events[n] >= slice->events[n]); |
148 | 0 | acc->events[n] -= slice->events[n]; |
149 | 0 | } |
150 | 0 | acc->nbEvents -= slice->nbEvents; |
151 | 0 | } |
152 | | |
#define CHUNKSIZE (8 << 10)
/* Scan the block in CHUNKSIZE slices, fingerprinting each slice,
 * and return the position of the first slice whose fingerprint
 * diverges "too much" from the accumulated past; returns blockSize
 * (i.e. "don't split") when no divergence is found.
 * @level selects sampling rate and hash size (0 = fastest, 3 = most accurate).
 * @workspace must be FPStats-aligned and hold at least sizeof(FPStats) bytes.
 * NOTE(review): asserts require blockSize == 128 KB — presumably callers
 * only invoke this on full-size blocks; confirm against call sites. */
static size_t ZSTD_splitBlock_byChunks(const void* blockStart, size_t blockSize,
                        int level,
                        void* workspace, size_t wkspSize)
{
    /* specializations ordered fastest-first, matching hashParams below */
    static const RecordEvents_f records_fs[] = {
        FP_RECORD(43), FP_RECORD(11), FP_RECORD(5), FP_RECORD(1)
    };
    static const unsigned hashParams[] = { 8, 9, 10, 10 };
    const RecordEvents_f record_f = (assert(0<=level && level<=3), records_fs[level]);
    FPStats* const fpstats = (FPStats*)workspace;
    const char* p = (const char*)blockStart;
    int penalty = THRESHOLD_PENALTY;  /* extra strictness, decays as chunks merge */
    size_t pos = 0;
    assert(blockSize == (128 << 10));
    assert(workspace != NULL);
    assert((size_t)workspace % ZSTD_ALIGNOF(FPStats) == 0);
    ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof(FPStats));
    assert(wkspSize >= sizeof(FPStats)); (void)wkspSize;

    initStats(fpstats);
    /* seed the "past" fingerprint with the first chunk */
    record_f(&fpstats->pastEvents, p, CHUNKSIZE);
    for (pos = CHUNKSIZE; pos <= blockSize - CHUNKSIZE; pos += CHUNKSIZE) {
        record_f(&fpstats->newEvents, p + pos, CHUNKSIZE);
        if (compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, penalty, hashParams[level])) {
            /* statistics diverged : split right before this chunk */
            return pos;
        } else {
            /* similar enough : fold the chunk into the past and relax the threshold */
            mergeEvents(&fpstats->pastEvents, &fpstats->newEvents);
            if (penalty > 0) penalty--;
        }
    }
    assert(pos == blockSize);
    return blockSize;
    /* unreachable; referenced only to silence unused-function warnings */
    (void)flushEvents; (void)removeEvents;
}
188 | | |
/* ZSTD_splitBlock_fromBorders(): very fast strategy :
 * compare fingerprints from the beginning and end of the block;
 * derive from their difference whether it's preferable to split in the middle,
 * then repeat the process a second time, for a finer-grained decision.
 * 3 times did not bring improvements, so it stops at 2.
 * Benefits are good enough for a cheap heuristic.
 * More accurate splitting saves more, but the speed impact is also more perceptible.
 * For better accuracy, use the more elaborate variant *_byChunks.
 */
static size_t ZSTD_splitBlock_fromBorders(const void* blockStart, size_t blockSize,
                        void* workspace, size_t wkspSize)
{
#define SEGMENT_SIZE 512
    FPStats* const fpstats = (FPStats*)workspace;
    /* NOTE(review): middleEvents is carved out of the same workspace, offset by
     * 512 counters. With hashLog == 8 each fingerprint only touches its first
     * 256 counters, so the used regions appear not to collide — TODO confirm
     * against the ZSTD_SLIPBLOCK_WORKSPACESIZE layout. */
    Fingerprint* middleEvents = (Fingerprint*)(void*)((char*)workspace + 512 * sizeof(unsigned));
    assert(blockSize == (128 << 10));
    assert(workspace != NULL);
    assert((size_t)workspace % ZSTD_ALIGNOF(FPStats) == 0);
    ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof(FPStats));
    assert(wkspSize >= sizeof(FPStats)); (void)wkspSize;

    initStats(fpstats);
    /* histogram the first and last SEGMENT_SIZE bytes (raw byte values: hashLog 8) */
    HIST_add(fpstats->pastEvents.events, blockStart, SEGMENT_SIZE);
    HIST_add(fpstats->newEvents.events, (const char*)blockStart + blockSize - SEGMENT_SIZE, SEGMENT_SIZE);
    fpstats->pastEvents.nbEvents = fpstats->newEvents.nbEvents = SEGMENT_SIZE;
    /* borders look similar => assume the block is homogeneous, don't split */
    if (!compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, 0, 8))
        return blockSize;

    /* second pass: sample the middle of the block to refine the split point */
    HIST_add(middleEvents->events, (const char*)blockStart + blockSize/2 - SEGMENT_SIZE/2, SEGMENT_SIZE);
    middleEvents->nbEvents = SEGMENT_SIZE;
    {   U64 const distFromBegin = fpDistance(&fpstats->pastEvents, middleEvents, 8);
        U64 const distFromEnd = fpDistance(&fpstats->newEvents, middleEvents, 8);
        U64 const minDistance = SEGMENT_SIZE * SEGMENT_SIZE / 3;
        /* middle roughly equidistant from both borders => split at the midpoint */
        if (abs64((S64)distFromBegin - (S64)distFromEnd) < minDistance)
            return 64 KB;
        /* otherwise split inside the half whose border differs most from the middle */
        return (distFromBegin > distFromEnd) ? 32 KB : 96 KB;
    }
}
227 | | |
228 | | size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize, |
229 | | int level, |
230 | | void* workspace, size_t wkspSize) |
231 | 0 | { |
232 | 0 | DEBUGLOG(6, "ZSTD_splitBlock (level=%i)", level); |
233 | 0 | assert(0<=level && level<=4); |
234 | 0 | if (level == 0) |
235 | 0 | return ZSTD_splitBlock_fromBorders(blockStart, blockSize, workspace, wkspSize); |
236 | | /* level >= 1*/ |
237 | 0 | return ZSTD_splitBlock_byChunks(blockStart, blockSize, level-1, workspace, wkspSize); |
238 | 0 | } |