/src/zstd/lib/compress/zstd_double_fast.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | | * All rights reserved. |
4 | | * |
5 | | * This source code is licensed under both the BSD-style license (found in the |
6 | | * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
7 | | * in the COPYING file in the root directory of this source tree). |
8 | | * You may select, at your option, one of the above-listed licenses. |
9 | | */ |
10 | | |
11 | | #include "zstd_compress_internal.h" |
12 | | #include "zstd_double_fast.h" |
13 | | |
14 | | #ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR |
15 | | |
16 | | static void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms, |
17 | | void const* end, ZSTD_dictTableLoadMethod_e dtlm) |
18 | 0 | { |
19 | 0 | const ZSTD_compressionParameters* const cParams = &ms->cParams; |
20 | 0 | U32* const hashLarge = ms->hashTable; |
21 | 0 | U32 const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS; |
22 | 0 | U32 const mls = cParams->minMatch; |
23 | 0 | U32* const hashSmall = ms->chainTable; |
24 | 0 | U32 const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS; |
25 | 0 | const BYTE* const base = ms->window.base; |
26 | 0 | const BYTE* ip = base + ms->nextToUpdate; |
27 | 0 | const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; |
28 | 0 | const U32 fastHashFillStep = 3; |
29 | | |
30 | | /* Always insert every fastHashFillStep position into the hash tables. |
31 | | * Insert the other positions into the large hash table if their entry |
32 | | * is empty. |
33 | | */ |
34 | 0 | for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) { |
35 | 0 | U32 const curr = (U32)(ip - base); |
36 | 0 | U32 i; |
37 | 0 | for (i = 0; i < fastHashFillStep; ++i) { |
38 | 0 | size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls); |
39 | 0 | size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8); |
40 | 0 | if (i == 0) { |
41 | 0 | ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i); |
42 | 0 | } |
43 | 0 | if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) { |
44 | 0 | ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i); |
45 | 0 | } |
46 | | /* Only load extra positions for ZSTD_dtlm_full */ |
47 | 0 | if (dtlm == ZSTD_dtlm_fast) |
48 | 0 | break; |
49 | 0 | } } |
50 | 0 | } |
51 | | |
52 | | static void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms, |
53 | | void const* end, ZSTD_dictTableLoadMethod_e dtlm) |
54 | 925k | { |
55 | 925k | const ZSTD_compressionParameters* const cParams = &ms->cParams; |
56 | 925k | U32* const hashLarge = ms->hashTable; |
57 | 925k | U32 const hBitsL = cParams->hashLog; |
58 | 925k | U32 const mls = cParams->minMatch; |
59 | 925k | U32* const hashSmall = ms->chainTable; |
60 | 925k | U32 const hBitsS = cParams->chainLog; |
61 | 925k | const BYTE* const base = ms->window.base; |
62 | 925k | const BYTE* ip = base + ms->nextToUpdate; |
63 | 925k | const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; |
64 | 925k | const U32 fastHashFillStep = 3; |
65 | | |
66 | | /* Always insert every fastHashFillStep position into the hash tables. |
67 | | * Insert the other positions into the large hash table if their entry |
68 | | * is empty. |
69 | | */ |
70 | 150M | for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) { |
71 | 149M | U32 const curr = (U32)(ip - base); |
72 | 149M | U32 i; |
73 | 149M | for (i = 0; i < fastHashFillStep; ++i) { |
74 | 149M | size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls); |
75 | 149M | size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8); |
76 | 149M | if (i == 0) |
77 | 149M | hashSmall[smHash] = curr + i; |
78 | 149M | if (i == 0 || hashLarge[lgHash] == 0) |
79 | 149M | hashLarge[lgHash] = curr + i; |
80 | | /* Only load extra positions for ZSTD_dtlm_full */ |
81 | 149M | if (dtlm == ZSTD_dtlm_fast) |
82 | 149M | break; |
83 | 149M | } } |
84 | 925k | } |
85 | | |
86 | | void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, |
87 | | const void* const end, |
88 | | ZSTD_dictTableLoadMethod_e dtlm, |
89 | | ZSTD_tableFillPurpose_e tfp) |
90 | 925k | { |
91 | 925k | if (tfp == ZSTD_tfp_forCDict) { |
92 | 0 | ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm); |
93 | 925k | } else { |
94 | 925k | ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm); |
95 | 925k | } |
96 | 925k | } |
97 | | |
98 | | |
99 | | FORCE_INLINE_TEMPLATE |
100 | | size_t ZSTD_compressBlock_doubleFast_noDict_generic( |
101 | | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
102 | | void const* src, size_t srcSize, U32 const mls /* template */) |
103 | 809k | { |
104 | 809k | ZSTD_compressionParameters const* cParams = &ms->cParams; |
105 | 809k | U32* const hashLong = ms->hashTable; |
106 | 809k | const U32 hBitsL = cParams->hashLog; |
107 | 809k | U32* const hashSmall = ms->chainTable; |
108 | 809k | const U32 hBitsS = cParams->chainLog; |
109 | 809k | const BYTE* const base = ms->window.base; |
110 | 809k | const BYTE* const istart = (const BYTE*)src; |
111 | 809k | const BYTE* anchor = istart; |
112 | 809k | const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); |
113 | | /* presumes that, if there is a dictionary, it must be using Attach mode */ |
114 | 809k | const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); |
115 | 809k | const BYTE* const prefixLowest = base + prefixLowestIndex; |
116 | 809k | const BYTE* const iend = istart + srcSize; |
117 | 809k | const BYTE* const ilimit = iend - HASH_READ_SIZE; |
118 | 809k | U32 offset_1=rep[0], offset_2=rep[1]; |
119 | 809k | U32 offsetSaved1 = 0, offsetSaved2 = 0; |
120 | | |
121 | 809k | size_t mLength; |
122 | 809k | U32 offset; |
123 | 809k | U32 curr; |
124 | | |
125 | | /* how many positions to search before increasing step size */ |
126 | 809k | const size_t kStepIncr = 1 << kSearchStrength; |
127 | | /* the position at which to increment the step size if no match is found */ |
128 | 809k | const BYTE* nextStep; |
129 | 809k | size_t step; /* the current step size */ |
130 | | |
131 | 809k | size_t hl0; /* the long hash at ip */ |
132 | 809k | size_t hl1; /* the long hash at ip1 */ |
133 | | |
134 | 809k | U32 idxl0; /* the long match index for ip */ |
135 | 809k | U32 idxl1; /* the long match index for ip1 */ |
136 | | |
137 | 809k | const BYTE* matchl0; /* the long match for ip */ |
138 | 809k | const BYTE* matchs0; /* the short match for ip */ |
139 | 809k | const BYTE* matchl1; /* the long match for ip1 */ |
140 | | |
141 | 809k | const BYTE* ip = istart; /* the current position */ |
142 | 809k | const BYTE* ip1; /* the next position */ |
143 | | |
144 | 809k | DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic"); |
145 | | |
146 | | /* init */ |
147 | 809k | ip += ((ip - prefixLowest) == 0); |
148 | 809k | { |
149 | 809k | U32 const current = (U32)(ip - base); |
150 | 809k | U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog); |
151 | 809k | U32 const maxRep = current - windowLow; |
152 | 809k | if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0; |
153 | 809k | if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0; |
154 | 809k | } |
155 | | |
156 | | /* Outer Loop: one iteration per match found and stored */ |
157 | 2.72M | while (1) { |
158 | 2.72M | step = 1; |
159 | 2.72M | nextStep = ip + kStepIncr; |
160 | 2.72M | ip1 = ip + step; |
161 | | |
162 | 2.72M | if (ip1 > ilimit) { |
163 | 734k | goto _cleanup; |
164 | 734k | } |
165 | | |
166 | 1.98M | hl0 = ZSTD_hashPtr(ip, hBitsL, 8); |
167 | 1.98M | idxl0 = hashLong[hl0]; |
168 | 1.98M | matchl0 = base + idxl0; |
169 | | |
170 | | /* Inner Loop: one iteration per search / position */ |
171 | 15.1M | do { |
172 | 15.1M | const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls); |
173 | 15.1M | const U32 idxs0 = hashSmall[hs0]; |
174 | 15.1M | curr = (U32)(ip-base); |
175 | 15.1M | matchs0 = base + idxs0; |
176 | | |
177 | 15.1M | hashLong[hl0] = hashSmall[hs0] = curr; /* update hash tables */ |
178 | | |
179 | | /* check noDict repcode */ |
180 | 15.1M | if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { |
181 | 700k | mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; |
182 | 700k | ip++; |
183 | 700k | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength); |
184 | 0 | goto _match_stored; |
185 | 700k | } |
186 | | |
187 | 14.4M | hl1 = ZSTD_hashPtr(ip1, hBitsL, 8); |
188 | | |
189 | 14.4M | if (idxl0 > prefixLowestIndex) { |
190 | | /* check prefix long match */ |
191 | 10.7M | if (MEM_read64(matchl0) == MEM_read64(ip)) { |
192 | 374k | mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8; |
193 | 374k | offset = (U32)(ip-matchl0); |
194 | 460k | while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */ |
195 | 374k | goto _match_found; |
196 | 374k | } |
197 | 10.7M | } |
198 | | |
199 | 14.1M | idxl1 = hashLong[hl1]; |
200 | 14.1M | matchl1 = base + idxl1; |
201 | | |
202 | 14.1M | if (idxs0 > prefixLowestIndex) { |
203 | | /* check prefix short match */ |
204 | 8.44M | if (MEM_read32(matchs0) == MEM_read32(ip)) { |
205 | 837k | goto _search_next_long; |
206 | 837k | } |
207 | 8.44M | } |
208 | | |
209 | 13.2M | if (ip1 >= nextStep) { |
210 | 27.7k | PREFETCH_L1(ip1 + 64); |
211 | 27.7k | PREFETCH_L1(ip1 + 128); |
212 | 27.7k | step++; |
213 | 27.7k | nextStep += kStepIncr; |
214 | 27.7k | } |
215 | 13.2M | ip = ip1; |
216 | 13.2M | ip1 += step; |
217 | | |
218 | 13.2M | hl0 = hl1; |
219 | 13.2M | idxl0 = idxl1; |
220 | 13.2M | matchl0 = matchl1; |
221 | | #if defined(__aarch64__) |
222 | | PREFETCH_L1(ip+256); |
223 | | #endif |
224 | 13.2M | } while (ip1 <= ilimit); |
225 | | |
226 | 809k | _cleanup: |
227 | | /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0), |
228 | | * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */ |
229 | 809k | offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2; |
230 | | |
231 | | /* save reps for next block */ |
232 | 809k | rep[0] = offset_1 ? offset_1 : offsetSaved1; |
233 | 809k | rep[1] = offset_2 ? offset_2 : offsetSaved2; |
234 | | |
235 | | /* Return the last literals size */ |
236 | 809k | return (size_t)(iend - anchor); |
237 | | |
238 | 837k | _search_next_long: |
239 | | |
240 | | /* check prefix long +1 match */ |
241 | 837k | if (idxl1 > prefixLowestIndex) { |
242 | 731k | if (MEM_read64(matchl1) == MEM_read64(ip1)) { |
243 | 104k | ip = ip1; |
244 | 104k | mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8; |
245 | 104k | offset = (U32)(ip-matchl1); |
246 | 179k | while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */ |
247 | 104k | goto _match_found; |
248 | 104k | } |
249 | 731k | } |
250 | | |
251 | | /* if no long +1 match, explore the short match we found */ |
252 | 732k | mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4; |
253 | 732k | offset = (U32)(ip - matchs0); |
254 | 828k | while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */ |
255 | | |
256 | | /* fall-through */ |
257 | | |
258 | 1.21M | _match_found: /* requires ip, offset, mLength */ |
259 | 1.21M | offset_2 = offset_1; |
260 | 1.21M | offset_1 = offset; |
261 | | |
262 | 1.21M | if (step < 4) { |
263 | | /* It is unsafe to write this value back to the hashtable when ip1 is |
264 | | * greater than or equal to the new ip we will have after we're done |
265 | | * processing this match. Rather than perform that test directly |
266 | | * (ip1 >= ip + mLength), which costs speed in practice, we do a simpler |
267 | | * more predictable test. The minmatch even if we take a short match is |
268 | | * 4 bytes, so as long as step, the distance between ip and ip1 |
269 | | * (initially) is less than 4, we know ip1 < new ip. */ |
270 | 1.21M | hashLong[hl1] = (U32)(ip1 - base); |
271 | 1.21M | } |
272 | | |
273 | 1.21M | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); |
274 | | |
275 | 1.91M | _match_stored: |
276 | | /* match found */ |
277 | 1.91M | ip += mLength; |
278 | 1.91M | anchor = ip; |
279 | | |
280 | 1.91M | if (ip <= ilimit) { |
281 | | /* Complementary insertion */ |
282 | | /* done after iLimit test, as candidates could be > iend-8 */ |
283 | 1.61M | { U32 const indexToInsert = curr+2; |
284 | 1.61M | hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; |
285 | 1.61M | hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); |
286 | 1.61M | hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; |
287 | 1.61M | hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); |
288 | 1.61M | } |
289 | | |
290 | | /* check immediate repcode */ |
291 | 1.76M | while ( (ip <= ilimit) |
292 | 1.76M | && ( (offset_2>0) |
293 | 1.73M | & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { |
294 | | /* store sequence */ |
295 | 148k | size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; |
296 | 148k | U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ |
297 | 148k | hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); |
298 | 148k | hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); |
299 | 148k | ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength); |
300 | 0 | ip += rLength; |
301 | 148k | anchor = ip; |
302 | 148k | continue; /* faster when present ... (?) */ |
303 | 148k | } |
304 | 1.61M | } |
305 | 1.91M | } |
306 | 809k | } |
307 | | |
308 | | |
309 | | FORCE_INLINE_TEMPLATE |
310 | | size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( |
311 | | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
312 | | void const* src, size_t srcSize, |
313 | | U32 const mls /* template */) |
314 | 0 | { |
315 | 0 | ZSTD_compressionParameters const* cParams = &ms->cParams; |
316 | 0 | U32* const hashLong = ms->hashTable; |
317 | 0 | const U32 hBitsL = cParams->hashLog; |
318 | 0 | U32* const hashSmall = ms->chainTable; |
319 | 0 | const U32 hBitsS = cParams->chainLog; |
320 | 0 | const BYTE* const base = ms->window.base; |
321 | 0 | const BYTE* const istart = (const BYTE*)src; |
322 | 0 | const BYTE* ip = istart; |
323 | 0 | const BYTE* anchor = istart; |
324 | 0 | const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); |
325 | | /* presumes that, if there is a dictionary, it must be using Attach mode */ |
326 | 0 | const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); |
327 | 0 | const BYTE* const prefixLowest = base + prefixLowestIndex; |
328 | 0 | const BYTE* const iend = istart + srcSize; |
329 | 0 | const BYTE* const ilimit = iend - HASH_READ_SIZE; |
330 | 0 | U32 offset_1=rep[0], offset_2=rep[1]; |
331 | |
|
332 | 0 | const ZSTD_matchState_t* const dms = ms->dictMatchState; |
333 | 0 | const ZSTD_compressionParameters* const dictCParams = &dms->cParams; |
334 | 0 | const U32* const dictHashLong = dms->hashTable; |
335 | 0 | const U32* const dictHashSmall = dms->chainTable; |
336 | 0 | const U32 dictStartIndex = dms->window.dictLimit; |
337 | 0 | const BYTE* const dictBase = dms->window.base; |
338 | 0 | const BYTE* const dictStart = dictBase + dictStartIndex; |
339 | 0 | const BYTE* const dictEnd = dms->window.nextSrc; |
340 | 0 | const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase); |
341 | 0 | const U32 dictHBitsL = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS; |
342 | 0 | const U32 dictHBitsS = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS; |
343 | 0 | const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart)); |
344 | |
|
345 | 0 | DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic"); |
346 | | |
347 | | /* if a dictionary is attached, it must be within window range */ |
348 | 0 | assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); |
349 | | |
350 | 0 | if (ms->prefetchCDictTables) { |
351 | 0 | size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32); |
352 | 0 | size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32); |
353 | 0 | PREFETCH_AREA(dictHashLong, hashTableBytes) |
354 | 0 | PREFETCH_AREA(dictHashSmall, chainTableBytes) |
355 | 0 | } |
356 | | |
357 | | /* init */ |
358 | 0 | ip += (dictAndPrefixLength == 0); |
359 | | |
360 | | /* dictMatchState repCode checks don't currently handle repCode == 0 |
361 | | * disabling. */ |
362 | 0 | assert(offset_1 <= dictAndPrefixLength); |
363 | 0 | assert(offset_2 <= dictAndPrefixLength); |
364 | | |
365 | | /* Main Search Loop */ |
366 | 0 | while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ |
367 | 0 | size_t mLength; |
368 | 0 | U32 offset; |
369 | 0 | size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); |
370 | 0 | size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); |
371 | 0 | size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8); |
372 | 0 | size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls); |
373 | 0 | U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS]; |
374 | 0 | U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS]; |
375 | 0 | int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL); |
376 | 0 | int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS); |
377 | 0 | U32 const curr = (U32)(ip-base); |
378 | 0 | U32 const matchIndexL = hashLong[h2]; |
379 | 0 | U32 matchIndexS = hashSmall[h]; |
380 | 0 | const BYTE* matchLong = base + matchIndexL; |
381 | 0 | const BYTE* match = base + matchIndexS; |
382 | 0 | const U32 repIndex = curr + 1 - offset_1; |
383 | 0 | const BYTE* repMatch = (repIndex < prefixLowestIndex) ? |
384 | 0 | dictBase + (repIndex - dictIndexDelta) : |
385 | 0 | base + repIndex; |
386 | 0 | hashLong[h2] = hashSmall[h] = curr; /* update hash tables */ |
387 | | |
388 | | /* check repcode */ |
389 | 0 | if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) |
390 | 0 | && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { |
391 | 0 | const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; |
392 | 0 | mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; |
393 | 0 | ip++; |
394 | 0 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength); |
395 | 0 | goto _match_stored; |
396 | 0 | } |
397 | | |
398 | 0 | if (matchIndexL > prefixLowestIndex) { |
399 | | /* check prefix long match */ |
400 | 0 | if (MEM_read64(matchLong) == MEM_read64(ip)) { |
401 | 0 | mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; |
402 | 0 | offset = (U32)(ip-matchLong); |
403 | 0 | while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ |
404 | 0 | goto _match_found; |
405 | 0 | } |
406 | 0 | } else if (dictTagsMatchL) { |
407 | | /* check dictMatchState long match */ |
408 | 0 | U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS; |
409 | 0 | const BYTE* dictMatchL = dictBase + dictMatchIndexL; |
410 | 0 | assert(dictMatchL < dictEnd); |
411 | | |
412 | 0 | if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) { |
413 | 0 | mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8; |
414 | 0 | offset = (U32)(curr - dictMatchIndexL - dictIndexDelta); |
415 | 0 | while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */ |
416 | 0 | goto _match_found; |
417 | 0 | } } |
418 | | |
419 | 0 | if (matchIndexS > prefixLowestIndex) { |
420 | | /* check prefix short match */ |
421 | 0 | if (MEM_read32(match) == MEM_read32(ip)) { |
422 | 0 | goto _search_next_long; |
423 | 0 | } |
424 | 0 | } else if (dictTagsMatchS) { |
425 | | /* check dictMatchState short match */ |
426 | 0 | U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS; |
427 | 0 | match = dictBase + dictMatchIndexS; |
428 | 0 | matchIndexS = dictMatchIndexS + dictIndexDelta; |
429 | |
|
430 | 0 | if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) { |
431 | 0 | goto _search_next_long; |
432 | 0 | } } |
433 | | |
434 | 0 | ip += ((ip-anchor) >> kSearchStrength) + 1; |
435 | | #if defined(__aarch64__) |
436 | | PREFETCH_L1(ip+256); |
437 | | #endif |
438 | 0 | continue; |
439 | | |
440 | 0 | _search_next_long: |
441 | 0 | { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); |
442 | 0 | size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8); |
443 | 0 | U32 const matchIndexL3 = hashLong[hl3]; |
444 | 0 | U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS]; |
445 | 0 | int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3); |
446 | 0 | const BYTE* matchL3 = base + matchIndexL3; |
447 | 0 | hashLong[hl3] = curr + 1; |
448 | | |
449 | | /* check prefix long +1 match */ |
450 | 0 | if (matchIndexL3 > prefixLowestIndex) { |
451 | 0 | if (MEM_read64(matchL3) == MEM_read64(ip+1)) { |
452 | 0 | mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8; |
453 | 0 | ip++; |
454 | 0 | offset = (U32)(ip-matchL3); |
455 | 0 | while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ |
456 | 0 | goto _match_found; |
457 | 0 | } |
458 | 0 | } else if (dictTagsMatchL3) { |
459 | | /* check dict long +1 match */ |
460 | 0 | U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS; |
461 | 0 | const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3; |
462 | 0 | assert(dictMatchL3 < dictEnd); |
463 | 0 | if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) { |
464 | 0 | mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8; |
465 | 0 | ip++; |
466 | 0 | offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta); |
467 | 0 | while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */ |
468 | 0 | goto _match_found; |
469 | 0 | } } } |
470 | | |
471 | | /* if no long +1 match, explore the short match we found */ |
472 | 0 | if (matchIndexS < prefixLowestIndex) { |
473 | 0 | mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4; |
474 | 0 | offset = (U32)(curr - matchIndexS); |
475 | 0 | while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ |
476 | 0 | } else { |
477 | 0 | mLength = ZSTD_count(ip+4, match+4, iend) + 4; |
478 | 0 | offset = (U32)(ip - match); |
479 | 0 | while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ |
480 | 0 | } |
481 | |
|
482 | 0 | _match_found: |
483 | 0 | offset_2 = offset_1; |
484 | 0 | offset_1 = offset; |
485 | |
|
486 | 0 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); |
487 | | |
488 | 0 | _match_stored: |
489 | | /* match found */ |
490 | 0 | ip += mLength; |
491 | 0 | anchor = ip; |
492 | |
|
493 | 0 | if (ip <= ilimit) { |
494 | | /* Complementary insertion */ |
495 | | /* done after iLimit test, as candidates could be > iend-8 */ |
496 | 0 | { U32 const indexToInsert = curr+2; |
497 | 0 | hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; |
498 | 0 | hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); |
499 | 0 | hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; |
500 | 0 | hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); |
501 | 0 | } |
502 | | |
503 | | /* check immediate repcode */ |
504 | 0 | while (ip <= ilimit) { |
505 | 0 | U32 const current2 = (U32)(ip-base); |
506 | 0 | U32 const repIndex2 = current2 - offset_2; |
507 | 0 | const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ? |
508 | 0 | dictBase + repIndex2 - dictIndexDelta : |
509 | 0 | base + repIndex2; |
510 | 0 | if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) |
511 | 0 | && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { |
512 | 0 | const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; |
513 | 0 | size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; |
514 | 0 | U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ |
515 | 0 | ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2); |
516 | 0 | hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; |
517 | 0 | hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; |
518 | 0 | ip += repLength2; |
519 | 0 | anchor = ip; |
520 | 0 | continue; |
521 | 0 | } |
522 | 0 | break; |
523 | 0 | } |
524 | 0 | } |
525 | 0 | } /* while (ip < ilimit) */ |
526 | | |
527 | | /* save reps for next block */ |
528 | 0 | rep[0] = offset_1; |
529 | 0 | rep[1] = offset_2; |
530 | | |
531 | | /* Return the last literals size */ |
532 | 0 | return (size_t)(iend - anchor); |
533 | 0 | } |
534 | | |
535 | | #define ZSTD_GEN_DFAST_FN(dictMode, mls) \ |
536 | | static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls( \ |
537 | | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \ |
538 | | void const* src, size_t srcSize) \ |
539 | 974k | { \ |
540 | 974k | return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \ |
541 | 974k | } zstd_double_fast.c:ZSTD_compressBlock_doubleFast_noDict_4 Line | Count | Source | 539 | 185k | { \ | 540 | 185k | return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \ | 541 | 185k | } |
zstd_double_fast.c:ZSTD_compressBlock_doubleFast_noDict_5 Line | Count | Source | 539 | 176k | { \ | 540 | 176k | return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \ | 541 | 176k | } |
zstd_double_fast.c:ZSTD_compressBlock_doubleFast_noDict_6 Line | Count | Source | 539 | 190k | { \ | 540 | 190k | return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \ | 541 | 190k | } |
zstd_double_fast.c:ZSTD_compressBlock_doubleFast_noDict_7 Line | Count | Source | 539 | 257k | { \ | 540 | 257k | return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \ | 541 | 257k | } |
Unexecuted instantiation: zstd_double_fast.c:ZSTD_compressBlock_doubleFast_dictMatchState_4 Unexecuted instantiation: zstd_double_fast.c:ZSTD_compressBlock_doubleFast_dictMatchState_5 Unexecuted instantiation: zstd_double_fast.c:ZSTD_compressBlock_doubleFast_dictMatchState_6 Unexecuted instantiation: zstd_double_fast.c:ZSTD_compressBlock_doubleFast_dictMatchState_7 zstd_double_fast.c:ZSTD_compressBlock_doubleFast_extDict_4 Line | Count | Source | 539 | 46.7k | { \ | 540 | 46.7k | return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \ | 541 | 46.7k | } |
zstd_double_fast.c:ZSTD_compressBlock_doubleFast_extDict_5 Line | Count | Source | 539 | 32.9k | { \ | 540 | 32.9k | return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \ | 541 | 32.9k | } |
zstd_double_fast.c:ZSTD_compressBlock_doubleFast_extDict_6 Line | Count | Source | 539 | 33.8k | { \ | 540 | 33.8k | return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \ | 541 | 33.8k | } |
zstd_double_fast.c:ZSTD_compressBlock_doubleFast_extDict_7 Line | Count | Source | 539 | 51.3k | { \ | 540 | 51.3k | return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \ | 541 | 51.3k | } |
|
542 | | |
543 | | ZSTD_GEN_DFAST_FN(noDict, 4) |
544 | | ZSTD_GEN_DFAST_FN(noDict, 5) |
545 | | ZSTD_GEN_DFAST_FN(noDict, 6) |
546 | | ZSTD_GEN_DFAST_FN(noDict, 7) |
547 | | |
548 | | ZSTD_GEN_DFAST_FN(dictMatchState, 4) |
549 | | ZSTD_GEN_DFAST_FN(dictMatchState, 5) |
550 | | ZSTD_GEN_DFAST_FN(dictMatchState, 6) |
551 | | ZSTD_GEN_DFAST_FN(dictMatchState, 7) |
552 | | |
553 | | |
554 | | size_t ZSTD_compressBlock_doubleFast( |
555 | | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
556 | | void const* src, size_t srcSize) |
557 | 809k | { |
558 | 809k | const U32 mls = ms->cParams.minMatch; |
559 | 809k | switch(mls) |
560 | 809k | { |
561 | 161k | default: /* includes case 3 */ |
562 | 185k | case 4 : |
563 | 185k | return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize); |
564 | 176k | case 5 : |
565 | 176k | return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize); |
566 | 190k | case 6 : |
567 | 190k | return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize); |
568 | 257k | case 7 : |
569 | 257k | return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize); |
570 | 809k | } |
571 | 809k | } |
572 | | |
573 | | |
574 | | size_t ZSTD_compressBlock_doubleFast_dictMatchState( |
575 | | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
576 | | void const* src, size_t srcSize) |
577 | 0 | { |
578 | 0 | const U32 mls = ms->cParams.minMatch; |
579 | 0 | switch(mls) |
580 | 0 | { |
581 | 0 | default: /* includes case 3 */ |
582 | 0 | case 4 : |
583 | 0 | return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize); |
584 | 0 | case 5 : |
585 | 0 | return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize); |
586 | 0 | case 6 : |
587 | 0 | return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize); |
588 | 0 | case 7 : |
589 | 0 | return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize); |
590 | 0 | } |
591 | 0 | } |
592 | | |
593 | | |
594 | | static size_t ZSTD_compressBlock_doubleFast_extDict_generic( |
595 | | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
596 | | void const* src, size_t srcSize, |
597 | | U32 const mls /* template */) |
598 | 164k | { |
599 | 164k | ZSTD_compressionParameters const* cParams = &ms->cParams; |
600 | 164k | U32* const hashLong = ms->hashTable; |
601 | 164k | U32 const hBitsL = cParams->hashLog; |
602 | 164k | U32* const hashSmall = ms->chainTable; |
603 | 164k | U32 const hBitsS = cParams->chainLog; |
604 | 164k | const BYTE* const istart = (const BYTE*)src; |
605 | 164k | const BYTE* ip = istart; |
606 | 164k | const BYTE* anchor = istart; |
607 | 164k | const BYTE* const iend = istart + srcSize; |
608 | 164k | const BYTE* const ilimit = iend - 8; |
609 | 164k | const BYTE* const base = ms->window.base; |
610 | 164k | const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); |
611 | 164k | const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); |
612 | 164k | const U32 dictStartIndex = lowLimit; |
613 | 164k | const U32 dictLimit = ms->window.dictLimit; |
614 | 164k | const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit; |
615 | 164k | const BYTE* const prefixStart = base + prefixStartIndex; |
616 | 164k | const BYTE* const dictBase = ms->window.dictBase; |
617 | 164k | const BYTE* const dictStart = dictBase + dictStartIndex; |
618 | 164k | const BYTE* const dictEnd = dictBase + prefixStartIndex; |
619 | 164k | U32 offset_1=rep[0], offset_2=rep[1]; |
620 | | |
621 | 164k | DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize); |
622 | | |
623 | | /* if extDict is invalidated due to maxDistance, switch to "regular" variant */ |
624 | 164k | if (prefixStartIndex == dictStartIndex) |
625 | 13.6k | return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize); |
626 | | |
627 | | /* Search Loop */ |
628 | 3.11M | while (ip < ilimit) { /* < instead of <=, because (ip+1) */ |
629 | 2.96M | const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls); |
630 | 2.96M | const U32 matchIndex = hashSmall[hSmall]; |
631 | 2.96M | const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; |
632 | 2.96M | const BYTE* match = matchBase + matchIndex; |
633 | | |
634 | 2.96M | const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8); |
635 | 2.96M | const U32 matchLongIndex = hashLong[hLong]; |
636 | 2.96M | const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base; |
637 | 2.96M | const BYTE* matchLong = matchLongBase + matchLongIndex; |
638 | | |
639 | 2.96M | const U32 curr = (U32)(ip-base); |
640 | 2.96M | const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */ |
641 | 2.96M | const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; |
642 | 2.96M | const BYTE* const repMatch = repBase + repIndex; |
643 | 2.96M | size_t mLength; |
644 | 2.96M | hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */ |
645 | | |
646 | 2.96M | if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */ |
647 | 2.96M | & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */ |
648 | 2.96M | && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { |
649 | 108k | const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; |
650 | 108k | mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; |
651 | 108k | ip++; |
652 | 216k | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength); |
653 | 2.85M | } else { |
654 | 2.85M | if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { |
655 | 56.5k | const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; |
656 | 56.5k | const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart; |
657 | 56.5k | U32 offset; |
658 | 56.5k | mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8; |
659 | 56.5k | offset = curr - matchLongIndex; |
660 | 70.4k | while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ |
661 | 56.5k | offset_2 = offset_1; |
662 | 56.5k | offset_1 = offset; |
663 | 56.5k | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); |
664 | | |
665 | 2.79M | } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { |
666 | 117k | size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); |
667 | 117k | U32 const matchIndex3 = hashLong[h3]; |
668 | 117k | const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base; |
669 | 117k | const BYTE* match3 = match3Base + matchIndex3; |
670 | 117k | U32 offset; |
671 | 117k | hashLong[h3] = curr + 1; |
672 | 117k | if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) { |
673 | 13.3k | const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend; |
674 | 13.3k | const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart; |
675 | 13.3k | mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8; |
676 | 13.3k | ip++; |
677 | 13.3k | offset = curr+1 - matchIndex3; |
678 | 22.2k | while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */ |
679 | 104k | } else { |
680 | 104k | const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; |
681 | 104k | const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; |
682 | 104k | mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; |
683 | 104k | offset = curr - matchIndex; |
684 | 128k | while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ |
685 | 104k | } |
686 | 117k | offset_2 = offset_1; |
687 | 117k | offset_1 = offset; |
688 | 117k | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); |
689 | | |
690 | 2.68M | } else { |
691 | 2.68M | ip += ((ip-anchor) >> kSearchStrength) + 1; |
692 | 2.68M | continue; |
693 | 2.68M | } } |
694 | | |
695 | | /* move to next sequence start */ |
696 | 281k | ip += mLength; |
697 | 281k | anchor = ip; |
698 | | |
699 | 281k | if (ip <= ilimit) { |
700 | | /* Complementary insertion */ |
701 | | /* done after iLimit test, as candidates could be > iend-8 */ |
702 | 229k | { U32 const indexToInsert = curr+2; |
703 | 229k | hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; |
704 | 229k | hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); |
705 | 229k | hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; |
706 | 229k | hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); |
707 | 229k | } |
708 | | |
709 | | /* check immediate repcode */ |
710 | 246k | while (ip <= ilimit) { |
711 | 242k | U32 const current2 = (U32)(ip-base); |
712 | 242k | U32 const repIndex2 = current2 - offset_2; |
713 | 242k | const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; |
714 | 242k | if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */ |
715 | 242k | & (offset_2 <= current2 - dictStartIndex)) |
716 | 242k | && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { |
717 | 16.8k | const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; |
718 | 16.8k | size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; |
719 | 16.8k | U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ |
720 | 16.8k | ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2); |
721 | 0 | hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; |
722 | 16.8k | hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; |
723 | 16.8k | ip += repLength2; |
724 | 16.8k | anchor = ip; |
725 | 16.8k | continue; |
726 | 16.8k | } |
727 | 225k | break; |
728 | 242k | } } } |
729 | | |
730 | | /* save reps for next block */ |
731 | 151k | rep[0] = offset_1; |
732 | 151k | rep[1] = offset_2; |
733 | | |
734 | | /* Return the last literals size */ |
735 | 151k | return (size_t)(iend - anchor); |
736 | 151k | } |
737 | | |
738 | | ZSTD_GEN_DFAST_FN(extDict, 4) |
739 | | ZSTD_GEN_DFAST_FN(extDict, 5) |
740 | | ZSTD_GEN_DFAST_FN(extDict, 6) |
741 | | ZSTD_GEN_DFAST_FN(extDict, 7) |
742 | | |
743 | | size_t ZSTD_compressBlock_doubleFast_extDict( |
744 | | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
745 | | void const* src, size_t srcSize) |
746 | 164k | { |
747 | 164k | U32 const mls = ms->cParams.minMatch; |
748 | 164k | switch(mls) |
749 | 164k | { |
750 | 42.0k | default: /* includes case 3 */ |
751 | 46.7k | case 4 : |
752 | 46.7k | return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize); |
753 | 32.9k | case 5 : |
754 | 32.9k | return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize); |
755 | 33.8k | case 6 : |
756 | 33.8k | return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize); |
757 | 51.3k | case 7 : |
758 | 51.3k | return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize); |
759 | 164k | } |
760 | 164k | } |
761 | | |
762 | | #endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */ |