Coverage Report

Created: 2023-06-07 07:04

/src/zstd/lib/compress/zstd_double_fast.c
Line
Count
Source
1
/*
2
 * Copyright (c) Meta Platforms, Inc. and affiliates.
3
 * All rights reserved.
4
 *
5
 * This source code is licensed under both the BSD-style license (found in the
6
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
 * in the COPYING file in the root directory of this source tree).
8
 * You may select, at your option, one of the above-listed licenses.
9
 */
10
11
#include "zstd_compress_internal.h"
12
#include "zstd_double_fast.h"
13
14
#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
15
16
static void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
17
                              void const* end, ZSTD_dictTableLoadMethod_e dtlm)
18
0
{
19
0
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
20
0
    U32* const hashLarge = ms->hashTable;
21
0
    U32  const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
22
0
    U32  const mls = cParams->minMatch;
23
0
    U32* const hashSmall = ms->chainTable;
24
0
    U32  const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
25
0
    const BYTE* const base = ms->window.base;
26
0
    const BYTE* ip = base + ms->nextToUpdate;
27
0
    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
28
0
    const U32 fastHashFillStep = 3;
29
30
    /* Always insert every fastHashFillStep position into the hash tables.
31
     * Insert the other positions into the large hash table if their entry
32
     * is empty.
33
     */
34
0
    for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
35
0
        U32 const curr = (U32)(ip - base);
36
0
        U32 i;
37
0
        for (i = 0; i < fastHashFillStep; ++i) {
38
0
            size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls);
39
0
            size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8);
40
0
            if (i == 0) {
41
0
                ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i);
42
0
            }
43
0
            if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {
44
0
                ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i);
45
0
            }
46
            /* Only load extra positions for ZSTD_dtlm_full */
47
0
            if (dtlm == ZSTD_dtlm_fast)
48
0
                break;
49
0
    }   }
50
0
}
51
52
static void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
53
                              void const* end, ZSTD_dictTableLoadMethod_e dtlm)
54
925k
{
55
925k
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
56
925k
    U32* const hashLarge = ms->hashTable;
57
925k
    U32  const hBitsL = cParams->hashLog;
58
925k
    U32  const mls = cParams->minMatch;
59
925k
    U32* const hashSmall = ms->chainTable;
60
925k
    U32  const hBitsS = cParams->chainLog;
61
925k
    const BYTE* const base = ms->window.base;
62
925k
    const BYTE* ip = base + ms->nextToUpdate;
63
925k
    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
64
925k
    const U32 fastHashFillStep = 3;
65
66
    /* Always insert every fastHashFillStep position into the hash tables.
67
     * Insert the other positions into the large hash table if their entry
68
     * is empty.
69
     */
70
150M
    for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
71
149M
        U32 const curr = (U32)(ip - base);
72
149M
        U32 i;
73
149M
        for (i = 0; i < fastHashFillStep; ++i) {
74
149M
            size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
75
149M
            size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
76
149M
            if (i == 0)
77
149M
                hashSmall[smHash] = curr + i;
78
149M
            if (i == 0 || hashLarge[lgHash] == 0)
79
149M
                hashLarge[lgHash] = curr + i;
80
            /* Only load extra positions for ZSTD_dtlm_full */
81
149M
            if (dtlm == ZSTD_dtlm_fast)
82
149M
                break;
83
149M
        }   }
84
925k
}
85
86
void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
87
                        const void* const end,
88
                        ZSTD_dictTableLoadMethod_e dtlm,
89
                        ZSTD_tableFillPurpose_e tfp)
90
925k
{
91
925k
    if (tfp == ZSTD_tfp_forCDict) {
92
0
        ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm);
93
925k
    } else {
94
925k
        ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm);
95
925k
    }
96
925k
}
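
The two fill variants above differ only in how they write entries (the CDict path packs a tag into each slot, the CCtx path stores the bare index), but both follow the insertion strategy described in the comment at lines 30-33 and 66-69. The sketch below is a minimal, standalone illustration of that strategy under assumed names (toyHash and TOY_TABLE_SIZE are placeholders, not zstd internals); it models the ZSTD_dtlm_full behaviour and leaves out the ZSTD_dtlm_fast early break and the tag packing.

#include <stdint.h>
#include <stdio.h>

#define TOY_TABLE_SIZE 256u

/* toy stand-in for ZSTD_hashPtr: any hash over nbBytes bytes works for the demo */
static uint32_t toyHash(const uint8_t* p, size_t nbBytes)
{
    uint32_t h = 2166136261u;                       /* FNV-1a */
    for (size_t i = 0; i < nbBytes; i++) { h ^= p[i]; h *= 16777619u; }
    return h % TOY_TABLE_SIZE;
}

static void toyFillDoubleHashTable(uint32_t* longTable, uint32_t* shortTable,
                                   const uint8_t* base, size_t srcSize)
{
    const uint32_t fastHashFillStep = 3;
    /* stop early enough that an 8-byte read at the last inserted position is safe */
    for (size_t pos = 0; pos + fastHashFillStep - 1 + 8 <= srcSize; pos += fastHashFillStep) {
        for (uint32_t i = 0; i < fastHashFillStep; i++) {
            uint32_t const hS = toyHash(base + pos + i, 4);   /* short hash (mls bytes) */
            uint32_t const hL = toyHash(base + pos + i, 8);   /* long hash (8 bytes) */
            if (i == 0)
                shortTable[hS] = (uint32_t)(pos + i);   /* short table: first position of each step only */
            if (i == 0 || longTable[hL] == 0)
                longTable[hL] = (uint32_t)(pos + i);    /* long table: also fill slots that are still empty */
        }
    }
}

int main(void)
{
    uint32_t longTable[TOY_TABLE_SIZE] = {0};
    uint32_t shortTable[TOY_TABLE_SIZE] = {0};
    const uint8_t src[] = "an example buffer with some repeated text, repeated text";
    toyFillDoubleHashTable(longTable, shortTable, src, sizeof(src) - 1);
    printf("slot for first 8 bytes in long table: %u\n", longTable[toyHash(src, 8)]);
    return 0;
}
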
97
98
99
FORCE_INLINE_TEMPLATE
100
size_t ZSTD_compressBlock_doubleFast_noDict_generic(
101
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
102
        void const* src, size_t srcSize, U32 const mls /* template */)
103
809k
{
104
809k
    ZSTD_compressionParameters const* cParams = &ms->cParams;
105
809k
    U32* const hashLong = ms->hashTable;
106
809k
    const U32 hBitsL = cParams->hashLog;
107
809k
    U32* const hashSmall = ms->chainTable;
108
809k
    const U32 hBitsS = cParams->chainLog;
109
809k
    const BYTE* const base = ms->window.base;
110
809k
    const BYTE* const istart = (const BYTE*)src;
111
809k
    const BYTE* anchor = istart;
112
809k
    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
113
    /* presumes that, if there is a dictionary, it must be using Attach mode */
114
809k
    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
115
809k
    const BYTE* const prefixLowest = base + prefixLowestIndex;
116
809k
    const BYTE* const iend = istart + srcSize;
117
809k
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
118
809k
    U32 offset_1=rep[0], offset_2=rep[1];
119
809k
    U32 offsetSaved1 = 0, offsetSaved2 = 0;
120
121
809k
    size_t mLength;
122
809k
    U32 offset;
123
809k
    U32 curr;
124
125
    /* how many positions to search before increasing step size */
126
809k
    const size_t kStepIncr = 1 << kSearchStrength;
127
    /* the position at which to increment the step size if no match is found */
128
809k
    const BYTE* nextStep;
129
809k
    size_t step; /* the current step size */
130
131
809k
    size_t hl0; /* the long hash at ip */
132
809k
    size_t hl1; /* the long hash at ip1 */
133
134
809k
    U32 idxl0; /* the long match index for ip */
135
809k
    U32 idxl1; /* the long match index for ip1 */
136
137
809k
    const BYTE* matchl0; /* the long match for ip */
138
809k
    const BYTE* matchs0; /* the short match for ip */
139
809k
    const BYTE* matchl1; /* the long match for ip1 */
140
141
809k
    const BYTE* ip = istart; /* the current position */
142
809k
    const BYTE* ip1; /* the next position */
143
144
809k
    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic");
145
146
    /* init */
147
809k
    ip += ((ip - prefixLowest) == 0);
148
809k
    {
149
809k
        U32 const current = (U32)(ip - base);
150
809k
        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
151
809k
        U32 const maxRep = current - windowLow;
152
809k
        if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
153
809k
        if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
154
809k
    }
155
156
    /* Outer Loop: one iteration per match found and stored */
157
2.72M
    while (1) {
158
2.72M
        step = 1;
159
2.72M
        nextStep = ip + kStepIncr;
160
2.72M
        ip1 = ip + step;
161
162
2.72M
        if (ip1 > ilimit) {
163
734k
            goto _cleanup;
164
734k
        }
165
166
1.98M
        hl0 = ZSTD_hashPtr(ip, hBitsL, 8);
167
1.98M
        idxl0 = hashLong[hl0];
168
1.98M
        matchl0 = base + idxl0;
169
170
        /* Inner Loop: one iteration per search / position */
171
15.1M
        do {
172
15.1M
            const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls);
173
15.1M
            const U32 idxs0 = hashSmall[hs0];
174
15.1M
            curr = (U32)(ip-base);
175
15.1M
            matchs0 = base + idxs0;
176
177
15.1M
            hashLong[hl0] = hashSmall[hs0] = curr;   /* update hash tables */
178
179
            /* check noDict repcode */
180
15.1M
            if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
181
700k
                mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
182
700k
                ip++;
183
700k
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
184
0
                goto _match_stored;
185
700k
            }
186
187
14.4M
            hl1 = ZSTD_hashPtr(ip1, hBitsL, 8);
188
189
14.4M
            if (idxl0 > prefixLowestIndex) {
190
                /* check prefix long match */
191
10.7M
                if (MEM_read64(matchl0) == MEM_read64(ip)) {
192
374k
                    mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
193
374k
                    offset = (U32)(ip-matchl0);
194
460k
                    while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
195
374k
                    goto _match_found;
196
374k
                }
197
10.7M
            }
198
199
14.1M
            idxl1 = hashLong[hl1];
200
14.1M
            matchl1 = base + idxl1;
201
202
14.1M
            if (idxs0 > prefixLowestIndex) {
203
                /* check prefix short match */
204
8.44M
                if (MEM_read32(matchs0) == MEM_read32(ip)) {
205
837k
                    goto _search_next_long;
206
837k
                }
207
8.44M
            }
208
209
13.2M
            if (ip1 >= nextStep) {
210
27.7k
                PREFETCH_L1(ip1 + 64);
211
27.7k
                PREFETCH_L1(ip1 + 128);
212
27.7k
                step++;
213
27.7k
                nextStep += kStepIncr;
214
27.7k
            }
215
13.2M
            ip = ip1;
216
13.2M
            ip1 += step;
217
218
13.2M
            hl0 = hl1;
219
13.2M
            idxl0 = idxl1;
220
13.2M
            matchl0 = matchl1;
221
    #if defined(__aarch64__)
222
            PREFETCH_L1(ip+256);
223
    #endif
224
13.2M
        } while (ip1 <= ilimit);
225
226
809k
_cleanup:
227
        /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
228
         * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
229
809k
        offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
230
231
        /* save reps for next block */
232
809k
        rep[0] = offset_1 ? offset_1 : offsetSaved1;
233
809k
        rep[1] = offset_2 ? offset_2 : offsetSaved2;
234
235
        /* Return the last literals size */
236
809k
        return (size_t)(iend - anchor);
237
238
837k
_search_next_long:
239
240
        /* check prefix long +1 match */
241
837k
        if (idxl1 > prefixLowestIndex) {
242
731k
            if (MEM_read64(matchl1) == MEM_read64(ip1)) {
243
104k
                ip = ip1;
244
104k
                mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8;
245
104k
                offset = (U32)(ip-matchl1);
246
179k
                while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */
247
104k
                goto _match_found;
248
104k
            }
249
731k
        }
250
251
        /* if no long +1 match, explore the short match we found */
252
732k
        mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
253
732k
        offset = (U32)(ip - matchs0);
254
828k
        while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */
255
256
        /* fall-through */
257
258
1.21M
_match_found: /* requires ip, offset, mLength */
259
1.21M
        offset_2 = offset_1;
260
1.21M
        offset_1 = offset;
261
262
1.21M
        if (step < 4) {
263
            /* It is unsafe to write this value back to the hashtable when ip1 is
264
             * greater than or equal to the new ip we will have after we're done
265
             * processing this match. Rather than perform that test directly
266
             * (ip1 >= ip + mLength), which costs speed in practice, we do a simpler
267
             * more predictable test. The minmatch even if we take a short match is
268
             * 4 bytes, so as long as step, the distance between ip and ip1
269
             * (initially) is less than 4, we know ip1 < new ip. */
270
1.21M
            hashLong[hl1] = (U32)(ip1 - base);
271
1.21M
        }
272
273
1.21M
        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
274
275
1.91M
_match_stored:
276
        /* match found */
277
1.91M
        ip += mLength;
278
1.91M
        anchor = ip;
279
280
1.91M
        if (ip <= ilimit) {
281
            /* Complementary insertion */
282
            /* done after iLimit test, as candidates could be > iend-8 */
283
1.61M
            {   U32 const indexToInsert = curr+2;
284
1.61M
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
285
1.61M
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
286
1.61M
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
287
1.61M
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
288
1.61M
            }
289
290
            /* check immediate repcode */
291
1.76M
            while ( (ip <= ilimit)
292
1.76M
                 && ( (offset_2>0)
293
1.73M
                    & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
294
                /* store sequence */
295
148k
                size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
296
148k
                U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
297
148k
                hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
298
148k
                hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
299
148k
                ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
300
0
                ip += rLength;
301
148k
                anchor = ip;
302
148k
                continue;   /* faster when present ... (?) */
303
148k
            }
304
1.61M
        }
305
1.91M
    }
306
809k
}
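
A pattern that recurs throughout the function above (and in the dictMatchState and extDict variants below) is the one-line "catch up" loop: once a forward match is found, it is extended backwards byte by byte while both pointers stay inside their valid ranges. A standalone sketch of that idiom, with assumed names:

#include <stddef.h>

/* Extend an already-found match of length mLength backwards. ip/match are
 * updated in place; anchor and lowest bound how far back each side may go.
 * The in-tree code folds the two range tests into one bitwise & to avoid an
 * extra branch, and repeats this loop inline rather than calling a helper. */
static size_t toyCatchUp(const unsigned char** ipPtr, const unsigned char** matchPtr,
                         const unsigned char* anchor, const unsigned char* lowest,
                         size_t mLength)
{
    const unsigned char* ip = *ipPtr;
    const unsigned char* match = *matchPtr;
    while ((ip > anchor) && (match > lowest) && (ip[-1] == match[-1])) {
        ip--; match--; mLength++;       /* grow the match towards the front */
    }
    *ipPtr = ip;
    *matchPtr = match;
    return mLength;
}
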
307
308
309
FORCE_INLINE_TEMPLATE
310
size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
311
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
312
        void const* src, size_t srcSize,
313
        U32 const mls /* template */)
314
0
{
315
0
    ZSTD_compressionParameters const* cParams = &ms->cParams;
316
0
    U32* const hashLong = ms->hashTable;
317
0
    const U32 hBitsL = cParams->hashLog;
318
0
    U32* const hashSmall = ms->chainTable;
319
0
    const U32 hBitsS = cParams->chainLog;
320
0
    const BYTE* const base = ms->window.base;
321
0
    const BYTE* const istart = (const BYTE*)src;
322
0
    const BYTE* ip = istart;
323
0
    const BYTE* anchor = istart;
324
0
    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
325
    /* presumes that, if there is a dictionary, it must be using Attach mode */
326
0
    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
327
0
    const BYTE* const prefixLowest = base + prefixLowestIndex;
328
0
    const BYTE* const iend = istart + srcSize;
329
0
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
330
0
    U32 offset_1=rep[0], offset_2=rep[1];
331
332
0
    const ZSTD_matchState_t* const dms = ms->dictMatchState;
333
0
    const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
334
0
    const U32* const dictHashLong  = dms->hashTable;
335
0
    const U32* const dictHashSmall = dms->chainTable;
336
0
    const U32 dictStartIndex       = dms->window.dictLimit;
337
0
    const BYTE* const dictBase     = dms->window.base;
338
0
    const BYTE* const dictStart    = dictBase + dictStartIndex;
339
0
    const BYTE* const dictEnd      = dms->window.nextSrc;
340
0
    const U32 dictIndexDelta       = prefixLowestIndex - (U32)(dictEnd - dictBase);
341
0
    const U32 dictHBitsL           = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
342
0
    const U32 dictHBitsS           = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
343
0
    const U32 dictAndPrefixLength  = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
344
345
0
    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");
346
347
    /* if a dictionary is attached, it must be within window range */
348
0
    assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
349
350
0
    if (ms->prefetchCDictTables) {
351
0
        size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
352
0
        size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
353
0
        PREFETCH_AREA(dictHashLong, hashTableBytes)
354
0
        PREFETCH_AREA(dictHashSmall, chainTableBytes)
355
0
    }
356
357
    /* init */
358
0
    ip += (dictAndPrefixLength == 0);
359
360
    /* dictMatchState repCode checks don't currently handle repCode == 0
361
     * disabling. */
362
0
    assert(offset_1 <= dictAndPrefixLength);
363
0
    assert(offset_2 <= dictAndPrefixLength);
364
365
    /* Main Search Loop */
366
0
    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
367
0
        size_t mLength;
368
0
        U32 offset;
369
0
        size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
370
0
        size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
371
0
        size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8);
372
0
        size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls);
373
0
        U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS];
374
0
        U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS];
375
0
        int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL);
376
0
        int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS);
377
0
        U32 const curr = (U32)(ip-base);
378
0
        U32 const matchIndexL = hashLong[h2];
379
0
        U32 matchIndexS = hashSmall[h];
380
0
        const BYTE* matchLong = base + matchIndexL;
381
0
        const BYTE* match = base + matchIndexS;
382
0
        const U32 repIndex = curr + 1 - offset_1;
383
0
        const BYTE* repMatch = (repIndex < prefixLowestIndex) ?
384
0
                               dictBase + (repIndex - dictIndexDelta) :
385
0
                               base + repIndex;
386
0
        hashLong[h2] = hashSmall[h] = curr;   /* update hash tables */
387
388
        /* check repcode */
389
0
        if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
390
0
            && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
391
0
            const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
392
0
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
393
0
            ip++;
394
0
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
395
0
            goto _match_stored;
396
0
        }
397
398
0
        if (matchIndexL > prefixLowestIndex) {
399
            /* check prefix long match */
400
0
            if (MEM_read64(matchLong) == MEM_read64(ip)) {
401
0
                mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
402
0
                offset = (U32)(ip-matchLong);
403
0
                while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
404
0
                goto _match_found;
405
0
            }
406
0
        } else if (dictTagsMatchL) {
407
            /* check dictMatchState long match */
408
0
            U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
409
0
            const BYTE* dictMatchL = dictBase + dictMatchIndexL;
410
0
            assert(dictMatchL < dictEnd);
411
412
0
            if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
413
0
                mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
414
0
                offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
415
0
                while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
416
0
                goto _match_found;
417
0
        }   }
418
419
0
        if (matchIndexS > prefixLowestIndex) {
420
            /* check prefix short match */
421
0
            if (MEM_read32(match) == MEM_read32(ip)) {
422
0
                goto _search_next_long;
423
0
            }
424
0
        } else if (dictTagsMatchS) {
425
            /* check dictMatchState short match */
426
0
            U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS;
427
0
            match = dictBase + dictMatchIndexS;
428
0
            matchIndexS = dictMatchIndexS + dictIndexDelta;
429
430
0
            if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
431
0
                goto _search_next_long;
432
0
        }   }
433
434
0
        ip += ((ip-anchor) >> kSearchStrength) + 1;
435
#if defined(__aarch64__)
436
        PREFETCH_L1(ip+256);
437
#endif
438
0
        continue;
439
440
0
_search_next_long:
441
0
        {   size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
442
0
            size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
443
0
            U32 const matchIndexL3 = hashLong[hl3];
444
0
            U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS];
445
0
            int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3);
446
0
            const BYTE* matchL3 = base + matchIndexL3;
447
0
            hashLong[hl3] = curr + 1;
448
449
            /* check prefix long +1 match */
450
0
            if (matchIndexL3 > prefixLowestIndex) {
451
0
                if (MEM_read64(matchL3) == MEM_read64(ip+1)) {
452
0
                    mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
453
0
                    ip++;
454
0
                    offset = (U32)(ip-matchL3);
455
0
                    while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
456
0
                    goto _match_found;
457
0
                }
458
0
            } else if (dictTagsMatchL3) {
459
                /* check dict long +1 match */
460
0
                U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;
461
0
                const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
462
0
                assert(dictMatchL3 < dictEnd);
463
0
                if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
464
0
                    mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
465
0
                    ip++;
466
0
                    offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
467
0
                    while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
468
0
                    goto _match_found;
469
0
        }   }   }
470
471
        /* if no long +1 match, explore the short match we found */
472
0
        if (matchIndexS < prefixLowestIndex) {
473
0
            mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
474
0
            offset = (U32)(curr - matchIndexS);
475
0
            while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
476
0
        } else {
477
0
            mLength = ZSTD_count(ip+4, match+4, iend) + 4;
478
0
            offset = (U32)(ip - match);
479
0
            while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
480
0
        }
481
482
0
_match_found:
483
0
        offset_2 = offset_1;
484
0
        offset_1 = offset;
485
486
0
        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
487
488
0
_match_stored:
489
        /* match found */
490
0
        ip += mLength;
491
0
        anchor = ip;
492
493
0
        if (ip <= ilimit) {
494
            /* Complementary insertion */
495
            /* done after iLimit test, as candidates could be > iend-8 */
496
0
            {   U32 const indexToInsert = curr+2;
497
0
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
498
0
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
499
0
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
500
0
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
501
0
            }
502
503
            /* check immediate repcode */
504
0
            while (ip <= ilimit) {
505
0
                U32 const current2 = (U32)(ip-base);
506
0
                U32 const repIndex2 = current2 - offset_2;
507
0
                const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
508
0
                        dictBase + repIndex2 - dictIndexDelta :
509
0
                        base + repIndex2;
510
0
                if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
511
0
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
512
0
                    const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
513
0
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
514
0
                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
515
0
                    ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
516
0
                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
517
0
                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
518
0
                    ip += repLength2;
519
0
                    anchor = ip;
520
0
                    continue;
521
0
                }
522
0
                break;
523
0
            }
524
0
        }
525
0
    }   /* while (ip < ilimit) */
526
527
    /* save reps for next block */
528
0
    rep[0] = offset_1;
529
0
    rep[1] = offset_2;
530
531
    /* Return the last literals size */
532
0
    return (size_t)(iend - anchor);
533
0
}
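
The dictMatchState path above probes the dictionary's hash tables through tagged entries (ZSTD_writeTaggedIndex / ZSTD_comparePackedTags, with hashes widened by ZSTD_SHORT_CACHE_TAG_BITS). The idea is to keep a few bits of the hash next to the stored index so that most mismatches are rejected without touching the cold dictionary bytes at all. A sketch of that packing under assumed names and constants (TOY_TAG_BITS is a placeholder, not the zstd definition):

#include <stdint.h>
#include <stddef.h>

#define TOY_TAG_BITS 8u
#define TOY_TAG_MASK ((1u << TOY_TAG_BITS) - 1)

/* hashAndTag: the high bits select the table slot, the low TOY_TAG_BITS bits
 * are kept as a tag. The entry packs (index, tag) into a single 32-bit word. */
static void toyWriteTaggedIndex(uint32_t* table, size_t hashAndTag, uint32_t index)
{
    uint32_t const tag = (uint32_t)(hashAndTag & TOY_TAG_MASK);
    table[hashAndTag >> TOY_TAG_BITS] = (index << TOY_TAG_BITS) | tag;
}

/* cheap pre-check: compare the stored tag against the probing hash's tag
 * before ever dereferencing the (likely cache-cold) dictionary data */
static int toyTagsMatch(uint32_t packedEntry, size_t hashAndTag)
{
    return (packedEntry & TOY_TAG_MASK) == (uint32_t)(hashAndTag & TOY_TAG_MASK);
}

static uint32_t toyUnpackIndex(uint32_t packedEntry)
{
    return packedEntry >> TOY_TAG_BITS;
}
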
534
535
#define ZSTD_GEN_DFAST_FN(dictMode, mls)                                                                 \
536
    static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls(                                      \
537
            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],                          \
538
            void const* src, size_t srcSize)                                                             \
539
974k
    {                                                                                                    \
540
974k
        return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
541
974k
    }
Instantiations (per-specialization counts; each instantiation executes lines 539-541 the same number of times):
zstd_double_fast.c:ZSTD_compressBlock_doubleFast_noDict_4: 185k
zstd_double_fast.c:ZSTD_compressBlock_doubleFast_noDict_5: 176k
zstd_double_fast.c:ZSTD_compressBlock_doubleFast_noDict_6: 190k
zstd_double_fast.c:ZSTD_compressBlock_doubleFast_noDict_7: 257k
Unexecuted instantiation: zstd_double_fast.c:ZSTD_compressBlock_doubleFast_dictMatchState_4
Unexecuted instantiation: zstd_double_fast.c:ZSTD_compressBlock_doubleFast_dictMatchState_5
Unexecuted instantiation: zstd_double_fast.c:ZSTD_compressBlock_doubleFast_dictMatchState_6
Unexecuted instantiation: zstd_double_fast.c:ZSTD_compressBlock_doubleFast_dictMatchState_7
zstd_double_fast.c:ZSTD_compressBlock_doubleFast_extDict_4: 46.7k
zstd_double_fast.c:ZSTD_compressBlock_doubleFast_extDict_5: 32.9k
zstd_double_fast.c:ZSTD_compressBlock_doubleFast_extDict_6: 33.8k
zstd_double_fast.c:ZSTD_compressBlock_doubleFast_extDict_7: 51.3k
542
543
ZSTD_GEN_DFAST_FN(noDict, 4)
544
ZSTD_GEN_DFAST_FN(noDict, 5)
545
ZSTD_GEN_DFAST_FN(noDict, 6)
546
ZSTD_GEN_DFAST_FN(noDict, 7)
547
548
ZSTD_GEN_DFAST_FN(dictMatchState, 4)
549
ZSTD_GEN_DFAST_FN(dictMatchState, 5)
550
ZSTD_GEN_DFAST_FN(dictMatchState, 6)
551
ZSTD_GEN_DFAST_FN(dictMatchState, 7)
552
553
554
size_t ZSTD_compressBlock_doubleFast(
555
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
556
        void const* src, size_t srcSize)
557
809k
{
558
809k
    const U32 mls = ms->cParams.minMatch;
559
809k
    switch(mls)
560
809k
    {
561
161k
    default: /* includes case 3 */
562
185k
    case 4 :
563
185k
        return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize);
564
176k
    case 5 :
565
176k
        return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize);
566
190k
    case 6 :
567
190k
        return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize);
568
257k
    case 7 :
569
257k
        return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize);
570
809k
    }
571
809k
}
572
573
574
size_t ZSTD_compressBlock_doubleFast_dictMatchState(
575
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
576
        void const* src, size_t srcSize)
577
0
{
578
0
    const U32 mls = ms->cParams.minMatch;
579
0
    switch(mls)
580
0
    {
581
0
    default: /* includes case 3 */
582
0
    case 4 :
583
0
        return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
584
0
    case 5 :
585
0
        return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
586
0
    case 6 :
587
0
        return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
588
0
    case 7 :
589
0
        return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
590
0
    }
591
0
}
592
593
594
static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
595
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
596
        void const* src, size_t srcSize,
597
        U32 const mls /* template */)
598
164k
{
599
164k
    ZSTD_compressionParameters const* cParams = &ms->cParams;
600
164k
    U32* const hashLong = ms->hashTable;
601
164k
    U32  const hBitsL = cParams->hashLog;
602
164k
    U32* const hashSmall = ms->chainTable;
603
164k
    U32  const hBitsS = cParams->chainLog;
604
164k
    const BYTE* const istart = (const BYTE*)src;
605
164k
    const BYTE* ip = istart;
606
164k
    const BYTE* anchor = istart;
607
164k
    const BYTE* const iend = istart + srcSize;
608
164k
    const BYTE* const ilimit = iend - 8;
609
164k
    const BYTE* const base = ms->window.base;
610
164k
    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
611
164k
    const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
612
164k
    const U32   dictStartIndex = lowLimit;
613
164k
    const U32   dictLimit = ms->window.dictLimit;
614
164k
    const U32   prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
615
164k
    const BYTE* const prefixStart = base + prefixStartIndex;
616
164k
    const BYTE* const dictBase = ms->window.dictBase;
617
164k
    const BYTE* const dictStart = dictBase + dictStartIndex;
618
164k
    const BYTE* const dictEnd = dictBase + prefixStartIndex;
619
164k
    U32 offset_1=rep[0], offset_2=rep[1];
620
621
164k
    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);
622
623
    /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
624
164k
    if (prefixStartIndex == dictStartIndex)
625
13.6k
        return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize);
626
627
    /* Search Loop */
628
3.11M
    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
629
2.96M
        const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
630
2.96M
        const U32 matchIndex = hashSmall[hSmall];
631
2.96M
        const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
632
2.96M
        const BYTE* match = matchBase + matchIndex;
633
634
2.96M
        const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
635
2.96M
        const U32 matchLongIndex = hashLong[hLong];
636
2.96M
        const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
637
2.96M
        const BYTE* matchLong = matchLongBase + matchLongIndex;
638
639
2.96M
        const U32 curr = (U32)(ip-base);
640
2.96M
        const U32 repIndex = curr + 1 - offset_1;   /* offset_1 expected <= curr +1 */
641
2.96M
        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
642
2.96M
        const BYTE* const repMatch = repBase + repIndex;
643
2.96M
        size_t mLength;
644
2.96M
        hashSmall[hSmall] = hashLong[hLong] = curr;   /* update hash table */
645
646
2.96M
        if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
647
2.96M
            & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
648
2.96M
          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
649
108k
            const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
650
108k
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
651
108k
            ip++;
652
216k
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
653
2.85M
        } else {
654
2.85M
            if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
655
56.5k
                const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
656
56.5k
                const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
657
56.5k
                U32 offset;
658
56.5k
                mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
659
56.5k
                offset = curr - matchLongIndex;
660
70.4k
                while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
661
56.5k
                offset_2 = offset_1;
662
56.5k
                offset_1 = offset;
663
56.5k
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
664
665
2.79M
            } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
666
117k
                size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
667
117k
                U32 const matchIndex3 = hashLong[h3];
668
117k
                const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
669
117k
                const BYTE* match3 = match3Base + matchIndex3;
670
117k
                U32 offset;
671
117k
                hashLong[h3] = curr + 1;
672
117k
                if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
673
13.3k
                    const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
674
13.3k
                    const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
675
13.3k
                    mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
676
13.3k
                    ip++;
677
13.3k
                    offset = curr+1 - matchIndex3;
678
22.2k
                    while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
679
104k
                } else {
680
104k
                    const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
681
104k
                    const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
682
104k
                    mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
683
104k
                    offset = curr - matchIndex;
684
128k
                    while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
685
104k
                }
686
117k
                offset_2 = offset_1;
687
117k
                offset_1 = offset;
688
117k
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
689
690
2.68M
            } else {
691
2.68M
                ip += ((ip-anchor) >> kSearchStrength) + 1;
692
2.68M
                continue;
693
2.68M
        }   }
694
695
        /* move to next sequence start */
696
281k
        ip += mLength;
697
281k
        anchor = ip;
698
699
281k
        if (ip <= ilimit) {
700
            /* Complementary insertion */
701
            /* done after iLimit test, as candidates could be > iend-8 */
702
229k
            {   U32 const indexToInsert = curr+2;
703
229k
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
704
229k
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
705
229k
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
706
229k
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
707
229k
            }
708
709
            /* check immediate repcode */
710
246k
            while (ip <= ilimit) {
711
242k
                U32 const current2 = (U32)(ip-base);
712
242k
                U32 const repIndex2 = current2 - offset_2;
713
242k
                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
714
242k
                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3)   /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
715
242k
                    & (offset_2 <= current2 - dictStartIndex))
716
242k
                  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
717
16.8k
                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
718
16.8k
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
719
16.8k
                    U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
720
16.8k
                    ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
721
0
                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
722
16.8k
                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
723
16.8k
                    ip += repLength2;
724
16.8k
                    anchor = ip;
725
16.8k
                    continue;
726
16.8k
                }
727
225k
                break;
728
242k
    }   }   }
729
730
    /* save reps for next block */
731
151k
    rep[0] = offset_1;
732
151k
    rep[1] = offset_2;
733
734
    /* Return the last literals size */
735
151k
    return (size_t)(iend - anchor);
736
151k
}
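
The extDict variant above repeatedly calls ZSTD_count_2segments because a match candidate can live in the old (dictBase) segment while the data being compressed lives in the current prefix; a match that reaches the end of the old segment is allowed to continue at prefixStart. The helper itself is defined in zstd_compress_internal.h; the sketch below only restates its behaviour as relied upon here, with a simplified byte-wise count and assumed toy names:

#include <stddef.h>
#include <stdint.h>

/* naive byte-wise match count, bounded by aEnd (stand-in for ZSTD_count) */
static size_t toyCount(const uint8_t* a, const uint8_t* b, const uint8_t* aEnd)
{
    size_t n = 0;
    while ((a + n < aEnd) && (a[n] == b[n])) n++;
    return n;
}

/* match lies in an old segment ending at mEnd; ip lies in the current prefix
 * ending at iEnd. If the comparison reaches mEnd, it continues against the
 * bytes starting at prefixStart, because logically the two segments are
 * adjacent in the original stream. */
static size_t toyCount2Segments(const uint8_t* ip, const uint8_t* match,
                                const uint8_t* iEnd, const uint8_t* mEnd,
                                const uint8_t* prefixStart)
{
    const uint8_t* const vEnd =
        (ip + (mEnd - match) < iEnd) ? ip + (mEnd - match) : iEnd;
    size_t const firstPart = toyCount(ip, match, vEnd);
    if (match + firstPart != mEnd)
        return firstPart;                    /* the match ended inside the old segment */
    return firstPart + toyCount(ip + firstPart, prefixStart, iEnd);
}
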
737
738
ZSTD_GEN_DFAST_FN(extDict, 4)
739
ZSTD_GEN_DFAST_FN(extDict, 5)
740
ZSTD_GEN_DFAST_FN(extDict, 6)
741
ZSTD_GEN_DFAST_FN(extDict, 7)
742
743
size_t ZSTD_compressBlock_doubleFast_extDict(
744
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
745
        void const* src, size_t srcSize)
746
164k
{
747
164k
    U32 const mls = ms->cParams.minMatch;
748
164k
    switch(mls)
749
164k
    {
750
42.0k
    default: /* includes case 3 */
751
46.7k
    case 4 :
752
46.7k
        return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize);
753
32.9k
    case 5 :
754
32.9k
        return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize);
755
33.8k
    case 6 :
756
33.8k
        return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize);
757
51.3k
    case 7 :
758
51.3k
        return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
759
164k
    }
760
164k
}
761
762
#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */
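
The ZSTD_GEN_DFAST_FN macro and the switch statements above implement a simple specialization pattern: the generic routines take mls as a parameter and are force-inlined, the macro stamps out one thin wrapper per supported mls value so that mls becomes a compile-time constant inside each wrapper, and the public entry points dispatch on cParams.minMatch. A self-contained sketch of the same pattern with assumed toy names:

#include <stddef.h>
#include <stdio.h>

/* stand-in for the generic, force-inlined block compressor: mls is meant to be
 * constant-folded inside each specialized wrapper */
static inline size_t toyGeneric(const void* src, size_t srcSize, unsigned const mls)
{
    (void)src;
    return srcSize / mls;   /* placeholder computation */
}

#define TOY_GEN_FN(mls)                                                    \
    static size_t toySpecialized_##mls(const void* src, size_t srcSize)   \
    {                                                                      \
        return toyGeneric(src, srcSize, mls);                              \
    }

TOY_GEN_FN(4)
TOY_GEN_FN(5)
TOY_GEN_FN(6)
TOY_GEN_FN(7)

static size_t toyDispatch(const void* src, size_t srcSize, unsigned minMatch)
{
    switch (minMatch) {
    default: /* out-of-range values fall back to the smallest specialization */
    case 4: return toySpecialized_4(src, srcSize);
    case 5: return toySpecialized_5(src, srcSize);
    case 6: return toySpecialized_6(src, srcSize);
    case 7: return toySpecialized_7(src, srcSize);
    }
}

int main(void)
{
    char buf[64] = {0};
    printf("%zu\n", toyDispatch(buf, sizeof(buf), 6));
    return 0;
}
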