Coverage Report

Created: 2025-07-18 06:58

/src/zstd/lib/compress/zstd_opt.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) Meta Platforms, Inc. and affiliates.
3
 * All rights reserved.
4
 *
5
 * This source code is licensed under both the BSD-style license (found in the
6
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
 * in the COPYING file in the root directory of this source tree).
8
 * You may select, at your option, one of the above-listed licenses.
9
 */
10
11
#include "zstd_compress_internal.h"
12
#include "hist.h"
13
#include "zstd_opt.h"
14
15
#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
16
 || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
17
 || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
18
19
54.2M
#define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
20
107M
#define ZSTD_MAX_PRICE     (1<<30)
21
22
5.87k
#define ZSTD_PREDEF_THRESHOLD 8   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
23
24
25
/*-*************************************
26
*  Price functions for optimal parser
27
***************************************/
28
29
#if 0    /* approximation at bit level (for tests) */
30
#  define BITCOST_ACCURACY 0
31
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
32
#  define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
33
#elif 0  /* fractional bit accuracy (for tests) */
34
#  define BITCOST_ACCURACY 8
35
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
36
#  define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
37
#else    /* opt==approx, ultra==accurate */
38
4.17G
#  define BITCOST_ACCURACY 8
39
3.02G
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
40
1.27G
#  define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
41
#endif
42
43
/* ZSTD_bitWeight() :
44
 * provide estimated "cost" of a stat in full bits only */
45
MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
46
132M
{
47
132M
    return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
48
132M
}
49
50
/* ZSTD_fracWeight() :
51
 * provide fractional-bit "cost" of a stat,
52
 * using linear interpolation approximation */
53
MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
54
1.14G
{
55
1.14G
    U32 const stat = rawStat + 1;
56
1.14G
    U32 const hb = ZSTD_highbit32(stat);
57
1.14G
    U32 const BWeight = hb * BITCOST_MULTIPLIER;
58
    /* Fweight was meant for "Fractional weight"
59
     * but it's effectively a value between 1 and 2
60
     * using fixed point arithmetic */
61
1.14G
    U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
62
1.14G
    U32 const weight = BWeight + FWeight;
63
1.14G
    assert(hb + BITCOST_ACCURACY < 31);
64
1.14G
    return weight;
65
1.14G
}
66
67
#if (DEBUGLEVEL>=2)
68
/* debugging function,
69
 * @return price in bytes as fractional value
70
 * for debug messages only */
71
MEM_STATIC double ZSTD_fCost(int price)
72
{
73
    return (double)price / (BITCOST_MULTIPLIER*8);
74
}
75
#endif
76
77
static int ZSTD_compressedLiterals(optState_t const* const optPtr)
78
94.7M
{
79
94.7M
    return optPtr->literalCompressionMode != ZSTD_ps_disable;
80
94.7M
}
81
82
static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
83
3.38M
{
84
3.38M
    if (ZSTD_compressedLiterals(optPtr))
85
3.38M
        optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
86
3.38M
    optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
87
3.38M
    optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
88
3.38M
    optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
89
3.38M
}
90
91
92
static U32 sum_u32(const unsigned table[], size_t nbElts)
93
32.3k
{
94
32.3k
    size_t n;
95
32.3k
    U32 total = 0;
96
2.58M
    for (n=0; n<nbElts; n++) {
97
2.55M
        total += table[n];
98
2.55M
    }
99
32.3k
    return total;
100
32.3k
}
101
102
typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;
103
104
static U32
105
ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
106
9.13k
{
107
9.13k
    U32 s, sum=0;
108
9.13k
    DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
109
9.13k
            (unsigned)lastEltIndex+1, (unsigned)shift );
110
9.13k
    assert(shift < 30);
111
1.68M
    for (s=0; s<lastEltIndex+1; s++) {
112
1.67M
        unsigned const base = base1 ? 1 : (table[s]>0);
113
1.67M
        unsigned const newStat = base + (table[s] >> shift);
114
1.67M
        sum += newStat;
115
1.67M
        table[s] = newStat;
116
1.67M
    }
117
9.13k
    return sum;
118
9.13k
}
119
120
/* ZSTD_scaleStats() :
121
 * reduce all elt frequencies in table if sum too large
122
 * return the resulting sum of elements */
123
static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
124
24.0k
{
125
24.0k
    U32 const prevsum = sum_u32(table, lastEltIndex+1);
126
24.0k
    U32 const factor = prevsum >> logTarget;
127
24.0k
    DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
128
24.0k
    assert(logTarget < 30);
129
24.0k
    if (factor <= 1) return prevsum;
130
4.98k
    return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
131
24.0k
}
132
133
/* ZSTD_rescaleFreqs() :
134
 * if first block (detected by optPtr->litLengthSum == 0) : init statistics
135
 *    take hints from dictionary if there is one
136
 *    and init from zero if there is none,
137
 *    using src for literals stats, and baseline stats for sequence symbols
138
 * otherwise downscale existing stats, to be used as seed for next block.
139
 */
140
static void
141
ZSTD_rescaleFreqs(optState_t* const optPtr,
142
            const BYTE* const src, size_t const srcSize,
143
                  int const optLevel)
144
10.1k
{
145
10.1k
    int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
146
10.1k
    DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
147
10.1k
    optPtr->priceType = zop_dynamic;
148
149
10.1k
    if (optPtr->litLengthSum == 0) {  /* no literals stats collected -> first block assumed -> init */
150
151
        /* heuristic: use pre-defined stats for too small inputs */
152
4.14k
        if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
153
51
            DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
154
51
            optPtr->priceType = zop_predef;
155
51
        }
156
157
4.14k
        assert(optPtr->symbolCosts != NULL);
158
4.14k
        if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
159
160
            /* huffman stats covering the full value set : table presumed generated by dictionary */
161
0
            optPtr->priceType = zop_dynamic;
162
163
0
            if (compressedLiterals) {
164
                /* generate literals statistics from huffman table */
165
0
                unsigned lit;
166
0
                assert(optPtr->litFreq != NULL);
167
0
                optPtr->litSum = 0;
168
0
                for (lit=0; lit<=MaxLit; lit++) {
169
0
                    U32 const scaleLog = 11;   /* scale to 2K */
170
0
                    U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
171
0
                    assert(bitCost <= scaleLog);
172
0
                    optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
173
0
                    optPtr->litSum += optPtr->litFreq[lit];
174
0
            }   }
175
176
0
            {   unsigned ll;
177
0
                FSE_CState_t llstate;
178
0
                FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
179
0
                optPtr->litLengthSum = 0;
180
0
                for (ll=0; ll<=MaxLL; ll++) {
181
0
                    U32 const scaleLog = 10;   /* scale to 1K */
182
0
                    U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
183
0
                    assert(bitCost < scaleLog);
184
0
                    optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
185
0
                    optPtr->litLengthSum += optPtr->litLengthFreq[ll];
186
0
            }   }
187
188
0
            {   unsigned ml;
189
0
                FSE_CState_t mlstate;
190
0
                FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
191
0
                optPtr->matchLengthSum = 0;
192
0
                for (ml=0; ml<=MaxML; ml++) {
193
0
                    U32 const scaleLog = 10;
194
0
                    U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
195
0
                    assert(bitCost < scaleLog);
196
0
                    optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
197
0
                    optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
198
0
            }   }
199
200
0
            {   unsigned of;
201
0
                FSE_CState_t ofstate;
202
0
                FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
203
0
                optPtr->offCodeSum = 0;
204
0
                for (of=0; of<=MaxOff; of++) {
205
0
                    U32 const scaleLog = 10;
206
0
                    U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
207
0
                    assert(bitCost < scaleLog);
208
0
                    optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
209
0
                    optPtr->offCodeSum += optPtr->offCodeFreq[of];
210
0
            }   }
211
212
4.14k
        } else {  /* first block, no dictionary */
213
214
4.14k
            assert(optPtr->litFreq != NULL);
215
4.14k
            if (compressedLiterals) {
216
                /* base initial cost of literals on direct frequency within src */
217
4.14k
                unsigned lit = MaxLit;
218
4.14k
                HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
219
4.14k
                optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
220
4.14k
            }
221
222
4.14k
            {   unsigned const baseLLfreqs[MaxLL+1] = {
223
4.14k
                    4, 2, 1, 1, 1, 1, 1, 1,
224
4.14k
                    1, 1, 1, 1, 1, 1, 1, 1,
225
4.14k
                    1, 1, 1, 1, 1, 1, 1, 1,
226
4.14k
                    1, 1, 1, 1, 1, 1, 1, 1,
227
4.14k
                    1, 1, 1, 1
228
4.14k
                };
229
4.14k
                ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
230
4.14k
                optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
231
4.14k
            }
232
233
4.14k
            {   unsigned ml;
234
223k
                for (ml=0; ml<=MaxML; ml++)
235
219k
                    optPtr->matchLengthFreq[ml] = 1;
236
4.14k
            }
237
4.14k
            optPtr->matchLengthSum = MaxML+1;
238
239
4.14k
            {   unsigned const baseOFCfreqs[MaxOff+1] = {
240
4.14k
                    6, 2, 1, 1, 2, 3, 4, 4,
241
4.14k
                    4, 3, 2, 1, 1, 1, 1, 1,
242
4.14k
                    1, 1, 1, 1, 1, 1, 1, 1,
243
4.14k
                    1, 1, 1, 1, 1, 1, 1, 1
244
4.14k
                };
245
4.14k
                ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
246
4.14k
                optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
247
4.14k
            }
248
249
4.14k
        }
250
251
6.02k
    } else {   /* new block : scale down accumulated statistics */
252
253
6.02k
        if (compressedLiterals)
254
6.02k
            optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
255
6.02k
        optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
256
6.02k
        optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
257
6.02k
        optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
258
6.02k
    }
259
260
10.1k
    ZSTD_setBasePrices(optPtr, optLevel);
261
10.1k
}
262
263
/* ZSTD_rawLiteralsCost() :
264
 * price of literals (only) in specified segment (which length can be 0).
265
 * does not include price of literalLength symbol */
266
static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
267
                                const optState_t* const optPtr,
268
                                int optLevel)
269
79.2M
{
270
79.2M
    DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength);
271
79.2M
    if (litLength == 0) return 0;
272
273
79.2M
    if (!ZSTD_compressedLiterals(optPtr))
274
0
        return (litLength << 3) * BITCOST_MULTIPLIER;  /* Uncompressed - 8 bytes per literal. */
275
276
79.2M
    if (optPtr->priceType == zop_predef)
277
0
        return (litLength*6) * BITCOST_MULTIPLIER;  /* 6 bit per literal - no statistic used */
278
279
    /* dynamic statistics */
280
79.2M
    {   U32 price = optPtr->litSumBasePrice * litLength;
281
79.2M
        U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER;
282
79.2M
        U32 u;
283
79.2M
        assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER);
284
158M
        for (u=0; u < litLength; u++) {
285
79.2M
            U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel);
286
79.2M
            if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
287
79.2M
            price -= litPrice;
288
79.2M
        }
289
79.2M
        return price;
290
79.2M
    }
291
79.2M
}
292
293
/* ZSTD_litLengthPrice() :
294
 * cost of literalLength symbol */
295
static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
296
252M
{
297
252M
    assert(litLength <= ZSTD_BLOCKSIZE_MAX);
298
252M
    if (optPtr->priceType == zop_predef)
299
0
        return WEIGHT(litLength, optLevel);
300
301
    /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
302
     * because it isn't representable in the zstd format.
303
     * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
304
     * In such a case, the block would be all literals.
305
     */
306
252M
    if (litLength == ZSTD_BLOCKSIZE_MAX)
307
0
        return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
308
309
    /* dynamic statistics */
310
252M
    {   U32 const llCode = ZSTD_LLcode(litLength);
311
252M
        return (LL_bits[llCode] * BITCOST_MULTIPLIER)
312
252M
             + optPtr->litLengthSumBasePrice
313
252M
             - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
314
252M
    }
315
252M
}
316
317
/* ZSTD_getMatchPrice() :
318
 * Provides the cost of the match part (offset + matchLength) of a sequence.
319
 * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
320
 * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
321
 * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
322
 */
323
FORCE_INLINE_TEMPLATE U32
324
ZSTD_getMatchPrice(U32 const offBase,
325
                   U32 const matchLength,
326
             const optState_t* const optPtr,
327
                   int const optLevel)
328
466M
{
329
466M
    U32 price;
330
466M
    U32 const offCode = ZSTD_highbit32(offBase);
331
466M
    U32 const mlBase = matchLength - MINMATCH;
332
466M
    assert(matchLength >= MINMATCH);
333
334
466M
    if (optPtr->priceType == zop_predef)  /* fixed scheme, does not use statistics */
335
0
        return WEIGHT(mlBase, optLevel)
336
0
             + ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */
337
338
    /* dynamic statistics */
339
466M
    price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
340
466M
    if ((optLevel<2) /*static*/ && offCode >= 20)
341
0
        price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */
342
343
    /* match Length */
344
466M
    {   U32 const mlCode = ZSTD_MLcode(mlBase);
345
466M
        price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
346
466M
    }
347
348
466M
    price += BITCOST_MULTIPLIER / 5;   /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */
349
350
466M
    DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
351
466M
    return price;
352
466M
}
353
354
/* ZSTD_updateStats() :
355
 * assumption : literals + litLength <= iend */
356
static void ZSTD_updateStats(optState_t* const optPtr,
357
                             U32 litLength, const BYTE* literals,
358
                             U32 offBase, U32 matchLength)
359
12.1M
{
360
    /* literals */
361
12.1M
    if (ZSTD_compressedLiterals(optPtr)) {
362
12.1M
        U32 u;
363
54.2M
        for (u=0; u < litLength; u++)
364
42.1M
            optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
365
12.1M
        optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
366
12.1M
    }
367
368
    /* literal Length */
369
12.1M
    {   U32 const llCode = ZSTD_LLcode(litLength);
370
12.1M
        optPtr->litLengthFreq[llCode]++;
371
12.1M
        optPtr->litLengthSum++;
372
12.1M
    }
373
374
    /* offset code : follows storeSeq() numeric representation */
375
12.1M
    {   U32 const offCode = ZSTD_highbit32(offBase);
376
12.1M
        assert(offCode <= MaxOff);
377
12.1M
        optPtr->offCodeFreq[offCode]++;
378
12.1M
        optPtr->offCodeSum++;
379
12.1M
    }
380
381
    /* match Length */
382
12.1M
    {   U32 const mlBase = matchLength - MINMATCH;
383
12.1M
        U32 const mlCode = ZSTD_MLcode(mlBase);
384
12.1M
        optPtr->matchLengthFreq[mlCode]++;
385
12.1M
        optPtr->matchLengthSum++;
386
12.1M
    }
387
12.1M
}
388
389
390
/* ZSTD_readMINMATCH() :
391
 * function safe only for comparisons
392
 * assumption : memPtr must be at least 4 bytes before end of buffer */
393
MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
394
574M
{
395
574M
    switch (length)
396
574M
    {
397
0
    default :
398
159M
    case 4 : return MEM_read32(memPtr);
399
415M
    case 3 : if (MEM_isLittleEndian())
400
415M
                return MEM_read32(memPtr)<<8;
401
0
             else
402
0
                return MEM_read32(memPtr)>>8;
403
574M
    }
404
574M
}
405
406
407
/* Update hashTable3 up to ip (excluded)
408
   Assumption : always within prefix (i.e. not within extDict) */
409
static
410
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
411
U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_MatchState_t* ms,
412
                                       U32* nextToUpdate3,
413
                                       const BYTE* const ip)
414
48.8M
{
415
48.8M
    U32* const hashTable3 = ms->hashTable3;
416
48.8M
    U32 const hashLog3 = ms->hashLog3;
417
48.8M
    const BYTE* const base = ms->window.base;
418
48.8M
    U32 idx = *nextToUpdate3;
419
48.8M
    U32 const target = (U32)(ip - base);
420
48.8M
    size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
421
48.8M
    assert(hashLog3 > 0);
422
423
195M
    while(idx < target) {
424
146M
        hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
425
146M
        idx++;
426
146M
    }
427
428
48.8M
    *nextToUpdate3 = target;
429
48.8M
    return hashTable3[hash3];
430
48.8M
}
431
432
433
/*-*************************************
434
*  Binary Tree search
435
***************************************/
436
/** ZSTD_insertBt1() : add one or multiple positions to tree.
437
 * @param ip assumed <= iend-8 .
438
 * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
439
 * @return : nb of positions added */
440
static
441
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
442
U32 ZSTD_insertBt1(
443
                const ZSTD_MatchState_t* ms,
444
                const BYTE* const ip, const BYTE* const iend,
445
                U32 const target,
446
                U32 const mls, const int extDict)
447
23.9M
{
448
23.9M
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
449
23.9M
    U32*   const hashTable = ms->hashTable;
450
23.9M
    U32    const hashLog = cParams->hashLog;
451
23.9M
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
452
23.9M
    U32*   const bt = ms->chainTable;
453
23.9M
    U32    const btLog  = cParams->chainLog - 1;
454
23.9M
    U32    const btMask = (1 << btLog) - 1;
455
23.9M
    U32 matchIndex = hashTable[h];
456
23.9M
    size_t commonLengthSmaller=0, commonLengthLarger=0;
457
23.9M
    const BYTE* const base = ms->window.base;
458
23.9M
    const BYTE* const dictBase = ms->window.dictBase;
459
23.9M
    const U32 dictLimit = ms->window.dictLimit;
460
23.9M
    const BYTE* const dictEnd = dictBase + dictLimit;
461
23.9M
    const BYTE* const prefixStart = base + dictLimit;
462
23.9M
    const BYTE* match;
463
23.9M
    const U32 curr = (U32)(ip-base);
464
23.9M
    const U32 btLow = btMask >= curr ? 0 : curr - btMask;
465
23.9M
    U32* smallerPtr = bt + 2*(curr&btMask);
466
23.9M
    U32* largerPtr  = smallerPtr + 1;
467
23.9M
    U32 dummy32;   /* to be nullified at the end */
468
    /* windowLow is based on target because
469
     * we only need positions that will be in the window at the end of the tree update.
470
     */
471
23.9M
    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
472
23.9M
    U32 matchEndIdx = curr+8+1;
473
23.9M
    size_t bestLength = 8;
474
23.9M
    U32 nbCompares = 1U << cParams->searchLog;
475
#ifdef ZSTD_C_PREDICT
476
    U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
477
    U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
478
    predictedSmall += (predictedSmall>0);
479
    predictedLarge += (predictedLarge>0);
480
#endif /* ZSTD_C_PREDICT */
481
482
23.9M
    DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
483
484
23.9M
    assert(curr <= target);
485
23.9M
    assert(ip <= iend-8);   /* required for h calculation */
486
23.9M
    hashTable[h] = curr;   /* Update Hash Table */
487
488
23.9M
    assert(windowLow > 0);
489
174M
    for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
490
150M
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
491
150M
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
492
150M
        assert(matchIndex < curr);
493
494
#ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
495
        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
496
        if (matchIndex == predictedSmall) {
497
            /* no need to check length, result known */
498
            *smallerPtr = matchIndex;
499
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
500
            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
501
            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
502
            predictedSmall = predictPtr[1] + (predictPtr[1]>0);
503
            continue;
504
        }
505
        if (matchIndex == predictedLarge) {
506
            *largerPtr = matchIndex;
507
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
508
            largerPtr = nextPtr;
509
            matchIndex = nextPtr[0];
510
            predictedLarge = predictPtr[0] + (predictPtr[0]>0);
511
            continue;
512
        }
513
#endif
514
515
150M
        if (!extDict || (matchIndex+matchLength >= dictLimit)) {
516
150M
            assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if actually extDict */
517
150M
            match = base + matchIndex;
518
150M
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
519
150M
        } else {
520
0
            match = dictBase + matchIndex;
521
0
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
522
0
            if (matchIndex+matchLength >= dictLimit)
523
0
                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
524
0
        }
525
526
150M
        if (matchLength > bestLength) {
527
32.5M
            bestLength = matchLength;
528
32.5M
            if (matchLength > matchEndIdx - matchIndex)
529
354k
                matchEndIdx = matchIndex + (U32)matchLength;
530
32.5M
        }
531
532
150M
        if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
533
26.9k
            break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
534
26.9k
        }
535
536
150M
        if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
537
            /* match is smaller than current */
538
54.2M
            *smallerPtr = matchIndex;             /* update smaller idx */
539
54.2M
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
540
54.2M
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
541
54.2M
            smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
542
54.2M
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
543
96.2M
        } else {
544
            /* match is larger than current */
545
96.2M
            *largerPtr = matchIndex;
546
96.2M
            commonLengthLarger = matchLength;
547
96.2M
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
548
96.2M
            largerPtr = nextPtr;
549
96.2M
            matchIndex = nextPtr[0];
550
96.2M
    }   }
551
552
23.9M
    *smallerPtr = *largerPtr = 0;
553
23.9M
    {   U32 positions = 0;
554
23.9M
        if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384));   /* speed optimization */
555
23.9M
        assert(matchEndIdx > curr + 8);
556
23.9M
        return MAX(positions, matchEndIdx - (curr + 8));
557
23.9M
    }
558
23.9M
}
559
560
FORCE_INLINE_TEMPLATE
561
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
562
void ZSTD_updateTree_internal(
563
                ZSTD_MatchState_t* ms,
564
                const BYTE* const ip, const BYTE* const iend,
565
                const U32 mls, const ZSTD_dictMode_e dictMode)
566
96.3M
{
567
96.3M
    const BYTE* const base = ms->window.base;
568
96.3M
    U32 const target = (U32)(ip - base);
569
96.3M
    U32 idx = ms->nextToUpdate;
570
96.3M
    DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
571
96.3M
                idx, target, dictMode);
572
573
120M
    while(idx < target) {
574
23.9M
        U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
575
23.9M
        assert(idx < (U32)(idx + forward));
576
23.9M
        idx += forward;
577
23.9M
    }
578
96.3M
    assert((size_t)(ip - base) <= (size_t)(U32)(-1));
579
96.3M
    assert((size_t)(iend - base) <= (size_t)(U32)(-1));
580
96.3M
    ms->nextToUpdate = target;
581
96.3M
}
582
583
0
void ZSTD_updateTree(ZSTD_MatchState_t* ms, const BYTE* ip, const BYTE* iend) {
584
0
    ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
585
0
}
586
587
FORCE_INLINE_TEMPLATE
588
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
589
U32
590
ZSTD_insertBtAndGetAllMatches (
591
                ZSTD_match_t* matches,  /* store result (found matches) in this table (presumed large enough) */
592
                ZSTD_MatchState_t* ms,
593
                U32* nextToUpdate3,
594
                const BYTE* const ip, const BYTE* const iLimit,
595
                const ZSTD_dictMode_e dictMode,
596
                const U32 rep[ZSTD_REP_NUM],
597
                const U32 ll0,  /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
598
                const U32 lengthToBeat,
599
                const U32 mls /* template */)
600
96.3M
{
601
96.3M
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
602
96.3M
    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
603
96.3M
    const BYTE* const base = ms->window.base;
604
96.3M
    U32 const curr = (U32)(ip-base);
605
96.3M
    U32 const hashLog = cParams->hashLog;
606
96.3M
    U32 const minMatch = (mls==3) ? 3 : 4;
607
96.3M
    U32* const hashTable = ms->hashTable;
608
96.3M
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
609
96.3M
    U32 matchIndex  = hashTable[h];
610
96.3M
    U32* const bt   = ms->chainTable;
611
96.3M
    U32 const btLog = cParams->chainLog - 1;
612
96.3M
    U32 const btMask= (1U << btLog) - 1;
613
96.3M
    size_t commonLengthSmaller=0, commonLengthLarger=0;
614
96.3M
    const BYTE* const dictBase = ms->window.dictBase;
615
96.3M
    U32 const dictLimit = ms->window.dictLimit;
616
96.3M
    const BYTE* const dictEnd = dictBase + dictLimit;
617
96.3M
    const BYTE* const prefixStart = base + dictLimit;
618
96.3M
    U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
619
96.3M
    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
620
96.3M
    U32 const matchLow = windowLow ? windowLow : 1;
621
96.3M
    U32* smallerPtr = bt + 2*(curr&btMask);
622
96.3M
    U32* largerPtr  = bt + 2*(curr&btMask) + 1;
623
96.3M
    U32 matchEndIdx = curr+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
624
96.3M
    U32 dummy32;   /* to be nullified at the end */
625
96.3M
    U32 mnum = 0;
626
96.3M
    U32 nbCompares = 1U << cParams->searchLog;
627
628
96.3M
    const ZSTD_MatchState_t* dms    = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
629
96.3M
    const ZSTD_compressionParameters* const dmsCParams =
630
96.3M
                                      dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
631
96.3M
    const BYTE* const dmsBase       = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
632
96.3M
    const BYTE* const dmsEnd        = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
633
96.3M
    U32         const dmsHighLimit  = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
634
96.3M
    U32         const dmsLowLimit   = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
635
96.3M
    U32         const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
636
96.3M
    U32         const dmsHashLog    = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
637
96.3M
    U32         const dmsBtLog      = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
638
96.3M
    U32         const dmsBtMask     = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
639
96.3M
    U32         const dmsBtLow      = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
640
641
96.3M
    size_t bestLength = lengthToBeat-1;
642
96.3M
    DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
643
644
    /* check repCode */
645
96.3M
    assert(ll0 <= 1);   /* necessarily 1 or 0 */
646
96.3M
    {   U32 const lastR = ZSTD_REP_NUM + ll0;
647
96.3M
        U32 repCode;
648
385M
        for (repCode = ll0; repCode < lastR; repCode++) {
649
288M
            U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
650
288M
            U32 const repIndex = curr - repOffset;
651
288M
            U32 repLen = 0;
652
288M
            assert(curr >= dictLimit);
653
288M
            if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) {  /* equivalent to `curr > repIndex >= dictLimit` */
654
                /* We must validate the repcode offset because when we're using a dictionary the
655
                 * valid offset range shrinks when the dictionary goes out of bounds.
656
                 */
657
287M
                if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
658
33.9M
                    repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
659
33.9M
                }
660
287M
            } else {  /* repIndex < dictLimit || repIndex >= curr */
661
1.73M
                const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
662
0
                                             dmsBase + repIndex - dmsIndexDelta :
663
1.73M
                                             dictBase + repIndex;
664
1.73M
                assert(curr >= windowLow);
665
1.73M
                if ( dictMode == ZSTD_extDict
666
1.73M
                  && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow)  /* equivalent to `curr > repIndex >= windowLow` */
667
0
                     & (ZSTD_index_overlap_check(dictLimit, repIndex)) )
668
1.73M
                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
669
0
                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
670
0
                }
671
1.73M
                if (dictMode == ZSTD_dictMatchState
672
1.73M
                  && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta))  /* equivalent to `curr > repIndex >= dmsLowLimit` */
673
0
                     & (ZSTD_index_overlap_check(dictLimit, repIndex)) )
674
1.73M
                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
675
0
                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
676
0
            }   }
677
            /* save longer solution */
678
288M
            if (repLen > bestLength) {
679
25.8M
                DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
680
25.8M
                            repCode, ll0, repOffset, repLen);
681
25.8M
                bestLength = repLen;
682
51.6M
                matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1);  /* expect value between 1 and 3 */
683
0
                matches[mnum].len = (U32)repLen;
684
51.6M
                mnum++;
685
51.6M
                if ( (repLen > sufficient_len)
686
25.8M
                   | (ip+repLen == iLimit) ) {  /* best possible */
687
71.7k
                    return mnum;
688
71.7k
    }   }   }   }
689
690
    /* HC3 match finder */
691
96.2M
    if ((mls == 3) /*static*/ && (bestLength < mls)) {
692
48.8M
        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
693
48.8M
        if ((matchIndex3 >= matchLow)
694
48.8M
          & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
695
26.9M
            size_t mlen;
696
26.9M
            if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
697
26.9M
                const BYTE* const match = base + matchIndex3;
698
26.9M
                mlen = ZSTD_count(ip, match, iLimit);
699
26.9M
            } else {
700
0
                const BYTE* const match = dictBase + matchIndex3;
701
0
                mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
702
0
            }
703
704
            /* save best solution */
705
26.9M
            if (mlen >= mls /* == 3 > bestLength */) {
706
20.8M
                DEBUGLOG(8, "found small match with hlog3, of length %u",
707
20.8M
                            (U32)mlen);
708
20.8M
                bestLength = mlen;
709
20.8M
                assert(curr > matchIndex3);
710
20.8M
                assert(mnum==0);  /* no prior solution */
711
20.8M
                matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3);
712
0
                matches[0].len = (U32)mlen;
713
20.8M
                mnum = 1;
714
20.8M
                if ( (mlen > sufficient_len) |
715
20.8M
                     (ip+mlen == iLimit) ) {  /* best possible length */
716
9.34k
                    ms->nextToUpdate = curr+1;  /* skip insertion */
717
9.34k
                    return 1;
718
9.34k
        }   }   }
719
        /* no dictMatchState lookup: dicts don't have a populated HC3 table */
720
48.8M
    }  /* if (mls == 3) */
721
722
96.2M
    hashTable[h] = curr;   /* Update Hash Table */
723
724
328M
    for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
725
232M
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
726
232M
        const BYTE* match;
727
232M
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
728
232M
        assert(curr > matchIndex);
729
730
232M
        if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
731
232M
            assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
732
232M
            match = base + matchIndex;
733
232M
            if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
734
232M
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
735
232M
        } else {
736
0
            match = dictBase + matchIndex;
737
0
            assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
738
0
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
739
0
            if (matchIndex+matchLength >= dictLimit)
740
0
                match = base + matchIndex;   /* prepare for match[matchLength] read */
741
0
        }
742
743
232M
        if (matchLength > bestLength) {
744
26.9M
            DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
745
26.9M
                    (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
746
26.9M
            assert(matchEndIdx > matchIndex);
747
26.9M
            if (matchLength > matchEndIdx - matchIndex)
748
109k
                matchEndIdx = matchIndex + (U32)matchLength;
749
26.9M
            bestLength = matchLength;
750
26.9M
            matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
751
0
            matches[mnum].len = (U32)matchLength;
752
26.9M
            mnum++;
753
26.9M
            if ( (matchLength > ZSTD_OPT_NUM)
754
26.9M
               | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
755
6.13k
                if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
756
6.13k
                break; /* drop, to preserve bt consistency (miss a little bit of compression) */
757
6.13k
        }   }
758
759
232M
        if (match[matchLength] < ip[matchLength]) {
760
            /* match smaller than current */
761
75.3M
            *smallerPtr = matchIndex;             /* update smaller idx */
762
75.3M
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
763
75.3M
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
764
75.3M
            smallerPtr = nextPtr+1;               /* new candidate => larger than match, which was smaller than current */
765
75.3M
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous, closer to current */
766
157M
        } else {
767
157M
            *largerPtr = matchIndex;
768
157M
            commonLengthLarger = matchLength;
769
157M
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
770
157M
            largerPtr = nextPtr;
771
157M
            matchIndex = nextPtr[0];
772
157M
    }   }
773
774
96.2M
    *smallerPtr = *largerPtr = 0;
775
776
96.2M
    assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
777
96.2M
    if (dictMode == ZSTD_dictMatchState && nbCompares) {
778
0
        size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
779
0
        U32 dictMatchIndex = dms->hashTable[dmsH];
780
0
        const U32* const dmsBt = dms->chainTable;
781
0
        commonLengthSmaller = commonLengthLarger = 0;
782
0
        for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
783
0
            const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
784
0
            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
785
0
            const BYTE* match = dmsBase + dictMatchIndex;
786
0
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
787
0
            if (dictMatchIndex+matchLength >= dmsHighLimit)
788
0
                match = base + dictMatchIndex + dmsIndexDelta;   /* to prepare for next usage of match[matchLength] */
789
790
0
            if (matchLength > bestLength) {
791
0
                matchIndex = dictMatchIndex + dmsIndexDelta;
792
0
                DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
793
0
                        (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
794
0
                if (matchLength > matchEndIdx - matchIndex)
795
0
                    matchEndIdx = matchIndex + (U32)matchLength;
796
0
                bestLength = matchLength;
797
0
                matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
798
0
                matches[mnum].len = (U32)matchLength;
799
0
                mnum++;
800
0
                if ( (matchLength > ZSTD_OPT_NUM)
801
0
                   | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
802
0
                    break;   /* drop, to guarantee consistency (miss a little bit of compression) */
803
0
            }   }
804
805
0
            if (dictMatchIndex <= dmsBtLow) { break; }   /* beyond tree size, stop the search */
806
0
            if (match[matchLength] < ip[matchLength]) {
807
0
                commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
808
0
                dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
809
0
            } else {
810
                /* match is larger than current */
811
0
                commonLengthLarger = matchLength;
812
0
                dictMatchIndex = nextPtr[0];
813
0
    }   }   }  /* if (dictMode == ZSTD_dictMatchState) */
814
815
96.2M
    assert(matchEndIdx > curr+8);
816
96.2M
    ms->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
817
96.2M
    return mnum;
818
96.2M
}
819
820
typedef U32 (*ZSTD_getAllMatchesFn)(
821
    ZSTD_match_t*,
822
    ZSTD_MatchState_t*,
823
    U32*,
824
    const BYTE*,
825
    const BYTE*,
826
    const U32 rep[ZSTD_REP_NUM],
827
    U32 const ll0,
828
    U32 const lengthToBeat);
829
830
FORCE_INLINE_TEMPLATE
831
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
832
U32 ZSTD_btGetAllMatches_internal(
833
        ZSTD_match_t* matches,
834
        ZSTD_MatchState_t* ms,
835
        U32* nextToUpdate3,
836
        const BYTE* ip,
837
        const BYTE* const iHighLimit,
838
        const U32 rep[ZSTD_REP_NUM],
839
        U32 const ll0,
840
        U32 const lengthToBeat,
841
        const ZSTD_dictMode_e dictMode,
842
        const U32 mls)
843
98.5M
{
844
98.5M
    assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
845
98.5M
    DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
846
98.5M
    if (ip < ms->window.base + ms->nextToUpdate)
847
2.14M
        return 0;   /* skipped area */
848
96.3M
    ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
849
96.3M
    return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
850
98.5M
}
851
852
122k
#define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
853
854
#define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls)            \
855
    static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)(      \
856
            ZSTD_match_t* matches,                             \
857
            ZSTD_MatchState_t* ms,                             \
858
            U32* nextToUpdate3,                                \
859
            const BYTE* ip,                                    \
860
            const BYTE* const iHighLimit,                      \
861
            const U32 rep[ZSTD_REP_NUM],                       \
862
            U32 const ll0,                                     \
863
            U32 const lengthToBeat)                            \
864
98.5M
    {                                                          \
865
98.5M
        return ZSTD_btGetAllMatches_internal(                  \
866
98.5M
                matches, ms, nextToUpdate3, ip, iHighLimit,    \
867
98.5M
                rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
868
98.5M
    }
zstd_opt.c:ZSTD_btGetAllMatches_noDict_3
Line
Count
Source
864
71.4M
    {                                                          \
865
71.4M
        return ZSTD_btGetAllMatches_internal(                  \
866
71.4M
                matches, ms, nextToUpdate3, ip, iHighLimit,    \
867
71.4M
                rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
868
71.4M
    }
zstd_opt.c:ZSTD_btGetAllMatches_noDict_4
Line
Count
Source
864
12.8M
    {                                                          \
865
12.8M
        return ZSTD_btGetAllMatches_internal(                  \
866
12.8M
                matches, ms, nextToUpdate3, ip, iHighLimit,    \
867
12.8M
                rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
868
12.8M
    }
zstd_opt.c:ZSTD_btGetAllMatches_noDict_5
Line
Count
Source
864
14.1M
    {                                                          \
865
14.1M
        return ZSTD_btGetAllMatches_internal(                  \
866
14.1M
                matches, ms, nextToUpdate3, ip, iHighLimit,    \
867
14.1M
                rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
868
14.1M
    }
Unexecuted instantiation: zstd_opt.c:ZSTD_btGetAllMatches_noDict_6
Unexecuted instantiation: zstd_opt.c:ZSTD_btGetAllMatches_extDict_3
Unexecuted instantiation: zstd_opt.c:ZSTD_btGetAllMatches_extDict_4
Unexecuted instantiation: zstd_opt.c:ZSTD_btGetAllMatches_extDict_5
Unexecuted instantiation: zstd_opt.c:ZSTD_btGetAllMatches_extDict_6
Unexecuted instantiation: zstd_opt.c:ZSTD_btGetAllMatches_dictMatchState_3
Unexecuted instantiation: zstd_opt.c:ZSTD_btGetAllMatches_dictMatchState_4
Unexecuted instantiation: zstd_opt.c:ZSTD_btGetAllMatches_dictMatchState_5
Unexecuted instantiation: zstd_opt.c:ZSTD_btGetAllMatches_dictMatchState_6
869
870
#define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode)  \
871
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3)  \
872
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4)  \
873
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5)  \
874
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
875
876
GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
877
GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
878
GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
879
880
#define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode)  \
881
30.5k
    {                                            \
882
30.5k
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
883
30.5k
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
884
30.5k
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
885
30.5k
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6)  \
886
30.5k
    }
887
888
static ZSTD_getAllMatchesFn
889
ZSTD_selectBtGetAllMatches(ZSTD_MatchState_t const* ms, ZSTD_dictMode_e const dictMode)
890
10.1k
{
891
10.1k
    ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
892
10.1k
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
893
10.1k
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
894
10.1k
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
895
10.1k
    };
896
10.1k
    U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
897
10.1k
    assert((U32)dictMode < 3);
898
10.1k
    assert(mls - 3 < 4);
899
10.1k
    return getAllMatchesFns[(int)dictMode][mls - 3];
900
10.1k
}
901
902
/*************************
903
*  LDM helper functions  *
904
*************************/
905
906
/* Struct containing info needed to make decision about ldm inclusion */
907
typedef struct {
908
    RawSeqStore_t seqStore;   /* External match candidates store for this block */
909
    U32 startPosInBlock;      /* Start position of the current match candidate */
910
    U32 endPosInBlock;        /* End position of the current match candidate */
911
    U32 offset;               /* Offset of the match candidate */
912
} ZSTD_optLdm_t;
913
914
/* ZSTD_optLdm_skipRawSeqStoreBytes():
915
 * Moves forward in @rawSeqStore by @nbBytes,
916
 * which will update the fields 'pos' and 'posInSequence'.
917
 */
918
static void ZSTD_optLdm_skipRawSeqStoreBytes(RawSeqStore_t* rawSeqStore, size_t nbBytes)
919
0
{
920
0
    U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
921
0
    while (currPos && rawSeqStore->pos < rawSeqStore->size) {
922
0
        rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
923
0
        if (currPos >= currSeq.litLength + currSeq.matchLength) {
924
0
            currPos -= currSeq.litLength + currSeq.matchLength;
925
0
            rawSeqStore->pos++;
926
0
        } else {
927
0
            rawSeqStore->posInSequence = currPos;
928
0
            break;
929
0
        }
930
0
    }
931
0
    if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
932
0
        rawSeqStore->posInSequence = 0;
933
0
    }
934
0
}
935
936
/* ZSTD_opt_getNextMatchAndUpdateSeqStore():
937
 * Calculates the beginning and end of the next match in the current block.
938
 * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
939
 */
940
static void
941
ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
942
                                       U32 blockBytesRemaining)
943
10.1k
{
944
10.1k
    rawSeq currSeq;
945
10.1k
    U32 currBlockEndPos;
946
10.1k
    U32 literalsBytesRemaining;
947
10.1k
    U32 matchBytesRemaining;
948
949
    /* Setting match end position to MAX to ensure we never use an LDM during this block */
950
10.1k
    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
951
10.1k
        optLdm->startPosInBlock = UINT_MAX;
952
10.1k
        optLdm->endPosInBlock = UINT_MAX;
953
10.1k
        return;
954
10.1k
    }
955
    /* Calculate appropriate bytes left in matchLength and litLength
956
     * after adjusting based on ldmSeqStore->posInSequence */
957
0
    currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
958
0
    assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
959
0
    currBlockEndPos = currPosInBlock + blockBytesRemaining;
960
0
    literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
961
0
            currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
962
0
            0;
963
0
    matchBytesRemaining = (literalsBytesRemaining == 0) ?
964
0
            currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
965
0
            currSeq.matchLength;
966
967
    /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
968
0
    if (literalsBytesRemaining >= blockBytesRemaining) {
969
0
        optLdm->startPosInBlock = UINT_MAX;
970
0
        optLdm->endPosInBlock = UINT_MAX;
971
0
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
972
0
        return;
973
0
    }
974
975
    /* Matches may be < minMatch by this process. In that case, we will reject them
976
       when we are deciding whether or not to add the ldm */
977
0
    optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
978
0
    optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
979
0
    optLdm->offset = currSeq.offset;
980
981
0
    if (optLdm->endPosInBlock > currBlockEndPos) {
982
        /* Match ends after the block ends, we can't use the whole match */
983
0
        optLdm->endPosInBlock = currBlockEndPos;
984
0
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
985
0
    } else {
986
        /* Consume nb of bytes equal to size of sequence left */
987
0
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
988
0
    }
989
0
}
990
991
/* ZSTD_optLdm_maybeAddMatch():
992
 * Adds a match if it's long enough,
993
 * based on it's 'matchStartPosInBlock' and 'matchEndPosInBlock',
994
 * into 'matches'. Maintains the correct ordering of 'matches'.
995
 */
996
static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
997
                                      const ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
998
                                      U32 minMatch)
999
0
{
1000
0
    U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
1001
    /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
1002
0
    U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
1003
1004
    /* Ensure that current block position is not outside of the match */
1005
0
    if (currPosInBlock < optLdm->startPosInBlock
1006
0
      || currPosInBlock >= optLdm->endPosInBlock
1007
0
      || candidateMatchLength < minMatch) {
1008
0
        return;
1009
0
    }
1010
1011
0
    if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
1012
0
        U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset);
1013
0
        DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
1014
0
                 candidateOffBase, candidateMatchLength, currPosInBlock);
1015
0
        matches[*nbMatches].len = candidateMatchLength;
1016
0
        matches[*nbMatches].off = candidateOffBase;
1017
0
        (*nbMatches)++;
1018
0
    }
1019
0
}
1020
1021
/* ZSTD_optLdm_processMatchCandidate():
1022
 * Wrapper function to update ldm seq store and call ldm functions as necessary.
1023
 */
1024
static void
1025
ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
1026
                                  ZSTD_match_t* matches, U32* nbMatches,
1027
                                  U32 currPosInBlock, U32 remainingBytes,
1028
                                  U32 minMatch)
1029
98.5M
{
1030
98.5M
    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
1031
98.5M
        return;
1032
98.5M
    }
1033
1034
0
    if (currPosInBlock >= optLdm->endPosInBlock) {
1035
0
        if (currPosInBlock > optLdm->endPosInBlock) {
1036
            /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
1037
             * at the end of a match from the ldm seq store, and will often be some bytes
1038
             * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
1039
             */
1040
0
            U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
1041
0
            ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
1042
0
        }
1043
0
        ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
1044
0
    }
1045
0
    ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock, minMatch);
1046
0
}
1047
1048
1049
/*-*******************************
1050
*  Optimal parser
1051
*********************************/
1052
1053
#if 0 /* debug */
1054
1055
static void
1056
listStats(const U32* table, int lastEltID)
1057
{
1058
    int const nbElts = lastEltID + 1;
1059
    int enb;
1060
    for (enb=0; enb < nbElts; enb++) {
1061
        (void)table;
1062
        /* RAWLOG(2, "%3i:%3i,  ", enb, table[enb]); */
1063
        RAWLOG(2, "%4i,", table[enb]);
1064
    }
1065
    RAWLOG(2, " \n");
1066
}
1067
1068
#endif
1069
1070
79.2M
#define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel)
1071
252M
#define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel)
1072
87.9M
#define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1))
1073
1074
FORCE_INLINE_TEMPLATE
1075
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1076
size_t
1077
ZSTD_compressBlock_opt_generic(ZSTD_MatchState_t* ms,
1078
                               SeqStore_t* seqStore,
1079
                               U32 rep[ZSTD_REP_NUM],
1080
                         const void* src, size_t srcSize,
1081
                         const int optLevel,
1082
                         const ZSTD_dictMode_e dictMode)
1083
10.1k
{
1084
10.1k
    optState_t* const optStatePtr = &ms->opt;
1085
10.1k
    const BYTE* const istart = (const BYTE*)src;
1086
10.1k
    const BYTE* ip = istart;
1087
10.1k
    const BYTE* anchor = istart;
1088
10.1k
    const BYTE* const iend = istart + srcSize;
1089
10.1k
    const BYTE* const ilimit = iend - 8;
1090
10.1k
    const BYTE* const base = ms->window.base;
1091
10.1k
    const BYTE* const prefixStart = base + ms->window.dictLimit;
1092
10.1k
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
1093
1094
10.1k
    ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
1095
1096
10.1k
    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
1097
10.1k
    U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
1098
10.1k
    U32 nextToUpdate3 = ms->nextToUpdate;
1099
1100
10.1k
    ZSTD_optimal_t* const opt = optStatePtr->priceTable;
1101
10.1k
    ZSTD_match_t* const matches = optStatePtr->matchTable;
1102
10.1k
    ZSTD_optimal_t lastStretch;
1103
10.1k
    ZSTD_optLdm_t optLdm;
1104
1105
10.1k
    ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t));
1106
1107
10.1k
    optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
1108
10.1k
    optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
1109
10.1k
    ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
1110
1111
    /* init */
1112
10.1k
    DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
1113
10.1k
                (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
1114
10.1k
    assert(optLevel <= 2);
1115
10.1k
    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
1116
10.1k
    ip += (ip==prefixStart);
1117
1118
    /* Match Loop */
1119
39.0M
    while (ip < ilimit) {
1120
39.0M
        U32 cur, last_pos = 0;
1121
1122
        /* find first match */
1123
39.0M
        {   U32 const litlen = (U32)(ip - anchor);
1124
39.0M
            U32 const ll0 = !litlen;
1125
39.0M
            U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
1126
39.0M
            ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1127
39.0M
                                              (U32)(ip-istart), (U32)(iend-ip),
1128
39.0M
                                              minMatch);
1129
39.0M
            if (!nbMatches) {
1130
35.2M
                DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart));
1131
35.2M
                ip++;
1132
35.2M
                continue;
1133
35.2M
            }
1134
1135
            /* Match found: let's store this solution, and eventually find more candidates.
1136
             * During this forward pass, @opt is used to store stretches,
1137
             * defined as "a match followed by N literals".
1138
             * Note how this is different from a Sequence, which is "N literals followed by a match".
1139
             * Storing stretches allows us to store different match predecessors
1140
             * for each literal position part of a literals run. */
1141
1142
            /* initialize opt[0] */
1143
3.71M
            opt[0].mlen = 0;  /* there are only literals so far */
1144
3.71M
            opt[0].litlen = litlen;
1145
            /* No need to include the actual price of the literals before the first match
1146
             * because it is static for the duration of the forward pass, and is included
1147
             * in every subsequent price. But, we include the literal length because
1148
             * the cost variation of litlen depends on the value of litlen.
1149
             */
1150
3.71M
            opt[0].price = LL_PRICE(litlen);
1151
3.71M
            ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0]));
1152
3.71M
            ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep));
1153
1154
            /* large match -> immediate encoding */
1155
3.71M
            {   U32 const maxML = matches[nbMatches-1].len;
1156
3.71M
                U32 const maxOffBase = matches[nbMatches-1].off;
1157
3.71M
                DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
1158
3.71M
                            nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
1159
1160
3.71M
                if (maxML > sufficient_len) {
1161
141k
                    lastStretch.litlen = 0;
1162
141k
                    lastStretch.mlen = maxML;
1163
141k
                    lastStretch.off = maxOffBase;
1164
141k
                    DEBUGLOG(6, "large match (%u>%u) => immediate encoding",
1165
141k
                                maxML, sufficient_len);
1166
141k
                    cur = 0;
1167
141k
                    last_pos = maxML;
1168
141k
                    goto _shortestPath;
1169
141k
            }   }
1170
1171
            /* set prices for first matches starting position == 0 */
1172
3.57M
            assert(opt[0].price >= 0);
1173
3.57M
            {   U32 pos;
1174
3.57M
                U32 matchNb;
1175
11.9M
                for (pos = 1; pos < minMatch; pos++) {
1176
8.37M
                    opt[pos].price = ZSTD_MAX_PRICE;
1177
8.37M
                    opt[pos].mlen = 0;
1178
8.37M
                    opt[pos].litlen = litlen + pos;
1179
8.37M
                }
1180
8.24M
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1181
4.66M
                    U32 const offBase = matches[matchNb].off;
1182
4.66M
                    U32 const end = matches[matchNb].len;
1183
17.5M
                    for ( ; pos <= end ; pos++ ) {
1184
12.9M
                        int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
1185
12.9M
                        int const sequencePrice = opt[0].price + matchPrice;
1186
12.9M
                        DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
1187
12.9M
                                    pos, ZSTD_fCost(sequencePrice));
1188
12.9M
                        opt[pos].mlen = pos;
1189
12.9M
                        opt[pos].off = offBase;
1190
12.9M
                        opt[pos].litlen = 0; /* end of match */
1191
12.9M
                        opt[pos].price = sequencePrice + LL_PRICE(0);
1192
12.9M
                    }
1193
4.66M
                }
1194
3.57M
                last_pos = pos-1;
1195
3.57M
                opt[pos].price = ZSTD_MAX_PRICE;
1196
3.57M
            }
1197
3.57M
        }
1198
1199
        /* check further positions */
1200
70.2M
        for (cur = 1; cur <= last_pos; cur++) {
1201
70.2M
            const BYTE* const inr = ip + cur;
1202
70.2M
            assert(cur <= ZSTD_OPT_NUM);
1203
70.2M
            DEBUGLOG(7, "cPos:%i==rPos:%u", (int)(inr-istart), cur);
1204
1205
            /* Fix current position with one literal if cheaper */
1206
70.2M
            {   U32 const litlen = opt[cur-1].litlen + 1;
1207
70.2M
                int const price = opt[cur-1].price
1208
70.2M
                                + LIT_PRICE(ip+cur-1)
1209
70.2M
                                + LL_INCPRICE(litlen);
1210
70.2M
                assert(price < 1000000000); /* overflow check */
1211
70.2M
                if (price <= opt[cur].price) {
1212
27.2M
                    ZSTD_optimal_t const prevMatch = opt[cur];
1213
27.2M
                    DEBUGLOG(7, "cPos:%i==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
1214
27.2M
                                (int)(inr-istart), cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
1215
27.2M
                                opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
1216
27.2M
                    opt[cur] = opt[cur-1];
1217
27.2M
                    opt[cur].litlen = litlen;
1218
27.2M
                    opt[cur].price = price;
1219
27.2M
                    if ( (optLevel >= 1) /* additional check only for higher modes */
1220
27.2M
                      && (prevMatch.litlen == 0) /* replace a match */
1221
27.2M
                      && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */
1222
27.2M
                      && LIKELY(ip + cur < iend)
1223
27.2M
                    ) {
1224
                        /* check next position, in case it would be cheaper */
1225
4.51M
                        int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1);
1226
4.51M
                        int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1);
1227
4.51M
                        DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f",
1228
4.51M
                                cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals));
1229
4.51M
                        if ( (with1literal < withMoreLiterals)
1230
4.51M
                          && (with1literal < opt[cur+1].price) ) {
1231
                            /* update offset history - before it disappears */
1232
558k
                            U32 const prev = cur - prevMatch.mlen;
1233
558k
                            Repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0);
1234
558k
                            assert(cur >= prevMatch.mlen);
1235
558k
                            DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !",
1236
558k
                                        ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals),
1237
558k
                                        newReps.rep[0], newReps.rep[1], newReps.rep[2] );
1238
558k
                            opt[cur+1] = prevMatch;  /* mlen & offbase */
1239
558k
                            ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(Repcodes_t));
1240
558k
                            opt[cur+1].litlen = 1;
1241
558k
                            opt[cur+1].price = with1literal;
1242
558k
                            if (last_pos < cur+1) last_pos = cur+1;
1243
558k
                        }
1244
4.51M
                    }
1245
42.9M
                } else {
1246
42.9M
                    DEBUGLOG(7, "cPos:%i==rPos:%u : literal would cost more (%.2f>%.2f)",
1247
42.9M
                                (int)(inr-istart), cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
1248
42.9M
                }
1249
70.2M
            }
1250
1251
            /* Offset history is not updated during match comparison.
1252
             * Do it here, now that the match is selected and confirmed.
1253
             */
1254
70.2M
            ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(Repcodes_t));
1255
70.2M
            assert(cur >= opt[cur].mlen);
1256
70.2M
            if (opt[cur].litlen == 0) {
1257
                /* just finished a match => alter offset history */
1258
42.4M
                U32 const prev = cur - opt[cur].mlen;
1259
42.4M
                Repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
1260
42.4M
                ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(Repcodes_t));
1261
42.4M
            }
1262
1263
            /* last match must start at a minimum distance of 8 from oend */
1264
70.2M
            if (inr > ilimit) continue;
1265
1266
70.1M
            if (cur == last_pos) break;
1267
1268
66.6M
            if ( (optLevel==0) /*static_test*/
1269
66.6M
              && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
1270
7.17M
                DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1);
1271
7.17M
                continue;  /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
1272
7.17M
            }
1273
1274
59.4M
            assert(opt[cur].price >= 0);
1275
59.4M
            {   U32 const ll0 = (opt[cur].litlen == 0);
1276
59.4M
                int const previousPrice = opt[cur].price;
1277
59.4M
                int const basePrice = previousPrice + LL_PRICE(0);
1278
59.4M
                U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
1279
59.4M
                U32 matchNb;
1280
1281
59.4M
                ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1282
59.4M
                                                  (U32)(inr-istart), (U32)(iend-inr),
1283
59.4M
                                                  minMatch);
1284
1285
59.4M
                if (!nbMatches) {
1286
14.0M
                    DEBUGLOG(7, "rPos:%u : no match found", cur);
1287
14.0M
                    continue;
1288
14.0M
                }
1289
1290
45.4M
                {   U32 const longestML = matches[nbMatches-1].len;
1291
45.4M
                    DEBUGLOG(7, "cPos:%i==rPos:%u, found %u matches, of longest ML=%u",
1292
45.4M
                                (int)(inr-istart), cur, nbMatches, longestML);
1293
1294
45.4M
                    if ( (longestML > sufficient_len)
1295
45.4M
                      || (cur + longestML >= ZSTD_OPT_NUM)
1296
45.4M
                      || (ip + cur + longestML >= iend) ) {
1297
53.7k
                        lastStretch.mlen = longestML;
1298
53.7k
                        lastStretch.off = matches[nbMatches-1].off;
1299
53.7k
                        lastStretch.litlen = 0;
1300
53.7k
                        last_pos = cur + longestML;
1301
53.7k
                        goto _shortestPath;
1302
53.7k
                }   }
1303
1304
                /* set prices using matches found at position == cur */
1305
113M
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1306
68.4M
                    U32 const offset = matches[matchNb].off;
1307
68.4M
                    U32 const lastML = matches[matchNb].len;
1308
68.4M
                    U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
1309
68.4M
                    U32 mlen;
1310
1311
68.4M
                    DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
1312
68.4M
                                matchNb, matches[matchNb].off, lastML, opt[cur].litlen);
1313
1314
514M
                    for (mlen = lastML; mlen >= startML; mlen--) {  /* scan downward */
1315
453M
                        U32 const pos = cur + mlen;
1316
453M
                        int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
1317
1318
453M
                        if ((pos > last_pos) || (price < opt[pos].price)) {
1319
71.8M
                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
1320
71.8M
                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
1321
121M
                            while (last_pos < pos) {
1322
                                /* fill empty positions, for future comparisons */
1323
49.7M
                                last_pos++;
1324
49.7M
                                opt[last_pos].price = ZSTD_MAX_PRICE;
1325
49.7M
                                opt[last_pos].litlen = !0;  /* just needs to be != 0, to mean "not an end of match" */
1326
49.7M
                            }
1327
71.8M
                            opt[pos].mlen = mlen;
1328
71.8M
                            opt[pos].off = offset;
1329
71.8M
                            opt[pos].litlen = 0;
1330
71.8M
                            opt[pos].price = price;
1331
381M
                        } else {
1332
381M
                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
1333
381M
                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
1334
381M
                            if (optLevel==0) break;  /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
1335
381M
                        }
1336
453M
            }   }   }
1337
45.3M
            opt[last_pos+1].price = ZSTD_MAX_PRICE;
1338
45.3M
        }  /* for (cur = 1; cur <= last_pos; cur++) */
1339
1340
3.52M
        lastStretch = opt[last_pos];
1341
3.52M
        assert(cur >= lastStretch.mlen);
1342
3.52M
        cur = last_pos - lastStretch.mlen;
1343
1344
3.71M
_shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
1345
3.71M
        assert(opt[0].mlen == 0);
1346
3.71M
        assert(last_pos >= lastStretch.mlen);
1347
3.71M
        assert(cur == last_pos - lastStretch.mlen);
1348
1349
3.71M
        if (lastStretch.mlen==0) {
1350
            /* no solution : all matches have been converted into literals */
1351
345k
            assert(lastStretch.litlen == (ip - anchor) + last_pos);
1352
345k
            ip += last_pos;
1353
345k
            continue;
1354
345k
        }
1355
3.37M
        assert(lastStretch.off > 0);
1356
1357
        /* Update offset history */
1358
3.37M
        if (lastStretch.litlen == 0) {
1359
            /* finishing on a match : update offset history */
1360
2.69M
            Repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0);
1361
2.69M
            ZSTD_memcpy(rep, &reps, sizeof(Repcodes_t));
1362
2.69M
        } else {
1363
678k
            ZSTD_memcpy(rep, lastStretch.rep, sizeof(Repcodes_t));
1364
678k
            assert(cur >= lastStretch.litlen);
1365
678k
            cur -= lastStretch.litlen;
1366
678k
        }
1367
1368
        /* Let's write the shortest path solution.
1369
         * It is stored in @opt in reverse order,
1370
         * starting from @storeEnd (==cur+2),
1371
         * effectively partially @opt overwriting.
1372
         * Content is changed too:
1373
         * - So far, @opt stored stretches, aka a match followed by literals
1374
         * - Now, it will store sequences, aka literals followed by a match
1375
         */
1376
3.37M
        {   U32 const storeEnd = cur + 2;
1377
3.37M
            U32 storeStart = storeEnd;
1378
3.37M
            U32 stretchPos = cur;
1379
1380
3.37M
            DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
1381
3.37M
                        last_pos, cur); (void)last_pos;
1382
3.37M
            assert(storeEnd < ZSTD_OPT_SIZE);
1383
3.37M
            DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
1384
3.37M
                        storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off);
1385
3.37M
            if (lastStretch.litlen > 0) {
1386
                /* last "sequence" is unfinished: just a bunch of literals */
1387
678k
                opt[storeEnd].litlen = lastStretch.litlen;
1388
678k
                opt[storeEnd].mlen = 0;
1389
678k
                storeStart = storeEnd-1;
1390
678k
                opt[storeStart] = lastStretch;
1391
678k
            } {
1392
3.37M
                opt[storeEnd] = lastStretch;  /* note: litlen will be fixed */
1393
3.37M
                storeStart = storeEnd;
1394
3.37M
            }
1395
12.1M
            while (1) {
1396
12.1M
                ZSTD_optimal_t nextStretch = opt[stretchPos];
1397
12.1M
                opt[storeStart].litlen = nextStretch.litlen;
1398
12.1M
                DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)",
1399
12.1M
                            opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off);
1400
12.1M
                if (nextStretch.mlen == 0) {
1401
                    /* reaching beginning of segment */
1402
3.37M
                    break;
1403
3.37M
                }
1404
8.74M
                storeStart--;
1405
8.74M
                opt[storeStart] = nextStretch; /* note: litlen will be fixed */
1406
8.74M
                assert(nextStretch.litlen + nextStretch.mlen <= stretchPos);
1407
8.74M
                stretchPos -= nextStretch.litlen + nextStretch.mlen;
1408
8.74M
            }
1409
1410
            /* save sequences */
1411
3.37M
            DEBUGLOG(6, "sending selected sequences into seqStore");
1412
3.37M
            {   U32 storePos;
1413
15.4M
                for (storePos=storeStart; storePos <= storeEnd; storePos++) {
1414
12.1M
                    U32 const llen = opt[storePos].litlen;
1415
12.1M
                    U32 const mlen = opt[storePos].mlen;
1416
12.1M
                    U32 const offBase = opt[storePos].off;
1417
12.1M
                    U32 const advance = llen + mlen;
1418
12.1M
                    DEBUGLOG(6, "considering seq starting at %i, llen=%u, mlen=%u",
1419
12.1M
                                (int)(anchor - istart), (unsigned)llen, (unsigned)mlen);
1420
1421
12.1M
                    if (mlen==0) {  /* only literals => must be last "sequence", actually starting a new stream of sequences */
1422
0
                        assert(storePos == storeEnd);   /* must be last sequence */
1423
0
                        ip = anchor + llen;     /* last "sequence" is a bunch of literals => don't progress anchor */
1424
0
                        continue;   /* will finish */
1425
0
                    }
1426
1427
12.1M
                    assert(anchor + llen <= iend);
1428
12.1M
                    ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
1429
12.1M
                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
1430
12.1M
                    anchor += advance;
1431
12.1M
                    ip = anchor;
1432
12.1M
            }   }
1433
3.37M
            DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]);
1434
1435
            /* update all costs */
1436
3.37M
            ZSTD_setBasePrices(optStatePtr, optLevel);
1437
3.37M
        }
1438
3.37M
    }   /* while (ip < ilimit) */
1439
1440
    /* Return the last literals size */
1441
10.1k
    return (size_t)(iend - anchor);
1442
10.1k
}
1443
#endif /* build exclusions */
1444
1445
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
1446
static size_t ZSTD_compressBlock_opt0(
1447
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1448
        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1449
4.09k
{
1450
4.09k
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
1451
4.09k
}
1452
#endif
1453
1454
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
1455
static size_t ZSTD_compressBlock_opt2(
1456
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1457
        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1458
6.07k
{
1459
6.07k
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
1460
6.07k
}
1461
#endif
1462
1463
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
1464
size_t ZSTD_compressBlock_btopt(
1465
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1466
        const void* src, size_t srcSize)
1467
4.09k
{
1468
4.09k
    DEBUGLOG(5, "ZSTD_compressBlock_btopt");
1469
4.09k
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1470
4.09k
}
1471
#endif
1472
1473
1474
1475
1476
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
1477
/* ZSTD_initStats_ultra():
1478
 * make a first compression pass, just to seed stats with more accurate starting values.
1479
 * only works on first block, with no dictionary and no ldm.
1480
 * this function cannot error out, its narrow contract must be respected.
1481
 */
1482
static
1483
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1484
void ZSTD_initStats_ultra(ZSTD_MatchState_t* ms,
1485
                          SeqStore_t* seqStore,
1486
                          U32 rep[ZSTD_REP_NUM],
1487
                    const void* src, size_t srcSize)
1488
1.68k
{
1489
1.68k
    U32 tmpRep[ZSTD_REP_NUM];  /* updated rep codes will sink here */
1490
1.68k
    ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
1491
1492
1.68k
    DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
1493
1.68k
    assert(ms->opt.litLengthSum == 0);    /* first block */
1494
1.68k
    assert(seqStore->sequences == seqStore->sequencesStart);   /* no ldm */
1495
1.68k
    assert(ms->window.dictLimit == ms->window.lowLimit);   /* no dictionary */
1496
1.68k
    assert(ms->window.dictLimit - ms->nextToUpdate <= 1);  /* no prefix (note: intentional overflow, defined as 2-complement) */
1497
1498
1.68k
    ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict);   /* generate stats into ms->opt*/
1499
1500
    /* invalidate first scan from history, only keep entropy stats */
1501
1.68k
    ZSTD_resetSeqStore(seqStore);
1502
1.68k
    ms->window.base -= srcSize;
1503
1.68k
    ms->window.dictLimit += (U32)srcSize;
1504
1.68k
    ms->window.lowLimit = ms->window.dictLimit;
1505
1.68k
    ms->nextToUpdate = ms->window.dictLimit;
1506
1507
1.68k
}
1508
1509
size_t ZSTD_compressBlock_btultra(
1510
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1511
        const void* src, size_t srcSize)
1512
909
{
1513
909
    DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
1514
909
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1515
909
}
1516
1517
size_t ZSTD_compressBlock_btultra2(
1518
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1519
        const void* src, size_t srcSize)
1520
3.48k
{
1521
3.48k
    U32 const curr = (U32)((const BYTE*)src - ms->window.base);
1522
3.48k
    DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
1523
1524
    /* 2-passes strategy:
1525
     * this strategy makes a first pass over first block to collect statistics
1526
     * in order to seed next round's statistics with it.
1527
     * After 1st pass, function forgets history, and starts a new block.
1528
     * Consequently, this can only work if no data has been previously loaded in tables,
1529
     * aka, no dictionary, no prefix, no ldm preprocessing.
1530
     * The compression ratio gain is generally small (~0.5% on first block),
1531
     * the cost is 2x cpu time on first block. */
1532
3.48k
    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
1533
3.48k
    if ( (ms->opt.litLengthSum==0)   /* first block */
1534
3.48k
      && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
1535
3.48k
      && (ms->window.dictLimit == ms->window.lowLimit)   /* no dictionary */
1536
3.48k
      && (curr == ms->window.dictLimit)    /* start of frame, nothing already loaded nor skipped */
1537
3.48k
      && (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */
1538
3.48k
      ) {
1539
1.68k
        ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
1540
1.68k
    }
1541
1542
3.48k
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1543
3.48k
}
1544
#endif
1545
1546
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
1547
size_t ZSTD_compressBlock_btopt_dictMatchState(
1548
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1549
        const void* src, size_t srcSize)
1550
0
{
1551
0
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1552
0
}
1553
1554
size_t ZSTD_compressBlock_btopt_extDict(
1555
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1556
        const void* src, size_t srcSize)
1557
0
{
1558
0
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1559
0
}
1560
#endif
1561
1562
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
1563
size_t ZSTD_compressBlock_btultra_dictMatchState(
1564
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1565
        const void* src, size_t srcSize)
1566
0
{
1567
0
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1568
0
}
1569
1570
size_t ZSTD_compressBlock_btultra_extDict(
1571
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1572
        const void* src, size_t srcSize)
1573
0
{
1574
0
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1575
0
}
1576
#endif
1577
1578
/* note : no btultra2 variant for extDict nor dictMatchState,
1579
 * because btultra2 is not meant to work with dictionaries
1580
 * and is only specific for the first block (no prefix) */