Coverage Report

Created: 2026-04-12 07:05

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/zstd/lib/compress/zstd_opt.c
Line
Count
Source
1
/*
2
 * Copyright (c) Meta Platforms, Inc. and affiliates.
3
 * All rights reserved.
4
 *
5
 * This source code is licensed under both the BSD-style license (found in the
6
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
 * in the COPYING file in the root directory of this source tree).
8
 * You may select, at your option, one of the above-listed licenses.
9
 */
10
11
#include "zstd_compress_internal.h"
12
#include "hist.h"
13
#include "zstd_opt.h"
14
15
#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
16
 || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
17
 || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
18
19
75.3M
#define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
20
186M
#define ZSTD_MAX_PRICE     (1<<30)
21
22
5.73k
#define ZSTD_PREDEF_THRESHOLD 8   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
23
24
25
/*-*************************************
26
*  Price functions for optimal parser
27
***************************************/
28
29
#if 0    /* approximation at bit level (for tests) */
30
#  define BITCOST_ACCURACY 0
31
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
32
#  define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
33
#elif 0  /* fractional bit accuracy (for tests) */
34
#  define BITCOST_ACCURACY 8
35
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
36
#  define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
37
#else    /* opt==approx, ultra==accurate */
38
9.81G
#  define BITCOST_ACCURACY 8
39
7.17G
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
40
2.99G
#  define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
41
#endif
42
43
/* ZSTD_bitWeight() :
44
 * provide estimated "cost" of a stat in full bits only */
45
MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
46
360M
{
47
360M
    return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
48
360M
}
49
50
/* ZSTD_fracWeight() :
51
 * provide fractional-bit "cost" of a stat,
52
 * using linear interpolation approximation */
53
MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
54
2.63G
{
55
2.63G
    U32 const stat = rawStat + 1;
56
2.63G
    U32 const hb = ZSTD_highbit32(stat);
57
2.63G
    U32 const BWeight = hb * BITCOST_MULTIPLIER;
58
    /* Fweight was meant for "Fractional weight"
59
     * but it's effectively a value between 1 and 2
60
     * using fixed point arithmetic */
61
2.63G
    U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
62
2.63G
    U32 const weight = BWeight + FWeight;
63
2.63G
    assert(hb + BITCOST_ACCURACY < 31);
64
2.63G
    return weight;
65
2.63G
}
66
67
#if (DEBUGLEVEL>=2)
68
/* debugging function,
69
 * @return price in bytes as fractional value
70
 * for debug messages only */
71
MEM_STATIC double ZSTD_fCost(int price)
72
{
73
    return (double)price / (BITCOST_MULTIPLIER*8);
74
}
75
#endif
76
77
static int ZSTD_compressedLiterals(optState_t const* const optPtr)
78
183M
{
79
183M
    return optPtr->literalCompressionMode != ZSTD_ps_disable;
80
183M
}
81
82
static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
83
5.47M
{
84
5.47M
    if (ZSTD_compressedLiterals(optPtr))
85
5.47M
        optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
86
5.47M
    optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
87
5.47M
    optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
88
5.47M
    optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
89
5.47M
}
90
91
92
static U32 sum_u32(const unsigned table[], size_t nbElts)
93
23.8k
{
94
23.8k
    size_t n;
95
23.8k
    U32 total = 0;
96
1.81M
    for (n=0; n<nbElts; n++) {
97
1.79M
        total += table[n];
98
1.79M
    }
99
23.8k
    return total;
100
23.8k
}
101
102
typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;
103
104
static U32
105
ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
106
9.95k
{
107
9.95k
    U32 s, sum=0;
108
9.95k
    DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
109
9.95k
            (unsigned)lastEltIndex+1, (unsigned)shift );
110
9.95k
    assert(shift < 30);
111
1.67M
    for (s=0; s<lastEltIndex+1; s++) {
112
1.66M
        unsigned const base = base1 ? 1 : (table[s]>0);
113
1.66M
        unsigned const newStat = base + (table[s] >> shift);
114
1.66M
        sum += newStat;
115
1.66M
        table[s] = newStat;
116
1.66M
    }
117
9.95k
    return sum;
118
9.95k
}
119
120
/* ZSTD_scaleStats() :
121
 * reduce all elt frequencies in table if sum too large
122
 * return the resulting sum of elements */
123
static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
124
16.2k
{
125
16.2k
    U32 const prevsum = sum_u32(table, lastEltIndex+1);
126
16.2k
    U32 const factor = prevsum >> logTarget;
127
16.2k
    DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
128
16.2k
    assert(logTarget < 30);
129
16.2k
    if (factor <= 1) return prevsum;
130
6.17k
    return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
131
16.2k
}
132
133
/* ZSTD_rescaleFreqs() :
134
 * if first block (detected by optPtr->litLengthSum == 0) : init statistics
135
 *    take hints from dictionary if there is one
136
 *    and init from zero if there is none,
137
 *    using src for literals stats, and baseline stats for sequence symbols
138
 * otherwise downscale existing stats, to be used as seed for next block.
139
 */
140
static void
141
ZSTD_rescaleFreqs(optState_t* const optPtr,
142
            const BYTE* const src, size_t const srcSize,
143
                  int const optLevel)
144
7.85k
{
145
7.85k
    int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
146
7.85k
    DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
147
7.85k
    optPtr->priceType = zop_dynamic;
148
149
7.85k
    if (optPtr->litLengthSum == 0) {  /* no literals stats collected -> first block assumed -> init */
150
151
        /* heuristic: use pre-defined stats for too small inputs */
152
3.78k
        if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
153
48
            DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
154
48
            optPtr->priceType = zop_predef;
155
48
        }
156
157
3.78k
        assert(optPtr->symbolCosts != NULL);
158
3.78k
        if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
159
160
            /* huffman stats covering the full value set : table presumed generated by dictionary */
161
0
            optPtr->priceType = zop_dynamic;
162
163
0
            if (compressedLiterals) {
164
                /* generate literals statistics from huffman table */
165
0
                unsigned lit;
166
0
                assert(optPtr->litFreq != NULL);
167
0
                optPtr->litSum = 0;
168
0
                for (lit=0; lit<=MaxLit; lit++) {
169
0
                    U32 const scaleLog = 11;   /* scale to 2K */
170
0
                    U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
171
0
                    assert(bitCost <= scaleLog);
172
0
                    optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
173
0
                    optPtr->litSum += optPtr->litFreq[lit];
174
0
            }   }
175
176
0
            {   unsigned ll;
177
0
                FSE_CState_t llstate;
178
0
                FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
179
0
                optPtr->litLengthSum = 0;
180
0
                for (ll=0; ll<=MaxLL; ll++) {
181
0
                    U32 const scaleLog = 10;   /* scale to 1K */
182
0
                    U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
183
0
                    assert(bitCost < scaleLog);
184
0
                    optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
185
0
                    optPtr->litLengthSum += optPtr->litLengthFreq[ll];
186
0
            }   }
187
188
0
            {   unsigned ml;
189
0
                FSE_CState_t mlstate;
190
0
                FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
191
0
                optPtr->matchLengthSum = 0;
192
0
                for (ml=0; ml<=MaxML; ml++) {
193
0
                    U32 const scaleLog = 10;
194
0
                    U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
195
0
                    assert(bitCost < scaleLog);
196
0
                    optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
197
0
                    optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
198
0
            }   }
199
200
0
            {   unsigned of;
201
0
                FSE_CState_t ofstate;
202
0
                FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
203
0
                optPtr->offCodeSum = 0;
204
0
                for (of=0; of<=MaxOff; of++) {
205
0
                    U32 const scaleLog = 10;
206
0
                    U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
207
0
                    assert(bitCost < scaleLog);
208
0
                    optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
209
0
                    optPtr->offCodeSum += optPtr->offCodeFreq[of];
210
0
            }   }
211
212
3.78k
        } else {  /* first block, no dictionary */
213
214
3.78k
            assert(optPtr->litFreq != NULL);
215
3.78k
            if (compressedLiterals) {
216
                /* base initial cost of literals on direct frequency within src */
217
3.78k
                unsigned lit = MaxLit;
218
3.78k
                HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
219
3.78k
                optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
220
3.78k
            }
221
222
3.78k
            {   unsigned const baseLLfreqs[MaxLL+1] = {
223
3.78k
                    4, 2, 1, 1, 1, 1, 1, 1,
224
3.78k
                    1, 1, 1, 1, 1, 1, 1, 1,
225
3.78k
                    1, 1, 1, 1, 1, 1, 1, 1,
226
3.78k
                    1, 1, 1, 1, 1, 1, 1, 1,
227
3.78k
                    1, 1, 1, 1
228
3.78k
                };
229
3.78k
                ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
230
3.78k
                optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
231
3.78k
            }
232
233
3.78k
            {   unsigned ml;
234
204k
                for (ml=0; ml<=MaxML; ml++)
235
200k
                    optPtr->matchLengthFreq[ml] = 1;
236
3.78k
            }
237
3.78k
            optPtr->matchLengthSum = MaxML+1;
238
239
3.78k
            {   unsigned const baseOFCfreqs[MaxOff+1] = {
240
3.78k
                    6, 2, 1, 1, 2, 3, 4, 4,
241
3.78k
                    4, 3, 2, 1, 1, 1, 1, 1,
242
3.78k
                    1, 1, 1, 1, 1, 1, 1, 1,
243
3.78k
                    1, 1, 1, 1, 1, 1, 1, 1
244
3.78k
                };
245
3.78k
                ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
246
3.78k
                optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
247
3.78k
            }
248
249
3.78k
        }
250
251
4.07k
    } else {   /* new block : scale down accumulated statistics */
252
253
4.07k
        if (compressedLiterals)
254
4.07k
            optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
255
4.07k
        optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
256
4.07k
        optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
257
4.07k
        optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
258
4.07k
    }
259
260
7.85k
    ZSTD_setBasePrices(optPtr, optLevel);
261
7.85k
}
262
263
/* ZSTD_rawLiteralsCost() :
264
 * price of literals (only) in specified segment (which length can be 0).
265
 * does not include price of literalLength symbol */
266
static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
267
                                const optState_t* const optPtr,
268
                                int optLevel)
269
164M
{
270
164M
    DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength);
271
164M
    if (litLength == 0) return 0;
272
273
164M
    if (!ZSTD_compressedLiterals(optPtr))
274
0
        return (litLength << 3) * BITCOST_MULTIPLIER;  /* Uncompressed - 8 bytes per literal. */
275
276
164M
    if (optPtr->priceType == zop_predef)
277
0
        return (litLength*6) * BITCOST_MULTIPLIER;  /* 6 bit per literal - no statistic used */
278
279
    /* dynamic statistics */
280
164M
    {   U32 price = optPtr->litSumBasePrice * litLength;
281
164M
        U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER;
282
164M
        U32 u;
283
164M
        assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER);
284
328M
        for (u=0; u < litLength; u++) {
285
164M
            U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel);
286
164M
            if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
287
164M
            price -= litPrice;
288
164M
        }
289
164M
        return price;
290
164M
    }
291
164M
}
292
293
/* ZSTD_litLengthPrice() :
294
 * cost of literalLength symbol */
295
static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
296
498M
{
297
498M
    assert(litLength <= ZSTD_BLOCKSIZE_MAX);
298
498M
    if (optPtr->priceType == zop_predef)
299
0
        return WEIGHT(litLength, optLevel);
300
301
    /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
302
     * because it isn't representable in the zstd format.
303
     * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
304
     * In such a case, the block would be all literals.
305
     */
306
498M
    if (litLength == ZSTD_BLOCKSIZE_MAX)
307
0
        return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
308
309
    /* dynamic statistics */
310
498M
    {   U32 const llCode = ZSTD_LLcode(litLength);
311
498M
        return (LL_bits[llCode] * BITCOST_MULTIPLIER)
312
498M
             + optPtr->litLengthSumBasePrice
313
498M
             - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
314
498M
    }
315
498M
}
316
317
/* ZSTD_getMatchPrice() :
318
 * Provides the cost of the match part (offset + matchLength) of a sequence.
319
 * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
320
 * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
321
 * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
322
 */
323
FORCE_INLINE_TEMPLATE U32
324
ZSTD_getMatchPrice(U32 const offBase,
325
                   U32 const matchLength,
326
             const optState_t* const optPtr,
327
                   int const optLevel)
328
1.15G
{
329
1.15G
    U32 price;
330
1.15G
    U32 const offCode = ZSTD_highbit32(offBase);
331
1.15G
    U32 const mlBase = matchLength - MINMATCH;
332
1.15G
    assert(matchLength >= MINMATCH);
333
334
1.15G
    if (optPtr->priceType == zop_predef)  /* fixed scheme, does not use statistics */
335
0
        return WEIGHT(mlBase, optLevel)
336
0
             + ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */
337
338
    /* dynamic statistics */
339
1.15G
    price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
340
1.15G
    if ((optLevel<2) /*static*/ && offCode >= 20)
341
0
        price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */
342
343
    /* match Length */
344
1.15G
    {   U32 const mlCode = ZSTD_MLcode(mlBase);
345
1.15G
        price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
346
1.15G
    }
347
348
1.15G
    price += BITCOST_MULTIPLIER / 5;   /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */
349
350
1.15G
    DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
351
1.15G
    return price;
352
1.15G
}
353
354
/* ZSTD_updateStats() :
355
 * assumption : literals + litLength <= iend */
356
static void ZSTD_updateStats(optState_t* const optPtr,
357
                             U32 litLength, const BYTE* literals,
358
                             U32 offBase, U32 matchLength)
359
13.2M
{
360
    /* literals */
361
13.2M
    if (ZSTD_compressedLiterals(optPtr)) {
362
13.2M
        U32 u;
363
75.3M
        for (u=0; u < litLength; u++)
364
62.0M
            optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
365
13.2M
        optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
366
13.2M
    }
367
368
    /* literal Length */
369
13.2M
    {   U32 const llCode = ZSTD_LLcode(litLength);
370
13.2M
        optPtr->litLengthFreq[llCode]++;
371
13.2M
        optPtr->litLengthSum++;
372
13.2M
    }
373
374
    /* offset code : follows storeSeq() numeric representation */
375
13.2M
    {   U32 const offCode = ZSTD_highbit32(offBase);
376
13.2M
        assert(offCode <= MaxOff);
377
13.2M
        optPtr->offCodeFreq[offCode]++;
378
13.2M
        optPtr->offCodeSum++;
379
13.2M
    }
380
381
    /* match Length */
382
13.2M
    {   U32 const mlBase = matchLength - MINMATCH;
383
13.2M
        U32 const mlCode = ZSTD_MLcode(mlBase);
384
13.2M
        optPtr->matchLengthFreq[mlCode]++;
385
13.2M
        optPtr->matchLengthSum++;
386
13.2M
    }
387
13.2M
}
388
389
390
/* ZSTD_readMINMATCH() :
391
 * function safe only for comparisons
392
 * assumption : memPtr must be at least 4 bytes before end of buffer */
393
MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
394
890M
{
395
890M
    switch (length)
396
890M
    {
397
0
    default :
398
262M
    case 4 : return MEM_read32(memPtr);
399
627M
    case 3 : if (MEM_isLittleEndian())
400
627M
                return MEM_read32(memPtr)<<8;
401
0
             else
402
0
                return MEM_read32(memPtr)>>8;
403
890M
    }
404
890M
}
405
406
407
/* Update hashTable3 up to ip (excluded)
408
   Assumption : always within prefix (i.e. not within extDict) */
409
static
410
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
411
U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_MatchState_t* ms,
412
                                       U32* nextToUpdate3,
413
                                       const BYTE* const ip)
414
68.7M
{
415
68.7M
    U32* const hashTable3 = ms->hashTable3;
416
68.7M
    U32 const hashLog3 = ms->hashLog3;
417
68.7M
    const BYTE* const base = ms->window.base;
418
68.7M
    U32 idx = *nextToUpdate3;
419
68.7M
    U32 const target = (U32)(ip - base);
420
68.7M
    size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
421
68.7M
    assert(hashLog3 > 0);
422
423
218M
    while(idx < target) {
424
149M
        hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
425
149M
        idx++;
426
149M
    }
427
428
68.7M
    *nextToUpdate3 = target;
429
68.7M
    return hashTable3[hash3];
430
68.7M
}
431
432
433
/*-*************************************
434
*  Binary Tree search
435
***************************************/
436
/** ZSTD_insertBt1() : add one or multiple positions to tree.
437
 * @param ip assumed <= iend-8 .
438
 * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
439
 * @return : nb of positions added */
440
static
441
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
442
U32 ZSTD_insertBt1(
443
                const ZSTD_MatchState_t* ms,
444
                const BYTE* const ip, const BYTE* const iend,
445
                U32 const target,
446
                U32 const mls, const int extDict)
447
42.3M
{
448
42.3M
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
449
42.3M
    U32*   const hashTable = ms->hashTable;
450
42.3M
    U32    const hashLog = cParams->hashLog;
451
42.3M
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
452
42.3M
    U32*   const bt = ms->chainTable;
453
42.3M
    U32    const btLog  = cParams->chainLog - 1;
454
42.3M
    U32    const btMask = (1 << btLog) - 1;
455
42.3M
    U32 matchIndex = hashTable[h];
456
42.3M
    size_t commonLengthSmaller=0, commonLengthLarger=0;
457
42.3M
    const BYTE* const base = ms->window.base;
458
42.3M
    const BYTE* const dictBase = ms->window.dictBase;
459
42.3M
    const U32 dictLimit = ms->window.dictLimit;
460
42.3M
    const BYTE* const dictEnd = dictBase + dictLimit;
461
42.3M
    const BYTE* const prefixStart = base + dictLimit;
462
42.3M
    const BYTE* match;
463
42.3M
    const U32 curr = (U32)(ip-base);
464
42.3M
    const U32 btLow = btMask >= curr ? 0 : curr - btMask;
465
42.3M
    U32* smallerPtr = bt + 2*(curr&btMask);
466
42.3M
    U32* largerPtr  = smallerPtr + 1;
467
42.3M
    U32 dummy32;   /* to be nullified at the end */
468
    /* windowLow is based on target because
469
     * we only need positions that will be in the window at the end of the tree update.
470
     */
471
42.3M
    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
472
42.3M
    U32 matchEndIdx = curr+8+1;
473
42.3M
    size_t bestLength = 8;
474
42.3M
    U32 nbCompares = 1U << cParams->searchLog;
475
#ifdef ZSTD_C_PREDICT
476
    U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
477
    U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
478
    predictedSmall += (predictedSmall>0);
479
    predictedLarge += (predictedLarge>0);
480
#endif /* ZSTD_C_PREDICT */
481
482
42.3M
    DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
483
484
42.3M
    assert(curr <= target);
485
42.3M
    assert(ip <= iend-8);   /* required for h calculation */
486
42.3M
    hashTable[h] = curr;   /* Update Hash Table */
487
488
42.3M
    assert(windowLow > 0);
489
509M
    for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
490
467M
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
491
467M
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
492
467M
        assert(matchIndex < curr);
493
494
#ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
495
        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
496
        if (matchIndex == predictedSmall) {
497
            /* no need to check length, result known */
498
            *smallerPtr = matchIndex;
499
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
500
            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
501
            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
502
            predictedSmall = predictPtr[1] + (predictPtr[1]>0);
503
            continue;
504
        }
505
        if (matchIndex == predictedLarge) {
506
            *largerPtr = matchIndex;
507
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
508
            largerPtr = nextPtr;
509
            matchIndex = nextPtr[0];
510
            predictedLarge = predictPtr[0] + (predictPtr[0]>0);
511
            continue;
512
        }
513
#endif
514
515
467M
        if (!extDict || (matchIndex+matchLength >= dictLimit)) {
516
467M
            assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if actually extDict */
517
467M
            match = base + matchIndex;
518
467M
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
519
467M
        } else {
520
0
            match = dictBase + matchIndex;
521
0
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
522
0
            if (matchIndex+matchLength >= dictLimit)
523
0
                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
524
0
        }
525
526
467M
        if (matchLength > bestLength) {
527
42.7M
            bestLength = matchLength;
528
42.7M
            if (matchLength > matchEndIdx - matchIndex)
529
267k
                matchEndIdx = matchIndex + (U32)matchLength;
530
42.7M
        }
531
532
467M
        if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
533
4.79k
            break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
534
4.79k
        }
535
536
467M
        if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
537
            /* match is smaller than current */
538
59.5M
            *smallerPtr = matchIndex;             /* update smaller idx */
539
59.5M
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
540
59.5M
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
541
59.5M
            smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
542
59.5M
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
543
408M
        } else {
544
            /* match is larger than current */
545
408M
            *largerPtr = matchIndex;
546
408M
            commonLengthLarger = matchLength;
547
408M
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
548
407M
            largerPtr = nextPtr;
549
407M
            matchIndex = nextPtr[0];
550
407M
    }   }
551
552
42.3M
    *smallerPtr = *largerPtr = 0;
553
42.3M
    {   U32 positions = 0;
554
42.3M
        if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384));   /* speed optimization */
555
42.3M
        assert(matchEndIdx > curr + 8);
556
42.3M
        return MAX(positions, matchEndIdx - (curr + 8));
557
42.3M
    }
558
42.3M
}
559
560
FORCE_INLINE_TEMPLATE
561
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
562
void ZSTD_updateTree_internal(
563
                ZSTD_MatchState_t* ms,
564
                const BYTE* const ip, const BYTE* const iend,
565
                const U32 mls, const ZSTD_dictMode_e dictMode)
566
149M
{
567
149M
    const BYTE* const base = ms->window.base;
568
149M
    U32 const target = (U32)(ip - base);
569
149M
    U32 idx = ms->nextToUpdate;
570
149M
    DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
571
149M
                idx, target, dictMode);
572
573
191M
    while(idx < target) {
574
42.3M
        U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
575
42.3M
        assert(idx < (U32)(idx + forward));
576
42.3M
        idx += forward;
577
42.3M
    }
578
149M
    assert((size_t)(ip - base) <= (size_t)(U32)(-1));
579
149M
    assert((size_t)(iend - base) <= (size_t)(U32)(-1));
580
149M
    ms->nextToUpdate = target;
581
149M
}
582
583
0
void ZSTD_updateTree(ZSTD_MatchState_t* ms, const BYTE* ip, const BYTE* iend) {
584
0
    ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
585
0
}
586
587
FORCE_INLINE_TEMPLATE
588
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
589
U32
590
ZSTD_insertBtAndGetAllMatches (
591
                ZSTD_match_t* matches,  /* store result (found matches) in this table (presumed large enough) */
592
                ZSTD_MatchState_t* ms,
593
                U32* nextToUpdate3,
594
                const BYTE* const ip, const BYTE* const iLimit,
595
                const ZSTD_dictMode_e dictMode,
596
                const U32 rep[ZSTD_REP_NUM],
597
                const U32 ll0,  /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
598
                const U32 lengthToBeat,
599
                const U32 mls /* template */)
600
149M
{
601
149M
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
602
149M
    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
603
149M
    const BYTE* const base = ms->window.base;
604
149M
    U32 const curr = (U32)(ip-base);
605
149M
    U32 const hashLog = cParams->hashLog;
606
149M
    U32 const minMatch = (mls==3) ? 3 : 4;
607
149M
    U32* const hashTable = ms->hashTable;
608
149M
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
609
149M
    U32 matchIndex  = hashTable[h];
610
149M
    U32* const bt   = ms->chainTable;
611
149M
    U32 const btLog = cParams->chainLog - 1;
612
149M
    U32 const btMask= (1U << btLog) - 1;
613
149M
    size_t commonLengthSmaller=0, commonLengthLarger=0;
614
149M
    const BYTE* const dictBase = ms->window.dictBase;
615
149M
    U32 const dictLimit = ms->window.dictLimit;
616
149M
    const BYTE* const dictEnd = dictBase + dictLimit;
617
149M
    const BYTE* const prefixStart = base + dictLimit;
618
149M
    U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
619
149M
    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
620
149M
    U32 const matchLow = windowLow ? windowLow : 1;
621
149M
    U32* smallerPtr = bt + 2*(curr&btMask);
622
149M
    U32* largerPtr  = bt + 2*(curr&btMask) + 1;
623
149M
    U32 matchEndIdx = curr+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
624
149M
    U32 dummy32;   /* to be nullified at the end */
625
149M
    U32 mnum = 0;
626
149M
    U32 nbCompares = 1U << cParams->searchLog;
627
628
149M
    const ZSTD_MatchState_t* dms    = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
629
149M
    const ZSTD_compressionParameters* const dmsCParams =
630
149M
                                      dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
631
149M
    const BYTE* const dmsBase       = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
632
149M
    const BYTE* const dmsEnd        = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
633
149M
    U32         const dmsHighLimit  = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
634
149M
    U32         const dmsLowLimit   = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
635
149M
    U32         const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
636
149M
    U32         const dmsHashLog    = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
637
149M
    U32         const dmsBtLog      = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
638
149M
    U32         const dmsBtMask     = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
639
149M
    U32         const dmsBtLow      = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
640
641
149M
    size_t bestLength = lengthToBeat-1;
642
149M
    DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
643
644
    /* check repCode */
645
149M
    assert(ll0 <= 1);   /* necessarily 1 or 0 */
646
149M
    {   U32 const lastR = ZSTD_REP_NUM + ll0;
647
149M
        U32 repCode;
648
595M
        for (repCode = ll0; repCode < lastR; repCode++) {
649
446M
            U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
650
446M
            U32 const repIndex = curr - repOffset;
651
446M
            U32 repLen = 0;
652
446M
            assert(curr >= dictLimit);
653
446M
            if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) {  /* equivalent to `curr > repIndex >= dictLimit` */
654
                /* We must validate the repcode offset because when we're using a dictionary the
655
                 * valid offset range shrinks when the dictionary goes out of bounds.
656
                 */
657
445M
                if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
658
66.8M
                    repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
659
66.8M
                }
660
445M
            } else {  /* repIndex < dictLimit || repIndex >= curr */
661
1.89M
                const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
662
0
                                             dmsBase + repIndex - dmsIndexDelta :
663
1.89M
                                             dictBase + repIndex;
664
1.89M
                assert(curr >= windowLow);
665
1.89M
                if ( dictMode == ZSTD_extDict
666
0
                  && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow)  /* equivalent to `curr > repIndex >= windowLow` */
667
0
                     & (ZSTD_index_overlap_check(dictLimit, repIndex)) )
668
0
                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
669
0
                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
670
0
                }
671
1.89M
                if (dictMode == ZSTD_dictMatchState
672
0
                  && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta))  /* equivalent to `curr > repIndex >= dmsLowLimit` */
673
0
                     & (ZSTD_index_overlap_check(dictLimit, repIndex)) )
674
0
                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
675
0
                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
676
0
            }   }
677
            /* save longer solution */
678
446M
            if (repLen > bestLength) {
679
51.7M
                DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
680
51.7M
                            repCode, ll0, repOffset, repLen);
681
51.7M
                bestLength = repLen;
682
103M
                matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1);  /* expect value between 1 and 3 */
683
103M
                matches[mnum].len = (U32)repLen;
684
103M
                mnum++;
685
103M
                if ( (repLen > sufficient_len)
686
51.7M
                   | (ip+repLen == iLimit) ) {  /* best possible */
687
101k
                    return mnum;
688
101k
    }   }   }   }
689
690
    /* HC3 match finder */
691
148M
    if ((mls == 3) /*static*/ && (bestLength < mls)) {
692
68.7M
        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
693
68.7M
        if ((matchIndex3 >= matchLow)
694
68.7M
          & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
695
39.0M
            size_t mlen;
696
39.0M
            if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
697
39.0M
                const BYTE* const match = base + matchIndex3;
698
39.0M
                mlen = ZSTD_count(ip, match, iLimit);
699
39.0M
            } else {
700
0
                const BYTE* const match = dictBase + matchIndex3;
701
0
                mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
702
0
            }
703
704
            /* save best solution */
705
39.0M
            if (mlen >= mls /* == 3 > bestLength */) {
706
30.3M
                DEBUGLOG(8, "found small match with hlog3, of length %u",
707
30.3M
                            (U32)mlen);
708
30.3M
                bestLength = mlen;
709
30.3M
                assert(curr > matchIndex3);
710
30.3M
                assert(mnum==0);  /* no prior solution */
711
30.3M
                matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3);
712
30.3M
                matches[0].len = (U32)mlen;
713
30.3M
                mnum = 1;
714
30.3M
                if ( (mlen > sufficient_len) |
715
30.3M
                     (ip+mlen == iLimit) ) {  /* best possible length */
716
6.32k
                    ms->nextToUpdate = curr+1;  /* skip insertion */
717
6.32k
                    return 1;
718
6.32k
        }   }   }
719
        /* no dictMatchState lookup: dicts don't have a populated HC3 table */
720
68.7M
    }  /* if (mls == 3) */
721
722
148M
    hashTable[h] = curr;   /* Update Hash Table */
723
724
1.01G
    for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
725
866M
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
726
866M
        const BYTE* match;
727
866M
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
728
866M
        assert(curr > matchIndex);
729
730
866M
        if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
731
866M
            assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
732
866M
            match = base + matchIndex;
733
866M
            if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
734
866M
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
735
866M
        } else {
736
0
            match = dictBase + matchIndex;
737
0
            assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
738
0
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
739
0
            if (matchIndex+matchLength >= dictLimit)
740
0
                match = base + matchIndex;   /* prepare for match[matchLength] read */
741
0
        }
742
743
866M
        if (matchLength > bestLength) {
744
40.1M
            DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
745
40.1M
                    (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
746
40.1M
            assert(matchEndIdx > matchIndex);
747
40.1M
            if (matchLength > matchEndIdx - matchIndex)
748
92.3k
                matchEndIdx = matchIndex + (U32)matchLength;
749
40.1M
            bestLength = matchLength;
750
40.1M
            matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
751
40.1M
            matches[mnum].len = (U32)matchLength;
752
40.1M
            mnum++;
753
40.1M
            if ( (matchLength > ZSTD_OPT_NUM)
754
40.1M
               | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
755
3.30k
                if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
756
3.30k
                break; /* drop, to preserve bt consistency (miss a little bit of compression) */
757
3.30k
        }   }
758
759
866M
        if (match[matchLength] < ip[matchLength]) {
760
            /* match smaller than current */
761
123M
            *smallerPtr = matchIndex;             /* update smaller idx */
762
123M
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
763
123M
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
764
123M
            smallerPtr = nextPtr+1;               /* new candidate => larger than match, which was smaller than current */
765
123M
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous, closer to current */
766
742M
        } else {
767
742M
            *largerPtr = matchIndex;
768
742M
            commonLengthLarger = matchLength;
769
742M
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
770
742M
            largerPtr = nextPtr;
771
742M
            matchIndex = nextPtr[0];
772
742M
    }   }
773
774
148M
    *smallerPtr = *largerPtr = 0;
775
776
148M
    assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
777
148M
    if (dictMode == ZSTD_dictMatchState && nbCompares) {
778
0
        size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
779
0
        U32 dictMatchIndex = dms->hashTable[dmsH];
780
0
        const U32* const dmsBt = dms->chainTable;
781
0
        commonLengthSmaller = commonLengthLarger = 0;
782
0
        for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
783
0
            const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
784
0
            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
785
0
            const BYTE* match = dmsBase + dictMatchIndex;
786
0
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
787
0
            if (dictMatchIndex+matchLength >= dmsHighLimit)
788
0
                match = base + dictMatchIndex + dmsIndexDelta;   /* to prepare for next usage of match[matchLength] */
789
790
0
            if (matchLength > bestLength) {
791
0
                matchIndex = dictMatchIndex + dmsIndexDelta;
792
0
                DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
793
0
                        (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
794
0
                if (matchLength > matchEndIdx - matchIndex)
795
0
                    matchEndIdx = matchIndex + (U32)matchLength;
796
0
                bestLength = matchLength;
797
0
                matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
798
0
                matches[mnum].len = (U32)matchLength;
799
0
                mnum++;
800
0
                if ( (matchLength > ZSTD_OPT_NUM)
801
0
                   | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
802
0
                    break;   /* drop, to guarantee consistency (miss a little bit of compression) */
803
0
            }   }
804
805
0
            if (dictMatchIndex <= dmsBtLow) { break; }   /* beyond tree size, stop the search */
806
0
            if (match[matchLength] < ip[matchLength]) {
807
0
                commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
808
0
                dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
809
0
            } else {
810
                /* match is larger than current */
811
0
                commonLengthLarger = matchLength;
812
0
                dictMatchIndex = nextPtr[0];
813
0
    }   }   }  /* if (dictMode == ZSTD_dictMatchState) */
814
815
148M
    assert(matchEndIdx > curr+8);
816
148M
    ms->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
817
148M
    return mnum;
818
148M
}
819
820
typedef U32 (*ZSTD_getAllMatchesFn)(
821
    ZSTD_match_t*,
822
    ZSTD_MatchState_t*,
823
    U32*,
824
    const BYTE*,
825
    const BYTE*,
826
    const U32 rep[ZSTD_REP_NUM],
827
    U32 const ll0,
828
    U32 const lengthToBeat);
829
830
FORCE_INLINE_TEMPLATE
831
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
832
U32 ZSTD_btGetAllMatches_internal(
833
        ZSTD_match_t* matches,
834
        ZSTD_MatchState_t* ms,
835
        U32* nextToUpdate3,
836
        const BYTE* ip,
837
        const BYTE* const iHighLimit,
838
        const U32 rep[ZSTD_REP_NUM],
839
        U32 const ll0,
840
        U32 const lengthToBeat,
841
        const ZSTD_dictMode_e dictMode,
842
        const U32 mls)
843
153M
{
844
153M
    assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
845
153M
    DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
846
153M
    if (ip < ms->window.base + ms->nextToUpdate)
847
4.14M
        return 0;   /* skipped area */
848
149M
    ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
849
149M
    return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
850
153M
}
851
852
94.2k
#define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
853
854
#define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls)            \
855
    static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)(      \
856
            ZSTD_match_t* matches,                             \
857
            ZSTD_MatchState_t* ms,                             \
858
            U32* nextToUpdate3,                                \
859
            const BYTE* ip,                                    \
860
            const BYTE* const iHighLimit,                      \
861
            const U32 rep[ZSTD_REP_NUM],                       \
862
            U32 const ll0,                                     \
863
            U32 const lengthToBeat)                            \
864
153M
    {                                                          \
865
153M
        return ZSTD_btGetAllMatches_internal(                  \
866
153M
                matches, ms, nextToUpdate3, ip, iHighLimit,    \
867
153M
                rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
868
153M
    }
zstd_opt.c:ZSTD_btGetAllMatches_noDict_3
Line
Count
Source
864
108M
    {                                                          \
865
108M
        return ZSTD_btGetAllMatches_internal(                  \
866
108M
                matches, ms, nextToUpdate3, ip, iHighLimit,    \
867
108M
                rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
868
108M
    }
zstd_opt.c:ZSTD_btGetAllMatches_noDict_4
Line
Count
Source
864
9.38M
    {                                                          \
865
9.38M
        return ZSTD_btGetAllMatches_internal(                  \
866
9.38M
                matches, ms, nextToUpdate3, ip, iHighLimit,    \
867
9.38M
                rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
868
9.38M
    }
zstd_opt.c:ZSTD_btGetAllMatches_noDict_5
Line
Count
Source
864
35.0M
    {                                                          \
865
35.0M
        return ZSTD_btGetAllMatches_internal(                  \
866
35.0M
                matches, ms, nextToUpdate3, ip, iHighLimit,    \
867
35.0M
                rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
868
35.0M
    }
Unexecuted instantiation: zstd_opt.c:ZSTD_btGetAllMatches_noDict_6
Unexecuted instantiation: zstd_opt.c:ZSTD_btGetAllMatches_extDict_3
Unexecuted instantiation: zstd_opt.c:ZSTD_btGetAllMatches_extDict_4
Unexecuted instantiation: zstd_opt.c:ZSTD_btGetAllMatches_extDict_5
Unexecuted instantiation: zstd_opt.c:ZSTD_btGetAllMatches_extDict_6
Unexecuted instantiation: zstd_opt.c:ZSTD_btGetAllMatches_dictMatchState_3
Unexecuted instantiation: zstd_opt.c:ZSTD_btGetAllMatches_dictMatchState_4
Unexecuted instantiation: zstd_opt.c:ZSTD_btGetAllMatches_dictMatchState_5
Unexecuted instantiation: zstd_opt.c:ZSTD_btGetAllMatches_dictMatchState_6
869
870
#define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode)  \
871
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3)  \
872
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4)  \
873
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5)  \
874
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
875
876
GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
877
GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
878
GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
879
880
#define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode)  \
881
23.5k
    {                                            \
882
23.5k
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
883
23.5k
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
884
23.5k
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
885
23.5k
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6)  \
886
23.5k
    }
887
888
static ZSTD_getAllMatchesFn
889
ZSTD_selectBtGetAllMatches(ZSTD_MatchState_t const* ms, ZSTD_dictMode_e const dictMode)
890
7.85k
{
891
7.85k
    ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
892
7.85k
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
893
7.85k
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
894
7.85k
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
895
7.85k
    };
896
7.85k
    U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
897
7.85k
    assert((U32)dictMode < 3);
898
7.85k
    assert(mls - 3 < 4);
899
7.85k
    return getAllMatchesFns[(int)dictMode][mls - 3];
900
7.85k
}
901
902
/*************************
903
*  LDM helper functions  *
904
*************************/
905
906
/* Struct containing info needed to make decision about ldm inclusion */
907
typedef struct {
908
    RawSeqStore_t seqStore;   /* External match candidates store for this block */
909
    U32 startPosInBlock;      /* Start position of the current match candidate */
910
    U32 endPosInBlock;        /* End position of the current match candidate */
911
    U32 offset;               /* Offset of the match candidate */
912
} ZSTD_optLdm_t;
913
914
/* ZSTD_optLdm_skipRawSeqStoreBytes():
915
 * Moves forward in @rawSeqStore by @nbBytes,
916
 * which will update the fields 'pos' and 'posInSequence'.
917
 */
918
static void ZSTD_optLdm_skipRawSeqStoreBytes(RawSeqStore_t* rawSeqStore, size_t nbBytes)
919
0
{
920
0
    U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
921
0
    while (currPos && rawSeqStore->pos < rawSeqStore->size) {
922
0
        rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
923
0
        if (currPos >= currSeq.litLength + currSeq.matchLength) {
924
0
            currPos -= currSeq.litLength + currSeq.matchLength;
925
0
            rawSeqStore->pos++;
926
0
        } else {
927
0
            rawSeqStore->posInSequence = currPos;
928
0
            break;
929
0
        }
930
0
    }
931
0
    if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
932
0
        rawSeqStore->posInSequence = 0;
933
0
    }
934
0
}
935
936
/* ZSTD_opt_getNextMatchAndUpdateSeqStore():
937
 * Calculates the beginning and end of the next match in the current block.
938
 * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
939
 */
940
static void
941
ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
942
                                       U32 blockBytesRemaining)
943
7.85k
{
944
7.85k
    rawSeq currSeq;
945
7.85k
    U32 currBlockEndPos;
946
7.85k
    U32 literalsBytesRemaining;
947
7.85k
    U32 matchBytesRemaining;
948
949
    /* Setting match end position to MAX to ensure we never use an LDM during this block */
950
7.85k
    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
951
7.85k
        optLdm->startPosInBlock = UINT_MAX;
952
7.85k
        optLdm->endPosInBlock = UINT_MAX;
953
7.85k
        return;
954
7.85k
    }
955
    /* Calculate appropriate bytes left in matchLength and litLength
956
     * after adjusting based on ldmSeqStore->posInSequence */
957
0
    currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
958
0
    assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
959
0
    currBlockEndPos = currPosInBlock + blockBytesRemaining;
960
0
    literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
961
0
            currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
962
0
            0;
963
0
    matchBytesRemaining = (literalsBytesRemaining == 0) ?
964
0
            currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
965
0
            currSeq.matchLength;
966
967
    /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
968
0
    if (literalsBytesRemaining >= blockBytesRemaining) {
969
0
        optLdm->startPosInBlock = UINT_MAX;
970
0
        optLdm->endPosInBlock = UINT_MAX;
971
0
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
972
0
        return;
973
0
    }
974
975
    /* Matches may be < minMatch by this process. In that case, we will reject them
976
       when we are deciding whether or not to add the ldm */
977
0
    optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
978
0
    optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
979
0
    optLdm->offset = currSeq.offset;
980
981
0
    if (optLdm->endPosInBlock > currBlockEndPos) {
982
        /* Match ends after the block ends, we can't use the whole match */
983
0
        optLdm->endPosInBlock = currBlockEndPos;
984
0
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
985
0
    } else {
986
        /* Consume nb of bytes equal to size of sequence left */
987
0
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
988
0
    }
989
0
}
990
991
/* ZSTD_optLdm_maybeAddMatch():
992
 * Adds a match if it's long enough,
993
 * based on it's 'matchStartPosInBlock' and 'matchEndPosInBlock',
994
 * into 'matches'. Maintains the correct ordering of 'matches'.
995
 */
996
static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
997
                                      const ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
998
                                      U32 minMatch)
999
0
{
1000
0
    U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
1001
    /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
1002
0
    U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
1003
1004
    /* Ensure that current block position is not outside of the match */
1005
0
    if (currPosInBlock < optLdm->startPosInBlock
1006
0
      || currPosInBlock >= optLdm->endPosInBlock
1007
0
      || candidateMatchLength < minMatch) {
1008
0
        return;
1009
0
    }
1010
1011
0
    if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
1012
0
        U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset);
1013
0
        DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
1014
0
                 candidateOffBase, candidateMatchLength, currPosInBlock);
1015
0
        matches[*nbMatches].len = candidateMatchLength;
1016
0
        matches[*nbMatches].off = candidateOffBase;
1017
0
        (*nbMatches)++;
1018
0
    }
1019
0
}
1020
1021
/* ZSTD_optLdm_processMatchCandidate():
1022
 * Wrapper function to update ldm seq store and call ldm functions as necessary.
1023
 */
1024
static void
1025
ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
1026
                                  ZSTD_match_t* matches, U32* nbMatches,
1027
                                  U32 currPosInBlock, U32 remainingBytes,
1028
                                  U32 minMatch)
1029
153M
{
1030
153M
    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
1031
153M
        return;
1032
153M
    }
1033
1034
0
    if (currPosInBlock >= optLdm->endPosInBlock) {
1035
0
        if (currPosInBlock > optLdm->endPosInBlock) {
1036
            /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
1037
             * at the end of a match from the ldm seq store, and will often be some bytes
1038
             * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
1039
             */
1040
0
            U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
1041
0
            ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
1042
0
        }
1043
0
        ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
1044
0
    }
1045
0
    ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock, minMatch);
1046
0
}
1047
1048
1049
/*-*******************************
1050
*  Optimal parser
1051
*********************************/
1052
1053
#if 0 /* debug */
1054
1055
static void
1056
listStats(const U32* table, int lastEltID)
1057
{
1058
    int const nbElts = lastEltID + 1;
1059
    int enb;
1060
    for (enb=0; enb < nbElts; enb++) {
1061
        (void)table;
1062
        /* RAWLOG(2, "%3i:%3i,  ", enb, table[enb]); */
1063
        RAWLOG(2, "%4i,", table[enb]);
1064
    }
1065
    RAWLOG(2, " \n");
1066
}
1067
1068
#endif
1069
1070
164M
#define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel)
1071
498M
#define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel)
1072
184M
#define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1))
1073
1074
FORCE_INLINE_TEMPLATE
1075
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1076
size_t
1077
ZSTD_compressBlock_opt_generic(ZSTD_MatchState_t* ms,
1078
                               SeqStore_t* seqStore,
1079
                               U32 rep[ZSTD_REP_NUM],
1080
                         const void* src, size_t srcSize,
1081
                         const int optLevel,
1082
                         const ZSTD_dictMode_e dictMode)
1083
7.85k
{
1084
7.85k
    optState_t* const optStatePtr = &ms->opt;
1085
7.85k
    const BYTE* const istart = (const BYTE*)src;
1086
7.85k
    const BYTE* ip = istart;
1087
7.85k
    const BYTE* anchor = istart;
1088
7.85k
    const BYTE* const iend = istart + srcSize;
1089
7.85k
    const BYTE* const ilimit = iend - 8;
1090
7.85k
    const BYTE* const base = ms->window.base;
1091
7.85k
    const BYTE* const prefixStart = base + ms->window.dictLimit;
1092
7.85k
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
1093
1094
7.85k
    ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
1095
1096
7.85k
    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
1097
7.85k
    U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
1098
7.85k
    U32 nextToUpdate3 = ms->nextToUpdate;
1099
1100
7.85k
    ZSTD_optimal_t* const opt = optStatePtr->priceTable;
1101
7.85k
    ZSTD_match_t* const matches = optStatePtr->matchTable;
1102
7.85k
    ZSTD_optimal_t lastStretch;
1103
7.85k
    ZSTD_optLdm_t optLdm;
1104
1105
7.85k
    ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t));
1106
1107
7.85k
    optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
1108
7.85k
    optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
1109
7.85k
    ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
1110
1111
    /* init */
1112
7.85k
    DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
1113
7.85k
                (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
1114
7.85k
    assert(optLevel <= 2);
1115
7.85k
    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
1116
7.85k
    ip += (ip==prefixStart);
1117
1118
    /* Match Loop */
1119
55.1M
    while (ip < ilimit) {
1120
55.1M
        U32 cur, last_pos = 0;
1121
1122
        /* find first match */
1123
55.1M
        {   U32 const litlen = (U32)(ip - anchor);
1124
55.1M
            U32 const ll0 = !litlen;
1125
55.1M
            U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
1126
55.1M
            ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1127
55.1M
                                              (U32)(ip-istart), (U32)(iend-ip),
1128
55.1M
                                              minMatch);
1129
55.1M
            if (!nbMatches) {
1130
49.2M
                DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart));
1131
49.2M
                ip++;
1132
49.2M
                continue;
1133
49.2M
            }
1134
1135
            /* Match found: let's store this solution, and eventually find more candidates.
1136
             * During this forward pass, @opt is used to store stretches,
1137
             * defined as "a match followed by N literals".
1138
             * Note how this is different from a Sequence, which is "N literals followed by a match".
1139
             * Storing stretches allows us to store different match predecessors
1140
             * for each literal position part of a literals run. */
1141
1142
            /* initialize opt[0] */
1143
5.91M
            opt[0].mlen = 0;  /* there are only literals so far */
1144
5.91M
            opt[0].litlen = litlen;
1145
            /* No need to include the actual price of the literals before the first match
1146
             * because it is static for the duration of the forward pass, and is included
1147
             * in every subsequent price. But, we include the literal length because
1148
             * the cost variation of litlen depends on the value of litlen.
1149
             */
1150
5.91M
            opt[0].price = LL_PRICE(litlen);
1151
5.91M
            ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0]));
1152
5.91M
            ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep));
1153
1154
            /* large match -> immediate encoding */
1155
5.91M
            {   U32 const maxML = matches[nbMatches-1].len;
1156
5.91M
                U32 const maxOffBase = matches[nbMatches-1].off;
1157
5.91M
                DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
1158
5.91M
                            nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
1159
1160
5.91M
                if (maxML > sufficient_len) {
1161
142k
                    lastStretch.litlen = 0;
1162
142k
                    lastStretch.mlen = maxML;
1163
142k
                    lastStretch.off = maxOffBase;
1164
142k
                    DEBUGLOG(6, "large match (%u>%u) => immediate encoding",
1165
142k
                                maxML, sufficient_len);
1166
142k
                    cur = 0;
1167
142k
                    last_pos = maxML;
1168
142k
                    goto _shortestPath;
1169
142k
            }   }
1170
1171
            /* set prices for first matches starting position == 0 */
1172
5.91M
            assert(opt[0].price >= 0);
1173
5.77M
            {   U32 pos;
1174
5.77M
                U32 matchNb;
1175
18.9M
                for (pos = 1; pos < minMatch; pos++) {
1176
13.1M
                    opt[pos].price = ZSTD_MAX_PRICE;
1177
13.1M
                    opt[pos].mlen = 0;
1178
13.1M
                    opt[pos].litlen = litlen + pos;
1179
13.1M
                }
1180
12.9M
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1181
7.12M
                    U32 const offBase = matches[matchNb].off;
1182
7.12M
                    U32 const end = matches[matchNb].len;
1183
32.9M
                    for ( ; pos <= end ; pos++ ) {
1184
25.8M
                        int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
1185
25.8M
                        int const sequencePrice = opt[0].price + matchPrice;
1186
25.8M
                        DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
1187
25.8M
                                    pos, ZSTD_fCost(sequencePrice));
1188
25.8M
                        opt[pos].mlen = pos;
1189
25.8M
                        opt[pos].off = offBase;
1190
25.8M
                        opt[pos].litlen = 0; /* end of match */
1191
25.8M
                        opt[pos].price = sequencePrice + LL_PRICE(0);
1192
25.8M
                    }
1193
7.12M
                }
1194
5.77M
                last_pos = pos-1;
1195
5.77M
                opt[pos].price = ZSTD_MAX_PRICE;
1196
5.77M
            }
1197
5.77M
        }
1198
1199
        /* check further positions */
1200
126M
        for (cur = 1; cur <= last_pos; cur++) {
1201
126M
            const BYTE* const inr = ip + cur;
1202
126M
            assert(cur <= ZSTD_OPT_NUM);
1203
126M
            DEBUGLOG(7, "cPos:%i==rPos:%u", (int)(inr-istart), cur);
1204
1205
            /* Fix current position with one literal if cheaper */
1206
126M
            {   U32 const litlen = opt[cur-1].litlen + 1;
1207
126M
                int const price = opt[cur-1].price
1208
126M
                                + LIT_PRICE(ip+cur-1)
1209
126M
                                + LL_INCPRICE(litlen);
1210
126M
                assert(price < 1000000000); /* overflow check */
1211
126M
                if (price <= opt[cur].price) {
1212
47.9M
                    ZSTD_optimal_t const prevMatch = opt[cur];
1213
47.9M
                    DEBUGLOG(7, "cPos:%i==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
1214
47.9M
                                (int)(inr-istart), cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
1215
47.9M
                                opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
1216
47.9M
                    opt[cur] = opt[cur-1];
1217
47.9M
                    opt[cur].litlen = litlen;
1218
47.9M
                    opt[cur].price = price;
1219
47.9M
                    if ( (optLevel >= 1) /* additional check only for higher modes */
1220
31.6M
                      && (prevMatch.litlen == 0) /* replace a match */
1221
20.0M
                      && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */
1222
18.7M
                      && LIKELY(ip + cur < iend)
1223
47.9M
                    ) {
1224
                        /* check next position, in case it would be cheaper */
1225
18.7M
                        int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1);
1226
18.7M
                        int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1);
1227
18.7M
                        DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f",
1228
18.7M
                                cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals));
1229
18.7M
                        if ( (with1literal < withMoreLiterals)
1230
8.85M
                          && (with1literal < opt[cur+1].price) ) {
1231
                            /* update offset history - before it disappears */
1232
4.50M
                            U32 const prev = cur - prevMatch.mlen;
1233
4.50M
                            Repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0);
1234
4.50M
                            assert(cur >= prevMatch.mlen);
1235
4.50M
                            DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !",
1236
4.50M
                                        ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals),
1237
4.50M
                                        newReps.rep[0], newReps.rep[1], newReps.rep[2] );
1238
4.50M
                            opt[cur+1] = prevMatch;  /* mlen & offbase */
1239
4.50M
                            ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(Repcodes_t));
1240
4.50M
                            opt[cur+1].litlen = 1;
1241
4.50M
                            opt[cur+1].price = with1literal;
1242
4.50M
                            if (last_pos < cur+1) last_pos = cur+1;
1243
4.50M
                        }
1244
18.7M
                    }
1245
79.0M
                } else {
1246
79.0M
                    DEBUGLOG(7, "cPos:%i==rPos:%u : literal would cost more (%.2f>%.2f)",
1247
79.0M
                                (int)(inr-istart), cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
1248
79.0M
                }
1249
126M
            }
1250
1251
            /* Offset history is not updated during match comparison.
1252
             * Do it here, now that the match is selected and confirmed.
1253
             */
1254
126M
            ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(Repcodes_t));
1255
126M
            assert(cur >= opt[cur].mlen);
1256
126M
            if (opt[cur].litlen == 0) {
1257
                /* just finished a match => alter offset history */
1258
74.5M
                U32 const prev = cur - opt[cur].mlen;
1259
74.5M
                Repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
1260
74.5M
                ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(Repcodes_t));
1261
74.5M
            }
1262
1263
            /* last match must start at a minimum distance of 8 from oend */
1264
126M
            if (inr > ilimit) continue;
1265
1266
126M
            if (cur == last_pos) break;
1267
1268
121M
            if ( (optLevel==0) /*static_test*/
1269
45.7M
              && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
1270
23.1M
                DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1);
1271
23.1M
                continue;  /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
1272
23.1M
            }
1273
1274
121M
            assert(opt[cur].price >= 0);
1275
98.0M
            {   U32 const ll0 = (opt[cur].litlen == 0);
1276
98.0M
                int const previousPrice = opt[cur].price;
1277
98.0M
                int const basePrice = previousPrice + LL_PRICE(0);
1278
98.0M
                U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
1279
98.0M
                U32 matchNb;
1280
1281
98.0M
                ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1282
98.0M
                                                  (U32)(inr-istart), (U32)(iend-inr),
1283
98.0M
                                                  minMatch);
1284
1285
98.0M
                if (!nbMatches) {
1286
18.8M
                    DEBUGLOG(7, "rPos:%u : no match found", cur);
1287
18.8M
                    continue;
1288
18.8M
                }
1289
1290
79.2M
                {   U32 const longestML = matches[nbMatches-1].len;
1291
79.2M
                    DEBUGLOG(7, "cPos:%i==rPos:%u, found %u matches, of longest ML=%u",
1292
79.2M
                                (int)(inr-istart), cur, nbMatches, longestML);
1293
1294
79.2M
                    if ( (longestML > sufficient_len)
1295
79.1M
                      || (cur + longestML >= ZSTD_OPT_NUM)
1296
79.1M
                      || (ip + cur + longestML >= iend) ) {
1297
77.5k
                        lastStretch.mlen = longestML;
1298
77.5k
                        lastStretch.off = matches[nbMatches-1].off;
1299
77.5k
                        lastStretch.litlen = 0;
1300
77.5k
                        last_pos = cur + longestML;
1301
77.5k
                        goto _shortestPath;
1302
77.5k
                }   }
1303
1304
                /* set prices using matches found at position == cur */
1305
193M
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1306
114M
                    U32 const offset = matches[matchNb].off;
1307
114M
                    U32 const lastML = matches[matchNb].len;
1308
114M
                    U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
1309
114M
                    U32 mlen;
1310
1311
114M
                    DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
1312
114M
                                matchNb, matches[matchNb].off, lastML, opt[cur].litlen);
1313
1314
1.22G
                    for (mlen = lastML; mlen >= startML; mlen--) {  /* scan downward */
1315
1.13G
                        U32 const pos = cur + mlen;
1316
1.13G
                        int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
1317
1318
1.13G
                        if ((pos > last_pos) || (price < opt[pos].price)) {
1319
156M
                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
1320
156M
                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
1321
244M
                            while (last_pos < pos) {
1322
                                /* fill empty positions, for future comparisons */
1323
88.5M
                                last_pos++;
1324
88.5M
                                opt[last_pos].price = ZSTD_MAX_PRICE;
1325
88.5M
                                opt[last_pos].litlen = !0;  /* just needs to be != 0, to mean "not an end of match" */
1326
88.5M
                            }
1327
156M
                            opt[pos].mlen = mlen;
1328
156M
                            opt[pos].off = offset;
1329
156M
                            opt[pos].litlen = 0;
1330
156M
                            opt[pos].price = price;
1331
974M
                        } else {
1332
974M
                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
1333
974M
                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
1334
974M
                            if (optLevel==0) break;  /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
1335
974M
                        }
1336
1.13G
            }   }   }
1337
79.1M
            opt[last_pos+1].price = ZSTD_MAX_PRICE;
1338
79.1M
        }  /* for (cur = 1; cur <= last_pos; cur++) */
1339
1340
5.69M
        lastStretch = opt[last_pos];
1341
5.69M
        assert(cur >= lastStretch.mlen);
1342
5.69M
        cur = last_pos - lastStretch.mlen;
1343
1344
5.91M
_shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
1345
5.91M
        assert(opt[0].mlen == 0);
1346
5.91M
        assert(last_pos >= lastStretch.mlen);
1347
5.91M
        assert(cur == last_pos - lastStretch.mlen);
1348
1349
5.91M
        if (lastStretch.mlen==0) {
1350
            /* no solution : all matches have been converted into literals */
1351
452k
            assert(lastStretch.litlen == (ip - anchor) + last_pos);
1352
452k
            ip += last_pos;
1353
452k
            continue;
1354
452k
        }
1355
5.91M
        assert(lastStretch.off > 0);
1356
1357
        /* Update offset history */
1358
5.46M
        if (lastStretch.litlen == 0) {
1359
            /* finishing on a match : update offset history */
1360
3.87M
            Repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0);
1361
3.87M
            ZSTD_memcpy(rep, &reps, sizeof(Repcodes_t));
1362
3.87M
        } else {
1363
1.58M
            ZSTD_memcpy(rep, lastStretch.rep, sizeof(Repcodes_t));
1364
1.58M
            assert(cur >= lastStretch.litlen);
1365
1.58M
            cur -= lastStretch.litlen;
1366
1.58M
        }
1367
1368
        /* Let's write the shortest path solution.
1369
         * It is stored in @opt in reverse order,
1370
         * starting from @storeEnd (==cur+2),
1371
         * effectively partially @opt overwriting.
1372
         * Content is changed too:
1373
         * - So far, @opt stored stretches, aka a match followed by literals
1374
         * - Now, it will store sequences, aka literals followed by a match
1375
         */
1376
5.46M
        {   U32 const storeEnd = cur + 2;
1377
5.46M
            U32 storeStart = storeEnd;
1378
5.46M
            U32 stretchPos = cur;
1379
1380
5.46M
            DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
1381
5.46M
                        last_pos, cur); (void)last_pos;
1382
5.46M
            assert(storeEnd < ZSTD_OPT_SIZE);
1383
5.46M
            DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
1384
5.46M
                        storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off);
1385
5.46M
            opt[storeEnd] = lastStretch;  /* note: litlen will be fixed */
1386
5.46M
            storeStart = storeEnd;
1387
13.2M
            while (1) {
1388
13.2M
                ZSTD_optimal_t nextStretch = opt[stretchPos];
1389
13.2M
                opt[storeStart].litlen = nextStretch.litlen;
1390
13.2M
                DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)",
1391
13.2M
                            opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off);
1392
13.2M
                if (nextStretch.mlen == 0) {
1393
                    /* reaching beginning of segment */
1394
5.46M
                    break;
1395
5.46M
                }
1396
7.75M
                storeStart--;
1397
7.75M
                opt[storeStart] = nextStretch; /* note: litlen will be fixed */
1398
7.75M
                assert(nextStretch.litlen + nextStretch.mlen <= stretchPos);
1399
7.75M
                stretchPos -= nextStretch.litlen + nextStretch.mlen;
1400
7.75M
            }
1401
1402
            /* save sequences */
1403
5.46M
            DEBUGLOG(6, "sending selected sequences into seqStore");
1404
5.46M
            {   U32 storePos;
1405
18.6M
                for (storePos=storeStart; storePos <= storeEnd; storePos++) {
1406
13.2M
                    U32 const llen = opt[storePos].litlen;
1407
13.2M
                    U32 const mlen = opt[storePos].mlen;
1408
13.2M
                    U32 const offBase = opt[storePos].off;
1409
13.2M
                    U32 const advance = llen + mlen;
1410
13.2M
                    DEBUGLOG(6, "considering seq starting at %i, llen=%u, mlen=%u",
1411
13.2M
                                (int)(anchor - istart), (unsigned)llen, (unsigned)mlen);
1412
1413
13.2M
                    if (mlen==0) {  /* only literals => must be last "sequence", actually starting a new stream of sequences */
1414
0
                        assert(storePos == storeEnd);   /* must be last sequence */
1415
0
                        ip = anchor + llen;     /* last "sequence" is a bunch of literals => don't progress anchor */
1416
0
                        continue;   /* will finish */
1417
0
                    }
1418
1419
13.2M
                    assert(anchor + llen <= iend);
1420
13.2M
                    ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
1421
13.2M
                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
1422
13.2M
                    anchor += advance;
1423
13.2M
                    ip = anchor;
1424
13.2M
            }   }
1425
5.46M
            DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]);
1426
1427
            /* update all costs */
1428
5.46M
            ZSTD_setBasePrices(optStatePtr, optLevel);
1429
5.46M
        }
1430
5.46M
    }   /* while (ip < ilimit) */
1431
1432
    /* Return the last literals size */
1433
7.85k
    return (size_t)(iend - anchor);
1434
7.85k
}
1435
#endif /* build exclusions */
1436
1437
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
1438
static size_t ZSTD_compressBlock_opt0(
1439
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1440
        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1441
2.86k
{
1442
2.86k
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
1443
2.86k
}
1444
#endif
1445
1446
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
1447
static size_t ZSTD_compressBlock_opt2(
1448
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1449
        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1450
4.99k
{
1451
4.99k
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
1452
4.99k
}
1453
#endif
1454
1455
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
1456
size_t ZSTD_compressBlock_btopt(
1457
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1458
        const void* src, size_t srcSize)
1459
2.86k
{
1460
2.86k
    DEBUGLOG(5, "ZSTD_compressBlock_btopt");
1461
2.86k
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1462
2.86k
}
1463
#endif
1464
1465
1466
1467
1468
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
1469
/* ZSTD_initStats_ultra():
1470
 * make a first compression pass, just to seed stats with more accurate starting values.
1471
 * only works on first block, with no dictionary and no ldm.
1472
 * this function cannot error out, its narrow contract must be respected.
1473
 */
1474
static
1475
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1476
void ZSTD_initStats_ultra(ZSTD_MatchState_t* ms,
1477
                          SeqStore_t* seqStore,
1478
                          U32 rep[ZSTD_REP_NUM],
1479
                    const void* src, size_t srcSize)
1480
1.91k
{
1481
1.91k
    U32 tmpRep[ZSTD_REP_NUM];  /* updated rep codes will sink here */
1482
1.91k
    ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
1483
1484
1.91k
    DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
1485
1.91k
    assert(ms->opt.litLengthSum == 0);    /* first block */
1486
1.91k
    assert(seqStore->sequences == seqStore->sequencesStart);   /* no ldm */
1487
1.91k
    assert(ms->window.dictLimit == ms->window.lowLimit);   /* no dictionary */
1488
1.91k
    assert(ms->window.dictLimit - ms->nextToUpdate <= 1);  /* no prefix (note: intentional overflow, defined as 2-complement) */
1489
1490
1.91k
    ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict);   /* generate stats into ms->opt*/
1491
1492
    /* invalidate first scan from history, only keep entropy stats */
1493
1.91k
    ZSTD_resetSeqStore(seqStore);
1494
1.91k
    ms->window.base -= srcSize;
1495
1.91k
    ms->window.dictLimit += (U32)srcSize;
1496
1.91k
    ms->window.lowLimit = ms->window.dictLimit;
1497
1.91k
    ms->nextToUpdate = ms->window.dictLimit;
1498
1499
1.91k
}
1500
1501
size_t ZSTD_compressBlock_btultra(
1502
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1503
        const void* src, size_t srcSize)
1504
520
{
1505
520
    DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
1506
520
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1507
520
}
1508
1509
size_t ZSTD_compressBlock_btultra2(
1510
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1511
        const void* src, size_t srcSize)
1512
2.56k
{
1513
2.56k
    U32 const curr = (U32)((const BYTE*)src - ms->window.base);
1514
2.56k
    DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
1515
1516
    /* 2-passes strategy:
1517
     * this strategy makes a first pass over first block to collect statistics
1518
     * in order to seed next round's statistics with it.
1519
     * After 1st pass, function forgets history, and starts a new block.
1520
     * Consequently, this can only work if no data has been previously loaded in tables,
1521
     * aka, no dictionary, no prefix, no ldm preprocessing.
1522
     * The compression ratio gain is generally small (~0.5% on first block),
1523
     * the cost is 2x cpu time on first block. */
1524
2.56k
    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
1525
2.56k
    if ( (ms->opt.litLengthSum==0)   /* first block */
1526
1.95k
      && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
1527
1.95k
      && (ms->window.dictLimit == ms->window.lowLimit)   /* no dictionary */
1528
1.95k
      && (curr == ms->window.dictLimit)    /* start of frame, nothing already loaded nor skipped */
1529
1.95k
      && (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */
1530
2.56k
      ) {
1531
1.91k
        ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
1532
1.91k
    }
1533
1534
2.56k
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1535
2.56k
}
1536
#endif
1537
1538
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
1539
size_t ZSTD_compressBlock_btopt_dictMatchState(
1540
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1541
        const void* src, size_t srcSize)
1542
0
{
1543
0
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1544
0
}
1545
1546
size_t ZSTD_compressBlock_btopt_extDict(
1547
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1548
        const void* src, size_t srcSize)
1549
0
{
1550
0
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1551
0
}
1552
#endif
1553
1554
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
1555
size_t ZSTD_compressBlock_btultra_dictMatchState(
1556
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1557
        const void* src, size_t srcSize)
1558
0
{
1559
0
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1560
0
}
1561
1562
size_t ZSTD_compressBlock_btultra_extDict(
1563
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1564
        const void* src, size_t srcSize)
1565
0
{
1566
0
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1567
0
}
1568
#endif
1569
1570
/* note : no btultra2 variant for extDict nor dictMatchState,
1571
 * because btultra2 is not meant to work with dictionaries
1572
 * and is only specific for the first block (no prefix) */