Coverage Report

Created: 2025-12-11 06:06

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_opt.c
Line
Count
Source
1
/*
2
 * Copyright (c) Meta Platforms, Inc. and affiliates.
3
 * All rights reserved.
4
 *
5
 * This source code is licensed under both the BSD-style license (found in the
6
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
 * in the COPYING file in the root directory of this source tree).
8
 * You may select, at your option, one of the above-listed licenses.
9
 */
10
11
#include "zstd_compress_internal.h"
12
#include "hist.h"
13
#include "zstd_opt.h"
14
15
#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
16
 || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
17
 || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
18
19
131M
#define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
20
98.6M
#define ZSTD_MAX_PRICE     (1<<30)
21
22
65.3k
#define ZSTD_PREDEF_THRESHOLD 8   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
23
24
25
/*-*************************************
26
*  Price functions for optimal parser
27
***************************************/
28
29
#if 0    /* approximation at bit level (for tests) */
30
#  define BITCOST_ACCURACY 0
31
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
32
#  define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
33
#elif 0  /* fractional bit accuracy (for tests) */
34
#  define BITCOST_ACCURACY 8
35
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
36
#  define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
37
#else    /* opt==approx, ultra==accurate */
38
56.7G
#  define BITCOST_ACCURACY 8
39
40.5G
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
40
16.2G
#  define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
41
#endif
42
43
/* ZSTD_bitWeight() :
44
 * provide estimated "cost" of a stat in full bits only */
45
MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
46
45.2M
{
47
45.2M
    return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
48
45.2M
}
49
50
/* ZSTD_fracWeight() :
51
 * provide fractional-bit "cost" of a stat,
52
 * using linear interpolation approximation */
53
MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
54
16.2G
{
55
16.2G
    U32 const stat = rawStat + 1;
56
16.2G
    U32 const hb = ZSTD_highbit32(stat);
57
16.2G
    U32 const BWeight = hb * BITCOST_MULTIPLIER;
58
    /* Fweight was meant for "Fractional weight"
59
     * but it's effectively a value between 1 and 2
60
     * using fixed point arithmetic */
61
16.2G
    U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
62
16.2G
    U32 const weight = BWeight + FWeight;
63
16.2G
    assert(hb + BITCOST_ACCURACY < 31);
64
16.2G
    return weight;
65
16.2G
}
66
67
#if (DEBUGLEVEL>=2)
68
/* debugging function,
69
 * @return price in bytes as fractional value
70
 * for debug messages only */
71
MEM_STATIC double ZSTD_fCost(int price)
72
{
73
    return (double)price / (BITCOST_MULTIPLIER*8);
74
}
75
#endif
76
77
static int ZSTD_compressedLiterals(optState_t const* const optPtr)
78
87.5M
{
79
87.5M
    return optPtr->literalCompressionMode != ZSTD_ps_disable;
80
87.5M
}
81
82
static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
83
2.94M
{
84
2.94M
    if (ZSTD_compressedLiterals(optPtr))
85
2.94M
        optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
86
2.94M
    optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
87
2.94M
    optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
88
2.94M
    optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
89
2.94M
}
90
91
92
static U32 sum_u32(const unsigned table[], size_t nbElts)
93
180k
{
94
180k
    size_t n;
95
180k
    U32 total = 0;
96
12.2M
    for (n=0; n<nbElts; n++) {
97
12.0M
        total += table[n];
98
12.0M
    }
99
180k
    return total;
100
180k
}
101
102
typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;
103
104
static U32
105
ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
106
43.4k
{
107
43.4k
    U32 s, sum=0;
108
43.4k
    DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
109
43.4k
            (unsigned)lastEltIndex+1, (unsigned)shift );
110
43.4k
    assert(shift < 30);
111
11.1M
    for (s=0; s<lastEltIndex+1; s++) {
112
11.0M
        unsigned const base = base1 ? 1 : (table[s]>0);
113
11.0M
        unsigned const newStat = base + (table[s] >> shift);
114
11.0M
        sum += newStat;
115
11.0M
        table[s] = newStat;
116
11.0M
    }
117
43.4k
    return sum;
118
43.4k
}
119
120
/* ZSTD_scaleStats() :
121
 * reduce all elt frequencies in table if sum too large
122
 * return the resulting sum of elements */
123
static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
124
98.7k
{
125
98.7k
    U32 const prevsum = sum_u32(table, lastEltIndex+1);
126
98.7k
    U32 const factor = prevsum >> logTarget;
127
98.7k
    DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
128
98.7k
    assert(logTarget < 30);
129
98.7k
    if (factor <= 1) return prevsum;
130
2.83k
    return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
131
98.7k
}
132
133
/* ZSTD_rescaleFreqs() :
134
 * if first block (detected by optPtr->litLengthSum == 0) : init statistics
135
 *    take hints from dictionary if there is one
136
 *    and init from zero if there is none,
137
 *    using src for literals stats, and baseline stats for sequence symbols
138
 * otherwise downscale existing stats, to be used as seed for next block.
139
 */
140
static void
141
ZSTD_rescaleFreqs(optState_t* const optPtr,
142
            const BYTE* const src, size_t const srcSize,
143
                  int const optLevel)
144
65.3k
{
145
65.3k
    int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
146
65.3k
    DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
147
65.3k
    optPtr->priceType = zop_dynamic;
148
149
65.3k
    if (optPtr->litLengthSum == 0) {  /* no literals stats collected -> first block assumed -> init */
150
151
        /* heuristic: use pre-defined stats for too small inputs */
152
40.6k
        if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
153
0
            DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
154
0
            optPtr->priceType = zop_predef;
155
0
        }
156
157
40.6k
        assert(optPtr->symbolCosts != NULL);
158
40.6k
        if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
159
160
            /* huffman stats covering the full value set : table presumed generated by dictionary */
161
0
            optPtr->priceType = zop_dynamic;
162
163
0
            if (compressedLiterals) {
164
                /* generate literals statistics from huffman table */
165
0
                unsigned lit;
166
0
                assert(optPtr->litFreq != NULL);
167
0
                optPtr->litSum = 0;
168
0
                for (lit=0; lit<=MaxLit; lit++) {
169
0
                    U32 const scaleLog = 11;   /* scale to 2K */
170
0
                    U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
171
0
                    assert(bitCost <= scaleLog);
172
0
                    optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
173
0
                    optPtr->litSum += optPtr->litFreq[lit];
174
0
            }   }
175
176
0
            {   unsigned ll;
177
0
                FSE_CState_t llstate;
178
0
                FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
179
0
                optPtr->litLengthSum = 0;
180
0
                for (ll=0; ll<=MaxLL; ll++) {
181
0
                    U32 const scaleLog = 10;   /* scale to 1K */
182
0
                    U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
183
0
                    assert(bitCost < scaleLog);
184
0
                    optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
185
0
                    optPtr->litLengthSum += optPtr->litLengthFreq[ll];
186
0
            }   }
187
188
0
            {   unsigned ml;
189
0
                FSE_CState_t mlstate;
190
0
                FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
191
0
                optPtr->matchLengthSum = 0;
192
0
                for (ml=0; ml<=MaxML; ml++) {
193
0
                    U32 const scaleLog = 10;
194
0
                    U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
195
0
                    assert(bitCost < scaleLog);
196
0
                    optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
197
0
                    optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
198
0
            }   }
199
200
0
            {   unsigned of;
201
0
                FSE_CState_t ofstate;
202
0
                FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
203
0
                optPtr->offCodeSum = 0;
204
0
                for (of=0; of<=MaxOff; of++) {
205
0
                    U32 const scaleLog = 10;
206
0
                    U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
207
0
                    assert(bitCost < scaleLog);
208
0
                    optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
209
0
                    optPtr->offCodeSum += optPtr->offCodeFreq[of];
210
0
            }   }
211
212
40.6k
        } else {  /* first block, no dictionary */
213
214
40.6k
            assert(optPtr->litFreq != NULL);
215
40.6k
            if (compressedLiterals) {
216
                /* base initial cost of literals on direct frequency within src */
217
40.6k
                unsigned lit = MaxLit;
218
40.6k
                HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
219
40.6k
                optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
220
40.6k
            }
221
222
40.6k
            {   unsigned const baseLLfreqs[MaxLL+1] = {
223
40.6k
                    4, 2, 1, 1, 1, 1, 1, 1,
224
40.6k
                    1, 1, 1, 1, 1, 1, 1, 1,
225
40.6k
                    1, 1, 1, 1, 1, 1, 1, 1,
226
40.6k
                    1, 1, 1, 1, 1, 1, 1, 1,
227
40.6k
                    1, 1, 1, 1
228
40.6k
                };
229
40.6k
                ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
230
40.6k
                optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
231
40.6k
            }
232
233
40.6k
            {   unsigned ml;
234
2.19M
                for (ml=0; ml<=MaxML; ml++)
235
2.15M
                    optPtr->matchLengthFreq[ml] = 1;
236
40.6k
            }
237
40.6k
            optPtr->matchLengthSum = MaxML+1;
238
239
40.6k
            {   unsigned const baseOFCfreqs[MaxOff+1] = {
240
40.6k
                    6, 2, 1, 1, 2, 3, 4, 4,
241
40.6k
                    4, 3, 2, 1, 1, 1, 1, 1,
242
40.6k
                    1, 1, 1, 1, 1, 1, 1, 1,
243
40.6k
                    1, 1, 1, 1, 1, 1, 1, 1
244
40.6k
                };
245
40.6k
                ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
246
40.6k
                optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
247
40.6k
            }
248
249
40.6k
        }
250
251
40.6k
    } else {   /* new block : scale down accumulated statistics */
252
253
24.6k
        if (compressedLiterals)
254
24.6k
            optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
255
24.6k
        optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
256
24.6k
        optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
257
24.6k
        optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
258
24.6k
    }
259
260
65.3k
    ZSTD_setBasePrices(optPtr, optLevel);
261
65.3k
}
262
263
/* ZSTD_rawLiteralsCost() :
264
 * price of literals (only) in specified segment (which length can be 0).
265
 * does not include price of literalLength symbol */
266
static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
267
                                const optState_t* const optPtr,
268
                                int optLevel)
269
79.9M
{
270
79.9M
    DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength);
271
79.9M
    if (litLength == 0) return 0;
272
273
79.9M
    if (!ZSTD_compressedLiterals(optPtr))
274
0
        return (litLength << 3) * BITCOST_MULTIPLIER;  /* Uncompressed - 8 bytes per literal. */
275
276
79.9M
    if (optPtr->priceType == zop_predef)
277
0
        return (litLength*6) * BITCOST_MULTIPLIER;  /* 6 bit per literal - no statistic used */
278
279
    /* dynamic statistics */
280
79.9M
    {   U32 price = optPtr->litSumBasePrice * litLength;
281
79.9M
        U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER;
282
79.9M
        U32 u;
283
79.9M
        assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER);
284
159M
        for (u=0; u < litLength; u++) {
285
79.9M
            U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel);
286
79.9M
            if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
287
79.9M
            price -= litPrice;
288
79.9M
        }
289
79.9M
        return price;
290
79.9M
    }
291
79.9M
}
292
293
/* ZSTD_litLengthPrice() :
294
 * cost of literalLength symbol */
295
static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
296
280M
{
297
280M
    assert(litLength <= ZSTD_BLOCKSIZE_MAX);
298
280M
    if (optPtr->priceType == zop_predef)
299
0
        return WEIGHT(litLength, optLevel);
300
301
    /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
302
     * because it isn't representable in the zstd format.
303
     * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
304
     * In such a case, the block would be all literals.
305
     */
306
280M
    if (litLength == ZSTD_BLOCKSIZE_MAX)
307
0
        return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
308
309
    /* dynamic statistics */
310
280M
    {   U32 const llCode = ZSTD_LLcode(litLength);
311
280M
        return (LL_bits[llCode] * BITCOST_MULTIPLIER)
312
280M
             + optPtr->litLengthSumBasePrice
313
280M
             - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
314
280M
    }
315
280M
}
316
317
/* ZSTD_getMatchPrice() :
318
 * Provides the cost of the match part (offset + matchLength) of a sequence.
319
 * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
320
 * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
321
 * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
322
 */
323
FORCE_INLINE_TEMPLATE U32
324
ZSTD_getMatchPrice(U32 const offBase,
325
                   U32 const matchLength,
326
             const optState_t* const optPtr,
327
                   int const optLevel)
328
7.95G
{
329
7.95G
    U32 price;
330
7.95G
    U32 const offCode = ZSTD_highbit32(offBase);
331
7.95G
    U32 const mlBase = matchLength - MINMATCH;
332
7.95G
    assert(matchLength >= MINMATCH);
333
334
7.95G
    if (optPtr->priceType == zop_predef)  /* fixed scheme, does not use statistics */
335
0
        return WEIGHT(mlBase, optLevel)
336
0
             + ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */
337
338
    /* dynamic statistics */
339
7.95G
    price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
340
7.95G
    if ((optLevel<2) /*static*/ && offCode >= 20)
341
0
        price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */
342
343
    /* match Length */
344
7.95G
    {   U32 const mlCode = ZSTD_MLcode(mlBase);
345
7.95G
        price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
346
7.95G
    }
347
348
7.95G
    price += BITCOST_MULTIPLIER / 5;   /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */
349
350
7.95G
    DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
351
7.95G
    return price;
352
7.95G
}
353
354
/* ZSTD_updateStats() :
355
 * assumption : literals + litLength <= iend */
356
static void ZSTD_updateStats(optState_t* const optPtr,
357
                             U32 litLength, const BYTE* literals,
358
                             U32 offBase, U32 matchLength)
359
4.67M
{
360
    /* literals */
361
4.67M
    if (ZSTD_compressedLiterals(optPtr)) {
362
4.67M
        U32 u;
363
131M
        for (u=0; u < litLength; u++)
364
127M
            optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
365
4.67M
        optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
366
4.67M
    }
367
368
    /* literal Length */
369
4.67M
    {   U32 const llCode = ZSTD_LLcode(litLength);
370
4.67M
        optPtr->litLengthFreq[llCode]++;
371
4.67M
        optPtr->litLengthSum++;
372
4.67M
    }
373
374
    /* offset code : follows storeSeq() numeric representation */
375
4.67M
    {   U32 const offCode = ZSTD_highbit32(offBase);
376
4.67M
        assert(offCode <= MaxOff);
377
4.67M
        optPtr->offCodeFreq[offCode]++;
378
4.67M
        optPtr->offCodeSum++;
379
4.67M
    }
380
381
    /* match Length */
382
4.67M
    {   U32 const mlBase = matchLength - MINMATCH;
383
4.67M
        U32 const mlCode = ZSTD_MLcode(mlBase);
384
4.67M
        optPtr->matchLengthFreq[mlCode]++;
385
4.67M
        optPtr->matchLengthSum++;
386
4.67M
    }
387
4.67M
}
388
389
390
/* ZSTD_readMINMATCH() :
391
 * function safe only for comparisons
392
 * assumption : memPtr must be at least 4 bytes before end of buffer */
393
MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
394
1.20G
{
395
1.20G
    switch (length)
396
1.20G
    {
397
0
    default :
398
112M
    case 4 : return MEM_read32(memPtr);
399
1.09G
    case 3 : if (MEM_isLittleEndian())
400
1.09G
                return MEM_read32(memPtr)<<8;
401
0
             else
402
0
                return MEM_read32(memPtr)>>8;
403
1.20G
    }
404
1.20G
}
405
406
407
/* Update hashTable3 up to ip (excluded)
408
   Assumption : always within prefix (i.e. not within extDict) */
409
static
410
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
411
U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
412
                                       U32* nextToUpdate3,
413
                                       const BYTE* const ip)
414
156M
{
415
156M
    U32* const hashTable3 = ms->hashTable3;
416
156M
    U32 const hashLog3 = ms->hashLog3;
417
156M
    const BYTE* const base = ms->window.base;
418
156M
    U32 idx = *nextToUpdate3;
419
156M
    U32 const target = (U32)(ip - base);
420
156M
    size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
421
156M
    assert(hashLog3 > 0);
422
423
365M
    while(idx < target) {
424
209M
        hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
425
209M
        idx++;
426
209M
    }
427
428
156M
    *nextToUpdate3 = target;
429
156M
    return hashTable3[hash3];
430
156M
}
431
432
433
/*-*************************************
434
*  Binary Tree search
435
***************************************/
436
/** ZSTD_insertBt1() : add one or multiple positions to tree.
437
 * @param ip assumed <= iend-8 .
438
 * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
439
 * @return : nb of positions added */
440
static
441
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
442
U32 ZSTD_insertBt1(
443
                const ZSTD_matchState_t* ms,
444
                const BYTE* const ip, const BYTE* const iend,
445
                U32 const target,
446
                U32 const mls, const int extDict)
447
10.2M
{
448
10.2M
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
449
10.2M
    U32*   const hashTable = ms->hashTable;
450
10.2M
    U32    const hashLog = cParams->hashLog;
451
10.2M
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
452
10.2M
    U32*   const bt = ms->chainTable;
453
10.2M
    U32    const btLog  = cParams->chainLog - 1;
454
10.2M
    U32    const btMask = (1 << btLog) - 1;
455
10.2M
    U32 matchIndex = hashTable[h];
456
10.2M
    size_t commonLengthSmaller=0, commonLengthLarger=0;
457
10.2M
    const BYTE* const base = ms->window.base;
458
10.2M
    const BYTE* const dictBase = ms->window.dictBase;
459
10.2M
    const U32 dictLimit = ms->window.dictLimit;
460
10.2M
    const BYTE* const dictEnd = dictBase + dictLimit;
461
10.2M
    const BYTE* const prefixStart = base + dictLimit;
462
10.2M
    const BYTE* match;
463
10.2M
    const U32 curr = (U32)(ip-base);
464
10.2M
    const U32 btLow = btMask >= curr ? 0 : curr - btMask;
465
10.2M
    U32* smallerPtr = bt + 2*(curr&btMask);
466
10.2M
    U32* largerPtr  = smallerPtr + 1;
467
10.2M
    U32 dummy32;   /* to be nullified at the end */
468
    /* windowLow is based on target because
469
     * we only need positions that will be in the window at the end of the tree update.
470
     */
471
10.2M
    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
472
10.2M
    U32 matchEndIdx = curr+8+1;
473
10.2M
    size_t bestLength = 8;
474
10.2M
    U32 nbCompares = 1U << cParams->searchLog;
475
#ifdef ZSTD_C_PREDICT
476
    U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
477
    U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
478
    predictedSmall += (predictedSmall>0);
479
    predictedLarge += (predictedLarge>0);
480
#endif /* ZSTD_C_PREDICT */
481
482
10.2M
    DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
483
484
10.2M
    assert(curr <= target);
485
10.2M
    assert(ip <= iend-8);   /* required for h calculation */
486
10.2M
    hashTable[h] = curr;   /* Update Hash Table */
487
488
10.2M
    assert(windowLow > 0);
489
56.3M
    for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
490
46.1M
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
491
46.1M
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
492
46.1M
        assert(matchIndex < curr);
493
494
#ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
495
        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
496
        if (matchIndex == predictedSmall) {
497
            /* no need to check length, result known */
498
            *smallerPtr = matchIndex;
499
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
500
            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
501
            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
502
            predictedSmall = predictPtr[1] + (predictPtr[1]>0);
503
            continue;
504
        }
505
        if (matchIndex == predictedLarge) {
506
            *largerPtr = matchIndex;
507
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
508
            largerPtr = nextPtr;
509
            matchIndex = nextPtr[0];
510
            predictedLarge = predictPtr[0] + (predictPtr[0]>0);
511
            continue;
512
        }
513
#endif
514
515
46.1M
        if (!extDict || (matchIndex+matchLength >= dictLimit)) {
516
46.1M
            assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if actually extDict */
517
46.1M
            match = base + matchIndex;
518
46.1M
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
519
46.1M
        } else {
520
0
            match = dictBase + matchIndex;
521
0
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
522
0
            if (matchIndex+matchLength >= dictLimit)
523
0
                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
524
0
        }
525
526
46.1M
        if (matchLength > bestLength) {
527
10.7M
            bestLength = matchLength;
528
10.7M
            if (matchLength > matchEndIdx - matchIndex)
529
214k
                matchEndIdx = matchIndex + (U32)matchLength;
530
10.7M
        }
531
532
46.1M
        if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
533
71.0k
            break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
534
71.0k
        }
535
536
46.1M
        if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
537
            /* match is smaller than current */
538
18.3M
            *smallerPtr = matchIndex;             /* update smaller idx */
539
18.3M
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
540
18.3M
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
541
18.3M
            smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
542
18.3M
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
543
27.7M
        } else {
544
            /* match is larger than current */
545
27.7M
            *largerPtr = matchIndex;
546
27.7M
            commonLengthLarger = matchLength;
547
27.7M
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
548
27.7M
            largerPtr = nextPtr;
549
27.7M
            matchIndex = nextPtr[0];
550
27.7M
    }   }
551
552
10.2M
    *smallerPtr = *largerPtr = 0;
553
10.2M
    {   U32 positions = 0;
554
10.2M
        if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384));   /* speed optimization */
555
10.2M
        assert(matchEndIdx > curr + 8);
556
10.2M
        return MAX(positions, matchEndIdx - (curr + 8));
557
10.2M
    }
558
10.2M
}
559
560
FORCE_INLINE_TEMPLATE
561
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
562
void ZSTD_updateTree_internal(
563
                ZSTD_matchState_t* ms,
564
                const BYTE* const ip, const BYTE* const iend,
565
                const U32 mls, const ZSTD_dictMode_e dictMode)
566
206M
{
567
206M
    const BYTE* const base = ms->window.base;
568
206M
    U32 const target = (U32)(ip - base);
569
206M
    U32 idx = ms->nextToUpdate;
570
206M
    DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
571
206M
                idx, target, dictMode);
572
573
216M
    while(idx < target) {
574
10.2M
        U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
575
10.2M
        assert(idx < (U32)(idx + forward));
576
10.2M
        idx += forward;
577
10.2M
    }
578
206M
    assert((size_t)(ip - base) <= (size_t)(U32)(-1));
579
206M
    assert((size_t)(iend - base) <= (size_t)(U32)(-1));
580
206M
    ms->nextToUpdate = target;
581
206M
}
582
583
0
/* Public (non-template) entry point :
 * inserts all positions up to @ip into the binary tree,
 * using the compression parameters' minMatch, prefix mode only (no dictionary). */
void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
    ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
}
586
587
/* ZSTD_insertBtAndGetAllMatches() :
 * Inserts the current position into the binary tree,
 * and collects into @matches every candidate of length > @lengthToBeat-1 found at @ip :
 * repcodes first, then (when mls==3) a hash-chain probe for 3-byte matches,
 * then the binary-tree search, and finally the dictMatchState tree when applicable.
 * Each stored match is strictly longer than the previous one,
 * so matches[] ends up sorted by increasing length (matches[mnum-1] is the longest).
 * @return : number of entries written into @matches.
 * Side effect : updates ms->nextToUpdate (possibly skipping ahead over repetitive patterns). */
FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32
ZSTD_insertBtAndGetAllMatches (
                ZSTD_match_t* matches,  /* store result (found matches) in this table (presumed large enough) */
                ZSTD_matchState_t* ms,
                U32* nextToUpdate3,
                const BYTE* const ip, const BYTE* const iLimit,
                const ZSTD_dictMode_e dictMode,
                const U32 rep[ZSTD_REP_NUM],
                const U32 ll0,  /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
                const U32 lengthToBeat,
                const U32 mls /* template */)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
    const BYTE* const base = ms->window.base;
    U32 const curr = (U32)(ip-base);
    U32 const hashLog = cParams->hashLog;
    U32 const minMatch = (mls==3) ? 3 : 4;
    U32* const hashTable = ms->hashTable;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32 matchIndex  = hashTable[h];
    U32* const bt   = ms->chainTable;
    U32 const btLog = cParams->chainLog - 1;
    U32 const btMask= (1U << btLog) - 1;
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const dictBase = ms->window.dictBase;
    U32 const dictLimit = ms->window.dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
    U32 const matchLow = windowLow ? windowLow : 1;
    U32* smallerPtr = bt + 2*(curr&btMask);
    U32* largerPtr  = bt + 2*(curr&btMask) + 1;
    U32 matchEndIdx = curr+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
    U32 dummy32;   /* to be nullified at the end */
    U32 mnum = 0;
    U32 nbCompares = 1U << cParams->searchLog;

    /* dictMatchState-specific views; all collapse to NULL/0 in other modes */
    const ZSTD_matchState_t* dms    = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
    const ZSTD_compressionParameters* const dmsCParams =
                                      dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
    const BYTE* const dmsBase       = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
    const BYTE* const dmsEnd        = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
    U32         const dmsHighLimit  = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
    U32         const dmsLowLimit   = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
    U32         const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
    U32         const dmsHashLog    = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
    U32         const dmsBtLog      = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
    U32         const dmsBtMask     = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
    U32         const dmsBtLow      = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;

    size_t bestLength = lengthToBeat-1;   /* only matches strictly longer than this are recorded */
    DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);

    /* check repCode */
    assert(ll0 <= 1);   /* necessarily 1 or 0 */
    {   U32 const lastR = ZSTD_REP_NUM + ll0;
        U32 repCode;
        for (repCode = ll0; repCode < lastR; repCode++) {
            /* when ll0==1, repCode==ZSTD_REP_NUM probes the virtual candidate rep[0]-1 */
            U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
            U32 const repIndex = curr - repOffset;
            U32 repLen = 0;
            assert(curr >= dictLimit);
            if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) {  /* equivalent to `curr > repIndex >= dictLimit` */
                /* We must validate the repcode offset because when we're using a dictionary the
                 * valid offset range shrinks when the dictionary goes out of bounds.
                 */
                if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
                    repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
                }
            } else {  /* repIndex < dictLimit || repIndex >= curr */
                const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
                                             dmsBase + repIndex - dmsIndexDelta :
                                             dictBase + repIndex;
                assert(curr >= windowLow);
                if ( dictMode == ZSTD_extDict
                  && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow)  /* equivalent to `curr > repIndex >= windowLow` */
                     & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
                }
                if (dictMode == ZSTD_dictMatchState
                  && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta))  /* equivalent to `curr > repIndex >= dmsLowLimit` */
                     & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
            }   }
            /* save longer solution */
            if (repLen > bestLength) {
                DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
                            repCode, ll0, repOffset, repLen);
                bestLength = repLen;
                matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1);  /* expect value between 1 and 3 */
                matches[mnum].len = (U32)repLen;
                mnum++;
                if ( (repLen > sufficient_len)
                   | (ip+repLen == iLimit) ) {  /* best possible */
                    return mnum;
    }   }   }   }

    /* HC3 match finder : only useful when no repcode already reaches length 3 */
    if ((mls == 3) /*static*/ && (bestLength < mls)) {
        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
        if ((matchIndex3 >= matchLow)
          & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
            size_t mlen;
            if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
                const BYTE* const match = base + matchIndex3;
                mlen = ZSTD_count(ip, match, iLimit);
            } else {
                const BYTE* const match = dictBase + matchIndex3;
                mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
            }

            /* save best solution */
            if (mlen >= mls /* == 3 > bestLength */) {
                DEBUGLOG(8, "found small match with hlog3, of length %u",
                            (U32)mlen);
                bestLength = mlen;
                assert(curr > matchIndex3);
                assert(mnum==0);  /* no prior solution */
                matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3);
                matches[0].len = (U32)mlen;
                mnum = 1;
                if ( (mlen > sufficient_len) |
                     (ip+mlen == iLimit) ) {  /* best possible length */
                    ms->nextToUpdate = curr+1;  /* skip insertion */
                    return 1;
        }   }   }
        /* no dictMatchState lookup: dicts don't have a populated HC3 table */
    }  /* if (mls == 3) */

    hashTable[h] = curr;   /* Update Hash Table */

    /* Binary-tree search : descend from the hash-table head, re-sorting the tree
     * (via smallerPtr/largerPtr) while collecting strictly longer matches */
    for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        const BYTE* match;
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        assert(curr > matchIndex);

        if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
            assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
            match = base + matchIndex;
            if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
        } else {
            match = dictBase + matchIndex;
            assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* prepare for match[matchLength] read */
        }

        if (matchLength > bestLength) {
            DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
                    (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
            assert(matchEndIdx > matchIndex);
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
            bestLength = matchLength;
            matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
            matches[mnum].len = (U32)matchLength;
            mnum++;
            if ( (matchLength > ZSTD_OPT_NUM)
               | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
                break; /* drop, to preserve bt consistency (miss a little bit of compression) */
        }   }

        if (match[matchLength] < ip[matchLength]) {
            /* match smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new candidate => larger than match, which was smaller than current */
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous, closer to current */
        } else {
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;   /* terminate both sub-chains of the freshly inserted node */

    assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
    if (dictMode == ZSTD_dictMatchState && nbCompares) {
        /* read-only lookup into the dictionary's own tree (never modified here) */
        size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
        U32 dictMatchIndex = dms->hashTable[dmsH];
        const U32* const dmsBt = dms->chainTable;
        commonLengthSmaller = commonLengthLarger = 0;
        for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
            const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
            const BYTE* match = dmsBase + dictMatchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
            if (dictMatchIndex+matchLength >= dmsHighLimit)
                match = base + dictMatchIndex + dmsIndexDelta;   /* to prepare for next usage of match[matchLength] */

            if (matchLength > bestLength) {
                matchIndex = dictMatchIndex + dmsIndexDelta;
                DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
                        (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
                if (matchLength > matchEndIdx - matchIndex)
                    matchEndIdx = matchIndex + (U32)matchLength;
                bestLength = matchLength;
                matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
                matches[mnum].len = (U32)matchLength;
                mnum++;
                if ( (matchLength > ZSTD_OPT_NUM)
                   | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                    break;   /* drop, to guarantee consistency (miss a little bit of compression) */
            }   }

            if (dictMatchIndex <= dmsBtLow) { break; }   /* beyond tree size, stop the search */
            if (match[matchLength] < ip[matchLength]) {
                commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
                dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
            } else {
                /* match is larger than current */
                commonLengthLarger = matchLength;
                dictMatchIndex = nextPtr[0];
    }   }   }  /* if (dictMode == ZSTD_dictMatchState) */

    assert(matchEndIdx > curr+8);
    ms->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
    return mnum;
}
819
820
/* Function pointer type shared by all ZSTD_btGetAllMatches_* template instances :
 * writes candidate matches into the first table and returns how many were found.
 * Parameter order mirrors ZSTD_btGetAllMatches_internal()
 * (matches, ms, nextToUpdate3, ip, iHighLimit, rep, ll0, lengthToBeat). */
typedef U32 (*ZSTD_getAllMatchesFn)(
    ZSTD_match_t*,
    ZSTD_matchState_t*,
    U32*,
    const BYTE*,
    const BYTE*,
    const U32 rep[ZSTD_REP_NUM],
    U32 const ll0,
    U32 const lengthToBeat);
829
830
/* ZSTD_btGetAllMatches_internal() :
 * Template dispatcher : updates the binary tree up to @ip,
 * then runs the full match search at @ip.
 * Returns 0 without searching when @ip lies inside an area
 * already skipped by a previous search (ip < nextToUpdate). */
FORCE_INLINE_TEMPLATE
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 ZSTD_btGetAllMatches_internal(
        ZSTD_match_t* matches,
        ZSTD_matchState_t* ms,
        U32* nextToUpdate3,
        const BYTE* ip,
        const BYTE* const iHighLimit,
        const U32 rep[ZSTD_REP_NUM],
        U32 const ll0,
        U32 const lengthToBeat,
        const ZSTD_dictMode_e dictMode,
        const U32 mls)
{
    assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
    DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
    if (ip < ms->window.base + ms->nextToUpdate)
        return 0;   /* skipped area */
    ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
    return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
}
851
852
784k
/* Name of the generated match-finder variant for a given (dictMode, mls) pair. */
#define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls

/* Generates one static wrapper that instantiates ZSTD_btGetAllMatches_internal
 * with compile-time-constant dictMode and mls, so the compiler can specialize it. */
#define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls)            \
    static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)(      \
            ZSTD_match_t* matches,                             \
            ZSTD_matchState_t* ms,                             \
            U32* nextToUpdate3,                                \
            const BYTE* ip,                                    \
            const BYTE* const iHighLimit,                      \
            const U32 rep[ZSTD_REP_NUM],                       \
            U32 const ll0,                                     \
            U32 const lengthToBeat)                            \
    {                                                          \
        return ZSTD_btGetAllMatches_internal(                  \
                matches, ms, nextToUpdate3, ip, iHighLimit,    \
                rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
    }

/* Generates the four mls variants (3..6) for one dictMode. */
#define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5)  \
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)

GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)

/* Initializer for one row (fixed dictMode, mls = 3..6) of the dispatch table
 * used by ZSTD_selectBtGetAllMatches(). */
#define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode)  \
    {                                            \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6)  \
    }
887
888
/* ZSTD_selectBtGetAllMatches() :
 * Selects the template-generated match finder matching the
 * current dictMode and minMatch (clamped to [3,6]). */
static ZSTD_getAllMatchesFn
ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
{
    /* dispatch table indexed by [dictMode][mls - 3] */
    ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
    };
    U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
    assert((U32)dictMode < 3);
    assert(mls - 3 < 4);
    return getAllMatchesFns[(int)dictMode][mls - 3];
}
901
902
/*************************
903
*  LDM helper functions  *
904
*************************/
905
906
/* Struct containing info needed to make decision about ldm inclusion */
907
typedef struct {
    rawSeqStore_t seqStore;   /* External match candidates store for this block */
    U32 startPosInBlock;      /* Start position of the current match candidate (UINT_MAX => no candidate) */
    U32 endPosInBlock;        /* End position (exclusive) of the current match candidate */
    U32 offset;               /* Offset of the match candidate */
} ZSTD_optLdm_t;
913
914
/* ZSTD_optLdm_skipRawSeqStoreBytes():
 * Moves forward in @rawSeqStore by @nbBytes,
 * which will update the fields 'pos' and 'posInSequence'.
 * Fully-consumed sequences advance 'pos'; a partially-consumed one
 * leaves its remainder recorded in 'posInSequence'.
 */
static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
{
    U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
    while (currPos && rawSeqStore->pos < rawSeqStore->size) {
        rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
        if (currPos >= currSeq.litLength + currSeq.matchLength) {
            /* skip the whole sequence and keep consuming */
            currPos -= currSeq.litLength + currSeq.matchLength;
            rawSeqStore->pos++;
        } else {
            /* landed inside this sequence : remember the intra-sequence position */
            rawSeqStore->posInSequence = currPos;
            break;
        }
    }
    /* landed exactly on a sequence boundary, or ran out of sequences */
    if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
        rawSeqStore->posInSequence = 0;
    }
}
935
936
/* ZSTD_opt_getNextMatchAndUpdateSeqStore():
 * Calculates the beginning and end of the next match in the current block.
 * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
 * On exit, optLdm->startPosInBlock/endPosInBlock describe the next usable LDM
 * candidate, or are both set to UINT_MAX when no candidate exists in this block.
 */
static void
ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
                                       U32 blockBytesRemaining)
{
    rawSeq currSeq;
    U32 currBlockEndPos;
    U32 literalsBytesRemaining;
    U32 matchBytesRemaining;

    /* Setting match end position to MAX to ensure we never use an LDM during this block */
    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
        optLdm->startPosInBlock = UINT_MAX;
        optLdm->endPosInBlock = UINT_MAX;
        return;
    }
    /* Calculate appropriate bytes left in matchLength and litLength
     * after adjusting based on ldmSeqStore->posInSequence */
    currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
    assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
    currBlockEndPos = currPosInBlock + blockBytesRemaining;
    literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
            currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
            0;
    matchBytesRemaining = (literalsBytesRemaining == 0) ?
            currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
            currSeq.matchLength;

    /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
    if (literalsBytesRemaining >= blockBytesRemaining) {
        optLdm->startPosInBlock = UINT_MAX;
        optLdm->endPosInBlock = UINT_MAX;
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
        return;
    }

    /* Matches may be < MINMATCH by this process. In that case, we will reject them
       when we are deciding whether or not to add the ldm */
    optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
    optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
    optLdm->offset = currSeq.offset;

    if (optLdm->endPosInBlock > currBlockEndPos) {
        /* Match ends after the block ends, we can't use the whole match */
        optLdm->endPosInBlock = currBlockEndPos;
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
    } else {
        /* Consume nb of bytes equal to size of sequence left */
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
    }
}
990
991
/* ZSTD_optLdm_maybeAddMatch():
 * Adds a match if it's long enough,
 * based on its 'matchStartPosInBlock' and 'matchEndPosInBlock',
 * into 'matches'. Maintains the correct ordering of 'matches'
 * (the candidate is only appended when longer than the current longest entry).
 */
static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
                                      const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
{
    U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
    /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
    U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;

    /* Ensure that current block position is not outside of the match */
    if (currPosInBlock < optLdm->startPosInBlock
      || currPosInBlock >= optLdm->endPosInBlock
      || candidateMatchLength < MINMATCH) {
        return;
    }

    /* Append only when strictly longer than the current best, to preserve
     * the increasing-length invariant of matches[] */
    if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
        U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset);
        DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
                 candidateOffBase, candidateMatchLength, currPosInBlock);
        matches[*nbMatches].len = candidateMatchLength;
        matches[*nbMatches].off = candidateOffBase;
        (*nbMatches)++;
    }
}
1019
1020
/* ZSTD_optLdm_processMatchCandidate():
 * Wrapper function to update ldm seq store and call ldm functions as necessary.
 * No-op when there is no LDM sequence store (the common case).
 */
static void
ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
                                  ZSTD_match_t* matches, U32* nbMatches,
                                  U32 currPosInBlock, U32 remainingBytes)
{
    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
        return;
    }

    if (currPosInBlock >= optLdm->endPosInBlock) {
        if (currPosInBlock > optLdm->endPosInBlock) {
            /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
             * at the end of a match from the ldm seq store, and will often be some bytes
             * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
             */
            U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
            ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
        }
        ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
    }
    ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
}
1045
1046
1047
/*-*******************************
1048
*  Optimal parser
1049
*********************************/
1050
1051
#if 0 /* debug */
1052
1053
static void
1054
listStats(const U32* table, int lastEltID)
1055
{
1056
    int const nbElts = lastEltID + 1;
1057
    int enb;
1058
    for (enb=0; enb < nbElts; enb++) {
1059
        (void)table;
1060
        /* RAWLOG(2, "%3i:%3i,  ", enb, table[enb]); */
1061
        RAWLOG(2, "%4i,", table[enb]);
1062
    }
1063
    RAWLOG(2, " \n");
1064
}
1065
1066
#endif
1067
1068
79.9M
/* Shorthands for the optimal parser below.
 * NOTE : these macros capture `optStatePtr` and `optLevel`
 * from the enclosing function scope. */
#define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel)
#define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel)
/* marginal price of extending literal length from _l-1 to _l */
#define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1))
1071
1072
FORCE_INLINE_TEMPLATE
1073
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1074
size_t
1075
ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1076
                               seqStore_t* seqStore,
1077
                               U32 rep[ZSTD_REP_NUM],
1078
                         const void* src, size_t srcSize,
1079
                         const int optLevel,
1080
                         const ZSTD_dictMode_e dictMode)
1081
65.3k
{
1082
65.3k
    optState_t* const optStatePtr = &ms->opt;
1083
65.3k
    const BYTE* const istart = (const BYTE*)src;
1084
65.3k
    const BYTE* ip = istart;
1085
65.3k
    const BYTE* anchor = istart;
1086
65.3k
    const BYTE* const iend = istart + srcSize;
1087
65.3k
    const BYTE* const ilimit = iend - 8;
1088
65.3k
    const BYTE* const base = ms->window.base;
1089
65.3k
    const BYTE* const prefixStart = base + ms->window.dictLimit;
1090
65.3k
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
1091
1092
65.3k
    ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
1093
1094
65.3k
    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
1095
65.3k
    U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
1096
65.3k
    U32 nextToUpdate3 = ms->nextToUpdate;
1097
1098
65.3k
    ZSTD_optimal_t* const opt = optStatePtr->priceTable;
1099
65.3k
    ZSTD_match_t* const matches = optStatePtr->matchTable;
1100
65.3k
    ZSTD_optimal_t lastStretch;
1101
65.3k
    ZSTD_optLdm_t optLdm;
1102
1103
65.3k
    ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t));
1104
1105
65.3k
    optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
1106
65.3k
    optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
1107
65.3k
    ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
1108
1109
    /* init */
1110
65.3k
    DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
1111
65.3k
                (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
1112
65.3k
    assert(optLevel <= 2);
1113
65.3k
    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
1114
65.3k
    ip += (ip==prefixStart);
1115
1116
    /* Match Loop */
1117
145M
    while (ip < ilimit) {
1118
145M
        U32 cur, last_pos = 0;
1119
1120
        /* find first match */
1121
145M
        {   U32 const litlen = (U32)(ip - anchor);
1122
145M
            U32 const ll0 = !litlen;
1123
145M
            U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
1124
145M
            ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1125
145M
                                              (U32)(ip-istart), (U32)(iend-ip));
1126
145M
            if (!nbMatches) {
1127
141M
                DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart));
1128
141M
                ip++;
1129
141M
                continue;
1130
141M
            }
1131
1132
            /* Match found: let's store this solution, and eventually find more candidates.
1133
             * During this forward pass, @opt is used to store stretches,
1134
             * defined as "a match followed by N literals".
1135
             * Note how this is different from a Sequence, which is "N literals followed by a match".
1136
             * Storing stretches allows us to store different match predecessors
1137
             * for each literal position part of a literals run. */
1138
1139
            /* initialize opt[0] */
1140
3.40M
            opt[0].mlen = 0;  /* there are only literals so far */
1141
3.40M
            opt[0].litlen = litlen;
1142
            /* No need to include the actual price of the literals before the first match
1143
             * because it is static for the duration of the forward pass, and is included
1144
             * in every subsequent price. But, we include the literal length because
1145
             * the cost variation of litlen depends on the value of litlen.
1146
             */
1147
3.40M
            opt[0].price = LL_PRICE(litlen);
1148
3.40M
            ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0]));
1149
3.40M
            ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep));
1150
1151
            /* large match -> immediate encoding */
1152
3.40M
            {   U32 const maxML = matches[nbMatches-1].len;
1153
3.40M
                U32 const maxOffBase = matches[nbMatches-1].off;
1154
3.40M
                DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
1155
3.40M
                            nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
1156
1157
3.40M
                if (maxML > sufficient_len) {
1158
173k
                    lastStretch.litlen = 0;
1159
173k
                    lastStretch.mlen = maxML;
1160
173k
                    lastStretch.off = maxOffBase;
1161
173k
                    DEBUGLOG(6, "large match (%u>%u) => immediate encoding",
1162
173k
                                maxML, sufficient_len);
1163
173k
                    cur = 0;
1164
173k
                    last_pos = maxML;
1165
173k
                    goto _shortestPath;
1166
173k
            }   }
1167
1168
            /* set prices for first matches starting position == 0 */
1169
3.23M
            assert(opt[0].price >= 0);
1170
3.23M
            {   U32 pos;
1171
3.23M
                U32 matchNb;
1172
10.4M
                for (pos = 1; pos < minMatch; pos++) {
1173
7.18M
                    opt[pos].price = ZSTD_MAX_PRICE;
1174
7.18M
                    opt[pos].mlen = 0;
1175
7.18M
                    opt[pos].litlen = litlen + pos;
1176
7.18M
                }
1177
13.5M
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1178
10.3M
                    U32 const offBase = matches[matchNb].off;
1179
10.3M
                    U32 const end = matches[matchNb].len;
1180
47.1M
                    for ( ; pos <= end ; pos++ ) {
1181
36.8M
                        int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
1182
36.8M
                        int const sequencePrice = opt[0].price + matchPrice;
1183
36.8M
                        DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
1184
36.8M
                                    pos, ZSTD_fCost(sequencePrice));
1185
36.8M
                        opt[pos].mlen = pos;
1186
36.8M
                        opt[pos].off = offBase;
1187
36.8M
                        opt[pos].litlen = 0; /* end of match */
1188
36.8M
                        opt[pos].price = sequencePrice + LL_PRICE(0);
1189
36.8M
                    }
1190
10.3M
                }
1191
3.23M
                last_pos = pos-1;
1192
3.23M
                opt[pos].price = ZSTD_MAX_PRICE;
1193
3.23M
            }
1194
3.23M
        }
1195
1196
        /* check further positions */
1197
72.5M
        for (cur = 1; cur <= last_pos; cur++) {
1198
72.4M
            const BYTE* const inr = ip + cur;
1199
72.4M
            assert(cur <= ZSTD_OPT_NUM);
1200
72.4M
            DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur);
1201
1202
            /* Fix current position with one literal if cheaper */
1203
72.4M
            {   U32 const litlen = opt[cur-1].litlen + 1;
1204
72.4M
                int const price = opt[cur-1].price
1205
72.4M
                                + LIT_PRICE(ip+cur-1)
1206
72.4M
                                + LL_INCPRICE(litlen);
1207
72.4M
                assert(price < 1000000000); /* overflow check */
1208
72.4M
                if (price <= opt[cur].price) {
1209
16.3M
                    ZSTD_optimal_t const prevMatch = opt[cur];
1210
16.3M
                    DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
1211
16.3M
                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
1212
16.3M
                                opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
1213
16.3M
                    opt[cur] = opt[cur-1];
1214
16.3M
                    opt[cur].litlen = litlen;
1215
16.3M
                    opt[cur].price = price;
1216
16.3M
                    if ( (optLevel >= 1) /* additional check only for higher modes */
1217
12.5M
                      && (prevMatch.litlen == 0) /* replace a match */
1218
6.61M
                      && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */
1219
3.70M
                      && LIKELY(ip + cur < iend)
1220
16.3M
                    ) {
1221
                        /* check next position, in case it would be cheaper */
1222
3.70M
                        int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1);
1223
3.70M
                        int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1);
1224
3.70M
                        DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f",
1225
3.70M
                                cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals));
1226
3.70M
                        if ( (with1literal < withMoreLiterals)
1227
1.66M
                          && (with1literal < opt[cur+1].price) ) {
1228
                            /* update offset history - before it disappears */
1229
1.02M
                            U32 const prev = cur - prevMatch.mlen;
1230
1.02M
                            repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0);
1231
1.02M
                            assert(cur >= prevMatch.mlen);
1232
1.02M
                            DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !",
1233
1.02M
                                        ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals),
1234
1.02M
                                        newReps.rep[0], newReps.rep[1], newReps.rep[2] );
1235
1.02M
                            opt[cur+1] = prevMatch;  /* mlen & offbase */
1236
1.02M
                            ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(repcodes_t));
1237
1.02M
                            opt[cur+1].litlen = 1;
1238
1.02M
                            opt[cur+1].price = with1literal;
1239
1.02M
                            if (last_pos < cur+1) last_pos = cur+1;
1240
1.02M
                        }
1241
3.70M
                    }
1242
56.1M
                } else {
1243
56.1M
                    DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f)",
1244
56.1M
                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
1245
56.1M
                }
1246
72.4M
            }
1247
1248
            /* Offset history is not updated during match comparison.
1249
             * Do it here, now that the match is selected and confirmed.
1250
             */
1251
72.4M
            ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
1252
72.4M
            assert(cur >= opt[cur].mlen);
1253
72.4M
            if (opt[cur].litlen == 0) {
1254
                /* just finished a match => alter offset history */
1255
55.0M
                U32 const prev = cur - opt[cur].mlen;
1256
55.0M
                repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
1257
55.0M
                ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
1258
55.0M
            }
1259
1260
            /* last match must start at a minimum distance of 8 from oend */
1261
72.4M
            if (inr > ilimit) continue;
1262
1263
72.4M
            if (cur == last_pos) break;
1264
1265
69.3M
            if ( (optLevel==0) /*static_test*/
1266
5.72M
              && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
1267
1.83M
                DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1);
1268
1.83M
                continue;  /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
1269
1.83M
            }
1270
1271
67.5M
            assert(opt[cur].price >= 0);
1272
67.5M
            {   U32 const ll0 = (opt[cur].litlen == 0);
1273
67.5M
                int const previousPrice = opt[cur].price;
1274
67.5M
                int const basePrice = previousPrice + LL_PRICE(0);
1275
67.5M
                U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
1276
67.5M
                U32 matchNb;
1277
1278
67.5M
                ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1279
67.5M
                                                  (U32)(inr-istart), (U32)(iend-inr));
1280
1281
67.5M
                if (!nbMatches) {
1282
12.7M
                    DEBUGLOG(7, "rPos:%u : no match found", cur);
1283
12.7M
                    continue;
1284
12.7M
                }
1285
1286
54.7M
                {   U32 const longestML = matches[nbMatches-1].len;
1287
54.7M
                    DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of longest ML=%u",
1288
54.7M
                                inr-istart, cur, nbMatches, longestML);
1289
1290
54.7M
                    if ( (longestML > sufficient_len)
1291
54.6M
                      || (cur + longestML >= ZSTD_OPT_NUM)
1292
54.6M
                      || (ip + cur + longestML >= iend) ) {
1293
104k
                        lastStretch.mlen = longestML;
1294
104k
                        lastStretch.off = matches[nbMatches-1].off;
1295
104k
                        lastStretch.litlen = 0;
1296
104k
                        last_pos = cur + longestML;
1297
104k
                        goto _shortestPath;
1298
104k
                }   }
1299
1300
                /* set prices using matches found at position == cur */
1301
140M
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1302
85.4M
                    U32 const offset = matches[matchNb].off;
1303
85.4M
                    U32 const lastML = matches[matchNb].len;
1304
85.4M
                    U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
1305
85.4M
                    U32 mlen;
1306
1307
85.4M
                    DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
1308
85.4M
                                matchNb, matches[matchNb].off, lastML, opt[cur].litlen);
1309
1310
7.99G
                    for (mlen = lastML; mlen >= startML; mlen--) {  /* scan downward */
1311
7.91G
                        U32 const pos = cur + mlen;
1312
7.91G
                        int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
1313
1314
7.91G
                        if ((pos > last_pos) || (price < opt[pos].price)) {
1315
64.3M
                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
1316
64.3M
                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
1317
97.9M
                            while (last_pos < pos) {
1318
                                /* fill empty positions, for future comparisons */
1319
33.6M
                                last_pos++;
1320
33.6M
                                opt[last_pos].price = ZSTD_MAX_PRICE;
1321
33.6M
                                opt[last_pos].litlen = !0;  /* just needs to be != 0, to mean "not an end of match" */
1322
33.6M
                            }
1323
64.3M
                            opt[pos].mlen = mlen;
1324
64.3M
                            opt[pos].off = offset;
1325
64.3M
                            opt[pos].litlen = 0;
1326
64.3M
                            opt[pos].price = price;
1327
7.85G
                        } else {
1328
7.85G
                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
1329
7.85G
                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
1330
7.85G
                            if (optLevel==0) break;  /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
1331
7.85G
                        }
1332
7.91G
            }   }   }
1333
54.6M
            opt[last_pos+1].price = ZSTD_MAX_PRICE;
1334
54.6M
        }  /* for (cur = 1; cur <= last_pos; cur++) */
1335
1336
3.13M
        lastStretch = opt[last_pos];
1337
3.13M
        assert(cur >= lastStretch.mlen);
1338
3.13M
        cur = last_pos - lastStretch.mlen;
1339
1340
3.40M
_shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
1341
3.40M
        assert(opt[0].mlen == 0);
1342
3.40M
        assert(last_pos >= lastStretch.mlen);
1343
3.40M
        assert(cur == last_pos - lastStretch.mlen);
1344
1345
3.40M
        if (lastStretch.mlen==0) {
1346
            /* no solution : all matches have been converted into literals */
1347
533k
            assert(lastStretch.litlen == (ip - anchor) + last_pos);
1348
533k
            ip += last_pos;
1349
533k
            continue;
1350
533k
        }
1351
2.87M
        assert(lastStretch.off > 0);
1352
1353
        /* Update offset history */
1354
2.87M
        if (lastStretch.litlen == 0) {
1355
            /* finishing on a match : update offset history */
1356
2.36M
            repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0);
1357
2.36M
            ZSTD_memcpy(rep, &reps, sizeof(repcodes_t));
1358
2.36M
        } else {
1359
506k
            ZSTD_memcpy(rep, lastStretch.rep, sizeof(repcodes_t));
1360
506k
            assert(cur >= lastStretch.litlen);
1361
506k
            cur -= lastStretch.litlen;
1362
506k
        }
1363
1364
        /* Let's write the shortest path solution.
1365
         * It is stored in @opt in reverse order,
1366
         * starting from @storeEnd (==cur+2),
1367
         * effectively partially @opt overwriting.
1368
         * Content is changed too:
1369
         * - So far, @opt stored stretches, aka a match followed by literals
1370
         * - Now, it will store sequences, aka literals followed by a match
1371
         */
1372
2.87M
        {   U32 const storeEnd = cur + 2;
1373
2.87M
            U32 storeStart = storeEnd;
1374
2.87M
            U32 stretchPos = cur;
1375
1376
2.87M
            DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
1377
2.87M
                        last_pos, cur); (void)last_pos;
1378
2.87M
            assert(storeEnd < ZSTD_OPT_SIZE);
1379
2.87M
            DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
1380
2.87M
                        storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off);
1381
2.87M
            if (lastStretch.litlen > 0) {
1382
                /* last "sequence" is unfinished: just a bunch of literals */
1383
506k
                opt[storeEnd].litlen = lastStretch.litlen;
1384
506k
                opt[storeEnd].mlen = 0;
1385
506k
                storeStart = storeEnd-1;
1386
506k
                opt[storeStart] = lastStretch;
1387
506k
            } {
1388
2.87M
                opt[storeEnd] = lastStretch;  /* note: litlen will be fixed */
1389
2.87M
                storeStart = storeEnd;
1390
2.87M
            }
1391
4.67M
            while (1) {
1392
4.67M
                ZSTD_optimal_t nextStretch = opt[stretchPos];
1393
4.67M
                opt[storeStart].litlen = nextStretch.litlen;
1394
4.67M
                DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)",
1395
4.67M
                            opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off);
1396
4.67M
                if (nextStretch.mlen == 0) {
1397
                    /* reaching beginning of segment */
1398
2.87M
                    break;
1399
2.87M
                }
1400
1.79M
                storeStart--;
1401
1.79M
                opt[storeStart] = nextStretch; /* note: litlen will be fixed */
1402
1.79M
                assert(nextStretch.litlen + nextStretch.mlen <= stretchPos);
1403
1.79M
                stretchPos -= nextStretch.litlen + nextStretch.mlen;
1404
1.79M
            }
1405
1406
            /* save sequences */
1407
2.87M
            DEBUGLOG(6, "sending selected sequences into seqStore");
1408
2.87M
            {   U32 storePos;
1409
7.54M
                for (storePos=storeStart; storePos <= storeEnd; storePos++) {
1410
4.67M
                    U32 const llen = opt[storePos].litlen;
1411
4.67M
                    U32 const mlen = opt[storePos].mlen;
1412
4.67M
                    U32 const offBase = opt[storePos].off;
1413
4.67M
                    U32 const advance = llen + mlen;
1414
4.67M
                    DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
1415
4.67M
                                anchor - istart, (unsigned)llen, (unsigned)mlen);
1416
1417
4.67M
                    if (mlen==0) {  /* only literals => must be last "sequence", actually starting a new stream of sequences */
1418
0
                        assert(storePos == storeEnd);   /* must be last sequence */
1419
0
                        ip = anchor + llen;     /* last "sequence" is a bunch of literals => don't progress anchor */
1420
0
                        continue;   /* will finish */
1421
0
                    }
1422
1423
4.67M
                    assert(anchor + llen <= iend);
1424
4.67M
                    ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
1425
4.67M
                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
1426
4.67M
                    anchor += advance;
1427
4.67M
                    ip = anchor;
1428
4.67M
            }   }
1429
2.87M
            DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]);
1430
1431
            /* update all costs */
1432
2.87M
            ZSTD_setBasePrices(optStatePtr, optLevel);
1433
2.87M
        }
1434
2.87M
    }   /* while (ip < ilimit) */
1435
1436
    /* Return the last literals size */
1437
65.3k
    return (size_t)(iend - anchor);
1438
65.3k
}
1439
#endif /* build exclusions */
1440
1441
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
1442
/* ZSTD_compressBlock_opt0() :
 * wrapper fixing optLevel=0 so the FORCE_INLINE_TEMPLATE generic parser
 * gets compiled as a dedicated specialization for the btopt strategy. */
static size_t ZSTD_compressBlock_opt0(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
{
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
}
1448
#endif
1449
1450
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
1451
/* ZSTD_compressBlock_opt2() :
 * wrapper fixing optLevel=2 so the FORCE_INLINE_TEMPLATE generic parser
 * gets compiled as a dedicated specialization for the btultra strategies. */
static size_t ZSTD_compressBlock_opt2(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
{
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
}
1457
#endif
1458
1459
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
1460
/* ZSTD_compressBlock_btopt() :
 * public block compressor for the btopt strategy, no-dictionary mode. */
size_t ZSTD_compressBlock_btopt(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_compressBlock_btopt");
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
}
1467
#endif
1468
1469
1470
1471
1472
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
1473
/* ZSTD_initStats_ultra():
1474
 * make a first compression pass, just to seed stats with more accurate starting values.
1475
 * only works on first block, with no dictionary and no ldm.
1476
 * this function cannot error out, its narrow contract must be respected.
1477
 */
1478
static
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
void ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
                          seqStore_t* seqStore,
                          U32 rep[ZSTD_REP_NUM],
                    const void* src, size_t srcSize)
{
    U32 tmpRep[ZSTD_REP_NUM];  /* updated rep codes will sink here */
    ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));  /* work on a copy so caller's rep history is untouched */

    DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
    assert(ms->opt.litLengthSum == 0);    /* first block */
    assert(seqStore->sequences == seqStore->sequencesStart);   /* no ldm */
    assert(ms->window.dictLimit == ms->window.lowLimit);   /* no dictionary */
    assert(ms->window.dictLimit - ms->nextToUpdate <= 1);  /* no prefix (note: intentional overflow, defined as 2-complement) */

    ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict);   /* generate stats into ms->opt*/

    /* invalidate first scan from history, only keep entropy stats:
     * rewind the window so the real pass re-reads the same input as fresh data,
     * and reset limits/nextToUpdate so match-finder tables are rebuilt from scratch.
     * Only the statistics accumulated in ms->opt survive this reset. */
    ZSTD_resetSeqStore(seqStore);
    ms->window.base -= srcSize;
    ms->window.dictLimit += (U32)srcSize;
    ms->window.lowLimit = ms->window.dictLimit;
    ms->nextToUpdate = ms->window.dictLimit;

}
1504
1505
/* ZSTD_compressBlock_btultra() :
 * public block compressor for the btultra strategy (single pass), no-dictionary mode. */
size_t ZSTD_compressBlock_btultra(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
}
1512
1513
/* ZSTD_compressBlock_btultra2() :
 * btultra with an optional extra statistics-seeding pass on the first block. */
size_t ZSTD_compressBlock_btultra2(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    U32 const curr = (U32)((const BYTE*)src - ms->window.base);
    DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);

    /* 2-passes strategy:
     * this strategy makes a first pass over first block to collect statistics
     * in order to seed next round's statistics with it.
     * After 1st pass, function forgets history, and starts a new block.
     * Consequently, this can only work if no data has been previously loaded in tables,
     * aka, no dictionary, no prefix, no ldm preprocessing.
     * The compression ratio gain is generally small (~0.5% on first block),
     * the cost is 2x cpu time on first block. */
    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
    if ( (ms->opt.litLengthSum==0)   /* first block */
      && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
      && (ms->window.dictLimit == ms->window.lowLimit)   /* no dictionary */
      && (curr == ms->window.dictLimit)    /* start of frame, nothing already loaded nor skipped */
      && (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */
      ) {
        ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
    }

    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
}
1540
#endif
1541
1542
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
1543
/* btopt variant searching both the current window and an attached dictionary's match state. */
size_t ZSTD_compressBlock_btopt_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
}
1549
1550
/* btopt variant for windows split across an external (non-contiguous) segment. */
size_t ZSTD_compressBlock_btopt_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
}
1556
#endif
1557
1558
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
1559
/* btultra variant searching both the current window and an attached dictionary's match state. */
size_t ZSTD_compressBlock_btultra_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
}
1565
1566
/* btultra variant for windows split across an external (non-contiguous) segment. */
size_t ZSTD_compressBlock_btultra_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
}
1572
#endif
1573
1574
/* note : no btultra2 variant for extDict nor dictMatchState,
1575
 * because btultra2 is not meant to work with dictionaries
1576
 * and is only specific for the first block (no prefix) */