Coverage Report

Created: 2025-06-20 06:13

/src/c-blosc2/internal-complibs/zstd-1.5.7/compress/zstd_opt.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) Meta Platforms, Inc. and affiliates.
3
 * All rights reserved.
4
 *
5
 * This source code is licensed under both the BSD-style license (found in the
6
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
 * in the COPYING file in the root directory of this source tree).
8
 * You may select, at your option, one of the above-listed licenses.
9
 */
10
11
#include "zstd_compress_internal.h"
12
#include "hist.h"
13
#include "zstd_opt.h"
14
15
#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
16
 || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
17
 || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
18
19
259M
#define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
20
204M
#define ZSTD_MAX_PRICE     (1<<30)
21
22
124k
#define ZSTD_PREDEF_THRESHOLD 8   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
23
24
25
/*-*************************************
26
*  Price functions for optimal parser
27
***************************************/
28
29
#if 0    /* approximation at bit level (for tests) */
30
#  define BITCOST_ACCURACY 0
31
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
32
#  define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
33
#elif 0  /* fractional bit accuracy (for tests) */
34
#  define BITCOST_ACCURACY 8
35
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
36
#  define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
37
#else    /* opt==approx, ultra==accurate */
38
121G
#  define BITCOST_ACCURACY 8
39
86.3G
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
40
34.6G
#  define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
41
#endif
42
43
/* ZSTD_bitWeight() :
44
 * provide estimated "cost" of a stat in full bits only */
45
MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
46
58.5M
{
47
58.5M
    return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
48
58.5M
}
49
50
/* ZSTD_fracWeight() :
51
 * provide fractional-bit "cost" of a stat,
52
 * using linear interpolation approximation */
53
MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
54
34.6G
{
55
34.6G
    U32 const stat = rawStat + 1;
56
34.6G
    U32 const hb = ZSTD_highbit32(stat);
57
34.6G
    U32 const BWeight = hb * BITCOST_MULTIPLIER;
58
    /* Fweight was meant for "Fractional weight"
59
     * but it's effectively a value between 1 and 2
60
     * using fixed point arithmetic */
61
34.6G
    U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
62
34.6G
    U32 const weight = BWeight + FWeight;
63
34.6G
    assert(hb + BITCOST_ACCURACY < 31);
64
34.6G
    return weight;
65
34.6G
}
66
67
#if (DEBUGLEVEL>=2)
68
/* debugging function,
69
 * @return price in bytes as fractional value
70
 * for debug messages only */
71
MEM_STATIC double ZSTD_fCost(int price)
72
{
73
    return (double)price / (BITCOST_MULTIPLIER*8);
74
}
75
#endif
76
77
static int ZSTD_compressedLiterals(optState_t const* const optPtr)
78
167M
{
79
167M
    return optPtr->literalCompressionMode != ZSTD_ps_disable;
80
167M
}
81
82
static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
83
4.33M
{
84
4.33M
    if (ZSTD_compressedLiterals(optPtr))
85
4.33M
        optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
86
4.33M
    optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
87
4.33M
    optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
88
4.33M
    optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
89
4.33M
}
90
91
92
static U32 sum_u32(const unsigned table[], size_t nbElts)
93
356k
{
94
356k
    size_t n;
95
356k
    U32 total = 0;
96
25.4M
    for (n=0; n<nbElts; n++) {
97
25.0M
        total += table[n];
98
25.0M
    }
99
356k
    return total;
100
356k
}
101
102
typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;
103
104
static U32
105
ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
106
85.5k
{
107
85.5k
    U32 s, sum=0;
108
85.5k
    DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
109
85.5k
            (unsigned)lastEltIndex+1, (unsigned)shift );
110
85.5k
    assert(shift < 30);
111
21.9M
    for (s=0; s<lastEltIndex+1; s++) {
112
21.9M
        unsigned const base = base1 ? 1 : (table[s]>0);
113
21.9M
        unsigned const newStat = base + (table[s] >> shift);
114
21.9M
        sum += newStat;
115
21.9M
        table[s] = newStat;
116
21.9M
    }
117
85.5k
    return sum;
118
85.5k
}
119
120
/* ZSTD_scaleStats() :
121
 * reduce all elt frequencies in table if sum too large
122
 * return the resulting sum of elements */
123
static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
124
214k
{
125
214k
    U32 const prevsum = sum_u32(table, lastEltIndex+1);
126
214k
    U32 const factor = prevsum >> logTarget;
127
214k
    DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
128
214k
    assert(logTarget < 30);
129
214k
    if (factor <= 1) return prevsum;
130
14.6k
    return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
131
214k
}
132
133
/* ZSTD_rescaleFreqs() :
134
 * if first block (detected by optPtr->litLengthSum == 0) : init statistics
135
 *    take hints from dictionary if there is one
136
 *    and init from zero if there is none,
137
 *    using src for literals stats, and baseline stats for sequence symbols
138
 * otherwise downscale existing stats, to be used as seed for next block.
139
 */
140
static void
141
ZSTD_rescaleFreqs(optState_t* const optPtr,
142
            const BYTE* const src, size_t const srcSize,
143
                  int const optLevel)
144
124k
{
145
124k
    int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
146
124k
    DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
147
124k
    optPtr->priceType = zop_dynamic;
148
149
124k
    if (optPtr->litLengthSum == 0) {  /* no literals stats collected -> first block assumed -> init */
150
151
        /* heuristic: use pre-defined stats for too small inputs */
152
70.8k
        if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
153
0
            DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
154
0
            optPtr->priceType = zop_predef;
155
0
        }
156
157
70.8k
        assert(optPtr->symbolCosts != NULL);
158
70.8k
        if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
159
160
            /* huffman stats covering the full value set : table presumed generated by dictionary */
161
0
            optPtr->priceType = zop_dynamic;
162
163
0
            if (compressedLiterals) {
164
                /* generate literals statistics from huffman table */
165
0
                unsigned lit;
166
0
                assert(optPtr->litFreq != NULL);
167
0
                optPtr->litSum = 0;
168
0
                for (lit=0; lit<=MaxLit; lit++) {
169
0
                    U32 const scaleLog = 11;   /* scale to 2K */
170
0
                    U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
171
0
                    assert(bitCost <= scaleLog);
172
0
                    optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
173
0
                    optPtr->litSum += optPtr->litFreq[lit];
174
0
            }   }
175
176
0
            {   unsigned ll;
177
0
                FSE_CState_t llstate;
178
0
                FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
179
0
                optPtr->litLengthSum = 0;
180
0
                for (ll=0; ll<=MaxLL; ll++) {
181
0
                    U32 const scaleLog = 10;   /* scale to 1K */
182
0
                    U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
183
0
                    assert(bitCost < scaleLog);
184
0
                    optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
185
0
                    optPtr->litLengthSum += optPtr->litLengthFreq[ll];
186
0
            }   }
187
188
0
            {   unsigned ml;
189
0
                FSE_CState_t mlstate;
190
0
                FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
191
0
                optPtr->matchLengthSum = 0;
192
0
                for (ml=0; ml<=MaxML; ml++) {
193
0
                    U32 const scaleLog = 10;
194
0
                    U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
195
0
                    assert(bitCost < scaleLog);
196
0
                    optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
197
0
                    optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
198
0
            }   }
199
200
0
            {   unsigned of;
201
0
                FSE_CState_t ofstate;
202
0
                FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
203
0
                optPtr->offCodeSum = 0;
204
0
                for (of=0; of<=MaxOff; of++) {
205
0
                    U32 const scaleLog = 10;
206
0
                    U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
207
0
                    assert(bitCost < scaleLog);
208
0
                    optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
209
0
                    optPtr->offCodeSum += optPtr->offCodeFreq[of];
210
0
            }   }
211
212
70.8k
        } else {  /* first block, no dictionary */
213
214
70.8k
            assert(optPtr->litFreq != NULL);
215
70.8k
            if (compressedLiterals) {
216
                /* base initial cost of literals on direct frequency within src */
217
70.8k
                unsigned lit = MaxLit;
218
70.8k
                HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
219
70.8k
                optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
220
70.8k
            }
221
222
70.8k
            {   unsigned const baseLLfreqs[MaxLL+1] = {
223
70.8k
                    4, 2, 1, 1, 1, 1, 1, 1,
224
70.8k
                    1, 1, 1, 1, 1, 1, 1, 1,
225
70.8k
                    1, 1, 1, 1, 1, 1, 1, 1,
226
70.8k
                    1, 1, 1, 1, 1, 1, 1, 1,
227
70.8k
                    1, 1, 1, 1
228
70.8k
                };
229
70.8k
                ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
230
70.8k
                optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
231
70.8k
            }
232
233
70.8k
            {   unsigned ml;
234
3.82M
                for (ml=0; ml<=MaxML; ml++)
235
3.75M
                    optPtr->matchLengthFreq[ml] = 1;
236
70.8k
            }
237
70.8k
            optPtr->matchLengthSum = MaxML+1;
238
239
70.8k
            {   unsigned const baseOFCfreqs[MaxOff+1] = {
240
70.8k
                    6, 2, 1, 1, 2, 3, 4, 4,
241
70.8k
                    4, 3, 2, 1, 1, 1, 1, 1,
242
70.8k
                    1, 1, 1, 1, 1, 1, 1, 1,
243
70.8k
                    1, 1, 1, 1, 1, 1, 1, 1
244
70.8k
                };
245
70.8k
                ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
246
70.8k
                optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
247
70.8k
            }
248
249
70.8k
        }
250
251
70.8k
    } else {   /* new block : scale down accumulated statistics */
252
253
53.6k
        if (compressedLiterals)
254
53.6k
            optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
255
53.6k
        optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
256
53.6k
        optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
257
53.6k
        optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
258
53.6k
    }
259
260
124k
    ZSTD_setBasePrices(optPtr, optLevel);
261
124k
}
262
263
/* ZSTD_rawLiteralsCost() :
264
 * price of literals (only) in specified segment (which length can be 0).
265
 * does not include price of literalLength symbol */
266
static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
267
                                const optState_t* const optPtr,
268
                                int optLevel)
269
158M
{
270
158M
    DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength);
271
158M
    if (litLength == 0) return 0;
272
273
158M
    if (!ZSTD_compressedLiterals(optPtr))
274
0
        return (litLength << 3) * BITCOST_MULTIPLIER;  /* Uncompressed - 8 bytes per literal. */
275
276
158M
    if (optPtr->priceType == zop_predef)
277
0
        return (litLength*6) * BITCOST_MULTIPLIER;  /* 6 bit per literal - no statistic used */
278
279
    /* dynamic statistics */
280
158M
    {   U32 price = optPtr->litSumBasePrice * litLength;
281
158M
        U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER;
282
158M
        U32 u;
283
158M
        assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER);
284
316M
        for (u=0; u < litLength; u++) {
285
158M
            U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel);
286
158M
            if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
287
158M
            price -= litPrice;
288
158M
        }
289
158M
        return price;
290
158M
    }
291
158M
}
292
293
/* ZSTD_litLengthPrice() :
294
 * cost of literalLength symbol */
295
static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
296
551M
{
297
551M
    assert(litLength <= ZSTD_BLOCKSIZE_MAX);
298
551M
    if (optPtr->priceType == zop_predef)
299
0
        return WEIGHT(litLength, optLevel);
300
301
    /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
302
     * because it isn't representable in the zstd format.
303
     * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
304
     * In such a case, the block would be all literals.
305
     */
306
551M
    if (litLength == ZSTD_BLOCKSIZE_MAX)
307
0
        return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
308
309
    /* dynamic statistics */
310
551M
    {   U32 const llCode = ZSTD_LLcode(litLength);
311
551M
        return (LL_bits[llCode] * BITCOST_MULTIPLIER)
312
551M
             + optPtr->litLengthSumBasePrice
313
551M
             - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
314
551M
    }
315
551M
}
316
317
/* ZSTD_getMatchPrice() :
318
 * Provides the cost of the match part (offset + matchLength) of a sequence.
319
 * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
320
 * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
321
 * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
322
 */
323
FORCE_INLINE_TEMPLATE U32
324
ZSTD_getMatchPrice(U32 const offBase,
325
                   U32 const matchLength,
326
             const optState_t* const optPtr,
327
                   int const optLevel)
328
16.9G
{
329
16.9G
    U32 price;
330
16.9G
    U32 const offCode = ZSTD_highbit32(offBase);
331
16.9G
    U32 const mlBase = matchLength - MINMATCH;
332
16.9G
    assert(matchLength >= MINMATCH);
333
334
16.9G
    if (optPtr->priceType == zop_predef)  /* fixed scheme, does not use statistics */
335
0
        return WEIGHT(mlBase, optLevel)
336
0
             + ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */
337
338
    /* dynamic statistics */
339
16.9G
    price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
340
16.9G
    if ((optLevel<2) /*static*/ && offCode >= 20)
341
0
        price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */
342
343
    /* match Length */
344
16.9G
    {   U32 const mlCode = ZSTD_MLcode(mlBase);
345
16.9G
        price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
346
16.9G
    }
347
348
16.9G
    price += BITCOST_MULTIPLIER / 5;   /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */
349
350
16.9G
    DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
351
16.9G
    return price;
352
16.9G
}
353
354
/* ZSTD_updateStats() :
355
 * assumption : literals + litLength <= iend */
356
static void ZSTD_updateStats(optState_t* const optPtr,
357
                             U32 litLength, const BYTE* literals,
358
                             U32 offBase, U32 matchLength)
359
4.95M
{
360
    /* literals */
361
4.95M
    if (ZSTD_compressedLiterals(optPtr)) {
362
4.95M
        U32 u;
363
259M
        for (u=0; u < litLength; u++)
364
254M
            optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
365
4.95M
        optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
366
4.95M
    }
367
368
    /* literal Length */
369
4.95M
    {   U32 const llCode = ZSTD_LLcode(litLength);
370
4.95M
        optPtr->litLengthFreq[llCode]++;
371
4.95M
        optPtr->litLengthSum++;
372
4.95M
    }
373
374
    /* offset code : follows storeSeq() numeric representation */
375
4.95M
    {   U32 const offCode = ZSTD_highbit32(offBase);
376
4.95M
        assert(offCode <= MaxOff);
377
4.95M
        optPtr->offCodeFreq[offCode]++;
378
4.95M
        optPtr->offCodeSum++;
379
4.95M
    }
380
381
    /* match Length */
382
4.95M
    {   U32 const mlBase = matchLength - MINMATCH;
383
4.95M
        U32 const mlCode = ZSTD_MLcode(mlBase);
384
4.95M
        optPtr->matchLengthFreq[mlCode]++;
385
4.95M
        optPtr->matchLengthSum++;
386
4.95M
    }
387
4.95M
}
388
389
390
/* ZSTD_readMINMATCH() :
391
 * function safe only for comparisons
392
 * assumption : memPtr must be at least 4 bytes before end of buffer */
393
MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
394
2.19G
{
395
2.19G
    switch (length)
396
2.19G
    {
397
0
    default :
398
123M
    case 4 : return MEM_read32(memPtr);
399
2.07G
    case 3 : if (MEM_isLittleEndian())
400
2.07G
                return MEM_read32(memPtr)<<8;
401
0
             else
402
0
                return MEM_read32(memPtr)>>8;
403
2.19G
    }
404
2.19G
}
405
406
407
/* Update hashTable3 up to ip (excluded)
408
   Assumption : always within prefix (i.e. not within extDict) */
409
static
410
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
411
U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_MatchState_t* ms,
412
                                       U32* nextToUpdate3,
413
                                       const BYTE* const ip)
414
269M
{
415
269M
    U32* const hashTable3 = ms->hashTable3;
416
269M
    U32 const hashLog3 = ms->hashLog3;
417
269M
    const BYTE* const base = ms->window.base;
418
269M
    U32 idx = *nextToUpdate3;
419
269M
    U32 const target = (U32)(ip - base);
420
269M
    size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
421
269M
    assert(hashLog3 > 0);
422
423
677M
    while(idx < target) {
424
407M
        hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
425
407M
        idx++;
426
407M
    }
427
428
269M
    *nextToUpdate3 = target;
429
269M
    return hashTable3[hash3];
430
269M
}
431
432
433
/*-*************************************
434
*  Binary Tree search
435
***************************************/
436
/** ZSTD_insertBt1() : add one or multiple positions to tree.
437
 * @param ip assumed <= iend-8 .
438
 * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
439
 * @return : nb of positions added */
440
static
441
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
442
U32 ZSTD_insertBt1(
443
                const ZSTD_MatchState_t* ms,
444
                const BYTE* const ip, const BYTE* const iend,
445
                U32 const target,
446
                U32 const mls, const int extDict)
447
11.7M
{
448
11.7M
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
449
11.7M
    U32*   const hashTable = ms->hashTable;
450
11.7M
    U32    const hashLog = cParams->hashLog;
451
11.7M
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
452
11.7M
    U32*   const bt = ms->chainTable;
453
11.7M
    U32    const btLog  = cParams->chainLog - 1;
454
11.7M
    U32    const btMask = (1 << btLog) - 1;
455
11.7M
    U32 matchIndex = hashTable[h];
456
11.7M
    size_t commonLengthSmaller=0, commonLengthLarger=0;
457
11.7M
    const BYTE* const base = ms->window.base;
458
11.7M
    const BYTE* const dictBase = ms->window.dictBase;
459
11.7M
    const U32 dictLimit = ms->window.dictLimit;
460
11.7M
    const BYTE* const dictEnd = dictBase + dictLimit;
461
11.7M
    const BYTE* const prefixStart = base + dictLimit;
462
11.7M
    const BYTE* match;
463
11.7M
    const U32 curr = (U32)(ip-base);
464
11.7M
    const U32 btLow = btMask >= curr ? 0 : curr - btMask;
465
11.7M
    U32* smallerPtr = bt + 2*(curr&btMask);
466
11.7M
    U32* largerPtr  = smallerPtr + 1;
467
11.7M
    U32 dummy32;   /* to be nullified at the end */
468
    /* windowLow is based on target because
469
     * we only need positions that will be in the window at the end of the tree update.
470
     */
471
11.7M
    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
472
11.7M
    U32 matchEndIdx = curr+8+1;
473
11.7M
    size_t bestLength = 8;
474
11.7M
    U32 nbCompares = 1U << cParams->searchLog;
475
#ifdef ZSTD_C_PREDICT
476
    U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
477
    U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
478
    predictedSmall += (predictedSmall>0);
479
    predictedLarge += (predictedLarge>0);
480
#endif /* ZSTD_C_PREDICT */
481
482
11.7M
    DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
483
484
11.7M
    assert(curr <= target);
485
11.7M
    assert(ip <= iend-8);   /* required for h calculation */
486
11.7M
    hashTable[h] = curr;   /* Update Hash Table */
487
488
11.7M
    assert(windowLow > 0);
489
43.5M
    for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
490
31.8M
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
491
31.8M
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
492
31.8M
        assert(matchIndex < curr);
493
494
#ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
495
        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
496
        if (matchIndex == predictedSmall) {
497
            /* no need to check length, result known */
498
            *smallerPtr = matchIndex;
499
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
500
            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
501
            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
502
            predictedSmall = predictPtr[1] + (predictPtr[1]>0);
503
            continue;
504
        }
505
        if (matchIndex == predictedLarge) {
506
            *largerPtr = matchIndex;
507
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
508
            largerPtr = nextPtr;
509
            matchIndex = nextPtr[0];
510
            predictedLarge = predictPtr[0] + (predictPtr[0]>0);
511
            continue;
512
        }
513
#endif
514
515
31.8M
        if (!extDict || (matchIndex+matchLength >= dictLimit)) {
516
31.8M
            assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if actually extDict */
517
31.8M
            match = base + matchIndex;
518
31.8M
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
519
31.8M
        } else {
520
0
            match = dictBase + matchIndex;
521
0
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
522
0
            if (matchIndex+matchLength >= dictLimit)
523
0
                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
524
0
        }
525
526
31.8M
        if (matchLength > bestLength) {
527
10.0M
            bestLength = matchLength;
528
10.0M
            if (matchLength > matchEndIdx - matchIndex)
529
191k
                matchEndIdx = matchIndex + (U32)matchLength;
530
10.0M
        }
531
532
31.8M
        if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
533
124k
            break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
534
124k
        }
535
536
31.7M
        if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
537
            /* match is smaller than current */
538
14.5M
            *smallerPtr = matchIndex;             /* update smaller idx */
539
14.5M
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
540
14.5M
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
541
14.5M
            smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
542
14.5M
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
543
17.1M
        } else {
544
            /* match is larger than current */
545
17.1M
            *largerPtr = matchIndex;
546
17.1M
            commonLengthLarger = matchLength;
547
17.1M
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
548
17.1M
            largerPtr = nextPtr;
549
17.1M
            matchIndex = nextPtr[0];
550
17.1M
    }   }
551
552
11.7M
    *smallerPtr = *largerPtr = 0;
553
11.7M
    {   U32 positions = 0;
554
11.7M
        if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384));   /* speed optimization */
555
11.7M
        assert(matchEndIdx > curr + 8);
556
11.7M
        return MAX(positions, matchEndIdx - (curr + 8));
557
11.7M
    }
558
11.7M
}
559
560
FORCE_INLINE_TEMPLATE
561
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
562
void ZSTD_updateTree_internal(
563
                ZSTD_MatchState_t* ms,
564
                const BYTE* const ip, const BYTE* const iend,
565
                const U32 mls, const ZSTD_dictMode_e dictMode)
566
382M
{
567
382M
    const BYTE* const base = ms->window.base;
568
382M
    U32 const target = (U32)(ip - base);
569
382M
    U32 idx = ms->nextToUpdate;
570
382M
    DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
571
382M
                idx, target, dictMode);
572
573
394M
    while(idx < target) {
574
11.7M
        U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
575
11.7M
        assert(idx < (U32)(idx + forward));
576
11.7M
        idx += forward;
577
11.7M
    }
578
382M
    assert((size_t)(ip - base) <= (size_t)(U32)(-1));
579
382M
    assert((size_t)(iend - base) <= (size_t)(U32)(-1));
580
382M
    ms->nextToUpdate = target;
581
382M
}
582
583
0
void ZSTD_updateTree(ZSTD_MatchState_t* ms, const BYTE* ip, const BYTE* iend) {
584
0
    ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
585
0
}
586
587
FORCE_INLINE_TEMPLATE
588
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
589
U32
590
ZSTD_insertBtAndGetAllMatches (
591
                ZSTD_match_t* matches,  /* store result (found matches) in this table (presumed large enough) */
592
                ZSTD_MatchState_t* ms,
593
                U32* nextToUpdate3,
594
                const BYTE* const ip, const BYTE* const iLimit,
595
                const ZSTD_dictMode_e dictMode,
596
                const U32 rep[ZSTD_REP_NUM],
597
                const U32 ll0,  /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
598
                const U32 lengthToBeat,
599
                const U32 mls /* template */)
600
382M
{
601
382M
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
602
382M
    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
603
382M
    const BYTE* const base = ms->window.base;
604
382M
    U32 const curr = (U32)(ip-base);
605
382M
    U32 const hashLog = cParams->hashLog;
606
382M
    U32 const minMatch = (mls==3) ? 3 : 4;
607
382M
    U32* const hashTable = ms->hashTable;
608
382M
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
609
382M
    U32 matchIndex  = hashTable[h];
610
382M
    U32* const bt   = ms->chainTable;
611
382M
    U32 const btLog = cParams->chainLog - 1;
612
382M
    U32 const btMask= (1U << btLog) - 1;
613
382M
    size_t commonLengthSmaller=0, commonLengthLarger=0;
614
382M
    const BYTE* const dictBase = ms->window.dictBase;
615
382M
    U32 const dictLimit = ms->window.dictLimit;
616
382M
    const BYTE* const dictEnd = dictBase + dictLimit;
617
382M
    const BYTE* const prefixStart = base + dictLimit;
618
382M
    U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
619
382M
    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
620
382M
    U32 const matchLow = windowLow ? windowLow : 1;
621
382M
    U32* smallerPtr = bt + 2*(curr&btMask);
622
382M
    U32* largerPtr  = bt + 2*(curr&btMask) + 1;
623
382M
    U32 matchEndIdx = curr+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
624
382M
    U32 dummy32;   /* to be nullified at the end */
625
382M
    U32 mnum = 0;
626
382M
    U32 nbCompares = 1U << cParams->searchLog;
627
628
382M
    const ZSTD_MatchState_t* dms    = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
629
382M
    const ZSTD_compressionParameters* const dmsCParams =
630
382M
                                      dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
631
382M
    const BYTE* const dmsBase       = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
632
382M
    const BYTE* const dmsEnd        = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
633
382M
    U32         const dmsHighLimit  = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
634
382M
    U32         const dmsLowLimit   = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
635
382M
    U32         const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
636
382M
    U32         const dmsHashLog    = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
637
382M
    U32         const dmsBtLog      = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
638
382M
    U32         const dmsBtMask     = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
639
382M
    U32         const dmsBtLow      = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
640
641
382M
    size_t bestLength = lengthToBeat-1;
642
382M
    DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
643
644
    /* check repCode */
645
382M
    assert(ll0 <= 1);   /* necessarily 1 or 0 */
646
382M
    {   U32 const lastR = ZSTD_REP_NUM + ll0;
647
382M
        U32 repCode;
648
1.53G
        for (repCode = ll0; repCode < lastR; repCode++) {
649
1.14G
            U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
650
1.14G
            U32 const repIndex = curr - repOffset;
651
1.14G
            U32 repLen = 0;
652
1.14G
            assert(curr >= dictLimit);
653
1.14G
            if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) {  /* equivalent to `curr > repIndex >= dictLimit` */
654
                /* We must validate the repcode offset because when we're using a dictionary the
655
                 * valid offset range shrinks when the dictionary goes out of bounds.
656
                 */
657
1.09G
                if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
658
202M
                    repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
659
202M
                }
660
1.09G
            } else {  /* repIndex < dictLimit || repIndex >= curr */
661
49.8M
                const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
662
0
                                             dmsBase + repIndex - dmsIndexDelta :
663
49.8M
                                             dictBase + repIndex;
664
49.8M
                assert(curr >= windowLow);
665
49.8M
                if ( dictMode == ZSTD_extDict
666
49.8M
                  && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow)  /* equivalent to `curr > repIndex >= windowLow` */
667
0
                     & (ZSTD_index_overlap_check(dictLimit, repIndex)) )
668
49.8M
                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
669
0
                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
670
0
                }
671
49.8M
                if (dictMode == ZSTD_dictMatchState
672
49.8M
                  && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta))  /* equivalent to `curr > repIndex >= dmsLowLimit` */
673
0
                     & (ZSTD_index_overlap_check(dictLimit, repIndex)) )
674
49.8M
                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
675
0
                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
676
0
            }   }
677
            /* save longer solution */
678
1.14G
            if (repLen > bestLength) {
679
101M
                DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
680
101M
                            repCode, ll0, repOffset, repLen);
681
101M
                bestLength = repLen;
682
101M
                matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1);  /* expect value between 1 and 3 */
683
101M
                matches[mnum].len = (U32)repLen;
684
101M
                mnum++;
685
101M
                if ( (repLen > sufficient_len)
686
101M
                   | (ip+repLen == iLimit) ) {  /* best possible */
687
145k
                    return mnum;
688
145k
    }   }   }   }
689
690
    /* HC3 match finder */
691
382M
    if ((mls == 3) /*static*/ && (bestLength < mls)) {
692
269M
        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
693
269M
        if ((matchIndex3 >= matchLow)
694
269M
          & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
695
105M
            size_t mlen;
696
105M
            if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
697
105M
                const BYTE* const match = base + matchIndex3;
698
105M
                mlen = ZSTD_count(ip, match, iLimit);
699
105M
            } else {
700
0
                const BYTE* const match = dictBase + matchIndex3;
701
0
                mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
702
0
            }
703
704
            /* save best solution */
705
105M
            if (mlen >= mls /* == 3 > bestLength */) {
706
18.0M
                DEBUGLOG(8, "found small match with hlog3, of length %u",
707
18.0M
                            (U32)mlen);
708
18.0M
                bestLength = mlen;
709
18.0M
                assert(curr > matchIndex3);
710
18.0M
                assert(mnum==0);  /* no prior solution */
711
18.0M
                matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3);
712
18.0M
                matches[0].len = (U32)mlen;
713
18.0M
                mnum = 1;
714
18.0M
                if ( (mlen > sufficient_len) |
715
18.0M
                     (ip+mlen == iLimit) ) {  /* best possible length */
716
20.0k
                    ms->nextToUpdate = curr+1;  /* skip insertion */
717
20.0k
                    return 1;
718
20.0k
        }   }   }
719
        /* no dictMatchState lookup: dicts don't have a populated HC3 table */
720
269M
    }  /* if (mls == 3) */
721
722
382M
    hashTable[h] = curr;   /* Update Hash Table */
723
724
8.33G
    for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
725
7.95G
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
726
7.95G
        const BYTE* match;
727
7.95G
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
728
7.95G
        assert(curr > matchIndex);
729
730
7.95G
        if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
731
7.95G
            assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
732
7.95G
            match = base + matchIndex;
733
7.95G
            if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
734
7.95G
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
735
7.95G
        } else {
736
0
            match = dictBase + matchIndex;
737
0
            assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
738
0
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
739
0
            if (matchIndex+matchLength >= dictLimit)
740
0
                match = base + matchIndex;   /* prepare for match[matchLength] read */
741
0
        }
742
743
7.95G
        if (matchLength > bestLength) {
744
82.5M
            DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
745
82.5M
                    (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
746
82.5M
            assert(matchEndIdx > matchIndex);
747
82.5M
            if (matchLength > matchEndIdx - matchIndex)
748
240k
                matchEndIdx = matchIndex + (U32)matchLength;
749
82.5M
            bestLength = matchLength;
750
82.5M
            matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
751
82.5M
            matches[mnum].len = (U32)matchLength;
752
82.5M
            mnum++;
753
82.5M
            if ( (matchLength > ZSTD_OPT_NUM)
754
82.5M
               | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
755
29.4k
                if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
756
29.4k
                break; /* drop, to preserve bt consistency (miss a little bit of compression) */
757
29.4k
        }   }
758
759
7.95G
        if (match[matchLength] < ip[matchLength]) {
760
            /* match smaller than current */
761
3.31G
            *smallerPtr = matchIndex;             /* update smaller idx */
762
3.31G
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
763
3.31G
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
764
3.31G
            smallerPtr = nextPtr+1;               /* new candidate => larger than match, which was smaller than current */
765
3.31G
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous, closer to current */
766
4.63G
        } else {
767
4.63G
            *largerPtr = matchIndex;
768
4.63G
            commonLengthLarger = matchLength;
769
4.63G
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
770
4.63G
            largerPtr = nextPtr;
771
4.63G
            matchIndex = nextPtr[0];
772
4.63G
    }   }
773
774
382M
    *smallerPtr = *largerPtr = 0;
775
776
382M
    assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
777
382M
    if (dictMode == ZSTD_dictMatchState && nbCompares) {
778
0
        size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
779
0
        U32 dictMatchIndex = dms->hashTable[dmsH];
780
0
        const U32* const dmsBt = dms->chainTable;
781
0
        commonLengthSmaller = commonLengthLarger = 0;
782
0
        for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
783
0
            const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
784
0
            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
785
0
            const BYTE* match = dmsBase + dictMatchIndex;
786
0
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
787
0
            if (dictMatchIndex+matchLength >= dmsHighLimit)
788
0
                match = base + dictMatchIndex + dmsIndexDelta;   /* to prepare for next usage of match[matchLength] */
789
790
0
            if (matchLength > bestLength) {
791
0
                matchIndex = dictMatchIndex + dmsIndexDelta;
792
0
                DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
793
0
                        (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
794
0
                if (matchLength > matchEndIdx - matchIndex)
795
0
                    matchEndIdx = matchIndex + (U32)matchLength;
796
0
                bestLength = matchLength;
797
0
                matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
798
0
                matches[mnum].len = (U32)matchLength;
799
0
                mnum++;
800
0
                if ( (matchLength > ZSTD_OPT_NUM)
801
0
                   | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
802
0
                    break;   /* drop, to guarantee consistency (miss a little bit of compression) */
803
0
            }   }
804
805
0
            if (dictMatchIndex <= dmsBtLow) { break; }   /* beyond tree size, stop the search */
806
0
            if (match[matchLength] < ip[matchLength]) {
807
0
                commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
808
0
                dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
809
0
            } else {
810
                /* match is larger than current */
811
0
                commonLengthLarger = matchLength;
812
0
                dictMatchIndex = nextPtr[0];
813
0
    }   }   }  /* if (dictMode == ZSTD_dictMatchState) */
814
815
382M
    assert(matchEndIdx > curr+8);
816
382M
    ms->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
817
382M
    return mnum;
818
382M
}
819
820
typedef U32 (*ZSTD_getAllMatchesFn)(
821
    ZSTD_match_t*,
822
    ZSTD_MatchState_t*,
823
    U32*,
824
    const BYTE*,
825
    const BYTE*,
826
    const U32 rep[ZSTD_REP_NUM],
827
    U32 const ll0,
828
    U32 const lengthToBeat);
829
830
FORCE_INLINE_TEMPLATE
831
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
832
U32 ZSTD_btGetAllMatches_internal(
833
        ZSTD_match_t* matches,
834
        ZSTD_MatchState_t* ms,
835
        U32* nextToUpdate3,
836
        const BYTE* ip,
837
        const BYTE* const iHighLimit,
838
        const U32 rep[ZSTD_REP_NUM],
839
        U32 const ll0,
840
        U32 const lengthToBeat,
841
        const ZSTD_dictMode_e dictMode,
842
        const U32 mls)
843
404M
{
844
404M
    assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
845
404M
    DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
846
404M
    if (ip < ms->window.base + ms->nextToUpdate)
847
22.0M
        return 0;   /* skipped area */
848
382M
    ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
849
382M
    return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
850
404M
}
851
852
1.49M
#define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
853
854
#define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls)            \
855
    static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)(      \
856
            ZSTD_match_t* matches,                             \
857
            ZSTD_MatchState_t* ms,                             \
858
            U32* nextToUpdate3,                                \
859
            const BYTE* ip,                                    \
860
            const BYTE* const iHighLimit,                      \
861
            const U32 rep[ZSTD_REP_NUM],                       \
862
            U32 const ll0,                                     \
863
            U32 const lengthToBeat)                            \
864
404M
    {                                                          \
865
404M
        return ZSTD_btGetAllMatches_internal(                  \
866
404M
                matches, ms, nextToUpdate3, ip, iHighLimit,    \
867
404M
                rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
868
404M
    }
zstd_opt.c:ZSTD_btGetAllMatches_noDict_3
Line
Count
Source
864
384M
    {                                                          \
865
384M
        return ZSTD_btGetAllMatches_internal(                  \
866
384M
                matches, ms, nextToUpdate3, ip, iHighLimit,    \
867
384M
                rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
868
384M
    }
zstd_opt.c:ZSTD_btGetAllMatches_noDict_4
Line
Count
Source
864
20.7M
    {                                                          \
865
20.7M
        return ZSTD_btGetAllMatches_internal(                  \
866
20.7M
                matches, ms, nextToUpdate3, ip, iHighLimit,    \
867
20.7M
                rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
868
20.7M
    }
Unexecuted instantiation: zstd_opt.c:ZSTD_btGetAllMatches_noDict_5
Unexecuted instantiation: zstd_opt.c:ZSTD_btGetAllMatches_noDict_6
Unexecuted instantiation: zstd_opt.c:ZSTD_btGetAllMatches_extDict_3
Unexecuted instantiation: zstd_opt.c:ZSTD_btGetAllMatches_extDict_4
Unexecuted instantiation: zstd_opt.c:ZSTD_btGetAllMatches_extDict_5
Unexecuted instantiation: zstd_opt.c:ZSTD_btGetAllMatches_extDict_6
Unexecuted instantiation: zstd_opt.c:ZSTD_btGetAllMatches_dictMatchState_3
Unexecuted instantiation: zstd_opt.c:ZSTD_btGetAllMatches_dictMatchState_4
Unexecuted instantiation: zstd_opt.c:ZSTD_btGetAllMatches_dictMatchState_5
Unexecuted instantiation: zstd_opt.c:ZSTD_btGetAllMatches_dictMatchState_6
869
870
#define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode)  \
871
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3)  \
872
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4)  \
873
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5)  \
874
    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
875
876
GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
877
GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
878
GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
879
880
#define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode)  \
881
373k
    {                                            \
882
373k
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
883
373k
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
884
373k
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
885
373k
        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6)  \
886
373k
    }
887
888
static ZSTD_getAllMatchesFn
889
ZSTD_selectBtGetAllMatches(ZSTD_MatchState_t const* ms, ZSTD_dictMode_e const dictMode)
890
124k
{
891
124k
    ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
892
124k
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
893
124k
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
894
124k
        ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
895
124k
    };
896
124k
    U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
897
124k
    assert((U32)dictMode < 3);
898
124k
    assert(mls - 3 < 4);
899
124k
    return getAllMatchesFns[(int)dictMode][mls - 3];
900
124k
}
901
902
/*************************
903
*  LDM helper functions  *
904
*************************/
905
906
/* Struct containing info needed to make decision about ldm inclusion */
907
typedef struct {
908
    RawSeqStore_t seqStore;   /* External match candidates store for this block */
909
    U32 startPosInBlock;      /* Start position of the current match candidate */
910
    U32 endPosInBlock;        /* End position of the current match candidate */
911
    U32 offset;               /* Offset of the match candidate */
912
} ZSTD_optLdm_t;
913
914
/* ZSTD_optLdm_skipRawSeqStoreBytes():
915
 * Moves forward in @rawSeqStore by @nbBytes,
916
 * which will update the fields 'pos' and 'posInSequence'.
917
 */
918
static void ZSTD_optLdm_skipRawSeqStoreBytes(RawSeqStore_t* rawSeqStore, size_t nbBytes)
919
0
{
920
0
    U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
921
0
    while (currPos && rawSeqStore->pos < rawSeqStore->size) {
922
0
        rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
923
0
        if (currPos >= currSeq.litLength + currSeq.matchLength) {
924
0
            currPos -= currSeq.litLength + currSeq.matchLength;
925
0
            rawSeqStore->pos++;
926
0
        } else {
927
0
            rawSeqStore->posInSequence = currPos;
928
0
            break;
929
0
        }
930
0
    }
931
0
    if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
932
0
        rawSeqStore->posInSequence = 0;
933
0
    }
934
0
}
935
936
/* ZSTD_opt_getNextMatchAndUpdateSeqStore():
937
 * Calculates the beginning and end of the next match in the current block.
938
 * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
939
 */
940
static void
941
ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
942
                                       U32 blockBytesRemaining)
943
124k
{
944
124k
    rawSeq currSeq;
945
124k
    U32 currBlockEndPos;
946
124k
    U32 literalsBytesRemaining;
947
124k
    U32 matchBytesRemaining;
948
949
    /* Setting match end position to MAX to ensure we never use an LDM during this block */
950
124k
    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
951
124k
        optLdm->startPosInBlock = UINT_MAX;
952
124k
        optLdm->endPosInBlock = UINT_MAX;
953
124k
        return;
954
124k
    }
955
    /* Calculate appropriate bytes left in matchLength and litLength
956
     * after adjusting based on ldmSeqStore->posInSequence */
957
0
    currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
958
0
    assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
959
0
    currBlockEndPos = currPosInBlock + blockBytesRemaining;
960
0
    literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
961
0
            currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
962
0
            0;
963
0
    matchBytesRemaining = (literalsBytesRemaining == 0) ?
964
0
            currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
965
0
            currSeq.matchLength;
966
967
    /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
968
0
    if (literalsBytesRemaining >= blockBytesRemaining) {
969
0
        optLdm->startPosInBlock = UINT_MAX;
970
0
        optLdm->endPosInBlock = UINT_MAX;
971
0
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
972
0
        return;
973
0
    }
974
975
    /* Matches may be < minMatch by this process. In that case, we will reject them
976
       when we are deciding whether or not to add the ldm */
977
0
    optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
978
0
    optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
979
0
    optLdm->offset = currSeq.offset;
980
981
0
    if (optLdm->endPosInBlock > currBlockEndPos) {
982
        /* Match ends after the block ends, we can't use the whole match */
983
0
        optLdm->endPosInBlock = currBlockEndPos;
984
0
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
985
0
    } else {
986
        /* Consume nb of bytes equal to size of sequence left */
987
0
        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
988
0
    }
989
0
}
990
991
/* ZSTD_optLdm_maybeAddMatch():
992
 * Adds a match if it's long enough,
993
 * based on it's 'matchStartPosInBlock' and 'matchEndPosInBlock',
994
 * into 'matches'. Maintains the correct ordering of 'matches'.
995
 */
996
static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
997
                                      const ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
998
                                      U32 minMatch)
999
0
{
1000
0
    U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
1001
    /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
1002
0
    U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
1003
1004
    /* Ensure that current block position is not outside of the match */
1005
0
    if (currPosInBlock < optLdm->startPosInBlock
1006
0
      || currPosInBlock >= optLdm->endPosInBlock
1007
0
      || candidateMatchLength < minMatch) {
1008
0
        return;
1009
0
    }
1010
1011
0
    if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
1012
0
        U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset);
1013
0
        DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
1014
0
                 candidateOffBase, candidateMatchLength, currPosInBlock);
1015
0
        matches[*nbMatches].len = candidateMatchLength;
1016
0
        matches[*nbMatches].off = candidateOffBase;
1017
0
        (*nbMatches)++;
1018
0
    }
1019
0
}
1020
1021
/* ZSTD_optLdm_processMatchCandidate():
1022
 * Wrapper function to update ldm seq store and call ldm functions as necessary.
1023
 */
1024
static void
1025
ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
1026
                                  ZSTD_match_t* matches, U32* nbMatches,
1027
                                  U32 currPosInBlock, U32 remainingBytes,
1028
                                  U32 minMatch)
1029
404M
{
1030
404M
    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
1031
404M
        return;
1032
404M
    }
1033
1034
0
    if (currPosInBlock >= optLdm->endPosInBlock) {
1035
0
        if (currPosInBlock > optLdm->endPosInBlock) {
1036
            /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
1037
             * at the end of a match from the ldm seq store, and will often be some bytes
1038
             * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
1039
             */
1040
0
            U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
1041
0
            ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
1042
0
        }
1043
0
        ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
1044
0
    }
1045
0
    ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock, minMatch);
1046
0
}
1047
1048
1049
/*-*******************************
1050
*  Optimal parser
1051
*********************************/
1052
1053
#if 0 /* debug */
1054
1055
static void
1056
listStats(const U32* table, int lastEltID)
1057
{
1058
    int const nbElts = lastEltID + 1;
1059
    int enb;
1060
    for (enb=0; enb < nbElts; enb++) {
1061
        (void)table;
1062
        /* RAWLOG(2, "%3i:%3i,  ", enb, table[enb]); */
1063
        RAWLOG(2, "%4i,", table[enb]);
1064
    }
1065
    RAWLOG(2, " \n");
1066
}
1067
1068
#endif
1069
1070
158M
#define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel)
1071
551M
#define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel)
1072
167M
#define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1))
1073
1074
FORCE_INLINE_TEMPLATE
1075
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1076
size_t
1077
ZSTD_compressBlock_opt_generic(ZSTD_MatchState_t* ms,
1078
                               SeqStore_t* seqStore,
1079
                               U32 rep[ZSTD_REP_NUM],
1080
                         const void* src, size_t srcSize,
1081
                         const int optLevel,
1082
                         const ZSTD_dictMode_e dictMode)
1083
124k
{
1084
124k
    optState_t* const optStatePtr = &ms->opt;
1085
124k
    const BYTE* const istart = (const BYTE*)src;
1086
124k
    const BYTE* ip = istart;
1087
124k
    const BYTE* anchor = istart;
1088
124k
    const BYTE* const iend = istart + srcSize;
1089
124k
    const BYTE* const ilimit = iend - 8;
1090
124k
    const BYTE* const base = ms->window.base;
1091
124k
    const BYTE* const prefixStart = base + ms->window.dictLimit;
1092
124k
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
1093
1094
124k
    ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
1095
1096
124k
    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
1097
124k
    U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
1098
124k
    U32 nextToUpdate3 = ms->nextToUpdate;
1099
1100
124k
    ZSTD_optimal_t* const opt = optStatePtr->priceTable;
1101
124k
    ZSTD_match_t* const matches = optStatePtr->matchTable;
1102
124k
    ZSTD_optimal_t lastStretch;
1103
124k
    ZSTD_optLdm_t optLdm;
1104
1105
124k
    ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t));
1106
1107
124k
    optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
1108
124k
    optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
1109
124k
    ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
1110
1111
    /* init */
1112
124k
    DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
1113
124k
                (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
1114
124k
    assert(optLevel <= 2);
1115
124k
    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
1116
124k
    ip += (ip==prefixStart);
1117
1118
    /* Match Loop */
1119
263M
    while (ip < ilimit) {
1120
263M
        U32 cur, last_pos = 0;
1121
1122
        /* find first match */
1123
263M
        {   U32 const litlen = (U32)(ip - anchor);
1124
263M
            U32 const ll0 = !litlen;
1125
263M
            U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
1126
263M
            ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1127
263M
                                              (U32)(ip-istart), (U32)(iend-ip),
1128
263M
                                              minMatch);
1129
263M
            if (!nbMatches) {
1130
258M
                DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart));
1131
258M
                ip++;
1132
258M
                continue;
1133
258M
            }
1134
1135
            /* Match found: let's store this solution, and eventually find more candidates.
1136
             * During this forward pass, @opt is used to store stretches,
1137
             * defined as "a match followed by N literals".
1138
             * Note how this is different from a Sequence, which is "N literals followed by a match".
1139
             * Storing stretches allows us to store different match predecessors
1140
             * for each literal position part of a literals run. */
1141
1142
            /* initialize opt[0] */
1143
5.24M
            opt[0].mlen = 0;  /* there are only literals so far */
1144
5.24M
            opt[0].litlen = litlen;
1145
            /* No need to include the actual price of the literals before the first match
1146
             * because it is static for the duration of the forward pass, and is included
1147
             * in every subsequent price. But, we include the literal length because
1148
             * the cost variation of litlen depends on the value of litlen.
1149
             */
1150
5.24M
            opt[0].price = LL_PRICE(litlen);
1151
5.24M
            ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0]));
1152
5.24M
            ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep));
1153
1154
            /* large match -> immediate encoding */
1155
5.24M
            {   U32 const maxML = matches[nbMatches-1].len;
1156
5.24M
                U32 const maxOffBase = matches[nbMatches-1].off;
1157
5.24M
                DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
1158
5.24M
                            nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
1159
1160
5.24M
                if (maxML > sufficient_len) {
1161
173k
                    lastStretch.litlen = 0;
1162
173k
                    lastStretch.mlen = maxML;
1163
173k
                    lastStretch.off = maxOffBase;
1164
173k
                    DEBUGLOG(6, "large match (%u>%u) => immediate encoding",
1165
173k
                                maxML, sufficient_len);
1166
173k
                    cur = 0;
1167
173k
                    last_pos = maxML;
1168
173k
                    goto _shortestPath;
1169
173k
            }   }
1170
1171
            /* set prices for first matches starting position == 0 */
1172
5.07M
            assert(opt[0].price >= 0);
1173
5.07M
            {   U32 pos;
1174
5.07M
                U32 matchNb;
1175
16.6M
                for (pos = 1; pos < minMatch; pos++) {
1176
11.5M
                    opt[pos].price = ZSTD_MAX_PRICE;
1177
11.5M
                    opt[pos].mlen = 0;
1178
11.5M
                    opt[pos].litlen = litlen + pos;
1179
11.5M
                }
1180
29.1M
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1181
24.1M
                    U32 const offBase = matches[matchNb].off;
1182
24.1M
                    U32 const end = matches[matchNb].len;
1183
94.6M
                    for ( ; pos <= end ; pos++ ) {
1184
70.5M
                        int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
1185
70.5M
                        int const sequencePrice = opt[0].price + matchPrice;
1186
70.5M
                        DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
1187
70.5M
                                    pos, ZSTD_fCost(sequencePrice));
1188
70.5M
                        opt[pos].mlen = pos;
1189
70.5M
                        opt[pos].off = offBase;
1190
70.5M
                        opt[pos].litlen = 0; /* end of match */
1191
70.5M
                        opt[pos].price = sequencePrice + LL_PRICE(0);
1192
70.5M
                    }
1193
24.1M
                }
1194
5.07M
                last_pos = pos-1;
1195
5.07M
                opt[pos].price = ZSTD_MAX_PRICE;
1196
5.07M
            }
1197
5.07M
        }
1198
1199
        /* check further positions */
1200
149M
        for (cur = 1; cur <= last_pos; cur++) {
1201
149M
            const BYTE* const inr = ip + cur;
1202
149M
            assert(cur <= ZSTD_OPT_NUM);
1203
149M
            DEBUGLOG(7, "cPos:%i==rPos:%u", (int)(inr-istart), cur);
1204
1205
            /* Fix current position with one literal if cheaper */
1206
149M
            {   U32 const litlen = opt[cur-1].litlen + 1;
1207
149M
                int const price = opt[cur-1].price
1208
149M
                                + LIT_PRICE(ip+cur-1)
1209
149M
                                + LL_INCPRICE(litlen);
1210
149M
                assert(price < 1000000000); /* overflow check */
1211
149M
                if (price <= opt[cur].price) {
1212
22.2M
                    ZSTD_optimal_t const prevMatch = opt[cur];
1213
22.2M
                    DEBUGLOG(7, "cPos:%i==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
1214
22.2M
                                (int)(inr-istart), cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
1215
22.2M
                                opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
1216
22.2M
                    opt[cur] = opt[cur-1];
1217
22.2M
                    opt[cur].litlen = litlen;
1218
22.2M
                    opt[cur].price = price;
1219
22.2M
                    if ( (optLevel >= 1) /* additional check only for higher modes */
1220
22.2M
                      && (prevMatch.litlen == 0) /* replace a match */
1221
22.2M
                      && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */
1222
22.2M
                      && LIKELY(ip + cur < iend)
1223
22.2M
                    ) {
1224
                        /* check next position, in case it would be cheaper */
1225
4.45M
                        int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1);
1226
4.45M
                        int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1);
1227
4.45M
                        DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f",
1228
4.45M
                                cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals));
1229
4.45M
                        if ( (with1literal < withMoreLiterals)
1230
4.45M
                          && (with1literal < opt[cur+1].price) ) {
1231
                            /* update offset history - before it disappears */
1232
335k
                            U32 const prev = cur - prevMatch.mlen;
1233
335k
                            Repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0);
1234
335k
                            assert(cur >= prevMatch.mlen);
1235
335k
                            DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !",
1236
335k
                                        ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals),
1237
335k
                                        newReps.rep[0], newReps.rep[1], newReps.rep[2] );
1238
335k
                            opt[cur+1] = prevMatch;  /* mlen & offbase */
1239
335k
                            ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(Repcodes_t));
1240
335k
                            opt[cur+1].litlen = 1;
1241
335k
                            opt[cur+1].price = with1literal;
1242
335k
                            if (last_pos < cur+1) last_pos = cur+1;
1243
335k
                        }
1244
4.45M
                    }
1245
127M
                } else {
1246
127M
                    DEBUGLOG(7, "cPos:%i==rPos:%u : literal would cost more (%.2f>%.2f)",
1247
127M
                                (int)(inr-istart), cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
1248
127M
                }
1249
149M
            }
1250
1251
            /* Offset history is not updated during match comparison.
1252
             * Do it here, now that the match is selected and confirmed.
1253
             */
1254
149M
            ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(Repcodes_t));
1255
149M
            assert(cur >= opt[cur].mlen);
1256
149M
            if (opt[cur].litlen == 0) {
1257
                /* just finished a match => alter offset history */
1258
126M
                U32 const prev = cur - opt[cur].mlen;
1259
126M
                Repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
1260
126M
                ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(Repcodes_t));
1261
126M
            }
1262
1263
            /* last match must start at a minimum distance of 8 from oend */
1264
149M
            if (inr > ilimit) continue;
1265
1266
149M
            if (cur == last_pos) break;
1267
1268
144M
            if ( (optLevel==0) /*static_test*/
1269
144M
              && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
1270
3.11M
                DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1);
1271
3.11M
                continue;  /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
1272
3.11M
            }
1273
1274
141M
            assert(opt[cur].price >= 0);
1275
141M
            {   U32 const ll0 = (opt[cur].litlen == 0);
1276
141M
                int const previousPrice = opt[cur].price;
1277
141M
                int const basePrice = previousPrice + LL_PRICE(0);
1278
141M
                U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
1279
141M
                U32 matchNb;
1280
1281
141M
                ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1282
141M
                                                  (U32)(inr-istart), (U32)(iend-inr),
1283
141M
                                                  minMatch);
1284
1285
141M
                if (!nbMatches) {
1286
30.4M
                    DEBUGLOG(7, "rPos:%u : no match found", cur);
1287
30.4M
                    continue;
1288
30.4M
                }
1289
1290
110M
                {   U32 const longestML = matches[nbMatches-1].len;
1291
110M
                    DEBUGLOG(7, "cPos:%i==rPos:%u, found %u matches, of longest ML=%u",
1292
110M
                                (int)(inr-istart), cur, nbMatches, longestML);
1293
1294
110M
                    if ( (longestML > sufficient_len)
1295
110M
                      || (cur + longestML >= ZSTD_OPT_NUM)
1296
110M
                      || (ip + cur + longestML >= iend) ) {
1297
128k
                        lastStretch.mlen = longestML;
1298
128k
                        lastStretch.off = matches[nbMatches-1].off;
1299
128k
                        lastStretch.litlen = 0;
1300
128k
                        last_pos = cur + longestML;
1301
128k
                        goto _shortestPath;
1302
128k
                }   }
1303
1304
                /* set prices using matches found at position == cur */
1305
284M
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1306
174M
                    U32 const offset = matches[matchNb].off;
1307
174M
                    U32 const lastML = matches[matchNb].len;
1308
174M
                    U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
1309
174M
                    U32 mlen;
1310
1311
174M
                    DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
1312
174M
                                matchNb, matches[matchNb].off, lastML, opt[cur].litlen);
1313
1314
17.0G
                    for (mlen = lastML; mlen >= startML; mlen--) {  /* scan downward */
1315
16.9G
                        U32 const pos = cur + mlen;
1316
16.9G
                        int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
1317
1318
16.9G
                        if ((pos > last_pos) || (price < opt[pos].price)) {
1319
144M
                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
1320
144M
                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
1321
221M
                            while (last_pos < pos) {
1322
                                /* fill empty positions, for future comparisons */
1323
76.9M
                                last_pos++;
1324
76.9M
                                opt[last_pos].price = ZSTD_MAX_PRICE;
1325
76.9M
                                opt[last_pos].litlen = !0;  /* just needs to be != 0, to mean "not an end of match" */
1326
76.9M
                            }
1327
144M
                            opt[pos].mlen = mlen;
1328
144M
                            opt[pos].off = offset;
1329
144M
                            opt[pos].litlen = 0;
1330
144M
                            opt[pos].price = price;
1331
16.7G
                        } else {
1332
16.7G
                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
1333
16.7G
                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
1334
16.7G
                            if (optLevel==0) break;  /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
1335
16.7G
                        }
1336
16.9G
            }   }   }
1337
110M
            opt[last_pos+1].price = ZSTD_MAX_PRICE;
1338
110M
        }  /* for (cur = 1; cur <= last_pos; cur++) */
1339
1340
4.94M
        lastStretch = opt[last_pos];
1341
4.94M
        assert(cur >= lastStretch.mlen);
1342
4.94M
        cur = last_pos - lastStretch.mlen;
1343
1344
5.24M
_shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
1345
5.24M
        assert(opt[0].mlen == 0);
1346
5.24M
        assert(last_pos >= lastStretch.mlen);
1347
5.24M
        assert(cur == last_pos - lastStretch.mlen);
1348
1349
5.24M
        if (lastStretch.mlen==0) {
1350
            /* no solution : all matches have been converted into literals */
1351
1.03M
            assert(lastStretch.litlen == (ip - anchor) + last_pos);
1352
1.03M
            ip += last_pos;
1353
1.03M
            continue;
1354
1.03M
        }
1355
4.21M
        assert(lastStretch.off > 0);
1356
1357
        /* Update offset history */
1358
4.21M
        if (lastStretch.litlen == 0) {
1359
            /* finishing on a match : update offset history */
1360
3.68M
            Repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0);
1361
3.68M
            ZSTD_memcpy(rep, &reps, sizeof(Repcodes_t));
1362
3.68M
        } else {
1363
526k
            ZSTD_memcpy(rep, lastStretch.rep, sizeof(Repcodes_t));
1364
526k
            assert(cur >= lastStretch.litlen);
1365
526k
            cur -= lastStretch.litlen;
1366
526k
        }
1367
1368
        /* Let's write the shortest path solution.
1369
         * It is stored in @opt in reverse order,
1370
         * starting from @storeEnd (==cur+2),
1371
         * effectively partially @opt overwriting.
1372
         * Content is changed too:
1373
         * - So far, @opt stored stretches, aka a match followed by literals
1374
         * - Now, it will store sequences, aka literals followed by a match
1375
         */
1376
4.21M
        {   U32 const storeEnd = cur + 2;
1377
4.21M
            U32 storeStart = storeEnd;
1378
4.21M
            U32 stretchPos = cur;
1379
1380
4.21M
            DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
1381
4.21M
                        last_pos, cur); (void)last_pos;
1382
4.21M
            assert(storeEnd < ZSTD_OPT_SIZE);
1383
4.21M
            DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
1384
4.21M
                        storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off);
1385
4.21M
            if (lastStretch.litlen > 0) {
1386
                /* last "sequence" is unfinished: just a bunch of literals */
1387
526k
                opt[storeEnd].litlen = lastStretch.litlen;
1388
526k
                opt[storeEnd].mlen = 0;
1389
526k
                storeStart = storeEnd-1;
1390
526k
                opt[storeStart] = lastStretch;
1391
526k
            } {
1392
4.21M
                opt[storeEnd] = lastStretch;  /* note: litlen will be fixed */
1393
4.21M
                storeStart = storeEnd;
1394
4.21M
            }
1395
4.95M
            while (1) {
1396
4.95M
                ZSTD_optimal_t nextStretch = opt[stretchPos];
1397
4.95M
                opt[storeStart].litlen = nextStretch.litlen;
1398
4.95M
                DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)",
1399
4.95M
                            opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off);
1400
4.95M
                if (nextStretch.mlen == 0) {
1401
                    /* reaching beginning of segment */
1402
4.21M
                    break;
1403
4.21M
                }
1404
748k
                storeStart--;
1405
748k
                opt[storeStart] = nextStretch; /* note: litlen will be fixed */
1406
748k
                assert(nextStretch.litlen + nextStretch.mlen <= stretchPos);
1407
748k
                stretchPos -= nextStretch.litlen + nextStretch.mlen;
1408
748k
            }
1409
1410
            /* save sequences */
1411
4.21M
            DEBUGLOG(6, "sending selected sequences into seqStore");
1412
4.21M
            {   U32 storePos;
1413
9.17M
                for (storePos=storeStart; storePos <= storeEnd; storePos++) {
1414
4.95M
                    U32 const llen = opt[storePos].litlen;
1415
4.95M
                    U32 const mlen = opt[storePos].mlen;
1416
4.95M
                    U32 const offBase = opt[storePos].off;
1417
4.95M
                    U32 const advance = llen + mlen;
1418
4.95M
                    DEBUGLOG(6, "considering seq starting at %i, llen=%u, mlen=%u",
1419
4.95M
                                (int)(anchor - istart), (unsigned)llen, (unsigned)mlen);
1420
1421
4.95M
                    if (mlen==0) {  /* only literals => must be last "sequence", actually starting a new stream of sequences */
1422
0
                        assert(storePos == storeEnd);   /* must be last sequence */
1423
0
                        ip = anchor + llen;     /* last "sequence" is a bunch of literals => don't progress anchor */
1424
0
                        continue;   /* will finish */
1425
0
                    }
1426
1427
4.95M
                    assert(anchor + llen <= iend);
1428
4.95M
                    ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
1429
4.95M
                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
1430
4.95M
                    anchor += advance;
1431
4.95M
                    ip = anchor;
1432
4.95M
            }   }
1433
4.21M
            DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]);
1434
1435
            /* update all costs */
1436
4.21M
            ZSTD_setBasePrices(optStatePtr, optLevel);
1437
4.21M
        }
1438
4.21M
    }   /* while (ip < ilimit) */
1439
1440
    /* Return the last literals size */
1441
124k
    return (size_t)(iend - anchor);
1442
124k
}
1443
#endif /* build exclusions */
1444
1445
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
1446
static size_t ZSTD_compressBlock_opt0(
1447
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1448
        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1449
11.5k
{
1450
11.5k
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
1451
11.5k
}
1452
#endif
1453
1454
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
1455
static size_t ZSTD_compressBlock_opt2(
1456
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1457
        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1458
112k
{
1459
112k
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
1460
112k
}
1461
#endif
1462
1463
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
1464
size_t ZSTD_compressBlock_btopt(
1465
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1466
        const void* src, size_t srcSize)
1467
11.5k
{
1468
11.5k
    DEBUGLOG(5, "ZSTD_compressBlock_btopt");
1469
11.5k
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1470
11.5k
}
1471
#endif
1472
1473
1474
1475
1476
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
1477
/* ZSTD_initStats_ultra():
1478
 * make a first compression pass, just to seed stats with more accurate starting values.
1479
 * only works on first block, with no dictionary and no ldm.
1480
 * this function cannot error out, its narrow contract must be respected.
1481
 */
1482
static
1483
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1484
void ZSTD_initStats_ultra(ZSTD_MatchState_t* ms,
1485
                          SeqStore_t* seqStore,
1486
                          U32 rep[ZSTD_REP_NUM],
1487
                    const void* src, size_t srcSize)
1488
53.6k
{
1489
53.6k
    U32 tmpRep[ZSTD_REP_NUM];  /* updated rep codes will sink here */
1490
53.6k
    ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
1491
1492
53.6k
    DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
1493
53.6k
    assert(ms->opt.litLengthSum == 0);    /* first block */
1494
53.6k
    assert(seqStore->sequences == seqStore->sequencesStart);   /* no ldm */
1495
53.6k
    assert(ms->window.dictLimit == ms->window.lowLimit);   /* no dictionary */
1496
53.6k
    assert(ms->window.dictLimit - ms->nextToUpdate <= 1);  /* no prefix (note: intentional overflow, defined as 2-complement) */
1497
1498
53.6k
    ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict);   /* generate stats into ms->opt*/
1499
1500
    /* invalidate first scan from history, only keep entropy stats */
1501
53.6k
    ZSTD_resetSeqStore(seqStore);
1502
53.6k
    ms->window.base -= srcSize;
1503
53.6k
    ms->window.dictLimit += (U32)srcSize;
1504
53.6k
    ms->window.lowLimit = ms->window.dictLimit;
1505
53.6k
    ms->nextToUpdate = ms->window.dictLimit;
1506
1507
53.6k
}
1508
1509
size_t ZSTD_compressBlock_btultra(
1510
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1511
        const void* src, size_t srcSize)
1512
5.60k
{
1513
5.60k
    DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
1514
5.60k
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1515
5.60k
}
1516
1517
size_t ZSTD_compressBlock_btultra2(
1518
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1519
        const void* src, size_t srcSize)
1520
53.6k
{
1521
53.6k
    U32 const curr = (U32)((const BYTE*)src - ms->window.base);
1522
53.6k
    DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
1523
1524
    /* 2-passes strategy:
1525
     * this strategy makes a first pass over first block to collect statistics
1526
     * in order to seed next round's statistics with it.
1527
     * After 1st pass, function forgets history, and starts a new block.
1528
     * Consequently, this can only work if no data has been previously loaded in tables,
1529
     * aka, no dictionary, no prefix, no ldm preprocessing.
1530
     * The compression ratio gain is generally small (~0.5% on first block),
1531
     * the cost is 2x cpu time on first block. */
1532
53.6k
    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
1533
53.6k
    if ( (ms->opt.litLengthSum==0)   /* first block */
1534
53.6k
      && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
1535
53.6k
      && (ms->window.dictLimit == ms->window.lowLimit)   /* no dictionary */
1536
53.6k
      && (curr == ms->window.dictLimit)    /* start of frame, nothing already loaded nor skipped */
1537
53.6k
      && (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */
1538
53.6k
      ) {
1539
53.6k
        ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
1540
53.6k
    }
1541
1542
53.6k
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1543
53.6k
}
1544
#endif
1545
1546
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
1547
size_t ZSTD_compressBlock_btopt_dictMatchState(
1548
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1549
        const void* src, size_t srcSize)
1550
0
{
1551
0
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1552
0
}
1553
1554
size_t ZSTD_compressBlock_btopt_extDict(
1555
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1556
        const void* src, size_t srcSize)
1557
0
{
1558
0
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1559
0
}
1560
#endif
1561
1562
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
1563
size_t ZSTD_compressBlock_btultra_dictMatchState(
1564
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1565
        const void* src, size_t srcSize)
1566
0
{
1567
0
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1568
0
}
1569
1570
size_t ZSTD_compressBlock_btultra_extDict(
1571
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1572
        const void* src, size_t srcSize)
1573
0
{
1574
0
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1575
0
}
1576
#endif
1577
1578
/* note : no btultra2 variant for extDict nor dictMatchState,
1579
 * because btultra2 is not meant to work with dictionaries
1580
 * and is only specific for the first block (no prefix) */