Coverage Report

Created: 2025-12-14 06:55

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/zlib-ng/match_tpl.h
Line
Count
Source
1
/* match_tpl.h -- find longest match template for compare256 variants
2
 *
3
 * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
4
 * For conditions of distribution and use, see copyright notice in zlib.h
5
 *
6
 * Portions copyright (C) 2014-2021 Konstantin Nosov
7
 *  Fast-zlib optimized longest_match
8
 *  https://github.com/gildor2/fast_zlib
9
 */
10
11
#include "insert_string_p.h"
12
13
47.5M
#define EARLY_EXIT_TRIGGER_LEVEL 5
14
15
/* Set match_start to the longest match starting at the given string and
16
 * return its length. Matches shorter or equal to prev_length are discarded,
17
 * in which case the result is equal to prev_length and match_start is garbage.
18
 *
19
 * IN assertions: cur_match is the head of the hash chain for the current
20
 * string (strstart) and its distance is <= MAX_DIST, and prev_length >=1
21
 * OUT assertion: the match length is not greater than s->lookahead
22
 *
23
 * The LONGEST_MATCH_SLOW variant spends more time to attempt to find longer
24
 * matches once a match has already been found.
25
 */
26
47.5M
Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
27
47.5M
    unsigned int strstart = s->strstart;
28
47.5M
    const unsigned wmask = W_MASK(s);
29
47.5M
    unsigned char *window = s->window;
30
47.5M
    unsigned char *scan = window + strstart;
31
47.5M
    Z_REGISTER unsigned char *mbase_start = window;
32
47.5M
    Z_REGISTER unsigned char *mbase_end;
33
47.5M
    const Pos *prev = s->prev;
34
47.5M
    Pos limit;
35
#ifdef LONGEST_MATCH_SLOW
36
    Pos limit_base;
37
#else
38
    int32_t early_exit;
39
#endif
40
47.5M
    uint32_t chain_length, nice_match, best_len, offset;
41
47.5M
    uint32_t lookahead = s->lookahead;
42
47.5M
    Pos match_offset = 0;
43
47.5M
    uint64_t scan_start;
44
47.5M
    uint64_t scan_end;
45
46
47.5M
#define GOTO_NEXT_CHAIN \
47
127M
    if (--chain_length && (cur_match = prev[cur_match & wmask]) > limit) \
48
80.1M
        continue; \
49
47.3M
    return best_len;
50
51
    /* The code is optimized for STD_MAX_MATCH-2 multiple of 16. */
52
47.5M
    Assert(STD_MAX_MATCH == 258, "Code too clever");
53
54
47.5M
    best_len = s->prev_length ? s->prev_length : STD_MIN_MATCH-1;
55
56
    /* Calculate read offset which should only extend an extra byte
57
     * to find the next best match length.
58
     */
59
47.5M
    offset = best_len-1;
60
47.5M
    if (best_len >= sizeof(uint32_t)) {
61
0
        offset -= 2;
62
0
        if (best_len >= sizeof(uint64_t))
63
0
            offset -= 4;
64
0
    }
65
66
47.5M
    scan_start = zng_memread_8(scan);
67
47.5M
    scan_end = zng_memread_8(scan+offset);
68
47.5M
    mbase_end  = (mbase_start+offset);
69
70
    /* Do not waste too much time if we already have a good match */
71
47.5M
    chain_length = s->max_chain_length;
72
47.5M
    if (best_len >= s->good_match)
73
0
        chain_length >>= 2;
74
47.5M
    nice_match = (uint32_t)s->nice_match;
75
76
    /* Stop when cur_match becomes <= limit. To simplify the code,
77
     * we prevent matches with the string of window index 0
78
     */
79
47.5M
    limit = strstart > MAX_DIST(s) ? (Pos)(strstart - MAX_DIST(s)) : 0;
80
#ifdef LONGEST_MATCH_SLOW
81
    limit_base = limit;
82
0
    if (best_len >= STD_MIN_MATCH) {
83
        /* We're continuing search (lazy evaluation). */
84
0
        uint32_t i, hash;
85
0
        Pos pos;
86
87
        /* Find a most distant chain starting from scan with index=1 (index=0 corresponds
88
         * to cur_match). We cannot use s->prev[strstart+1,...] immediately, because
89
         * these strings are not yet inserted into the hash table.
90
         */
91
        // use update_hash_roll for deflate_slow
92
        hash = update_hash_roll(0, scan[1]);
93
        hash = update_hash_roll(hash, scan[2]);
94
95
0
        for (i = 3; i <= best_len; i++) {
96
            // use update_hash_roll for deflate_slow
97
0
            hash = update_hash_roll(hash, scan[i]);
98
            /* If we're starting with best_len >= 3, we can use offset search. */
99
0
            pos = s->head[hash];
100
0
            if (pos < cur_match) {
101
0
                match_offset = (Pos)(i - 2);
102
0
                cur_match = pos;
103
0
            }
104
0
        }
105
106
        /* Update offset-dependent variables */
107
0
        limit = limit_base+match_offset;
108
0
        if (cur_match <= limit)
109
0
            goto break_matching;
110
0
        mbase_start -= match_offset;
111
0
        mbase_end -= match_offset;
112
0
    }
113
#else
114
47.5M
    early_exit = s->level < EARLY_EXIT_TRIGGER_LEVEL;
115
#endif
116
0
    Assert((unsigned long)strstart <= s->window_size - MIN_LOOKAHEAD, "need lookahead");
117
52.3M
    for (;;) {
118
52.3M
        if (cur_match >= strstart)
119
0
            break;
120
121
        /* Skip to next match if the match length cannot increase or if the match length is
122
         * less than 2. Note that the checks below for insufficient lookahead only occur
123
         * occasionally for performance reasons.
124
         * Therefore uninitialized memory will be accessed and conditional jumps will be made
125
         * that depend on those values. However the length of the match is limited to the
126
         * lookahead, so the output of deflate is not affected by the uninitialized values.
127
         */
128
52.3M
        if (best_len < sizeof(uint32_t)) {
129
61.2M
            for (;;) {
130
61.2M
                if (zng_memcmp_2(mbase_end+cur_match, &scan_end) == 0 &&
131
3.70M
                    zng_memcmp_2(mbase_start+cur_match, &scan_start) == 0)
132
3.69M
                    break;
133
57.5M
                GOTO_NEXT_CHAIN;
134
57.5M
            }
135
47.5M
        } else if (best_len >= sizeof(uint64_t)) {
136
32.5M
            for (;;) {
137
32.5M
                if (zng_memcmp_8(mbase_end+cur_match, &scan_end) == 0 &&
138
1.66M
                    zng_memcmp_8(mbase_start+cur_match, &scan_start) == 0)
139
1.58M
                    break;
140
30.9M
                GOTO_NEXT_CHAIN;
141
30.9M
            }
142
2.54M
        } else {
143
33.8M
            for (;;) {
144
33.8M
                if (zng_memcmp_4(mbase_end+cur_match, &scan_end) == 0 &&
145
832k
                    zng_memcmp_4(mbase_start+cur_match, &scan_start) == 0)
146
832k
                    break;
147
33.0M
                GOTO_NEXT_CHAIN;
148
33.0M
            }
149
2.27M
        }
150
6.11M
        uint32_t len = COMPARE256(scan+2, mbase_start+cur_match+2) + 2;
151
6.11M
        Assert(scan+len <= window+(unsigned)(s->window_size-1), "wild scan");
152
153
6.11M
        if (len > best_len) {
154
5.91M
            uint32_t match_start = cur_match - match_offset;
155
5.91M
            s->match_start = match_start;
156
157
            /* Do not look for matches beyond the end of the input. */
158
5.91M
            if (len > lookahead)
159
229
                return lookahead;
160
5.91M
            best_len = len;
161
5.91M
            if (best_len >= nice_match)
162
146k
                return best_len;
163
164
5.77M
            offset = best_len-1;
165
5.77M
            if (best_len >= sizeof(uint32_t)) {
166
5.77M
                offset -= 2;
167
5.77M
                if (best_len >= sizeof(uint64_t))
168
2.58M
                    offset -= 4;
169
5.77M
            }
170
171
5.77M
            scan_end = zng_memread_8(scan+offset);
172
173
#ifdef LONGEST_MATCH_SLOW
174
            /* Look for a better string offset */
175
0
            if (UNLIKELY(len > STD_MIN_MATCH && match_start + len < strstart)) {
176
0
                Pos pos, next_pos;
177
0
                uint32_t i, hash;
178
0
                unsigned char *scan_endstr;
179
180
                /* Go back to offset 0 */
181
                cur_match -= match_offset;
182
                match_offset = 0;
183
                next_pos = cur_match;
184
0
                for (i = 0; i <= len - STD_MIN_MATCH; i++) {
185
0
                    pos = prev[(cur_match + i) & wmask];
186
0
                    if (pos < next_pos) {
187
                        /* Hash chain is more distant, use it */
188
0
                        if (pos <= limit_base + i)
189
0
                            goto break_matching;
190
0
                        next_pos = pos;
191
0
                        match_offset = (Pos)i;
192
0
                    }
193
0
                }
194
                /* Switch cur_match to next_pos chain */
195
0
                cur_match = next_pos;
196
197
                /* Try hash head at len-(STD_MIN_MATCH-1) position to see if we could get
198
                 * a better cur_match at the end of string. Using (STD_MIN_MATCH-1) lets
199
                 * us include one more byte into hash - the byte which will be checked
200
                 * in main loop now, and which allows to grow match by 1.
201
                 */
202
0
                scan_endstr = scan + len - (STD_MIN_MATCH+1);
203
204
                // use update_hash_roll for deflate_slow
205
0
                hash = update_hash_roll(0, scan_endstr[0]);
206
0
                hash = update_hash_roll(hash, scan_endstr[1]);
207
0
                hash = update_hash_roll(hash, scan_endstr[2]);
208
209
0
                pos = s->head[hash];
210
0
                if (pos < cur_match) {
211
0
                    match_offset = (Pos)(len - (STD_MIN_MATCH+1));
212
0
                    if (pos <= limit_base + match_offset)
213
0
                        goto break_matching;
214
0
                    cur_match = pos;
215
0
                }
216
217
                /* Update offset-dependent variables */
218
0
                limit = limit_base+match_offset;
219
0
                mbase_start = window-match_offset;
220
0
                mbase_end = (mbase_start+offset);
221
0
                continue;
222
0
            }
223
0
#endif
224
0
            mbase_end = (mbase_start+offset);
225
0
        }
226
#ifndef LONGEST_MATCH_SLOW
227
194k
        else if (UNLIKELY(early_exit)) {
228
            /* The probability of finding a match later if we here is pretty low, so for
229
             * performance it's best to outright stop here for the lower compression levels
230
             */
231
0
            break;
232
0
        }
233
5.96M
#endif
234
5.96M
        GOTO_NEXT_CHAIN;
235
5.96M
    }
236
0
    return best_len;
237
238
#ifdef LONGEST_MATCH_SLOW
239
0
break_matching:
240
241
0
    if (best_len < s->lookahead)
242
0
        return best_len;
243
244
0
    return s->lookahead;
245
#endif
246
0
}
Unexecuted instantiation: longest_match_sse2
Unexecuted instantiation: longest_match_slow_sse2
longest_match_avx2
Line
Count
Source
26
47.5M
Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
27
47.5M
    unsigned int strstart = s->strstart;
28
47.5M
    const unsigned wmask = W_MASK(s);
29
47.5M
    unsigned char *window = s->window;
30
47.5M
    unsigned char *scan = window + strstart;
31
47.5M
    Z_REGISTER unsigned char *mbase_start = window;
32
47.5M
    Z_REGISTER unsigned char *mbase_end;
33
47.5M
    const Pos *prev = s->prev;
34
47.5M
    Pos limit;
35
#ifdef LONGEST_MATCH_SLOW
36
    Pos limit_base;
37
#else
38
47.5M
    int32_t early_exit;
39
47.5M
#endif
40
47.5M
    uint32_t chain_length, nice_match, best_len, offset;
41
47.5M
    uint32_t lookahead = s->lookahead;
42
47.5M
    Pos match_offset = 0;
43
47.5M
    uint64_t scan_start;
44
47.5M
    uint64_t scan_end;
45
46
47.5M
#define GOTO_NEXT_CHAIN \
47
47.5M
    if (--chain_length && (cur_match = prev[cur_match & wmask]) > limit) \
48
47.5M
        continue; \
49
47.5M
    return best_len;
50
51
    /* The code is optimized for STD_MAX_MATCH-2 multiple of 16. */
52
47.5M
    Assert(STD_MAX_MATCH == 258, "Code too clever");
53
54
47.5M
    best_len = s->prev_length ? s->prev_length : STD_MIN_MATCH-1;
55
56
    /* Calculate read offset which should only extend an extra byte
57
     * to find the next best match length.
58
     */
59
47.5M
    offset = best_len-1;
60
47.5M
    if (best_len >= sizeof(uint32_t)) {
61
0
        offset -= 2;
62
0
        if (best_len >= sizeof(uint64_t))
63
0
            offset -= 4;
64
0
    }
65
66
47.5M
    scan_start = zng_memread_8(scan);
67
47.5M
    scan_end = zng_memread_8(scan+offset);
68
47.5M
    mbase_end  = (mbase_start+offset);
69
70
    /* Do not waste too much time if we already have a good match */
71
47.5M
    chain_length = s->max_chain_length;
72
47.5M
    if (best_len >= s->good_match)
73
0
        chain_length >>= 2;
74
47.5M
    nice_match = (uint32_t)s->nice_match;
75
76
    /* Stop when cur_match becomes <= limit. To simplify the code,
77
     * we prevent matches with the string of window index 0
78
     */
79
47.5M
    limit = strstart > MAX_DIST(s) ? (Pos)(strstart - MAX_DIST(s)) : 0;
80
#ifdef LONGEST_MATCH_SLOW
81
    limit_base = limit;
82
    if (best_len >= STD_MIN_MATCH) {
83
        /* We're continuing search (lazy evaluation). */
84
        uint32_t i, hash;
85
        Pos pos;
86
87
        /* Find a most distant chain starting from scan with index=1 (index=0 corresponds
88
         * to cur_match). We cannot use s->prev[strstart+1,...] immediately, because
89
         * these strings are not yet inserted into the hash table.
90
         */
91
        // use update_hash_roll for deflate_slow
92
        hash = update_hash_roll(0, scan[1]);
93
        hash = update_hash_roll(hash, scan[2]);
94
95
        for (i = 3; i <= best_len; i++) {
96
            // use update_hash_roll for deflate_slow
97
            hash = update_hash_roll(hash, scan[i]);
98
            /* If we're starting with best_len >= 3, we can use offset search. */
99
            pos = s->head[hash];
100
            if (pos < cur_match) {
101
                match_offset = (Pos)(i - 2);
102
                cur_match = pos;
103
            }
104
        }
105
106
        /* Update offset-dependent variables */
107
        limit = limit_base+match_offset;
108
        if (cur_match <= limit)
109
            goto break_matching;
110
        mbase_start -= match_offset;
111
        mbase_end -= match_offset;
112
    }
113
#else
114
47.5M
    early_exit = s->level < EARLY_EXIT_TRIGGER_LEVEL;
115
47.5M
#endif
116
47.5M
    Assert((unsigned long)strstart <= s->window_size - MIN_LOOKAHEAD, "need lookahead");
117
52.3M
    for (;;) {
118
52.3M
        if (cur_match >= strstart)
119
0
            break;
120
121
        /* Skip to next match if the match length cannot increase or if the match length is
122
         * less than 2. Note that the checks below for insufficient lookahead only occur
123
         * occasionally for performance reasons.
124
         * Therefore uninitialized memory will be accessed and conditional jumps will be made
125
         * that depend on those values. However the length of the match is limited to the
126
         * lookahead, so the output of deflate is not affected by the uninitialized values.
127
         */
128
52.3M
        if (best_len < sizeof(uint32_t)) {
129
61.2M
            for (;;) {
130
61.2M
                if (zng_memcmp_2(mbase_end+cur_match, &scan_end) == 0 &&
131
3.70M
                    zng_memcmp_2(mbase_start+cur_match, &scan_start) == 0)
132
3.69M
                    break;
133
57.5M
                GOTO_NEXT_CHAIN;
134
57.5M
            }
135
47.5M
        } else if (best_len >= sizeof(uint64_t)) {
136
32.5M
            for (;;) {
137
32.5M
                if (zng_memcmp_8(mbase_end+cur_match, &scan_end) == 0 &&
138
1.66M
                    zng_memcmp_8(mbase_start+cur_match, &scan_start) == 0)
139
1.58M
                    break;
140
30.9M
                GOTO_NEXT_CHAIN;
141
30.9M
            }
142
2.54M
        } else {
143
33.8M
            for (;;) {
144
33.8M
                if (zng_memcmp_4(mbase_end+cur_match, &scan_end) == 0 &&
145
832k
                    zng_memcmp_4(mbase_start+cur_match, &scan_start) == 0)
146
832k
                    break;
147
33.0M
                GOTO_NEXT_CHAIN;
148
33.0M
            }
149
2.27M
        }
150
6.11M
        uint32_t len = COMPARE256(scan+2, mbase_start+cur_match+2) + 2;
151
6.11M
        Assert(scan+len <= window+(unsigned)(s->window_size-1), "wild scan");
152
153
6.11M
        if (len > best_len) {
154
5.91M
            uint32_t match_start = cur_match - match_offset;
155
5.91M
            s->match_start = match_start;
156
157
            /* Do not look for matches beyond the end of the input. */
158
5.91M
            if (len > lookahead)
159
229
                return lookahead;
160
5.91M
            best_len = len;
161
5.91M
            if (best_len >= nice_match)
162
146k
                return best_len;
163
164
5.77M
            offset = best_len-1;
165
5.77M
            if (best_len >= sizeof(uint32_t)) {
166
5.77M
                offset -= 2;
167
5.77M
                if (best_len >= sizeof(uint64_t))
168
2.58M
                    offset -= 4;
169
5.77M
            }
170
171
5.77M
            scan_end = zng_memread_8(scan+offset);
172
173
#ifdef LONGEST_MATCH_SLOW
174
            /* Look for a better string offset */
175
            if (UNLIKELY(len > STD_MIN_MATCH && match_start + len < strstart)) {
176
                Pos pos, next_pos;
177
                uint32_t i, hash;
178
                unsigned char *scan_endstr;
179
180
                /* Go back to offset 0 */
181
                cur_match -= match_offset;
182
                match_offset = 0;
183
                next_pos = cur_match;
184
                for (i = 0; i <= len - STD_MIN_MATCH; i++) {
185
                    pos = prev[(cur_match + i) & wmask];
186
                    if (pos < next_pos) {
187
                        /* Hash chain is more distant, use it */
188
                        if (pos <= limit_base + i)
189
                            goto break_matching;
190
                        next_pos = pos;
191
                        match_offset = (Pos)i;
192
                    }
193
                }
194
                /* Switch cur_match to next_pos chain */
195
                cur_match = next_pos;
196
197
                /* Try hash head at len-(STD_MIN_MATCH-1) position to see if we could get
198
                 * a better cur_match at the end of string. Using (STD_MIN_MATCH-1) lets
199
                 * us include one more byte into hash - the byte which will be checked
200
                 * in main loop now, and which allows to grow match by 1.
201
                 */
202
                scan_endstr = scan + len - (STD_MIN_MATCH+1);
203
204
                // use update_hash_roll for deflate_slow
205
                hash = update_hash_roll(0, scan_endstr[0]);
206
                hash = update_hash_roll(hash, scan_endstr[1]);
207
                hash = update_hash_roll(hash, scan_endstr[2]);
208
209
                pos = s->head[hash];
210
                if (pos < cur_match) {
211
                    match_offset = (Pos)(len - (STD_MIN_MATCH+1));
212
                    if (pos <= limit_base + match_offset)
213
                        goto break_matching;
214
                    cur_match = pos;
215
                }
216
217
                /* Update offset-dependent variables */
218
                limit = limit_base+match_offset;
219
                mbase_start = window-match_offset;
220
                mbase_end = (mbase_start+offset);
221
                continue;
222
            }
223
#endif
224
5.77M
            mbase_end = (mbase_start+offset);
225
5.77M
        }
226
194k
#ifndef LONGEST_MATCH_SLOW
227
194k
        else if (UNLIKELY(early_exit)) {
228
            /* The probability of finding a match later if we here is pretty low, so for
229
             * performance it's best to outright stop here for the lower compression levels
230
             */
231
0
            break;
232
0
        }
233
5.96M
#endif
234
5.96M
        GOTO_NEXT_CHAIN;
235
5.96M
    }
236
0
    return best_len;
237
238
#ifdef LONGEST_MATCH_SLOW
239
break_matching:
240
241
    if (best_len < s->lookahead)
242
        return best_len;
243
244
    return s->lookahead;
245
#endif
246
47.5M
}
Unexecuted instantiation: longest_match_slow_avx2
Unexecuted instantiation: longest_match_avx512
Unexecuted instantiation: longest_match_slow_avx512
247
248
#undef LONGEST_MATCH_SLOW
249
#undef LONGEST_MATCH