Coverage Report

Created: 2026-01-17 06:26

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/zlib-ng/match_tpl.h
Line
Count
Source
1
/* match_tpl.h -- find longest match template for compare256 variants
2
 *
3
 * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
4
 * For conditions of distribution and use, see copyright notice in zlib.h
5
 *
6
 * Portions copyright (C) 2014-2021 Konstantin Nosov
7
 *  Fast-zlib optimized longest_match
8
 *  https://github.com/gildor2/fast_zlib
9
 */
10
11
#include "insert_string_p.h"
12
13
24.2M
#define EARLY_EXIT_TRIGGER_LEVEL 5
14
15
#define GOTO_NEXT_CHAIN \
16
284M
    if (--chain_length && (cur_match = prev[cur_match & wmask]) > limit) \
17
235M
        continue; \
18
48.1M
    return best_len;
19
20
/* Set match_start to the longest match starting at the given string and
21
 * return its length. Matches shorter or equal to prev_length are discarded,
22
 * in which case the result is equal to prev_length and match_start is garbage.
23
 *
24
 * IN assertions: cur_match is the head of the hash chain for the current
25
 * string (strstart) and its distance is <= MAX_DIST, and prev_length >=1
26
 * OUT assertion: the match length is not greater than s->lookahead
27
 *
28
 * The LONGEST_MATCH_SLOW variant spends more time to attempt to find longer
29
 * matches once a match has already been found.
30
 */
31
51.3M
Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, uint32_t cur_match) {
32
51.3M
    const unsigned wmask = W_MASK(s);
33
51.3M
    unsigned int strstart = s->strstart;
34
51.3M
    const unsigned char *window = s->window;
35
51.3M
    const Pos *prev = s->prev;
36
#ifdef LONGEST_MATCH_SLOW
37
    const Pos *head = s->head;
38
#endif
39
51.3M
    const unsigned char *scan;
40
51.3M
    const unsigned char *mbase_start = window;
41
51.3M
    const unsigned char *mbase_end;
42
51.3M
    uint32_t limit;
43
#ifdef LONGEST_MATCH_SLOW
44
    uint32_t limit_base;
45
#else
46
    int32_t early_exit;
47
#endif
48
51.3M
    uint32_t chain_length = s->max_chain_length;
49
51.3M
    uint32_t nice_match = (uint32_t)s->nice_match;
50
51.3M
    uint32_t best_len, offset;
51
51.3M
    uint32_t lookahead = s->lookahead;
52
51.3M
    uint32_t match_offset = 0;
53
51.3M
    uint64_t scan_start;
54
51.3M
    uint64_t scan_end;
55
56
    /* The code is optimized for STD_MAX_MATCH-2 multiple of 16. */
57
51.3M
    Assert(STD_MAX_MATCH == 258, "Code too clever");
58
59
51.3M
    scan = window + strstart;
60
51.3M
    best_len = s->prev_length ? s->prev_length : STD_MIN_MATCH-1;
61
62
    /* Calculate read offset which should only extend an extra byte
63
     * to find the next best match length.
64
     */
65
51.3M
    offset = best_len-1;
66
51.3M
    if (best_len >= sizeof(uint32_t)) {
67
3.89M
        offset -= 2;
68
3.89M
        if (best_len >= sizeof(uint64_t))
69
498k
            offset -= 4;
70
3.89M
    }
71
72
51.3M
    scan_start = zng_memread_8(scan);
73
51.3M
    scan_end = zng_memread_8(scan+offset);
74
51.3M
    mbase_end  = (mbase_start+offset);
75
76
    /* Do not waste too much time if we already have a good match */
77
51.3M
    if (best_len >= s->good_match)
78
220k
        chain_length >>= 2;
79
80
    /* Stop when cur_match becomes <= limit. To simplify the code,
81
     * we prevent matches with the string of window index 0
82
     */
83
51.3M
    limit = strstart > MAX_DIST(s) ? (strstart - MAX_DIST(s)) : 0;
84
#ifdef LONGEST_MATCH_SLOW
85
    limit_base = limit;
86
27.1M
    if (best_len >= STD_MIN_MATCH) {
87
        /* We're continuing search (lazy evaluation). */
88
5.92M
        uint32_t hash;
89
5.92M
        uint32_t pos;
90
91
        /* Find a most distant chain starting from scan with index=1 (index=0 corresponds
92
         * to cur_match). We cannot use s->prev[strstart+1,...] immediately, because
93
         * these strings are not yet inserted into the hash table.
94
         */
95
        // use update_hash_roll for deflate_slow
96
        hash = update_hash_roll(0, scan[1]);
97
        hash = update_hash_roll(hash, scan[2]);
98
99
22.9M
        for (uint32_t i = 3; i <= best_len; i++) {
100
            // use update_hash_roll for deflate_slow
101
17.0M
            hash = update_hash_roll(hash, scan[i]);
102
            /* If we're starting with best_len >= 3, we can use offset search. */
103
17.0M
            pos = head[hash];
104
17.0M
            if (pos < cur_match) {
105
4.94M
                match_offset = i - 2;
106
4.94M
                cur_match = pos;
107
4.94M
            }
108
17.0M
        }
109
110
        /* Update offset-dependent variables */
111
5.92M
        limit = limit_base+match_offset;
112
5.92M
        if (cur_match <= limit)
113
1.29M
            goto break_matching;
114
4.63M
        mbase_start -= match_offset;
115
4.63M
        mbase_end -= match_offset;
116
4.63M
    }
117
#else
118
24.2M
    early_exit = s->level < EARLY_EXIT_TRIGGER_LEVEL;
119
#endif
120
25.8M
    Assert((unsigned long)strstart <= s->window_size - MIN_LOOKAHEAD, "need lookahead");
121
64.2M
    for (;;) {
122
64.2M
        if (cur_match >= strstart)
123
30
            break;
124
125
        /* Skip to next match if the match length cannot increase or if the match length is
126
         * less than 2. Note that the checks below for insufficient lookahead only occur
127
         * occasionally for performance reasons.
128
         * Therefore uninitialized memory will be accessed and conditional jumps will be made
129
         * that depend on those values. However the length of the match is limited to the
130
         * lookahead, so the output of deflate is not affected by the uninitialized values.
131
         */
132
64.2M
        if (best_len < sizeof(uint32_t)) {
133
134M
            for (;;) {
134
134M
                if (zng_memcmp_2(mbase_end+cur_match, &scan_end) == 0 &&
135
23.7M
                    zng_memcmp_2(mbase_start+cur_match, &scan_start) == 0)
136
13.2M
                    break;
137
120M
                GOTO_NEXT_CHAIN;
138
120M
            }
139
51.6M
        } else if (best_len >= sizeof(uint64_t)) {
140
74.7M
            for (;;) {
141
74.7M
                if (zng_memcmp_8(mbase_end+cur_match, &scan_end) == 0 &&
142
4.37M
                    zng_memcmp_8(mbase_start+cur_match, &scan_start) == 0)
143
3.42M
                    break;
144
71.3M
                GOTO_NEXT_CHAIN;
145
71.3M
            }
146
8.14M
        } else {
147
80.4M
            for (;;) {
148
80.4M
                if (zng_memcmp_4(mbase_end+cur_match, &scan_end) == 0 &&
149
4.29M
                    zng_memcmp_4(mbase_start+cur_match, &scan_start) == 0)
150
1.68M
                    break;
151
78.7M
                GOTO_NEXT_CHAIN;
152
78.7M
            }
153
8.14M
        }
154
18.3M
        uint32_t len = COMPARE256(scan+2, mbase_start+cur_match+2) + 2;
155
18.3M
        Assert(scan+len <= window+(unsigned)(s->window_size-1), "wild scan");
156
157
18.3M
        if (len > best_len) {
158
16.8M
            uint32_t match_start = cur_match - match_offset;
159
16.8M
            s->match_start = match_start;
160
161
            /* Do not look for matches beyond the end of the input. */
162
16.8M
            if (len > lookahead)
163
966
                return lookahead;
164
16.8M
            if (len >= nice_match)
165
546k
                return len;
166
167
16.3M
            best_len = len;
168
169
16.3M
            offset = best_len-1;
170
16.3M
            if (best_len >= sizeof(uint32_t)) {
171
11.1M
                offset -= 2;
172
11.1M
                if (best_len >= sizeof(uint64_t))
173
3.07M
                    offset -= 4;
174
11.1M
            }
175
176
16.3M
            scan_end = zng_memread_8(scan+offset);
177
178
#ifdef LONGEST_MATCH_SLOW
179
            /* Look for a better string offset */
180
10.1M
            if (UNLIKELY(len > STD_MIN_MATCH && match_start + len < strstart)) {
181
4.74M
                const unsigned char *scan_endstr;
182
4.74M
                uint32_t hash;
183
4.74M
                uint32_t pos, next_pos;
184
185
                /* Go back to offset 0 */
186
                cur_match -= match_offset;
187
                match_offset = 0;
188
                next_pos = cur_match;
189
60.9M
                for (uint32_t i = 0; i <= len - STD_MIN_MATCH; i++) {
190
57.6M
                    pos = prev[(cur_match + i) & wmask];
191
57.6M
                    if (pos < next_pos) {
192
                        /* Hash chain is more distant, use it */
193
6.48M
                        if (pos <= limit_base + i)
194
1.41M
                            goto break_matching;
195
5.07M
                        next_pos = pos;
196
5.07M
                        match_offset = i;
197
5.07M
                    }
198
57.6M
                }
199
                /* Switch cur_match to next_pos chain */
200
3.33M
                cur_match = next_pos;
201
202
                /* Try hash head at len-(STD_MIN_MATCH-1) position to see if we could get
203
                 * a better cur_match at the end of string. Using (STD_MIN_MATCH-1) lets
204
                 * us include one more byte into hash - the byte which will be checked
205
                 * in main loop now, and which allows to grow match by 1.
206
                 */
207
3.33M
                scan_endstr = scan + len - (STD_MIN_MATCH+1);
208
209
                // use update_hash_roll for deflate_slow
210
3.33M
                hash = update_hash_roll(0, scan_endstr[0]);
211
3.33M
                hash = update_hash_roll(hash, scan_endstr[1]);
212
3.33M
                hash = update_hash_roll(hash, scan_endstr[2]);
213
214
3.33M
                pos = head[hash];
215
3.33M
                if (pos < cur_match) {
216
0
                    match_offset = len - (STD_MIN_MATCH+1);
217
0
                    if (pos <= limit_base + match_offset)
218
0
                        goto break_matching;
219
0
                    cur_match = pos;
220
0
                }
221
222
                /* Update offset-dependent variables */
223
3.33M
                limit = limit_base+match_offset;
224
3.33M
                mbase_start = window-match_offset;
225
3.33M
                mbase_end = (mbase_start+offset);
226
3.33M
                continue;
227
3.33M
            }
228
5.38M
#endif
229
5.38M
            mbase_end = (mbase_start+offset);
230
5.38M
        }
231
#ifndef LONGEST_MATCH_SLOW
232
171k
        else if (UNLIKELY(early_exit)) {
233
            /* The probability of finding a match later if we here is pretty low, so for
234
             * performance it's best to outright stop here for the lower compression levels
235
             */
236
2.60k
            break;
237
2.60k
        }
238
6.38M
#endif
239
13.0M
        GOTO_NEXT_CHAIN;
240
13.0M
    }
241
2.63k
    return best_len;
242
243
#ifdef LONGEST_MATCH_SLOW
244
2.70M
break_matching:
245
246
2.70M
    if (best_len < lookahead)
247
2.70M
        return best_len;
248
249
286
    return lookahead;
250
#endif
251
2.70M
}
Unexecuted instantiation: longest_match_sse2
Unexecuted instantiation: longest_match_slow_sse2
longest_match_avx2
Line
Count
Source
31
24.2M
Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, uint32_t cur_match) {
32
24.2M
    const unsigned wmask = W_MASK(s);
33
24.2M
    unsigned int strstart = s->strstart;
34
24.2M
    const unsigned char *window = s->window;
35
24.2M
    const Pos *prev = s->prev;
36
#ifdef LONGEST_MATCH_SLOW
37
    const Pos *head = s->head;
38
#endif
39
24.2M
    const unsigned char *scan;
40
24.2M
    const unsigned char *mbase_start = window;
41
24.2M
    const unsigned char *mbase_end;
42
24.2M
    uint32_t limit;
43
#ifdef LONGEST_MATCH_SLOW
44
    uint32_t limit_base;
45
#else
46
24.2M
    int32_t early_exit;
47
24.2M
#endif
48
24.2M
    uint32_t chain_length = s->max_chain_length;
49
24.2M
    uint32_t nice_match = (uint32_t)s->nice_match;
50
24.2M
    uint32_t best_len, offset;
51
24.2M
    uint32_t lookahead = s->lookahead;
52
24.2M
    uint32_t match_offset = 0;
53
24.2M
    uint64_t scan_start;
54
24.2M
    uint64_t scan_end;
55
56
    /* The code is optimized for STD_MAX_MATCH-2 multiple of 16. */
57
24.2M
    Assert(STD_MAX_MATCH == 258, "Code too clever");
58
59
24.2M
    scan = window + strstart;
60
24.2M
    best_len = s->prev_length ? s->prev_length : STD_MIN_MATCH-1;
61
62
    /* Calculate read offset which should only extend an extra byte
63
     * to find the next best match length.
64
     */
65
24.2M
    offset = best_len-1;
66
24.2M
    if (best_len >= sizeof(uint32_t)) {
67
541k
        offset -= 2;
68
541k
        if (best_len >= sizeof(uint64_t))
69
208k
            offset -= 4;
70
541k
    }
71
72
24.2M
    scan_start = zng_memread_8(scan);
73
24.2M
    scan_end = zng_memread_8(scan+offset);
74
24.2M
    mbase_end  = (mbase_start+offset);
75
76
    /* Do not waste too much time if we already have a good match */
77
24.2M
    if (best_len >= s->good_match)
78
175k
        chain_length >>= 2;
79
80
    /* Stop when cur_match becomes <= limit. To simplify the code,
81
     * we prevent matches with the string of window index 0
82
     */
83
24.2M
    limit = strstart > MAX_DIST(s) ? (strstart - MAX_DIST(s)) : 0;
84
#ifdef LONGEST_MATCH_SLOW
85
    limit_base = limit;
86
    if (best_len >= STD_MIN_MATCH) {
87
        /* We're continuing search (lazy evaluation). */
88
        uint32_t hash;
89
        uint32_t pos;
90
91
        /* Find a most distant chain starting from scan with index=1 (index=0 corresponds
92
         * to cur_match). We cannot use s->prev[strstart+1,...] immediately, because
93
         * these strings are not yet inserted into the hash table.
94
         */
95
        // use update_hash_roll for deflate_slow
96
        hash = update_hash_roll(0, scan[1]);
97
        hash = update_hash_roll(hash, scan[2]);
98
99
        for (uint32_t i = 3; i <= best_len; i++) {
100
            // use update_hash_roll for deflate_slow
101
            hash = update_hash_roll(hash, scan[i]);
102
            /* If we're starting with best_len >= 3, we can use offset search. */
103
            pos = head[hash];
104
            if (pos < cur_match) {
105
                match_offset = i - 2;
106
                cur_match = pos;
107
            }
108
        }
109
110
        /* Update offset-dependent variables */
111
        limit = limit_base+match_offset;
112
        if (cur_match <= limit)
113
            goto break_matching;
114
        mbase_start -= match_offset;
115
        mbase_end -= match_offset;
116
    }
117
#else
118
24.2M
    early_exit = s->level < EARLY_EXIT_TRIGGER_LEVEL;
119
24.2M
#endif
120
24.2M
    Assert((unsigned long)strstart <= s->window_size - MIN_LOOKAHEAD, "need lookahead");
121
29.0M
    for (;;) {
122
29.0M
        if (cur_match >= strstart)
123
30
            break;
124
125
        /* Skip to next match if the match length cannot increase or if the match length is
126
         * less than 2. Note that the checks below for insufficient lookahead only occur
127
         * occasionally for performance reasons.
128
         * Therefore uninitialized memory will be accessed and conditional jumps will be made
129
         * that depend on those values. However the length of the match is limited to the
130
         * lookahead, so the output of deflate is not affected by the uninitialized values.
131
         */
132
29.0M
        if (best_len < sizeof(uint32_t)) {
133
30.0M
            for (;;) {
134
30.0M
                if (zng_memcmp_2(mbase_end+cur_match, &scan_end) == 0 &&
135
4.38M
                    zng_memcmp_2(mbase_start+cur_match, &scan_start) == 0)
136
4.38M
                    break;
137
25.7M
                GOTO_NEXT_CHAIN;
138
25.7M
            }
139
23.6M
        } else if (best_len >= sizeof(uint64_t)) {
140
31.7M
            for (;;) {
141
31.7M
                if (zng_memcmp_8(mbase_end+cur_match, &scan_end) == 0 &&
142
1.63M
                    zng_memcmp_8(mbase_start+cur_match, &scan_start) == 0)
143
1.47M
                    break;
144
30.2M
                GOTO_NEXT_CHAIN;
145
30.2M
            }
146
3.22M
        } else {
147
29.8M
            for (;;) {
148
29.8M
                if (zng_memcmp_4(mbase_end+cur_match, &scan_end) == 0 &&
149
1.03M
                    zng_memcmp_4(mbase_start+cur_match, &scan_start) == 0)
150
1.02M
                    break;
151
28.8M
                GOTO_NEXT_CHAIN;
152
28.8M
            }
153
3.22M
        }
154
6.88M
        uint32_t len = COMPARE256(scan+2, mbase_start+cur_match+2) + 2;
155
6.88M
        Assert(scan+len <= window+(unsigned)(s->window_size-1), "wild scan");
156
157
6.88M
        if (len > best_len) {
158
6.71M
            uint32_t match_start = cur_match - match_offset;
159
6.71M
            s->match_start = match_start;
160
161
            /* Do not look for matches beyond the end of the input. */
162
6.71M
            if (len > lookahead)
163
547
                return lookahead;
164
6.71M
            if (len >= nice_match)
165
498k
                return len;
166
167
6.21M
            best_len = len;
168
169
6.21M
            offset = best_len-1;
170
6.21M
            if (best_len >= sizeof(uint32_t)) {
171
6.21M
                offset -= 2;
172
6.21M
                if (best_len >= sizeof(uint64_t))
173
2.12M
                    offset -= 4;
174
6.21M
            }
175
176
6.21M
            scan_end = zng_memread_8(scan+offset);
177
178
#ifdef LONGEST_MATCH_SLOW
179
            /* Look for a better string offset */
180
            if (UNLIKELY(len > STD_MIN_MATCH && match_start + len < strstart)) {
181
                const unsigned char *scan_endstr;
182
                uint32_t hash;
183
                uint32_t pos, next_pos;
184
185
                /* Go back to offset 0 */
186
                cur_match -= match_offset;
187
                match_offset = 0;
188
                next_pos = cur_match;
189
                for (uint32_t i = 0; i <= len - STD_MIN_MATCH; i++) {
190
                    pos = prev[(cur_match + i) & wmask];
191
                    if (pos < next_pos) {
192
                        /* Hash chain is more distant, use it */
193
                        if (pos <= limit_base + i)
194
                            goto break_matching;
195
                        next_pos = pos;
196
                        match_offset = i;
197
                    }
198
                }
199
                /* Switch cur_match to next_pos chain */
200
                cur_match = next_pos;
201
202
                /* Try hash head at len-(STD_MIN_MATCH-1) position to see if we could get
203
                 * a better cur_match at the end of string. Using (STD_MIN_MATCH-1) lets
204
                 * us include one more byte into hash - the byte which will be checked
205
                 * in main loop now, and which allows to grow match by 1.
206
                 */
207
                scan_endstr = scan + len - (STD_MIN_MATCH+1);
208
209
                // use update_hash_roll for deflate_slow
210
                hash = update_hash_roll(0, scan_endstr[0]);
211
                hash = update_hash_roll(hash, scan_endstr[1]);
212
                hash = update_hash_roll(hash, scan_endstr[2]);
213
214
                pos = head[hash];
215
                if (pos < cur_match) {
216
                    match_offset = len - (STD_MIN_MATCH+1);
217
                    if (pos <= limit_base + match_offset)
218
                        goto break_matching;
219
                    cur_match = pos;
220
                }
221
222
                /* Update offset-dependent variables */
223
                limit = limit_base+match_offset;
224
                mbase_start = window-match_offset;
225
                mbase_end = (mbase_start+offset);
226
                continue;
227
            }
228
#endif
229
6.21M
            mbase_end = (mbase_start+offset);
230
6.21M
        }
231
171k
#ifndef LONGEST_MATCH_SLOW
232
171k
        else if (UNLIKELY(early_exit)) {
233
            /* The probability of finding a match later if we here is pretty low, so for
234
             * performance it's best to outright stop here for the lower compression levels
235
             */
236
2.60k
            break;
237
2.60k
        }
238
6.38M
#endif
239
6.38M
        GOTO_NEXT_CHAIN;
240
6.38M
    }
241
2.63k
    return best_len;
242
243
#ifdef LONGEST_MATCH_SLOW
244
break_matching:
245
246
    if (best_len < lookahead)
247
        return best_len;
248
249
    return lookahead;
250
#endif
251
24.2M
}
longest_match_slow_avx2
Line
Count
Source
31
27.1M
Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, uint32_t cur_match) {
32
27.1M
    const unsigned wmask = W_MASK(s);
33
27.1M
    unsigned int strstart = s->strstart;
34
27.1M
    const unsigned char *window = s->window;
35
27.1M
    const Pos *prev = s->prev;
36
27.1M
#ifdef LONGEST_MATCH_SLOW
37
27.1M
    const Pos *head = s->head;
38
27.1M
#endif
39
27.1M
    const unsigned char *scan;
40
27.1M
    const unsigned char *mbase_start = window;
41
27.1M
    const unsigned char *mbase_end;
42
27.1M
    uint32_t limit;
43
27.1M
#ifdef LONGEST_MATCH_SLOW
44
27.1M
    uint32_t limit_base;
45
#else
46
    int32_t early_exit;
47
#endif
48
27.1M
    uint32_t chain_length = s->max_chain_length;
49
27.1M
    uint32_t nice_match = (uint32_t)s->nice_match;
50
27.1M
    uint32_t best_len, offset;
51
27.1M
    uint32_t lookahead = s->lookahead;
52
27.1M
    uint32_t match_offset = 0;
53
27.1M
    uint64_t scan_start;
54
27.1M
    uint64_t scan_end;
55
56
    /* The code is optimized for STD_MAX_MATCH-2 multiple of 16. */
57
27.1M
    Assert(STD_MAX_MATCH == 258, "Code too clever");
58
59
27.1M
    scan = window + strstart;
60
27.1M
    best_len = s->prev_length ? s->prev_length : STD_MIN_MATCH-1;
61
62
    /* Calculate read offset which should only extend an extra byte
63
     * to find the next best match length.
64
     */
65
27.1M
    offset = best_len-1;
66
27.1M
    if (best_len >= sizeof(uint32_t)) {
67
3.35M
        offset -= 2;
68
3.35M
        if (best_len >= sizeof(uint64_t))
69
289k
            offset -= 4;
70
3.35M
    }
71
72
27.1M
    scan_start = zng_memread_8(scan);
73
27.1M
    scan_end = zng_memread_8(scan+offset);
74
27.1M
    mbase_end  = (mbase_start+offset);
75
76
    /* Do not waste too much time if we already have a good match */
77
27.1M
    if (best_len >= s->good_match)
78
45.4k
        chain_length >>= 2;
79
80
    /* Stop when cur_match becomes <= limit. To simplify the code,
81
     * we prevent matches with the string of window index 0
82
     */
83
27.1M
    limit = strstart > MAX_DIST(s) ? (strstart - MAX_DIST(s)) : 0;
84
27.1M
#ifdef LONGEST_MATCH_SLOW
85
27.1M
    limit_base = limit;
86
27.1M
    if (best_len >= STD_MIN_MATCH) {
87
        /* We're continuing search (lazy evaluation). */
88
5.92M
        uint32_t hash;
89
5.92M
        uint32_t pos;
90
91
        /* Find a most distant chain starting from scan with index=1 (index=0 corresponds
92
         * to cur_match). We cannot use s->prev[strstart+1,...] immediately, because
93
         * these strings are not yet inserted into the hash table.
94
         */
95
        // use update_hash_roll for deflate_slow
96
5.92M
        hash = update_hash_roll(0, scan[1]);
97
5.92M
        hash = update_hash_roll(hash, scan[2]);
98
99
22.9M
        for (uint32_t i = 3; i <= best_len; i++) {
100
            // use update_hash_roll for deflate_slow
101
17.0M
            hash = update_hash_roll(hash, scan[i]);
102
            /* If we're starting with best_len >= 3, we can use offset search. */
103
17.0M
            pos = head[hash];
104
17.0M
            if (pos < cur_match) {
105
4.94M
                match_offset = i - 2;
106
4.94M
                cur_match = pos;
107
4.94M
            }
108
17.0M
        }
109
110
        /* Update offset-dependent variables */
111
5.92M
        limit = limit_base+match_offset;
112
5.92M
        if (cur_match <= limit)
113
1.29M
            goto break_matching;
114
4.63M
        mbase_start -= match_offset;
115
4.63M
        mbase_end -= match_offset;
116
4.63M
    }
117
#else
118
    early_exit = s->level < EARLY_EXIT_TRIGGER_LEVEL;
119
#endif
120
25.8M
    Assert((unsigned long)strstart <= s->window_size - MIN_LOOKAHEAD, "need lookahead");
121
35.2M
    for (;;) {
122
35.2M
        if (cur_match >= strstart)
123
0
            break;
124
125
        /* Skip to next match if the match length cannot increase or if the match length is
126
         * less than 2. Note that the checks below for insufficient lookahead only occur
127
         * occasionally for performance reasons.
128
         * Therefore uninitialized memory will be accessed and conditional jumps will be made
129
         * that depend on those values. However the length of the match is limited to the
130
         * lookahead, so the output of deflate is not affected by the uninitialized values.
131
         */
132
35.2M
        if (best_len < sizeof(uint32_t)) {
133
103M
            for (;;) {
134
103M
                if (zng_memcmp_2(mbase_end+cur_match, &scan_end) == 0 &&
135
19.3M
                    zng_memcmp_2(mbase_start+cur_match, &scan_start) == 0)
136
8.83M
                    break;
137
95.1M
                GOTO_NEXT_CHAIN;
138
95.1M
            }
139
27.9M
        } else if (best_len >= sizeof(uint64_t)) {
140
43.0M
            for (;;) {
141
43.0M
                if (zng_memcmp_8(mbase_end+cur_match, &scan_end) == 0 &&
142
2.74M
                    zng_memcmp_8(mbase_start+cur_match, &scan_start) == 0)
143
1.95M
                    break;
144
41.1M
                GOTO_NEXT_CHAIN;
145
41.1M
            }
146
4.91M
        } else {
147
50.5M
            for (;;) {
148
50.5M
                if (zng_memcmp_4(mbase_end+cur_match, &scan_end) == 0 &&
149
3.26M
                    zng_memcmp_4(mbase_start+cur_match, &scan_start) == 0)
150
654k
                    break;
151
49.9M
                GOTO_NEXT_CHAIN;
152
49.9M
            }
153
4.91M
        }
154
11.4M
        uint32_t len = COMPARE256(scan+2, mbase_start+cur_match+2) + 2;
155
11.4M
        Assert(scan+len <= window+(unsigned)(s->window_size-1), "wild scan");
156
157
11.4M
        if (len > best_len) {
158
10.1M
            uint32_t match_start = cur_match - match_offset;
159
10.1M
            s->match_start = match_start;
160
161
            /* Do not look for matches beyond the end of the input. */
162
10.1M
            if (len > lookahead)
163
419
                return lookahead;
164
10.1M
            if (len >= nice_match)
165
47.8k
                return len;
166
167
10.1M
            best_len = len;
168
169
10.1M
            offset = best_len-1;
170
10.1M
            if (best_len >= sizeof(uint32_t)) {
171
4.88M
                offset -= 2;
172
4.88M
                if (best_len >= sizeof(uint64_t))
173
952k
                    offset -= 4;
174
4.88M
            }
175
176
10.1M
            scan_end = zng_memread_8(scan+offset);
177
178
10.1M
#ifdef LONGEST_MATCH_SLOW
179
            /* Look for a better string offset */
180
10.1M
            if (UNLIKELY(len > STD_MIN_MATCH && match_start + len < strstart)) {
181
4.74M
                const unsigned char *scan_endstr;
182
4.74M
                uint32_t hash;
183
4.74M
                uint32_t pos, next_pos;
184
185
                /* Go back to offset 0 */
186
4.74M
                cur_match -= match_offset;
187
4.74M
                match_offset = 0;
188
4.74M
                next_pos = cur_match;
189
60.9M
                for (uint32_t i = 0; i <= len - STD_MIN_MATCH; i++) {
190
57.6M
                    pos = prev[(cur_match + i) & wmask];
191
57.6M
                    if (pos < next_pos) {
192
                        /* Hash chain is more distant, use it */
193
6.48M
                        if (pos <= limit_base + i)
194
1.41M
                            goto break_matching;
195
5.07M
                        next_pos = pos;
196
5.07M
                        match_offset = i;
197
5.07M
                    }
198
57.6M
                }
199
                /* Switch cur_match to next_pos chain */
200
3.33M
                cur_match = next_pos;
201
202
                /* Try hash head at len-(STD_MIN_MATCH-1) position to see if we could get
203
                 * a better cur_match at the end of string. Using (STD_MIN_MATCH-1) lets
204
                 * us include one more byte into hash - the byte which will be checked
205
                 * in main loop now, and which allows to grow match by 1.
206
                 */
207
3.33M
                scan_endstr = scan + len - (STD_MIN_MATCH+1);
208
209
                // use update_hash_roll for deflate_slow
210
3.33M
                hash = update_hash_roll(0, scan_endstr[0]);
211
3.33M
                hash = update_hash_roll(hash, scan_endstr[1]);
212
3.33M
                hash = update_hash_roll(hash, scan_endstr[2]);
213
214
3.33M
                pos = head[hash];
215
3.33M
                if (pos < cur_match) {
216
0
                    match_offset = len - (STD_MIN_MATCH+1);
217
0
                    if (pos <= limit_base + match_offset)
218
0
                        goto break_matching;
219
0
                    cur_match = pos;
220
0
                }
221
222
                /* Update offset-dependent variables */
223
3.33M
                limit = limit_base+match_offset;
224
3.33M
                mbase_start = window-match_offset;
225
3.33M
                mbase_end = (mbase_start+offset);
226
3.33M
                continue;
227
3.33M
            }
228
5.38M
#endif
229
5.38M
            mbase_end = (mbase_start+offset);
230
5.38M
        }
231
#ifndef LONGEST_MATCH_SLOW
232
        else if (UNLIKELY(early_exit)) {
233
            /* The probability of finding a match later if we here is pretty low, so for
234
             * performance it's best to outright stop here for the lower compression levels
235
             */
236
            break;
237
        }
238
#endif
239
6.64M
        GOTO_NEXT_CHAIN;
240
6.64M
    }
241
0
    return best_len;
242
243
0
#ifdef LONGEST_MATCH_SLOW
244
2.70M
break_matching:
245
246
2.70M
    if (best_len < lookahead)
247
2.70M
        return best_len;
248
249
286
    return lookahead;
250
2.70M
#endif
251
2.70M
}
Unexecuted instantiation: longest_match_avx512
Unexecuted instantiation: longest_match_slow_avx512
252
253
#undef LONGEST_MATCH_SLOW
254
#undef LONGEST_MATCH