Coverage Report

Created: 2026-04-12 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/zlib-ng/match_tpl.h
Line
Count
Source
1
/* match_tpl.h -- find longest match template for compare256 variants
2
 *
3
 * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
4
 * For conditions of distribution and use, see copyright notice in zlib.h
5
 *
6
 * Portions copyright (C) 2014-2021 Konstantin Nosov
7
 *  Fast-zlib optimized longest_match
8
 *  https://github.com/gildor2/fast_zlib
9
 */
10
11
#include "insert_string_p.h"
12
13
349k
#define EARLY_EXIT_TRIGGER_LEVEL 5
14
15
#define GOTO_NEXT_CHAIN \
16
27.1M
    if (--chain_length && (cur_match = prev[cur_match & wmask]) > limit) \
17
26.6M
        continue; \
18
534k
    return best_len;
19
20
/* Set match_start to the longest match starting at the given string and
21
 * return its length. Matches shorter or equal to prev_length are discarded,
22
 * in which case the result is equal to prev_length and match_start is garbage.
23
 *
24
 * IN assertions: cur_match is the head of the hash chain for the current
25
 * string (strstart) and its distance is <= MAX_DIST, and prev_length >=1
26
 * OUT assertion: the match length is not greater than s->lookahead
27
 *
28
 * The LONGEST_MATCH_SLOW variant spends more time to attempt to find longer
29
 * matches once a match has already been found.
30
 */
31
764k
Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, uint32_t cur_match) {
32
764k
    const unsigned wmask = W_MASK(s);
33
764k
    unsigned int strstart = s->strstart;
34
764k
    const unsigned char *window = s->window;
35
764k
    const Pos *prev = s->prev;
36
#ifdef LONGEST_MATCH_SLOW
37
    const Pos *head = s->head;
38
#endif
39
764k
    const unsigned char *scan;
40
764k
    const unsigned char *mbase_start = window;
41
764k
    const unsigned char *mbase_end;
42
764k
    uint32_t limit;
43
#ifdef LONGEST_MATCH_SLOW
44
    uint32_t limit_base;
45
#else
46
    int32_t early_exit;
47
#endif
48
764k
    uint32_t chain_length = s->max_chain_length;
49
764k
    uint32_t nice_match = (uint32_t)s->nice_match;
50
764k
    uint32_t best_len, offset;
51
764k
    uint32_t lookahead = s->lookahead;
52
764k
    uint32_t match_offset = 0;
53
764k
    uint64_t scan_start;
54
764k
    uint64_t scan_end;
55
56
    /* The code is optimized for STD_MAX_MATCH-2 multiple of 16. */
57
764k
    Assert(STD_MAX_MATCH == 258, "Code too clever");
58
59
764k
    scan = window + strstart;
60
764k
    best_len = s->prev_length ? s->prev_length : STD_MIN_MATCH-1;
61
62
    /* Calculate read offset which should only extend an extra byte
63
     * to find the next best match length.
64
     */
65
764k
    offset = best_len-1;
66
764k
    if (best_len >= sizeof(uint32_t)) {
67
131k
        offset -= 2;
68
131k
        if (best_len >= sizeof(uint64_t))
69
62.9k
            offset -= 4;
70
131k
    }
71
72
764k
    scan_start = zng_memread_8(scan);
73
764k
    scan_end = zng_memread_8(scan+offset);
74
764k
    mbase_end  = (mbase_start+offset);
75
76
    /* Do not waste too much time if we already have a good match */
77
764k
    if (best_len >= s->good_match)
78
22.1k
        chain_length >>= 2;
79
80
    /* Stop when cur_match becomes <= limit. To simplify the code,
81
     * we prevent matches with the string of window index 0
82
     */
83
764k
    limit = strstart > MAX_DIST(s) ? (strstart - MAX_DIST(s)) : 0;
84
#ifdef LONGEST_MATCH_SLOW
85
    limit_base = limit;
86
415k
    if (best_len >= STD_MIN_MATCH) {
87
        /* We're continuing search (lazy evaluation). */
88
143k
        uint32_t hash;
89
143k
        uint32_t pos;
90
91
        /* Find a most distant chain starting from scan with index=1 (index=0 corresponds
92
         * to cur_match). We cannot use s->prev[strstart+1,...] immediately, because
93
         * these strings are not yet inserted into the hash table.
94
         */
95
        // use update_hash_roll for deflate_slow
96
        hash = update_hash_roll(0, scan[1]);
97
        hash = update_hash_roll(hash, scan[2]);
98
99
2.48M
        for (uint32_t i = 3; i <= best_len; i++) {
100
            // use update_hash_roll for deflate_slow
101
2.33M
            hash = update_hash_roll(hash, scan[i]);
102
            /* If we're starting with best_len >= 3, we can use offset search. */
103
2.33M
            pos = head[hash];
104
2.33M
            if (pos < cur_match) {
105
152k
                match_offset = i - 2;
106
152k
                cur_match = pos;
107
152k
            }
108
2.33M
        }
109
110
        /* Update offset-dependent variables */
111
143k
        limit = limit_base+match_offset;
112
143k
        if (cur_match <= limit)
113
92.6k
            goto break_matching;
114
50.6k
        mbase_start -= match_offset;
115
50.6k
        mbase_end -= match_offset;
116
50.6k
    }
117
#else
118
349k
    early_exit = s->level < EARLY_EXIT_TRIGGER_LEVEL;
119
#endif
120
322k
    Assert((unsigned long)strstart <= s->window_size - MIN_LOOKAHEAD, "need lookahead");
121
2.57M
    for (;;) {
122
2.57M
        if (cur_match >= strstart)
123
0
            break;
124
125
        /* Skip to next match if the match length cannot increase or if the match length is
126
         * less than 2. Note that the checks below for insufficient lookahead only occur
127
         * occasionally for performance reasons.
128
         * Therefore uninitialized memory will be accessed and conditional jumps will be made
129
         * that depend on those values. However the length of the match is limited to the
130
         * lookahead, so the output of deflate is not affected by the uninitialized values.
131
         */
132
2.57M
        if (best_len < sizeof(uint32_t)) {
133
5.10M
            for (;;) {
134
5.10M
                if (zng_memcmp_2(mbase_end+cur_match, &scan_end) == 0 &&
135
1.53M
                    zng_memcmp_2(mbase_start+cur_match, &scan_start) == 0)
136
539k
                    break;
137
4.56M
                GOTO_NEXT_CHAIN;
138
4.56M
            }
139
1.86M
        } else if (best_len >= sizeof(uint64_t)) {
140
16.1M
            for (;;) {
141
16.1M
                if (zng_memcmp_8(mbase_end+cur_match, &scan_end) == 0 &&
142
1.64M
                    zng_memcmp_8(mbase_start+cur_match, &scan_start) == 0)
143
1.39M
                    break;
144
14.8M
                GOTO_NEXT_CHAIN;
145
14.8M
            }
146
1.49M
        } else {
147
6.64M
            for (;;) {
148
6.64M
                if (zng_memcmp_4(mbase_end+cur_match, &scan_end) == 0 &&
149
375k
                    zng_memcmp_4(mbase_start+cur_match, &scan_start) == 0)
150
247k
                    break;
151
6.39M
                GOTO_NEXT_CHAIN;
152
6.39M
            }
153
368k
        }
154
2.17M
        uint32_t len = COMPARE256(scan+2, mbase_start+cur_match+2) + 2;
155
2.17M
        Assert(scan+len <= window+(unsigned)(s->window_size-1), "wild scan");
156
157
2.17M
        if (len > best_len) {
158
1.65M
            uint32_t match_start = cur_match - match_offset;
159
1.65M
            s->match_start = match_start;
160
161
            /* Do not look for better matches if the current match reaches
162
             * or exceeds the end of the input.
163
             */
164
1.65M
            if (len >= lookahead)
165
3.14k
                return lookahead;
166
1.65M
            if (len >= nice_match)
167
86.1k
                return len;
168
169
1.56M
            best_len = len;
170
171
1.56M
            offset = best_len-1;
172
1.56M
            if (best_len >= sizeof(uint32_t)) {
173
1.44M
                offset -= 2;
174
1.44M
                if (best_len >= sizeof(uint64_t))
175
998k
                    offset -= 4;
176
1.44M
            }
177
178
1.56M
            scan_end = zng_memread_8(scan+offset);
179
180
#ifdef LONGEST_MATCH_SLOW
181
            /* Look for a better string offset */
182
821k
            if (UNLIKELY(len > STD_MIN_MATCH && match_start + len < strstart)) {
183
663k
                const unsigned char *scan_endstr;
184
663k
                uint32_t hash;
185
663k
                uint32_t pos, next_pos;
186
187
                /* Go back to offset 0 */
188
                cur_match -= match_offset;
189
                match_offset = 0;
190
                next_pos = cur_match;
191
32.7M
                for (uint32_t i = 0; i <= len - STD_MIN_MATCH; i++) {
192
32.1M
                    pos = prev[(cur_match + i) & wmask];
193
32.1M
                    if (pos < next_pos) {
194
                        /* Hash chain is more distant, use it */
195
714k
                        if (pos <= limit_base + i)
196
48.1k
                            goto break_matching;
197
666k
                        next_pos = pos;
198
666k
                        match_offset = i;
199
666k
                    }
200
32.1M
                }
201
                /* Switch cur_match to next_pos chain */
202
615k
                cur_match = next_pos;
203
204
                /* Try hash head at len-(STD_MIN_MATCH-1) position to see if we could get
205
                 * a better cur_match at the end of string. Using (STD_MIN_MATCH-1) lets
206
                 * us include one more byte into hash - the byte which will be checked
207
                 * in main loop now, and which allows to grow match by 1.
208
                 */
209
615k
                scan_endstr = scan + len - (STD_MIN_MATCH+1);
210
211
                // use update_hash_roll for deflate_slow
212
615k
                hash = update_hash_roll(0, scan_endstr[0]);
213
615k
                hash = update_hash_roll(hash, scan_endstr[1]);
214
615k
                hash = update_hash_roll(hash, scan_endstr[2]);
215
216
615k
                pos = head[hash];
217
615k
                if (pos < cur_match) {
218
0
                    match_offset = len - (STD_MIN_MATCH+1);
219
0
                    if (pos <= limit_base + match_offset)
220
0
                        goto break_matching;
221
0
                    cur_match = pos;
222
0
                }
223
224
                /* Update offset-dependent variables */
225
615k
                limit = limit_base+match_offset;
226
615k
                mbase_start = window-match_offset;
227
615k
                mbase_end = (mbase_start+offset);
228
615k
                continue;
229
615k
            }
230
157k
#endif
231
157k
            mbase_end = (mbase_start+offset);
232
157k
        }
233
#ifndef LONGEST_MATCH_SLOW
234
78.6k
        else if (UNLIKELY(early_exit)) {
235
            /* The probability of finding a match later if we here is pretty low, so for
236
             * performance it's best to outright stop here for the lower compression levels
237
             */
238
224
            break;
239
224
        }
240
825k
#endif
241
1.42M
        GOTO_NEXT_CHAIN;
242
1.42M
    }
243
224
    return best_len;
244
245
#ifdef LONGEST_MATCH_SLOW
246
140k
break_matching:
247
248
140k
    if (best_len < lookahead)
249
140k
        return best_len;
250
251
430
    return lookahead;
252
#endif
253
140k
}
Unexecuted instantiation: longest_match_sse2
Unexecuted instantiation: longest_match_slow_sse2
longest_match_avx2
Line
Count
Source
31
349k
Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, uint32_t cur_match) {
32
349k
    const unsigned wmask = W_MASK(s);
33
349k
    unsigned int strstart = s->strstart;
34
349k
    const unsigned char *window = s->window;
35
349k
    const Pos *prev = s->prev;
36
#ifdef LONGEST_MATCH_SLOW
37
    const Pos *head = s->head;
38
#endif
39
349k
    const unsigned char *scan;
40
349k
    const unsigned char *mbase_start = window;
41
349k
    const unsigned char *mbase_end;
42
349k
    uint32_t limit;
43
#ifdef LONGEST_MATCH_SLOW
44
    uint32_t limit_base;
45
#else
46
349k
    int32_t early_exit;
47
349k
#endif
48
349k
    uint32_t chain_length = s->max_chain_length;
49
349k
    uint32_t nice_match = (uint32_t)s->nice_match;
50
349k
    uint32_t best_len, offset;
51
349k
    uint32_t lookahead = s->lookahead;
52
349k
    uint32_t match_offset = 0;
53
349k
    uint64_t scan_start;
54
349k
    uint64_t scan_end;
55
56
    /* The code is optimized for STD_MAX_MATCH-2 multiple of 16. */
57
349k
    Assert(STD_MAX_MATCH == 258, "Code too clever");
58
59
349k
    scan = window + strstart;
60
349k
    best_len = s->prev_length ? s->prev_length : STD_MIN_MATCH-1;
61
62
    /* Calculate read offset which should only extend an extra byte
63
     * to find the next best match length.
64
     */
65
349k
    offset = best_len-1;
66
349k
    if (best_len >= sizeof(uint32_t)) {
67
7.96k
        offset -= 2;
68
7.96k
        if (best_len >= sizeof(uint64_t))
69
5.22k
            offset -= 4;
70
7.96k
    }
71
72
349k
    scan_start = zng_memread_8(scan);
73
349k
    scan_end = zng_memread_8(scan+offset);
74
349k
    mbase_end  = (mbase_start+offset);
75
76
    /* Do not waste too much time if we already have a good match */
77
349k
    if (best_len >= s->good_match)
78
2.62k
        chain_length >>= 2;
79
80
    /* Stop when cur_match becomes <= limit. To simplify the code,
81
     * we prevent matches with the string of window index 0
82
     */
83
349k
    limit = strstart > MAX_DIST(s) ? (strstart - MAX_DIST(s)) : 0;
84
#ifdef LONGEST_MATCH_SLOW
85
    limit_base = limit;
86
    if (best_len >= STD_MIN_MATCH) {
87
        /* We're continuing search (lazy evaluation). */
88
        uint32_t hash;
89
        uint32_t pos;
90
91
        /* Find a most distant chain starting from scan with index=1 (index=0 corresponds
92
         * to cur_match). We cannot use s->prev[strstart+1,...] immediately, because
93
         * these strings are not yet inserted into the hash table.
94
         */
95
        // use update_hash_roll for deflate_slow
96
        hash = update_hash_roll(0, scan[1]);
97
        hash = update_hash_roll(hash, scan[2]);
98
99
        for (uint32_t i = 3; i <= best_len; i++) {
100
            // use update_hash_roll for deflate_slow
101
            hash = update_hash_roll(hash, scan[i]);
102
            /* If we're starting with best_len >= 3, we can use offset search. */
103
            pos = head[hash];
104
            if (pos < cur_match) {
105
                match_offset = i - 2;
106
                cur_match = pos;
107
            }
108
        }
109
110
        /* Update offset-dependent variables */
111
        limit = limit_base+match_offset;
112
        if (cur_match <= limit)
113
            goto break_matching;
114
        mbase_start -= match_offset;
115
        mbase_end -= match_offset;
116
    }
117
#else
118
349k
    early_exit = s->level < EARLY_EXIT_TRIGGER_LEVEL;
119
349k
#endif
120
349k
    Assert((unsigned long)strstart <= s->window_size - MIN_LOOKAHEAD, "need lookahead");
121
1.08M
    for (;;) {
122
1.08M
        if (cur_match >= strstart)
123
0
            break;
124
125
        /* Skip to next match if the match length cannot increase or if the match length is
126
         * less than 2. Note that the checks below for insufficient lookahead only occur
127
         * occasionally for performance reasons.
128
         * Therefore uninitialized memory will be accessed and conditional jumps will be made
129
         * that depend on those values. However the length of the match is limited to the
130
         * lookahead, so the output of deflate is not affected by the uninitialized values.
131
         */
132
1.08M
        if (best_len < sizeof(uint32_t)) {
133
374k
            for (;;) {
134
374k
                if (zng_memcmp_2(mbase_end+cur_match, &scan_end) == 0 &&
135
287k
                    zng_memcmp_2(mbase_start+cur_match, &scan_start) == 0)
136
287k
                    break;
137
87.2k
                GOTO_NEXT_CHAIN;
138
87.2k
            }
139
744k
        } else if (best_len >= sizeof(uint64_t)) {
140
3.51M
            for (;;) {
141
3.51M
                if (zng_memcmp_8(mbase_end+cur_match, &scan_end) == 0 &&
142
505k
                    zng_memcmp_8(mbase_start+cur_match, &scan_start) == 0)
143
470k
                    break;
144
3.04M
                GOTO_NEXT_CHAIN;
145
3.04M
            }
146
524k
        } else {
147
1.37M
            for (;;) {
148
1.37M
                if (zng_memcmp_4(mbase_end+cur_match, &scan_end) == 0 &&
149
150k
                    zng_memcmp_4(mbase_start+cur_match, &scan_start) == 0)
150
149k
                    break;
151
1.22M
                GOTO_NEXT_CHAIN;
152
1.22M
            }
153
220k
        }
154
907k
        uint32_t len = COMPARE256(scan+2, mbase_start+cur_match+2) + 2;
155
907k
        Assert(scan+len <= window+(unsigned)(s->window_size-1), "wild scan");
156
157
907k
        if (len > best_len) {
158
828k
            uint32_t match_start = cur_match - match_offset;
159
828k
            s->match_start = match_start;
160
161
            /* Do not look for better matches if the current match reaches
162
             * or exceeds the end of the input.
163
             */
164
828k
            if (len >= lookahead)
165
1.58k
                return lookahead;
166
827k
            if (len >= nice_match)
167
79.8k
                return len;
168
169
747k
            best_len = len;
170
171
747k
            offset = best_len-1;
172
747k
            if (best_len >= sizeof(uint32_t)) {
173
747k
                offset -= 2;
174
747k
                if (best_len >= sizeof(uint64_t))
175
467k
                    offset -= 4;
176
747k
            }
177
178
747k
            scan_end = zng_memread_8(scan+offset);
179
180
#ifdef LONGEST_MATCH_SLOW
181
            /* Look for a better string offset */
182
            if (UNLIKELY(len > STD_MIN_MATCH && match_start + len < strstart)) {
183
                const unsigned char *scan_endstr;
184
                uint32_t hash;
185
                uint32_t pos, next_pos;
186
187
                /* Go back to offset 0 */
188
                cur_match -= match_offset;
189
                match_offset = 0;
190
                next_pos = cur_match;
191
                for (uint32_t i = 0; i <= len - STD_MIN_MATCH; i++) {
192
                    pos = prev[(cur_match + i) & wmask];
193
                    if (pos < next_pos) {
194
                        /* Hash chain is more distant, use it */
195
                        if (pos <= limit_base + i)
196
                            goto break_matching;
197
                        next_pos = pos;
198
                        match_offset = i;
199
                    }
200
                }
201
                /* Switch cur_match to next_pos chain */
202
                cur_match = next_pos;
203
204
                /* Try hash head at len-(STD_MIN_MATCH-1) position to see if we could get
205
                 * a better cur_match at the end of string. Using (STD_MIN_MATCH-1) lets
206
                 * us include one more byte into hash - the byte which will be checked
207
                 * in main loop now, and which allows to grow match by 1.
208
                 */
209
                scan_endstr = scan + len - (STD_MIN_MATCH+1);
210
211
                // use update_hash_roll for deflate_slow
212
                hash = update_hash_roll(0, scan_endstr[0]);
213
                hash = update_hash_roll(hash, scan_endstr[1]);
214
                hash = update_hash_roll(hash, scan_endstr[2]);
215
216
                pos = head[hash];
217
                if (pos < cur_match) {
218
                    match_offset = len - (STD_MIN_MATCH+1);
219
                    if (pos <= limit_base + match_offset)
220
                        goto break_matching;
221
                    cur_match = pos;
222
                }
223
224
                /* Update offset-dependent variables */
225
                limit = limit_base+match_offset;
226
                mbase_start = window-match_offset;
227
                mbase_end = (mbase_start+offset);
228
                continue;
229
            }
230
#endif
231
747k
            mbase_end = (mbase_start+offset);
232
747k
        }
233
78.6k
#ifndef LONGEST_MATCH_SLOW
234
78.6k
        else if (UNLIKELY(early_exit)) {
235
            /* The probability of finding a match later if we here is pretty low, so for
236
             * performance it's best to outright stop here for the lower compression levels
237
             */
238
224
            break;
239
224
        }
240
825k
#endif
241
825k
        GOTO_NEXT_CHAIN;
242
825k
    }
243
224
    return best_len;
244
245
#ifdef LONGEST_MATCH_SLOW
246
break_matching:
247
248
    if (best_len < lookahead)
249
        return best_len;
250
251
    return lookahead;
252
#endif
253
349k
}
longest_match_slow_avx2
Line
Count
Source
31
415k
Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, uint32_t cur_match) {
32
415k
    const unsigned wmask = W_MASK(s);
33
415k
    unsigned int strstart = s->strstart;
34
415k
    const unsigned char *window = s->window;
35
415k
    const Pos *prev = s->prev;
36
415k
#ifdef LONGEST_MATCH_SLOW
37
415k
    const Pos *head = s->head;
38
415k
#endif
39
415k
    const unsigned char *scan;
40
415k
    const unsigned char *mbase_start = window;
41
415k
    const unsigned char *mbase_end;
42
415k
    uint32_t limit;
43
415k
#ifdef LONGEST_MATCH_SLOW
44
415k
    uint32_t limit_base;
45
#else
46
    int32_t early_exit;
47
#endif
48
415k
    uint32_t chain_length = s->max_chain_length;
49
415k
    uint32_t nice_match = (uint32_t)s->nice_match;
50
415k
    uint32_t best_len, offset;
51
415k
    uint32_t lookahead = s->lookahead;
52
415k
    uint32_t match_offset = 0;
53
415k
    uint64_t scan_start;
54
415k
    uint64_t scan_end;
55
56
    /* The code is optimized for STD_MAX_MATCH-2 multiple of 16. */
57
415k
    Assert(STD_MAX_MATCH == 258, "Code too clever");
58
59
415k
    scan = window + strstart;
60
415k
    best_len = s->prev_length ? s->prev_length : STD_MIN_MATCH-1;
61
62
    /* Calculate read offset which should only extend an extra byte
63
     * to find the next best match length.
64
     */
65
415k
    offset = best_len-1;
66
415k
    if (best_len >= sizeof(uint32_t)) {
67
123k
        offset -= 2;
68
123k
        if (best_len >= sizeof(uint64_t))
69
57.7k
            offset -= 4;
70
123k
    }
71
72
415k
    scan_start = zng_memread_8(scan);
73
415k
    scan_end = zng_memread_8(scan+offset);
74
415k
    mbase_end  = (mbase_start+offset);
75
76
    /* Do not waste too much time if we already have a good match */
77
415k
    if (best_len >= s->good_match)
78
19.5k
        chain_length >>= 2;
79
80
    /* Stop when cur_match becomes <= limit. To simplify the code,
81
     * we prevent matches with the string of window index 0
82
     */
83
415k
    limit = strstart > MAX_DIST(s) ? (strstart - MAX_DIST(s)) : 0;
84
415k
#ifdef LONGEST_MATCH_SLOW
85
415k
    limit_base = limit;
86
415k
    if (best_len >= STD_MIN_MATCH) {
87
        /* We're continuing search (lazy evaluation). */
88
143k
        uint32_t hash;
89
143k
        uint32_t pos;
90
91
        /* Find a most distant chain starting from scan with index=1 (index=0 corresponds
92
         * to cur_match). We cannot use s->prev[strstart+1,...] immediately, because
93
         * these strings are not yet inserted into the hash table.
94
         */
95
        // use update_hash_roll for deflate_slow
96
143k
        hash = update_hash_roll(0, scan[1]);
97
143k
        hash = update_hash_roll(hash, scan[2]);
98
99
2.48M
        for (uint32_t i = 3; i <= best_len; i++) {
100
            // use update_hash_roll for deflate_slow
101
2.33M
            hash = update_hash_roll(hash, scan[i]);
102
            /* If we're starting with best_len >= 3, we can use offset search. */
103
2.33M
            pos = head[hash];
104
2.33M
            if (pos < cur_match) {
105
152k
                match_offset = i - 2;
106
152k
                cur_match = pos;
107
152k
            }
108
2.33M
        }
109
110
        /* Update offset-dependent variables */
111
143k
        limit = limit_base+match_offset;
112
143k
        if (cur_match <= limit)
113
92.6k
            goto break_matching;
114
50.6k
        mbase_start -= match_offset;
115
50.6k
        mbase_end -= match_offset;
116
50.6k
    }
117
#else
118
    early_exit = s->level < EARLY_EXIT_TRIGGER_LEVEL;
119
#endif
120
322k
    Assert((unsigned long)strstart <= s->window_size - MIN_LOOKAHEAD, "need lookahead");
121
1.48M
    for (;;) {
122
1.48M
        if (cur_match >= strstart)
123
0
            break;
124
125
        /* Skip to next match if the match length cannot increase or if the match length is
126
         * less than 2. Note that the checks below for insufficient lookahead only occur
127
         * occasionally for performance reasons.
128
         * Therefore uninitialized memory will be accessed and conditional jumps will be made
129
         * that depend on those values. However the length of the match is limited to the
130
         * lookahead, so the output of deflate is not affected by the uninitialized values.
131
         */
132
1.48M
        if (best_len < sizeof(uint32_t)) {
133
4.73M
            for (;;) {
134
4.73M
                if (zng_memcmp_2(mbase_end+cur_match, &scan_end) == 0 &&
135
1.24M
                    zng_memcmp_2(mbase_start+cur_match, &scan_start) == 0)
136
252k
                    break;
137
4.47M
                GOTO_NEXT_CHAIN;
138
4.47M
            }
139
1.12M
        } else if (best_len >= sizeof(uint64_t)) {
140
12.6M
            for (;;) {
141
12.6M
                if (zng_memcmp_8(mbase_end+cur_match, &scan_end) == 0 &&
142
1.13M
                    zng_memcmp_8(mbase_start+cur_match, &scan_start) == 0)
143
921k
                    break;
144
11.7M
                GOTO_NEXT_CHAIN;
145
11.7M
            }
146
972k
        } else {
147
5.27M
            for (;;) {
148
5.27M
                if (zng_memcmp_4(mbase_end+cur_match, &scan_end) == 0 &&
149
225k
                    zng_memcmp_4(mbase_start+cur_match, &scan_start) == 0)
150
97.3k
                    break;
151
5.17M
                GOTO_NEXT_CHAIN;
152
5.17M
            }
153
147k
        }
154
1.27M
        uint32_t len = COMPARE256(scan+2, mbase_start+cur_match+2) + 2;
155
1.27M
        Assert(scan+len <= window+(unsigned)(s->window_size-1), "wild scan");
156
157
1.27M
        if (len > best_len) {
158
829k
            uint32_t match_start = cur_match - match_offset;
159
829k
            s->match_start = match_start;
160
161
            /* Do not look for better matches if the current match reaches
162
             * or exceeds the end of the input.
163
             */
164
829k
            if (len >= lookahead)
165
1.55k
                return lookahead;
166
828k
            if (len >= nice_match)
167
6.32k
                return len;
168
169
821k
            best_len = len;
170
171
821k
            offset = best_len-1;
172
821k
            if (best_len >= sizeof(uint32_t)) {
173
694k
                offset -= 2;
174
694k
                if (best_len >= sizeof(uint64_t))
175
530k
                    offset -= 4;
176
694k
            }
177
178
821k
            scan_end = zng_memread_8(scan+offset);
179
180
821k
#ifdef LONGEST_MATCH_SLOW
181
            /* Look for a better string offset */
182
821k
            if (UNLIKELY(len > STD_MIN_MATCH && match_start + len < strstart)) {
183
663k
                const unsigned char *scan_endstr;
184
663k
                uint32_t hash;
185
663k
                uint32_t pos, next_pos;
186
187
                /* Go back to offset 0 */
188
663k
                cur_match -= match_offset;
189
663k
                match_offset = 0;
190
663k
                next_pos = cur_match;
191
32.7M
                for (uint32_t i = 0; i <= len - STD_MIN_MATCH; i++) {
192
32.1M
                    pos = prev[(cur_match + i) & wmask];
193
32.1M
                    if (pos < next_pos) {
194
                        /* Hash chain is more distant, use it */
195
714k
                        if (pos <= limit_base + i)
196
48.1k
                            goto break_matching;
197
666k
                        next_pos = pos;
198
666k
                        match_offset = i;
199
666k
                    }
200
32.1M
                }
201
                /* Switch cur_match to next_pos chain */
202
615k
                cur_match = next_pos;
203
204
                /* Try hash head at len-(STD_MIN_MATCH-1) position to see if we could get
205
                 * a better cur_match at the end of string. Using (STD_MIN_MATCH-1) lets
206
                 * us include one more byte into hash - the byte which will be checked
207
                 * in main loop now, and which allows to grow match by 1.
208
                 */
209
615k
                scan_endstr = scan + len - (STD_MIN_MATCH+1);
210
211
                // use update_hash_roll for deflate_slow
212
615k
                hash = update_hash_roll(0, scan_endstr[0]);
213
615k
                hash = update_hash_roll(hash, scan_endstr[1]);
214
615k
                hash = update_hash_roll(hash, scan_endstr[2]);
215
216
615k
                pos = head[hash];
217
615k
                if (pos < cur_match) {
218
0
                    match_offset = len - (STD_MIN_MATCH+1);
219
0
                    if (pos <= limit_base + match_offset)
220
0
                        goto break_matching;
221
0
                    cur_match = pos;
222
0
                }
223
224
                /* Update offset-dependent variables */
225
615k
                limit = limit_base+match_offset;
226
615k
                mbase_start = window-match_offset;
227
615k
                mbase_end = (mbase_start+offset);
228
615k
                continue;
229
615k
            }
230
157k
#endif
231
157k
            mbase_end = (mbase_start+offset);
232
157k
        }
233
#ifndef LONGEST_MATCH_SLOW
234
        else if (UNLIKELY(early_exit)) {
235
            /* The probability of finding a match later if we here is pretty low, so for
236
             * performance it's best to outright stop here for the lower compression levels
237
             */
238
            break;
239
        }
240
#endif
241
598k
        GOTO_NEXT_CHAIN;
242
598k
    }
243
0
    return best_len;
244
245
0
#ifdef LONGEST_MATCH_SLOW
246
140k
break_matching:
247
248
140k
    if (best_len < lookahead)
249
140k
        return best_len;
250
251
430
    return lookahead;
252
140k
#endif
253
140k
}
Unexecuted instantiation: longest_match_avx512
Unexecuted instantiation: longest_match_slow_avx512
254
255
#undef LONGEST_MATCH_SLOW
256
#undef LONGEST_MATCH