Coverage Report

Created: 2025-10-23 06:32

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/zlib-ng/match_tpl.h
Line
Count
Source
1
/* match_tpl.h -- find longest match template for compare256 variants
2
 *
3
 * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
4
 * For conditions of distribution and use, see copyright notice in zlib.h
5
 *
6
 * Portions copyright (C) 2014-2021 Konstantin Nosov
7
 *  Fast-zlib optimized longest_match
8
 *  https://github.com/gildor2/fast_zlib
9
 */
10
11
#ifndef MATCH_TPL_H
12
#define MATCH_TPL_H
13
14
17.7M
#define EARLY_EXIT_TRIGGER_LEVEL 5
15
16
#endif
17
18
/* Set match_start to the longest match starting at the given string and
19
 * return its length. Matches shorter or equal to prev_length are discarded,
20
 * in which case the result is equal to prev_length and match_start is garbage.
21
 *
22
 * IN assertions: cur_match is the head of the hash chain for the current
23
 * string (strstart) and its distance is <= MAX_DIST, and prev_length >=1
24
 * OUT assertion: the match length is not greater than s->lookahead
25
 *
26
 * The LONGEST_MATCH_SLOW variant spends more time to attempt to find longer
27
 * matches once a match has already been found.
28
 */
29
33.8M
Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
30
33.8M
    unsigned int strstart = s->strstart;
31
33.8M
    const unsigned wmask = s->w_mask;
32
33.8M
    unsigned char *window = s->window;
33
33.8M
    unsigned char *scan = window + strstart;
34
33.8M
    Z_REGISTER unsigned char *mbase_start = window;
35
33.8M
    Z_REGISTER unsigned char *mbase_end;
36
33.8M
    const Pos *prev = s->prev;
37
33.8M
    Pos limit;
38
#ifdef LONGEST_MATCH_SLOW
39
    Pos limit_base;
40
#else
41
    int32_t early_exit;
42
#endif
43
33.8M
    uint32_t chain_length, nice_match, best_len, offset;
44
33.8M
    uint32_t lookahead = s->lookahead;
45
33.8M
    Pos match_offset = 0;
46
33.8M
    uint64_t scan_start;
47
33.8M
    uint64_t scan_end;
48
49
33.8M
#define GOTO_NEXT_CHAIN \
50
239M
    if (--chain_length && (cur_match = prev[cur_match & wmask]) > limit) \
51
207M
        continue; \
52
31.7M
    return best_len;
53
54
    /* The code is optimized for STD_MAX_MATCH-2 multiple of 16. */
55
33.8M
    Assert(STD_MAX_MATCH == 258, "Code too clever");
56
57
33.8M
    best_len = s->prev_length ? s->prev_length : STD_MIN_MATCH-1;
58
59
    /* Calculate read offset which should only extend an extra byte
60
     * to find the next best match length.
61
     */
62
33.8M
    offset = best_len-1;
63
33.8M
    if (best_len >= sizeof(uint32_t)) {
64
2.78M
        offset -= 2;
65
2.78M
        if (best_len >= sizeof(uint64_t))
66
665k
            offset -= 4;
67
2.78M
    }
68
69
33.8M
    scan_start = zng_memread_8(scan);
70
33.8M
    scan_end = zng_memread_8(scan+offset);
71
33.8M
    mbase_end  = (mbase_start+offset);
72
73
    /* Do not waste too much time if we already have a good match */
74
33.8M
    chain_length = s->max_chain_length;
75
33.8M
    if (best_len >= s->good_match)
76
430k
        chain_length >>= 2;
77
33.8M
    nice_match = (uint32_t)s->nice_match;
78
79
    /* Stop when cur_match becomes <= limit. To simplify the code,
80
     * we prevent matches with the string of window index 0
81
     */
82
33.8M
    limit = strstart > MAX_DIST(s) ? (Pos)(strstart - MAX_DIST(s)) : 0;
83
#ifdef LONGEST_MATCH_SLOW
84
    limit_base = limit;
85
16.0M
    if (best_len >= STD_MIN_MATCH) {
86
        /* We're continuing search (lazy evaluation). */
87
3.08M
        uint32_t i, hash;
88
3.08M
        Pos pos;
89
90
        /* Find a most distant chain starting from scan with index=1 (index=0 corresponds
91
         * to cur_match). We cannot use s->prev[strstart+1,...] immediately, because
92
         * these strings are not yet inserted into the hash table.
93
         */
94
        hash = s->update_hash(0, scan[1]);
95
        hash = s->update_hash(hash, scan[2]);
96
97
13.7M
        for (i = 3; i <= best_len; i++) {
98
10.6M
            hash = s->update_hash(hash, scan[i]);
99
100
            /* If we're starting with best_len >= 3, we can use offset search. */
101
10.6M
            pos = s->head[hash];
102
10.6M
            if (pos < cur_match) {
103
2.69M
                match_offset = (Pos)(i - 2);
104
2.69M
                cur_match = pos;
105
2.69M
            }
106
10.6M
        }
107
108
        /* Update offset-dependent variables */
109
3.08M
        limit = limit_base+match_offset;
110
3.08M
        if (cur_match <= limit)
111
671k
            goto break_matching;
112
2.41M
        mbase_start -= match_offset;
113
2.41M
        mbase_end -= match_offset;
114
2.41M
    }
115
#else
116
17.7M
    early_exit = s->level < EARLY_EXIT_TRIGGER_LEVEL;
117
#endif
118
15.4M
    Assert((unsigned long)strstart <= s->window_size - MIN_LOOKAHEAD, "need lookahead");
119
43.8M
    for (;;) {
120
43.8M
        if (cur_match >= strstart)
121
26
            break;
122
123
        /* Skip to next match if the match length cannot increase or if the match length is
124
         * less than 2. Note that the checks below for insufficient lookahead only occur
125
         * occasionally for performance reasons.
126
         * Therefore uninitialized memory will be accessed and conditional jumps will be made
127
         * that depend on those values. However the length of the match is limited to the
128
         * lookahead, so the output of deflate is not affected by the uninitialized values.
129
         */
130
43.8M
        if (best_len < sizeof(uint32_t)) {
131
75.9M
            for (;;) {
132
75.9M
                if (zng_memcmp_2(mbase_end+cur_match, &scan_end) == 0 &&
133
15.4M
                    zng_memcmp_2(mbase_start+cur_match, &scan_start) == 0)
134
10.0M
                    break;
135
65.9M
                GOTO_NEXT_CHAIN;
136
65.9M
            }
137
33.6M
        } else if (best_len >= sizeof(uint64_t)) {
138
74.3M
            for (;;) {
139
74.3M
                if (zng_memcmp_8(mbase_end+cur_match, &scan_end) == 0 &&
140
3.42M
                    zng_memcmp_8(mbase_start+cur_match, &scan_start) == 0)
141
2.61M
                    break;
142
71.7M
                GOTO_NEXT_CHAIN;
143
71.7M
            }
144
6.15M
        } else {
145
92.6M
            for (;;) {
146
92.6M
                if (zng_memcmp_4(mbase_end+cur_match, &scan_end) == 0 &&
147
2.95M
                    zng_memcmp_4(mbase_start+cur_match, &scan_start) == 0)
148
1.54M
                    break;
149
91.1M
                GOTO_NEXT_CHAIN;
150
91.1M
            }
151
6.15M
        }
152
14.1M
        uint32_t len = COMPARE256(scan+2, mbase_start+cur_match+2) + 2;
153
14.1M
        Assert(scan+len <= window+(unsigned)(s->window_size-1), "wild scan");
154
155
14.1M
        if (len > best_len) {
156
13.1M
            uint32_t match_start = cur_match - match_offset;
157
13.1M
            s->match_start = match_start;
158
159
            /* Do not look for matches beyond the end of the input. */
160
13.1M
            if (len > lookahead)
161
801
                return lookahead;
162
13.1M
            best_len = len;
163
13.1M
            if (best_len >= nice_match)
164
614k
                return best_len;
165
166
12.5M
            offset = best_len-1;
167
12.5M
            if (best_len >= sizeof(uint32_t)) {
168
9.33M
                offset -= 2;
169
9.33M
                if (best_len >= sizeof(uint64_t))
170
2.89M
                    offset -= 4;
171
9.33M
            }
172
173
12.5M
            scan_end = zng_memread_8(scan+offset);
174
175
#ifdef LONGEST_MATCH_SLOW
176
            /* Look for a better string offset */
177
6.06M
            if (UNLIKELY(len > STD_MIN_MATCH && match_start + len < strstart)) {
178
2.79M
                Pos pos, next_pos;
179
2.79M
                uint32_t i, hash;
180
2.79M
                unsigned char *scan_endstr;
181
182
                /* Go back to offset 0 */
183
                cur_match -= match_offset;
184
                match_offset = 0;
185
                next_pos = cur_match;
186
37.0M
                for (i = 0; i <= len - STD_MIN_MATCH; i++) {
187
35.0M
                    pos = prev[(cur_match + i) & wmask];
188
35.0M
                    if (pos < next_pos) {
189
                        /* Hash chain is more distant, use it */
190
3.85M
                        if (pos <= limit_base + i)
191
789k
                            goto break_matching;
192
3.06M
                        next_pos = pos;
193
3.06M
                        match_offset = (Pos)i;
194
3.06M
                    }
195
35.0M
                }
196
                /* Switch cur_match to next_pos chain */
197
2.00M
                cur_match = next_pos;
198
199
                /* Try hash head at len-(STD_MIN_MATCH-1) position to see if we could get
200
                 * a better cur_match at the end of string. Using (STD_MIN_MATCH-1) lets
201
                 * us include one more byte into hash - the byte which will be checked
202
                 * in main loop now, and which allows to grow match by 1.
203
                 */
204
2.00M
                scan_endstr = scan + len - (STD_MIN_MATCH+1);
205
206
2.00M
                hash = s->update_hash(0, scan_endstr[0]);
207
2.00M
                hash = s->update_hash(hash, scan_endstr[1]);
208
2.00M
                hash = s->update_hash(hash, scan_endstr[2]);
209
210
2.00M
                pos = s->head[hash];
211
2.00M
                if (pos < cur_match) {
212
0
                    match_offset = (Pos)(len - (STD_MIN_MATCH+1));
213
0
                    if (pos <= limit_base + match_offset)
214
0
                        goto break_matching;
215
0
                    cur_match = pos;
216
0
                }
217
218
                /* Update offset-dependent variables */
219
2.00M
                limit = limit_base+match_offset;
220
2.00M
                mbase_start = window-match_offset;
221
2.00M
                mbase_end = (mbase_start+offset);
222
2.00M
                continue;
223
2.00M
            }
224
3.26M
#endif
225
3.26M
            mbase_end = (mbase_start+offset);
226
3.26M
        }
227
#ifndef LONGEST_MATCH_SLOW
228
160k
        else if (UNLIKELY(early_exit)) {
229
            /* The probability of finding a match later if we here is pretty low, so for
230
             * performance it's best to outright stop here for the lower compression levels
231
             */
232
810
            break;
233
810
        }
234
6.62M
#endif
235
10.7M
        GOTO_NEXT_CHAIN;
236
10.7M
    }
237
836
    return best_len;
238
239
#ifdef LONGEST_MATCH_SLOW
240
1.46M
break_matching:
241
242
1.46M
    if (best_len < s->lookahead)
243
1.46M
        return best_len;
244
245
204
    return s->lookahead;
246
#endif
247
1.46M
}
Unexecuted instantiation: longest_match_sse2
Unexecuted instantiation: longest_match_slow_sse2
longest_match_avx2
Line
Count
Source
29
17.7M
Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
30
17.7M
    unsigned int strstart = s->strstart;
31
17.7M
    const unsigned wmask = s->w_mask;
32
17.7M
    unsigned char *window = s->window;
33
17.7M
    unsigned char *scan = window + strstart;
34
17.7M
    Z_REGISTER unsigned char *mbase_start = window;
35
17.7M
    Z_REGISTER unsigned char *mbase_end;
36
17.7M
    const Pos *prev = s->prev;
37
17.7M
    Pos limit;
38
#ifdef LONGEST_MATCH_SLOW
39
    Pos limit_base;
40
#else
41
17.7M
    int32_t early_exit;
42
17.7M
#endif
43
17.7M
    uint32_t chain_length, nice_match, best_len, offset;
44
17.7M
    uint32_t lookahead = s->lookahead;
45
17.7M
    Pos match_offset = 0;
46
17.7M
    uint64_t scan_start;
47
17.7M
    uint64_t scan_end;
48
49
17.7M
#define GOTO_NEXT_CHAIN \
50
17.7M
    if (--chain_length && (cur_match = prev[cur_match & wmask]) > limit) \
51
17.7M
        continue; \
52
17.7M
    return best_len;
53
54
    /* The code is optimized for STD_MAX_MATCH-2 multiple of 16. */
55
17.7M
    Assert(STD_MAX_MATCH == 258, "Code too clever");
56
57
17.7M
    best_len = s->prev_length ? s->prev_length : STD_MIN_MATCH-1;
58
59
    /* Calculate read offset which should only extend an extra byte
60
     * to find the next best match length.
61
     */
62
17.7M
    offset = best_len-1;
63
17.7M
    if (best_len >= sizeof(uint32_t)) {
64
985k
        offset -= 2;
65
985k
        if (best_len >= sizeof(uint64_t))
66
413k
            offset -= 4;
67
985k
    }
68
69
17.7M
    scan_start = zng_memread_8(scan);
70
17.7M
    scan_end = zng_memread_8(scan+offset);
71
17.7M
    mbase_end  = (mbase_start+offset);
72
73
    /* Do not waste too much time if we already have a good match */
74
17.7M
    chain_length = s->max_chain_length;
75
17.7M
    if (best_len >= s->good_match)
76
394k
        chain_length >>= 2;
77
17.7M
    nice_match = (uint32_t)s->nice_match;
78
79
    /* Stop when cur_match becomes <= limit. To simplify the code,
80
     * we prevent matches with the string of window index 0
81
     */
82
17.7M
    limit = strstart > MAX_DIST(s) ? (Pos)(strstart - MAX_DIST(s)) : 0;
83
#ifdef LONGEST_MATCH_SLOW
84
    limit_base = limit;
85
    if (best_len >= STD_MIN_MATCH) {
86
        /* We're continuing search (lazy evaluation). */
87
        uint32_t i, hash;
88
        Pos pos;
89
90
        /* Find a most distant chain starting from scan with index=1 (index=0 corresponds
91
         * to cur_match). We cannot use s->prev[strstart+1,...] immediately, because
92
         * these strings are not yet inserted into the hash table.
93
         */
94
        hash = s->update_hash(0, scan[1]);
95
        hash = s->update_hash(hash, scan[2]);
96
97
        for (i = 3; i <= best_len; i++) {
98
            hash = s->update_hash(hash, scan[i]);
99
100
            /* If we're starting with best_len >= 3, we can use offset search. */
101
            pos = s->head[hash];
102
            if (pos < cur_match) {
103
                match_offset = (Pos)(i - 2);
104
                cur_match = pos;
105
            }
106
        }
107
108
        /* Update offset-dependent variables */
109
        limit = limit_base+match_offset;
110
        if (cur_match <= limit)
111
            goto break_matching;
112
        mbase_start -= match_offset;
113
        mbase_end -= match_offset;
114
    }
115
#else
116
17.7M
    early_exit = s->level < EARLY_EXIT_TRIGGER_LEVEL;
117
17.7M
#endif
118
17.7M
    Assert((unsigned long)strstart <= s->window_size - MIN_LOOKAHEAD, "need lookahead");
119
22.7M
    for (;;) {
120
22.7M
        if (cur_match >= strstart)
121
26
            break;
122
123
        /* Skip to next match if the match length cannot increase or if the match length is
124
         * less than 2. Note that the checks below for insufficient lookahead only occur
125
         * occasionally for performance reasons.
126
         * Therefore uninitialized memory will be accessed and conditional jumps will be made
127
         * that depend on those values. However the length of the match is limited to the
128
         * lookahead, so the output of deflate is not affected by the uninitialized values.
129
         */
130
22.7M
        if (best_len < sizeof(uint32_t)) {
131
21.2M
            for (;;) {
132
21.2M
                if (zng_memcmp_2(mbase_end+cur_match, &scan_end) == 0 &&
133
4.74M
                    zng_memcmp_2(mbase_start+cur_match, &scan_start) == 0)
134
4.74M
                    break;
135
16.5M
                GOTO_NEXT_CHAIN;
136
16.5M
            }
137
16.7M
        } else if (best_len >= sizeof(uint64_t)) {
138
46.2M
            for (;;) {
139
46.2M
                if (zng_memcmp_8(mbase_end+cur_match, &scan_end) == 0 &&
140
1.47M
                    zng_memcmp_8(mbase_start+cur_match, &scan_start) == 0)
141
1.30M
                    break;
142
44.9M
                GOTO_NEXT_CHAIN;
143
44.9M
            }
144
3.50M
        } else {
145
65.0M
            for (;;) {
146
65.0M
                if (zng_memcmp_4(mbase_end+cur_match, &scan_end) == 0 &&
147
1.16M
                    zng_memcmp_4(mbase_start+cur_match, &scan_start) == 0)
148
1.16M
                    break;
149
63.8M
                GOTO_NEXT_CHAIN;
150
63.8M
            }
151
3.50M
        }
152
7.21M
        uint32_t len = COMPARE256(scan+2, mbase_start+cur_match+2) + 2;
153
7.21M
        Assert(scan+len <= window+(unsigned)(s->window_size-1), "wild scan");
154
155
7.21M
        if (len > best_len) {
156
7.05M
            uint32_t match_start = cur_match - match_offset;
157
7.05M
            s->match_start = match_start;
158
159
            /* Do not look for matches beyond the end of the input. */
160
7.05M
            if (len > lookahead)
161
440
                return lookahead;
162
7.05M
            best_len = len;
163
7.05M
            if (best_len >= nice_match)
164
584k
                return best_len;
165
166
6.46M
            offset = best_len-1;
167
6.46M
            if (best_len >= sizeof(uint32_t)) {
168
6.46M
                offset -= 2;
169
6.46M
                if (best_len >= sizeof(uint64_t))
170
2.23M
                    offset -= 4;
171
6.46M
            }
172
173
6.46M
            scan_end = zng_memread_8(scan+offset);
174
175
#ifdef LONGEST_MATCH_SLOW
176
            /* Look for a better string offset */
177
            if (UNLIKELY(len > STD_MIN_MATCH && match_start + len < strstart)) {
178
                Pos pos, next_pos;
179
                uint32_t i, hash;
180
                unsigned char *scan_endstr;
181
182
                /* Go back to offset 0 */
183
                cur_match -= match_offset;
184
                match_offset = 0;
185
                next_pos = cur_match;
186
                for (i = 0; i <= len - STD_MIN_MATCH; i++) {
187
                    pos = prev[(cur_match + i) & wmask];
188
                    if (pos < next_pos) {
189
                        /* Hash chain is more distant, use it */
190
                        if (pos <= limit_base + i)
191
                            goto break_matching;
192
                        next_pos = pos;
193
                        match_offset = (Pos)i;
194
                    }
195
                }
196
                /* Switch cur_match to next_pos chain */
197
                cur_match = next_pos;
198
199
                /* Try hash head at len-(STD_MIN_MATCH-1) position to see if we could get
200
                 * a better cur_match at the end of string. Using (STD_MIN_MATCH-1) lets
201
                 * us include one more byte into hash - the byte which will be checked
202
                 * in main loop now, and which allows to grow match by 1.
203
                 */
204
                scan_endstr = scan + len - (STD_MIN_MATCH+1);
205
206
                hash = s->update_hash(0, scan_endstr[0]);
207
                hash = s->update_hash(hash, scan_endstr[1]);
208
                hash = s->update_hash(hash, scan_endstr[2]);
209
210
                pos = s->head[hash];
211
                if (pos < cur_match) {
212
                    match_offset = (Pos)(len - (STD_MIN_MATCH+1));
213
                    if (pos <= limit_base + match_offset)
214
                        goto break_matching;
215
                    cur_match = pos;
216
                }
217
218
                /* Update offset-dependent variables */
219
                limit = limit_base+match_offset;
220
                mbase_start = window-match_offset;
221
                mbase_end = (mbase_start+offset);
222
                continue;
223
            }
224
#endif
225
6.46M
            mbase_end = (mbase_start+offset);
226
6.46M
        }
227
160k
#ifndef LONGEST_MATCH_SLOW
228
160k
        else if (UNLIKELY(early_exit)) {
229
            /* The probability of finding a match later if we here is pretty low, so for
230
             * performance it's best to outright stop here for the lower compression levels
231
             */
232
810
            break;
233
810
        }
234
6.62M
#endif
235
6.62M
        GOTO_NEXT_CHAIN;
236
6.62M
    }
237
836
    return best_len;
238
239
#ifdef LONGEST_MATCH_SLOW
240
break_matching:
241
242
    if (best_len < s->lookahead)
243
        return best_len;
244
245
    return s->lookahead;
246
#endif
247
17.7M
}
longest_match_slow_avx2
Line
Count
Source
29
16.0M
Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
30
16.0M
    unsigned int strstart = s->strstart;
31
16.0M
    const unsigned wmask = s->w_mask;
32
16.0M
    unsigned char *window = s->window;
33
16.0M
    unsigned char *scan = window + strstart;
34
16.0M
    Z_REGISTER unsigned char *mbase_start = window;
35
16.0M
    Z_REGISTER unsigned char *mbase_end;
36
16.0M
    const Pos *prev = s->prev;
37
16.0M
    Pos limit;
38
16.0M
#ifdef LONGEST_MATCH_SLOW
39
16.0M
    Pos limit_base;
40
#else
41
    int32_t early_exit;
42
#endif
43
16.0M
    uint32_t chain_length, nice_match, best_len, offset;
44
16.0M
    uint32_t lookahead = s->lookahead;
45
16.0M
    Pos match_offset = 0;
46
16.0M
    uint64_t scan_start;
47
16.0M
    uint64_t scan_end;
48
49
16.0M
#define GOTO_NEXT_CHAIN \
50
16.0M
    if (--chain_length && (cur_match = prev[cur_match & wmask]) > limit) \
51
16.0M
        continue; \
52
16.0M
    return best_len;
53
54
    /* The code is optimized for STD_MAX_MATCH-2 multiple of 16. */
55
16.0M
    Assert(STD_MAX_MATCH == 258, "Code too clever");
56
57
16.0M
    best_len = s->prev_length ? s->prev_length : STD_MIN_MATCH-1;
58
59
    /* Calculate read offset which should only extend an extra byte
60
     * to find the next best match length.
61
     */
62
16.0M
    offset = best_len-1;
63
16.0M
    if (best_len >= sizeof(uint32_t)) {
64
1.80M
        offset -= 2;
65
1.80M
        if (best_len >= sizeof(uint64_t))
66
251k
            offset -= 4;
67
1.80M
    }
68
69
16.0M
    scan_start = zng_memread_8(scan);
70
16.0M
    scan_end = zng_memread_8(scan+offset);
71
16.0M
    mbase_end  = (mbase_start+offset);
72
73
    /* Do not waste too much time if we already have a good match */
74
16.0M
    chain_length = s->max_chain_length;
75
16.0M
    if (best_len >= s->good_match)
76
35.7k
        chain_length >>= 2;
77
16.0M
    nice_match = (uint32_t)s->nice_match;
78
79
    /* Stop when cur_match becomes <= limit. To simplify the code,
80
     * we prevent matches with the string of window index 0
81
     */
82
16.0M
    limit = strstart > MAX_DIST(s) ? (Pos)(strstart - MAX_DIST(s)) : 0;
83
16.0M
#ifdef LONGEST_MATCH_SLOW
84
16.0M
    limit_base = limit;
85
16.0M
    if (best_len >= STD_MIN_MATCH) {
86
        /* We're continuing search (lazy evaluation). */
87
3.08M
        uint32_t i, hash;
88
3.08M
        Pos pos;
89
90
        /* Find a most distant chain starting from scan with index=1 (index=0 corresponds
91
         * to cur_match). We cannot use s->prev[strstart+1,...] immediately, because
92
         * these strings are not yet inserted into the hash table.
93
         */
94
3.08M
        hash = s->update_hash(0, scan[1]);
95
3.08M
        hash = s->update_hash(hash, scan[2]);
96
97
13.7M
        for (i = 3; i <= best_len; i++) {
98
10.6M
            hash = s->update_hash(hash, scan[i]);
99
100
            /* If we're starting with best_len >= 3, we can use offset search. */
101
10.6M
            pos = s->head[hash];
102
10.6M
            if (pos < cur_match) {
103
2.69M
                match_offset = (Pos)(i - 2);
104
2.69M
                cur_match = pos;
105
2.69M
            }
106
10.6M
        }
107
108
        /* Update offset-dependent variables */
109
3.08M
        limit = limit_base+match_offset;
110
3.08M
        if (cur_match <= limit)
111
671k
            goto break_matching;
112
2.41M
        mbase_start -= match_offset;
113
2.41M
        mbase_end -= match_offset;
114
2.41M
    }
115
#else
116
    early_exit = s->level < EARLY_EXIT_TRIGGER_LEVEL;
117
#endif
118
15.4M
    Assert((unsigned long)strstart <= s->window_size - MIN_LOOKAHEAD, "need lookahead");
119
21.1M
    for (;;) {
120
21.1M
        if (cur_match >= strstart)
121
0
            break;
122
123
        /* Skip to next match if the match length cannot increase or if the match length is
124
         * less than 2. Note that the checks below for insufficient lookahead only occur
125
         * occasionally for performance reasons.
126
         * Therefore uninitialized memory will be accessed and conditional jumps will be made
127
         * that depend on those values. However the length of the match is limited to the
128
         * lookahead, so the output of deflate is not affected by the uninitialized values.
129
         */
130
21.1M
        if (best_len < sizeof(uint32_t)) {
131
54.7M
            for (;;) {
132
54.7M
                if (zng_memcmp_2(mbase_end+cur_match, &scan_end) == 0 &&
133
10.7M
                    zng_memcmp_2(mbase_start+cur_match, &scan_start) == 0)
134
5.27M
                    break;
135
49.4M
                GOTO_NEXT_CHAIN;
136
49.4M
            }
137
16.8M
        } else if (best_len >= sizeof(uint64_t)) {
138
28.1M
            for (;;) {
139
28.1M
                if (zng_memcmp_8(mbase_end+cur_match, &scan_end) == 0 &&
140
1.94M
                    zng_memcmp_8(mbase_start+cur_match, &scan_start) == 0)
141
1.30M
                    break;
142
26.8M
                GOTO_NEXT_CHAIN;
143
26.8M
            }
144
2.65M
        } else {
145
27.6M
            for (;;) {
146
27.6M
                if (zng_memcmp_4(mbase_end+cur_match, &scan_end) == 0 &&
147
1.79M
                    zng_memcmp_4(mbase_start+cur_match, &scan_start) == 0)
148
382k
                    break;
149
27.2M
                GOTO_NEXT_CHAIN;
150
27.2M
            }
151
2.65M
        }
152
6.96M
        uint32_t len = COMPARE256(scan+2, mbase_start+cur_match+2) + 2;
153
6.96M
        Assert(scan+len <= window+(unsigned)(s->window_size-1), "wild scan");
154
155
6.96M
        if (len > best_len) {
156
6.09M
            uint32_t match_start = cur_match - match_offset;
157
6.09M
            s->match_start = match_start;
158
159
            /* Do not look for matches beyond the end of the input. */
160
6.09M
            if (len > lookahead)
161
361
                return lookahead;
162
6.09M
            best_len = len;
163
6.09M
            if (best_len >= nice_match)
164
30.3k
                return best_len;
165
166
6.06M
            offset = best_len-1;
167
6.06M
            if (best_len >= sizeof(uint32_t)) {
168
2.87M
                offset -= 2;
169
2.87M
                if (best_len >= sizeof(uint64_t))
170
662k
                    offset -= 4;
171
2.87M
            }
172
173
6.06M
            scan_end = zng_memread_8(scan+offset);
174
175
6.06M
#ifdef LONGEST_MATCH_SLOW
176
            /* Look for a better string offset */
177
6.06M
            if (UNLIKELY(len > STD_MIN_MATCH && match_start + len < strstart)) {
178
2.79M
                Pos pos, next_pos;
179
2.79M
                uint32_t i, hash;
180
2.79M
                unsigned char *scan_endstr;
181
182
                /* Go back to offset 0 */
183
2.79M
                cur_match -= match_offset;
184
2.79M
                match_offset = 0;
185
2.79M
                next_pos = cur_match;
186
37.0M
                for (i = 0; i <= len - STD_MIN_MATCH; i++) {
187
35.0M
                    pos = prev[(cur_match + i) & wmask];
188
35.0M
                    if (pos < next_pos) {
189
                        /* Hash chain is more distant, use it */
190
3.85M
                        if (pos <= limit_base + i)
191
789k
                            goto break_matching;
192
3.06M
                        next_pos = pos;
193
3.06M
                        match_offset = (Pos)i;
194
3.06M
                    }
195
35.0M
                }
196
                /* Switch cur_match to next_pos chain */
197
2.00M
                cur_match = next_pos;
198
199
                /* Try hash head at len-(STD_MIN_MATCH-1) position to see if we could get
200
                 * a better cur_match at the end of string. Using (STD_MIN_MATCH-1) lets
201
                 * us include one more byte into hash - the byte which will be checked
202
                 * in main loop now, and which allows to grow match by 1.
203
                 */
204
2.00M
                scan_endstr = scan + len - (STD_MIN_MATCH+1);
205
206
2.00M
                hash = s->update_hash(0, scan_endstr[0]);
207
2.00M
                hash = s->update_hash(hash, scan_endstr[1]);
208
2.00M
                hash = s->update_hash(hash, scan_endstr[2]);
209
210
2.00M
                pos = s->head[hash];
211
2.00M
                if (pos < cur_match) {
212
0
                    match_offset = (Pos)(len - (STD_MIN_MATCH+1));
213
0
                    if (pos <= limit_base + match_offset)
214
0
                        goto break_matching;
215
0
                    cur_match = pos;
216
0
                }
217
218
                /* Update offset-dependent variables */
219
2.00M
                limit = limit_base+match_offset;
220
2.00M
                mbase_start = window-match_offset;
221
2.00M
                mbase_end = (mbase_start+offset);
222
2.00M
                continue;
223
2.00M
            }
224
3.26M
#endif
225
3.26M
            mbase_end = (mbase_start+offset);
226
3.26M
        }
227
#ifndef LONGEST_MATCH_SLOW
228
        else if (UNLIKELY(early_exit)) {
229
            /* The probability of finding a match later if we here is pretty low, so for
230
             * performance it's best to outright stop here for the lower compression levels
231
             */
232
            break;
233
        }
234
#endif
235
4.13M
        GOTO_NEXT_CHAIN;
236
4.13M
    }
237
0
    return best_len;
238
239
0
#ifdef LONGEST_MATCH_SLOW
240
1.46M
break_matching:
241
242
1.46M
    if (best_len < s->lookahead)
243
1.46M
        return best_len;
244
245
204
    return s->lookahead;
246
1.46M
#endif
247
1.46M
}
Unexecuted instantiation: longest_match_avx512
Unexecuted instantiation: longest_match_slow_avx512
248
249
#undef LONGEST_MATCH_SLOW
250
#undef LONGEST_MATCH