/* match_tpl.h -- find longest match template for compare256 variants
 *
 * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
 * For conditions of distribution and use, see copyright notice in zlib.h
 *
 * Portions copyright (C) 2014-2021 Konstantin Nosov
 * Fast-zlib optimized longest_match
 * https://github.com/gildor2/fast_zlib
 */

#include "insert_string_p.h"

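/* Compression levels below this value stop the chain walk as soon as a fully
 * compared candidate fails to improve best_len (non-LONGEST_MATCH_SLOW variant
 * only; see early_exit below). */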
#define EARLY_EXIT_TRIGGER_LEVEL 5

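/* GOTO_NEXT_CHAIN follows the prev[] hash chain to the previous occurrence and
 * keeps searching while the chain budget (chain_length) is not exhausted and
 * the new position is still more recent than limit; otherwise it returns the
 * current best_len. */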
#define GOTO_NEXT_CHAIN \
    if (--chain_length && (cur_match = prev[cur_match & wmask]) > limit) \
        continue; \
    return best_len;

/* Set match_start to the longest match starting at the given string and
 * return its length. Matches shorter than or equal to prev_length are discarded,
 * in which case the result is equal to prev_length and match_start is garbage.
 *
 * IN assertions: cur_match is the head of the hash chain for the current
 * string (strstart), its distance is <= MAX_DIST, and prev_length >= 1.
 * OUT assertion: the match length is not greater than s->lookahead.
 *
 * The LONGEST_MATCH_SLOW variant spends more time trying to find longer
 * matches once a match has already been found.
 */
Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, uint32_t cur_match) {
    const unsigned wmask = W_MASK(s);
    unsigned int strstart = s->strstart;
    const unsigned char *window = s->window;
    const Pos *prev = s->prev;
#ifdef LONGEST_MATCH_SLOW
    const Pos *head = s->head;
#endif
    const unsigned char *scan;
    const unsigned char *mbase_start = window;
    const unsigned char *mbase_end;
    uint32_t limit;
#ifdef LONGEST_MATCH_SLOW
    uint32_t limit_base;
#else
    int32_t early_exit;
#endif
    uint32_t chain_length = s->max_chain_length;
    uint32_t nice_match = (uint32_t)s->nice_match;
    uint32_t best_len, offset;
    uint32_t lookahead = s->lookahead;
    uint32_t match_offset = 0;
    uint64_t scan_start;
    uint64_t scan_end;

    /* The code is optimized for STD_MAX_MATCH-2 being a multiple of 16. */
    Assert(STD_MAX_MATCH == 258, "Code too clever");

    scan = window + strstart;
    best_len = s->prev_length ? s->prev_length : STD_MIN_MATCH-1;

    /* Calculate the read offset, which only needs to extend one extra byte
     * beyond the current best match to detect a longer one.
     */
    offset = best_len-1;
    if (best_len >= sizeof(uint32_t)) {
        offset -= 2;
        if (best_len >= sizeof(uint64_t))
            offset -= 4;
    }
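    /* For example, with best_len == 10: offset = 10-1-2-4 = 3, so the 8-byte
     * scan_end read below covers scan[3..10] - the tail of the current best
     * match plus the one byte a longer match must also provide. */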

    scan_start = zng_memread_8(scan);
    scan_end = zng_memread_8(scan+offset);
    mbase_end = (mbase_start+offset);

    /* Do not waste too much time if we already have a good match */
    if (best_len >= s->good_match)
        chain_length >>= 2;

    /* Stop when cur_match becomes <= limit. To simplify the code,
     * we prevent matches with the string of window index 0.
     */
    limit = strstart > MAX_DIST(s) ? (strstart - MAX_DIST(s)) : 0;
#ifdef LONGEST_MATCH_SLOW
    limit_base = limit;
    if (best_len >= STD_MIN_MATCH) {
        /* We're continuing the search (lazy evaluation). */
        uint32_t hash;
        uint32_t pos;

        /* Find the most distant chain starting from scan with index=1 (index=0 corresponds
         * to cur_match). We cannot use s->prev[strstart+1,...] immediately, because
         * these strings are not yet inserted into the hash table.
         */
        // use update_hash_roll for deflate_slow
        hash = update_hash_roll(0, scan[1]);
        hash = update_hash_roll(hash, scan[2]);

        for (uint32_t i = 3; i <= best_len; i++) {
            // use update_hash_roll for deflate_slow
            hash = update_hash_roll(hash, scan[i]);
            /* If we're starting with best_len >= 3, we can use offset search. */
            pos = head[hash];
            if (pos < cur_match) {
                match_offset = i - 2;
                cur_match = pos;
            }
        }
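        /* cur_match is now the most distant of the hash-chain heads for the
         * 3-byte substrings at scan+0..scan+(best_len-2), and match_offset
         * records which of those offsets it belongs to. Candidates from this
         * chain are compared against scan shifted by match_offset (mbase_* are
         * rebased below), so a hit at cur_match implies a full match starting
         * at cur_match - match_offset. */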

        /* Update offset-dependent variables */
        limit = limit_base+match_offset;
        if (cur_match <= limit)
            goto break_matching;
        mbase_start -= match_offset;
        mbase_end -= match_offset;
    }
#else
    early_exit = s->level < EARLY_EXIT_TRIGGER_LEVEL;
#endif
    Assert((unsigned long)strstart <= s->window_size - MIN_LOOKAHEAD, "need lookahead");
    for (;;) {
        if (cur_match >= strstart)
            break;

        /* Skip to next match if the match length cannot increase or if the match length is
         * less than 2. Note that the checks below for insufficient lookahead only occur
         * occasionally for performance reasons.
         * Therefore uninitialized memory will be accessed and conditional jumps will be made
         * that depend on those values. However the length of the match is limited to the
         * lookahead, so the output of deflate is not affected by the uninitialized values.
         */
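        /* The quick checks below compare 2, 4 or 8 bytes at scan+offset (the
         * tail of the current best match plus the byte a longer match must
         * also provide) and at the match start, so most candidates are
         * rejected without running the full COMPARE256. */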
        if (best_len < sizeof(uint32_t)) {
            for (;;) {
                if (zng_memcmp_2(mbase_end+cur_match, &scan_end) == 0 &&
                    zng_memcmp_2(mbase_start+cur_match, &scan_start) == 0)
                    break;
                GOTO_NEXT_CHAIN;
            }
        } else if (best_len >= sizeof(uint64_t)) {
            for (;;) {
                if (zng_memcmp_8(mbase_end+cur_match, &scan_end) == 0 &&
                    zng_memcmp_8(mbase_start+cur_match, &scan_start) == 0)
                    break;
                GOTO_NEXT_CHAIN;
            }
        } else {
            for (;;) {
                if (zng_memcmp_4(mbase_end+cur_match, &scan_end) == 0 &&
                    zng_memcmp_4(mbase_start+cur_match, &scan_start) == 0)
                    break;
                GOTO_NEXT_CHAIN;
            }
        }
        uint32_t len = COMPARE256(scan+2, mbase_start+cur_match+2) + 2;
        Assert(scan+len <= window+(unsigned)(s->window_size-1), "wild scan");

        if (len > best_len) {
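            /* cur_match indexes the (possibly shifted) chain, so subtracting
             * match_offset converts it back to the real start of the match in
             * the window. */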
            uint32_t match_start = cur_match - match_offset;
            s->match_start = match_start;

            /* Do not look for matches beyond the end of the input. */
            if (len > lookahead)
                return lookahead;
            if (len >= nice_match)
                return len;

            best_len = len;

            offset = best_len-1;
            if (best_len >= sizeof(uint32_t)) {
                offset -= 2;
                if (best_len >= sizeof(uint64_t))
                    offset -= 4;
            }

            scan_end = zng_memread_8(scan+offset);
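            /* Refresh the cached tail bytes so the quick checks above now test
             * candidates against the new, longer best match. */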

#ifdef LONGEST_MATCH_SLOW
            /* Look for a better string offset */
            if (UNLIKELY(len > STD_MIN_MATCH && match_start + len < strstart)) {
                const unsigned char *scan_endstr;
                uint32_t hash;
                uint32_t pos, next_pos;

                /* Go back to offset 0 */
                cur_match -= match_offset;
                match_offset = 0;
                next_pos = cur_match;
                for (uint32_t i = 0; i <= len - STD_MIN_MATCH; i++) {
                    pos = prev[(cur_match + i) & wmask];
                    if (pos < next_pos) {
                        /* Hash chain is more distant, use it */
                        if (pos <= limit_base + i)
                            goto break_matching;
                        next_pos = pos;
                        match_offset = i;
                    }
                }
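                /* next_pos is now the most distant previous occurrence reachable
                 * from any position inside the just-found match; switching to
                 * that chain (with its matching match_offset) continues the
                 * search from older positions instead of re-walking nearer ones. */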
                /* Switch cur_match to next_pos chain */
                cur_match = next_pos;

                /* Try the hash head at position len-(STD_MIN_MATCH-1) to see if we could get
                 * a better cur_match at the end of the string. Using (STD_MIN_MATCH-1) lets
                 * us include one more byte in the hash - the byte which will be checked
                 * in the main loop now, and which allows the match to grow by 1.
                 */
                scan_endstr = scan + len - (STD_MIN_MATCH+1);

                // use update_hash_roll for deflate_slow
                hash = update_hash_roll(0, scan_endstr[0]);
                hash = update_hash_roll(hash, scan_endstr[1]);
                hash = update_hash_roll(hash, scan_endstr[2]);

                pos = head[hash];
                if (pos < cur_match) {
                    match_offset = len - (STD_MIN_MATCH+1);
                    if (pos <= limit_base + match_offset)
                        goto break_matching;
                    cur_match = pos;
                }

                /* Update offset-dependent variables */
                limit = limit_base+match_offset;
                mbase_start = window-match_offset;
                mbase_end = (mbase_start+offset);
                continue;
            }
#endif
            mbase_end = (mbase_start+offset);
        }
#ifndef LONGEST_MATCH_SLOW
        else if (UNLIKELY(early_exit)) {
            /* The probability of finding a match later if we're here is pretty low, so for
             * performance it's best to outright stop here for the lower compression levels.
             */
            break;
        }
#endif
        GOTO_NEXT_CHAIN;
    }
    return best_len;

#ifdef LONGEST_MATCH_SLOW
break_matching:
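    /* Clamp the result: the OUT assertion above requires the reported match
     * length to never exceed s->lookahead. */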

    if (best_len < lookahead)
        return best_len;

    return lookahead;
#endif
}

#undef LONGEST_MATCH_SLOW
#undef LONGEST_MATCH
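
/* Illustrative sketch (not part of this header): an architecture-specific
 * source is expected to define the template macros and include this file once
 * per variant, for example:
 *
 *   #define COMPARE256     compare256_avx2
 *   #define LONGEST_MATCH  longest_match_avx2
 *   #include "match_tpl.h"
 *
 *   #define LONGEST_MATCH_SLOW
 *   #define LONGEST_MATCH  longest_match_slow_avx2
 *   #include "match_tpl.h"
 *
 * The variant names above are assumptions for illustration; the #undef lines
 * above are what make the repeated inclusion possible.
 */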