/src/zlib-ng/compare256_rle.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* compare256_rle.h -- 256 byte run-length encoding comparison |
2 | | * Copyright (C) 2022 Nathan Moinvaziri |
3 | | * For conditions of distribution and use, see copyright notice in zlib.h |
4 | | */ |
5 | | |
6 | | #include "zbuild.h" |
7 | | #include "zmemory.h" |
8 | | #include "fallback_builtins.h" |
9 | | |
/* Pointer to any of the compare256_rle_* routines declared in this header. */
typedef uint32_t (*compare256_rle_func)(const uint8_t *src0, const uint8_t *src1);
11 | | |
/* Byte-at-a-time run comparison.
 *
 * Counts how many consecutive bytes at the start of src1 are equal to the
 * single byte *src0. Counting stops at the first differing byte or after
 * 256 bytes, whichever comes first, so the result is in the range 0..256.
 * At most 256 bytes are read from src1; only one byte is read from src0.
 */
static inline uint32_t compare256_rle_8(const uint8_t *src0, const uint8_t *src1) {
    const uint8_t run_byte = *src0;
    uint32_t matched;

    for (matched = 0; matched < 256; matched++) {
        if (src1[matched] != run_byte)
            break;
    }

    return matched;
}
45 | | |
/* Two-bytes-at-a-time run comparison.
 *
 * Reads a 2-byte pattern from src0 and compares it against successive 2-byte
 * units of src1, for up to 256 bytes. When a unit differs, the result is the
 * number of bytes matched so far, plus one if the first byte of that unit
 * still equals *src0. Returns 256 when every unit matches.
 *
 * NOTE(review): the result is a plain run length only if src0[0] == src0[1];
 * presumably the caller guarantees this -- confirm against the call site.
 */
static inline uint32_t compare256_rle_16(const uint8_t *src0, const uint8_t *src1) {
    const uint16_t pattern = zng_memread_2(src0);
    uint32_t matched;

    for (matched = 0; matched < 256; matched += 2) {
        const uint8_t *cursor = src1 + matched;

        if (pattern != zng_memread_2(cursor)) {
            /* The pair differs: credit its first byte if that one still matches. */
            return matched + (*src0 == *cursor);
        }
    }

    return 256;
}
70 | | |
71 | | #ifdef HAVE_BUILTIN_CTZ |
/* Four-bytes-at-a-time run comparison.
 *
 * The 2-byte pattern read from src0 is replicated into a 32-bit word, which
 * is XORed against successive 4-byte words of src1 for up to 256 bytes. A
 * non-zero XOR marks a mismatch; the position of its first set bit, divided
 * by 8, is added to the bytes already matched. The bit is located with ctz
 * when BYTE_ORDER == LITTLE_ENDIAN and with clz otherwise. Returns 256 when
 * every word matches.
 */
static inline uint32_t compare256_rle_32(const uint8_t *src0, const uint8_t *src1) {
    const uint16_t pair = zng_memread_2(src0);
    const uint32_t pattern = ((uint32_t)pair << 16) | pair;
    uint32_t matched;

    for (matched = 0; matched < 256; matched += 4) {
        const uint32_t mismatch = pattern ^ zng_memread_4(src1 + matched);

        if (mismatch != 0) {
            uint32_t byte_index;
#if BYTE_ORDER == LITTLE_ENDIAN
            byte_index = (uint32_t)(__builtin_ctz(mismatch) / 8);
#else
            byte_index = (uint32_t)(__builtin_clz(mismatch) / 8);
#endif
            return matched + byte_index;
        }
    }

    return 256;
}
100 | | #endif |
101 | | |
102 | | #ifdef HAVE_BUILTIN_CTZLL |
/* Eight-bytes-at-a-time run comparison.
 *
 * The 2-byte pattern read from src0 is replicated into a 64-bit word, which
 * is XORed against successive 8-byte words of src1 for up to 256 bytes. A
 * non-zero XOR marks a mismatch; the position of its first set bit, divided
 * by 8, is added to the bytes already matched. The bit is located with ctzll
 * when BYTE_ORDER == LITTLE_ENDIAN and with clzll otherwise. Returns 256 when
 * every word matches.
 */
static inline uint32_t compare256_rle_64(const uint8_t *src0, const uint8_t *src1) {
    const uint16_t pair = zng_memread_2(src0);
    const uint32_t quad = ((uint32_t)pair << 16) | pair;
    const uint64_t pattern = ((uint64_t)quad << 32) | quad;
    uint32_t matched;

    for (matched = 0; matched < 256; matched += 8) {
        const uint64_t mismatch = pattern ^ zng_memread_8(src1 + matched);

        if (mismatch != 0) {
            uint64_t byte_index;
#if BYTE_ORDER == LITTLE_ENDIAN
            byte_index = __builtin_ctzll(mismatch) / 8;
#else
            byte_index = __builtin_clzll(mismatch) / 8;
#endif
            return matched + (uint32_t)byte_index;
        }
    }

    return 256;
}
133 | | #endif |