/src/libdeflate/lib/adler32.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * adler32.c - Adler-32 checksum algorithm |
3 | | * |
4 | | * Copyright 2016 Eric Biggers |
5 | | * |
6 | | * Permission is hereby granted, free of charge, to any person |
7 | | * obtaining a copy of this software and associated documentation |
8 | | * files (the "Software"), to deal in the Software without |
9 | | * restriction, including without limitation the rights to use, |
10 | | * copy, modify, merge, publish, distribute, sublicense, and/or sell |
11 | | * copies of the Software, and to permit persons to whom the |
12 | | * Software is furnished to do so, subject to the following |
13 | | * conditions: |
14 | | * |
15 | | * The above copyright notice and this permission notice shall be |
16 | | * included in all copies or substantial portions of the Software. |
17 | | * |
18 | | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
19 | | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES |
20 | | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
21 | | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT |
22 | | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
23 | | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
24 | | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
25 | | * OTHER DEALINGS IN THE SOFTWARE. |
26 | | */ |
27 | | |
28 | | #include "lib_common.h" |
29 | | |
30 | | /* The Adler-32 divisor, or "base", value: 65521 is the largest prime below 2^16, so sums reduced by it always fit in 16 bits */ |
31 | 7.94k | #define DIVISOR 65521 |
32 | | |
33 | | /* |
34 | | * MAX_CHUNK_LEN is the most bytes that can be processed without the possibility |
35 | | * of s2 overflowing when it is represented as an unsigned 32-bit integer. This |
36 | | * value was computed using the following Python script: |
37 | | * |
38 | | * divisor = 65521 |
39 | | * count = 0 |
40 | | * s1 = divisor - 1 |
41 | | * s2 = divisor - 1 |
42 | | * while True: |
43 | | *     s1 += 0xFF |
44 | | *     s2 += s1 |
45 | | *     if s2 > 0xFFFFFFFF: |
46 | | *         break |
47 | | *     count += 1 |
48 | | * print(count) |
49 | | * |
50 | | * Note that to get the correct worst-case value, we must assume that every byte |
51 | | * has value 0xFF and that s1 and s2 started with the highest possible values |
52 | | * modulo the divisor. |
53 | | */ |
54 | | #define MAX_CHUNK_LEN 5552 /* 5552 == 4 * 1388, so masking it with ~3 (as adler32_generic does) currently changes nothing */ |
55 | | |
56 | | /* |
57 | | * Update the Adler-32 values s1 and s2 using n bytes from p, update p to p + n, |
58 | | * update n to 0, and reduce s1 and s2 mod DIVISOR. It is assumed that neither |
59 | | * s1 nor s2 can overflow before the reduction at the end, i.e. n plus any bytes |
60 | | * already processed after the last reduction must not exceed MAX_CHUNK_LEN. |
61 | | * |
62 | | * This uses only portable C code. This is used as a fallback when a vectorized |
63 | | * implementation of Adler-32 (e.g. AVX2) is unavailable on the platform. |
64 | | * |
65 | | * Some of the vectorized implementations also use this to handle the end of the |
66 | | * data when the data isn't evenly divisible by the length the vectorized code |
67 | | * works on. To avoid compiler errors about target-specific option mismatches |
68 | | * when this is used in that way, this is a macro rather than a function. |
69 | | * |
70 | | * Although this is unvectorized, this does include an optimization where the |
71 | | * main loop processes four bytes at a time using a strategy similar to that |
72 | | * used by vectorized implementations. This provides increased instruction- |
73 | | * level parallelism compared to the traditional 's1 += *p++; s2 += s1;'. |
74 | | */ |
75 | 3.93k | #define ADLER32_CHUNK(s1, s2, p, n) /* all four arguments must be plain lvalues: each is evaluated many times and modified in place */ \ |
76 | 3.93k | do { \ |
77 | 3.93k | if (n >= 4) { /* fast path: consume as many whole 4-byte groups as possible */ \ |
78 | 276 | u32 s1_sum = 0; /* running total of s1 as it stood before each group */ \ |
79 | 276 | u32 byte_0_sum = 0; /* byte_K_sum: total of the K-th byte of every group */ \ |
80 | 276 | u32 byte_1_sum = 0; \ |
81 | 276 | u32 byte_2_sum = 0; \ |
82 | 276 | u32 byte_3_sum = 0; \ |
83 | 276 | \ |
84 | 2.33k | do { \ |
85 | 2.33k | s1_sum += s1; /* must read s1 before this group's bytes are added to it */ \ |
86 | 2.33k | s1 += p[0] + p[1] + p[2] + p[3]; \ |
87 | 2.33k | byte_0_sum += p[0]; \ |
88 | 2.33k | byte_1_sum += p[1]; \ |
89 | 2.33k | byte_2_sum += p[2]; \ |
90 | 2.33k | byte_3_sum += p[3]; \ |
91 | 2.33k | p += 4; \ |
92 | 2.33k | n -= 4; \ |
93 | 2.33k | } while (n >= 4); \ |
94 | 276 | s2 += (4 * (s1_sum + byte_0_sum)) + (3 * byte_1_sum) + \ |
95 | 276 | (2 * byte_2_sum) + byte_3_sum; /* each group adds 4*s1_old + 4*b0 + 3*b1 + 2*b2 + b3 to s2, the same total as four 's1 += b; s2 += s1' steps */ \ |
96 | 276 | } \ |
97 | 4.00k | for (; n; n--, p++) { /* at most 3 bytes remain here; handle them one at a time */ \ |
98 | 69 | s1 += *p; \ |
99 | 69 | s2 += s1; \ |
100 | 69 | } \ |
101 | 3.93k | s1 %= DIVISOR; /* reduce once per chunk instead of once per byte */ \ |
102 | 3.93k | s2 %= DIVISOR; \ |
103 | 3.93k | } while (0) |
104 | | |
105 | | static u32 MAYBE_UNUSED /* portable-C Adler-32: folds len bytes at p into the running checksum 'adler' and returns the new checksum */ |
106 | | adler32_generic(u32 adler, const u8 *p, size_t len) |
107 | 0 | { |
108 | 0 | u32 s1 = adler & 0xFFFF; /* low 16 bits of the packed checksum */ |
109 | 0 | u32 s2 = adler >> 16; /* bits 16 and up of the packed checksum */ |
110 |

111 | 0 | while (len) { |
112 | 0 | size_t n = MIN(len, MAX_CHUNK_LEN & ~3); /* cap the chunk so s2 cannot overflow before reduction; the mask keeps the cap a multiple of 4 */ |
113 |

114 | 0 | len -= n; /* must come first: ADLER32_CHUNK counts n down to 0 */ |
115 | 0 | ADLER32_CHUNK(s1, s2, p, n); /* also advances p by n and leaves s1, s2 reduced mod DIVISOR */ |
116 | 0 | } |
117 |

118 | 0 | return (s2 << 16) | s1; /* repack: s2 above bit 16, s1 in the low 16 bits */ |
119 | 0 | } |
120 | | |
121 | | /* Include architecture-specific implementation(s) if available. */ |
122 | | #undef DEFAULT_IMPL /* start undefined so the #ifndef below can tell whether an included header defined it */ |
123 | | #undef arch_select_adler32_func /* likewise for the #ifdef further down; presumably the arch headers may define either name (not visible here) */ |
124 | | typedef u32 (*adler32_func_t)(u32 adler, const u8 *p, size_t len); /* common signature of every Adler-32 implementation handled in this file */ |
125 | | #if defined(ARCH_ARM32) || defined(ARCH_ARM64) |
126 | | # include "arm/adler32_impl.h" |
127 | | #elif defined(ARCH_X86_32) || defined(ARCH_X86_64) |
128 | | # include "x86/adler32_impl.h" |
129 | | #endif |
130 | | |
131 | | #ifndef DEFAULT_IMPL |
132 | 0 | # define DEFAULT_IMPL adler32_generic /* nothing above defined a default: fall back to the portable C version */ |
133 | | #endif |
134 | | |
135 | | #ifdef arch_select_adler32_func |
136 | | static u32 dispatch_adler32(u32 adler, const u8 *p, size_t len); |
137 | | |
138 | | static volatile adler32_func_t adler32_impl = dispatch_adler32; /* initially the dispatcher itself, which overwrites this pointer the first time it runs */ |
139 | | |
140 | | /* Choose the best implementation at runtime, store it in adler32_impl, and forward the current call to it. */ |
141 | | static u32 dispatch_adler32(u32 adler, const u8 *p, size_t len) |
142 | 6 | { |
143 | 6 | adler32_func_t f = arch_select_adler32_func(); |
144 | | |
145 | 6 | if (f == NULL) /* the selector returned no function: use the compile-time default */ |
146 | 0 | f = DEFAULT_IMPL; |
147 | | |
148 | 6 | adler32_impl = f; /* later calls through adler32_impl skip this dispatcher; NOTE(review): volatile is not atomic, so concurrent first calls may each run the selector - presumably they store the same pointer; confirm */ |
149 | 6 | return f(adler, p, len); /* still service the call that triggered the selection */ |
150 | 6 | } |
151 | | #else |
152 | | /* The best implementation is statically known, so call it directly. */ |
153 | | #define adler32_impl DEFAULT_IMPL |
154 | | #endif |
155 | | |
156 | | LIBDEFLATEAPI u32 /* exported entry point: returns 'adler' updated with len bytes from buffer, or the initial checksum when buffer is NULL */ |
157 | | libdeflate_adler32(u32 adler, const void *buffer, size_t len) |
158 | 1.04k | { |
159 | 1.04k | if (buffer == NULL) /* Return initial value; 'adler' and 'len' are ignored in this case. */ |
160 | 0 | return 1; /* packed form of s1 = 1, s2 = 0 */ |
161 | 1.04k | return adler32_impl(adler, buffer, len); /* a function pointer when runtime dispatch is compiled in, otherwise a macro naming DEFAULT_IMPL */ |
162 | 1.04k | } |