Line | Count | Source |
1 | | /* adler32_p.h -- Private inline functions and macros shared by the |
2 | | * different implementations of the Adler-32 checksum computation |
3 | | * of a data stream. |
4 | | * Copyright (C) 1995-2011, 2016 Mark Adler |
5 | | * For conditions of distribution and use, see copyright notice in zlib.h |
6 | | */ |
7 | | |
8 | | #ifndef ADLER32_P_H |
9 | | #define ADLER32_P_H |
10 | | |
#define BASE 65521U /* largest prime smaller than 65536 */
#define NMAX 5552
/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
#define NMAX_ALIGNED32 (NMAX & ~31)
/* NMAX rounded down to a multiple of 32 is 5536 */

/*
 * Scalar Adler-32 accumulation steps.
 *
 * ADLER_DO1 adds byte buf[i] to sum1 and then adds the updated sum1 to sum2.
 * ADLER_DO2/4/8 apply that step to 2/4/8 consecutive bytes starting at buf[i];
 * ADLER_DO16 applies it to buf[0] .. buf[15].
 *
 * None of these macros reduces the sums modulo BASE.
 *
 * Every argument is parenthesized in the expansion so that pointer
 * expressions (e.g. `p + 4`) and compound index expressions behave as
 * expected.  Arguments are still evaluated more than once, so they must not
 * have side effects.
 *
 * The expansions stay plain brace blocks (not do { } while (0)) so that
 * existing call sites compile unchanged whether or not they add a trailing
 * semicolon.
 */
#define ADLER_DO1(sum1, sum2, buf, i)  {(sum1) += (buf)[(i)]; (sum2) += (sum1);}
#define ADLER_DO2(sum1, sum2, buf, i)  {ADLER_DO1(sum1, sum2, buf, i); ADLER_DO1(sum1, sum2, buf, (i) + 1);}
#define ADLER_DO4(sum1, sum2, buf, i)  {ADLER_DO2(sum1, sum2, buf, i); ADLER_DO2(sum1, sum2, buf, (i) + 2);}
#define ADLER_DO8(sum1, sum2, buf, i)  {ADLER_DO4(sum1, sum2, buf, i); ADLER_DO4(sum1, sum2, buf, (i) + 4);}
#define ADLER_DO16(sum1, sum2, buf)    {ADLER_DO8(sum1, sum2, buf, 0); ADLER_DO8(sum1, sum2, buf, 8);}
22 | | |
23 | | Z_FORCEINLINE static void adler32_copy_align(uint32_t *Z_RESTRICT adler, uint8_t *dst, const uint8_t *buf, size_t len, |
24 | 0 | uint32_t *Z_RESTRICT sum2, const int MAX_LEN, const int COPY) { |
25 | 0 | Z_UNUSED(MAX_LEN); |
26 | 0 | if (len & 1) { |
27 | 0 | if (COPY) { |
28 | 0 | *dst = *buf; |
29 | 0 | dst += 1; |
30 | 0 | } |
31 | 0 | ADLER_DO1(*adler, *sum2, buf, 0); |
32 | 0 | buf += 1; |
33 | 0 | } |
34 | 0 | if (len & 2) { |
35 | 0 | if (COPY) { |
36 | 0 | memcpy(dst, buf, 2); |
37 | 0 | dst += 2; |
38 | 0 | } |
39 | 0 | ADLER_DO2(*adler, *sum2, buf, 0); |
40 | 0 | buf += 2; |
41 | 0 | } |
42 | 0 | while (len >= 4) { |
43 | 0 | if (COPY) { |
44 | 0 | memcpy(dst, buf, 4); |
45 | 0 | dst += 4; |
46 | 0 | } |
47 | 0 | len -= 4; |
48 | 0 | ADLER_DO4(*adler, *sum2, buf, 0); |
49 | 0 | buf += 4; |
50 | 0 | } |
51 | 0 | } Unexecuted instantiation: adler32_ssse3.c:adler32_copy_align Unexecuted instantiation: adler32_sse42.c:adler32_copy_align Unexecuted instantiation: adler32_avx2.c:adler32_copy_align Unexecuted instantiation: adler32_avx512.c:adler32_copy_align Unexecuted instantiation: adler32_avx512_vnni.c:adler32_copy_align Unexecuted instantiation: adler32_c.c:adler32_copy_align Unexecuted instantiation: adler32.c:adler32_copy_align |
52 | | |
53 | | Z_FORCEINLINE static uint32_t adler32_copy_tail(uint32_t adler, uint8_t *dst, const uint8_t *buf, size_t len, |
54 | 305M | uint32_t sum2, const int REBASE, const int MAX_LEN, const int COPY) { |
55 | 305M | if (len) { |
56 | | /* DO16 loop for large remainders only (scalar, risc-v). */ |
57 | 305M | if (MAX_LEN >= 32) { |
58 | 0 | while (len >= 16) { |
59 | 0 | if (COPY) { |
60 | 0 | memcpy(dst, buf, 16); |
61 | 0 | dst += 16; |
62 | 0 | } |
63 | 0 | len -= 16; |
64 | 0 | ADLER_DO16(adler, sum2, buf); |
65 | 0 | buf += 16; |
66 | 0 | } |
67 | 0 | } |
68 | | /* DO4 loop avoids GCC x86 register pressure from hoisted DO8/DO16 loads. */ |
69 | 306M | while (len >= 4) { |
70 | 1.76M | if (COPY) { |
71 | 67.6k | memcpy(dst, buf, 4); |
72 | 67.6k | dst += 4; |
73 | 67.6k | } |
74 | 1.76M | len -= 4; |
75 | 1.76M | ADLER_DO4(adler, sum2, buf, 0); |
76 | 1.76M | buf += 4; |
77 | 1.76M | } |
78 | 305M | if (len & 2) { |
79 | 521k | if (COPY) { |
80 | 29.1k | memcpy(dst, buf, 2); |
81 | 29.1k | dst += 2; |
82 | 29.1k | } |
83 | 521k | ADLER_DO2(adler, sum2, buf, 0); |
84 | 521k | buf += 2; |
85 | 521k | } |
86 | 305M | if (len & 1) { |
87 | 304M | if (COPY) |
88 | 302M | *dst = *buf; |
89 | 304M | ADLER_DO1(adler, sum2, buf, 0); |
90 | 304M | } |
91 | 305M | } |
92 | 305M | if (REBASE) { |
93 | 305M | adler %= BASE; |
94 | 305M | sum2 %= BASE; |
95 | 305M | } |
96 | | /* D = B * 65536 + A, see: https://en.wikipedia.org/wiki/Adler-32. */ |
97 | 305M | return adler | (sum2 << 16); |
98 | 305M | } adler32_ssse3.c:adler32_copy_tail Line | Count | Source | 54 | 588k | uint32_t sum2, const int REBASE, const int MAX_LEN, const int COPY) { | 55 | 588k | if (len) { | 56 | | /* DO16 loop for large remainders only (scalar, risc-v). */ | 57 | 511k | if (MAX_LEN >= 32) { | 58 | 0 | while (len >= 16) { | 59 | 0 | if (COPY) { | 60 | 0 | memcpy(dst, buf, 16); | 61 | 0 | dst += 16; | 62 | 0 | } | 63 | 0 | len -= 16; | 64 | 0 | ADLER_DO16(adler, sum2, buf); | 65 | 0 | buf += 16; | 66 | 0 | } | 67 | 0 | } | 68 | | /* DO4 loop avoids GCC x86 register pressure from hoisted DO8/DO16 loads. */ | 69 | 1.95M | while (len >= 4) { | 70 | 1.44M | if (COPY) { | 71 | 0 | memcpy(dst, buf, 4); | 72 | 0 | dst += 4; | 73 | 0 | } | 74 | 1.44M | len -= 4; | 75 | 1.44M | ADLER_DO4(adler, sum2, buf, 0); | 76 | 1.44M | buf += 4; | 77 | 1.44M | } | 78 | 511k | if (len & 2) { | 79 | 397k | if (COPY) { | 80 | 0 | memcpy(dst, buf, 2); | 81 | 0 | dst += 2; | 82 | 0 | } | 83 | 397k | ADLER_DO2(adler, sum2, buf, 0); | 84 | 397k | buf += 2; | 85 | 397k | } | 86 | 511k | if (len & 1) { | 87 | 249k | if (COPY) | 88 | 0 | *dst = *buf; | 89 | 249k | ADLER_DO1(adler, sum2, buf, 0); | 90 | 249k | } | 91 | 511k | } | 92 | 588k | if (REBASE) { | 93 | 511k | adler %= BASE; | 94 | 511k | sum2 %= BASE; | 95 | 511k | } | 96 | | /* D = B * 65536 + A, see: https://en.wikipedia.org/wiki/Adler-32. */ | 97 | 588k | return adler | (sum2 << 16); | 98 | 588k | } |
adler32_sse42.c:adler32_copy_tail Line | Count | Source | 54 | 19.8k | uint32_t sum2, const int REBASE, const int MAX_LEN, const int COPY) { | 55 | 19.8k | if (len) { | 56 | | /* DO16 loop for large remainders only (scalar, risc-v). */ | 57 | 19.8k | if (MAX_LEN >= 32) { | 58 | 0 | while (len >= 16) { | 59 | 0 | if (COPY) { | 60 | 0 | memcpy(dst, buf, 16); | 61 | 0 | dst += 16; | 62 | 0 | } | 63 | 0 | len -= 16; | 64 | 0 | ADLER_DO16(adler, sum2, buf); | 65 | 0 | buf += 16; | 66 | 0 | } | 67 | 0 | } | 68 | | /* DO4 loop avoids GCC x86 register pressure from hoisted DO8/DO16 loads. */ | 69 | 51.5k | while (len >= 4) { | 70 | 31.7k | if (COPY) { | 71 | 31.7k | memcpy(dst, buf, 4); | 72 | 31.7k | dst += 4; | 73 | 31.7k | } | 74 | 31.7k | len -= 4; | 75 | 31.7k | ADLER_DO4(adler, sum2, buf, 0); | 76 | 31.7k | buf += 4; | 77 | 31.7k | } | 78 | 19.8k | if (len & 2) { | 79 | 11.4k | if (COPY) { | 80 | 11.4k | memcpy(dst, buf, 2); | 81 | 11.4k | dst += 2; | 82 | 11.4k | } | 83 | 11.4k | ADLER_DO2(adler, sum2, buf, 0); | 84 | 11.4k | buf += 2; | 85 | 11.4k | } | 86 | 19.8k | if (len & 1) { | 87 | 10.2k | if (COPY) | 88 | 10.2k | *dst = *buf; | 89 | 10.2k | ADLER_DO1(adler, sum2, buf, 0); | 90 | 10.2k | } | 91 | 19.8k | } | 92 | 19.8k | if (REBASE) { | 93 | 19.8k | adler %= BASE; | 94 | 19.8k | sum2 %= BASE; | 95 | 19.8k | } | 96 | | /* D = B * 65536 + A, see: https://en.wikipedia.org/wiki/Adler-32. */ | 97 | 19.8k | return adler | (sum2 << 16); | 98 | 19.8k | } |
adler32_avx2.c:adler32_copy_tail Line | Count | Source | 54 | 304M | uint32_t sum2, const int REBASE, const int MAX_LEN, const int COPY) { | 55 | 304M | if (len) { | 56 | | /* DO16 loop for large remainders only (scalar, risc-v). */ | 57 | 304M | if (MAX_LEN >= 32) { | 58 | 0 | while (len >= 16) { | 59 | 0 | if (COPY) { | 60 | 0 | memcpy(dst, buf, 16); | 61 | 0 | dst += 16; | 62 | 0 | } | 63 | 0 | len -= 16; | 64 | 0 | ADLER_DO16(adler, sum2, buf); | 65 | 0 | buf += 16; | 66 | 0 | } | 67 | 0 | } | 68 | | /* DO4 loop avoids GCC x86 register pressure from hoisted DO8/DO16 loads. */ | 69 | 304M | while (len >= 4) { | 70 | 291k | if (COPY) { | 71 | 35.8k | memcpy(dst, buf, 4); | 72 | 35.8k | dst += 4; | 73 | 35.8k | } | 74 | 291k | len -= 4; | 75 | 291k | ADLER_DO4(adler, sum2, buf, 0); | 76 | 291k | buf += 4; | 77 | 291k | } | 78 | 304M | if (len & 2) { | 79 | 112k | if (COPY) { | 80 | 17.6k | memcpy(dst, buf, 2); | 81 | 17.6k | dst += 2; | 82 | 17.6k | } | 83 | 112k | ADLER_DO2(adler, sum2, buf, 0); | 84 | 112k | buf += 2; | 85 | 112k | } | 86 | 304M | if (len & 1) { | 87 | 304M | if (COPY) | 88 | 302M | *dst = *buf; | 89 | 304M | ADLER_DO1(adler, sum2, buf, 0); | 90 | 304M | } | 91 | 304M | } | 92 | 304M | if (REBASE) { | 93 | 304M | adler %= BASE; | 94 | 304M | sum2 %= BASE; | 95 | 304M | } | 96 | | /* D = B * 65536 + A, see: https://en.wikipedia.org/wiki/Adler-32. */ | 97 | 304M | return adler | (sum2 << 16); | 98 | 304M | } |
Unexecuted instantiation: adler32_avx512.c:adler32_copy_tail Unexecuted instantiation: adler32_avx512_vnni.c:adler32_copy_tail Unexecuted instantiation: adler32_c.c:adler32_copy_tail Unexecuted instantiation: adler32.c:adler32_copy_tail |
99 | | |
100 | | #endif /* ADLER32_P_H */ |