/src/serenity/Userland/Libraries/LibCrypto/Checksum/CRC32.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2020-2022, the SerenityOS developers. |
3 | | * |
4 | | * SPDX-License-Identifier: BSD-2-Clause |
5 | | */ |
6 | | |
7 | | #include <AK/Array.h> |
8 | | #include <AK/NumericLimits.h> |
9 | | #include <AK/Span.h> |
10 | | #include <AK/Types.h> |
11 | | #include <LibCrypto/Checksum/CRC32.h> |
12 | | |
13 | | #ifdef __ARM_ACLE |
14 | | # include <arm_acle.h> |
15 | | #endif |
16 | | |
17 | | namespace Crypto::Checksum { |
18 | | |
19 | | #if defined(__ARM_ACLE) && __ARM_ARCH >= 8 && defined(__ARM_FEATURE_CRC32) |
// Hardware-accelerated path: uses the ARMv8 CRC32 extension via ACLE intrinsics.
void CRC32::update(ReadonlyBytes span)
{
    // FIXME: Does this require runtime checking on rpi?
    //        (Maybe the instruction is present on the rpi4 but not on the rpi3?)

    u8 const* data = span.data();
    size_t size = span.size();

    // Consume leading bytes one at a time until the pointer is 8-byte aligned,
    // so the 64-bit loop below only performs aligned loads.
    while (size > 0 && (reinterpret_cast<FlatPtr>(data) & 7) != 0) {
        m_state = __crc32b(m_state, *data);
        ++data;
        --size;
    }

    // Bulk of the work: fold 8 bytes per iteration with the 64-bit CRC32 instruction.
    auto* data64 = reinterpret_cast<u64 const*>(data);
    while (size >= 8) {
        m_state = __crc32d(m_state, *data64);
        ++data64;
        size -= 8;
    }

    // Tail: fewer than 8 bytes remain; finish byte-by-byte.
    data = reinterpret_cast<u8 const*>(data64);
    while (size > 0) {
        m_state = __crc32b(m_state, *data);
        ++data;
        --size;
    }
}
48 | | |
49 | | // FIXME: On Intel, use _mm_crc32_u8 / _mm_crc32_u64 if available (SSE 4.2). |
50 | | |
51 | | #else |
52 | | |
53 | | static constexpr size_t ethernet_polynomial = 0xEDB88320; |
54 | | |
55 | | # if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ |
56 | | |
57 | | // This implements Intel's slicing-by-8 algorithm. Their original paper is no longer on their website, |
58 | | // but their source code is still available for reference: |
59 | | // https://sourceforge.net/projects/slicing-by-8/ |
60 | | static constexpr auto generate_table() |
61 | 0 | { |
62 | 0 | Array<Array<u32, 256>, 8> data {}; |
63 | 0 |
|
64 | 0 | for (u32 i = 0; i < 256; ++i) { |
65 | 0 | auto value = i; |
66 | 0 |
|
67 | 0 | for (size_t j = 0; j < 8; ++j) |
68 | 0 | value = (value >> 1) ^ ((value & 1) * ethernet_polynomial); |
69 | 0 |
|
70 | 0 | data[0][i] = value; |
71 | 0 | } |
72 | 0 |
|
73 | 0 | for (u32 i = 0; i < 256; ++i) { |
74 | 0 | for (size_t j = 1; j < 8; ++j) |
75 | 0 | data[j][i] = (data[j - 1][i] >> 8) ^ data[0][data[j - 1][i] & 0xff]; |
76 | 0 | } |
77 | 0 |
|
78 | 0 | return data; |
79 | 0 | } |
80 | | |
// The fully precomputed slicing-by-8 lookup tables, built at compile time.
static constexpr auto table = generate_table();

// Result of split_bytes_for_alignment(): an input span partitioned into a
// (possibly empty) unaligned prefix and the remaining aligned portion.
struct AlignmentData {
    ReadonlyBytes misaligned;
    ReadonlyBytes aligned;
};
87 | | |
88 | | static AlignmentData split_bytes_for_alignment(ReadonlyBytes data, size_t alignment) |
89 | 2.12M | { |
90 | 2.12M | auto address = reinterpret_cast<uintptr_t>(data.data()); |
91 | 2.12M | auto offset = alignment - address % alignment; |
92 | | |
93 | 2.12M | if (offset == alignment) |
94 | 2.09M | return { {}, data }; |
95 | | |
96 | 27.5k | if (data.size() < alignment) |
97 | 839 | return { data, {} }; |
98 | | |
99 | 26.7k | return { data.trim(offset), data.slice(offset) }; |
100 | 27.5k | } |
101 | | |
102 | | static constexpr u32 single_byte_crc(u32 crc, u8 byte) |
103 | 132k | { |
104 | 132k | return (crc >> 8) ^ table[0][(crc & 0xff) ^ byte]; |
105 | 132k | } |
106 | | |
107 | | void CRC32::update(ReadonlyBytes data) |
108 | 2.12M | { |
109 | | // The provided data may not be aligned to a 4-byte boundary, required to reinterpret its address |
110 | | // into a u32 in the loop below. So we split the bytes into two segments: the misaligned bytes |
111 | | // (which undergo the standard 1-byte-at-a-time algorithm) and remaining aligned bytes. |
112 | 2.12M | auto [misaligned_data, aligned_data] = split_bytes_for_alignment(data, alignof(u32)); |
113 | | |
114 | 2.12M | for (auto byte : misaligned_data) |
115 | 54.7k | m_state = single_byte_crc(m_state, byte); |
116 | | |
117 | 1.09G | while (aligned_data.size() >= 8) { |
118 | 1.09G | auto const* segment = reinterpret_cast<u32 const*>(aligned_data.data()); |
119 | 1.09G | auto low = *segment ^ m_state; |
120 | 1.09G | auto high = *(++segment); |
121 | | |
122 | 1.09G | m_state = table[0][(high >> 24) & 0xff] |
123 | 1.09G | ^ table[1][(high >> 16) & 0xff] |
124 | 1.09G | ^ table[2][(high >> 8) & 0xff] |
125 | 1.09G | ^ table[3][high & 0xff] |
126 | 1.09G | ^ table[4][(low >> 24) & 0xff] |
127 | 1.09G | ^ table[5][(low >> 16) & 0xff] |
128 | 1.09G | ^ table[6][(low >> 8) & 0xff] |
129 | 1.09G | ^ table[7][low & 0xff]; |
130 | | |
131 | 1.09G | aligned_data = aligned_data.slice(8); |
132 | 1.09G | } |
133 | | |
134 | 2.12M | for (auto byte : aligned_data) |
135 | 77.7k | m_state = single_byte_crc(m_state, byte); |
136 | 2.12M | } |
137 | | |
138 | | # else |
139 | | |
140 | | // FIXME: Implement the slicing-by-8 algorithm for big endian CPUs. |
141 | | static constexpr auto generate_table() |
142 | | { |
143 | | Array<u32, 256> data {}; |
144 | | for (auto i = 0u; i < data.size(); i++) { |
145 | | u32 value = i; |
146 | | |
147 | | for (auto j = 0; j < 8; j++) { |
148 | | if (value & 1) { |
149 | | value = ethernet_polynomial ^ (value >> 1); |
150 | | } else { |
151 | | value = value >> 1; |
152 | | } |
153 | | } |
154 | | |
155 | | data[i] = value; |
156 | | } |
157 | | return data; |
158 | | } |
159 | | |
160 | | static constexpr auto table = generate_table(); |
161 | | |
162 | | void CRC32::update(ReadonlyBytes data) |
163 | | { |
164 | | for (size_t i = 0; i < data.size(); i++) { |
165 | | m_state = table[(m_state ^ data.at(i)) & 0xFF] ^ (m_state >> 8); |
166 | | } |
167 | | } |
168 | | |
169 | | # endif |
170 | | #endif |
171 | | |
u32 CRC32::digest()
{
    // Final XOR (one's complement) step of standard CRC-32. Presumably m_state
    // starts out as ~0 per convention — see the initializer in CRC32.h to confirm.
    return ~m_state;
}
176 | | |
177 | | } |