/src/serenity/Userland/Libraries/LibCrypto/Hash/SHA2.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2020, Ali Mohammad Pur <mpfard@serenityos.org> |
3 | | * Copyright (c) 2023, Jelle Raaijmakers <jelle@gmta.nl> |
4 | | * |
5 | | * SPDX-License-Identifier: BSD-2-Clause |
6 | | */ |
7 | | |
8 | | #include <AK/CPUFeatures.h> |
9 | | #include <AK/Platform.h> |
10 | | #include <AK/SIMD.h> |
11 | | #include <AK/SIMDExtras.h> |
12 | | #include <AK/Types.h> |
13 | | #include <LibCrypto/Hash/SHA2.h> |
14 | | |
15 | | namespace Crypto::Hash { |
16 | 57.7M | constexpr static auto ROTRIGHT(u32 a, size_t b) { return (a >> b) | (a << (32 - b)); } |
17 | 6.41M | constexpr static auto CH(u32 x, u32 y, u32 z) { return (x & y) ^ (z & ~x); } |
18 | 6.41M | constexpr static auto MAJ(u32 x, u32 y, u32 z) { return (x & y) ^ (x & z) ^ (y & z); } |
19 | 6.41M | constexpr static auto EP0(u32 x) { return ROTRIGHT(x, 2) ^ ROTRIGHT(x, 13) ^ ROTRIGHT(x, 22); } |
20 | 6.41M | constexpr static auto EP1(u32 x) { return ROTRIGHT(x, 6) ^ ROTRIGHT(x, 11) ^ ROTRIGHT(x, 25); } |
21 | 4.81M | constexpr static auto SIGN0(u32 x) { return ROTRIGHT(x, 7) ^ ROTRIGHT(x, 18) ^ (x >> 3); } |
22 | 4.81M | constexpr static auto SIGN1(u32 x) { return ROTRIGHT(x, 17) ^ ROTRIGHT(x, 19) ^ (x >> 10); } |
23 | | |
24 | 75.0M | constexpr static auto ROTRIGHT(u64 a, size_t b) { return (a >> b) | (a << (64 - b)); } |
25 | 8.15M | constexpr static auto CH(u64 x, u64 y, u64 z) { return (x & y) ^ (z & ~x); } |
26 | 8.15M | constexpr static auto MAJ(u64 x, u64 y, u64 z) { return (x & y) ^ (x & z) ^ (y & z); } |
27 | 8.15M | constexpr static auto EP0(u64 x) { return ROTRIGHT(x, 28) ^ ROTRIGHT(x, 34) ^ ROTRIGHT(x, 39); } |
28 | 8.15M | constexpr static auto EP1(u64 x) { return ROTRIGHT(x, 14) ^ ROTRIGHT(x, 18) ^ ROTRIGHT(x, 41); } |
29 | 6.52M | constexpr static auto SIGN0(u64 x) { return ROTRIGHT(x, 1) ^ ROTRIGHT(x, 8) ^ (x >> 7); } |
30 | 6.52M | constexpr static auto SIGN1(u64 x) { return ROTRIGHT(x, 19) ^ ROTRIGHT(x, 61) ^ (x >> 6); } |
31 | | |
32 | | template<> |
33 | | void SHA256::transform_impl<CPUFeatures::None>() |
34 | 100k | { |
35 | 100k | auto& data = m_data_buffer; |
36 | | |
37 | 100k | u32 m[16]; |
38 | | |
39 | 100k | size_t i = 0; |
40 | 1.70M | for (size_t j = 0; i < 16; ++i, j += 4) { |
41 | 1.60M | m[i] = (data[j] << 24) | (data[j + 1] << 16) | (data[j + 2] << 8) | data[j + 3]; |
42 | 1.60M | } |
43 | | |
44 | 100k | auto a = m_state[0], b = m_state[1], |
45 | 100k | c = m_state[2], d = m_state[3], |
46 | 100k | e = m_state[4], f = m_state[5], |
47 | 100k | g = m_state[6], h = m_state[7]; |
48 | | |
49 | 6.51M | for (i = 0; i < Rounds; ++i) { |
50 | 6.41M | if (i >= 16) |
51 | 4.81M | m[i % 16] = SIGN1(m[(i - 2) % 16]) + m[(i - 7) % 16] + SIGN0(m[(i - 15) % 16]) + m[(i - 16) % 16]; |
52 | | |
53 | 6.41M | auto temp0 = h + EP1(e) + CH(e, f, g) + SHA256Constants::RoundConstants[i] + m[i % 16]; |
54 | 6.41M | auto temp1 = EP0(a) + MAJ(a, b, c); |
55 | 6.41M | h = g; |
56 | 6.41M | g = f; |
57 | 6.41M | f = e; |
58 | 6.41M | e = d + temp0; |
59 | 6.41M | d = c; |
60 | 6.41M | c = b; |
61 | 6.41M | b = a; |
62 | 6.41M | a = temp0 + temp1; |
63 | 6.41M | } |
64 | | |
65 | 100k | m_state[0] += a; |
66 | 100k | m_state[1] += b; |
67 | 100k | m_state[2] += c; |
68 | 100k | m_state[3] += d; |
69 | 100k | m_state[4] += e; |
70 | 100k | m_state[5] += f; |
71 | 100k | m_state[6] += g; |
72 | 100k | m_state[7] += h; |
73 | 100k | } |
74 | | |
// Note: The SHA extension was introduced with
//       Intel Goldmont (SSE4.2), Ice Lake (AVX512), Rocket Lake (AVX512), and AMD Zen (AVX2)
//       So it's safe to assume that if we have SHA we have at least SSE4.2
//       ~https://en.wikipedia.org/wiki/Intel_SHA_extensions
#if AK_CAN_CODEGEN_FOR_X86_SHA && AK_CAN_CODEGEN_FOR_X86_SSE42
// SHA-256 compression step using the x86 SHA instructions: folds the 64-byte block
// held in m_data_buffer into m_state, four rounds per loop iteration.
template<>
[[gnu::target("sha,sse4.2")]] void SHA256::transform_impl<CPUFeatures::X86_SHA | CPUFeatures::X86_SSE42>()
{
    using AK::SIMD::i32x4, AK::SIMD::u32x4;

    // m_state is stored as a, b, c, d, e, f, g, h. The round instruction is fed two
    // vectors holding (lowest lane first) {f, e, b, a} and {h, g, d, c}.
    auto abcd = AK::SIMD::load_unaligned<u32x4>(&m_state[0]);
    auto efgh = AK::SIMD::load_unaligned<u32x4>(&m_state[4]);
    u32x4 feba { efgh[1], efgh[0], abcd[1], abcd[0] };
    u32x4 hgdc { efgh[3], efgh[2], abcd[3], abcd[2] };

    auto const initial_feba = feba;
    auto const initial_hgdc = hgdc;

    // Ring buffer holding the sixteen most recent message schedule words, four per vector.
    u32x4 window[4] {};
    for (int group = 0; group != 16; ++group) {
        auto const w0 = (group + 0) % 4;
        auto const w1 = (group + 1) % 4;
        auto const w2 = (group + 2) % 4;
        auto const w3 = (group + 3) % 4;

        if (group < 4) {
            // The first sixteen words come straight from the block, converted from big-endian.
            window[w0] = AK::SIMD::load_unaligned<u32x4>(&m_data_buffer[group * 16]);
            window[w0] = AK::SIMD::elementwise_byte_reverse(window[w0]);
        } else {
            // Extend the schedule by four words, replacing the oldest vector in the ring.
            window[w0] = bit_cast<u32x4>(__builtin_ia32_sha256msg1(bit_cast<i32x4>(window[w0]), bit_cast<i32x4>(window[w1])));
            window[w0] += u32x4 { window[w2][1], window[w2][2], window[w2][3], window[w3][0] };
            window[w0] = bit_cast<u32x4>(__builtin_ia32_sha256msg2(bit_cast<i32x4>(window[w0]), bit_cast<i32x4>(window[w3])));
        }

        auto const round_constants = AK::SIMD::load_unaligned<u32x4>(&SHA256Constants::RoundConstants[group * 4]);
        u32x4 keyed_words = window[w0] + round_constants;

        // Each sha256rnds2 performs two rounds using the two low lanes of its third operand.
        hgdc = bit_cast<u32x4>(__builtin_ia32_sha256rnds2(bit_cast<i32x4>(hgdc), bit_cast<i32x4>(feba), bit_cast<i32x4>(keyed_words)));
        keyed_words = u32x4 { keyed_words[2], keyed_words[3], 0, 0 };
        feba = bit_cast<u32x4>(__builtin_ia32_sha256rnds2(bit_cast<i32x4>(feba), bit_cast<i32x4>(hgdc), bit_cast<i32x4>(keyed_words)));
    }

    feba += initial_feba;
    hgdc += initial_hgdc;

    // Undo the lane shuffle so that m_state is a, b, c, d, e, f, g, h again.
    u32x4 new_abcd { feba[3], feba[2], hgdc[3], hgdc[2] };
    u32x4 new_efgh { feba[1], feba[0], hgdc[1], hgdc[0] };
    AK::SIMD::store_unaligned(&m_state[0], new_abcd);
    AK::SIMD::store_unaligned(&m_state[4], new_efgh);
}
#endif
128 | | |
129 | 8 | decltype(SHA256::transform_dispatched) SHA256::transform_dispatched = [] { |
130 | 8 | CPUFeatures features = detect_cpu_features(); |
131 | | |
132 | 8 | if constexpr (is_valid_feature(CPUFeatures::X86_SHA | CPUFeatures::X86_SSE42)) { |
133 | 8 | if (has_flag(features, CPUFeatures::X86_SHA | CPUFeatures::X86_SSE42)) |
134 | 0 | return &SHA256::transform_impl<CPUFeatures::X86_SHA | CPUFeatures::X86_SSE42>; |
135 | 8 | } |
136 | | |
137 | 8 | return &SHA256::transform_impl<CPUFeatures::None>; |
138 | 8 | }(); |
139 | | |
140 | | template<size_t BlockSize, typename Callback> |
141 | | void update_buffer(u8* buffer, u8 const* input, size_t length, size_t& data_length, Callback callback) |
142 | 273 | { |
143 | 202k | while (length > 0) { |
144 | 202k | size_t copy_bytes = AK::min(length, BlockSize - data_length); |
145 | 202k | __builtin_memcpy(buffer + data_length, input, copy_bytes); |
146 | 202k | input += copy_bytes; |
147 | 202k | length -= copy_bytes; |
148 | 202k | data_length += copy_bytes; |
149 | 202k | if (data_length == BlockSize) { |
150 | 201k | callback(); |
151 | 201k | data_length = 0; |
152 | 201k | } |
153 | 202k | } |
154 | 273 | } SHA2.cpp:void Crypto::Hash::update_buffer<64ul, Crypto::Hash::SHA256::update(unsigned char const*, unsigned long)::$_0>(unsigned char*, unsigned char const*, unsigned long, unsigned long&, Crypto::Hash::SHA256::update(unsigned char const*, unsigned long)::$_0) Line | Count | Source | 142 | 89 | { | 143 | 100k | while (length > 0) { | 144 | 100k | size_t copy_bytes = AK::min(length, BlockSize - data_length); | 145 | 100k | __builtin_memcpy(buffer + data_length, input, copy_bytes); | 146 | 100k | input += copy_bytes; | 147 | 100k | length -= copy_bytes; | 148 | 100k | data_length += copy_bytes; | 149 | 100k | if (data_length == BlockSize) { | 150 | 100k | callback(); | 151 | 100k | data_length = 0; | 152 | 100k | } | 153 | 100k | } | 154 | 89 | } |
SHA2.cpp:void Crypto::Hash::update_buffer<128ul, Crypto::Hash::SHA384::update(unsigned char const*, unsigned long)::$_0>(unsigned char*, unsigned char const*, unsigned long, unsigned long&, Crypto::Hash::SHA384::update(unsigned char const*, unsigned long)::$_0) Line | Count | Source | 142 | 92 | { | 143 | 50.9k | while (length > 0) { | 144 | 50.9k | size_t copy_bytes = AK::min(length, BlockSize - data_length); | 145 | 50.9k | __builtin_memcpy(buffer + data_length, input, copy_bytes); | 146 | 50.9k | input += copy_bytes; | 147 | 50.9k | length -= copy_bytes; | 148 | 50.9k | data_length += copy_bytes; | 149 | 50.9k | if (data_length == BlockSize) { | 150 | 50.8k | callback(); | 151 | 50.8k | data_length = 0; | 152 | 50.8k | } | 153 | 50.9k | } | 154 | 92 | } |
SHA2.cpp:void Crypto::Hash::update_buffer<128ul, Crypto::Hash::SHA512::update(unsigned char const*, unsigned long)::$_0>(unsigned char*, unsigned char const*, unsigned long, unsigned long&, Crypto::Hash::SHA512::update(unsigned char const*, unsigned long)::$_0) Line | Count | Source | 142 | 92 | { | 143 | 50.9k | while (length > 0) { | 144 | 50.9k | size_t copy_bytes = AK::min(length, BlockSize - data_length); | 145 | 50.9k | __builtin_memcpy(buffer + data_length, input, copy_bytes); | 146 | 50.9k | input += copy_bytes; | 147 | 50.9k | length -= copy_bytes; | 148 | 50.9k | data_length += copy_bytes; | 149 | 50.9k | if (data_length == BlockSize) { | 150 | 50.8k | callback(); | 151 | 50.8k | data_length = 0; | 152 | 50.8k | } | 153 | 50.9k | } | 154 | 92 | } |
|
155 | | |
156 | | void SHA256::update(u8 const* message, size_t length) |
157 | 89 | { |
158 | 100k | update_buffer<BlockSize>(m_data_buffer, message, length, m_data_length, [&]() { |
159 | 100k | transform(); |
160 | 100k | m_bit_length += BlockSize * 8; |
161 | 100k | }); |
162 | 89 | } |
163 | | |
164 | | SHA256::DigestType SHA256::digest() |
165 | 89 | { |
166 | 89 | auto digest = peek(); |
167 | 89 | reset(); |
168 | 89 | return digest; |
169 | 89 | } |
170 | | |
171 | | SHA256::DigestType SHA256::peek() |
172 | 89 | { |
173 | 89 | DigestType digest; |
174 | 89 | size_t i = m_data_length; |
175 | | |
176 | 89 | if (i < FinalBlockDataSize) { |
177 | 37 | m_data_buffer[i++] = 0x80; |
178 | 1.49k | while (i < FinalBlockDataSize) |
179 | 1.45k | m_data_buffer[i++] = 0x00; |
180 | 52 | } else { |
181 | | // First, complete a block with some padding. |
182 | 52 | m_data_buffer[i++] = 0x80; |
183 | 202 | while (i < BlockSize) |
184 | 150 | m_data_buffer[i++] = 0x00; |
185 | 52 | transform(); |
186 | | |
187 | | // Then start another block with BlockSize - 8 bytes of zeros |
188 | 52 | __builtin_memset(m_data_buffer, 0, FinalBlockDataSize); |
189 | 52 | } |
190 | | |
191 | | // append total message length |
192 | 89 | m_bit_length += m_data_length * 8; |
193 | 89 | m_data_buffer[BlockSize - 1] = m_bit_length; |
194 | 89 | m_data_buffer[BlockSize - 2] = m_bit_length >> 8; |
195 | 89 | m_data_buffer[BlockSize - 3] = m_bit_length >> 16; |
196 | 89 | m_data_buffer[BlockSize - 4] = m_bit_length >> 24; |
197 | 89 | m_data_buffer[BlockSize - 5] = m_bit_length >> 32; |
198 | 89 | m_data_buffer[BlockSize - 6] = m_bit_length >> 40; |
199 | 89 | m_data_buffer[BlockSize - 7] = m_bit_length >> 48; |
200 | 89 | m_data_buffer[BlockSize - 8] = m_bit_length >> 56; |
201 | | |
202 | 89 | transform(); |
203 | | |
204 | | // SHA uses big-endian and we assume little-endian |
205 | | // FIXME: looks like a thing for AK::NetworkOrdered, |
206 | | // but that doesn't support shifting operations |
207 | 445 | for (i = 0; i < 4; ++i) { |
208 | 356 | digest.data[i + 0] = (m_state[0] >> (24 - i * 8)) & 0x000000ff; |
209 | 356 | digest.data[i + 4] = (m_state[1] >> (24 - i * 8)) & 0x000000ff; |
210 | 356 | digest.data[i + 8] = (m_state[2] >> (24 - i * 8)) & 0x000000ff; |
211 | 356 | digest.data[i + 12] = (m_state[3] >> (24 - i * 8)) & 0x000000ff; |
212 | 356 | digest.data[i + 16] = (m_state[4] >> (24 - i * 8)) & 0x000000ff; |
213 | 356 | digest.data[i + 20] = (m_state[5] >> (24 - i * 8)) & 0x000000ff; |
214 | 356 | digest.data[i + 24] = (m_state[6] >> (24 - i * 8)) & 0x000000ff; |
215 | 356 | digest.data[i + 28] = (m_state[7] >> (24 - i * 8)) & 0x000000ff; |
216 | 356 | } |
217 | 89 | return digest; |
218 | 89 | } |
219 | | |
220 | | inline void SHA384::transform(u8 const* data) |
221 | 50.9k | { |
222 | 50.9k | u64 m[16]; |
223 | | |
224 | 50.9k | size_t i = 0; |
225 | 866k | for (size_t j = 0; i < 16; ++i, j += 8) { |
226 | 815k | m[i] = ((u64)data[j] << 56) | ((u64)data[j + 1] << 48) | ((u64)data[j + 2] << 40) | ((u64)data[j + 3] << 32) | ((u64)data[j + 4] << 24) | ((u64)data[j + 5] << 16) | ((u64)data[j + 6] << 8) | (u64)data[j + 7]; |
227 | 815k | } |
228 | | |
229 | 50.9k | auto a = m_state[0], b = m_state[1], |
230 | 50.9k | c = m_state[2], d = m_state[3], |
231 | 50.9k | e = m_state[4], f = m_state[5], |
232 | 50.9k | g = m_state[6], h = m_state[7]; |
233 | | |
234 | 4.12M | for (i = 0; i < Rounds; ++i) { |
235 | 4.07M | if (i >= 16) |
236 | 3.26M | m[i % 16] = SIGN1(m[(i - 2) % 16]) + m[(i - 7) % 16] + SIGN0(m[(i - 15) % 16]) + m[(i - 16) % 16]; |
237 | | // Note : SHA384 uses the SHA512 constants. |
238 | 4.07M | auto temp0 = h + EP1(e) + CH(e, f, g) + SHA512Constants::RoundConstants[i] + m[i % 16]; |
239 | 4.07M | auto temp1 = EP0(a) + MAJ(a, b, c); |
240 | 4.07M | h = g; |
241 | 4.07M | g = f; |
242 | 4.07M | f = e; |
243 | 4.07M | e = d + temp0; |
244 | 4.07M | d = c; |
245 | 4.07M | c = b; |
246 | 4.07M | b = a; |
247 | 4.07M | a = temp0 + temp1; |
248 | 4.07M | } |
249 | | |
250 | 50.9k | m_state[0] += a; |
251 | 50.9k | m_state[1] += b; |
252 | 50.9k | m_state[2] += c; |
253 | 50.9k | m_state[3] += d; |
254 | 50.9k | m_state[4] += e; |
255 | 50.9k | m_state[5] += f; |
256 | 50.9k | m_state[6] += g; |
257 | 50.9k | m_state[7] += h; |
258 | 50.9k | } |
259 | | |
260 | | void SHA384::update(u8 const* message, size_t length) |
261 | 92 | { |
262 | 50.8k | update_buffer<BlockSize>(m_data_buffer, message, length, m_data_length, [&]() { |
263 | 50.8k | transform(m_data_buffer); |
264 | 50.8k | m_bit_length += BlockSize * 8; |
265 | 50.8k | }); |
266 | 92 | } |
267 | | |
268 | | SHA384::DigestType SHA384::digest() |
269 | 92 | { |
270 | 92 | auto digest = peek(); |
271 | 92 | reset(); |
272 | 92 | return digest; |
273 | 92 | } |
274 | | |
275 | | SHA384::DigestType SHA384::peek() |
276 | 92 | { |
277 | 92 | DigestType digest; |
278 | 92 | size_t i = m_data_length; |
279 | | |
280 | 92 | if (i < FinalBlockDataSize) { |
281 | 40 | m_data_buffer[i++] = 0x80; |
282 | 3.10k | while (i < FinalBlockDataSize) |
283 | 3.06k | m_data_buffer[i++] = 0x00; |
284 | 52 | } else { |
285 | | // First, complete a block with some padding. |
286 | 52 | m_data_buffer[i++] = 0x80; |
287 | 377 | while (i < BlockSize) |
288 | 325 | m_data_buffer[i++] = 0x00; |
289 | 52 | transform(m_data_buffer); |
290 | | |
291 | | // Then start another block with BlockSize - 8 bytes of zeros |
292 | 52 | __builtin_memset(m_data_buffer, 0, FinalBlockDataSize); |
293 | 52 | } |
294 | | |
295 | | // append total message length |
296 | 92 | m_bit_length += m_data_length * 8; |
297 | 92 | m_data_buffer[BlockSize - 1] = m_bit_length; |
298 | 92 | m_data_buffer[BlockSize - 2] = m_bit_length >> 8; |
299 | 92 | m_data_buffer[BlockSize - 3] = m_bit_length >> 16; |
300 | 92 | m_data_buffer[BlockSize - 4] = m_bit_length >> 24; |
301 | 92 | m_data_buffer[BlockSize - 5] = m_bit_length >> 32; |
302 | 92 | m_data_buffer[BlockSize - 6] = m_bit_length >> 40; |
303 | 92 | m_data_buffer[BlockSize - 7] = m_bit_length >> 48; |
304 | 92 | m_data_buffer[BlockSize - 8] = m_bit_length >> 56; |
305 | | // FIXME: Theoretically we should keep track of the number of bits as a u128, now we can only hash up to 2 EiB. |
306 | 92 | m_data_buffer[BlockSize - 9] = 0; |
307 | 92 | m_data_buffer[BlockSize - 10] = 0; |
308 | 92 | m_data_buffer[BlockSize - 11] = 0; |
309 | 92 | m_data_buffer[BlockSize - 12] = 0; |
310 | 92 | m_data_buffer[BlockSize - 13] = 0; |
311 | 92 | m_data_buffer[BlockSize - 14] = 0; |
312 | 92 | m_data_buffer[BlockSize - 15] = 0; |
313 | 92 | m_data_buffer[BlockSize - 16] = 0; |
314 | | |
315 | 92 | transform(m_data_buffer); |
316 | | |
317 | | // SHA uses big-endian and we assume little-endian |
318 | | // FIXME: looks like a thing for AK::NetworkOrdered, |
319 | | // but that doesn't support shifting operations |
320 | 828 | for (i = 0; i < 8; ++i) { |
321 | 736 | digest.data[i + 0] = (m_state[0] >> (56 - i * 8)) & 0x000000ff; |
322 | 736 | digest.data[i + 8] = (m_state[1] >> (56 - i * 8)) & 0x000000ff; |
323 | 736 | digest.data[i + 16] = (m_state[2] >> (56 - i * 8)) & 0x000000ff; |
324 | 736 | digest.data[i + 24] = (m_state[3] >> (56 - i * 8)) & 0x000000ff; |
325 | 736 | digest.data[i + 32] = (m_state[4] >> (56 - i * 8)) & 0x000000ff; |
326 | 736 | digest.data[i + 40] = (m_state[5] >> (56 - i * 8)) & 0x000000ff; |
327 | 736 | } |
328 | 92 | return digest; |
329 | 92 | } |
330 | | |
331 | | inline void SHA512::transform(u8 const* data) |
332 | 50.9k | { |
333 | 50.9k | u64 m[16]; |
334 | | |
335 | 50.9k | size_t i = 0; |
336 | 866k | for (size_t j = 0; i < 16; ++i, j += 8) { |
337 | 815k | m[i] = ((u64)data[j] << 56) | ((u64)data[j + 1] << 48) | ((u64)data[j + 2] << 40) | ((u64)data[j + 3] << 32) | ((u64)data[j + 4] << 24) | ((u64)data[j + 5] << 16) | ((u64)data[j + 6] << 8) | (u64)data[j + 7]; |
338 | 815k | } |
339 | | |
340 | 50.9k | auto a = m_state[0], b = m_state[1], |
341 | 50.9k | c = m_state[2], d = m_state[3], |
342 | 50.9k | e = m_state[4], f = m_state[5], |
343 | 50.9k | g = m_state[6], h = m_state[7]; |
344 | | |
345 | 4.12M | for (i = 0; i < Rounds; ++i) { |
346 | 4.07M | if (i >= 16) |
347 | 3.26M | m[i % 16] = SIGN1(m[(i - 2) % 16]) + m[(i - 7) % 16] + SIGN0(m[(i - 15) % 16]) + m[(i - 16) % 16]; |
348 | | |
349 | 4.07M | auto temp0 = h + EP1(e) + CH(e, f, g) + SHA512Constants::RoundConstants[i] + m[i % 16]; |
350 | 4.07M | auto temp1 = EP0(a) + MAJ(a, b, c); |
351 | 4.07M | h = g; |
352 | 4.07M | g = f; |
353 | 4.07M | f = e; |
354 | 4.07M | e = d + temp0; |
355 | 4.07M | d = c; |
356 | 4.07M | c = b; |
357 | 4.07M | b = a; |
358 | 4.07M | a = temp0 + temp1; |
359 | 4.07M | } |
360 | | |
361 | 50.9k | m_state[0] += a; |
362 | 50.9k | m_state[1] += b; |
363 | 50.9k | m_state[2] += c; |
364 | 50.9k | m_state[3] += d; |
365 | 50.9k | m_state[4] += e; |
366 | 50.9k | m_state[5] += f; |
367 | 50.9k | m_state[6] += g; |
368 | 50.9k | m_state[7] += h; |
369 | 50.9k | } |
370 | | |
371 | | void SHA512::update(u8 const* message, size_t length) |
372 | 92 | { |
373 | 50.8k | update_buffer<BlockSize>(m_data_buffer, message, length, m_data_length, [&]() { |
374 | 50.8k | transform(m_data_buffer); |
375 | 50.8k | m_bit_length += BlockSize * 8; |
376 | 50.8k | }); |
377 | 92 | } |
378 | | |
379 | | SHA512::DigestType SHA512::digest() |
380 | 92 | { |
381 | 92 | auto digest = peek(); |
382 | 92 | reset(); |
383 | 92 | return digest; |
384 | 92 | } |
385 | | |
386 | | SHA512::DigestType SHA512::peek() |
387 | 92 | { |
388 | 92 | DigestType digest; |
389 | 92 | size_t i = m_data_length; |
390 | | |
391 | 92 | if (i < FinalBlockDataSize) { |
392 | 40 | m_data_buffer[i++] = 0x80; |
393 | 3.10k | while (i < FinalBlockDataSize) |
394 | 3.06k | m_data_buffer[i++] = 0x00; |
395 | 52 | } else { |
396 | | // First, complete a block with some padding. |
397 | 52 | m_data_buffer[i++] = 0x80; |
398 | 377 | while (i < BlockSize) |
399 | 325 | m_data_buffer[i++] = 0x00; |
400 | 52 | transform(m_data_buffer); |
401 | | |
402 | | // Then start another block with BlockSize - 8 bytes of zeros |
403 | 52 | __builtin_memset(m_data_buffer, 0, FinalBlockDataSize); |
404 | 52 | } |
405 | | |
406 | | // append total message length |
407 | 92 | m_bit_length += m_data_length * 8; |
408 | 92 | m_data_buffer[BlockSize - 1] = m_bit_length; |
409 | 92 | m_data_buffer[BlockSize - 2] = m_bit_length >> 8; |
410 | 92 | m_data_buffer[BlockSize - 3] = m_bit_length >> 16; |
411 | 92 | m_data_buffer[BlockSize - 4] = m_bit_length >> 24; |
412 | 92 | m_data_buffer[BlockSize - 5] = m_bit_length >> 32; |
413 | 92 | m_data_buffer[BlockSize - 6] = m_bit_length >> 40; |
414 | 92 | m_data_buffer[BlockSize - 7] = m_bit_length >> 48; |
415 | 92 | m_data_buffer[BlockSize - 8] = m_bit_length >> 56; |
416 | | // FIXME: Theoretically we should keep track of the number of bits as a u128, now we can only hash up to 2 EiB. |
417 | 92 | m_data_buffer[BlockSize - 9] = 0; |
418 | 92 | m_data_buffer[BlockSize - 10] = 0; |
419 | 92 | m_data_buffer[BlockSize - 11] = 0; |
420 | 92 | m_data_buffer[BlockSize - 12] = 0; |
421 | 92 | m_data_buffer[BlockSize - 13] = 0; |
422 | 92 | m_data_buffer[BlockSize - 14] = 0; |
423 | 92 | m_data_buffer[BlockSize - 15] = 0; |
424 | 92 | m_data_buffer[BlockSize - 16] = 0; |
425 | | |
426 | 92 | transform(m_data_buffer); |
427 | | |
428 | | // SHA uses big-endian and we assume little-endian |
429 | | // FIXME: looks like a thing for AK::NetworkOrdered, |
430 | | // but that doesn't support shifting operations |
431 | 828 | for (i = 0; i < 8; ++i) { |
432 | 736 | digest.data[i + 0] = (m_state[0] >> (56 - i * 8)) & 0x000000ff; |
433 | 736 | digest.data[i + 8] = (m_state[1] >> (56 - i * 8)) & 0x000000ff; |
434 | 736 | digest.data[i + 16] = (m_state[2] >> (56 - i * 8)) & 0x000000ff; |
435 | 736 | digest.data[i + 24] = (m_state[3] >> (56 - i * 8)) & 0x000000ff; |
436 | 736 | digest.data[i + 32] = (m_state[4] >> (56 - i * 8)) & 0x000000ff; |
437 | 736 | digest.data[i + 40] = (m_state[5] >> (56 - i * 8)) & 0x000000ff; |
438 | 736 | digest.data[i + 48] = (m_state[6] >> (56 - i * 8)) & 0x000000ff; |
439 | 736 | digest.data[i + 56] = (m_state[7] >> (56 - i * 8)) & 0x000000ff; |
440 | 736 | } |
441 | 92 | return digest; |
442 | 92 | } |
443 | | } |