/src/botan/src/lib/hash/sha2_32/sha2_32_x86/sha2_32_x86.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Based on public domain code by Sean Gulley |
3 | | * |
4 | | * Further changes |
5 | | * |
6 | | * (C) 2017,2020,2025 Jack Lloyd |
7 | | * |
8 | | * Botan is released under the Simplified BSD License (see license.txt) |
9 | | */ |
10 | | |
11 | | #include <botan/internal/sha2_32.h> |
12 | | |
13 | | #include <botan/internal/isa_extn.h> |
14 | | #include <botan/internal/simd_4x32.h> |
15 | | #include <immintrin.h> |
16 | | |
17 | | namespace Botan { |
18 | | |
19 | | namespace { |
20 | | |
21 | | BOTAN_FORCE_INLINE BOTAN_FN_ISA_SHANI void sha256_rnds4(SIMD_4x32& S0, |
22 | | SIMD_4x32& S1, |
23 | | const SIMD_4x32& msg, |
24 | 0 | const SIMD_4x32& k) { |
25 | 0 | const auto mk = msg + k; |
26 | 0 | S1 = SIMD_4x32(_mm_sha256rnds2_epu32(S1.raw(), S0.raw(), mk.raw())); |
27 | 0 | S0 = SIMD_4x32(_mm_sha256rnds2_epu32(S0.raw(), S1.raw(), mk.shift_elems_right<2>().raw())); |
28 | 0 | } |
29 | | |
30 | 0 | BOTAN_FORCE_INLINE BOTAN_FN_ISA_SHANI void sha256_msg_exp(SIMD_4x32& m0, SIMD_4x32& m1, SIMD_4x32& m2) { |
31 | 0 | m2 += SIMD_4x32(_mm_alignr_epi8(m1.raw(), m0.raw(), 4)); |
32 | 0 | m0 = SIMD_4x32(_mm_sha256msg1_epu32(m0.raw(), m1.raw())); |
33 | 0 | m2 = SIMD_4x32(_mm_sha256msg2_epu32(m2.raw(), m1.raw())); |
34 | 0 | } |
35 | | |
36 | 0 | BOTAN_FORCE_INLINE BOTAN_FN_ISA_SHANI void sha256_permute_state(SIMD_4x32& S0, SIMD_4x32& S1) { |
37 | 0 | S0 = SIMD_4x32(_mm_shuffle_epi32(S0.raw(), 0b10110001)); // CDAB |
38 | 0 | S1 = SIMD_4x32(_mm_shuffle_epi32(S1.raw(), 0b00011011)); // EFGH |
39 | |
|
40 | 0 | __m128i tmp = _mm_alignr_epi8(S0.raw(), S1.raw(), 8); // ABEF |
41 | 0 | S1 = SIMD_4x32(_mm_blend_epi16(S1.raw(), S0.raw(), 0xF0)); // CDGH |
42 | 0 | S0 = SIMD_4x32(tmp); |
43 | 0 | } |
44 | | |
45 | | } // namespace |
46 | | |
47 | | void BOTAN_FN_ISA_SHANI SHA_256::compress_digest_x86(digest_type& digest, |
48 | | std::span<const uint8_t> input_span, |
49 | 0 | size_t blocks) { |
50 | 0 | alignas(64) static const uint32_t K[] = { |
51 | 0 | 0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5, 0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5, |
52 | 0 | 0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3, 0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174, |
53 | 0 | 0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC, 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA, |
54 | 0 | 0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7, 0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967, |
55 | 0 | 0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13, 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85, |
56 | 0 | 0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3, 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070, |
57 | 0 | 0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3, |
58 | 0 | 0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2, |
59 | 0 | }; |
60 | |
|
61 | 0 | const uint8_t* input = input_span.data(); |
62 | |
|
63 | 0 | SIMD_4x32 S0 = SIMD_4x32::load_le(&digest[0]); |
64 | 0 | SIMD_4x32 S1 = SIMD_4x32::load_le(&digest[4]); |
65 | |
|
66 | 0 | sha256_permute_state(S0, S1); |
67 | |
|
68 | 0 | while(blocks > 0) { |
69 | 0 | const auto S0_SAVE = S0; |
70 | 0 | const auto S1_SAVE = S1; |
71 | |
|
72 | 0 | auto W0 = SIMD_4x32::load_be(input); |
73 | 0 | auto W1 = SIMD_4x32::load_be(input + 16); |
74 | 0 | auto W2 = SIMD_4x32::load_be(input + 32); |
75 | 0 | auto W3 = SIMD_4x32::load_be(input + 48); |
76 | |
|
77 | 0 | sha256_rnds4(S0, S1, W0, SIMD_4x32::load_le(&K[0])); |
78 | 0 | sha256_rnds4(S0, S1, W1, SIMD_4x32::load_le(&K[4])); |
79 | 0 | sha256_rnds4(S0, S1, W2, SIMD_4x32::load_le(&K[8])); |
80 | 0 | sha256_rnds4(S0, S1, W3, SIMD_4x32::load_le(&K[12])); |
81 | |
|
82 | 0 | W0 = SIMD_4x32(_mm_sha256msg1_epu32(W0.raw(), W1.raw())); |
83 | 0 | W1 = SIMD_4x32(_mm_sha256msg1_epu32(W1.raw(), W2.raw())); |
84 | |
|
85 | 0 | for(size_t r = 4; r != 16; r += 4) { |
86 | 0 | sha256_msg_exp(W2, W3, W0); |
87 | 0 | sha256_rnds4(S0, S1, W0, SIMD_4x32::load_le(&K[4 * (r + 0)])); |
88 | |
|
89 | 0 | sha256_msg_exp(W3, W0, W1); |
90 | 0 | sha256_rnds4(S0, S1, W1, SIMD_4x32::load_le(&K[4 * (r + 1)])); |
91 | |
|
92 | 0 | sha256_msg_exp(W0, W1, W2); |
93 | 0 | sha256_rnds4(S0, S1, W2, SIMD_4x32::load_le(&K[4 * (r + 2)])); |
94 | |
|
95 | 0 | sha256_msg_exp(W1, W2, W3); |
96 | 0 | sha256_rnds4(S0, S1, W3, SIMD_4x32::load_le(&K[4 * (r + 3)])); |
97 | 0 | } |
98 | | |
99 | | // Add values back to state |
100 | 0 | S0 += S0_SAVE; |
101 | 0 | S1 += S1_SAVE; |
102 | |
|
103 | 0 | input += 64; |
104 | 0 | blocks--; |
105 | 0 | } |
106 | |
|
107 | 0 | sha256_permute_state(S1, S0); |
108 | |
|
109 | 0 | S0.store_le(&digest[0]); |
110 | 0 | S1.store_le(&digest[4]); |
111 | 0 | } |
112 | | |
113 | | } // namespace Botan |