Coverage Report

Created: 2025-04-11 06:34

/src/botan/src/lib/hash/sha2_32/sha2_32_x86/sha2_32_x86.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Based on public domain code by Sean Gulley
3
*
4
* Further changes
5
*
6
* (C) 2017,2020,2025 Jack Lloyd
7
*
8
* Botan is released under the Simplified BSD License (see license.txt)
9
*/
10
11
#include <botan/internal/sha2_32.h>
12
13
#include <botan/internal/isa_extn.h>
14
#include <botan/internal/simd_4x32.h>
15
#include <immintrin.h>
16
17
namespace Botan {
18
19
namespace {
20
21
/**
* Apply two SHA256RNDS2 steps to the packed state (S0, S1).
*
* The message vector and the constant vector are summed once. The first
* step updates S1 from (S1, S0) using that sum directly; the second step
* updates S0 from (S0, new S1) using the sum after shift_elems_right<2>(),
* which makes the other half of the sum available to the instruction.
*
* @param S0 first half of the packed working state, updated in place
* @param S1 second half of the packed working state, updated in place
* @param msg four message schedule words
* @param k the four round constants matching msg
*/
BOTAN_FORCE_INLINE BOTAN_FN_ISA_SHANI void sha256_rnds4(SIMD_4x32& S0,
                                                        SIMD_4x32& S1,
                                                        const SIMD_4x32& msg,
                                                        const SIMD_4x32& k) {
   const auto wk_first = msg + k;
   S1 = SIMD_4x32(_mm_sha256rnds2_epu32(S1.raw(), S0.raw(), wk_first.raw()));

   // Must run after S1 is updated: the second step consumes the new S1
   const auto wk_second = wk_first.shift_elems_right<2>();
   S0 = SIMD_4x32(_mm_sha256rnds2_epu32(S0.raw(), S1.raw(), wk_second.raw()));
}
29
30
0
/**
* One message schedule expansion step over three schedule vectors.
*
* m2 first has the 16 bytes starting at byte offset 4 of the m1:m0
* concatenation added to it. m0 is then replaced by SHA256MSG1(m0, m1),
* and finally m2 is replaced by SHA256MSG2(m2, m1). m1 is only read.
*
* The statement order matters: the alignr must see m0 before it is
* overwritten, and SHA256MSG2 must see m2 after the addition.
*/
BOTAN_FORCE_INLINE BOTAN_FN_ISA_SHANI void sha256_msg_exp(SIMD_4x32& m0, SIMD_4x32& m1, SIMD_4x32& m2) {
   // Words straddling the boundary between m0 and m1
   const SIMD_4x32 straddle(_mm_alignr_epi8(m1.raw(), m0.raw(), 4));
   m2 += straddle;

   const SIMD_4x32 msg1_out(_mm_sha256msg1_epu32(m0.raw(), m1.raw()));
   m0 = msg1_out;

   const SIMD_4x32 msg2_out(_mm_sha256msg2_epu32(m2.raw(), m1.raw()));
   m2 = msg2_out;
}
35
36
0
/**
* Re-pack the two halves of the SHA-256 state.
*
* Both inputs are lane-shuffled, then S0 receives the 16 bytes starting at
* byte offset 8 of the (shuffled S0):(shuffled S1) concatenation, and S1
* receives a 16-bit-lane blend taking its upper half from the shuffled S0
* and its lower half from the shuffled S1. The lane comments give the
* resulting word order.
*
* In this file it is called as (S0, S1) after loading the digest and as
* (S1, S0) before storing it back. The two arguments must be distinct
* objects.
*/
BOTAN_FORCE_INLINE BOTAN_FN_ISA_SHANI void sha256_permute_state(SIMD_4x32& S0, SIMD_4x32& S1) {
   const __m128i shuf0 = _mm_shuffle_epi32(S0.raw(), 0b10110001);  // CDAB
   const __m128i shuf1 = _mm_shuffle_epi32(S1.raw(), 0b00011011);  // EFGH

   S0 = SIMD_4x32(_mm_alignr_epi8(shuf0, shuf1, 8));     // ABEF
   S1 = SIMD_4x32(_mm_blend_epi16(shuf1, shuf0, 0xF0));  // CDGH
}
44
45
}  // namespace
46
47
/**
* SHA-256 compression function using the x86 SHA extensions.
*
* Consumes `blocks` consecutive 64-byte blocks starting at
* input_span.data() and updates `digest` in place. The size of input_span
* is not checked here, so the caller must supply at least 64 * blocks
* bytes.
*
* @param digest the eight 32-bit chaining values, read and then overwritten
* @param input_span the message blocks to process
* @param blocks the number of 64-byte blocks to process
*/
void BOTAN_FN_ISA_SHANI SHA_256::compress_digest_x86(digest_type& digest,
                                                     std::span<const uint8_t> input_span,
                                                     size_t blocks) {
   // Round constants, one row per 4-round group (one vector load each)
   alignas(64) static const uint32_t K[] = {
      0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5,
      0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
      0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3,
      0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
      0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC,
      0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
      0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7,
      0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
      0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13,
      0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
      0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3,
      0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
      0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5,
      0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
      0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208,
      0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2,
   };

   // Load the chaining state and re-pack it into the layout used by the rounds
   SIMD_4x32 S0 = SIMD_4x32::load_le(&digest[0]);
   SIMD_4x32 S1 = SIMD_4x32::load_le(&digest[4]);
   sha256_permute_state(S0, S1);

   for(const uint8_t* block = input_span.data(); blocks > 0; block += 64, --blocks) {
      // Kept for the feed-forward addition at the end of this block
      const auto S0_before = S0;
      const auto S1_before = S1;

      // The 16 message words of this block, read as big-endian
      auto W0 = SIMD_4x32::load_be(block);
      auto W1 = SIMD_4x32::load_be(block + 16);
      auto W2 = SIMD_4x32::load_be(block + 32);
      auto W3 = SIMD_4x32::load_be(block + 48);

      // Rounds 0..15 use the message words as loaded
      sha256_rnds4(S0, S1, W0, SIMD_4x32::load_le(&K[0]));
      sha256_rnds4(S0, S1, W1, SIMD_4x32::load_le(&K[4]));
      sha256_rnds4(S0, S1, W2, SIMD_4x32::load_le(&K[8]));
      sha256_rnds4(S0, S1, W3, SIMD_4x32::load_le(&K[12]));

      // Prime W0 and W1 with SHA256MSG1 before the first expansion steps
      W0 = SIMD_4x32(_mm_sha256msg1_epu32(W0.raw(), W1.raw()));
      W1 = SIMD_4x32(_mm_sha256msg1_epu32(W1.raw(), W2.raw()));

      // Rounds 16..63: three passes of 16 rounds, rotating the roles of W0..W3
      for(size_t rnd = 16; rnd != 64; rnd += 16) {
         sha256_msg_exp(W2, W3, W0);
         sha256_rnds4(S0, S1, W0, SIMD_4x32::load_le(&K[rnd]));

         sha256_msg_exp(W3, W0, W1);
         sha256_rnds4(S0, S1, W1, SIMD_4x32::load_le(&K[rnd + 4]));

         sha256_msg_exp(W0, W1, W2);
         sha256_rnds4(S0, S1, W2, SIMD_4x32::load_le(&K[rnd + 8]));

         sha256_msg_exp(W1, W2, W3);
         sha256_rnds4(S0, S1, W3, SIMD_4x32::load_le(&K[rnd + 12]));
      }

      // Feed-forward: add the state from before this block
      S0 += S0_before;
      S1 += S1_before;
   }

   // Undo the re-packing (note the swapped arguments) and write the digest back
   sha256_permute_state(S1, S0);

   S0.store_le(&digest[0]);
   S1.store_le(&digest[4]);
}
112
113
}  // namespace Botan