Coverage Report

Created: 2022-06-23 06:44

/src/botan/src/lib/hash/sha2_32/sha2_32_bmi2/sha2_32_bmi2.cpp
Line
Count
Source
1
/*
2
* (C) 2018 Jack Lloyd
3
*
4
* Botan is released under the Simplified BSD License (see license.txt)
5
*/
6
7
#include <botan/internal/sha2_32.h>
8
#include <botan/internal/loadstor.h>
9
#include <botan/internal/rotate.h>
10
#include <botan/internal/bit_ops.h>
11
12
namespace Botan {
13
14
/*
15
Your eyes do not deceive you; this is currently just a copy of the
16
baseline SHA-256 implementation. Because we compile it with BMI2
17
flags, GCC and Clang use the BMI2 instructions without further help.
18
19
Likely instruction scheduling could be improved by using inline asm.
20
*/
21
22
58.9M
/*
* One round of the SHA-256 compression function, as a statement-like macro.
*
* A..H name the eight working variables in the role they play for this
* round; only D and H are assigned to. M1 is the message word consumed by
* the round and is then updated in place from M2, M3 and M4. magic is the
* per-round constant supplied by the caller.
*
* rotr<>, choose() and majority() must be visible at the expansion site
* (presumably provided by rotate.h and bit_ops.h - confirm).
*
* The expansion deliberately has no trailing semicolon: the caller's own
* ';' completes the do/while, so the macro behaves as exactly one statement
* (for example as the body of an unbraced if/else). Arguments that are only
* read are parenthesized so compound expressions keep their meaning.
*/
#define SHA2_32_F(A, B, C, D, E, F, G, H, M1, M2, M3, M4, magic) do {         \
   uint32_t A_rho = rotr<2>(A) ^ rotr<13>(A) ^ rotr<22>(A);                   \
   uint32_t E_rho = rotr<6>(E) ^ rotr<11>(E) ^ rotr<25>(E);                   \
   uint32_t M2_sigma = rotr<17>(M2) ^ rotr<19>(M2) ^ ((M2) >> 10);            \
   uint32_t M4_sigma = rotr<7>(M4) ^ rotr<18>(M4) ^ ((M4) >> 3);              \
   H += (magic) + E_rho + choose(E, F, G) + (M1);                             \
   D += H;                                                                    \
   H += A_rho + majority(A, B, C);                                            \
   M1 += M2_sigma + (M3) + M4_sigma;                                          \
   } while(0)
32
33
/*
* SHA-256 compression function (the copy that is built with BMI2 flags).
*
* Processes `blocks` consecutive 64-byte blocks starting at `input` and
* folds each one into the eight 32-bit words digest[0..7], in place.
*
* @param digest chaining state; elements 0..7 are read and updated
* @param input message data; 64 bytes are consumed per block
* @param blocks number of 64-byte blocks to process (0 leaves digest as is)
*/
void SHA_256::compress_digest_x86_bmi2(secure_vector<uint32_t>& digest,
34
                                       const uint8_t input[],
35
                                       size_t blocks)
36
749k
   {
37
749k
   // Working variables start out as a copy of the current chaining state
   uint32_t A = digest[0], B = digest[1], C = digest[2],
38
749k
            D = digest[3], E = digest[4], F = digest[5],
39
749k
            G = digest[6], H = digest[7];
40
41
1.67M
   for(size_t i = 0; i != blocks; ++i)
42
920k
      {
43
920k
      // Read the 16 32-bit message words of this block. load_be<uint32_t>(input, n)
      // presumably returns big-endian word n of the buffer (see loadstor.h).
      uint32_t W00 = load_be<uint32_t>(input,  0);
44
920k
      uint32_t W01 = load_be<uint32_t>(input,  1);
45
920k
      uint32_t W02 = load_be<uint32_t>(input,  2);
46
920k
      uint32_t W03 = load_be<uint32_t>(input,  3);
47
920k
      uint32_t W04 = load_be<uint32_t>(input,  4);
48
920k
      uint32_t W05 = load_be<uint32_t>(input,  5);
49
920k
      uint32_t W06 = load_be<uint32_t>(input,  6);
50
920k
      uint32_t W07 = load_be<uint32_t>(input,  7);
51
920k
      uint32_t W08 = load_be<uint32_t>(input,  8);
52
920k
      uint32_t W09 = load_be<uint32_t>(input,  9);
53
920k
      uint32_t W10 = load_be<uint32_t>(input, 10);
54
920k
      uint32_t W11 = load_be<uint32_t>(input, 11);
55
920k
      uint32_t W12 = load_be<uint32_t>(input, 12);
56
920k
      uint32_t W13 = load_be<uint32_t>(input, 13);
57
920k
      uint32_t W14 = load_be<uint32_t>(input, 14);
58
920k
      uint32_t W15 = load_be<uint32_t>(input, 15);
59
60
920k
      // Rounds 0-15. Each call passes the working variables shifted by one
      // position (A..H, then H,A..G, ...), so no values are copied between
      // variables; after eight calls the argument order is back to A..H.
      SHA2_32_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0x428A2F98);
61
920k
      SHA2_32_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0x71374491);
62
920k
      SHA2_32_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0xB5C0FBCF);
63
920k
      SHA2_32_F(F, G, H, A, B, C, D, E, W03, W01, W12, W04, 0xE9B5DBA5);
64
920k
      SHA2_32_F(E, F, G, H, A, B, C, D, W04, W02, W13, W05, 0x3956C25B);
65
920k
      SHA2_32_F(D, E, F, G, H, A, B, C, W05, W03, W14, W06, 0x59F111F1);
66
920k
      SHA2_32_F(C, D, E, F, G, H, A, B, W06, W04, W15, W07, 0x923F82A4);
67
920k
      SHA2_32_F(B, C, D, E, F, G, H, A, W07, W05, W00, W08, 0xAB1C5ED5);
68
920k
      SHA2_32_F(A, B, C, D, E, F, G, H, W08, W06, W01, W09, 0xD807AA98);
69
920k
      SHA2_32_F(H, A, B, C, D, E, F, G, W09, W07, W02, W10, 0x12835B01);
70
920k
      SHA2_32_F(G, H, A, B, C, D, E, F, W10, W08, W03, W11, 0x243185BE);
71
920k
      SHA2_32_F(F, G, H, A, B, C, D, E, W11, W09, W04, W12, 0x550C7DC3);
72
920k
      SHA2_32_F(E, F, G, H, A, B, C, D, W12, W10, W05, W13, 0x72BE5D74);
73
920k
      SHA2_32_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0x80DEB1FE);
74
920k
      SHA2_32_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0x9BDC06A7);
75
920k
      SHA2_32_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0xC19BF174);
76
77
920k
      // Rounds 16-31. W00..W15 were each updated in place by the previous
      // 16 calls, so the same variables are reused as this group's input.
      SHA2_32_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0xE49B69C1);
78
920k
      SHA2_32_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0xEFBE4786);
79
920k
      SHA2_32_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0x0FC19DC6);
80
920k
      SHA2_32_F(F, G, H, A, B, C, D, E, W03, W01, W12, W04, 0x240CA1CC);
81
920k
      SHA2_32_F(E, F, G, H, A, B, C, D, W04, W02, W13, W05, 0x2DE92C6F);
82
920k
      SHA2_32_F(D, E, F, G, H, A, B, C, W05, W03, W14, W06, 0x4A7484AA);
83
920k
      SHA2_32_F(C, D, E, F, G, H, A, B, W06, W04, W15, W07, 0x5CB0A9DC);
84
920k
      SHA2_32_F(B, C, D, E, F, G, H, A, W07, W05, W00, W08, 0x76F988DA);
85
920k
      SHA2_32_F(A, B, C, D, E, F, G, H, W08, W06, W01, W09, 0x983E5152);
86
920k
      SHA2_32_F(H, A, B, C, D, E, F, G, W09, W07, W02, W10, 0xA831C66D);
87
920k
      SHA2_32_F(G, H, A, B, C, D, E, F, W10, W08, W03, W11, 0xB00327C8);
88
920k
      SHA2_32_F(F, G, H, A, B, C, D, E, W11, W09, W04, W12, 0xBF597FC7);
89
920k
      SHA2_32_F(E, F, G, H, A, B, C, D, W12, W10, W05, W13, 0xC6E00BF3);
90
920k
      SHA2_32_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0xD5A79147);
91
920k
      SHA2_32_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0x06CA6351);
92
920k
      SHA2_32_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0x14292967);
93
94
920k
      // Rounds 32-47
      SHA2_32_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0x27B70A85);
95
920k
      SHA2_32_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0x2E1B2138);
96
920k
      SHA2_32_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0x4D2C6DFC);
97
920k
      SHA2_32_F(F, G, H, A, B, C, D, E, W03, W01, W12, W04, 0x53380D13);
98
920k
      SHA2_32_F(E, F, G, H, A, B, C, D, W04, W02, W13, W05, 0x650A7354);
99
920k
      SHA2_32_F(D, E, F, G, H, A, B, C, W05, W03, W14, W06, 0x766A0ABB);
100
920k
      SHA2_32_F(C, D, E, F, G, H, A, B, W06, W04, W15, W07, 0x81C2C92E);
101
920k
      SHA2_32_F(B, C, D, E, F, G, H, A, W07, W05, W00, W08, 0x92722C85);
102
920k
      SHA2_32_F(A, B, C, D, E, F, G, H, W08, W06, W01, W09, 0xA2BFE8A1);
103
920k
      SHA2_32_F(H, A, B, C, D, E, F, G, W09, W07, W02, W10, 0xA81A664B);
104
920k
      SHA2_32_F(G, H, A, B, C, D, E, F, W10, W08, W03, W11, 0xC24B8B70);
105
920k
      SHA2_32_F(F, G, H, A, B, C, D, E, W11, W09, W04, W12, 0xC76C51A3);
106
920k
      SHA2_32_F(E, F, G, H, A, B, C, D, W12, W10, W05, W13, 0xD192E819);
107
920k
      SHA2_32_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0xD6990624);
108
920k
      SHA2_32_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0xF40E3585);
109
920k
      SHA2_32_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0x106AA070);
110
111
920k
      // Rounds 48-63. The macro still updates its M1 argument here, but
      // those values are not read again before the W variables go out of scope.
      SHA2_32_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0x19A4C116);
112
920k
      SHA2_32_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0x1E376C08);
113
920k
      SHA2_32_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0x2748774C);
114
920k
      SHA2_32_F(F, G, H, A, B, C, D, E, W03, W01, W12, W04, 0x34B0BCB5);
115
920k
      SHA2_32_F(E, F, G, H, A, B, C, D, W04, W02, W13, W05, 0x391C0CB3);
116
920k
      SHA2_32_F(D, E, F, G, H, A, B, C, W05, W03, W14, W06, 0x4ED8AA4A);
117
920k
      SHA2_32_F(C, D, E, F, G, H, A, B, W06, W04, W15, W07, 0x5B9CCA4F);
118
920k
      SHA2_32_F(B, C, D, E, F, G, H, A, W07, W05, W00, W08, 0x682E6FF3);
119
920k
      SHA2_32_F(A, B, C, D, E, F, G, H, W08, W06, W01, W09, 0x748F82EE);
120
920k
      SHA2_32_F(H, A, B, C, D, E, F, G, W09, W07, W02, W10, 0x78A5636F);
121
920k
      SHA2_32_F(G, H, A, B, C, D, E, F, W10, W08, W03, W11, 0x84C87814);
122
920k
      SHA2_32_F(F, G, H, A, B, C, D, E, W11, W09, W04, W12, 0x8CC70208);
123
920k
      SHA2_32_F(E, F, G, H, A, B, C, D, W12, W10, W05, W13, 0x90BEFFFA);
124
920k
      SHA2_32_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0xA4506CEB);
125
920k
      SHA2_32_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0xBEF9A3F7);
126
920k
      SHA2_32_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0xC67178F2);
127
128
920k
      // Add the result of the 64 rounds into the chaining state, and reload
      // the working variables from it so the next block starts from the
      // updated state.
      A = (digest[0] += A);
129
920k
      B = (digest[1] += B);
130
920k
      C = (digest[2] += C);
131
920k
      D = (digest[3] += D);
132
920k
      E = (digest[4] += E);
133
920k
      F = (digest[5] += F);
134
920k
      G = (digest[6] += G);
135
920k
      H = (digest[7] += H);
136
137
920k
      // Step to the next 64-byte block
      input += 64;
138
920k
      }
139
749k
   }
140
141
}