Coverage Report

Created: 2020-11-21 08:34

/src/botan/src/lib/hash/sha2_32/sha2_32_bmi2/sha2_32_bmi2.cpp
Line
Count
Source
1
/*
2
* (C) 2018 Jack Lloyd
3
*
4
* Botan is released under the Simplified BSD License (see license.txt)
5
*/
6
7
#include <botan/internal/sha2_32.h>
8
#include <botan/internal/loadstor.h>
9
#include <botan/internal/rotate.h>
10
11
namespace Botan {
12
13
/*
14
Your eyes do not deceive you; this is currently just a copy of the
15
baseline SHA-256 implementation. Because we compile it with BMI2
16
flags, GCC and Clang use the BMI2 instructions without further help.
17
18
Likely instruction scheduling could be improved by using inline asm.
19
*/
20
21
58.6M
/*
* One SHA-2 (32-bit word) round fused with one message-schedule update.
*
* A..H are the eight working variables; the caller permutes which variable
* is passed in which position from one invocation to the next instead of
* shuffling values. D, H and M1 are modified in place, so they must be
* modifiable lvalues. A and E are expanded several times, so arguments
* should be plain variables without side effects.
*
* Round part:
*   H += magic + E_rho + ((E & F) ^ (~E & G)) + M1;   then D += H;
*   H += A_rho + ((A & B) | ((A | B) & C));
* where A_rho / E_rho XOR together three rotr<> results of A / E
* (rotr<N> comes from rotate.h; presumably a right rotation by N bits).
*
* Schedule part:
*   M1 += M2_sigma + M3 + M4_sigma;
* where each sigma XORs two rotr<> results with a plain right shift.
* M1 is read by the round part before it is overwritten here.
*
* NOTE(review): the definition ends in `while(0);` (with a semicolon), so an
* invocation written as `SHA2_32_F(...);` expands to an extra empty
* statement. That is harmless where the macro is used as a plain statement,
* but it would break an unbraced if/else around an invocation.
*/
#define SHA2_32_F(A, B, C, D, E, F, G, H, M1, M2, M3, M4, magic) do {   \
22
58.6M
   uint32_t A_rho = rotr<2>(A) ^ rotr<13>(A) ^ rotr<22>(A);             \
23
58.6M
   uint32_t E_rho = rotr<6>(E) ^ rotr<11>(E) ^ rotr<25>(E);             \
24
58.6M
   uint32_t M2_sigma = rotr<17>(M2) ^ rotr<19>(M2) ^ (M2 >> 10);        \
25
58.6M
   uint32_t M4_sigma = rotr<7>(M4) ^ rotr<18>(M4) ^ (M4 >> 3);          \
26
58.6M
   H += magic + E_rho + ((E & F) ^ (~E & G)) + M1;                      \
27
58.6M
   D += H;                                                              \
28
58.6M
   H += A_rho + ((A & B) | ((A | B) & C));                              \
29
58.6M
   M1 += M2_sigma + M3 + M4_sigma;                                      \
30
58.6M
   } while(0);
31
32
/*
* Compress `blocks` consecutive 64-byte blocks from `input` into `digest`.
*
* @param digest chaining value; elements 0..7 are read on entry (even when
*        blocks == 0) and updated in place after every block, so it must
*        hold at least eight words
* @param input  message bytes; 64 bytes are consumed per block
* @param blocks number of 64-byte blocks to process; with 0 the loop body
*        never runs and digest is left unchanged
*
* Each block goes through 64 SHA2_32_F invocations, written as four groups
* of sixteen. Within a group the A..H arguments are rotated by one position
* per invocation, and the W00..W15 arguments cycle so that the sixteen
* locals act as a rolling message schedule.
*/
void SHA_256::compress_digest_x86_bmi2(secure_vector<uint32_t>& digest,
33
                                       const uint8_t input[],
34
                                       size_t blocks)
35
730k
   {
36
730k
   // Working variables start out as the current chaining value.
   uint32_t A = digest[0], B = digest[1], C = digest[2],
37
730k
            D = digest[3], E = digest[4], F = digest[5],
38
730k
            G = digest[6], H = digest[7];
39
40
1.64M
   for(size_t i = 0; i != blocks; ++i)
41
916k
      {
42
916k
      // Fetch words 0..15 of the current block with load_be
      // (presumably big-endian 32-bit loads; see loadstor.h).
      uint32_t W00 = load_be<uint32_t>(input,  0);
43
916k
      uint32_t W01 = load_be<uint32_t>(input,  1);
44
916k
      uint32_t W02 = load_be<uint32_t>(input,  2);
45
916k
      uint32_t W03 = load_be<uint32_t>(input,  3);
46
916k
      uint32_t W04 = load_be<uint32_t>(input,  4);
47
916k
      uint32_t W05 = load_be<uint32_t>(input,  5);
48
916k
      uint32_t W06 = load_be<uint32_t>(input,  6);
49
916k
      uint32_t W07 = load_be<uint32_t>(input,  7);
50
916k
      uint32_t W08 = load_be<uint32_t>(input,  8);
51
916k
      uint32_t W09 = load_be<uint32_t>(input,  9);
52
916k
      uint32_t W10 = load_be<uint32_t>(input, 10);
53
916k
      uint32_t W11 = load_be<uint32_t>(input, 11);
54
916k
      uint32_t W12 = load_be<uint32_t>(input, 12);
55
916k
      uint32_t W13 = load_be<uint32_t>(input, 13);
56
916k
      uint32_t W14 = load_be<uint32_t>(input, 14);
57
916k
      uint32_t W15 = load_be<uint32_t>(input, 15);
58
59
916k
      // 64 rounds follow. Each invocation also updates its M1 word in
      // place, so statement order matters: later invocations read the
      // W values written by earlier ones.
      SHA2_32_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0x428A2F98);
60
916k
      SHA2_32_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0x71374491);
61
916k
      SHA2_32_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0xB5C0FBCF);
62
916k
      SHA2_32_F(F, G, H, A, B, C, D, E, W03, W01, W12, W04, 0xE9B5DBA5);
63
916k
      SHA2_32_F(E, F, G, H, A, B, C, D, W04, W02, W13, W05, 0x3956C25B);
64
916k
      SHA2_32_F(D, E, F, G, H, A, B, C, W05, W03, W14, W06, 0x59F111F1);
65
916k
      SHA2_32_F(C, D, E, F, G, H, A, B, W06, W04, W15, W07, 0x923F82A4);
66
916k
      SHA2_32_F(B, C, D, E, F, G, H, A, W07, W05, W00, W08, 0xAB1C5ED5);
67
916k
      SHA2_32_F(A, B, C, D, E, F, G, H, W08, W06, W01, W09, 0xD807AA98);
68
916k
      SHA2_32_F(H, A, B, C, D, E, F, G, W09, W07, W02, W10, 0x12835B01);
69
916k
      SHA2_32_F(G, H, A, B, C, D, E, F, W10, W08, W03, W11, 0x243185BE);
70
916k
      SHA2_32_F(F, G, H, A, B, C, D, E, W11, W09, W04, W12, 0x550C7DC3);
71
916k
      SHA2_32_F(E, F, G, H, A, B, C, D, W12, W10, W05, W13, 0x72BE5D74);
72
916k
      SHA2_32_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0x80DEB1FE);
73
916k
      SHA2_32_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0x9BDC06A7);
74
916k
      SHA2_32_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0xC19BF174);
75
76
916k
      SHA2_32_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0xE49B69C1);
77
916k
      SHA2_32_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0xEFBE4786);
78
916k
      SHA2_32_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0x0FC19DC6);
79
916k
      SHA2_32_F(F, G, H, A, B, C, D, E, W03, W01, W12, W04, 0x240CA1CC);
80
916k
      SHA2_32_F(E, F, G, H, A, B, C, D, W04, W02, W13, W05, 0x2DE92C6F);
81
916k
      SHA2_32_F(D, E, F, G, H, A, B, C, W05, W03, W14, W06, 0x4A7484AA);
82
916k
      SHA2_32_F(C, D, E, F, G, H, A, B, W06, W04, W15, W07, 0x5CB0A9DC);
83
916k
      SHA2_32_F(B, C, D, E, F, G, H, A, W07, W05, W00, W08, 0x76F988DA);
84
916k
      SHA2_32_F(A, B, C, D, E, F, G, H, W08, W06, W01, W09, 0x983E5152);
85
916k
      SHA2_32_F(H, A, B, C, D, E, F, G, W09, W07, W02, W10, 0xA831C66D);
86
916k
      SHA2_32_F(G, H, A, B, C, D, E, F, W10, W08, W03, W11, 0xB00327C8);
87
916k
      SHA2_32_F(F, G, H, A, B, C, D, E, W11, W09, W04, W12, 0xBF597FC7);
88
916k
      SHA2_32_F(E, F, G, H, A, B, C, D, W12, W10, W05, W13, 0xC6E00BF3);
89
916k
      SHA2_32_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0xD5A79147);
90
916k
      SHA2_32_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0x06CA6351);
91
916k
      SHA2_32_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0x14292967);
92
93
916k
      SHA2_32_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0x27B70A85);
94
916k
      SHA2_32_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0x2E1B2138);
95
916k
      SHA2_32_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0x4D2C6DFC);
96
916k
      SHA2_32_F(F, G, H, A, B, C, D, E, W03, W01, W12, W04, 0x53380D13);
97
916k
      SHA2_32_F(E, F, G, H, A, B, C, D, W04, W02, W13, W05, 0x650A7354);
98
916k
      SHA2_32_F(D, E, F, G, H, A, B, C, W05, W03, W14, W06, 0x766A0ABB);
99
916k
      SHA2_32_F(C, D, E, F, G, H, A, B, W06, W04, W15, W07, 0x81C2C92E);
100
916k
      SHA2_32_F(B, C, D, E, F, G, H, A, W07, W05, W00, W08, 0x92722C85);
101
916k
      SHA2_32_F(A, B, C, D, E, F, G, H, W08, W06, W01, W09, 0xA2BFE8A1);
102
916k
      SHA2_32_F(H, A, B, C, D, E, F, G, W09, W07, W02, W10, 0xA81A664B);
103
916k
      SHA2_32_F(G, H, A, B, C, D, E, F, W10, W08, W03, W11, 0xC24B8B70);
104
916k
      SHA2_32_F(F, G, H, A, B, C, D, E, W11, W09, W04, W12, 0xC76C51A3);
105
916k
      SHA2_32_F(E, F, G, H, A, B, C, D, W12, W10, W05, W13, 0xD192E819);
106
916k
      SHA2_32_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0xD6990624);
107
916k
      SHA2_32_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0xF40E3585);
108
916k
      SHA2_32_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0x106AA070);
109
110
916k
      SHA2_32_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0x19A4C116);
111
916k
      SHA2_32_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0x1E376C08);
112
916k
      SHA2_32_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0x2748774C);
113
916k
      SHA2_32_F(F, G, H, A, B, C, D, E, W03, W01, W12, W04, 0x34B0BCB5);
114
916k
      SHA2_32_F(E, F, G, H, A, B, C, D, W04, W02, W13, W05, 0x391C0CB3);
115
916k
      SHA2_32_F(D, E, F, G, H, A, B, C, W05, W03, W14, W06, 0x4ED8AA4A);
116
916k
      SHA2_32_F(C, D, E, F, G, H, A, B, W06, W04, W15, W07, 0x5B9CCA4F);
117
916k
      SHA2_32_F(B, C, D, E, F, G, H, A, W07, W05, W00, W08, 0x682E6FF3);
118
916k
      SHA2_32_F(A, B, C, D, E, F, G, H, W08, W06, W01, W09, 0x748F82EE);
119
916k
      SHA2_32_F(H, A, B, C, D, E, F, G, W09, W07, W02, W10, 0x78A5636F);
120
916k
      SHA2_32_F(G, H, A, B, C, D, E, F, W10, W08, W03, W11, 0x84C87814);
121
916k
      SHA2_32_F(F, G, H, A, B, C, D, E, W11, W09, W04, W12, 0x8CC70208);
122
916k
      SHA2_32_F(E, F, G, H, A, B, C, D, W12, W10, W05, W13, 0x90BEFFFA);
123
916k
      SHA2_32_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0xA4506CEB);
124
916k
      SHA2_32_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0xBEF9A3F7);
125
916k
      SHA2_32_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0xC67178F2);
126
127
916k
      // Add the working variables into the chaining value and reload them
      // from the result, so the next block starts from the updated digest.
      A = (digest[0] += A);
128
916k
      B = (digest[1] += B);
129
916k
      C = (digest[2] += C);
130
916k
      D = (digest[3] += D);
131
916k
      E = (digest[4] += E);
132
916k
      F = (digest[5] += F);
133
916k
      G = (digest[6] += G);
134
916k
      H = (digest[7] += H);
135
136
916k
      // Step to the next 64-byte block.
      input += 64;
137
916k
      }
138
730k
   }
139
140
}