Coverage Report

Created: 2020-02-14 15:38

/src/botan/src/lib/hash/sha2_32/sha2_32_bmi2/sha2_32_bmi2.cpp
Line
Count
Source
1
/*
2
* (C) 2018 Jack Lloyd
3
*
4
* Botan is released under the Simplified BSD License (see license.txt)
5
*/
6
7
#include <botan/sha2_32.h>
8
#include <botan/loadstor.h>
9
#include <botan/rotate.h>
10
11
namespace Botan {
12
13
/*
14
Your eyes do not decieve you; this is currently just a copy of the
15
baseline SHA-256 implementation. Because we compile it with BMI2
16
flags, GCC and Clang use the BMI2 instructions without further help.
17
18
Likely instruction scheduling could be improved by using inline asm.
19
*/
20
21
51.2M
#define SHA2_32_F(A, B, C, D, E, F, G, H, M1, M2, M3, M4, magic) do {   \
22
51.2M
   uint32_t A_rho = rotr<2>(A) ^ rotr<13>(A) ^ rotr<22>(A);             \
23
51.2M
   uint32_t E_rho = rotr<6>(E) ^ rotr<11>(E) ^ rotr<25>(E);             \
24
51.2M
   uint32_t M2_sigma = rotr<17>(M2) ^ rotr<19>(M2) ^ (M2 >> 10);        \
25
51.2M
   uint32_t M4_sigma = rotr<7>(M4) ^ rotr<18>(M4) ^ (M4 >> 3);          \
26
51.2M
   H += magic + E_rho + ((E & F) ^ (~E & G)) + M1;                      \
27
51.2M
   D += H;                                                              \
28
51.2M
   H += A_rho + ((A & B) | ((A | B) & C));                              \
29
51.2M
   M1 += M2_sigma + M3 + M4_sigma;                                      \
30
51.2M
   } while(0);
31
32
void SHA_256::compress_digest_x86_bmi2(secure_vector<uint32_t>& digest,
33
                                       const uint8_t input[],
34
                                       size_t blocks)
35
629k
   {
36
629k
   uint32_t A = digest[0], B = digest[1], C = digest[2],
37
629k
            D = digest[3], E = digest[4], F = digest[5],
38
629k
            G = digest[6], H = digest[7];
39
629k
40
1.43M
   for(size_t i = 0; i != blocks; ++i)
41
801k
      {
42
801k
      uint32_t W00 = load_be<uint32_t>(input,  0);
43
801k
      uint32_t W01 = load_be<uint32_t>(input,  1);
44
801k
      uint32_t W02 = load_be<uint32_t>(input,  2);
45
801k
      uint32_t W03 = load_be<uint32_t>(input,  3);
46
801k
      uint32_t W04 = load_be<uint32_t>(input,  4);
47
801k
      uint32_t W05 = load_be<uint32_t>(input,  5);
48
801k
      uint32_t W06 = load_be<uint32_t>(input,  6);
49
801k
      uint32_t W07 = load_be<uint32_t>(input,  7);
50
801k
      uint32_t W08 = load_be<uint32_t>(input,  8);
51
801k
      uint32_t W09 = load_be<uint32_t>(input,  9);
52
801k
      uint32_t W10 = load_be<uint32_t>(input, 10);
53
801k
      uint32_t W11 = load_be<uint32_t>(input, 11);
54
801k
      uint32_t W12 = load_be<uint32_t>(input, 12);
55
801k
      uint32_t W13 = load_be<uint32_t>(input, 13);
56
801k
      uint32_t W14 = load_be<uint32_t>(input, 14);
57
801k
      uint32_t W15 = load_be<uint32_t>(input, 15);
58
801k
59
801k
      SHA2_32_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0x428A2F98);
60
801k
      SHA2_32_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0x71374491);
61
801k
      SHA2_32_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0xB5C0FBCF);
62
801k
      SHA2_32_F(F, G, H, A, B, C, D, E, W03, W01, W12, W04, 0xE9B5DBA5);
63
801k
      SHA2_32_F(E, F, G, H, A, B, C, D, W04, W02, W13, W05, 0x3956C25B);
64
801k
      SHA2_32_F(D, E, F, G, H, A, B, C, W05, W03, W14, W06, 0x59F111F1);
65
801k
      SHA2_32_F(C, D, E, F, G, H, A, B, W06, W04, W15, W07, 0x923F82A4);
66
801k
      SHA2_32_F(B, C, D, E, F, G, H, A, W07, W05, W00, W08, 0xAB1C5ED5);
67
801k
      SHA2_32_F(A, B, C, D, E, F, G, H, W08, W06, W01, W09, 0xD807AA98);
68
801k
      SHA2_32_F(H, A, B, C, D, E, F, G, W09, W07, W02, W10, 0x12835B01);
69
801k
      SHA2_32_F(G, H, A, B, C, D, E, F, W10, W08, W03, W11, 0x243185BE);
70
801k
      SHA2_32_F(F, G, H, A, B, C, D, E, W11, W09, W04, W12, 0x550C7DC3);
71
801k
      SHA2_32_F(E, F, G, H, A, B, C, D, W12, W10, W05, W13, 0x72BE5D74);
72
801k
      SHA2_32_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0x80DEB1FE);
73
801k
      SHA2_32_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0x9BDC06A7);
74
801k
      SHA2_32_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0xC19BF174);
75
801k
76
801k
      SHA2_32_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0xE49B69C1);
77
801k
      SHA2_32_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0xEFBE4786);
78
801k
      SHA2_32_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0x0FC19DC6);
79
801k
      SHA2_32_F(F, G, H, A, B, C, D, E, W03, W01, W12, W04, 0x240CA1CC);
80
801k
      SHA2_32_F(E, F, G, H, A, B, C, D, W04, W02, W13, W05, 0x2DE92C6F);
81
801k
      SHA2_32_F(D, E, F, G, H, A, B, C, W05, W03, W14, W06, 0x4A7484AA);
82
801k
      SHA2_32_F(C, D, E, F, G, H, A, B, W06, W04, W15, W07, 0x5CB0A9DC);
83
801k
      SHA2_32_F(B, C, D, E, F, G, H, A, W07, W05, W00, W08, 0x76F988DA);
84
801k
      SHA2_32_F(A, B, C, D, E, F, G, H, W08, W06, W01, W09, 0x983E5152);
85
801k
      SHA2_32_F(H, A, B, C, D, E, F, G, W09, W07, W02, W10, 0xA831C66D);
86
801k
      SHA2_32_F(G, H, A, B, C, D, E, F, W10, W08, W03, W11, 0xB00327C8);
87
801k
      SHA2_32_F(F, G, H, A, B, C, D, E, W11, W09, W04, W12, 0xBF597FC7);
88
801k
      SHA2_32_F(E, F, G, H, A, B, C, D, W12, W10, W05, W13, 0xC6E00BF3);
89
801k
      SHA2_32_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0xD5A79147);
90
801k
      SHA2_32_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0x06CA6351);
91
801k
      SHA2_32_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0x14292967);
92
801k
93
801k
      SHA2_32_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0x27B70A85);
94
801k
      SHA2_32_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0x2E1B2138);
95
801k
      SHA2_32_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0x4D2C6DFC);
96
801k
      SHA2_32_F(F, G, H, A, B, C, D, E, W03, W01, W12, W04, 0x53380D13);
97
801k
      SHA2_32_F(E, F, G, H, A, B, C, D, W04, W02, W13, W05, 0x650A7354);
98
801k
      SHA2_32_F(D, E, F, G, H, A, B, C, W05, W03, W14, W06, 0x766A0ABB);
99
801k
      SHA2_32_F(C, D, E, F, G, H, A, B, W06, W04, W15, W07, 0x81C2C92E);
100
801k
      SHA2_32_F(B, C, D, E, F, G, H, A, W07, W05, W00, W08, 0x92722C85);
101
801k
      SHA2_32_F(A, B, C, D, E, F, G, H, W08, W06, W01, W09, 0xA2BFE8A1);
102
801k
      SHA2_32_F(H, A, B, C, D, E, F, G, W09, W07, W02, W10, 0xA81A664B);
103
801k
      SHA2_32_F(G, H, A, B, C, D, E, F, W10, W08, W03, W11, 0xC24B8B70);
104
801k
      SHA2_32_F(F, G, H, A, B, C, D, E, W11, W09, W04, W12, 0xC76C51A3);
105
801k
      SHA2_32_F(E, F, G, H, A, B, C, D, W12, W10, W05, W13, 0xD192E819);
106
801k
      SHA2_32_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0xD6990624);
107
801k
      SHA2_32_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0xF40E3585);
108
801k
      SHA2_32_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0x106AA070);
109
801k
110
801k
      SHA2_32_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0x19A4C116);
111
801k
      SHA2_32_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0x1E376C08);
112
801k
      SHA2_32_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0x2748774C);
113
801k
      SHA2_32_F(F, G, H, A, B, C, D, E, W03, W01, W12, W04, 0x34B0BCB5);
114
801k
      SHA2_32_F(E, F, G, H, A, B, C, D, W04, W02, W13, W05, 0x391C0CB3);
115
801k
      SHA2_32_F(D, E, F, G, H, A, B, C, W05, W03, W14, W06, 0x4ED8AA4A);
116
801k
      SHA2_32_F(C, D, E, F, G, H, A, B, W06, W04, W15, W07, 0x5B9CCA4F);
117
801k
      SHA2_32_F(B, C, D, E, F, G, H, A, W07, W05, W00, W08, 0x682E6FF3);
118
801k
      SHA2_32_F(A, B, C, D, E, F, G, H, W08, W06, W01, W09, 0x748F82EE);
119
801k
      SHA2_32_F(H, A, B, C, D, E, F, G, W09, W07, W02, W10, 0x78A5636F);
120
801k
      SHA2_32_F(G, H, A, B, C, D, E, F, W10, W08, W03, W11, 0x84C87814);
121
801k
      SHA2_32_F(F, G, H, A, B, C, D, E, W11, W09, W04, W12, 0x8CC70208);
122
801k
      SHA2_32_F(E, F, G, H, A, B, C, D, W12, W10, W05, W13, 0x90BEFFFA);
123
801k
      SHA2_32_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0xA4506CEB);
124
801k
      SHA2_32_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0xBEF9A3F7);
125
801k
      SHA2_32_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0xC67178F2);
126
801k
127
801k
      A = (digest[0] += A);
128
801k
      B = (digest[1] += B);
129
801k
      C = (digest[2] += C);
130
801k
      D = (digest[3] += D);
131
801k
      E = (digest[4] += E);
132
801k
      F = (digest[5] += F);
133
801k
      G = (digest[6] += G);
134
801k
      H = (digest[7] += H);
135
801k
136
801k
      input += 64;
137
801k
      }
138
629k
   }
139
140
}