Coverage Report

Created: 2021-02-21 07:20

/src/botan/src/lib/hash/sha2_32/sha2_32_bmi2/sha2_32_bmi2.cpp
Line
Count
Source
1
/*
2
* (C) 2018 Jack Lloyd
3
*
4
* Botan is released under the Simplified BSD License (see license.txt)
5
*/
6
7
#include <botan/internal/sha2_32.h>
8
#include <botan/internal/loadstor.h>
9
#include <botan/internal/rotate.h>
10
#include <botan/internal/bit_ops.h>
11
12
namespace Botan {
13
14
/*
15
Your eyes do not deceive you; this is currently just a copy of the
16
baseline SHA-256 implementation. Because we compile it with BMI2
17
flags, GCC and Clang use the BMI2 instructions without further help.
18
19
Likely instruction scheduling could be improved by using inline asm.
20
*/
21
22
48.7M
/*
* One SHA-256 round fused with one in-place message schedule update.
*
* A..H   the eight 32-bit working variables, passed in the order that
*        applies to this round. Only D and H are written; callers rotate
*        the argument order between rounds instead of moving values.
* M1     message word consumed by this round. After it has been added
*        into H it is overwritten with M1 + M2_sigma + M3 + M4_sigma, so
*        the same variable holds the word needed 16 rounds later.
* M2     word fed through rotr 17 / rotr 19 / shift-right 10.
* M3     word added unmodified into the new M1.
* M4     word fed through rotr 7 / rotr 18 / shift-right 3.
* magic  round constant added into H.
*
* A, D, E, H and M1..M4 are expanded more than once and D, H, M1 are
* assigned to, so every argument must be a side-effect-free expression
* and D, H, M1 must be modifiable uint32_t lvalues.
*
* The body is wrapped in do { } while(0) WITHOUT a trailing semicolon so
* that "SHA2_32_F(...);" expands to exactly one statement and stays safe
* inside an unbraced if/else.
*/
#define SHA2_32_F(A, B, C, D, E, F, G, H, M1, M2, M3, M4, magic) do {   \
   uint32_t A_rho = rotr<2>(A) ^ rotr<13>(A) ^ rotr<22>(A);             \
   uint32_t E_rho = rotr<6>(E) ^ rotr<11>(E) ^ rotr<25>(E);             \
   uint32_t M2_sigma = rotr<17>(M2) ^ rotr<19>(M2) ^ ((M2) >> 10);      \
   uint32_t M4_sigma = rotr<7>(M4) ^ rotr<18>(M4) ^ ((M4) >> 3);        \
   H += (magic) + E_rho + choose(E, F, G) + (M1);                       \
   D += H;                                                              \
   H += A_rho + majority(A, B, C);                                      \
   M1 += M2_sigma + (M3) + M4_sigma;                                    \
   } while(0)
32
33
/*
* SHA-256 compression function, BMI2-compiled variant.
*
* Folds `blocks` consecutive 64-byte blocks starting at `input` into
* `digest`, which is updated in place.
*
* digest  chaining value; elements 0..7 are read and written, so it must
*         hold at least eight words.
* input   message data; the pointer is advanced by 64 after every block,
*         so blocks * 64 bytes are consumed in total.
* blocks  number of blocks to process. Zero leaves digest untouched, as
*         the loop body never runs.
*
* Each block runs 64 SHA2_32_F invocations, written out as four groups
* of 16, with the round constant given as the last macro argument.
*/
void SHA_256::compress_digest_x86_bmi2(secure_vector<uint32_t>& digest,
34
                                       const uint8_t input[],
35
                                       size_t blocks)
36
590k
   {
37
590k
   // Working variables start out as a copy of the current chaining value.
   uint32_t A = digest[0], B = digest[1], C = digest[2],
38
590k
            D = digest[3], E = digest[4], F = digest[5],
39
590k
            G = digest[6], H = digest[7];
40
41
1.35M
   // One iteration per 64-byte input block.
   for(size_t i = 0; i != blocks; ++i)
42
761k
      {
43
761k
      // Fetch the sixteen 32-bit message words of this block.
      // NOTE(review): presumably load_be<uint32_t>(input, k) reads the k-th
      // big-endian word of input - confirm against loadstor.h.
      uint32_t W00 = load_be<uint32_t>(input,  0);
44
761k
      uint32_t W01 = load_be<uint32_t>(input,  1);
45
761k
      uint32_t W02 = load_be<uint32_t>(input,  2);
46
761k
      uint32_t W03 = load_be<uint32_t>(input,  3);
47
761k
      uint32_t W04 = load_be<uint32_t>(input,  4);
48
761k
      uint32_t W05 = load_be<uint32_t>(input,  5);
49
761k
      uint32_t W06 = load_be<uint32_t>(input,  6);
50
761k
      uint32_t W07 = load_be<uint32_t>(input,  7);
51
761k
      uint32_t W08 = load_be<uint32_t>(input,  8);
52
761k
      uint32_t W09 = load_be<uint32_t>(input,  9);
53
761k
      uint32_t W10 = load_be<uint32_t>(input, 10);
54
761k
      uint32_t W11 = load_be<uint32_t>(input, 11);
55
761k
      uint32_t W12 = load_be<uint32_t>(input, 12);
56
761k
      uint32_t W13 = load_be<uint32_t>(input, 13);
57
761k
      uint32_t W14 = load_be<uint32_t>(input, 14);
58
761k
      uint32_t W15 = load_be<uint32_t>(input, 15);
59
60
761k
      // First group of 16 invocations. The A..H arguments rotate by one
      // position per invocation, so no values are shuffled between the
      // named variables. Each invocation also overwrites its first W
      // argument, which is the word the matching invocation of the next
      // group consumes.
      SHA2_32_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0x428A2F98);
61
761k
      SHA2_32_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0x71374491);
62
761k
      SHA2_32_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0xB5C0FBCF);
63
761k
      SHA2_32_F(F, G, H, A, B, C, D, E, W03, W01, W12, W04, 0xE9B5DBA5);
64
761k
      SHA2_32_F(E, F, G, H, A, B, C, D, W04, W02, W13, W05, 0x3956C25B);
65
761k
      SHA2_32_F(D, E, F, G, H, A, B, C, W05, W03, W14, W06, 0x59F111F1);
66
761k
      SHA2_32_F(C, D, E, F, G, H, A, B, W06, W04, W15, W07, 0x923F82A4);
67
761k
      SHA2_32_F(B, C, D, E, F, G, H, A, W07, W05, W00, W08, 0xAB1C5ED5);
68
761k
      SHA2_32_F(A, B, C, D, E, F, G, H, W08, W06, W01, W09, 0xD807AA98);
69
761k
      SHA2_32_F(H, A, B, C, D, E, F, G, W09, W07, W02, W10, 0x12835B01);
70
761k
      SHA2_32_F(G, H, A, B, C, D, E, F, W10, W08, W03, W11, 0x243185BE);
71
761k
      SHA2_32_F(F, G, H, A, B, C, D, E, W11, W09, W04, W12, 0x550C7DC3);
72
761k
      SHA2_32_F(E, F, G, H, A, B, C, D, W12, W10, W05, W13, 0x72BE5D74);
73
761k
      SHA2_32_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0x80DEB1FE);
74
761k
      SHA2_32_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0x9BDC06A7);
75
761k
      SHA2_32_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0xC19BF174);
76
77
761k
      // Second group of 16: same argument pattern, next 16 round constants.
      SHA2_32_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0xE49B69C1);
78
761k
      SHA2_32_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0xEFBE4786);
79
761k
      SHA2_32_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0x0FC19DC6);
80
761k
      SHA2_32_F(F, G, H, A, B, C, D, E, W03, W01, W12, W04, 0x240CA1CC);
81
761k
      SHA2_32_F(E, F, G, H, A, B, C, D, W04, W02, W13, W05, 0x2DE92C6F);
82
761k
      SHA2_32_F(D, E, F, G, H, A, B, C, W05, W03, W14, W06, 0x4A7484AA);
83
761k
      SHA2_32_F(C, D, E, F, G, H, A, B, W06, W04, W15, W07, 0x5CB0A9DC);
84
761k
      SHA2_32_F(B, C, D, E, F, G, H, A, W07, W05, W00, W08, 0x76F988DA);
85
761k
      SHA2_32_F(A, B, C, D, E, F, G, H, W08, W06, W01, W09, 0x983E5152);
86
761k
      SHA2_32_F(H, A, B, C, D, E, F, G, W09, W07, W02, W10, 0xA831C66D);
87
761k
      SHA2_32_F(G, H, A, B, C, D, E, F, W10, W08, W03, W11, 0xB00327C8);
88
761k
      SHA2_32_F(F, G, H, A, B, C, D, E, W11, W09, W04, W12, 0xBF597FC7);
89
761k
      SHA2_32_F(E, F, G, H, A, B, C, D, W12, W10, W05, W13, 0xC6E00BF3);
90
761k
      SHA2_32_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0xD5A79147);
91
761k
      SHA2_32_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0x06CA6351);
92
761k
      SHA2_32_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0x14292967);
93
94
761k
      // Third group of 16.
      SHA2_32_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0x27B70A85);
95
761k
      SHA2_32_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0x2E1B2138);
96
761k
      SHA2_32_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0x4D2C6DFC);
97
761k
      SHA2_32_F(F, G, H, A, B, C, D, E, W03, W01, W12, W04, 0x53380D13);
98
761k
      SHA2_32_F(E, F, G, H, A, B, C, D, W04, W02, W13, W05, 0x650A7354);
99
761k
      SHA2_32_F(D, E, F, G, H, A, B, C, W05, W03, W14, W06, 0x766A0ABB);
100
761k
      SHA2_32_F(C, D, E, F, G, H, A, B, W06, W04, W15, W07, 0x81C2C92E);
101
761k
      SHA2_32_F(B, C, D, E, F, G, H, A, W07, W05, W00, W08, 0x92722C85);
102
761k
      SHA2_32_F(A, B, C, D, E, F, G, H, W08, W06, W01, W09, 0xA2BFE8A1);
103
761k
      SHA2_32_F(H, A, B, C, D, E, F, G, W09, W07, W02, W10, 0xA81A664B);
104
761k
      SHA2_32_F(G, H, A, B, C, D, E, F, W10, W08, W03, W11, 0xC24B8B70);
105
761k
      SHA2_32_F(F, G, H, A, B, C, D, E, W11, W09, W04, W12, 0xC76C51A3);
106
761k
      SHA2_32_F(E, F, G, H, A, B, C, D, W12, W10, W05, W13, 0xD192E819);
107
761k
      SHA2_32_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0xD6990624);
108
761k
      SHA2_32_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0xF40E3585);
109
761k
      SHA2_32_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0x106AA070);
110
111
761k
      // Fourth and final group of 16. The W updates made here are never
      // read again, because the W variables are re-declared per block.
      SHA2_32_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0x19A4C116);
112
761k
      SHA2_32_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0x1E376C08);
113
761k
      SHA2_32_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0x2748774C);
114
761k
      SHA2_32_F(F, G, H, A, B, C, D, E, W03, W01, W12, W04, 0x34B0BCB5);
115
761k
      SHA2_32_F(E, F, G, H, A, B, C, D, W04, W02, W13, W05, 0x391C0CB3);
116
761k
      SHA2_32_F(D, E, F, G, H, A, B, C, W05, W03, W14, W06, 0x4ED8AA4A);
117
761k
      SHA2_32_F(C, D, E, F, G, H, A, B, W06, W04, W15, W07, 0x5B9CCA4F);
118
761k
      SHA2_32_F(B, C, D, E, F, G, H, A, W07, W05, W00, W08, 0x682E6FF3);
119
761k
      SHA2_32_F(A, B, C, D, E, F, G, H, W08, W06, W01, W09, 0x748F82EE);
120
761k
      SHA2_32_F(H, A, B, C, D, E, F, G, W09, W07, W02, W10, 0x78A5636F);
121
761k
      SHA2_32_F(G, H, A, B, C, D, E, F, W10, W08, W03, W11, 0x84C87814);
122
761k
      SHA2_32_F(F, G, H, A, B, C, D, E, W11, W09, W04, W12, 0x8CC70208);
123
761k
      SHA2_32_F(E, F, G, H, A, B, C, D, W12, W10, W05, W13, 0x90BEFFFA);
124
761k
      SHA2_32_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0xA4506CEB);
125
761k
      SHA2_32_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0xBEF9A3F7);
126
761k
      SHA2_32_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0xC67178F2);
127
128
761k
      // Add the working variables into the chaining value, and reload them
      // from the sums so the next block starts from the updated digest.
      A = (digest[0] += A);
129
761k
      B = (digest[1] += B);
130
761k
      C = (digest[2] += C);
131
761k
      D = (digest[3] += D);
132
761k
      E = (digest[4] += E);
133
761k
      F = (digest[5] += F);
134
761k
      G = (digest[6] += G);
135
761k
      H = (digest[7] += H);
136
137
761k
      // Step to the next 64-byte block.
      input += 64;
138
761k
      }
139
590k
   }
140
141
}