Coverage Report

Created: 2022-01-14 08:07

/src/botan/src/lib/hash/sha2_32/sha2_32_bmi2/sha2_32_bmi2.cpp
Line
Count
Source
1
/*
2
* (C) 2018 Jack Lloyd
3
*
4
* Botan is released under the Simplified BSD License (see license.txt)
5
*/
6
7
#include <botan/internal/sha2_32.h>
8
#include <botan/internal/loadstor.h>
9
#include <botan/internal/rotate.h>
10
#include <botan/internal/bit_ops.h>
11
12
namespace Botan {
13
14
/*
15
Your eyes do not deceive you; this is currently just a copy of the
16
baseline SHA-256 implementation. Because we compile it with BMI2
17
flags, GCC and Clang use the BMI2 instructions without further help.
18
19
Likely instruction scheduling could be improved by using inline asm.
20
*/
21
22
60.6M
/*
* One SHA-256 round fused with one step of the message schedule.
*
* A..H   the eight working variables, passed in whatever rotation the
*        current round requires; only D and H are written.
* M1     the message word consumed by this round. After being added
*        into H it is advanced in place (using M2, M3 and M4) so the
*        same variable can be passed as M1 again 16 rounds later.
* magic  the round constant added into H.
*
* rotr<>(), choose() and majority() must be visible where the macro is
* expanded. D, H and M1 must be modifiable lvalues.
*
* The expansion is a single `do { ... } while(0)` statement WITHOUT a
* trailing semicolon; the caller supplies it. That keeps
* `if(x) SHA2_32_F(...); else ...` well-formed.
*/
#define SHA2_32_F(A, B, C, D, E, F, G, H, M1, M2, M3, M4, magic) do {        \
   const uint32_t A_rho = rotr<2>(A) ^ rotr<13>(A) ^ rotr<22>(A);            \
   const uint32_t E_rho = rotr<6>(E) ^ rotr<11>(E) ^ rotr<25>(E);            \
   const uint32_t M2_sigma = rotr<17>(M2) ^ rotr<19>(M2) ^ ((M2) >> 10);     \
   const uint32_t M4_sigma = rotr<7>(M4) ^ rotr<18>(M4) ^ ((M4) >> 3);       \
   H += (magic) + E_rho + choose(E, F, G) + (M1);                            \
   D += H;                                                                   \
   H += A_rho + majority(A, B, C);                                           \
   M1 += M2_sigma + (M3) + M4_sigma;                                         \
   } while(0)
32
33
void SHA_256::compress_digest_x86_bmi2(secure_vector<uint32_t>& digest,
34
                                       const uint8_t input[],
35
                                       size_t blocks)
36
755k
   {
37
755k
   uint32_t A = digest[0], B = digest[1], C = digest[2],
38
755k
            D = digest[3], E = digest[4], F = digest[5],
39
755k
            G = digest[6], H = digest[7];
40
41
1.70M
   for(size_t i = 0; i != blocks; ++i)
42
947k
      {
43
947k
      uint32_t W00 = load_be<uint32_t>(input,  0);
44
947k
      uint32_t W01 = load_be<uint32_t>(input,  1);
45
947k
      uint32_t W02 = load_be<uint32_t>(input,  2);
46
947k
      uint32_t W03 = load_be<uint32_t>(input,  3);
47
947k
      uint32_t W04 = load_be<uint32_t>(input,  4);
48
947k
      uint32_t W05 = load_be<uint32_t>(input,  5);
49
947k
      uint32_t W06 = load_be<uint32_t>(input,  6);
50
947k
      uint32_t W07 = load_be<uint32_t>(input,  7);
51
947k
      uint32_t W08 = load_be<uint32_t>(input,  8);
52
947k
      uint32_t W09 = load_be<uint32_t>(input,  9);
53
947k
      uint32_t W10 = load_be<uint32_t>(input, 10);
54
947k
      uint32_t W11 = load_be<uint32_t>(input, 11);
55
947k
      uint32_t W12 = load_be<uint32_t>(input, 12);
56
947k
      uint32_t W13 = load_be<uint32_t>(input, 13);
57
947k
      uint32_t W14 = load_be<uint32_t>(input, 14);
58
947k
      uint32_t W15 = load_be<uint32_t>(input, 15);
59
60
947k
      SHA2_32_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0x428A2F98);
61
947k
      SHA2_32_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0x71374491);
62
947k
      SHA2_32_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0xB5C0FBCF);
63
947k
      SHA2_32_F(F, G, H, A, B, C, D, E, W03, W01, W12, W04, 0xE9B5DBA5);
64
947k
      SHA2_32_F(E, F, G, H, A, B, C, D, W04, W02, W13, W05, 0x3956C25B);
65
947k
      SHA2_32_F(D, E, F, G, H, A, B, C, W05, W03, W14, W06, 0x59F111F1);
66
947k
      SHA2_32_F(C, D, E, F, G, H, A, B, W06, W04, W15, W07, 0x923F82A4);
67
947k
      SHA2_32_F(B, C, D, E, F, G, H, A, W07, W05, W00, W08, 0xAB1C5ED5);
68
947k
      SHA2_32_F(A, B, C, D, E, F, G, H, W08, W06, W01, W09, 0xD807AA98);
69
947k
      SHA2_32_F(H, A, B, C, D, E, F, G, W09, W07, W02, W10, 0x12835B01);
70
947k
      SHA2_32_F(G, H, A, B, C, D, E, F, W10, W08, W03, W11, 0x243185BE);
71
947k
      SHA2_32_F(F, G, H, A, B, C, D, E, W11, W09, W04, W12, 0x550C7DC3);
72
947k
      SHA2_32_F(E, F, G, H, A, B, C, D, W12, W10, W05, W13, 0x72BE5D74);
73
947k
      SHA2_32_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0x80DEB1FE);
74
947k
      SHA2_32_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0x9BDC06A7);
75
947k
      SHA2_32_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0xC19BF174);
76
77
947k
      SHA2_32_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0xE49B69C1);
78
947k
      SHA2_32_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0xEFBE4786);
79
947k
      SHA2_32_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0x0FC19DC6);
80
947k
      SHA2_32_F(F, G, H, A, B, C, D, E, W03, W01, W12, W04, 0x240CA1CC);
81
947k
      SHA2_32_F(E, F, G, H, A, B, C, D, W04, W02, W13, W05, 0x2DE92C6F);
82
947k
      SHA2_32_F(D, E, F, G, H, A, B, C, W05, W03, W14, W06, 0x4A7484AA);
83
947k
      SHA2_32_F(C, D, E, F, G, H, A, B, W06, W04, W15, W07, 0x5CB0A9DC);
84
947k
      SHA2_32_F(B, C, D, E, F, G, H, A, W07, W05, W00, W08, 0x76F988DA);
85
947k
      SHA2_32_F(A, B, C, D, E, F, G, H, W08, W06, W01, W09, 0x983E5152);
86
947k
      SHA2_32_F(H, A, B, C, D, E, F, G, W09, W07, W02, W10, 0xA831C66D);
87
947k
      SHA2_32_F(G, H, A, B, C, D, E, F, W10, W08, W03, W11, 0xB00327C8);
88
947k
      SHA2_32_F(F, G, H, A, B, C, D, E, W11, W09, W04, W12, 0xBF597FC7);
89
947k
      SHA2_32_F(E, F, G, H, A, B, C, D, W12, W10, W05, W13, 0xC6E00BF3);
90
947k
      SHA2_32_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0xD5A79147);
91
947k
      SHA2_32_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0x06CA6351);
92
947k
      SHA2_32_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0x14292967);
93
94
947k
      SHA2_32_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0x27B70A85);
95
947k
      SHA2_32_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0x2E1B2138);
96
947k
      SHA2_32_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0x4D2C6DFC);
97
947k
      SHA2_32_F(F, G, H, A, B, C, D, E, W03, W01, W12, W04, 0x53380D13);
98
947k
      SHA2_32_F(E, F, G, H, A, B, C, D, W04, W02, W13, W05, 0x650A7354);
99
947k
      SHA2_32_F(D, E, F, G, H, A, B, C, W05, W03, W14, W06, 0x766A0ABB);
100
947k
      SHA2_32_F(C, D, E, F, G, H, A, B, W06, W04, W15, W07, 0x81C2C92E);
101
947k
      SHA2_32_F(B, C, D, E, F, G, H, A, W07, W05, W00, W08, 0x92722C85);
102
947k
      SHA2_32_F(A, B, C, D, E, F, G, H, W08, W06, W01, W09, 0xA2BFE8A1);
103
947k
      SHA2_32_F(H, A, B, C, D, E, F, G, W09, W07, W02, W10, 0xA81A664B);
104
947k
      SHA2_32_F(G, H, A, B, C, D, E, F, W10, W08, W03, W11, 0xC24B8B70);
105
947k
      SHA2_32_F(F, G, H, A, B, C, D, E, W11, W09, W04, W12, 0xC76C51A3);
106
947k
      SHA2_32_F(E, F, G, H, A, B, C, D, W12, W10, W05, W13, 0xD192E819);
107
947k
      SHA2_32_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0xD6990624);
108
947k
      SHA2_32_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0xF40E3585);
109
947k
      SHA2_32_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0x106AA070);
110
111
947k
      SHA2_32_F(A, B, C, D, E, F, G, H, W00, W14, W09, W01, 0x19A4C116);
112
947k
      SHA2_32_F(H, A, B, C, D, E, F, G, W01, W15, W10, W02, 0x1E376C08);
113
947k
      SHA2_32_F(G, H, A, B, C, D, E, F, W02, W00, W11, W03, 0x2748774C);
114
947k
      SHA2_32_F(F, G, H, A, B, C, D, E, W03, W01, W12, W04, 0x34B0BCB5);
115
947k
      SHA2_32_F(E, F, G, H, A, B, C, D, W04, W02, W13, W05, 0x391C0CB3);
116
947k
      SHA2_32_F(D, E, F, G, H, A, B, C, W05, W03, W14, W06, 0x4ED8AA4A);
117
947k
      SHA2_32_F(C, D, E, F, G, H, A, B, W06, W04, W15, W07, 0x5B9CCA4F);
118
947k
      SHA2_32_F(B, C, D, E, F, G, H, A, W07, W05, W00, W08, 0x682E6FF3);
119
947k
      SHA2_32_F(A, B, C, D, E, F, G, H, W08, W06, W01, W09, 0x748F82EE);
120
947k
      SHA2_32_F(H, A, B, C, D, E, F, G, W09, W07, W02, W10, 0x78A5636F);
121
947k
      SHA2_32_F(G, H, A, B, C, D, E, F, W10, W08, W03, W11, 0x84C87814);
122
947k
      SHA2_32_F(F, G, H, A, B, C, D, E, W11, W09, W04, W12, 0x8CC70208);
123
947k
      SHA2_32_F(E, F, G, H, A, B, C, D, W12, W10, W05, W13, 0x90BEFFFA);
124
947k
      SHA2_32_F(D, E, F, G, H, A, B, C, W13, W11, W06, W14, 0xA4506CEB);
125
947k
      SHA2_32_F(C, D, E, F, G, H, A, B, W14, W12, W07, W15, 0xBEF9A3F7);
126
947k
      SHA2_32_F(B, C, D, E, F, G, H, A, W15, W13, W08, W00, 0xC67178F2);
127
128
947k
      A = (digest[0] += A);
129
947k
      B = (digest[1] += B);
130
947k
      C = (digest[2] += C);
131
947k
      D = (digest[3] += D);
132
947k
      E = (digest[4] += E);
133
947k
      F = (digest[5] += F);
134
947k
      G = (digest[6] += G);
135
947k
      H = (digest[7] += H);
136
137
947k
      input += 64;
138
947k
      }
139
755k
   }
140
141
}