Coverage Report

Created: 2021-11-25 09:31

/src/botan/src/lib/block/shacal2/shacal2_avx2/shacal2_avx2.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* (C) 2018 Jack Lloyd
3
*
4
* Botan is released under the Simplified BSD License (see license.txt)
5
*/
6
7
#include <botan/internal/shacal2.h>
8
#include <botan/internal/simd_avx2.h>
9
10
namespace Botan {
11
12
namespace {
13
14
/*
* Forward step applied to every lane of the SIMD registers.
*
* Only D and H are modified; A, B, C, E, F and G are read. RK is a single
* round key word which is broadcast with SIMD_8x32::splat before use.
*/
void BOTAN_FORCE_INLINE BOTAN_FUNC_ISA("avx2")
   SHACAL2_Fwd(const SIMD_8x32& A, const SIMD_8x32& B, const SIMD_8x32& C, SIMD_8x32& D,
               const SIMD_8x32& E, const SIMD_8x32& F, const SIMD_8x32& G, SIMD_8x32& H,
               uint32_t RK)
   {
   // First contribution to H: sigma1(E), choose(E, F, G) and the round key
   const SIMD_8x32 keyed_term = E.sigma1() + SIMD_8x32::choose(E, F, G) + SIMD_8x32::splat(RK);
   H += keyed_term;

   // D absorbs H before the sigma0/majority term is added to H
   D += H;

   // Second contribution to H: sigma0(A) and majority(A, B, C)
   const SIMD_8x32 mix_term = A.sigma0() + SIMD_8x32::majority(A, B, C);
   H += mix_term;
   }
23
24
/*
* Reverse step applied to every lane of the SIMD registers.
*
* Performs the same three updates as SHACAL2_Fwd in the opposite order and
* with subtraction in place of addition. Only D and H are modified.
*/
void BOTAN_FORCE_INLINE BOTAN_FUNC_ISA("avx2")
   SHACAL2_Rev(const SIMD_8x32& A, const SIMD_8x32& B, const SIMD_8x32& C, SIMD_8x32& D,
               const SIMD_8x32& E, const SIMD_8x32& F, const SIMD_8x32& G, SIMD_8x32& H,
               uint32_t RK)
   {
   // Remove the sigma0/majority term from H first
   const SIMD_8x32 mix_term = A.sigma0() + SIMD_8x32::majority(A, B, C);
   H -= mix_term;

   // D is restored using H while H still carries the keyed term
   D -= H;

   // Finally remove sigma1(E), choose(E, F, G) and the round key from H
   const SIMD_8x32 keyed_term = E.sigma1() + SIMD_8x32::choose(E, F, G) + SIMD_8x32::splat(RK);
   H -= keyed_term;
   }
33
34
}
35
36
/*
* AVX2 encryption path.
*
* Loads eight SIMD_8x32 registers from in (offsets 0, 32, ..., 224),
* transposes them, applies 64 SHACAL2_Fwd steps using m_RK[0] .. m_RK[63]
* in ascending order, transposes again and stores the registers to out at
* the same offsets.
*
* NOTE(review): presumably each register covers 32 bytes, so 256 bytes are
* read and written (eight blocks) - confirm against SIMD_8x32.
*/
void BOTAN_FUNC_ISA("avx2") SHACAL2::avx2_encrypt_8(const uint8_t in[], uint8_t out[]) const
   {
   SIMD_8x32::reset_registers();

   // Byte distance between consecutive register loads/stores
   const size_t stride = 32;

   SIMD_8x32 A = SIMD_8x32::load_be(in + 0*stride);
   SIMD_8x32 B = SIMD_8x32::load_be(in + 1*stride);
   SIMD_8x32 C = SIMD_8x32::load_be(in + 2*stride);
   SIMD_8x32 D = SIMD_8x32::load_be(in + 3*stride);
   SIMD_8x32 E = SIMD_8x32::load_be(in + 4*stride);
   SIMD_8x32 F = SIMD_8x32::load_be(in + 5*stride);
   SIMD_8x32 G = SIMD_8x32::load_be(in + 6*stride);
   SIMD_8x32 H = SIMD_8x32::load_be(in + 7*stride);

   SIMD_8x32::transpose(A, B, C, D, E, F, G, H);

   // Eight steps per iteration; the argument list is rotated by one position
   // each step so that no register contents have to be moved around.
   for(size_t base = 0; base < 64; base += 8)
      {
      SHACAL2_Fwd(A, B, C, D, E, F, G, H, m_RK[base]);
      SHACAL2_Fwd(H, A, B, C, D, E, F, G, m_RK[base+1]);
      SHACAL2_Fwd(G, H, A, B, C, D, E, F, m_RK[base+2]);
      SHACAL2_Fwd(F, G, H, A, B, C, D, E, m_RK[base+3]);
      SHACAL2_Fwd(E, F, G, H, A, B, C, D, m_RK[base+4]);
      SHACAL2_Fwd(D, E, F, G, H, A, B, C, m_RK[base+5]);
      SHACAL2_Fwd(C, D, E, F, G, H, A, B, m_RK[base+6]);
      SHACAL2_Fwd(B, C, D, E, F, G, H, A, m_RK[base+7]);
      }

   SIMD_8x32::transpose(A, B, C, D, E, F, G, H);

   A.store_be(out + 0*stride);
   B.store_be(out + 1*stride);
   C.store_be(out + 2*stride);
   D.store_be(out + 3*stride);
   E.store_be(out + 4*stride);
   F.store_be(out + 5*stride);
   G.store_be(out + 6*stride);
   H.store_be(out + 7*stride);

   SIMD_8x32::zero_registers();
   }
78
79
BOTAN_FUNC_ISA("avx2") void SHACAL2::avx2_decrypt_8(const uint8_t in[], uint8_t out[]) const
80
0
   {
81
0
   SIMD_8x32::reset_registers();
82
83
0
   SIMD_8x32 A = SIMD_8x32::load_be(in);
84
0
   SIMD_8x32 B = SIMD_8x32::load_be(in+32);
85
0
   SIMD_8x32 C = SIMD_8x32::load_be(in+64);
86
0
   SIMD_8x32 D = SIMD_8x32::load_be(in+96);
87
88
0
   SIMD_8x32 E = SIMD_8x32::load_be(in+128);
89
0
   SIMD_8x32 F = SIMD_8x32::load_be(in+160);
90
0
   SIMD_8x32 G = SIMD_8x32::load_be(in+192);
91
0
   SIMD_8x32 H = SIMD_8x32::load_be(in+224);
92
93
0
   SIMD_8x32::transpose(A, B, C, D, E, F, G, H);
94
95
0
   for(size_t r = 0; r != 64; r += 8)
96
0
      {
97
0
      SHACAL2_Rev(B, C, D, E, F, G, H, A, m_RK[63-r]);
98
0
      SHACAL2_Rev(C, D, E, F, G, H, A, B, m_RK[62-r]);
99
0
      SHACAL2_Rev(D, E, F, G, H, A, B, C, m_RK[61-r]);
100
0
      SHACAL2_Rev(E, F, G, H, A, B, C, D, m_RK[60-r]);
101
0
      SHACAL2_Rev(F, G, H, A, B, C, D, E, m_RK[59-r]);
102
0
      SHACAL2_Rev(G, H, A, B, C, D, E, F, m_RK[58-r]);
103
0
      SHACAL2_Rev(H, A, B, C, D, E, F, G, m_RK[57-r]);
104
0
      SHACAL2_Rev(A, B, C, D, E, F, G, H, m_RK[56-r]);
105
0
      }
106
107
0
   SIMD_8x32::transpose(A, B, C, D, E, F, G, H);
108
109
0
   A.store_be(out);
110
0
   B.store_be(out+32);
111
0
   C.store_be(out+64);
112
0
   D.store_be(out+96);
113
114
0
   E.store_be(out+128);
115
0
   F.store_be(out+160);
116
0
   G.store_be(out+192);
117
0
   H.store_be(out+224);
118
119
0
   SIMD_8x32::zero_registers();
120
0
   }
121
122
}