/src/botan/build/include/botan/internal/simd_avx512.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * (C) 2023 Jack Lloyd |
3 | | * |
4 | | * Botan is released under the Simplified BSD License (see license.txt) |
5 | | */ |
6 | | |
7 | | #ifndef BOTAN_SIMD_AVX512_H_ |
8 | | #define BOTAN_SIMD_AVX512_H_ |
9 | | |
10 | | #include <botan/types.h> |
11 | | #include <immintrin.h> |
12 | | |
13 | | namespace Botan { |
14 | | |
15 | | #define BOTAN_AVX512_FN BOTAN_FUNC_ISA("avx512f,avx512dq,avx512bw") |
16 | | |
17 | | class SIMD_16x32 final { |
18 | | public: |
19 | | SIMD_16x32& operator=(const SIMD_16x32& other) = default; |
20 | | SIMD_16x32(const SIMD_16x32& other) = default; |
21 | | |
22 | | SIMD_16x32& operator=(SIMD_16x32&& other) = default; |
23 | | SIMD_16x32(SIMD_16x32&& other) = default; |
24 | | |
25 | | BOTAN_AVX512_FN |
26 | 0 | BOTAN_FORCE_INLINE SIMD_16x32() { m_avx512 = _mm512_setzero_si512(); } |
27 | | |
28 | | BOTAN_AVX512_FN |
29 | 0 | explicit SIMD_16x32(const uint32_t B[16]) { m_avx512 = _mm512_loadu_si512(reinterpret_cast<const __m512i*>(B)); } |
30 | | |
31 | | BOTAN_AVX512_FN |
32 | | explicit SIMD_16x32(uint32_t B0, |
33 | | uint32_t B1, |
34 | | uint32_t B2, |
35 | | uint32_t B3, |
36 | | uint32_t B4, |
37 | | uint32_t B5, |
38 | | uint32_t B6, |
39 | | uint32_t B7, |
40 | | uint32_t B8, |
41 | | uint32_t B9, |
42 | | uint32_t BA, |
43 | | uint32_t BB, |
44 | | uint32_t BC, |
45 | | uint32_t BD, |
46 | | uint32_t BE, |
47 | 0 | uint32_t BF) { |
48 | 0 | m_avx512 = _mm512_set_epi32(BF, BE, BD, BC, BB, BA, B9, B8, B7, B6, B5, B4, B3, B2, B1, B0); |
49 | 0 | } |
50 | | |
51 | | BOTAN_AVX512_FN |
52 | 0 | static SIMD_16x32 splat(uint32_t B) { return SIMD_16x32(_mm512_set1_epi32(B)); } |
53 | | |
54 | | BOTAN_AVX512_FN |
55 | 0 | static SIMD_16x32 load_le(const uint8_t* in) { |
56 | 0 | return SIMD_16x32(_mm512_loadu_si512(reinterpret_cast<const __m512i*>(in))); |
57 | 0 | } |
58 | | |
59 | | BOTAN_AVX512_FN |
60 | 0 | static SIMD_16x32 load_be(const uint8_t* in) { return load_le(in).bswap(); } |
61 | | |
62 | | BOTAN_AVX512_FN |
63 | 0 | void store_le(uint8_t out[]) const { _mm512_storeu_si512(reinterpret_cast<__m512i*>(out), m_avx512); } |
64 | | |
65 | | BOTAN_AVX512_FN |
66 | 0 | void store_be(uint8_t out[]) const { bswap().store_le(out); } |
67 | | |
68 | | template <size_t ROT> |
69 | | BOTAN_AVX512_FN SIMD_16x32 rotl() const |
70 | | requires(ROT > 0 && ROT < 32) |
71 | 0 | { |
72 | 0 | return SIMD_16x32(_mm512_rol_epi32(m_avx512, ROT)); |
73 | 0 | } Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::rotl<13ul>() const Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::rotl<3ul>() const Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::rotl<1ul>() const Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::rotl<7ul>() const Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::rotl<5ul>() const Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::rotl<22ul>() const Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::rotl<10ul>() const Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::rotl<27ul>() const Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::rotl<25ul>() const Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::rotl<31ul>() const Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::rotl<29ul>() const Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::rotl<19ul>() const Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::rotl<30ul>() const Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::rotl<26ul>() const Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::rotl<21ul>() const Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::rotl<16ul>() const Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::rotl<12ul>() const Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::rotl<8ul>() const |
74 | | |
75 | | template <size_t ROT> |
76 | 0 | BOTAN_AVX512_FN SIMD_16x32 rotr() const { |
77 | 0 | return this->rotl<32 - ROT>(); |
78 | 0 | } Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::rotr<22ul>() const Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::rotr<5ul>() const Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::rotr<7ul>() const Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::rotr<1ul>() const Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::rotr<3ul>() const Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::rotr<13ul>() const Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::rotr<2ul>() const Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::rotr<6ul>() const Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::rotr<11ul>() const Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::rotr<25ul>() const |
79 | | |
80 | 0 | SIMD_16x32 BOTAN_AVX512_FN sigma0() const { |
81 | 0 | const SIMD_16x32 rot1 = this->rotr<2>(); |
82 | 0 | const SIMD_16x32 rot2 = this->rotr<13>(); |
83 | 0 | const SIMD_16x32 rot3 = this->rotr<22>(); |
84 | 0 | return rot1 ^ rot2 ^ rot3; |
85 | 0 | } |
86 | | |
87 | 0 | SIMD_16x32 BOTAN_AVX512_FN sigma1() const { |
88 | 0 | const SIMD_16x32 rot1 = this->rotr<6>(); |
89 | 0 | const SIMD_16x32 rot2 = this->rotr<11>(); |
90 | 0 | const SIMD_16x32 rot3 = this->rotr<25>(); |
91 | 0 | return rot1 ^ rot2 ^ rot3; |
92 | 0 | } |
93 | | |
94 | | BOTAN_AVX512_FN |
95 | 0 | SIMD_16x32 operator+(const SIMD_16x32& other) const { |
96 | 0 | SIMD_16x32 retval(*this); |
97 | 0 | retval += other; |
98 | 0 | return retval; |
99 | 0 | } |
100 | | |
101 | | BOTAN_AVX512_FN |
102 | 0 | SIMD_16x32 operator-(const SIMD_16x32& other) const { |
103 | 0 | SIMD_16x32 retval(*this); |
104 | 0 | retval -= other; |
105 | 0 | return retval; |
106 | 0 | } |
107 | | |
108 | | BOTAN_AVX512_FN |
109 | 0 | SIMD_16x32 operator^(const SIMD_16x32& other) const { |
110 | 0 | SIMD_16x32 retval(*this); |
111 | 0 | retval ^= other; |
112 | 0 | return retval; |
113 | 0 | } |
114 | | |
115 | | BOTAN_AVX512_FN |
116 | 0 | SIMD_16x32 operator|(const SIMD_16x32& other) const { |
117 | 0 | SIMD_16x32 retval(*this); |
118 | 0 | retval |= other; |
119 | 0 | return retval; |
120 | 0 | } |
121 | | |
122 | | BOTAN_AVX512_FN |
123 | 0 | SIMD_16x32 operator&(const SIMD_16x32& other) const { |
124 | 0 | SIMD_16x32 retval(*this); |
125 | 0 | retval &= other; |
126 | 0 | return retval; |
127 | 0 | } |
128 | | |
129 | | BOTAN_AVX512_FN |
130 | 0 | void operator+=(const SIMD_16x32& other) { m_avx512 = _mm512_add_epi32(m_avx512, other.m_avx512); } |
131 | | |
132 | | BOTAN_AVX512_FN |
133 | 0 | void operator-=(const SIMD_16x32& other) { m_avx512 = _mm512_sub_epi32(m_avx512, other.m_avx512); } |
134 | | |
135 | | BOTAN_AVX512_FN |
136 | 0 | void operator^=(const SIMD_16x32& other) { m_avx512 = _mm512_xor_si512(m_avx512, other.m_avx512); } |
137 | | |
138 | | BOTAN_AVX512_FN |
139 | 0 | void operator^=(uint32_t other) { *this ^= SIMD_16x32::splat(other); } |
140 | | |
141 | | BOTAN_AVX512_FN |
142 | 0 | void operator|=(const SIMD_16x32& other) { m_avx512 = _mm512_or_si512(m_avx512, other.m_avx512); } |
143 | | |
144 | | BOTAN_AVX512_FN |
145 | 0 | void operator&=(const SIMD_16x32& other) { m_avx512 = _mm512_and_si512(m_avx512, other.m_avx512); } |
146 | | |
147 | | template <int SHIFT> |
148 | 0 | BOTAN_AVX512_FN SIMD_16x32 shl() const { |
149 | 0 | return SIMD_16x32(_mm512_slli_epi32(m_avx512, SHIFT)); |
150 | 0 | } Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::shl<3>() const Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::shl<7>() const |
151 | | |
152 | | template <int SHIFT> |
153 | | BOTAN_AVX512_FN SIMD_16x32 shr() const { |
154 | | return SIMD_16x32(_mm512_srli_epi32(m_avx512, SHIFT)); |
155 | | } |
156 | | |
157 | | BOTAN_AVX512_FN |
158 | 0 | SIMD_16x32 operator~() const { return SIMD_16x32(_mm512_xor_si512(m_avx512, _mm512_set1_epi32(0xFFFFFFFF))); } |
159 | | |
160 | | // (~reg) & other |
161 | | BOTAN_AVX512_FN |
162 | 0 | SIMD_16x32 andc(const SIMD_16x32& other) const { |
163 | 0 | return SIMD_16x32(_mm512_andnot_si512(m_avx512, other.m_avx512)); |
164 | 0 | } |
165 | | |
166 | | template <uint8_t TBL> |
167 | 0 | BOTAN_AVX512_FN static SIMD_16x32 ternary_fn(const SIMD_16x32& a, const SIMD_16x32& b, const SIMD_16x32& c) { |
168 | 0 | return _mm512_ternarylogic_epi32(a.raw(), b.raw(), c.raw(), TBL); |
169 | 0 | } Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)185>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)226>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)54>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)38>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)172>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)150>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)169>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)60>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)229>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)166>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)43>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned 
char)131>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)105>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)101>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)218>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)102>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)148>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)161>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)210>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)45>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)146>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)59>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)188>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted 
instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)104>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)110>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)109>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)56>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)194>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)121>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)113>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)107>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)73>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)214>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)147>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)195>(Botan::SIMD_16x32 const&, 
Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)39>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)133>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)122>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)135>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)97>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)156>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)181>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)124>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)30>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)41>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)155>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 
Botan::SIMD_16x32::ternary_fn<(unsigned char)227>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)225>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)209>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)103>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)62>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)28>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)125>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)7>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)158>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)198>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)52>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)203>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, 
Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)75>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)201>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)37>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)99>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)134>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)71>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)94>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)80>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)90>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)153>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)151>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned 
char)14>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)180>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)91>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)115>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)197>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)77>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)86>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)202>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) Unexecuted instantiation: Botan::SIMD_16x32 Botan::SIMD_16x32::ternary_fn<(unsigned char)232>(Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&, Botan::SIMD_16x32 const&) |
170 | | |
171 | | BOTAN_AVX512_FN |
172 | 0 | SIMD_16x32 bswap() const { |
173 | 0 | const uint8_t BSWAP_MASK[64] = { |
174 | 0 | 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12, 19, 18, 17, 16, 23, 22, |
175 | 0 | 21, 20, 27, 26, 25, 24, 31, 30, 29, 28, 35, 34, 33, 32, 39, 38, 37, 36, 43, 42, 41, 40, |
176 | 0 | 47, 46, 45, 44, 51, 50, 49, 48, 55, 54, 53, 52, 59, 58, 57, 56, 63, 62, 61, 60, |
177 | 0 | }; |
178 | 0 |
|
179 | 0 | const __m512i bswap = _mm512_loadu_si512(reinterpret_cast<const __m512i*>(BSWAP_MASK)); |
180 | 0 |
|
181 | 0 | const __m512i output = _mm512_shuffle_epi8(m_avx512, bswap); |
182 | 0 |
|
183 | 0 | return SIMD_16x32(output); |
184 | 0 | } |
185 | | |
186 | | BOTAN_AVX512_FN |
187 | 0 | static void transpose(SIMD_16x32& B0, SIMD_16x32& B1, SIMD_16x32& B2, SIMD_16x32& B3) { |
188 | 0 | const __m512i T0 = _mm512_unpacklo_epi32(B0.m_avx512, B1.m_avx512); |
189 | 0 | const __m512i T1 = _mm512_unpacklo_epi32(B2.m_avx512, B3.m_avx512); |
190 | 0 | const __m512i T2 = _mm512_unpackhi_epi32(B0.m_avx512, B1.m_avx512); |
191 | 0 | const __m512i T3 = _mm512_unpackhi_epi32(B2.m_avx512, B3.m_avx512); |
192 | |
|
193 | 0 | B0.m_avx512 = _mm512_unpacklo_epi64(T0, T1); |
194 | 0 | B1.m_avx512 = _mm512_unpackhi_epi64(T0, T1); |
195 | 0 | B2.m_avx512 = _mm512_unpacklo_epi64(T2, T3); |
196 | 0 | B3.m_avx512 = _mm512_unpackhi_epi64(T2, T3); |
197 | 0 | } |
198 | | |
199 | | BOTAN_AVX512_FN |
200 | | static void transpose(SIMD_16x32& B0, |
201 | | SIMD_16x32& B1, |
202 | | SIMD_16x32& B2, |
203 | | SIMD_16x32& B3, |
204 | | SIMD_16x32& B4, |
205 | | SIMD_16x32& B5, |
206 | | SIMD_16x32& B6, |
207 | | SIMD_16x32& B7, |
208 | | SIMD_16x32& B8, |
209 | | SIMD_16x32& B9, |
210 | | SIMD_16x32& BA, |
211 | | SIMD_16x32& BB, |
212 | | SIMD_16x32& BC, |
213 | | SIMD_16x32& BD, |
214 | | SIMD_16x32& BE, |
215 | 0 | SIMD_16x32& BF) { |
216 | 0 | auto t0 = _mm512_unpacklo_epi32(B0.raw(), B1.raw()); |
217 | 0 | auto t1 = _mm512_unpackhi_epi32(B0.raw(), B1.raw()); |
218 | 0 | auto t2 = _mm512_unpacklo_epi32(B2.raw(), B3.raw()); |
219 | 0 | auto t3 = _mm512_unpackhi_epi32(B2.raw(), B3.raw()); |
220 | 0 | auto t4 = _mm512_unpacklo_epi32(B4.raw(), B5.raw()); |
221 | 0 | auto t5 = _mm512_unpackhi_epi32(B4.raw(), B5.raw()); |
222 | 0 | auto t6 = _mm512_unpacklo_epi32(B6.raw(), B7.raw()); |
223 | 0 | auto t7 = _mm512_unpackhi_epi32(B6.raw(), B7.raw()); |
224 | 0 | auto t8 = _mm512_unpacklo_epi32(B8.raw(), B9.raw()); |
225 | 0 | auto t9 = _mm512_unpackhi_epi32(B8.raw(), B9.raw()); |
226 | 0 | auto ta = _mm512_unpacklo_epi32(BA.raw(), BB.raw()); |
227 | 0 | auto tb = _mm512_unpackhi_epi32(BA.raw(), BB.raw()); |
228 | 0 | auto tc = _mm512_unpacklo_epi32(BC.raw(), BD.raw()); |
229 | 0 | auto td = _mm512_unpackhi_epi32(BC.raw(), BD.raw()); |
230 | 0 | auto te = _mm512_unpacklo_epi32(BE.raw(), BF.raw()); |
231 | 0 | auto tf = _mm512_unpackhi_epi32(BE.raw(), BF.raw()); |
232 | |
|
233 | 0 | auto r0 = _mm512_unpacklo_epi64(t0, t2); |
234 | 0 | auto r1 = _mm512_unpackhi_epi64(t0, t2); |
235 | 0 | auto r2 = _mm512_unpacklo_epi64(t1, t3); |
236 | 0 | auto r3 = _mm512_unpackhi_epi64(t1, t3); |
237 | 0 | auto r4 = _mm512_unpacklo_epi64(t4, t6); |
238 | 0 | auto r5 = _mm512_unpackhi_epi64(t4, t6); |
239 | 0 | auto r6 = _mm512_unpacklo_epi64(t5, t7); |
240 | 0 | auto r7 = _mm512_unpackhi_epi64(t5, t7); |
241 | 0 | auto r8 = _mm512_unpacklo_epi64(t8, ta); |
242 | 0 | auto r9 = _mm512_unpackhi_epi64(t8, ta); |
243 | 0 | auto ra = _mm512_unpacklo_epi64(t9, tb); |
244 | 0 | auto rb = _mm512_unpackhi_epi64(t9, tb); |
245 | 0 | auto rc = _mm512_unpacklo_epi64(tc, te); |
246 | 0 | auto rd = _mm512_unpackhi_epi64(tc, te); |
247 | 0 | auto re = _mm512_unpacklo_epi64(td, tf); |
248 | 0 | auto rf = _mm512_unpackhi_epi64(td, tf); |
249 | |
|
250 | 0 | t0 = _mm512_shuffle_i32x4(r0, r4, 0x88); |
251 | 0 | t1 = _mm512_shuffle_i32x4(r1, r5, 0x88); |
252 | 0 | t2 = _mm512_shuffle_i32x4(r2, r6, 0x88); |
253 | 0 | t3 = _mm512_shuffle_i32x4(r3, r7, 0x88); |
254 | 0 | t4 = _mm512_shuffle_i32x4(r0, r4, 0xdd); |
255 | 0 | t5 = _mm512_shuffle_i32x4(r1, r5, 0xdd); |
256 | 0 | t6 = _mm512_shuffle_i32x4(r2, r6, 0xdd); |
257 | 0 | t7 = _mm512_shuffle_i32x4(r3, r7, 0xdd); |
258 | 0 | t8 = _mm512_shuffle_i32x4(r8, rc, 0x88); |
259 | 0 | t9 = _mm512_shuffle_i32x4(r9, rd, 0x88); |
260 | 0 | ta = _mm512_shuffle_i32x4(ra, re, 0x88); |
261 | 0 | tb = _mm512_shuffle_i32x4(rb, rf, 0x88); |
262 | 0 | tc = _mm512_shuffle_i32x4(r8, rc, 0xdd); |
263 | 0 | td = _mm512_shuffle_i32x4(r9, rd, 0xdd); |
264 | 0 | te = _mm512_shuffle_i32x4(ra, re, 0xdd); |
265 | 0 | tf = _mm512_shuffle_i32x4(rb, rf, 0xdd); |
266 | |
|
267 | 0 | B0.m_avx512 = _mm512_shuffle_i32x4(t0, t8, 0x88); |
268 | 0 | B1.m_avx512 = _mm512_shuffle_i32x4(t1, t9, 0x88); |
269 | 0 | B2.m_avx512 = _mm512_shuffle_i32x4(t2, ta, 0x88); |
270 | 0 | B3.m_avx512 = _mm512_shuffle_i32x4(t3, tb, 0x88); |
271 | 0 | B4.m_avx512 = _mm512_shuffle_i32x4(t4, tc, 0x88); |
272 | 0 | B5.m_avx512 = _mm512_shuffle_i32x4(t5, td, 0x88); |
273 | 0 | B6.m_avx512 = _mm512_shuffle_i32x4(t6, te, 0x88); |
274 | 0 | B7.m_avx512 = _mm512_shuffle_i32x4(t7, tf, 0x88); |
275 | 0 | B8.m_avx512 = _mm512_shuffle_i32x4(t0, t8, 0xdd); |
276 | 0 | B9.m_avx512 = _mm512_shuffle_i32x4(t1, t9, 0xdd); |
277 | 0 | BA.m_avx512 = _mm512_shuffle_i32x4(t2, ta, 0xdd); |
278 | 0 | BB.m_avx512 = _mm512_shuffle_i32x4(t3, tb, 0xdd); |
279 | 0 | BC.m_avx512 = _mm512_shuffle_i32x4(t4, tc, 0xdd); |
280 | 0 | BD.m_avx512 = _mm512_shuffle_i32x4(t5, td, 0xdd); |
281 | 0 | BE.m_avx512 = _mm512_shuffle_i32x4(t6, te, 0xdd); |
282 | 0 | BF.m_avx512 = _mm512_shuffle_i32x4(t7, tf, 0xdd); |
283 | 0 | } |
284 | | |
285 | | BOTAN_AVX512_FN |
286 | 0 | static SIMD_16x32 choose(const SIMD_16x32& mask, const SIMD_16x32& a, const SIMD_16x32& b) { |
287 | 0 | return SIMD_16x32::ternary_fn<0xca>(mask, a, b); |
288 | 0 | } |
289 | | |
290 | | BOTAN_AVX512_FN |
291 | 0 | static SIMD_16x32 majority(const SIMD_16x32& x, const SIMD_16x32& y, const SIMD_16x32& z) { |
292 | 0 | return SIMD_16x32::ternary_fn<0xe8>(x, y, z); |
293 | 0 | } |
294 | | |
295 | 0 | BOTAN_FUNC_ISA("avx2") static void zero_registers() { |
296 | | // Unfortunately this only zeros zmm0-zmm15 and not zmm16-zmm31 |
297 | 0 | _mm256_zeroall(); |
298 | 0 | } |
299 | | |
300 | 0 | __m512i BOTAN_AVX512_FN raw() const { return m_avx512; } |
301 | | |
302 | | BOTAN_AVX512_FN |
303 | 0 | SIMD_16x32(__m512i x) : m_avx512(x) {} |
304 | | |
305 | | private: |
306 | | __m512i m_avx512; |
307 | | }; |
308 | | |
309 | | template <size_t R> |
310 | 0 | inline SIMD_16x32 rotl(SIMD_16x32 input) { |
311 | 0 | return input.rotl<R>(); |
312 | 0 | } Unexecuted instantiation: Botan::SIMD_16x32 Botan::rotl<13ul>(Botan::SIMD_16x32) Unexecuted instantiation: Botan::SIMD_16x32 Botan::rotl<3ul>(Botan::SIMD_16x32) Unexecuted instantiation: Botan::SIMD_16x32 Botan::rotl<1ul>(Botan::SIMD_16x32) Unexecuted instantiation: Botan::SIMD_16x32 Botan::rotl<7ul>(Botan::SIMD_16x32) Unexecuted instantiation: Botan::SIMD_16x32 Botan::rotl<5ul>(Botan::SIMD_16x32) Unexecuted instantiation: Botan::SIMD_16x32 Botan::rotl<22ul>(Botan::SIMD_16x32) |
313 | | |
314 | | template <size_t R> |
315 | 0 | inline SIMD_16x32 rotr(SIMD_16x32 input) { |
316 | 0 | return input.rotr<R>(); |
317 | 0 | } Unexecuted instantiation: Botan::SIMD_16x32 Botan::rotr<22ul>(Botan::SIMD_16x32) Unexecuted instantiation: Botan::SIMD_16x32 Botan::rotr<5ul>(Botan::SIMD_16x32) Unexecuted instantiation: Botan::SIMD_16x32 Botan::rotr<7ul>(Botan::SIMD_16x32) Unexecuted instantiation: Botan::SIMD_16x32 Botan::rotr<1ul>(Botan::SIMD_16x32) Unexecuted instantiation: Botan::SIMD_16x32 Botan::rotr<3ul>(Botan::SIMD_16x32) Unexecuted instantiation: Botan::SIMD_16x32 Botan::rotr<13ul>(Botan::SIMD_16x32) |
318 | | |
319 | | // For Serpent: |
320 | | template <size_t S> |
321 | 0 | inline SIMD_16x32 shl(SIMD_16x32 input) { |
322 | 0 | return input.shl<S>(); |
323 | 0 | } Unexecuted instantiation: Botan::SIMD_16x32 Botan::shl<3ul>(Botan::SIMD_16x32) Unexecuted instantiation: Botan::SIMD_16x32 Botan::shl<7ul>(Botan::SIMD_16x32) |
324 | | |
325 | | } // namespace Botan |
326 | | |
327 | | #endif |