/src/botan/src/lib/modes/aead/gcm/clmul_ssse3/clmul_ssse3.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * (C) 2017 Jack Lloyd |
3 | | * |
4 | | * Botan is released under the Simplified BSD License (see license.txt) |
5 | | */ |
6 | | |
7 | | #include <botan/internal/clmul_ssse3.h> |
8 | | #include <immintrin.h> |
9 | | |
10 | | namespace Botan { |
11 | | |
12 | | BOTAN_FUNC_ISA("ssse3") |
13 | | void gcm_multiply_ssse3(uint8_t x[16], |
14 | | const uint64_t HM[256], |
15 | | const uint8_t input_bytes[], size_t blocks) |
16 | 0 | { |
17 | 0 | const __m128i BSWAP_MASK = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
18 | 0 |
|
19 | 0 | const __m128i* HM_mm = reinterpret_cast<const __m128i*>(HM); |
20 | 0 |
|
21 | 0 | __m128i X = _mm_loadu_si128(reinterpret_cast<__m128i*>(x)); |
22 | 0 | X = _mm_shuffle_epi8(X, BSWAP_MASK); |
23 | 0 |
|
24 | 0 | const __m128i ones = _mm_set1_epi8(-1); |
25 | 0 |
|
26 | 0 | for(size_t b = 0; b != blocks; ++b) |
27 | 0 | { |
28 | 0 | __m128i M = _mm_loadu_si128(reinterpret_cast<const __m128i*>(input_bytes) + b); |
29 | 0 | M = _mm_shuffle_epi8(M, BSWAP_MASK); |
30 | 0 |
|
31 | 0 | X = _mm_xor_si128(X, M); |
32 | 0 |
|
33 | 0 | __m128i Z = _mm_setzero_si128(); |
34 | 0 |
|
35 | 0 | for(size_t i = 0; i != 64; i += 2) |
36 | 0 | { |
37 | 0 | const __m128i HM0 = _mm_loadu_si128(HM_mm + 2*i); |
38 | 0 | const __m128i HM1 = _mm_loadu_si128(HM_mm + 2*i + 1); |
39 | 0 | const __m128i HM2 = _mm_loadu_si128(HM_mm + 2*i + 2); |
40 | 0 | const __m128i HM3 = _mm_loadu_si128(HM_mm + 2*i + 3); |
41 | 0 |
|
42 | 0 | const __m128i XMASK1 = _mm_add_epi64(_mm_srli_epi64(X, 63), ones); |
43 | 0 | X = _mm_slli_epi64(X, 1); |
44 | 0 | const __m128i XMASK2 = _mm_add_epi64(_mm_srli_epi64(X, 63), ones); |
45 | 0 | X = _mm_slli_epi64(X, 1); |
46 | 0 |
|
47 | 0 | Z = _mm_xor_si128(Z, _mm_andnot_si128(_mm_unpackhi_epi64(XMASK1, XMASK1), HM0)); |
48 | 0 | Z = _mm_xor_si128(Z, _mm_andnot_si128(_mm_unpacklo_epi64(XMASK1, XMASK1), HM1)); |
49 | 0 | Z = _mm_xor_si128(Z, _mm_andnot_si128(_mm_unpackhi_epi64(XMASK2, XMASK2), HM2)); |
50 | 0 | Z = _mm_xor_si128(Z, _mm_andnot_si128(_mm_unpacklo_epi64(XMASK2, XMASK2), HM3)); |
51 | 0 | } |
52 | 0 |
|
53 | 0 | X = _mm_shuffle_epi32(Z, _MM_SHUFFLE(1, 0, 3, 2)); |
54 | 0 | } |
55 | 0 |
|
56 | 0 | X = _mm_shuffle_epi8(X, BSWAP_MASK); |
57 | 0 | _mm_storeu_si128(reinterpret_cast<__m128i*>(x), X); |
58 | 0 | } |
59 | | |
60 | | } |