/src/botan/src/lib/utils/ghash/ghash_vperm/ghash_vperm.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * (C) 2017 Jack Lloyd |
3 | | * |
4 | | * Botan is released under the Simplified BSD License (see license.txt) |
5 | | */ |
6 | | |
7 | | #include <botan/internal/ghash.h> |
8 | | #include <immintrin.h> |
9 | | |
10 | | namespace Botan { |
11 | | |
12 | | // TODO: extend this to support NEON and AltiVec |
13 | | |
// GHASH::ghash_multiply_vperm
//
// Folds `blocks` consecutive 16-byte blocks of `input_bytes` into the 16-byte
// state `x`, using the 256-word table `HM`. For every block the state is
// XORed with the block and the result is then replaced by an XOR-combination
// of table entries, one entry per set bit of the 128-bit value.
//
// Parameters:
//   x           - 16-byte state; read at entry, overwritten with the result.
//   HM          - 256 uint64_t words, read as 128 consecutive 128-bit entries.
//                 NOTE(review): presumably a table precomputed from the hash
//                 key; its construction is not visible here - confirm against
//                 the code that builds it.
//   input_bytes - at least 16 * blocks bytes of input.
//   blocks      - number of 16-byte blocks to process; 0 leaves x unchanged
//                 (it is byte-reversed twice and stored back).
//
// All loads and stores use the unaligned intrinsics, so none of the pointers
// needs 16-byte alignment for this function. No branch and no table index
// depends on the data in x or input_bytes: entries are selected with masks.
//
// NOTE(review): BOTAN_FUNC_ISA("ssse3") is defined outside this listing;
// presumably it enables SSSE3 code generation for this one function, which
// _mm_shuffle_epi8 requires - confirm in the Botan compiler headers.
14 | | BOTAN_FUNC_ISA("ssse3") |
15 | | void GHASH::ghash_multiply_vperm(uint8_t x[16], |
16 | | const uint64_t HM[256], |
17 | | const uint8_t input_bytes[], size_t blocks) |
18 | 0 | { |
// Shuffle control that reverses the order of the 16 bytes in a vector.
19 | 0 | const __m128i BSWAP_MASK = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
20 | |
|
// View the table as 128-bit entries (two uint64_t words per entry).
21 | 0 | const __m128i* HM_mm = reinterpret_cast<const __m128i*>(HM); |
22 | |
|
// Load the current state and byte-reverse it into the working representation.
23 | 0 | __m128i X = _mm_loadu_si128(reinterpret_cast<__m128i*>(x)); |
24 | 0 | X = _mm_shuffle_epi8(X, BSWAP_MASK); |
25 | |
|
// All bits set; used below as -1 in each 64-bit lane.
26 | 0 | const __m128i ones = _mm_set1_epi8(-1); |
27 | |
|
28 | 0 | for(size_t b = 0; b != blocks; ++b) |
29 | 0 | { |
// Fetch block b (16 bytes), byte-reverse it like the state, and XOR it in.
30 | 0 | __m128i M = _mm_loadu_si128(reinterpret_cast<const __m128i*>(input_bytes) + b); |
31 | 0 | M = _mm_shuffle_epi8(M, BSWAP_MASK); |
32 | |
|
33 | 0 | X = _mm_xor_si128(X, M); |
34 | |
|
// Accumulator for the table entries selected by the bits of X.
35 | 0 | __m128i Z = _mm_setzero_si128(); |
36 | |
|
// 32 iterations; each consumes the top two bits of both 64-bit lanes of X
// and four consecutive table entries (indices 2*i .. 2*i+3), so all 128
// entries and all 128 bits of X are used exactly once.
37 | 0 | for(size_t i = 0; i != 64; i += 2) |
38 | 0 | { |
39 | 0 | const __m128i HM0 = _mm_loadu_si128(HM_mm + 2*i); |
40 | 0 | const __m128i HM1 = _mm_loadu_si128(HM_mm + 2*i + 1); |
41 | 0 | const __m128i HM2 = _mm_loadu_si128(HM_mm + 2*i + 2); |
42 | 0 | const __m128i HM3 = _mm_loadu_si128(HM_mm + 2*i + 3); |
43 | |
|
// (top bit of each lane) + (-1): the lane becomes 0 when the bit is set and
// all-ones when it is clear. X is shifted left by one between the two mask
// computations, so XMASK2 reflects the next bit down; the order of these
// four statements must not change.
44 | 0 | const __m128i XMASK1 = _mm_add_epi64(_mm_srli_epi64(X, 63), ones); |
45 | 0 | X = _mm_slli_epi64(X, 1); |
46 | 0 | const __m128i XMASK2 = _mm_add_epi64(_mm_srli_epi64(X, 63), ones); |
47 | 0 | X = _mm_slli_epi64(X, 1); |
48 | |
|
// unpackhi/unpacklo broadcast the high/low lane's mask to both lanes.
// andnot computes (~mask) & entry, so an entry is XORed into Z only when
// its controlling bit was set: HM0/HM2 are gated by the high lane,
// HM1/HM3 by the low lane.
49 | 0 | Z = _mm_xor_si128(Z, _mm_andnot_si128(_mm_unpackhi_epi64(XMASK1, XMASK1), HM0)); |
50 | 0 | Z = _mm_xor_si128(Z, _mm_andnot_si128(_mm_unpacklo_epi64(XMASK1, XMASK1), HM1)); |
51 | 0 | Z = _mm_xor_si128(Z, _mm_andnot_si128(_mm_unpackhi_epi64(XMASK2, XMASK2), HM2)); |
52 | 0 | Z = _mm_xor_si128(Z, _mm_andnot_si128(_mm_unpacklo_epi64(XMASK2, XMASK2), HM3)); |
53 | 0 | } |
54 | |
|
// The new state is Z with its two 64-bit halves exchanged.
55 | 0 | X = _mm_shuffle_epi32(Z, _MM_SHUFFLE(1, 0, 3, 2)); |
56 | 0 | } |
57 | |
|
// Undo the initial byte reversal and write the state back to the caller.
58 | 0 | X = _mm_shuffle_epi8(X, BSWAP_MASK); |
59 | 0 | _mm_storeu_si128(reinterpret_cast<__m128i*>(x), X); |
60 | 0 | } |
61 | | |
62 | | } |