/src/nss-nspr/nss/lib/freebl/gcm-x86.c
Line | Count | Source |
1 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
2 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
3 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
4 | | |
5 | | #ifdef FREEBL_NO_DEPEND |
6 | | #include "stubs.h" |
7 | | #endif |
8 | | #include "gcm.h" |
9 | | #include "secerr.h" |
10 | | |
11 | | #include <wmmintrin.h> /* clmul */ |
12 | | |
/*
 * WRITE64(x, bytes): store the 64-bit value x into bytes[0..7] in big-endian
 * order (most significant byte first).
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly one
 * statement and can be used safely in unbraced if/else arms. Both arguments
 * are evaluated exactly once. Invoke it with a trailing semicolon.
 */
#define WRITE64(x, bytes)                                        \
    do {                                                         \
        const uint64_t write64_val_ = (x);                       \
        unsigned char *write64_dst_ = (bytes);                   \
        write64_dst_[0] = (unsigned char)(write64_val_ >> 56);   \
        write64_dst_[1] = (unsigned char)(write64_val_ >> 48);   \
        write64_dst_[2] = (unsigned char)(write64_val_ >> 40);   \
        write64_dst_[3] = (unsigned char)(write64_val_ >> 32);   \
        write64_dst_[4] = (unsigned char)(write64_val_ >> 24);   \
        write64_dst_[5] = (unsigned char)(write64_val_ >> 16);   \
        write64_dst_[6] = (unsigned char)(write64_val_ >> 8);    \
        write64_dst_[7] = (unsigned char)(write64_val_);         \
    } while (0)
22 | | |
23 | | SECStatus |
24 | | gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf) |
25 | 2 | { |
26 | 2 | uint64_t tmp_out[2]; |
27 | 2 | _mm_storeu_si128((__m128i *)tmp_out, ghash->x); |
28 | | /* maxout must be larger than 16 byte (checked by the caller). */ |
29 | 2 | WRITE64(tmp_out[0], outbuf + 8); |
30 | 2 | WRITE64(tmp_out[1], outbuf); |
31 | 2 | return SECSuccess; |
32 | 2 | } |
33 | | |
34 | | SECStatus |
35 | | gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf, |
36 | | unsigned int count) |
37 | 5 | { |
38 | 5 | size_t i; |
39 | 5 | pre_align __m128i z_high post_align; |
40 | 5 | pre_align __m128i z_low post_align; |
41 | 5 | pre_align __m128i C post_align; |
42 | 5 | pre_align __m128i D post_align; |
43 | 5 | pre_align __m128i E post_align; |
44 | 5 | pre_align __m128i F post_align; |
45 | 5 | pre_align __m128i bin post_align; |
46 | 5 | pre_align __m128i Ci post_align; |
47 | 5 | pre_align __m128i tmp post_align; |
48 | | |
49 | 14 | for (i = 0; i < count; i++, buf += 16) { |
50 | 9 | bin = _mm_set_epi16(((uint16_t)buf[0] << 8) | buf[1], |
51 | 9 | ((uint16_t)buf[2] << 8) | buf[3], |
52 | 9 | ((uint16_t)buf[4] << 8) | buf[5], |
53 | 9 | ((uint16_t)buf[6] << 8) | buf[7], |
54 | 9 | ((uint16_t)buf[8] << 8) | buf[9], |
55 | 9 | ((uint16_t)buf[10] << 8) | buf[11], |
56 | 9 | ((uint16_t)buf[12] << 8) | buf[13], |
57 | 9 | ((uint16_t)buf[14] << 8) | buf[15]); |
58 | 9 | Ci = _mm_xor_si128(bin, ghash->x); |
59 | | |
60 | | /* Do binary mult ghash->X = Ci * ghash->H. */ |
61 | 9 | C = _mm_clmulepi64_si128(Ci, ghash->h, 0x00); |
62 | 9 | D = _mm_clmulepi64_si128(Ci, ghash->h, 0x11); |
63 | 9 | E = _mm_clmulepi64_si128(Ci, ghash->h, 0x01); |
64 | 9 | F = _mm_clmulepi64_si128(Ci, ghash->h, 0x10); |
65 | 9 | tmp = _mm_xor_si128(E, F); |
66 | 9 | z_high = _mm_xor_si128(tmp, _mm_slli_si128(D, 8)); |
67 | 9 | z_high = _mm_unpackhi_epi64(z_high, D); |
68 | 9 | z_low = _mm_xor_si128(_mm_slli_si128(tmp, 8), C); |
69 | 9 | z_low = _mm_unpackhi_epi64(_mm_slli_si128(C, 8), z_low); |
70 | | |
71 | | /* Shift one to the left (multiply by x) as gcm spec is stupid. */ |
72 | 9 | C = _mm_slli_si128(z_low, 8); |
73 | 9 | E = _mm_srli_epi64(C, 63); |
74 | 9 | D = _mm_slli_si128(z_high, 8); |
75 | 9 | F = _mm_srli_epi64(D, 63); |
76 | | /* Carry over */ |
77 | 9 | C = _mm_srli_si128(z_low, 8); |
78 | 9 | D = _mm_srli_epi64(C, 63); |
79 | 9 | z_low = _mm_or_si128(_mm_slli_epi64(z_low, 1), E); |
80 | 9 | z_high = _mm_or_si128(_mm_or_si128(_mm_slli_epi64(z_high, 1), F), D); |
81 | | |
82 | | /* Reduce */ |
83 | 9 | C = _mm_slli_si128(z_low, 8); |
84 | | /* D = z_low << 127 */ |
85 | 9 | D = _mm_slli_epi64(C, 63); |
86 | | /* E = z_low << 126 */ |
87 | 9 | E = _mm_slli_epi64(C, 62); |
88 | | /* F = z_low << 121 */ |
89 | 9 | F = _mm_slli_epi64(C, 57); |
90 | | /* z_low ^= (z_low << 127) ^ (z_low << 126) ^ (z_low << 121); */ |
91 | 9 | z_low = _mm_xor_si128(_mm_xor_si128(_mm_xor_si128(z_low, D), E), F); |
92 | 9 | C = _mm_srli_si128(z_low, 8); |
93 | | /* D = z_low >> 1 */ |
94 | 9 | D = _mm_slli_epi64(C, 63); |
95 | 9 | D = _mm_or_si128(_mm_srli_epi64(z_low, 1), D); |
96 | | /* E = z_low >> 2 */ |
97 | 9 | E = _mm_slli_epi64(C, 62); |
98 | 9 | E = _mm_or_si128(_mm_srli_epi64(z_low, 2), E); |
99 | | /* F = z_low >> 7 */ |
100 | 9 | F = _mm_slli_epi64(C, 57); |
101 | 9 | F = _mm_or_si128(_mm_srli_epi64(z_low, 7), F); |
102 | | /* ghash->x ^= z_low ^ (z_low >> 1) ^ (z_low >> 2) ^ (z_low >> 7); */ |
103 | 9 | ghash->x = _mm_xor_si128(_mm_xor_si128( |
104 | 9 | _mm_xor_si128(_mm_xor_si128(z_high, z_low), D), E), |
105 | 9 | F); |
106 | 9 | } |
107 | 5 | return SECSuccess; |
108 | 5 | } |
109 | | |
110 | | SECStatus |
111 | | gcm_HashInit_hw(gcmHashContext *ghash) |
112 | 12 | { |
113 | 12 | ghash->ghash_mul = gcm_HashMult_hw; |
114 | 12 | ghash->x = _mm_setzero_si128(); |
115 | | /* MSVC requires __m64 to load epi64. */ |
116 | 12 | ghash->h = _mm_set_epi32(ghash->h_high >> 32, (uint32_t)ghash->h_high, |
117 | 12 | ghash->h_low >> 32, (uint32_t)ghash->h_low); |
118 | 12 | ghash->hw = PR_TRUE; |
119 | 12 | return SECSuccess; |
120 | 12 | } |
121 | | |
122 | | SECStatus |
123 | | gcm_HashZeroX_hw(gcmHashContext *ghash) |
124 | 2 | { |
125 | 2 | ghash->x = _mm_setzero_si128(); |
126 | 2 | return SECSuccess; |
127 | 2 | } |