Coverage Report

Created: 2024-11-21 07:03

/src/nss-nspr/nss/lib/freebl/gcm-x86.c
Line
Count
Source
1
/* This Source Code Form is subject to the terms of the Mozilla Public
2
 * License, v. 2.0. If a copy of the MPL was not distributed with this
3
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5
#ifdef FREEBL_NO_DEPEND
6
#include "stubs.h"
7
#endif
8
#include "gcm.h"
9
#include "secerr.h"
10
11
#include <wmmintrin.h> /* clmul */
12
13
#define WRITE64(x, bytes)   \
14
4
    (bytes)[0] = (x) >> 56; \
15
4
    (bytes)[1] = (x) >> 48; \
16
4
    (bytes)[2] = (x) >> 40; \
17
4
    (bytes)[3] = (x) >> 32; \
18
4
    (bytes)[4] = (x) >> 24; \
19
4
    (bytes)[5] = (x) >> 16; \
20
4
    (bytes)[6] = (x) >> 8;  \
21
4
    (bytes)[7] = (x);
22
23
SECStatus
24
gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf)
25
2
{
26
2
    uint64_t tmp_out[2];
27
2
    _mm_storeu_si128((__m128i *)tmp_out, ghash->x);
28
    /* maxout must be larger than 16 byte (checked by the caller). */
29
2
    WRITE64(tmp_out[0], outbuf + 8);
30
2
    WRITE64(tmp_out[1], outbuf);
31
2
    return SECSuccess;
32
2
}
33
34
SECStatus
35
gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf,
36
                unsigned int count)
37
5
{
38
5
    size_t i;
39
5
    pre_align __m128i z_high post_align;
40
5
    pre_align __m128i z_low post_align;
41
5
    pre_align __m128i C post_align;
42
5
    pre_align __m128i D post_align;
43
5
    pre_align __m128i E post_align;
44
5
    pre_align __m128i F post_align;
45
5
    pre_align __m128i bin post_align;
46
5
    pre_align __m128i Ci post_align;
47
5
    pre_align __m128i tmp post_align;
48
49
14
    for (i = 0; i < count; i++, buf += 16) {
50
9
        bin = _mm_set_epi16(((uint16_t)buf[0] << 8) | buf[1],
51
9
                            ((uint16_t)buf[2] << 8) | buf[3],
52
9
                            ((uint16_t)buf[4] << 8) | buf[5],
53
9
                            ((uint16_t)buf[6] << 8) | buf[7],
54
9
                            ((uint16_t)buf[8] << 8) | buf[9],
55
9
                            ((uint16_t)buf[10] << 8) | buf[11],
56
9
                            ((uint16_t)buf[12] << 8) | buf[13],
57
9
                            ((uint16_t)buf[14] << 8) | buf[15]);
58
9
        Ci = _mm_xor_si128(bin, ghash->x);
59
60
        /* Do binary mult ghash->X = Ci * ghash->H. */
61
9
        C = _mm_clmulepi64_si128(Ci, ghash->h, 0x00);
62
9
        D = _mm_clmulepi64_si128(Ci, ghash->h, 0x11);
63
9
        E = _mm_clmulepi64_si128(Ci, ghash->h, 0x01);
64
9
        F = _mm_clmulepi64_si128(Ci, ghash->h, 0x10);
65
9
        tmp = _mm_xor_si128(E, F);
66
9
        z_high = _mm_xor_si128(tmp, _mm_slli_si128(D, 8));
67
9
        z_high = _mm_unpackhi_epi64(z_high, D);
68
9
        z_low = _mm_xor_si128(_mm_slli_si128(tmp, 8), C);
69
9
        z_low = _mm_unpackhi_epi64(_mm_slli_si128(C, 8), z_low);
70
71
        /* Shift one to the left (multiply by x) as gcm spec is stupid. */
72
9
        C = _mm_slli_si128(z_low, 8);
73
9
        E = _mm_srli_epi64(C, 63);
74
9
        D = _mm_slli_si128(z_high, 8);
75
9
        F = _mm_srli_epi64(D, 63);
76
        /* Carry over */
77
9
        C = _mm_srli_si128(z_low, 8);
78
9
        D = _mm_srli_epi64(C, 63);
79
9
        z_low = _mm_or_si128(_mm_slli_epi64(z_low, 1), E);
80
9
        z_high = _mm_or_si128(_mm_or_si128(_mm_slli_epi64(z_high, 1), F), D);
81
82
        /* Reduce */
83
9
        C = _mm_slli_si128(z_low, 8);
84
        /* D = z_low << 127 */
85
9
        D = _mm_slli_epi64(C, 63);
86
        /* E = z_low << 126 */
87
9
        E = _mm_slli_epi64(C, 62);
88
        /* F = z_low << 121 */
89
9
        F = _mm_slli_epi64(C, 57);
90
        /* z_low ^= (z_low << 127) ^ (z_low << 126) ^ (z_low << 121); */
91
9
        z_low = _mm_xor_si128(_mm_xor_si128(_mm_xor_si128(z_low, D), E), F);
92
9
        C = _mm_srli_si128(z_low, 8);
93
        /* D = z_low >> 1 */
94
9
        D = _mm_slli_epi64(C, 63);
95
9
        D = _mm_or_si128(_mm_srli_epi64(z_low, 1), D);
96
        /* E = z_low >> 2 */
97
9
        E = _mm_slli_epi64(C, 62);
98
9
        E = _mm_or_si128(_mm_srli_epi64(z_low, 2), E);
99
        /* F = z_low >> 7 */
100
9
        F = _mm_slli_epi64(C, 57);
101
9
        F = _mm_or_si128(_mm_srli_epi64(z_low, 7), F);
102
        /* ghash->x ^= z_low ^ (z_low >> 1) ^ (z_low >> 2) ^ (z_low >> 7); */
103
9
        ghash->x = _mm_xor_si128(_mm_xor_si128(
104
9
                                     _mm_xor_si128(_mm_xor_si128(z_high, z_low), D), E),
105
9
                                 F);
106
9
    }
107
5
    return SECSuccess;
108
5
}
109
110
SECStatus
111
gcm_HashInit_hw(gcmHashContext *ghash)
112
12
{
113
12
    ghash->ghash_mul = gcm_HashMult_hw;
114
12
    ghash->x = _mm_setzero_si128();
115
    /* MSVC requires __m64 to load epi64. */
116
12
    ghash->h = _mm_set_epi32(ghash->h_high >> 32, (uint32_t)ghash->h_high,
117
12
                             ghash->h_low >> 32, (uint32_t)ghash->h_low);
118
12
    ghash->hw = PR_TRUE;
119
12
    return SECSuccess;
120
12
}
121
122
SECStatus
123
gcm_HashZeroX_hw(gcmHashContext *ghash)
124
2
{
125
2
    ghash->x = _mm_setzero_si128();
126
2
    return SECSuccess;
127
2
}