Coverage Report

Created: 2025-07-04 06:47

/src/libsodium/src/libsodium/crypto_generichash/blake2b/ref/blake2b-compress-sse41.c
Line
Count
Source (jump to first uncovered line)
1
2
#define BLAKE2_USE_SSSE3
3
#define BLAKE2_USE_SSE41
4
5
#include <stdint.h>
6
#include <string.h>
7
8
#include "blake2.h"
9
#include "private/common.h"
10
11
#if defined(HAVE_EMMINTRIN_H) && defined(HAVE_TMMINTRIN_H) && \
12
    defined(HAVE_SMMINTRIN_H)
13
14
# ifdef __clang__
15
#  pragma clang attribute push(__attribute__((target("sse2,ssse3,sse4.1"))), apply_to = function)
16
# elif defined(__GNUC__)
17
#  pragma GCC target("sse2,ssse3,sse4.1")
18
# endif
19
20
# include <emmintrin.h>
21
# include <smmintrin.h>
22
# include <tmmintrin.h>
23
# include "private/sse2_64_32.h"
24
25
# include "blake2b-compress-sse41.h"
26
27
CRYPTO_ALIGN(64)
28
static const uint64_t blake2b_IV[8] = {
29
    0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, 0x3c6ef372fe94f82bULL,
30
    0xa54ff53a5f1d36f1ULL, 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
31
    0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
32
};
33
34
int
35
blake2b_compress_sse41(blake2b_state *S,
36
                       const uint8_t  block[BLAKE2B_BLOCKBYTES])
37
0
{
38
0
    __m128i       row1l, row1h;
39
0
    __m128i       row2l, row2h;
40
0
    __m128i       row3l, row3h;
41
0
    __m128i       row4l, row4h;
42
0
    __m128i       b0, b1;
43
0
    __m128i       t0, t1;
44
0
    const __m128i r16 =
45
0
        _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);
46
0
    const __m128i r24 =
47
0
        _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);
48
0
    const __m128i m0 = LOADU(block + 00);
49
0
    const __m128i m1 = LOADU(block + 16);
50
0
    const __m128i m2 = LOADU(block + 32);
51
0
    const __m128i m3 = LOADU(block + 48);
52
0
    const __m128i m4 = LOADU(block + 64);
53
0
    const __m128i m5 = LOADU(block + 80);
54
0
    const __m128i m6 = LOADU(block + 96);
55
0
    const __m128i m7 = LOADU(block + 112);
56
0
    row1l            = LOADU(&S->h[0]);
57
0
    row1h            = LOADU(&S->h[2]);
58
0
    row2l            = LOADU(&S->h[4]);
59
0
    row2h            = LOADU(&S->h[6]);
60
0
    row3l            = LOADU(&blake2b_IV[0]);
61
0
    row3h            = LOADU(&blake2b_IV[2]);
62
0
    row4l            = _mm_xor_si128(LOADU(&blake2b_IV[4]), LOADU(&S->t[0]));
63
0
    row4h            = _mm_xor_si128(LOADU(&blake2b_IV[6]), LOADU(&S->f[0]));
64
0
    ROUND(0);
65
0
    ROUND(1);
66
0
    ROUND(2);
67
0
    ROUND(3);
68
0
    ROUND(4);
69
0
    ROUND(5);
70
0
    ROUND(6);
71
0
    ROUND(7);
72
0
    ROUND(8);
73
0
    ROUND(9);
74
0
    ROUND(10);
75
0
    ROUND(11);
76
0
    row1l = _mm_xor_si128(row3l, row1l);
77
0
    row1h = _mm_xor_si128(row3h, row1h);
78
0
    STOREU(&S->h[0], _mm_xor_si128(LOADU(&S->h[0]), row1l));
79
0
    STOREU(&S->h[2], _mm_xor_si128(LOADU(&S->h[2]), row1h));
80
0
    row2l = _mm_xor_si128(row4l, row2l);
81
0
    row2h = _mm_xor_si128(row4h, row2h);
82
0
    STOREU(&S->h[4], _mm_xor_si128(LOADU(&S->h[4]), row2l));
83
0
    STOREU(&S->h[6], _mm_xor_si128(LOADU(&S->h[6]), row2h));
84
0
    return 0;
85
0
}
86
87
# ifdef __clang__
88
#  pragma clang attribute pop
89
# endif
90
91
#endif