/src/zlib-ng/arch/x86/adler32_ssse3_p.h
Line | Count | Source |
1 | | /* adler32_ssse3_p.h -- adler32 ssse3 utility functions |
2 | | * Copyright (C) 2022 Adam Stylinski |
3 | | * For conditions of distribution and use, see copyright notice in zlib.h |
4 | | */ |
5 | | |
6 | | #ifndef ADLER32_SSSE3_P_H_ |
7 | | #define ADLER32_SSSE3_P_H_ |
8 | | |
9 | | #ifdef X86_SSSE3 |
10 | | |
11 | | #include <immintrin.h> |
12 | | #include <stdint.h> |
13 | | |
/* Partial horizontal sum of a 128-bit vector viewed as four 32-bit lanes.
 *
 * Returns lane 0 + lane 2 (wrapping modulo 2^32). Lanes 1 and 3 do not
 * contribute to the result.
 */
static inline uint32_t partial_hsum(__m128i x) {
    /* An 8-byte right shift drops the upper 64 bits into the lower half,
     * so lane 2 lines up with lane 0 for the addition below. */
    const __m128i upper_half = _mm_srli_si128(x, 8);

    /* Only the lowest 32-bit lane of the sum is extracted. */
    return _mm_cvtsi128_si32(_mm_add_epi32(x, upper_half));
}
|
19 | | |
/* Full horizontal sum of a 128-bit vector viewed as four 32-bit lanes.
 *
 * Returns lane 0 + lane 1 + lane 2 + lane 3 (wrapping modulo 2^32).
 */
static inline uint32_t hsum(__m128i x) {
    /* Step 1: fold the upper 64 bits onto the lower 64 bits, leaving
     * (lane0 + lane2) in lane 0 and (lane1 + lane3) in lane 1. */
    const __m128i high_pair = _mm_unpackhi_epi64(x, x);
    const __m128i pair_sums = _mm_add_epi32(x, high_pair);

    /* Step 2: selector 0x01 moves lane 1 into lane 0, so the final add
     * leaves the total of all four lanes in lane 0. */
    const __m128i odd_to_low = _mm_shuffle_epi32(pair_sums, 0x01);

    return _mm_cvtsi128_si32(_mm_add_epi32(pair_sums, odd_to_low));
}
|
27 | | #endif |
28 | | |
29 | | #endif |