/src/zlib-ng/arch/x86/adler32_ssse3_p.h
Line | Count | Source |
1 | | /* adler32_ssse3_p.h -- adler32 ssse3 utility functions |
2 | | * Copyright (C) 2022 Adam Stylinski |
3 | | * For conditions of distribution and use, see copyright notice in zlib.h |
4 | | */ |
5 | | |
6 | | #ifndef ADLER32_SSSE3_P_H_ |
7 | | #define ADLER32_SSSE3_P_H_ |
8 | | |
9 | | #ifdef X86_SSSE3 |
10 | | |
11 | | #include <immintrin.h> |
12 | | #include <stdint.h> |
13 | | |
/* Partial horizontal sum of a vector viewed as four 32-bit lanes.
 *
 * Returns lane 0 + lane 2 of x, with 32-bit wraparound. Lanes 1 and 3 do not
 * contribute to the result.
 * NOTE(review): presumably intended for inputs whose useful data sits in the
 * low 32 bits of each 64-bit half -- confirm against the callers.
 */
static inline uint32_t partial_hsum(__m128i x) {
    /* Shift the whole register right by 8 bytes so the upper 64-bit half
     * lines up with the lower half. */
    __m128i second_int = _mm_srli_si128(x, 8);
    __m128i sum = _mm_add_epi32(x, second_int);
    /* _mm_cvtsi128_si32 returns a signed int; make the conversion explicit. */
    return (uint32_t)_mm_cvtsi128_si32(sum);
}
|
19 | | |
/* Full horizontal sum of a vector viewed as four 32-bit lanes.
 *
 * Returns lane 0 + lane 1 + lane 2 + lane 3 of x, with 32-bit wraparound.
 */
static inline uint32_t hsum(__m128i x) {
    /* Fold the upper 64-bit half onto the lower half: lanes {0+2, 1+3, ...}. */
    __m128i sum1 = _mm_unpackhi_epi64(x, x);
    __m128i sum2 = _mm_add_epi32(x, sum1);
    /* Shuffle control 0x01 moves lane 1 into lane 0 so the two partial sums
     * can be added together. */
    __m128i sum3 = _mm_shuffle_epi32(sum2, 0x01);
    __m128i sum4 = _mm_add_epi32(sum2, sum3);
    /* _mm_cvtsi128_si32 returns a signed int; make the conversion explicit. */
    return (uint32_t)_mm_cvtsi128_si32(sum4);
}
|
27 | | #endif |
28 | | |
29 | | #endif |