/src/aom/aom_dsp/x86/intrapred_x86.h
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2020, Alliance for Open Media. All rights reserved. |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | | */ |
11 | | |
12 | | #ifndef AOM_AOM_DSP_X86_INTRAPRED_X86_H_ |
13 | | #define AOM_AOM_DSP_X86_INTRAPRED_X86_H_ |
14 | | |
15 | | #include <emmintrin.h> // SSE2 |
16 | | #include "aom/aom_integer.h" |
17 | | #include "config/aom_config.h" |
18 | | |
/*
 * Adds up the 16 bytes starting at `ref`.
 *
 * `ref` is read with an aligned 128-bit load, so it must be 16-byte aligned.
 *
 * Returns a vector whose lowest 16-bit lane holds the total of all 16 bytes
 * (at most 16 * 255 = 4080, so the 16-bit addition cannot wrap). As a
 * by-product, the upper 64-bit half carries twice the sum of bytes 8..15.
 */
static inline __m128i dc_sum_16_sse2(const uint8_t *ref) {
  const __m128i pixels = _mm_load_si128((const __m128i *)ref);
  // SAD against zero leaves, in each 64-bit half, the sum of that half's
  // eight bytes.
  const __m128i half_sums = _mm_sad_epu8(pixels, _mm_setzero_si128());
  // Bring the upper half's sum down and add it onto the lower half's sum.
  return _mm_add_epi16(half_sums, _mm_unpackhi_epi64(half_sums, half_sums));
}
|
26 | | |
/*
 * Adds up the 32 bytes starting at `ref`.
 *
 * `ref` is read with two aligned 128-bit loads (at `ref` and `ref + 16`), so
 * it must be 16-byte aligned.
 *
 * Returns a vector whose lowest 16-bit lane holds the total of all 32 bytes
 * (at most 32 * 255 = 8160, so the 16-bit additions cannot wrap). As a
 * by-product, the upper 64-bit half carries twice the combined sum of bytes
 * 8..15 and 24..31.
 */
static inline __m128i dc_sum_32_sse2(const uint8_t *ref) {
  const __m128i zero = _mm_setzero_si128();
  // SAD against zero leaves, in each 64-bit half, the sum of that half's
  // eight bytes.
  const __m128i sad_first =
      _mm_sad_epu8(_mm_load_si128((const __m128i *)ref), zero);
  const __m128i sad_second =
      _mm_sad_epu8(_mm_load_si128((const __m128i *)(ref + 16)), zero);
  // Combine the two 16-byte groups half by half.
  const __m128i partial = _mm_add_epi16(sad_first, sad_second);
  // Bring the upper half's sum down and add it onto the lower half's sum.
  return _mm_add_epi16(partial, _mm_unpackhi_epi64(partial, partial));
}
|
37 | | |
38 | | #endif // AOM_AOM_DSP_X86_INTRAPRED_X86_H_ |