/src/aom/av1/common/x86/av1_txfm_sse4.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2018, Alliance for Open Media. All rights reserved. |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | | */ |
11 | | |
12 | | #ifndef AOM_AV1_COMMON_X86_AV1_TXFM_SSE4_H_ |
13 | | #define AOM_AV1_COMMON_X86_AV1_TXFM_SSE4_H_ |
14 | | |
15 | | #include <smmintrin.h> |
16 | | |
17 | | #ifdef __cplusplus |
18 | | extern "C" { |
19 | | #endif |
20 | | |
// Rounding right shift of four packed signed 32-bit lanes.
//
// Adds the rounding offset 2^(bit - 1) to every lane of `vec` and then
// arithmetic-right-shifts each lane by `bit`.
//
// vec: four packed signed 32-bit values.
// bit: shift amount; expected to be in [0, 31]. With bit == 0 the offset is
//      zero and the shift is a no-op, so `vec` is returned unchanged.
//      Negative values are not supported.
// Returns the shifted lanes.
static inline __m128i av1_round_shift_32_sse4_1(__m128i vec, int bit) {
  // (1u << bit) >> 1 equals 1 << (bit - 1) for bit >= 1 and is 0 for bit == 0.
  // Writing it this way avoids the undefined negative shift count that
  // 1 << (bit - 1) would have when bit == 0.
  const int half = (int)((1u << bit) >> 1);
  const __m128i round = _mm_set1_epi32(half);
  return _mm_srai_epi32(_mm_add_epi32(vec, round), bit);
}
|
27 | | |
28 | | static inline void av1_round_shift_array_32_sse4_1(const __m128i *input, |
29 | | __m128i *output, |
30 | | const int size, |
31 | 4.28M | const int bit) { |
32 | 4.28M | if (bit > 0) { |
33 | 4.28M | int i; |
34 | 52.8M | for (i = 0; i < size; i++) { |
35 | 48.5M | output[i] = av1_round_shift_32_sse4_1(input[i], bit); |
36 | 48.5M | } |
37 | 18.4E | } else { |
38 | 18.4E | int i; |
39 | 18.4E | for (i = 0; i < size; i++) { |
40 | 0 | output[i] = _mm_slli_epi32(input[i], -bit); |
41 | 0 | } |
42 | 18.4E | } |
43 | 4.28M | } Unexecuted instantiation: av1_txfm_sse4.c:av1_round_shift_array_32_sse4_1 highbd_inv_txfm_sse4.c:av1_round_shift_array_32_sse4_1 Line | Count | Source | 31 | 4.28M | const int bit) { | 32 | 4.28M | if (bit > 0) { | 33 | 4.28M | int i; | 34 | 52.8M | for (i = 0; i < size; i++) { | 35 | 48.5M | output[i] = av1_round_shift_32_sse4_1(input[i], bit); | 36 | 48.5M | } | 37 | 18.4E | } else { | 38 | 18.4E | int i; | 39 | 18.4E | for (i = 0; i < size; i++) { | 40 | 0 | output[i] = _mm_slli_epi32(input[i], -bit); | 41 | 0 | } | 42 | 18.4E | } | 43 | 4.28M | } |
|
44 | | |
45 | | static inline void av1_round_shift_rect_array_32_sse4_1(const __m128i *input, |
46 | | __m128i *output, |
47 | | const int size, |
48 | | const int bit, |
49 | 1.93M | const int val) { |
50 | 1.93M | const __m128i sqrt2 = _mm_set1_epi32(val); |
51 | 1.93M | if (bit > 0) { |
52 | 0 | int i; |
53 | 0 | for (i = 0; i < size; i++) { |
54 | 0 | const __m128i r0 = av1_round_shift_32_sse4_1(input[i], bit); |
55 | 0 | const __m128i r1 = _mm_mullo_epi32(sqrt2, r0); |
56 | 0 | output[i] = av1_round_shift_32_sse4_1(r1, NewSqrt2Bits); |
57 | 0 | } |
58 | 1.93M | } else { |
59 | 1.93M | int i; |
60 | 19.3M | for (i = 0; i < size; i++) { |
61 | 17.3M | const __m128i r0 = _mm_slli_epi32(input[i], -bit); |
62 | 17.3M | const __m128i r1 = _mm_mullo_epi32(sqrt2, r0); |
63 | 17.3M | output[i] = av1_round_shift_32_sse4_1(r1, NewSqrt2Bits); |
64 | 17.3M | } |
65 | 1.93M | } |
66 | 1.93M | } Unexecuted instantiation: av1_txfm_sse4.c:av1_round_shift_rect_array_32_sse4_1 highbd_inv_txfm_sse4.c:av1_round_shift_rect_array_32_sse4_1 Line | Count | Source | 49 | 1.93M | const int val) { | 50 | 1.93M | const __m128i sqrt2 = _mm_set1_epi32(val); | 51 | 1.93M | if (bit > 0) { | 52 | 0 | int i; | 53 | 0 | for (i = 0; i < size; i++) { | 54 | 0 | const __m128i r0 = av1_round_shift_32_sse4_1(input[i], bit); | 55 | 0 | const __m128i r1 = _mm_mullo_epi32(sqrt2, r0); | 56 | 0 | output[i] = av1_round_shift_32_sse4_1(r1, NewSqrt2Bits); | 57 | 0 | } | 58 | 1.93M | } else { | 59 | 1.93M | int i; | 60 | 19.3M | for (i = 0; i < size; i++) { | 61 | 17.3M | const __m128i r0 = _mm_slli_epi32(input[i], -bit); | 62 | 17.3M | const __m128i r1 = _mm_mullo_epi32(sqrt2, r0); | 63 | 17.3M | output[i] = av1_round_shift_32_sse4_1(r1, NewSqrt2Bits); | 64 | 17.3M | } | 65 | 1.93M | } | 66 | 1.93M | } |
|
67 | | |
68 | | #ifdef __cplusplus |
69 | | } |
70 | | #endif |
71 | | |
72 | | #endif // AOM_AV1_COMMON_X86_AV1_TXFM_SSE4_H_ |