/src/aom/aom_dsp/x86/convolve_sse4_1.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2018, Alliance for Open Media. All rights reserved. |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | | */ |
11 | | |
12 | | #ifndef AOM_AOM_DSP_X86_CONVOLVE_SSE4_1_H_ |
13 | | #define AOM_AOM_DSP_X86_CONVOLVE_SSE4_1_H_ |
14 | | |
15 | | // Note: |
16 | | // This header file must be included after any x86 intrinsics header file
17 | | |
18 | | static inline void mult_add_store(CONV_BUF_TYPE *const dst, |
19 | | const __m128i *const res, |
20 | | const __m128i *const wt0, |
21 | | const __m128i *const wt1, |
22 | 0 | const int do_average) { |
23 | 0 | __m128i d; |
24 | 0 | if (do_average) { |
25 | 0 | d = _mm_load_si128((__m128i *)dst); |
26 | 0 | d = _mm_add_epi32(_mm_mullo_epi32(d, *wt0), _mm_mullo_epi32(*res, *wt1)); |
27 | 0 | d = _mm_srai_epi32(d, DIST_PRECISION_BITS); |
28 | 0 | } else { |
29 | 0 | d = *res; |
30 | 0 | } |
31 | 0 | _mm_store_si128((__m128i *)dst, d); |
32 | 0 | } Unexecuted instantiation: highbd_convolve_2d_sse4.c:mult_add_store Unexecuted instantiation: highbd_jnt_convolve_sse4.c:mult_add_store Unexecuted instantiation: jnt_convolve_avx2.c:mult_add_store Unexecuted instantiation: highbd_jnt_convolve_avx2.c:mult_add_store |
33 | | |
34 | | static inline __m128i highbd_comp_avg_sse4_1(const __m128i *const data_ref_0, |
35 | | const __m128i *const res_unsigned, |
36 | | const __m128i *const wt0, |
37 | | const __m128i *const wt1, |
38 | 0 | const int use_dist_wtd_avg) { |
39 | 0 | __m128i res; |
40 | 0 | if (use_dist_wtd_avg) { |
41 | 0 | const __m128i wt0_res = _mm_mullo_epi32(*data_ref_0, *wt0); |
42 | 0 | const __m128i wt1_res = _mm_mullo_epi32(*res_unsigned, *wt1); |
43 | |
|
44 | 0 | const __m128i wt_res = _mm_add_epi32(wt0_res, wt1_res); |
45 | 0 | res = _mm_srai_epi32(wt_res, DIST_PRECISION_BITS); |
46 | 0 | } else { |
47 | 0 | const __m128i wt_res = _mm_add_epi32(*data_ref_0, *res_unsigned); |
48 | 0 | res = _mm_srai_epi32(wt_res, 1); |
49 | 0 | } |
50 | 0 | return res; |
51 | 0 | } Unexecuted instantiation: highbd_convolve_2d_sse4.c:highbd_comp_avg_sse4_1 Unexecuted instantiation: highbd_jnt_convolve_sse4.c:highbd_comp_avg_sse4_1 Unexecuted instantiation: jnt_convolve_avx2.c:highbd_comp_avg_sse4_1 Unexecuted instantiation: highbd_jnt_convolve_avx2.c:highbd_comp_avg_sse4_1 |
52 | | |
53 | | #endif // AOM_AOM_DSP_X86_CONVOLVE_SSE4_1_H_ |