/src/libvpx/vpx_dsp/x86/quantize_sse2.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2017 The WebM project authors. All Rights Reserved. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license |
5 | | * that can be found in the LICENSE file in the root of the source |
6 | | * tree. An additional intellectual property rights grant can be found |
7 | | * in the file PATENTS. All contributing project authors may |
8 | | * be found in the AUTHORS file in the root of the source tree. |
9 | | */ |
10 | | |
11 | | #ifndef VPX_VPX_DSP_X86_QUANTIZE_SSE2_H_ |
12 | | #define VPX_VPX_DSP_X86_QUANTIZE_SSE2_H_ |
13 | | |
14 | | #include <emmintrin.h> |
15 | | |
16 | | #include "./vpx_config.h" |
17 | | #include "vpx/vpx_integer.h" |
18 | | #include "vp9/encoder/vp9_block.h" |
19 | | |
20 | | static INLINE void load_b_values(const struct macroblock_plane *const mb_plane, |
21 | | __m128i *zbin, __m128i *round, __m128i *quant, |
22 | | const int16_t *dequant_ptr, __m128i *dequant, |
23 | 0 | __m128i *shift) { |
24 | 0 | *zbin = _mm_load_si128((const __m128i *)mb_plane->zbin); |
25 | 0 | *round = _mm_load_si128((const __m128i *)mb_plane->round); |
26 | 0 | *quant = _mm_load_si128((const __m128i *)mb_plane->quant); |
27 | 0 | *zbin = _mm_sub_epi16(*zbin, _mm_set1_epi16(1)); |
28 | 0 | *dequant = _mm_load_si128((const __m128i *)dequant_ptr); |
29 | 0 | *shift = _mm_load_si128((const __m128i *)mb_plane->quant_shift); |
30 | 0 | } Unexecuted instantiation: vp9_quantize_sse2.c:load_b_values Unexecuted instantiation: vp9_quantize_ssse3.c:load_b_values Unexecuted instantiation: vp9_quantize_avx2.c:load_b_values Unexecuted instantiation: quantize_sse2.c:load_b_values Unexecuted instantiation: quantize_ssse3.c:load_b_values Unexecuted instantiation: quantize_avx.c:load_b_values |
31 | | |
32 | | static INLINE void load_b_values32x32( |
33 | | const struct macroblock_plane *const mb_plane, __m128i *zbin, |
34 | | __m128i *round, __m128i *quant, const int16_t *dequant_ptr, |
35 | 0 | __m128i *dequant, __m128i *shift) { |
36 | 0 | const __m128i one = _mm_set1_epi16(1); |
37 | | // The 32x32 halves zbin and round. |
38 | 0 | *zbin = _mm_load_si128((const __m128i *)mb_plane->zbin); |
39 | | // Shift with rounding. |
40 | 0 | *zbin = _mm_add_epi16(*zbin, one); |
41 | 0 | *zbin = _mm_srli_epi16(*zbin, 1); |
42 | | // x86 has no "greater *or equal*" comparison. Subtract 1 from zbin so |
43 | | // it is a strict "greater" comparison. |
44 | 0 | *zbin = _mm_sub_epi16(*zbin, one); |
45 | |
|
46 | 0 | *round = _mm_load_si128((const __m128i *)mb_plane->round); |
47 | 0 | *round = _mm_add_epi16(*round, one); |
48 | 0 | *round = _mm_srli_epi16(*round, 1); |
49 | |
|
50 | 0 | *quant = _mm_load_si128((const __m128i *)mb_plane->quant); |
51 | 0 | *dequant = _mm_load_si128((const __m128i *)dequant_ptr); |
52 | 0 | *shift = _mm_load_si128((const __m128i *)mb_plane->quant_shift); |
53 | | // I suspect this is not technically OK because quant_shift can be up |
54 | | // to 1 << 16 and shifting up again will outrange that, but the test is not |
55 | | // comprehensive enough to catch that and "it's been that way forever" |
56 | 0 | *shift = _mm_slli_epi16(*shift, 1); |
57 | 0 | } Unexecuted instantiation: vp9_quantize_sse2.c:load_b_values32x32 Unexecuted instantiation: vp9_quantize_ssse3.c:load_b_values32x32 Unexecuted instantiation: vp9_quantize_avx2.c:load_b_values32x32 Unexecuted instantiation: quantize_sse2.c:load_b_values32x32 Unexecuted instantiation: quantize_ssse3.c:load_b_values32x32 Unexecuted instantiation: quantize_avx.c:load_b_values32x32 |
58 | | |
59 | | static INLINE void load_fp_values(const struct macroblock_plane *mb_plane, |
60 | | __m128i *round, __m128i *quant, |
61 | | const int16_t *dequant_ptr, |
62 | 0 | __m128i *dequant) { |
63 | 0 | *round = _mm_load_si128((const __m128i *)mb_plane->round_fp); |
64 | 0 | *quant = _mm_load_si128((const __m128i *)mb_plane->quant_fp); |
65 | 0 | *dequant = _mm_load_si128((const __m128i *)dequant_ptr); |
66 | 0 | } Unexecuted instantiation: vp9_quantize_sse2.c:load_fp_values Unexecuted instantiation: vp9_quantize_ssse3.c:load_fp_values Unexecuted instantiation: vp9_quantize_avx2.c:load_fp_values Unexecuted instantiation: quantize_sse2.c:load_fp_values Unexecuted instantiation: quantize_ssse3.c:load_fp_values Unexecuted instantiation: quantize_avx.c:load_fp_values |
67 | | |
68 | | // With ssse3 and later abs() and sign() are preferred. |
69 | 0 | static INLINE __m128i invert_sign_sse2(__m128i a, __m128i sign) { |
70 | 0 | a = _mm_xor_si128(a, sign); |
71 | 0 | return _mm_sub_epi16(a, sign); |
72 | 0 | } Unexecuted instantiation: vp9_quantize_sse2.c:invert_sign_sse2 Unexecuted instantiation: vp9_quantize_ssse3.c:invert_sign_sse2 Unexecuted instantiation: vp9_quantize_avx2.c:invert_sign_sse2 Unexecuted instantiation: quantize_sse2.c:invert_sign_sse2 Unexecuted instantiation: quantize_ssse3.c:invert_sign_sse2 Unexecuted instantiation: quantize_avx.c:invert_sign_sse2 |
73 | | |
74 | | static INLINE void calculate_qcoeff(__m128i *coeff, const __m128i round, |
75 | 0 | const __m128i quant, const __m128i shift) { |
76 | 0 | __m128i tmp, qcoeff; |
77 | 0 | qcoeff = _mm_adds_epi16(*coeff, round); |
78 | 0 | tmp = _mm_mulhi_epi16(qcoeff, quant); |
79 | 0 | qcoeff = _mm_add_epi16(tmp, qcoeff); |
80 | 0 | *coeff = _mm_mulhi_epi16(qcoeff, shift); |
81 | 0 | } Unexecuted instantiation: vp9_quantize_sse2.c:calculate_qcoeff Unexecuted instantiation: vp9_quantize_ssse3.c:calculate_qcoeff Unexecuted instantiation: vp9_quantize_avx2.c:calculate_qcoeff Unexecuted instantiation: quantize_sse2.c:calculate_qcoeff Unexecuted instantiation: quantize_ssse3.c:calculate_qcoeff Unexecuted instantiation: quantize_avx.c:calculate_qcoeff |
82 | | |
83 | | static INLINE void calculate_dqcoeff_and_store(__m128i qcoeff, __m128i dequant, |
84 | 0 | tran_low_t *dqcoeff) { |
85 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
86 | 0 | const __m128i low = _mm_mullo_epi16(qcoeff, dequant); |
87 | 0 | const __m128i high = _mm_mulhi_epi16(qcoeff, dequant); |
88 | |
|
89 | 0 | const __m128i dqcoeff32_0 = _mm_unpacklo_epi16(low, high); |
90 | 0 | const __m128i dqcoeff32_1 = _mm_unpackhi_epi16(low, high); |
91 | |
|
92 | 0 | _mm_store_si128((__m128i *)(dqcoeff), dqcoeff32_0); |
93 | 0 | _mm_store_si128((__m128i *)(dqcoeff + 4), dqcoeff32_1); |
94 | | #else |
95 | | const __m128i dqcoeff16 = _mm_mullo_epi16(qcoeff, dequant); |
96 | | |
97 | | _mm_store_si128((__m128i *)(dqcoeff), dqcoeff16); |
98 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
99 | 0 | } Unexecuted instantiation: vp9_quantize_sse2.c:calculate_dqcoeff_and_store Unexecuted instantiation: vp9_quantize_ssse3.c:calculate_dqcoeff_and_store Unexecuted instantiation: vp9_quantize_avx2.c:calculate_dqcoeff_and_store Unexecuted instantiation: quantize_sse2.c:calculate_dqcoeff_and_store Unexecuted instantiation: quantize_ssse3.c:calculate_dqcoeff_and_store Unexecuted instantiation: quantize_avx.c:calculate_dqcoeff_and_store |
100 | | |
101 | | // Scan 16 values for eob reference in scan. |
102 | | static INLINE __m128i scan_for_eob(__m128i *coeff0, __m128i *coeff1, |
103 | | const int16_t *scan, const int index, |
104 | 0 | const __m128i zero) { |
105 | 0 | const __m128i zero_coeff0 = _mm_cmpeq_epi16(*coeff0, zero); |
106 | 0 | const __m128i zero_coeff1 = _mm_cmpeq_epi16(*coeff1, zero); |
107 | 0 | __m128i scan0 = _mm_load_si128((const __m128i *)(scan + index)); |
108 | 0 | __m128i scan1 = _mm_load_si128((const __m128i *)(scan + index + 8)); |
109 | 0 | __m128i eob0, eob1; |
110 | 0 | eob0 = _mm_andnot_si128(zero_coeff0, scan0); |
111 | 0 | eob1 = _mm_andnot_si128(zero_coeff1, scan1); |
112 | 0 | return _mm_max_epi16(eob0, eob1); |
113 | 0 | } Unexecuted instantiation: vp9_quantize_sse2.c:scan_for_eob Unexecuted instantiation: vp9_quantize_ssse3.c:scan_for_eob Unexecuted instantiation: vp9_quantize_avx2.c:scan_for_eob Unexecuted instantiation: quantize_sse2.c:scan_for_eob Unexecuted instantiation: quantize_ssse3.c:scan_for_eob Unexecuted instantiation: quantize_avx.c:scan_for_eob |
114 | | |
115 | 0 | static INLINE int16_t accumulate_eob(__m128i eob) { |
116 | 0 | __m128i eob_shuffled; |
117 | 0 | eob_shuffled = _mm_shuffle_epi32(eob, 0xe); |
118 | 0 | eob = _mm_max_epi16(eob, eob_shuffled); |
119 | 0 | eob_shuffled = _mm_shufflelo_epi16(eob, 0xe); |
120 | 0 | eob = _mm_max_epi16(eob, eob_shuffled); |
121 | 0 | eob_shuffled = _mm_shufflelo_epi16(eob, 0x1); |
122 | 0 | eob = _mm_max_epi16(eob, eob_shuffled); |
123 | 0 | return _mm_extract_epi16(eob, 1); |
124 | 0 | } Unexecuted instantiation: vp9_quantize_sse2.c:accumulate_eob Unexecuted instantiation: vp9_quantize_ssse3.c:accumulate_eob Unexecuted instantiation: vp9_quantize_avx2.c:accumulate_eob Unexecuted instantiation: quantize_sse2.c:accumulate_eob Unexecuted instantiation: quantize_ssse3.c:accumulate_eob Unexecuted instantiation: quantize_avx.c:accumulate_eob |
125 | | |
126 | | #endif // VPX_VPX_DSP_X86_QUANTIZE_SSE2_H_ |