/src/aom/aom_dsp/x86/synonyms.h
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2016, Alliance for Open Media. All rights reserved. |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | | */ |
11 | | |
12 | | #ifndef AOM_AOM_DSP_X86_SYNONYMS_H_ |
13 | | #define AOM_AOM_DSP_X86_SYNONYMS_H_ |
14 | | |
15 | | #include <emmintrin.h> |
16 | | #include <string.h> |
17 | | |
18 | | #include "config/aom_config.h" |
19 | | |
20 | | #include "aom/aom_integer.h" |
21 | | |
22 | | /** |
23 | | * Various reusable shorthands for x86 SIMD intrinsics. |
24 | | * |
25 | | * Intrinsics prefixed with xx_ operate on or return 128bit XMM registers. |
26 | | * Intrinsics prefixed with yy_ operate on or return 256bit YMM registers. |
27 | | */ |
28 | | |
29 | | // Loads and stores to do away with the tedium of casting the address |
30 | | // to the right type. |
31 | 94.5M | static inline __m128i xx_loadl_32(const void *a) { |
32 | 94.5M | int val; |
33 | 94.5M | memcpy(&val, a, sizeof(val)); |
34 | 94.5M | return _mm_cvtsi32_si128(val); |
35 | 94.5M | } loopfilter_sse2.c:xx_loadl_32 Line | Count | Source | 31 | 76.3M | static inline __m128i xx_loadl_32(const void *a) { | 32 | 76.3M | int val; | 33 | 76.3M | memcpy(&val, a, sizeof(val)); | 34 | 76.3M | return _mm_cvtsi32_si128(val); | 35 | 76.3M | } |
blend_a64_mask_sse4.c:xx_loadl_32 Line | Count | Source | 31 | 7.80M | static inline __m128i xx_loadl_32(const void *a) { | 32 | 7.80M | int val; | 33 | 7.80M | memcpy(&val, a, sizeof(val)); | 34 | 7.80M | return _mm_cvtsi32_si128(val); | 35 | 7.80M | } |
blend_a64_vmask_sse4.c:xx_loadl_32 Line | Count | Source | 31 | 309k | static inline __m128i xx_loadl_32(const void *a) { | 32 | 309k | int val; | 33 | 309k | memcpy(&val, a, sizeof(val)); | 34 | 309k | return _mm_cvtsi32_si128(val); | 35 | 309k | } |
Unexecuted instantiation: aom_subpixel_8t_intrin_avx2.c:xx_loadl_32 blend_a64_mask_avx2.c:xx_loadl_32 Line | Count | Source | 31 | 782k | static inline __m128i xx_loadl_32(const void *a) { | 32 | 782k | int val; | 33 | 782k | memcpy(&val, a, sizeof(val)); | 34 | 782k | return _mm_cvtsi32_si128(val); | 35 | 782k | } |
Unexecuted instantiation: highbd_convolve_avx2.c:xx_loadl_32 Unexecuted instantiation: convolve_sse2.c:xx_loadl_32 Unexecuted instantiation: jnt_convolve_sse2.c:xx_loadl_32 Unexecuted instantiation: resize_sse2.c:xx_loadl_32 Unexecuted instantiation: av1_inv_txfm_ssse3.c:xx_loadl_32 Unexecuted instantiation: reconinter_ssse3.c:xx_loadl_32 Unexecuted instantiation: av1_convolve_horiz_rs_sse4.c:xx_loadl_32 filterintra_sse4.c:xx_loadl_32 Line | Count | Source | 31 | 9.29M | static inline __m128i xx_loadl_32(const void *a) { | 32 | 9.29M | int val; | 33 | 9.29M | memcpy(&val, a, sizeof(val)); | 34 | 9.29M | return _mm_cvtsi32_si128(val); | 35 | 9.29M | } |
Unexecuted instantiation: highbd_inv_txfm_sse4.c:xx_loadl_32 Unexecuted instantiation: selfguided_sse4.c:xx_loadl_32 Unexecuted instantiation: av1_inv_txfm_avx2.c:xx_loadl_32 Unexecuted instantiation: convolve_2d_avx2.c:xx_loadl_32 Unexecuted instantiation: convolve_avx2.c:xx_loadl_32 Unexecuted instantiation: highbd_inv_txfm_avx2.c:xx_loadl_32 Unexecuted instantiation: jnt_convolve_avx2.c:xx_loadl_32 Unexecuted instantiation: reconinter_avx2.c:xx_loadl_32 Unexecuted instantiation: resize_avx2.c:xx_loadl_32 Unexecuted instantiation: selfguided_avx2.c:xx_loadl_32 Unexecuted instantiation: warp_plane_avx2.c:xx_loadl_32 Unexecuted instantiation: wiener_convolve_avx2.c:xx_loadl_32 Unexecuted instantiation: highbd_convolve_2d_avx2.c:xx_loadl_32 Unexecuted instantiation: highbd_jnt_convolve_avx2.c:xx_loadl_32 Unexecuted instantiation: highbd_wiener_convolve_avx2.c:xx_loadl_32 |
36 | | |
37 | 329M | static inline __m128i xx_loadl_64(const void *a) { |
38 | 329M | return _mm_loadl_epi64((const __m128i *)a); |
39 | 329M | } Unexecuted instantiation: loopfilter_sse2.c:xx_loadl_64 blend_a64_mask_sse4.c:xx_loadl_64 Line | Count | Source | 37 | 16.5M | static inline __m128i xx_loadl_64(const void *a) { | 38 | 16.5M | return _mm_loadl_epi64((const __m128i *)a); | 39 | 16.5M | } |
blend_a64_vmask_sse4.c:xx_loadl_64 Line | Count | Source | 37 | 5.89M | static inline __m128i xx_loadl_64(const void *a) { | 38 | 5.89M | return _mm_loadl_epi64((const __m128i *)a); | 39 | 5.89M | } |
Unexecuted instantiation: aom_subpixel_8t_intrin_avx2.c:xx_loadl_64 blend_a64_mask_avx2.c:xx_loadl_64 Line | Count | Source | 37 | 4.59M | static inline __m128i xx_loadl_64(const void *a) { | 38 | 4.59M | return _mm_loadl_epi64((const __m128i *)a); | 39 | 4.59M | } |
Unexecuted instantiation: highbd_convolve_avx2.c:xx_loadl_64 Unexecuted instantiation: convolve_sse2.c:xx_loadl_64 Unexecuted instantiation: jnt_convolve_sse2.c:xx_loadl_64 Unexecuted instantiation: resize_sse2.c:xx_loadl_64 Unexecuted instantiation: av1_inv_txfm_ssse3.c:xx_loadl_64 Unexecuted instantiation: reconinter_ssse3.c:xx_loadl_64 av1_convolve_horiz_rs_sse4.c:xx_loadl_64 Line | Count | Source | 37 | 233M | static inline __m128i xx_loadl_64(const void *a) { | 38 | 233M | return _mm_loadl_epi64((const __m128i *)a); | 39 | 233M | } |
filterintra_sse4.c:xx_loadl_64 Line | Count | Source | 37 | 422k | static inline __m128i xx_loadl_64(const void *a) { | 38 | 422k | return _mm_loadl_epi64((const __m128i *)a); | 39 | 422k | } |
Unexecuted instantiation: highbd_inv_txfm_sse4.c:xx_loadl_64 Unexecuted instantiation: selfguided_sse4.c:xx_loadl_64 Unexecuted instantiation: av1_inv_txfm_avx2.c:xx_loadl_64 Unexecuted instantiation: convolve_2d_avx2.c:xx_loadl_64 Unexecuted instantiation: convolve_avx2.c:xx_loadl_64 Unexecuted instantiation: highbd_inv_txfm_avx2.c:xx_loadl_64 Unexecuted instantiation: jnt_convolve_avx2.c:xx_loadl_64 Unexecuted instantiation: reconinter_avx2.c:xx_loadl_64 Unexecuted instantiation: resize_avx2.c:xx_loadl_64 selfguided_avx2.c:xx_loadl_64 Line | Count | Source | 37 | 68.8M | static inline __m128i xx_loadl_64(const void *a) { | 38 | 68.8M | return _mm_loadl_epi64((const __m128i *)a); | 39 | 68.8M | } |
Unexecuted instantiation: warp_plane_avx2.c:xx_loadl_64 Unexecuted instantiation: wiener_convolve_avx2.c:xx_loadl_64 Unexecuted instantiation: highbd_convolve_2d_avx2.c:xx_loadl_64 Unexecuted instantiation: highbd_jnt_convolve_avx2.c:xx_loadl_64 Unexecuted instantiation: highbd_wiener_convolve_avx2.c:xx_loadl_64 |
40 | | |
41 | 1.85M | static inline __m128i xx_load_128(const void *a) { |
42 | 1.85M | return _mm_load_si128((const __m128i *)a); |
43 | 1.85M | } Unexecuted instantiation: loopfilter_sse2.c:xx_load_128 Unexecuted instantiation: blend_a64_mask_sse4.c:xx_load_128 Unexecuted instantiation: blend_a64_vmask_sse4.c:xx_load_128 Unexecuted instantiation: aom_subpixel_8t_intrin_avx2.c:xx_load_128 Unexecuted instantiation: blend_a64_mask_avx2.c:xx_load_128 Unexecuted instantiation: highbd_convolve_avx2.c:xx_load_128 Unexecuted instantiation: convolve_sse2.c:xx_load_128 Unexecuted instantiation: jnt_convolve_sse2.c:xx_load_128 Unexecuted instantiation: resize_sse2.c:xx_load_128 Unexecuted instantiation: av1_inv_txfm_ssse3.c:xx_load_128 Unexecuted instantiation: reconinter_ssse3.c:xx_load_128 Unexecuted instantiation: av1_convolve_horiz_rs_sse4.c:xx_load_128 filterintra_sse4.c:xx_load_128 Line | Count | Source | 41 | 1.85M | static inline __m128i xx_load_128(const void *a) { | 42 | 1.85M | return _mm_load_si128((const __m128i *)a); | 43 | 1.85M | } |
Unexecuted instantiation: highbd_inv_txfm_sse4.c:xx_load_128 Unexecuted instantiation: selfguided_sse4.c:xx_load_128 Unexecuted instantiation: av1_inv_txfm_avx2.c:xx_load_128 Unexecuted instantiation: convolve_2d_avx2.c:xx_load_128 Unexecuted instantiation: convolve_avx2.c:xx_load_128 Unexecuted instantiation: highbd_inv_txfm_avx2.c:xx_load_128 Unexecuted instantiation: jnt_convolve_avx2.c:xx_load_128 Unexecuted instantiation: reconinter_avx2.c:xx_load_128 Unexecuted instantiation: resize_avx2.c:xx_load_128 Unexecuted instantiation: selfguided_avx2.c:xx_load_128 Unexecuted instantiation: warp_plane_avx2.c:xx_load_128 Unexecuted instantiation: wiener_convolve_avx2.c:xx_load_128 Unexecuted instantiation: highbd_convolve_2d_avx2.c:xx_load_128 Unexecuted instantiation: highbd_jnt_convolve_avx2.c:xx_load_128 Unexecuted instantiation: highbd_wiener_convolve_avx2.c:xx_load_128 |
44 | | |
45 | 1.01G | static inline __m128i xx_loadu_128(const void *a) { |
46 | 1.01G | return _mm_loadu_si128((const __m128i *)a); |
47 | 1.01G | } Unexecuted instantiation: loopfilter_sse2.c:xx_loadu_128 blend_a64_mask_sse4.c:xx_loadu_128 Line | Count | Source | 45 | 16.9M | static inline __m128i xx_loadu_128(const void *a) { | 46 | 16.9M | return _mm_loadu_si128((const __m128i *)a); | 47 | 16.9M | } |
blend_a64_vmask_sse4.c:xx_loadu_128 Line | Count | Source | 45 | 7.12M | static inline __m128i xx_loadu_128(const void *a) { | 46 | 7.12M | return _mm_loadu_si128((const __m128i *)a); | 47 | 7.12M | } |
Unexecuted instantiation: aom_subpixel_8t_intrin_avx2.c:xx_loadu_128 blend_a64_mask_avx2.c:xx_loadu_128 Line | Count | Source | 45 | 8.42M | static inline __m128i xx_loadu_128(const void *a) { | 46 | 8.42M | return _mm_loadu_si128((const __m128i *)a); | 47 | 8.42M | } |
Unexecuted instantiation: highbd_convolve_avx2.c:xx_loadu_128 Unexecuted instantiation: convolve_sse2.c:xx_loadu_128 Unexecuted instantiation: jnt_convolve_sse2.c:xx_loadu_128 Unexecuted instantiation: resize_sse2.c:xx_loadu_128 Unexecuted instantiation: av1_inv_txfm_ssse3.c:xx_loadu_128 Unexecuted instantiation: reconinter_ssse3.c:xx_loadu_128 av1_convolve_horiz_rs_sse4.c:xx_loadu_128 Line | Count | Source | 45 | 589M | static inline __m128i xx_loadu_128(const void *a) { | 46 | 589M | return _mm_loadu_si128((const __m128i *)a); | 47 | 589M | } |
Unexecuted instantiation: filterintra_sse4.c:xx_loadu_128 Unexecuted instantiation: highbd_inv_txfm_sse4.c:xx_loadu_128 Unexecuted instantiation: selfguided_sse4.c:xx_loadu_128 Unexecuted instantiation: av1_inv_txfm_avx2.c:xx_loadu_128 Unexecuted instantiation: convolve_2d_avx2.c:xx_loadu_128 Unexecuted instantiation: convolve_avx2.c:xx_loadu_128 Unexecuted instantiation: highbd_inv_txfm_avx2.c:xx_loadu_128 Unexecuted instantiation: jnt_convolve_avx2.c:xx_loadu_128 Unexecuted instantiation: reconinter_avx2.c:xx_loadu_128 Unexecuted instantiation: resize_avx2.c:xx_loadu_128 selfguided_avx2.c:xx_loadu_128 Line | Count | Source | 45 | 392M | static inline __m128i xx_loadu_128(const void *a) { | 46 | 392M | return _mm_loadu_si128((const __m128i *)a); | 47 | 392M | } |
Unexecuted instantiation: warp_plane_avx2.c:xx_loadu_128 wiener_convolve_avx2.c:xx_loadu_128 Line | Count | Source | 45 | 222k | static inline __m128i xx_loadu_128(const void *a) { | 46 | 222k | return _mm_loadu_si128((const __m128i *)a); | 47 | 222k | } |
Unexecuted instantiation: highbd_convolve_2d_avx2.c:xx_loadu_128 Unexecuted instantiation: highbd_jnt_convolve_avx2.c:xx_loadu_128 highbd_wiener_convolve_avx2.c:xx_loadu_128 Line | Count | Source | 45 | 326k | static inline __m128i xx_loadu_128(const void *a) { | 46 | 326k | return _mm_loadu_si128((const __m128i *)a); | 47 | 326k | } |
|
48 | | |
49 | | // Load 64 bits from each of hi and low, and pack into an SSE register |
50 | | // Since directly loading as `int64_t`s and using _mm_set_epi64 may violate |
51 | | // the strict aliasing rule, this takes a different approach |
52 | 0 | static inline __m128i xx_loadu_2x64(const void *hi, const void *lo) { |
53 | 0 | return _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)lo), |
54 | 0 | _mm_loadl_epi64((const __m128i *)hi)); |
55 | 0 | } Unexecuted instantiation: loopfilter_sse2.c:xx_loadu_2x64 Unexecuted instantiation: blend_a64_mask_sse4.c:xx_loadu_2x64 Unexecuted instantiation: blend_a64_vmask_sse4.c:xx_loadu_2x64 Unexecuted instantiation: aom_subpixel_8t_intrin_avx2.c:xx_loadu_2x64 Unexecuted instantiation: blend_a64_mask_avx2.c:xx_loadu_2x64 Unexecuted instantiation: highbd_convolve_avx2.c:xx_loadu_2x64 Unexecuted instantiation: convolve_sse2.c:xx_loadu_2x64 Unexecuted instantiation: jnt_convolve_sse2.c:xx_loadu_2x64 Unexecuted instantiation: resize_sse2.c:xx_loadu_2x64 Unexecuted instantiation: av1_inv_txfm_ssse3.c:xx_loadu_2x64 Unexecuted instantiation: reconinter_ssse3.c:xx_loadu_2x64 Unexecuted instantiation: av1_convolve_horiz_rs_sse4.c:xx_loadu_2x64 Unexecuted instantiation: filterintra_sse4.c:xx_loadu_2x64 Unexecuted instantiation: highbd_inv_txfm_sse4.c:xx_loadu_2x64 Unexecuted instantiation: selfguided_sse4.c:xx_loadu_2x64 Unexecuted instantiation: av1_inv_txfm_avx2.c:xx_loadu_2x64 Unexecuted instantiation: convolve_2d_avx2.c:xx_loadu_2x64 Unexecuted instantiation: convolve_avx2.c:xx_loadu_2x64 Unexecuted instantiation: highbd_inv_txfm_avx2.c:xx_loadu_2x64 Unexecuted instantiation: jnt_convolve_avx2.c:xx_loadu_2x64 Unexecuted instantiation: reconinter_avx2.c:xx_loadu_2x64 Unexecuted instantiation: resize_avx2.c:xx_loadu_2x64 Unexecuted instantiation: selfguided_avx2.c:xx_loadu_2x64 Unexecuted instantiation: warp_plane_avx2.c:xx_loadu_2x64 Unexecuted instantiation: wiener_convolve_avx2.c:xx_loadu_2x64 Unexecuted instantiation: highbd_convolve_2d_avx2.c:xx_loadu_2x64 Unexecuted instantiation: highbd_jnt_convolve_avx2.c:xx_loadu_2x64 Unexecuted instantiation: highbd_wiener_convolve_avx2.c:xx_loadu_2x64 |
56 | | |
57 | 22.6k | static inline void xx_storel_16(void *const a, const __m128i v) { |
58 | 22.6k | const uint16_t val = (uint16_t)_mm_cvtsi128_si32(v); |
59 | 22.6k | memcpy(a, &val, sizeof(val)); |
60 | 22.6k | } Unexecuted instantiation: loopfilter_sse2.c:xx_storel_16 Unexecuted instantiation: blend_a64_mask_sse4.c:xx_storel_16 Unexecuted instantiation: blend_a64_vmask_sse4.c:xx_storel_16 Unexecuted instantiation: aom_subpixel_8t_intrin_avx2.c:xx_storel_16 Unexecuted instantiation: blend_a64_mask_avx2.c:xx_storel_16 Unexecuted instantiation: highbd_convolve_avx2.c:xx_storel_16 Unexecuted instantiation: convolve_sse2.c:xx_storel_16 Unexecuted instantiation: jnt_convolve_sse2.c:xx_storel_16 Unexecuted instantiation: resize_sse2.c:xx_storel_16 Unexecuted instantiation: av1_inv_txfm_ssse3.c:xx_storel_16 Unexecuted instantiation: reconinter_ssse3.c:xx_storel_16 Unexecuted instantiation: av1_convolve_horiz_rs_sse4.c:xx_storel_16 Unexecuted instantiation: filterintra_sse4.c:xx_storel_16 Unexecuted instantiation: highbd_inv_txfm_sse4.c:xx_storel_16 Unexecuted instantiation: selfguided_sse4.c:xx_storel_16 Unexecuted instantiation: av1_inv_txfm_avx2.c:xx_storel_16 Unexecuted instantiation: convolve_2d_avx2.c:xx_storel_16 convolve_avx2.c:xx_storel_16 Line | Count | Source | 57 | 22.6k | static inline void xx_storel_16(void *const a, const __m128i v) { | 58 | 22.6k | const uint16_t val = (uint16_t)_mm_cvtsi128_si32(v); | 59 | 22.6k | memcpy(a, &val, sizeof(val)); | 60 | 22.6k | } |
Unexecuted instantiation: highbd_inv_txfm_avx2.c:xx_storel_16 Unexecuted instantiation: jnt_convolve_avx2.c:xx_storel_16 Unexecuted instantiation: reconinter_avx2.c:xx_storel_16 Unexecuted instantiation: resize_avx2.c:xx_storel_16 Unexecuted instantiation: selfguided_avx2.c:xx_storel_16 Unexecuted instantiation: warp_plane_avx2.c:xx_storel_16 Unexecuted instantiation: wiener_convolve_avx2.c:xx_storel_16 Unexecuted instantiation: highbd_convolve_2d_avx2.c:xx_storel_16 Unexecuted instantiation: highbd_jnt_convolve_avx2.c:xx_storel_16 Unexecuted instantiation: highbd_wiener_convolve_avx2.c:xx_storel_16 |
61 | | |
62 | 155M | static inline void xx_storel_32(void *const a, const __m128i v) { |
63 | 155M | const int val = _mm_cvtsi128_si32(v); |
64 | 155M | memcpy(a, &val, sizeof(val)); |
65 | 155M | } loopfilter_sse2.c:xx_storel_32 Line | Count | Source | 62 | 71.2M | static inline void xx_storel_32(void *const a, const __m128i v) { | 63 | 71.2M | const int val = _mm_cvtsi128_si32(v); | 64 | 71.2M | memcpy(a, &val, sizeof(val)); | 65 | 71.2M | } |
blend_a64_mask_sse4.c:xx_storel_32 Line | Count | Source | 62 | 1.74M | static inline void xx_storel_32(void *const a, const __m128i v) { | 63 | 1.74M | const int val = _mm_cvtsi128_si32(v); | 64 | 1.74M | memcpy(a, &val, sizeof(val)); | 65 | 1.74M | } |
blend_a64_vmask_sse4.c:xx_storel_32 Line | Count | Source | 62 | 154k | static inline void xx_storel_32(void *const a, const __m128i v) { | 63 | 154k | const int val = _mm_cvtsi128_si32(v); | 64 | 154k | memcpy(a, &val, sizeof(val)); | 65 | 154k | } |
Unexecuted instantiation: aom_subpixel_8t_intrin_avx2.c:xx_storel_32 blend_a64_mask_avx2.c:xx_storel_32 Line | Count | Source | 62 | 458k | static inline void xx_storel_32(void *const a, const __m128i v) { | 63 | 458k | const int val = _mm_cvtsi128_si32(v); | 64 | 458k | memcpy(a, &val, sizeof(val)); | 65 | 458k | } |
highbd_convolve_avx2.c:xx_storel_32 Line | Count | Source | 62 | 841k | static inline void xx_storel_32(void *const a, const __m128i v) { | 63 | 841k | const int val = _mm_cvtsi128_si32(v); | 64 | 841k | memcpy(a, &val, sizeof(val)); | 65 | 841k | } |
Unexecuted instantiation: convolve_sse2.c:xx_storel_32 Unexecuted instantiation: jnt_convolve_sse2.c:xx_storel_32 Unexecuted instantiation: resize_sse2.c:xx_storel_32 Unexecuted instantiation: av1_inv_txfm_ssse3.c:xx_storel_32 Unexecuted instantiation: reconinter_ssse3.c:xx_storel_32 av1_convolve_horiz_rs_sse4.c:xx_storel_32 Line | Count | Source | 62 | 58.2M | static inline void xx_storel_32(void *const a, const __m128i v) { | 63 | 58.2M | const int val = _mm_cvtsi128_si32(v); | 64 | 58.2M | memcpy(a, &val, sizeof(val)); | 65 | 58.2M | } |
filterintra_sse4.c:xx_storel_32 Line | Count | Source | 62 | 17.2M | static inline void xx_storel_32(void *const a, const __m128i v) { | 63 | 17.2M | const int val = _mm_cvtsi128_si32(v); | 64 | 17.2M | memcpy(a, &val, sizeof(val)); | 65 | 17.2M | } |
Unexecuted instantiation: highbd_inv_txfm_sse4.c:xx_storel_32 Unexecuted instantiation: selfguided_sse4.c:xx_storel_32 Unexecuted instantiation: av1_inv_txfm_avx2.c:xx_storel_32 convolve_2d_avx2.c:xx_storel_32 Line | Count | Source | 62 | 2.71M | static inline void xx_storel_32(void *const a, const __m128i v) { | 63 | 2.71M | const int val = _mm_cvtsi128_si32(v); | 64 | 2.71M | memcpy(a, &val, sizeof(val)); | 65 | 2.71M | } |
convolve_avx2.c:xx_storel_32 Line | Count | Source | 62 | 982k | static inline void xx_storel_32(void *const a, const __m128i v) { | 63 | 982k | const int val = _mm_cvtsi128_si32(v); | 64 | 982k | memcpy(a, &val, sizeof(val)); | 65 | 982k | } |
Unexecuted instantiation: highbd_inv_txfm_avx2.c:xx_storel_32 Unexecuted instantiation: jnt_convolve_avx2.c:xx_storel_32 Unexecuted instantiation: reconinter_avx2.c:xx_storel_32 Unexecuted instantiation: resize_avx2.c:xx_storel_32 Unexecuted instantiation: selfguided_avx2.c:xx_storel_32 Unexecuted instantiation: warp_plane_avx2.c:xx_storel_32 Unexecuted instantiation: wiener_convolve_avx2.c:xx_storel_32 highbd_convolve_2d_avx2.c:xx_storel_32 Line | Count | Source | 62 | 1.78M | static inline void xx_storel_32(void *const a, const __m128i v) { | 63 | 1.78M | const int val = _mm_cvtsi128_si32(v); | 64 | 1.78M | memcpy(a, &val, sizeof(val)); | 65 | 1.78M | } |
Unexecuted instantiation: highbd_jnt_convolve_avx2.c:xx_storel_32 Unexecuted instantiation: highbd_wiener_convolve_avx2.c:xx_storel_32 |
66 | | |
67 | 151M | static inline void xx_storel_64(void *const a, const __m128i v) { |
68 | 151M | _mm_storel_epi64((__m128i *)a, v); |
69 | 151M | } Unexecuted instantiation: loopfilter_sse2.c:xx_storel_64 blend_a64_mask_sse4.c:xx_storel_64 Line | Count | Source | 67 | 3.92M | static inline void xx_storel_64(void *const a, const __m128i v) { | 68 | 3.92M | _mm_storel_epi64((__m128i *)a, v); | 69 | 3.92M | } |
blend_a64_vmask_sse4.c:xx_storel_64 Line | Count | Source | 67 | 1.10M | static inline void xx_storel_64(void *const a, const __m128i v) { | 68 | 1.10M | _mm_storel_epi64((__m128i *)a, v); | 69 | 1.10M | } |
Unexecuted instantiation: aom_subpixel_8t_intrin_avx2.c:xx_storel_64 blend_a64_mask_avx2.c:xx_storel_64 Line | Count | Source | 67 | 1.58M | static inline void xx_storel_64(void *const a, const __m128i v) { | 68 | 1.58M | _mm_storel_epi64((__m128i *)a, v); | 69 | 1.58M | } |
Unexecuted instantiation: highbd_convolve_avx2.c:xx_storel_64 Unexecuted instantiation: convolve_sse2.c:xx_storel_64 Unexecuted instantiation: jnt_convolve_sse2.c:xx_storel_64 Unexecuted instantiation: resize_sse2.c:xx_storel_64 Unexecuted instantiation: av1_inv_txfm_ssse3.c:xx_storel_64 Unexecuted instantiation: reconinter_ssse3.c:xx_storel_64 av1_convolve_horiz_rs_sse4.c:xx_storel_64 Line | Count | Source | 67 | 144M | static inline void xx_storel_64(void *const a, const __m128i v) { | 68 | 144M | _mm_storel_epi64((__m128i *)a, v); | 69 | 144M | } |
Unexecuted instantiation: filterintra_sse4.c:xx_storel_64 Unexecuted instantiation: highbd_inv_txfm_sse4.c:xx_storel_64 Unexecuted instantiation: selfguided_sse4.c:xx_storel_64 Unexecuted instantiation: av1_inv_txfm_avx2.c:xx_storel_64 Unexecuted instantiation: convolve_2d_avx2.c:xx_storel_64 Unexecuted instantiation: convolve_avx2.c:xx_storel_64 Unexecuted instantiation: highbd_inv_txfm_avx2.c:xx_storel_64 Unexecuted instantiation: jnt_convolve_avx2.c:xx_storel_64 Unexecuted instantiation: reconinter_avx2.c:xx_storel_64 Unexecuted instantiation: resize_avx2.c:xx_storel_64 Unexecuted instantiation: selfguided_avx2.c:xx_storel_64 Unexecuted instantiation: warp_plane_avx2.c:xx_storel_64 Unexecuted instantiation: wiener_convolve_avx2.c:xx_storel_64 Unexecuted instantiation: highbd_convolve_2d_avx2.c:xx_storel_64 Unexecuted instantiation: highbd_jnt_convolve_avx2.c:xx_storel_64 Unexecuted instantiation: highbd_wiener_convolve_avx2.c:xx_storel_64 |
70 | | |
71 | 0 | static inline void xx_store_128(void *const a, const __m128i v) { |
72 | 0 | _mm_store_si128((__m128i *)a, v); |
73 | 0 | } Unexecuted instantiation: loopfilter_sse2.c:xx_store_128 Unexecuted instantiation: blend_a64_mask_sse4.c:xx_store_128 Unexecuted instantiation: blend_a64_vmask_sse4.c:xx_store_128 Unexecuted instantiation: aom_subpixel_8t_intrin_avx2.c:xx_store_128 Unexecuted instantiation: blend_a64_mask_avx2.c:xx_store_128 Unexecuted instantiation: highbd_convolve_avx2.c:xx_store_128 Unexecuted instantiation: convolve_sse2.c:xx_store_128 Unexecuted instantiation: jnt_convolve_sse2.c:xx_store_128 Unexecuted instantiation: resize_sse2.c:xx_store_128 Unexecuted instantiation: av1_inv_txfm_ssse3.c:xx_store_128 Unexecuted instantiation: reconinter_ssse3.c:xx_store_128 Unexecuted instantiation: av1_convolve_horiz_rs_sse4.c:xx_store_128 Unexecuted instantiation: filterintra_sse4.c:xx_store_128 Unexecuted instantiation: highbd_inv_txfm_sse4.c:xx_store_128 Unexecuted instantiation: selfguided_sse4.c:xx_store_128 Unexecuted instantiation: av1_inv_txfm_avx2.c:xx_store_128 Unexecuted instantiation: convolve_2d_avx2.c:xx_store_128 Unexecuted instantiation: convolve_avx2.c:xx_store_128 Unexecuted instantiation: highbd_inv_txfm_avx2.c:xx_store_128 Unexecuted instantiation: jnt_convolve_avx2.c:xx_store_128 Unexecuted instantiation: reconinter_avx2.c:xx_store_128 Unexecuted instantiation: resize_avx2.c:xx_store_128 Unexecuted instantiation: selfguided_avx2.c:xx_store_128 Unexecuted instantiation: warp_plane_avx2.c:xx_store_128 Unexecuted instantiation: wiener_convolve_avx2.c:xx_store_128 Unexecuted instantiation: highbd_convolve_2d_avx2.c:xx_store_128 Unexecuted instantiation: highbd_jnt_convolve_avx2.c:xx_store_128 Unexecuted instantiation: highbd_wiener_convolve_avx2.c:xx_store_128 |
74 | | |
75 | 43.5M | static inline void xx_storeu_128(void *const a, const __m128i v) { |
76 | 43.5M | _mm_storeu_si128((__m128i *)a, v); |
77 | 43.5M | } Unexecuted instantiation: loopfilter_sse2.c:xx_storeu_128 blend_a64_mask_sse4.c:xx_storeu_128 Line | Count | Source | 75 | 7.93M | static inline void xx_storeu_128(void *const a, const __m128i v) { | 76 | 7.93M | _mm_storeu_si128((__m128i *)a, v); | 77 | 7.93M | } |
blend_a64_vmask_sse4.c:xx_storeu_128 Line | Count | Source | 75 | 4.48M | static inline void xx_storeu_128(void *const a, const __m128i v) { | 76 | 4.48M | _mm_storeu_si128((__m128i *)a, v); | 77 | 4.48M | } |
Unexecuted instantiation: aom_subpixel_8t_intrin_avx2.c:xx_storeu_128 blend_a64_mask_avx2.c:xx_storeu_128 Line | Count | Source | 75 | 1.26M | static inline void xx_storeu_128(void *const a, const __m128i v) { | 76 | 1.26M | _mm_storeu_si128((__m128i *)a, v); | 77 | 1.26M | } |
Unexecuted instantiation: highbd_convolve_avx2.c:xx_storeu_128 Unexecuted instantiation: convolve_sse2.c:xx_storeu_128 Unexecuted instantiation: jnt_convolve_sse2.c:xx_storeu_128 Unexecuted instantiation: resize_sse2.c:xx_storeu_128 Unexecuted instantiation: av1_inv_txfm_ssse3.c:xx_storeu_128 Unexecuted instantiation: reconinter_ssse3.c:xx_storeu_128 Unexecuted instantiation: av1_convolve_horiz_rs_sse4.c:xx_storeu_128 Unexecuted instantiation: filterintra_sse4.c:xx_storeu_128 Unexecuted instantiation: highbd_inv_txfm_sse4.c:xx_storeu_128 Unexecuted instantiation: selfguided_sse4.c:xx_storeu_128 Unexecuted instantiation: av1_inv_txfm_avx2.c:xx_storeu_128 Unexecuted instantiation: convolve_2d_avx2.c:xx_storeu_128 Unexecuted instantiation: convolve_avx2.c:xx_storeu_128 Unexecuted instantiation: highbd_inv_txfm_avx2.c:xx_storeu_128 Unexecuted instantiation: jnt_convolve_avx2.c:xx_storeu_128 Unexecuted instantiation: reconinter_avx2.c:xx_storeu_128 Unexecuted instantiation: resize_avx2.c:xx_storeu_128 selfguided_avx2.c:xx_storeu_128 Line | Count | Source | 75 | 29.9M | static inline void xx_storeu_128(void *const a, const __m128i v) { | 76 | 29.9M | _mm_storeu_si128((__m128i *)a, v); | 77 | 29.9M | } |
Unexecuted instantiation: warp_plane_avx2.c:xx_storeu_128 Unexecuted instantiation: wiener_convolve_avx2.c:xx_storeu_128 Unexecuted instantiation: highbd_convolve_2d_avx2.c:xx_storeu_128 Unexecuted instantiation: highbd_jnt_convolve_avx2.c:xx_storeu_128 Unexecuted instantiation: highbd_wiener_convolve_avx2.c:xx_storeu_128 |
78 | | |
79 | | // Fill an SSE register using an interleaved pair of values, ie. set the |
80 | | // 8 channels to {a, b, a, b, a, b, a, b}, using the same channel ordering |
81 | | // as when a register is stored to / loaded from memory. |
82 | | // |
83 | | // This is useful for rearranging filter kernels for use with the _mm_madd_epi16 |
84 | | // instruction |
85 | 0 | static inline __m128i xx_set2_epi16(int16_t a, int16_t b) { |
86 | 0 | return _mm_setr_epi16(a, b, a, b, a, b, a, b); |
87 | 0 | } Unexecuted instantiation: loopfilter_sse2.c:xx_set2_epi16 Unexecuted instantiation: blend_a64_mask_sse4.c:xx_set2_epi16 Unexecuted instantiation: blend_a64_vmask_sse4.c:xx_set2_epi16 Unexecuted instantiation: aom_subpixel_8t_intrin_avx2.c:xx_set2_epi16 Unexecuted instantiation: blend_a64_mask_avx2.c:xx_set2_epi16 Unexecuted instantiation: highbd_convolve_avx2.c:xx_set2_epi16 Unexecuted instantiation: convolve_sse2.c:xx_set2_epi16 Unexecuted instantiation: jnt_convolve_sse2.c:xx_set2_epi16 Unexecuted instantiation: resize_sse2.c:xx_set2_epi16 Unexecuted instantiation: av1_inv_txfm_ssse3.c:xx_set2_epi16 Unexecuted instantiation: reconinter_ssse3.c:xx_set2_epi16 Unexecuted instantiation: av1_convolve_horiz_rs_sse4.c:xx_set2_epi16 Unexecuted instantiation: filterintra_sse4.c:xx_set2_epi16 Unexecuted instantiation: highbd_inv_txfm_sse4.c:xx_set2_epi16 Unexecuted instantiation: selfguided_sse4.c:xx_set2_epi16 Unexecuted instantiation: av1_inv_txfm_avx2.c:xx_set2_epi16 Unexecuted instantiation: convolve_2d_avx2.c:xx_set2_epi16 Unexecuted instantiation: convolve_avx2.c:xx_set2_epi16 Unexecuted instantiation: highbd_inv_txfm_avx2.c:xx_set2_epi16 Unexecuted instantiation: jnt_convolve_avx2.c:xx_set2_epi16 Unexecuted instantiation: reconinter_avx2.c:xx_set2_epi16 Unexecuted instantiation: resize_avx2.c:xx_set2_epi16 Unexecuted instantiation: selfguided_avx2.c:xx_set2_epi16 Unexecuted instantiation: warp_plane_avx2.c:xx_set2_epi16 Unexecuted instantiation: wiener_convolve_avx2.c:xx_set2_epi16 Unexecuted instantiation: highbd_convolve_2d_avx2.c:xx_set2_epi16 Unexecuted instantiation: highbd_jnt_convolve_avx2.c:xx_set2_epi16 Unexecuted instantiation: highbd_wiener_convolve_avx2.c:xx_set2_epi16 |
88 | | |
89 | 868k | static inline __m128i xx_round_epu16(__m128i v_val_w) { |
90 | 868k | return _mm_avg_epu16(v_val_w, _mm_setzero_si128()); |
91 | 868k | } Unexecuted instantiation: loopfilter_sse2.c:xx_round_epu16 blend_a64_mask_sse4.c:xx_round_epu16 Line | Count | Source | 89 | 604k | static inline __m128i xx_round_epu16(__m128i v_val_w) { | 90 | 604k | return _mm_avg_epu16(v_val_w, _mm_setzero_si128()); | 91 | 604k | } |
blend_a64_vmask_sse4.c:xx_round_epu16 Line | Count | Source | 89 | 264k | static inline __m128i xx_round_epu16(__m128i v_val_w) { | 90 | 264k | return _mm_avg_epu16(v_val_w, _mm_setzero_si128()); | 91 | 264k | } |
Unexecuted instantiation: aom_subpixel_8t_intrin_avx2.c:xx_round_epu16 Unexecuted instantiation: blend_a64_mask_avx2.c:xx_round_epu16 Unexecuted instantiation: highbd_convolve_avx2.c:xx_round_epu16 Unexecuted instantiation: convolve_sse2.c:xx_round_epu16 Unexecuted instantiation: jnt_convolve_sse2.c:xx_round_epu16 Unexecuted instantiation: resize_sse2.c:xx_round_epu16 Unexecuted instantiation: av1_inv_txfm_ssse3.c:xx_round_epu16 Unexecuted instantiation: reconinter_ssse3.c:xx_round_epu16 Unexecuted instantiation: av1_convolve_horiz_rs_sse4.c:xx_round_epu16 Unexecuted instantiation: filterintra_sse4.c:xx_round_epu16 Unexecuted instantiation: highbd_inv_txfm_sse4.c:xx_round_epu16 Unexecuted instantiation: selfguided_sse4.c:xx_round_epu16 Unexecuted instantiation: av1_inv_txfm_avx2.c:xx_round_epu16 Unexecuted instantiation: convolve_2d_avx2.c:xx_round_epu16 Unexecuted instantiation: convolve_avx2.c:xx_round_epu16 Unexecuted instantiation: highbd_inv_txfm_avx2.c:xx_round_epu16 Unexecuted instantiation: jnt_convolve_avx2.c:xx_round_epu16 Unexecuted instantiation: reconinter_avx2.c:xx_round_epu16 Unexecuted instantiation: resize_avx2.c:xx_round_epu16 Unexecuted instantiation: selfguided_avx2.c:xx_round_epu16 Unexecuted instantiation: warp_plane_avx2.c:xx_round_epu16 Unexecuted instantiation: wiener_convolve_avx2.c:xx_round_epu16 Unexecuted instantiation: highbd_convolve_2d_avx2.c:xx_round_epu16 Unexecuted instantiation: highbd_jnt_convolve_avx2.c:xx_round_epu16 Unexecuted instantiation: highbd_wiener_convolve_avx2.c:xx_round_epu16 |
92 | | |
93 | 16.7M | static inline __m128i xx_roundn_epu16(__m128i v_val_w, int bits) { |
94 | 16.7M | const __m128i v_s_w = _mm_srli_epi16(v_val_w, bits - 1); |
95 | 16.7M | return _mm_avg_epu16(v_s_w, _mm_setzero_si128()); |
96 | 16.7M | } Unexecuted instantiation: loopfilter_sse2.c:xx_roundn_epu16 blend_a64_mask_sse4.c:xx_roundn_epu16 Line | Count | Source | 93 | 10.0M | static inline __m128i xx_roundn_epu16(__m128i v_val_w, int bits) { | 94 | 10.0M | const __m128i v_s_w = _mm_srli_epi16(v_val_w, bits - 1); | 95 | 10.0M | return _mm_avg_epu16(v_s_w, _mm_setzero_si128()); | 96 | 10.0M | } |
blend_a64_vmask_sse4.c:xx_roundn_epu16 Line | Count | Source | 93 | 6.39M | static inline __m128i xx_roundn_epu16(__m128i v_val_w, int bits) { | 94 | 6.39M | const __m128i v_s_w = _mm_srli_epi16(v_val_w, bits - 1); | 95 | 6.39M | return _mm_avg_epu16(v_s_w, _mm_setzero_si128()); | 96 | 6.39M | } |
Unexecuted instantiation: aom_subpixel_8t_intrin_avx2.c:xx_roundn_epu16 blend_a64_mask_avx2.c:xx_roundn_epu16 Line | Count | Source | 93 | 325k | static inline __m128i xx_roundn_epu16(__m128i v_val_w, int bits) { | 94 | 325k | const __m128i v_s_w = _mm_srli_epi16(v_val_w, bits - 1); | 95 | 325k | return _mm_avg_epu16(v_s_w, _mm_setzero_si128()); | 96 | 325k | } |
Unexecuted instantiation: highbd_convolve_avx2.c:xx_roundn_epu16 Unexecuted instantiation: convolve_sse2.c:xx_roundn_epu16 Unexecuted instantiation: jnt_convolve_sse2.c:xx_roundn_epu16 Unexecuted instantiation: resize_sse2.c:xx_roundn_epu16 Unexecuted instantiation: av1_inv_txfm_ssse3.c:xx_roundn_epu16 Unexecuted instantiation: reconinter_ssse3.c:xx_roundn_epu16 Unexecuted instantiation: av1_convolve_horiz_rs_sse4.c:xx_roundn_epu16 Unexecuted instantiation: filterintra_sse4.c:xx_roundn_epu16 Unexecuted instantiation: highbd_inv_txfm_sse4.c:xx_roundn_epu16 Unexecuted instantiation: selfguided_sse4.c:xx_roundn_epu16 Unexecuted instantiation: av1_inv_txfm_avx2.c:xx_roundn_epu16 Unexecuted instantiation: convolve_2d_avx2.c:xx_roundn_epu16 Unexecuted instantiation: convolve_avx2.c:xx_roundn_epu16 Unexecuted instantiation: highbd_inv_txfm_avx2.c:xx_roundn_epu16 Unexecuted instantiation: jnt_convolve_avx2.c:xx_roundn_epu16 Unexecuted instantiation: reconinter_avx2.c:xx_roundn_epu16 Unexecuted instantiation: resize_avx2.c:xx_roundn_epu16 Unexecuted instantiation: selfguided_avx2.c:xx_roundn_epu16 Unexecuted instantiation: warp_plane_avx2.c:xx_roundn_epu16 Unexecuted instantiation: wiener_convolve_avx2.c:xx_roundn_epu16 Unexecuted instantiation: highbd_convolve_2d_avx2.c:xx_roundn_epu16 Unexecuted instantiation: highbd_jnt_convolve_avx2.c:xx_roundn_epu16 Unexecuted instantiation: highbd_wiener_convolve_avx2.c:xx_roundn_epu16 |
97 | | |
// Rounding right shift of four 32-bit lanes treated as unsigned values:
// each lane becomes (lane + 2^(bits-1)) >> bits, using a logical shift.
//
// v_val_d: the four 32-bit lanes to round.
// bits:    shift amount, expected to be in [0, 31]. With bits == 0 the bias
//          is zero and the input is returned unchanged.
//
// The addition is a plain lane-wise add, so a lane that is within
// 2^(bits-1) of UINT32_MAX wraps around before the shift.
static inline __m128i xx_roundn_epu32(__m128i v_val_d, int bits) {
  // Build the bias with an unsigned shift: `1 << 31` on a signed int is
  // undefined and would typically yield 0xC0000000 after the `>> 1`.
  const unsigned int half = (1u << bits) >> 1;
  const __m128i v_bias_d = _mm_set1_epi32((int)half);
  return _mm_srli_epi32(_mm_add_epi32(v_val_d, v_bias_d), bits);
}
103 | | |
// Rounding right shift of eight signed 16-bit lanes: each lane becomes
// (lane + 2^(bits-1)) >> bits, using an arithmetic (sign-preserving) shift.
// Ties therefore round towards +infinity for both positive and negative
// inputs.
//
// v_val_d: the eight 16-bit lanes to round.
// bits:    shift amount; with bits == 0 the input is returned unchanged.
//
// The addition is a plain lane-wise 16-bit add, so it wraps on overflow.
static inline __m128i xx_roundn_epi16_unsigned(__m128i v_val_d, int bits) {
  // Half of the divisor, narrowed explicitly to the 16-bit lane width.
  const short half = (short)((1u << bits) >> 1);
  const __m128i v_bias_w = _mm_set1_epi16(half);
  return _mm_srai_epi16(_mm_add_epi16(v_val_d, v_bias_w), bits);
}
109 | | |
// Rounding right shift of four signed 32-bit lanes: each lane becomes
// (lane + 2^(bits-1)) >> bits, using an arithmetic (sign-preserving) shift.
// This is equivalent to ROUND_POWER_OF_TWO(v_val_d, bits) applied per lane.
//
// v_val_d: the four 32-bit lanes to round.
// bits:    shift amount, expected to be in [0, 31]. With bits == 0 the input
//          is returned unchanged.
//
// The addition is a plain lane-wise add, so it wraps on overflow.
static inline __m128i xx_roundn_epi32_unsigned(__m128i v_val_d, int bits) {
  // Build the bias with an unsigned shift: `1 << 31` on a signed int is
  // undefined and would typically produce a negative bias after the `>> 1`.
  const unsigned int half = (1u << bits) >> 1;
  const __m128i v_bias_d = _mm_set1_epi32((int)half);
  return _mm_srai_epi32(_mm_add_epi32(v_val_d, v_bias_d), bits);
}
116 | | |
// Symmetric rounding right shift of eight signed 16-bit lanes. Each lane
// becomes (lane + 2^(bits-1) + sign) >> bits with an arithmetic shift, where
// sign is -1 for negative lanes and 0 otherwise. The extra -1 makes ties on
// negative inputs round away from zero, mirroring the positive side
// (e.g. with bits == 2: 6 -> 2 and -6 -> -2).
//
// v_val_d: the eight 16-bit lanes to round.
// bits:    shift amount.
//
// The additions are plain lane-wise 16-bit adds, so they wrap on overflow.
static inline __m128i xx_roundn_epi16(__m128i v_val_d, int bits) {
  // Half of the divisor, narrowed explicitly to the 16-bit lane width.
  const short half = (short)((1u << bits) >> 1);
  const __m128i v_bias_w = _mm_set1_epi16(half);
  // Arithmetic shift by 15 smears the sign bit: all-ones (-1) or zero.
  const __m128i v_sign_w = _mm_srai_epi16(v_val_d, 15);
  const __m128i v_biased_w = _mm_add_epi16(v_val_d, v_bias_w);
  const __m128i v_adjusted_w = _mm_add_epi16(v_biased_w, v_sign_w);
  return _mm_srai_epi16(v_adjusted_w, bits);
}
124 | | |
125 | | #endif // AOM_AOM_DSP_X86_SYNONYMS_H_ |