/src/aom/aom_dsp/x86/synonyms.h
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2016, Alliance for Open Media. All rights reserved. |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | | */ |
11 | | |
12 | | #ifndef AOM_AOM_DSP_X86_SYNONYMS_H_ |
13 | | #define AOM_AOM_DSP_X86_SYNONYMS_H_ |
14 | | |
15 | | #include <emmintrin.h> |
16 | | #include <string.h> |
17 | | |
18 | | #include "config/aom_config.h" |
19 | | |
20 | | #include "aom/aom_integer.h" |
21 | | |
22 | | /** |
23 | | * Various reusable shorthands for x86 SIMD intrinsics. |
24 | | * |
25 | | * Intrinsics prefixed with xx_ operate on or return 128bit XMM registers. |
26 | | * Intrinsics prefixed with yy_ operate on or return 256bit YMM registers. |
27 | | */ |
28 | | |
29 | | // Loads and stores to do away with the tedium of casting the address |
30 | | // to the right type. |
// Reads 4 bytes from `a` and returns them in the lowest 32-bit lane of an XMM
// register. The bytes are fetched with memcpy, so `a` is accessed as raw
// bytes rather than through an `int` pointer.
static inline __m128i xx_loadl_32(const void *a) {
  int low_dword;
  memcpy(&low_dword, a, sizeof low_dword);
  const __m128i result = _mm_cvtsi32_si128(low_dword);
  return result;
}
36 | | |
// Reads 8 bytes from `a` into the low 64 bits of an XMM register via
// _mm_loadl_epi64. The caller passes an untyped pointer; the cast to
// `const __m128i *` required by the intrinsic is done here.
static inline __m128i xx_loadl_64(const void *a) {
  const __m128i *const src = (const __m128i *)a;
  return _mm_loadl_epi64(src);
}
40 | | |
// Reads 16 bytes from `a` with the aligned-load intrinsic _mm_load_si128.
// The intrinsic expects `a` to be 16-byte aligned; use xx_loadu_128 for
// addresses without that guarantee.
static inline __m128i xx_load_128(const void *a) {
  const __m128i *const src = (const __m128i *)a;
  return _mm_load_si128(src);
}
44 | | |
// Reads 16 bytes from `a` with the unaligned-load intrinsic _mm_loadu_si128,
// so `a` does not need any particular alignment.
static inline __m128i xx_loadu_128(const void *a) {
  const __m128i *const src = (const __m128i *)a;
  return _mm_loadu_si128(src);
}
48 | | |
// Builds one XMM register from two separate 8-byte sources: the 8 bytes at
// `lo` fill the low 64 bits and the 8 bytes at `hi` fill the high 64 bits.
//
// Reading the sources as `int64_t` values and combining them with
// _mm_set_epi64 may violate the strict aliasing rule, so each half is loaded
// with _mm_loadl_epi64 and the two halves are then interleaved.
static inline __m128i xx_loadu_2x64(const void *hi, const void *lo) {
  const __m128i low_half = _mm_loadl_epi64((const __m128i *)lo);
  const __m128i high_half = _mm_loadl_epi64((const __m128i *)hi);
  return _mm_unpacklo_epi64(low_half, high_half);
}
56 | | |
// Writes the lowest 16 bits of `v` to the 2 bytes at `a`. The value is
// copied out with memcpy, so `a` is written as raw bytes.
static inline void xx_storel_16(void *const a, const __m128i v) {
  const int low_dword = _mm_cvtsi128_si32(v);
  // Keep only the bottom 16 bits of the extracted 32-bit lane.
  const uint16_t low_word = (uint16_t)low_dword;
  memcpy(a, &low_word, sizeof low_word);
}
61 | | |
// Writes the lowest 32-bit lane of `v` to the 4 bytes at `a`. The value is
// copied out with memcpy, so `a` is written as raw bytes.
static inline void xx_storel_32(void *const a, const __m128i v) {
  const int low_dword = _mm_cvtsi128_si32(v);
  memcpy(a, &low_dword, sizeof low_dword);
}
66 | | |
// Writes the low 64 bits of `v` to the 8 bytes at `a` via _mm_storel_epi64.
// The cast to `__m128i *` required by the intrinsic is done here.
static inline void xx_storel_64(void *const a, const __m128i v) {
  __m128i *const dst = (__m128i *)a;
  _mm_storel_epi64(dst, v);
}
70 | | |
// Writes all 16 bytes of `v` to `a` with the aligned-store intrinsic
// _mm_store_si128. The intrinsic expects `a` to be 16-byte aligned; use
// xx_storeu_128 for addresses without that guarantee.
static inline void xx_store_128(void *const a, const __m128i v) {
  __m128i *const dst = (__m128i *)a;
  _mm_store_si128(dst, v);
}
74 | | |
// Writes all 16 bytes of `v` to `a` with the unaligned-store intrinsic
// _mm_storeu_si128, so `a` does not need any particular alignment.
static inline void xx_storeu_128(void *const a, const __m128i v) {
  __m128i *const dst = (__m128i *)a;
  _mm_storeu_si128(dst, v);
}
78 | | |
// Returns an SSE register whose eight 16-bit channels alternate between the
// two arguments: {a, b, a, b, a, b, a, b}, listed in the same order in which
// the channels appear when the register is stored to / loaded from memory.
//
// Handy for laying out pairs of filter taps for the _mm_madd_epi16
// instruction.
static inline __m128i xx_set2_epi16(int16_t a, int16_t b) {
  const __m128i all_a = _mm_set1_epi16(a);
  const __m128i all_b = _mm_set1_epi16(b);
  // Interleaving the low halves of the two broadcasts yields a, b, a, b, ...
  return _mm_unpacklo_epi16(all_a, all_b);
}
88 | | |
// Halves each unsigned 16-bit channel of `v_val_w`, rounding up on ties:
// averaging with zero via _mm_avg_epu16 yields (x + 1) >> 1 per channel.
static inline __m128i xx_round_epu16(__m128i v_val_w) {
  const __m128i v_zero = _mm_setzero_si128();
  return _mm_avg_epu16(v_val_w, v_zero);
}
92 | | |
// Divides each unsigned 16-bit channel of `v_val_w` by 2^bits with rounding.
// The value is first shifted right by (bits - 1), and the last bit is then
// removed by averaging with zero, which computes (x + 1) >> 1 per channel.
// NOTE(review): the (bits - 1) shift count presumably requires bits >= 1 --
// confirm against callers.
static inline __m128i xx_roundn_epu16(__m128i v_val_w, int bits) {
  const int pre_shift = bits - 1;
  const __m128i v_zero = _mm_setzero_si128();
  const __m128i v_shifted_w = _mm_srli_epi16(v_val_w, pre_shift);
  return _mm_avg_epu16(v_shifted_w, v_zero);
}
97 | | |
// Divides each unsigned 32-bit lane of v_val_d by 2^bits with rounding:
// (v + (2^bits >> 1)) >> bits, where the final shift is logical
// (zero-filling). The addition wraps modulo 2^32, so the caller must make
// sure v + 2^(bits-1) fits in 32 bits. bits is expected to be in [0, 31].
static inline __m128i xx_roundn_epu32(__m128i v_val_d, int bits) {
  // The bias is built with unsigned arithmetic: with a signed 1, bits == 31
  // overflows `1 << bits` (undefined behaviour) and the following arithmetic
  // `>> 1` typically yields 0xC0000000 rather than the intended 0x40000000.
  const unsigned int bias = (1u << bits) >> 1;
  const __m128i v_bias_d = _mm_set1_epi32((int)bias);
  const __m128i v_tmp_d = _mm_add_epi32(v_val_d, v_bias_d);
  return _mm_srli_epi32(v_tmp_d, bits);
}
103 | | |
// Adds half of 2^bits to every 16-bit lane of v_val_d and then shifts each
// lane right arithmetically by bits, i.e. (v + (2^bits >> 1)) >> bits per
// lane. The addition wraps on 16-bit overflow.
static inline __m128i xx_roundn_epi16_unsigned(__m128i v_val_d, int bits) {
  const int half = (1 << bits) >> 1;
  const __m128i v_biased_w =
      _mm_add_epi16(v_val_d, _mm_set1_epi16((short)half));
  return _mm_srai_epi16(v_biased_w, bits);
}
Unexecuted instantiation: highbd_inv_txfm_sse4.c:xx_roundn_epi16_unsigned Unexecuted instantiation: selfguided_sse4.c:xx_roundn_epi16_unsigned Unexecuted instantiation: av1_inv_txfm_avx2.c:xx_roundn_epi16_unsigned Unexecuted instantiation: convolve_2d_avx2.c:xx_roundn_epi16_unsigned Unexecuted instantiation: convolve_avx2.c:xx_roundn_epi16_unsigned Unexecuted instantiation: highbd_inv_txfm_avx2.c:xx_roundn_epi16_unsigned Unexecuted instantiation: jnt_convolve_avx2.c:xx_roundn_epi16_unsigned Unexecuted instantiation: reconinter_avx2.c:xx_roundn_epi16_unsigned Unexecuted instantiation: resize_avx2.c:xx_roundn_epi16_unsigned Unexecuted instantiation: selfguided_avx2.c:xx_roundn_epi16_unsigned Unexecuted instantiation: warp_plane_avx2.c:xx_roundn_epi16_unsigned Unexecuted instantiation: wiener_convolve_avx2.c:xx_roundn_epi16_unsigned Unexecuted instantiation: highbd_convolve_2d_avx2.c:xx_roundn_epi16_unsigned Unexecuted instantiation: highbd_jnt_convolve_avx2.c:xx_roundn_epi16_unsigned Unexecuted instantiation: highbd_wiener_convolve_avx2.c:xx_roundn_epi16_unsigned |
109 | | |
// This is equivalent to ROUND_POWER_OF_TWO(v_val_d, bits) applied to each
// 32-bit lane: (v + (2^bits >> 1)) >> bits, where the final shift is
// arithmetic (sign-filling). The addition wraps on 32-bit overflow. bits is
// expected to be in [0, 31].
static inline __m128i xx_roundn_epi32_unsigned(__m128i v_val_d, int bits) {
  // The bias is built with unsigned arithmetic: with a signed 1, bits == 31
  // overflows `1 << bits` (undefined behaviour) and the following arithmetic
  // `>> 1` typically yields a negative bias instead of 0x40000000.
  const unsigned int bias = (1u << bits) >> 1;
  const __m128i v_bias_d = _mm_set1_epi32((int)bias);
  const __m128i v_tmp_d = _mm_add_epi32(v_val_d, v_bias_d);
  return _mm_srai_epi32(v_tmp_d, bits);
}
116 | | |
// Divides each signed 16-bit lane of v_val_d by 2^bits, rounding to nearest
// with ties away from zero: ((v + (2^bits >> 1) + sign) >> bits), where sign
// is -1 for negative lanes and 0 otherwise, and the shift is arithmetic.
// The additions wrap on 16-bit overflow.
static inline __m128i xx_roundn_epi16(__m128i v_val_d, int bits) {
  // Dividing by 2^0 is the identity. Without this guard the sign correction
  // below would still be applied and negative lanes would come back
  // decremented by one (e.g. -7 -> -8).
  if (bits == 0) return v_val_d;

  const __m128i v_bias_d = _mm_set1_epi16((1 << bits) >> 1);
  // Arithmetic shift by 15 replicates the sign bit: 0xFFFF (-1) for negative
  // lanes, 0 for non-negative ones.
  const __m128i v_sign_d = _mm_srai_epi16(v_val_d, 15);
  const __m128i v_tmp_d =
      _mm_add_epi16(_mm_add_epi16(v_val_d, v_bias_d), v_sign_d);
  return _mm_srai_epi16(v_tmp_d, bits);
}
124 | | |
125 | | #endif // AOM_AOM_DSP_X86_SYNONYMS_H_ |