/src/aom/av1/common/cfl.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2016, Alliance for Open Media. All rights reserved. |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | | */ |
11 | | |
12 | | #ifndef AOM_AV1_COMMON_CFL_H_ |
13 | | #define AOM_AV1_COMMON_CFL_H_ |
14 | | |
15 | | #include "av1/common/av1_common_int.h" |
16 | | #include "av1/common/blockd.h" |
17 | | |
18 | | // Can we use CfL for the current block? |
19 | 11.4M | static inline CFL_ALLOWED_TYPE is_cfl_allowed(const MACROBLOCKD *xd) { |
20 | 11.4M | const MB_MODE_INFO *mbmi = xd->mi[0]; |
21 | 11.4M | const BLOCK_SIZE bsize = mbmi->bsize; |
22 | 11.4M | assert(bsize < BLOCK_SIZES_ALL); |
23 | 11.4M | if (xd->lossless[mbmi->segment_id]) { |
24 | | // In lossless, CfL is available when the partition size is equal to the |
25 | | // transform size. |
26 | 68.4k | const int ssx = xd->plane[AOM_PLANE_U].subsampling_x; |
27 | 68.4k | const int ssy = xd->plane[AOM_PLANE_U].subsampling_y; |
28 | 68.4k | const int plane_bsize = get_plane_block_size(bsize, ssx, ssy); |
29 | 68.4k | return (CFL_ALLOWED_TYPE)(plane_bsize == BLOCK_4X4); |
30 | 68.4k | } |
31 | | // Spec: CfL is available to luma partitions lesser than or equal to 32x32 |
32 | 11.4M | return (CFL_ALLOWED_TYPE)(block_size_wide[bsize] <= 32 && |
33 | 11.4M | block_size_high[bsize] <= 32); |
34 | 11.4M | } Unexecuted instantiation: decodeframe.c:is_cfl_allowed decodemv.c:is_cfl_allowed Line | Count | Source | 19 | 8.71M | static inline CFL_ALLOWED_TYPE is_cfl_allowed(const MACROBLOCKD *xd) { | 20 | 8.71M | const MB_MODE_INFO *mbmi = xd->mi[0]; | 21 | 8.71M | const BLOCK_SIZE bsize = mbmi->bsize; | 22 | 8.71M | assert(bsize < BLOCK_SIZES_ALL); | 23 | 8.71M | if (xd->lossless[mbmi->segment_id]) { | 24 | | // In lossless, CfL is available when the partition size is equal to the | 25 | | // transform size. | 26 | 67.8k | const int ssx = xd->plane[AOM_PLANE_U].subsampling_x; | 27 | 67.8k | const int ssy = xd->plane[AOM_PLANE_U].subsampling_y; | 28 | 67.8k | const int plane_bsize = get_plane_block_size(bsize, ssx, ssy); | 29 | 67.8k | return (CFL_ALLOWED_TYPE)(plane_bsize == BLOCK_4X4); | 30 | 67.8k | } | 31 | | // Spec: CfL is available to luma partitions lesser than or equal to 32x32 | 32 | 8.65M | return (CFL_ALLOWED_TYPE)(block_size_wide[bsize] <= 32 && | 33 | 8.65M | block_size_high[bsize] <= 32); | 34 | 8.71M | } |
Line | Count | Source | 19 | 2.77M | static inline CFL_ALLOWED_TYPE is_cfl_allowed(const MACROBLOCKD *xd) { | 20 | 2.77M | const MB_MODE_INFO *mbmi = xd->mi[0]; | 21 | 2.77M | const BLOCK_SIZE bsize = mbmi->bsize; | 22 | 2.77M | assert(bsize < BLOCK_SIZES_ALL); | 23 | 2.77M | if (xd->lossless[mbmi->segment_id]) { | 24 | | // In lossless, CfL is available when the partition size is equal to the | 25 | | // transform size. | 26 | 618 | const int ssx = xd->plane[AOM_PLANE_U].subsampling_x; | 27 | 618 | const int ssy = xd->plane[AOM_PLANE_U].subsampling_y; | 28 | 618 | const int plane_bsize = get_plane_block_size(bsize, ssx, ssy); | 29 | 618 | return (CFL_ALLOWED_TYPE)(plane_bsize == BLOCK_4X4); | 30 | 618 | } | 31 | | // Spec: CfL is available to luma partitions lesser than or equal to 32x32 | 32 | 2.77M | return (CFL_ALLOWED_TYPE)(block_size_wide[bsize] <= 32 && | 33 | 2.77M | block_size_high[bsize] <= 32); | 34 | 2.77M | } |
Unexecuted instantiation: reconintra.c:is_cfl_allowed Unexecuted instantiation: cfl_sse2.c:is_cfl_allowed Unexecuted instantiation: cfl_ssse3.c:is_cfl_allowed Unexecuted instantiation: cfl_avx2.c:is_cfl_allowed |
35 | | |
36 | | // Do we need to save the luma pixels from the current block, |
37 | | // for a possible future CfL prediction? |
38 | | static inline CFL_ALLOWED_TYPE store_cfl_required(const AV1_COMMON *cm, |
39 | 50.3M | const MACROBLOCKD *xd) { |
40 | 50.3M | const MB_MODE_INFO *mbmi = xd->mi[0]; |
41 | | |
42 | 50.3M | if (cm->seq_params->monochrome) return CFL_DISALLOWED; |
43 | | |
44 | 48.4M | if (!xd->is_chroma_ref) { |
45 | | // For non-chroma-reference blocks, we should always store the luma pixels, |
46 | | // in case the corresponding chroma-reference block uses CfL. |
47 | | // Note that this can only happen for block sizes which are <8 on |
48 | | // their shortest side, as otherwise they would be chroma reference |
49 | | // blocks. |
50 | 2.14M | return CFL_ALLOWED; |
51 | 2.14M | } |
52 | | |
53 | | // If this block has chroma information, we know whether we're |
54 | | // actually going to perform a CfL prediction |
55 | 46.3M | return (CFL_ALLOWED_TYPE)(!is_inter_block(mbmi) && |
56 | 46.3M | mbmi->uv_mode == UV_CFL_PRED); |
57 | 48.4M | } decodeframe.c:store_cfl_required Line | Count | Source | 39 | 36.8M | const MACROBLOCKD *xd) { | 40 | 36.8M | const MB_MODE_INFO *mbmi = xd->mi[0]; | 41 | | | 42 | 36.8M | if (cm->seq_params->monochrome) return CFL_DISALLOWED; | 43 | | | 44 | 35.5M | if (!xd->is_chroma_ref) { | 45 | | // For non-chroma-reference blocks, we should always store the luma pixels, | 46 | | // in case the corresponding chroma-reference block uses CfL. | 47 | | // Note that this can only happen for block sizes which are <8 on | 48 | | // their shortest side, as otherwise they would be chroma reference | 49 | | // blocks. | 50 | 1.05M | return CFL_ALLOWED; | 51 | 1.05M | } | 52 | | | 53 | | // If this block has chroma information, we know whether we're | 54 | | // actually going to perform a CfL prediction | 55 | 34.5M | return (CFL_ALLOWED_TYPE)(!is_inter_block(mbmi) && | 56 | 34.5M | mbmi->uv_mode == UV_CFL_PRED); | 57 | 35.5M | } |
decodemv.c:store_cfl_required Line | Count | Source | 39 | 13.5M | const MACROBLOCKD *xd) { | 40 | 13.5M | const MB_MODE_INFO *mbmi = xd->mi[0]; | 41 | | | 42 | 13.5M | if (cm->seq_params->monochrome) return CFL_DISALLOWED; | 43 | | | 44 | 12.9M | if (!xd->is_chroma_ref) { | 45 | | // For non-chroma-reference blocks, we should always store the luma pixels, | 46 | | // in case the corresponding chroma-reference block uses CfL. | 47 | | // Note that this can only happen for block sizes which are <8 on | 48 | | // their shortest side, as otherwise they would be chroma reference | 49 | | // blocks. | 50 | 1.08M | return CFL_ALLOWED; | 51 | 1.08M | } | 52 | | | 53 | | // If this block has chroma information, we know whether we're | 54 | | // actually going to perform a CfL prediction | 55 | 11.8M | return (CFL_ALLOWED_TYPE)(!is_inter_block(mbmi) && | 56 | 11.8M | mbmi->uv_mode == UV_CFL_PRED); | 57 | 12.9M | } |
Unexecuted instantiation: cfl.c:store_cfl_required Unexecuted instantiation: reconintra.c:store_cfl_required Unexecuted instantiation: cfl_sse2.c:store_cfl_required Unexecuted instantiation: cfl_ssse3.c:store_cfl_required Unexecuted instantiation: cfl_avx2.c:store_cfl_required |
58 | | |
59 | 0 | static inline int get_scaled_luma_q0(int alpha_q3, int16_t pred_buf_q3) { |
60 | 0 | int scaled_luma_q6 = alpha_q3 * pred_buf_q3; |
61 | 0 | return ROUND_POWER_OF_TWO_SIGNED(scaled_luma_q6, 6); |
62 | 0 | } Unexecuted instantiation: decodeframe.c:get_scaled_luma_q0 Unexecuted instantiation: decodemv.c:get_scaled_luma_q0 Unexecuted instantiation: cfl.c:get_scaled_luma_q0 Unexecuted instantiation: reconintra.c:get_scaled_luma_q0 Unexecuted instantiation: cfl_sse2.c:get_scaled_luma_q0 Unexecuted instantiation: cfl_ssse3.c:get_scaled_luma_q0 Unexecuted instantiation: cfl_avx2.c:get_scaled_luma_q0 |
63 | | |
64 | 2.77M | static inline CFL_PRED_TYPE get_cfl_pred_type(int plane) { |
65 | 2.77M | assert(plane > 0); |
66 | 2.77M | return (CFL_PRED_TYPE)(plane - 1); |
67 | 2.77M | } Unexecuted instantiation: decodeframe.c:get_cfl_pred_type Unexecuted instantiation: decodemv.c:get_cfl_pred_type Unexecuted instantiation: cfl.c:get_cfl_pred_type reconintra.c:get_cfl_pred_type Line | Count | Source | 64 | 2.77M | static inline CFL_PRED_TYPE get_cfl_pred_type(int plane) { | 65 | 2.77M | assert(plane > 0); | 66 | 2.77M | return (CFL_PRED_TYPE)(plane - 1); | 67 | 2.77M | } |
Unexecuted instantiation: cfl_sse2.c:get_cfl_pred_type Unexecuted instantiation: cfl_ssse3.c:get_cfl_pred_type Unexecuted instantiation: cfl_avx2.c:get_cfl_pred_type |
68 | | |
69 | 284k | static inline void clear_cfl_dc_pred_cache_flags(CFL_CTX *cfl) { |
70 | 284k | cfl->use_dc_pred_cache = false; |
71 | 284k | cfl->dc_pred_is_cached[CFL_PRED_U] = false; |
72 | 284k | cfl->dc_pred_is_cached[CFL_PRED_V] = false; |
73 | 284k | } Unexecuted instantiation: decodeframe.c:clear_cfl_dc_pred_cache_flags Unexecuted instantiation: decodemv.c:clear_cfl_dc_pred_cache_flags cfl.c:clear_cfl_dc_pred_cache_flags Line | Count | Source | 69 | 284k | static inline void clear_cfl_dc_pred_cache_flags(CFL_CTX *cfl) { | 70 | 284k | cfl->use_dc_pred_cache = false; | 71 | 284k | cfl->dc_pred_is_cached[CFL_PRED_U] = false; | 72 | 284k | cfl->dc_pred_is_cached[CFL_PRED_V] = false; | 73 | 284k | } |
Unexecuted instantiation: reconintra.c:clear_cfl_dc_pred_cache_flags Unexecuted instantiation: cfl_sse2.c:clear_cfl_dc_pred_cache_flags Unexecuted instantiation: cfl_ssse3.c:clear_cfl_dc_pred_cache_flags Unexecuted instantiation: cfl_avx2.c:clear_cfl_dc_pred_cache_flags |
74 | | |
75 | | void av1_cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride, |
76 | | TX_SIZE tx_size, int plane); |
77 | | |
78 | | void cfl_store_block(MACROBLOCKD *const xd, BLOCK_SIZE bsize, TX_SIZE tx_size); |
79 | | |
80 | | void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size, |
81 | | BLOCK_SIZE bsize); |
82 | | |
83 | | void cfl_store_dc_pred(MACROBLOCKD *const xd, const uint8_t *input, |
84 | | CFL_PRED_TYPE pred_plane, int width); |
85 | | |
86 | | void cfl_load_dc_pred(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride, |
87 | | TX_SIZE tx_size, CFL_PRED_TYPE pred_plane); |
88 | | |
89 | | // Allows the CFL_SUBSAMPLE function to switch types depending on the bitdepth. |
90 | | #define CFL_lbd_TYPE uint8_t *cfl_type |
91 | | #define CFL_hbd_TYPE uint16_t *cfl_type |
92 | | |
93 | | // Declare a size-specific wrapper for the size-generic function. The compiler |
94 | | // will inline the size generic function in here, the advantage is that the size |
95 | | // will be constant allowing for loop unrolling and other constant propagated |
96 | | // goodness. |
97 | | #define CFL_SUBSAMPLE(arch, sub, bd, width, height) \ |
98 | | void cfl_subsample_##bd##_##sub##_##width##x##height##_##arch( \ |
99 | | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3); \ |
100 | | void cfl_subsample_##bd##_##sub##_##width##x##height##_##arch( \ |
101 | 2.69M | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ |
102 | 2.69M | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ |
103 | 2.69M | output_q3, width, height); \ |
104 | 2.69M | } Unexecuted instantiation: cfl_subsample_lbd_420_4x4_c Unexecuted instantiation: cfl_subsample_lbd_420_8x8_c Unexecuted instantiation: cfl_subsample_lbd_420_16x16_c Unexecuted instantiation: cfl_subsample_lbd_420_32x32_c Unexecuted instantiation: cfl_subsample_lbd_420_4x8_c Unexecuted instantiation: cfl_subsample_lbd_420_8x4_c Unexecuted instantiation: cfl_subsample_lbd_420_8x16_c Unexecuted instantiation: cfl_subsample_lbd_420_16x8_c Unexecuted instantiation: cfl_subsample_lbd_420_16x32_c Unexecuted instantiation: cfl_subsample_lbd_420_32x16_c Unexecuted instantiation: cfl_subsample_lbd_420_4x16_c Unexecuted instantiation: cfl_subsample_lbd_420_16x4_c Unexecuted instantiation: cfl_subsample_lbd_420_8x32_c Unexecuted instantiation: cfl_subsample_lbd_420_32x8_c Unexecuted instantiation: cfl_subsample_lbd_422_4x4_c Unexecuted instantiation: cfl_subsample_lbd_422_8x8_c Unexecuted instantiation: cfl_subsample_lbd_422_16x16_c Unexecuted instantiation: cfl_subsample_lbd_422_32x32_c Unexecuted instantiation: cfl_subsample_lbd_422_4x8_c Unexecuted instantiation: cfl_subsample_lbd_422_8x4_c Unexecuted instantiation: cfl_subsample_lbd_422_8x16_c Unexecuted instantiation: cfl_subsample_lbd_422_16x8_c Unexecuted instantiation: cfl_subsample_lbd_422_16x32_c Unexecuted instantiation: cfl_subsample_lbd_422_32x16_c Unexecuted instantiation: cfl_subsample_lbd_422_4x16_c Unexecuted instantiation: cfl_subsample_lbd_422_16x4_c Unexecuted instantiation: cfl_subsample_lbd_422_8x32_c Unexecuted instantiation: cfl_subsample_lbd_422_32x8_c Unexecuted instantiation: cfl_subsample_lbd_444_4x4_c Unexecuted instantiation: cfl_subsample_lbd_444_8x8_c Unexecuted instantiation: cfl_subsample_lbd_444_16x16_c Unexecuted instantiation: cfl_subsample_lbd_444_32x32_c Unexecuted instantiation: cfl_subsample_lbd_444_4x8_c Unexecuted instantiation: cfl_subsample_lbd_444_8x4_c Unexecuted instantiation: cfl_subsample_lbd_444_8x16_c Unexecuted instantiation: cfl_subsample_lbd_444_16x8_c 
Unexecuted instantiation: cfl_subsample_lbd_444_16x32_c Unexecuted instantiation: cfl_subsample_lbd_444_32x16_c Unexecuted instantiation: cfl_subsample_lbd_444_4x16_c Unexecuted instantiation: cfl_subsample_lbd_444_16x4_c Unexecuted instantiation: cfl_subsample_lbd_444_8x32_c Unexecuted instantiation: cfl_subsample_lbd_444_32x8_c Unexecuted instantiation: cfl_subsample_hbd_420_4x4_c Unexecuted instantiation: cfl_subsample_hbd_420_8x8_c Unexecuted instantiation: cfl_subsample_hbd_420_16x16_c Unexecuted instantiation: cfl_subsample_hbd_420_32x32_c Unexecuted instantiation: cfl_subsample_hbd_420_4x8_c Unexecuted instantiation: cfl_subsample_hbd_420_8x4_c Unexecuted instantiation: cfl_subsample_hbd_420_8x16_c Unexecuted instantiation: cfl_subsample_hbd_420_16x8_c Unexecuted instantiation: cfl_subsample_hbd_420_16x32_c Unexecuted instantiation: cfl_subsample_hbd_420_32x16_c Unexecuted instantiation: cfl_subsample_hbd_420_4x16_c Unexecuted instantiation: cfl_subsample_hbd_420_16x4_c Unexecuted instantiation: cfl_subsample_hbd_420_8x32_c Unexecuted instantiation: cfl_subsample_hbd_420_32x8_c Unexecuted instantiation: cfl_subsample_hbd_422_4x4_c Unexecuted instantiation: cfl_subsample_hbd_422_8x8_c Unexecuted instantiation: cfl_subsample_hbd_422_16x16_c Unexecuted instantiation: cfl_subsample_hbd_422_32x32_c Unexecuted instantiation: cfl_subsample_hbd_422_4x8_c Unexecuted instantiation: cfl_subsample_hbd_422_8x4_c Unexecuted instantiation: cfl_subsample_hbd_422_8x16_c Unexecuted instantiation: cfl_subsample_hbd_422_16x8_c Unexecuted instantiation: cfl_subsample_hbd_422_16x32_c Unexecuted instantiation: cfl_subsample_hbd_422_32x16_c Unexecuted instantiation: cfl_subsample_hbd_422_4x16_c Unexecuted instantiation: cfl_subsample_hbd_422_16x4_c Unexecuted instantiation: cfl_subsample_hbd_422_8x32_c Unexecuted instantiation: cfl_subsample_hbd_422_32x8_c Unexecuted instantiation: cfl_subsample_hbd_444_4x4_c Unexecuted instantiation: cfl_subsample_hbd_444_8x8_c Unexecuted 
instantiation: cfl_subsample_hbd_444_16x16_c Unexecuted instantiation: cfl_subsample_hbd_444_32x32_c Unexecuted instantiation: cfl_subsample_hbd_444_4x8_c Unexecuted instantiation: cfl_subsample_hbd_444_8x4_c Unexecuted instantiation: cfl_subsample_hbd_444_8x16_c Unexecuted instantiation: cfl_subsample_hbd_444_16x8_c Unexecuted instantiation: cfl_subsample_hbd_444_16x32_c Unexecuted instantiation: cfl_subsample_hbd_444_32x16_c Unexecuted instantiation: cfl_subsample_hbd_444_4x16_c Unexecuted instantiation: cfl_subsample_hbd_444_16x4_c Unexecuted instantiation: cfl_subsample_hbd_444_8x32_c Unexecuted instantiation: cfl_subsample_hbd_444_32x8_c cfl_subsample_lbd_420_4x4_ssse3 Line | Count | Source | 101 | 172k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 172k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 172k | output_q3, width, height); \ | 104 | 172k | } |
cfl_subsample_lbd_420_8x8_ssse3 Line | Count | Source | 101 | 85.6k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 85.6k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 85.6k | output_q3, width, height); \ | 104 | 85.6k | } |
cfl_subsample_lbd_420_16x16_ssse3 Line | Count | Source | 101 | 38.7k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 38.7k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 38.7k | output_q3, width, height); \ | 104 | 38.7k | } |
Unexecuted instantiation: cfl_subsample_lbd_420_32x32_ssse3 cfl_subsample_lbd_420_4x8_ssse3 Line | Count | Source | 101 | 86.9k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 86.9k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 86.9k | output_q3, width, height); \ | 104 | 86.9k | } |
cfl_subsample_lbd_420_8x4_ssse3 Line | Count | Source | 101 | 110k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 110k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 110k | output_q3, width, height); \ | 104 | 110k | } |
cfl_subsample_lbd_420_8x16_ssse3 Line | Count | Source | 101 | 20.2k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 20.2k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 20.2k | output_q3, width, height); \ | 104 | 20.2k | } |
cfl_subsample_lbd_420_16x8_ssse3 Line | Count | Source | 101 | 32.1k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 32.1k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 32.1k | output_q3, width, height); \ | 104 | 32.1k | } |
cfl_subsample_lbd_420_16x32_ssse3 Line | Count | Source | 101 | 6.31k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 6.31k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 6.31k | output_q3, width, height); \ | 104 | 6.31k | } |
Unexecuted instantiation: cfl_subsample_lbd_420_32x16_ssse3 cfl_subsample_lbd_420_4x16_ssse3 Line | Count | Source | 101 | 108k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 108k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 108k | output_q3, width, height); \ | 104 | 108k | } |
cfl_subsample_lbd_420_16x4_ssse3 Line | Count | Source | 101 | 137k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 137k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 137k | output_q3, width, height); \ | 104 | 137k | } |
cfl_subsample_lbd_420_8x32_ssse3 Line | Count | Source | 101 | 10.9k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 10.9k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 10.9k | output_q3, width, height); \ | 104 | 10.9k | } |
Unexecuted instantiation: cfl_subsample_lbd_420_32x8_ssse3 cfl_subsample_lbd_422_4x4_ssse3 Line | Count | Source | 101 | 248 | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 248 | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 248 | output_q3, width, height); \ | 104 | 248 | } |
cfl_subsample_lbd_422_8x8_ssse3 Line | Count | Source | 101 | 286 | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 286 | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 286 | output_q3, width, height); \ | 104 | 286 | } |
cfl_subsample_lbd_422_16x16_ssse3 Line | Count | Source | 101 | 545 | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 545 | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 545 | output_q3, width, height); \ | 104 | 545 | } |
Unexecuted instantiation: cfl_subsample_lbd_422_32x32_ssse3 Unexecuted instantiation: cfl_subsample_lbd_422_4x8_ssse3 cfl_subsample_lbd_422_8x4_ssse3 Line | Count | Source | 101 | 140 | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 140 | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 140 | output_q3, width, height); \ | 104 | 140 | } |
Unexecuted instantiation: cfl_subsample_lbd_422_8x16_ssse3 cfl_subsample_lbd_422_16x8_ssse3 Line | Count | Source | 101 | 171 | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 171 | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 171 | output_q3, width, height); \ | 104 | 171 | } |
Unexecuted instantiation: cfl_subsample_lbd_422_16x32_ssse3 Unexecuted instantiation: cfl_subsample_lbd_422_32x16_ssse3 Unexecuted instantiation: cfl_subsample_lbd_422_4x16_ssse3 cfl_subsample_lbd_422_16x4_ssse3 Line | Count | Source | 101 | 158 | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 158 | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 158 | output_q3, width, height); \ | 104 | 158 | } |
Unexecuted instantiation: cfl_subsample_lbd_422_8x32_ssse3 Unexecuted instantiation: cfl_subsample_lbd_422_32x8_ssse3 cfl_subsample_lbd_444_4x4_ssse3 Line | Count | Source | 101 | 71.1k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 71.1k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 71.1k | output_q3, width, height); \ | 104 | 71.1k | } |
cfl_subsample_lbd_444_8x8_ssse3 Line | Count | Source | 101 | 76.1k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 76.1k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 76.1k | output_q3, width, height); \ | 104 | 76.1k | } |
cfl_subsample_lbd_444_16x16_ssse3 Line | Count | Source | 101 | 36.0k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 36.0k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 36.0k | output_q3, width, height); \ | 104 | 36.0k | } |
Unexecuted instantiation: cfl_subsample_lbd_444_32x32_ssse3 cfl_subsample_lbd_444_4x8_ssse3 Line | Count | Source | 101 | 14.5k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 14.5k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 14.5k | output_q3, width, height); \ | 104 | 14.5k | } |
cfl_subsample_lbd_444_8x4_ssse3 Line | Count | Source | 101 | 22.1k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 22.1k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 22.1k | output_q3, width, height); \ | 104 | 22.1k | } |
cfl_subsample_lbd_444_8x16_ssse3 Line | Count | Source | 101 | 23.8k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 23.8k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 23.8k | output_q3, width, height); \ | 104 | 23.8k | } |
cfl_subsample_lbd_444_16x8_ssse3 Line | Count | Source | 101 | 33.4k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 33.4k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 33.4k | output_q3, width, height); \ | 104 | 33.4k | } |
cfl_subsample_lbd_444_16x32_ssse3 Line | Count | Source | 101 | 8.57k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 8.57k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 8.57k | output_q3, width, height); \ | 104 | 8.57k | } |
Unexecuted instantiation: cfl_subsample_lbd_444_32x16_ssse3 cfl_subsample_lbd_444_4x16_ssse3 Line | Count | Source | 101 | 19.0k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 19.0k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 19.0k | output_q3, width, height); \ | 104 | 19.0k | } |
cfl_subsample_lbd_444_16x4_ssse3 Line | Count | Source | 101 | 40.4k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 40.4k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 40.4k | output_q3, width, height); \ | 104 | 40.4k | } |
cfl_subsample_lbd_444_8x32_ssse3 Line | Count | Source | 101 | 19.8k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 19.8k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 19.8k | output_q3, width, height); \ | 104 | 19.8k | } |
Unexecuted instantiation: cfl_subsample_lbd_444_32x8_ssse3 cfl_subsample_hbd_420_4x4_ssse3 Line | Count | Source | 101 | 140k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 140k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 140k | output_q3, width, height); \ | 104 | 140k | } |
cfl_subsample_hbd_420_8x8_ssse3 Line | Count | Source | 101 | 50.4k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 50.4k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 50.4k | output_q3, width, height); \ | 104 | 50.4k | } |
cfl_subsample_hbd_420_16x16_ssse3 Line | Count | Source | 101 | 25.3k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 25.3k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 25.3k | output_q3, width, height); \ | 104 | 25.3k | } |
Unexecuted instantiation: cfl_subsample_hbd_420_32x32_ssse3 cfl_subsample_hbd_420_4x8_ssse3 Line | Count | Source | 101 | 71.5k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 71.5k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 71.5k | output_q3, width, height); \ | 104 | 71.5k | } |
cfl_subsample_hbd_420_8x4_ssse3 Line | Count | Source | 101 | 103k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 103k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 103k | output_q3, width, height); \ | 104 | 103k | } |
cfl_subsample_hbd_420_8x16_ssse3 Line | Count | Source | 101 | 10.0k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 10.0k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 10.0k | output_q3, width, height); \ | 104 | 10.0k | } |
cfl_subsample_hbd_420_16x8_ssse3 Line | Count | Source | 101 | 23.7k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 23.7k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 23.7k | output_q3, width, height); \ | 104 | 23.7k | } |
cfl_subsample_hbd_420_16x32_ssse3 Line | Count | Source | 101 | 3.91k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 3.91k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 3.91k | output_q3, width, height); \ | 104 | 3.91k | } |
Unexecuted instantiation: cfl_subsample_hbd_420_32x16_ssse3 cfl_subsample_hbd_420_4x16_ssse3 Line | Count | Source | 101 | 93.8k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 93.8k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 93.8k | output_q3, width, height); \ | 104 | 93.8k | } |
cfl_subsample_hbd_420_16x4_ssse3 Line | Count | Source | 101 | 129k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 129k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 129k | output_q3, width, height); \ | 104 | 129k | } |
cfl_subsample_hbd_420_8x32_ssse3 Line | Count | Source | 101 | 8.27k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 8.27k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 8.27k | output_q3, width, height); \ | 104 | 8.27k | } |
Unexecuted instantiation: cfl_subsample_hbd_420_32x8_ssse3 cfl_subsample_hbd_422_4x4_ssse3 Line | Count | Source | 101 | 1.00k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 1.00k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 1.00k | output_q3, width, height); \ | 104 | 1.00k | } |
cfl_subsample_hbd_422_8x8_ssse3 Line | Count | Source | 101 | 223 | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 223 | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 223 | output_q3, width, height); \ | 104 | 223 | } |
cfl_subsample_hbd_422_16x16_ssse3 Line | Count | Source | 101 | 51 | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 51 | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 51 | output_q3, width, height); \ | 104 | 51 | } |
Unexecuted instantiation: cfl_subsample_hbd_422_32x32_ssse3 Unexecuted instantiation: cfl_subsample_hbd_422_4x8_ssse3 cfl_subsample_hbd_422_8x4_ssse3 Line | Count | Source | 101 | 94 | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 94 | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 94 | output_q3, width, height); \ | 104 | 94 | } |
Unexecuted instantiation: cfl_subsample_hbd_422_8x16_ssse3 cfl_subsample_hbd_422_16x8_ssse3 Line | Count | Source | 101 | 116 | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 116 | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 116 | output_q3, width, height); \ | 104 | 116 | } |
Unexecuted instantiation: cfl_subsample_hbd_422_16x32_ssse3 Unexecuted instantiation: cfl_subsample_hbd_422_32x16_ssse3 Unexecuted instantiation: cfl_subsample_hbd_422_4x16_ssse3 cfl_subsample_hbd_422_16x4_ssse3 Line | Count | Source | 101 | 59 | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 59 | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 59 | output_q3, width, height); \ | 104 | 59 | } |
Unexecuted instantiation: cfl_subsample_hbd_422_8x32_ssse3 Unexecuted instantiation: cfl_subsample_hbd_422_32x8_ssse3 cfl_subsample_hbd_444_4x4_ssse3 Line | Count | Source | 101 | 147k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 147k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 147k | output_q3, width, height); \ | 104 | 147k | } |
cfl_subsample_hbd_444_8x8_ssse3 Line | Count | Source | 101 | 193k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 193k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 193k | output_q3, width, height); \ | 104 | 193k | } |
cfl_subsample_hbd_444_16x16_ssse3 Line | Count | Source | 101 | 43.1k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 43.1k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 43.1k | output_q3, width, height); \ | 104 | 43.1k | } |
Unexecuted instantiation: cfl_subsample_hbd_444_32x32_ssse3 cfl_subsample_hbd_444_4x8_ssse3 Line | Count | Source | 101 | 41.5k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 41.5k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 41.5k | output_q3, width, height); \ | 104 | 41.5k | } |
cfl_subsample_hbd_444_8x4_ssse3 Line | Count | Source | 101 | 68.1k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 68.1k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 68.1k | output_q3, width, height); \ | 104 | 68.1k | } |
cfl_subsample_hbd_444_8x16_ssse3 Line | Count | Source | 101 | 47.9k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 47.9k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 47.9k | output_q3, width, height); \ | 104 | 47.9k | } |
cfl_subsample_hbd_444_16x8_ssse3 Line | Count | Source | 101 | 51.8k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 51.8k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 51.8k | output_q3, width, height); \ | 104 | 51.8k | } |
cfl_subsample_hbd_444_16x32_ssse3 Line | Count | Source | 101 | 9.31k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 9.31k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 9.31k | output_q3, width, height); \ | 104 | 9.31k | } |
Unexecuted instantiation: cfl_subsample_hbd_444_32x16_ssse3 cfl_subsample_hbd_444_4x16_ssse3 Line | Count | Source | 101 | 29.6k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 29.6k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 29.6k | output_q3, width, height); \ | 104 | 29.6k | } |
cfl_subsample_hbd_444_16x4_ssse3 Line | Count | Source | 101 | 40.6k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 40.6k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 40.6k | output_q3, width, height); \ | 104 | 40.6k | } |
cfl_subsample_hbd_444_8x32_ssse3 Line | Count | Source | 101 | 17.1k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 17.1k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 17.1k | output_q3, width, height); \ | 104 | 17.1k | } |
Unexecuted instantiation: cfl_subsample_hbd_444_32x8_ssse3 cfl_subsample_lbd_420_32x32_avx2 Line | Count | Source | 101 | 18.8k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 18.8k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 18.8k | output_q3, width, height); \ | 104 | 18.8k | } |
cfl_subsample_lbd_420_32x16_avx2 Line | Count | Source | 101 | 7.33k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 7.33k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 7.33k | output_q3, width, height); \ | 104 | 7.33k | } |
cfl_subsample_lbd_420_32x8_avx2 Line | Count | Source | 101 | 12.2k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 12.2k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 12.2k | output_q3, width, height); \ | 104 | 12.2k | } |
cfl_subsample_lbd_422_32x32_avx2 Line | Count | Source | 101 | 90 | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 90 | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 90 | output_q3, width, height); \ | 104 | 90 | } |
cfl_subsample_lbd_422_32x16_avx2 Line | Count | Source | 101 | 346 | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 346 | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 346 | output_q3, width, height); \ | 104 | 346 | } |
cfl_subsample_lbd_422_32x8_avx2 Line | Count | Source | 101 | 18 | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 18 | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 18 | output_q3, width, height); \ | 104 | 18 | } |
cfl_subsample_lbd_444_32x32_avx2 Line | Count | Source | 101 | 17.8k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 17.8k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 17.8k | output_q3, width, height); \ | 104 | 17.8k | } |
cfl_subsample_lbd_444_32x16_avx2 Line | Count | Source | 101 | 9.91k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 9.91k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 9.91k | output_q3, width, height); \ | 104 | 9.91k | } |
cfl_subsample_lbd_444_32x8_avx2 Line | Count | Source | 101 | 16.9k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 16.9k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 16.9k | output_q3, width, height); \ | 104 | 16.9k | } |
cfl_subsample_hbd_420_32x32_avx2 Line | Count | Source | 101 | 11.6k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 11.6k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 11.6k | output_q3, width, height); \ | 104 | 11.6k | } |
cfl_subsample_hbd_420_32x16_avx2 Line | Count | Source | 101 | 4.87k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 4.87k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 4.87k | output_q3, width, height); \ | 104 | 4.87k | } |
cfl_subsample_hbd_420_32x8_avx2 Line | Count | Source | 101 | 7.32k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 7.32k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 7.32k | output_q3, width, height); \ | 104 | 7.32k | } |
cfl_subsample_hbd_422_32x32_avx2 Line | Count | Source | 101 | 17 | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 17 | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 17 | output_q3, width, height); \ | 104 | 17 | } |
cfl_subsample_hbd_422_32x16_avx2 Line | Count | Source | 101 | 55 | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 55 | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 55 | output_q3, width, height); \ | 104 | 55 | } |
cfl_subsample_hbd_422_32x8_avx2 Line | Count | Source | 101 | 71 | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 71 | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 71 | output_q3, width, height); \ | 104 | 71 | } |
cfl_subsample_hbd_444_32x32_avx2 Line | Count | Source | 101 | 18.7k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 18.7k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 18.7k | output_q3, width, height); \ | 104 | 18.7k | } |
cfl_subsample_hbd_444_32x16_avx2 Line | Count | Source | 101 | 11.7k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 11.7k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 11.7k | output_q3, width, height); \ | 104 | 11.7k | } |
cfl_subsample_hbd_444_32x8_avx2 Line | Count | Source | 101 | 28.2k | const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) { \ | 102 | 28.2k | cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride, \ | 103 | 28.2k | output_q3, width, height); \ | 104 | 28.2k | } |
|
105 | | |
106 | | // Declare size-specific wrappers for all valid CfL sizes. |
107 | | #define CFL_SUBSAMPLE_FUNCTIONS(arch, sub, bd) \ |
108 | | CFL_SUBSAMPLE(arch, sub, bd, 4, 4) \ |
109 | | CFL_SUBSAMPLE(arch, sub, bd, 8, 8) \ |
110 | | CFL_SUBSAMPLE(arch, sub, bd, 16, 16) \ |
111 | | CFL_SUBSAMPLE(arch, sub, bd, 32, 32) \ |
112 | | CFL_SUBSAMPLE(arch, sub, bd, 4, 8) \ |
113 | | CFL_SUBSAMPLE(arch, sub, bd, 8, 4) \ |
114 | | CFL_SUBSAMPLE(arch, sub, bd, 8, 16) \ |
115 | | CFL_SUBSAMPLE(arch, sub, bd, 16, 8) \ |
116 | | CFL_SUBSAMPLE(arch, sub, bd, 16, 32) \ |
117 | | CFL_SUBSAMPLE(arch, sub, bd, 32, 16) \ |
118 | | CFL_SUBSAMPLE(arch, sub, bd, 4, 16) \ |
119 | | CFL_SUBSAMPLE(arch, sub, bd, 16, 4) \ |
120 | | CFL_SUBSAMPLE(arch, sub, bd, 8, 32) \ |
121 | | CFL_SUBSAMPLE(arch, sub, bd, 32, 8) \ |
122 | | cfl_subsample_##bd##_fn cfl_get_luma_subsampling_##sub##_##bd##_##arch( \ |
123 | 0 | TX_SIZE tx_size) { \ |
124 | 0 | CFL_SUBSAMPLE_FUNCTION_ARRAY(arch, sub, bd) \ |
125 | 0 | return subfn_##sub[tx_size]; \ |
126 | 0 | } Unexecuted instantiation: cfl_get_luma_subsampling_420_lbd_c Unexecuted instantiation: cfl_get_luma_subsampling_422_lbd_c Unexecuted instantiation: cfl_get_luma_subsampling_444_lbd_c Unexecuted instantiation: cfl_get_luma_subsampling_420_hbd_c Unexecuted instantiation: cfl_get_luma_subsampling_422_hbd_c Unexecuted instantiation: cfl_get_luma_subsampling_444_hbd_c Unexecuted instantiation: cfl_get_luma_subsampling_420_lbd_ssse3 Unexecuted instantiation: cfl_get_luma_subsampling_422_lbd_ssse3 Unexecuted instantiation: cfl_get_luma_subsampling_444_lbd_ssse3 Unexecuted instantiation: cfl_get_luma_subsampling_420_hbd_ssse3 Unexecuted instantiation: cfl_get_luma_subsampling_422_hbd_ssse3 Unexecuted instantiation: cfl_get_luma_subsampling_444_hbd_ssse3 |
127 | | |
128 | | // Declare an architecture-specific array of function pointers for size-specific |
129 | | // wrappers. |
130 | | #define CFL_SUBSAMPLE_FUNCTION_ARRAY(arch, sub, bd) \ |
131 | 0 | static const cfl_subsample_##bd##_fn subfn_##sub[TX_SIZES_ALL] = { \ |
132 | 0 | cfl_subsample_##bd##_##sub##_4x4_##arch, /* 4x4 */ \ |
133 | 0 | cfl_subsample_##bd##_##sub##_8x8_##arch, /* 8x8 */ \ |
134 | 0 | cfl_subsample_##bd##_##sub##_16x16_##arch, /* 16x16 */ \ |
135 | 0 | cfl_subsample_##bd##_##sub##_32x32_##arch, /* 32x32 */ \ |
136 | 0 | NULL, /* 64x64 (invalid CFL size) */ \ |
137 | 0 | cfl_subsample_##bd##_##sub##_4x8_##arch, /* 4x8 */ \ |
138 | 0 | cfl_subsample_##bd##_##sub##_8x4_##arch, /* 8x4 */ \ |
139 | 0 | cfl_subsample_##bd##_##sub##_8x16_##arch, /* 8x16 */ \ |
140 | 0 | cfl_subsample_##bd##_##sub##_16x8_##arch, /* 16x8 */ \ |
141 | 0 | cfl_subsample_##bd##_##sub##_16x32_##arch, /* 16x32 */ \ |
142 | 0 | cfl_subsample_##bd##_##sub##_32x16_##arch, /* 32x16 */ \ |
143 | 0 | NULL, /* 32x64 (invalid CFL size) */ \ |
144 | 0 | NULL, /* 64x32 (invalid CFL size) */ \ |
145 | 0 | cfl_subsample_##bd##_##sub##_4x16_##arch, /* 4x16 */ \ |
146 | 0 | cfl_subsample_##bd##_##sub##_16x4_##arch, /* 16x4 */ \ |
147 | 0 | cfl_subsample_##bd##_##sub##_8x32_##arch, /* 8x32 */ \ |
148 | 0 | cfl_subsample_##bd##_##sub##_32x8_##arch, /* 32x8 */ \ |
149 | 0 | NULL, /* 16x64 (invalid CFL size) */ \ |
150 | 0 | NULL, /* 64x16 (invalid CFL size) */ \ |
151 | 0 | }; |
152 | | |
153 | | // The RTCD script does not support passing in an array, so we wrap it in this |
154 | | // function. |
155 | | #if CONFIG_AV1_HIGHBITDEPTH |
156 | | #define CFL_GET_SUBSAMPLE_FUNCTION(arch) \ |
157 | | CFL_SUBSAMPLE_FUNCTIONS(arch, 420, lbd) \ |
158 | | CFL_SUBSAMPLE_FUNCTIONS(arch, 422, lbd) \ |
159 | | CFL_SUBSAMPLE_FUNCTIONS(arch, 444, lbd) \ |
160 | | CFL_SUBSAMPLE_FUNCTIONS(arch, 420, hbd) \ |
161 | | CFL_SUBSAMPLE_FUNCTIONS(arch, 422, hbd) \ |
162 | | CFL_SUBSAMPLE_FUNCTIONS(arch, 444, hbd) |
163 | | #else |
164 | | #define CFL_GET_SUBSAMPLE_FUNCTION(arch) \ |
165 | | CFL_SUBSAMPLE_FUNCTIONS(arch, 420, lbd) \ |
166 | | CFL_SUBSAMPLE_FUNCTIONS(arch, 422, lbd) \ |
167 | | CFL_SUBSAMPLE_FUNCTIONS(arch, 444, lbd) |
168 | | #endif |
169 | | |
170 | | // Declare and define a size-specific wrapper for the size-generic function. |
171 | | // The compiler will inline the size-generic function here; because the size |
172 | | // is then a compile-time constant, this allows loop unrolling and other |
173 | | // constant-propagation optimizations. |
174 | | #define CFL_SUB_AVG_X(arch, width, height, round_offset, num_pel_log2) \ |
175 | | void cfl_subtract_average_##width##x##height##_##arch(const uint16_t *src, \ |
176 | | int16_t *dst); \ |
177 | | void cfl_subtract_average_##width##x##height##_##arch(const uint16_t *src, \ |
178 | 1.38M | int16_t *dst) { \ |
179 | 1.38M | subtract_average_##arch(src, dst, width, height, round_offset, \ |
180 | 1.38M | num_pel_log2); \ |
181 | 1.38M | } Unexecuted instantiation: cfl_subtract_average_4x4_c Unexecuted instantiation: cfl_subtract_average_4x8_c Unexecuted instantiation: cfl_subtract_average_4x16_c Unexecuted instantiation: cfl_subtract_average_8x4_c Unexecuted instantiation: cfl_subtract_average_8x8_c Unexecuted instantiation: cfl_subtract_average_8x16_c Unexecuted instantiation: cfl_subtract_average_8x32_c Unexecuted instantiation: cfl_subtract_average_16x4_c Unexecuted instantiation: cfl_subtract_average_16x8_c Unexecuted instantiation: cfl_subtract_average_16x16_c Unexecuted instantiation: cfl_subtract_average_16x32_c Unexecuted instantiation: cfl_subtract_average_32x8_c Unexecuted instantiation: cfl_subtract_average_32x16_c Unexecuted instantiation: cfl_subtract_average_32x32_c cfl_subtract_average_4x4_sse2 Line | Count | Source | 178 | 251k | int16_t *dst) { \ | 179 | 251k | subtract_average_##arch(src, dst, width, height, round_offset, \ | 180 | 251k | num_pel_log2); \ | 181 | 251k | } |
cfl_subtract_average_4x8_sse2 Line | Count | Source | 178 | 94.0k | int16_t *dst) { \ | 179 | 94.0k | subtract_average_##arch(src, dst, width, height, round_offset, \ | 180 | 94.0k | num_pel_log2); \ | 181 | 94.0k | } |
cfl_subtract_average_4x16_sse2 Line | Count | Source | 178 | 79.3k | int16_t *dst) { \ | 179 | 79.3k | subtract_average_##arch(src, dst, width, height, round_offset, \ | 180 | 79.3k | num_pel_log2); \ | 181 | 79.3k | } |
cfl_subtract_average_8x4_sse2 Line | Count | Source | 178 | 155k | int16_t *dst) { \ | 179 | 155k | subtract_average_##arch(src, dst, width, height, round_offset, \ | 180 | 155k | num_pel_log2); \ | 181 | 155k | } |
cfl_subtract_average_8x8_sse2 Line | Count | Source | 178 | 216k | int16_t *dst) { \ | 179 | 216k | subtract_average_##arch(src, dst, width, height, round_offset, \ | 180 | 216k | num_pel_log2); \ | 181 | 216k | } |
cfl_subtract_average_8x16_sse2 Line | Count | Source | 178 | 81.6k | int16_t *dst) { \ | 179 | 81.6k | subtract_average_##arch(src, dst, width, height, round_offset, \ | 180 | 81.6k | num_pel_log2); \ | 181 | 81.6k | } |
cfl_subtract_average_8x32_sse2 Line | Count | Source | 178 | 47.2k | int16_t *dst) { \ | 179 | 47.2k | subtract_average_##arch(src, dst, width, height, round_offset, \ | 180 | 47.2k | num_pel_log2); \ | 181 | 47.2k | } |
Unexecuted instantiation: cfl_subtract_average_16x4_sse2 Unexecuted instantiation: cfl_subtract_average_16x8_sse2 Unexecuted instantiation: cfl_subtract_average_16x16_sse2 Unexecuted instantiation: cfl_subtract_average_16x32_sse2 Unexecuted instantiation: cfl_subtract_average_32x8_sse2 Unexecuted instantiation: cfl_subtract_average_32x16_sse2 Unexecuted instantiation: cfl_subtract_average_32x32_sse2 cfl_subtract_average_16x4_avx2 Line | Count | Source | 178 | 113k | int16_t *dst) { \ | 179 | 113k | subtract_average_##arch(src, dst, width, height, round_offset, \ | 180 | 113k | num_pel_log2); \ | 181 | 113k | } |
cfl_subtract_average_16x8_avx2 Line | Count | Source | 178 | 101k | int16_t *dst) { \ | 179 | 101k | subtract_average_##arch(src, dst, width, height, round_offset, \ | 180 | 101k | num_pel_log2); \ | 181 | 101k | } |
cfl_subtract_average_16x16_avx2 Line | Count | Source | 178 | 122k | int16_t *dst) { \ | 179 | 122k | subtract_average_##arch(src, dst, width, height, round_offset, \ | 180 | 122k | num_pel_log2); \ | 181 | 122k | } |
cfl_subtract_average_16x32_avx2 Line | Count | Source | 178 | 20.5k | int16_t *dst) { \ | 179 | 20.5k | subtract_average_##arch(src, dst, width, height, round_offset, \ | 180 | 20.5k | num_pel_log2); \ | 181 | 20.5k | } |
cfl_subtract_average_32x8_avx2 Line | Count | Source | 178 | 46.3k | int16_t *dst) { \ | 179 | 46.3k | subtract_average_##arch(src, dst, width, height, round_offset, \ | 180 | 46.3k | num_pel_log2); \ | 181 | 46.3k | } |
cfl_subtract_average_32x16_avx2 Line | Count | Source | 178 | 22.0k | int16_t *dst) { \ | 179 | 22.0k | subtract_average_##arch(src, dst, width, height, round_offset, \ | 180 | 22.0k | num_pel_log2); \ | 181 | 22.0k | } |
cfl_subtract_average_32x32_avx2 Line | Count | Source | 178 | 37.7k | int16_t *dst) { \ | 179 | 37.7k | subtract_average_##arch(src, dst, width, height, round_offset, \ | 180 | 37.7k | num_pel_log2); \ | 181 | 37.7k | } |
|
182 | | |
183 | | // Declare size-specific wrappers for all valid CfL sizes. |
184 | | #define CFL_SUB_AVG_FN(arch) \ |
185 | | CFL_SUB_AVG_X(arch, 4, 4, 8, 4) \ |
186 | | CFL_SUB_AVG_X(arch, 4, 8, 16, 5) \ |
187 | | CFL_SUB_AVG_X(arch, 4, 16, 32, 6) \ |
188 | | CFL_SUB_AVG_X(arch, 8, 4, 16, 5) \ |
189 | | CFL_SUB_AVG_X(arch, 8, 8, 32, 6) \ |
190 | | CFL_SUB_AVG_X(arch, 8, 16, 64, 7) \ |
191 | | CFL_SUB_AVG_X(arch, 8, 32, 128, 8) \ |
192 | | CFL_SUB_AVG_X(arch, 16, 4, 32, 6) \ |
193 | | CFL_SUB_AVG_X(arch, 16, 8, 64, 7) \ |
194 | | CFL_SUB_AVG_X(arch, 16, 16, 128, 8) \ |
195 | | CFL_SUB_AVG_X(arch, 16, 32, 256, 9) \ |
196 | | CFL_SUB_AVG_X(arch, 32, 8, 128, 8) \ |
197 | | CFL_SUB_AVG_X(arch, 32, 16, 256, 9) \ |
198 | | CFL_SUB_AVG_X(arch, 32, 32, 512, 10) \ |
199 | | cfl_subtract_average_fn cfl_get_subtract_average_fn_##arch( \ |
200 | 0 | TX_SIZE tx_size) { \ |
201 | 0 | static const cfl_subtract_average_fn sub_avg[TX_SIZES_ALL] = { \ |
202 | 0 | cfl_subtract_average_4x4_##arch, /* 4x4 */ \ |
203 | 0 | cfl_subtract_average_8x8_##arch, /* 8x8 */ \ |
204 | 0 | cfl_subtract_average_16x16_##arch, /* 16x16 */ \ |
205 | 0 | cfl_subtract_average_32x32_##arch, /* 32x32 */ \ |
206 | 0 | NULL, /* 64x64 (invalid CFL size) */ \ |
207 | 0 | cfl_subtract_average_4x8_##arch, /* 4x8 */ \ |
208 | 0 | cfl_subtract_average_8x4_##arch, /* 8x4 */ \ |
209 | 0 | cfl_subtract_average_8x16_##arch, /* 8x16 */ \ |
210 | 0 | cfl_subtract_average_16x8_##arch, /* 16x8 */ \ |
211 | 0 | cfl_subtract_average_16x32_##arch, /* 16x32 */ \ |
212 | 0 | cfl_subtract_average_32x16_##arch, /* 32x16 */ \ |
213 | 0 | NULL, /* 32x64 (invalid CFL size) */ \ |
214 | 0 | NULL, /* 64x32 (invalid CFL size) */ \ |
215 | 0 | cfl_subtract_average_4x16_##arch, /* 4x16 */ \ |
216 | 0 | cfl_subtract_average_16x4_##arch, /* 16x4 */ \ |
217 | 0 | cfl_subtract_average_8x32_##arch, /* 8x32 */ \ |
218 | 0 | cfl_subtract_average_32x8_##arch, /* 32x8 */ \ |
219 | 0 | NULL, /* 16x64 (invalid CFL size) */ \ |
220 | 0 | NULL, /* 64x16 (invalid CFL size) */ \ |
221 | 0 | }; \ |
222 | 0 | /* Modulo TX_SIZES_ALL to ensure that an attacker won't be able to */ \ |
223 | 0 | /* index the function pointer array out of bounds. */ \ |
224 | 0 | return sub_avg[tx_size % TX_SIZES_ALL]; \ |
225 | 0 | } Unexecuted instantiation: cfl_get_subtract_average_fn_c Unexecuted instantiation: cfl_get_subtract_average_fn_sse2 |
226 | | |
227 | | #define CFL_PREDICT_lbd(arch, width, height) \ |
228 | | void cfl_predict_lbd_##width##x##height##_##arch( \ |
229 | | const int16_t *pred_buf_q3, uint8_t *dst, int dst_stride, int alpha_q3); \ |
230 | | void cfl_predict_lbd_##width##x##height##_##arch( \ |
231 | | const int16_t *pred_buf_q3, uint8_t *dst, int dst_stride, \ |
232 | 1.28M | int alpha_q3) { \ |
233 | 1.28M | cfl_predict_lbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, width, \ |
234 | 1.28M | height); \ |
235 | 1.28M | } Unexecuted instantiation: cfl_predict_lbd_4x4_c Unexecuted instantiation: cfl_predict_lbd_4x8_c Unexecuted instantiation: cfl_predict_lbd_4x16_c Unexecuted instantiation: cfl_predict_lbd_8x4_c Unexecuted instantiation: cfl_predict_lbd_8x8_c Unexecuted instantiation: cfl_predict_lbd_8x16_c Unexecuted instantiation: cfl_predict_lbd_8x32_c Unexecuted instantiation: cfl_predict_lbd_16x4_c Unexecuted instantiation: cfl_predict_lbd_16x8_c Unexecuted instantiation: cfl_predict_lbd_16x16_c Unexecuted instantiation: cfl_predict_lbd_16x32_c Unexecuted instantiation: cfl_predict_lbd_32x8_c Unexecuted instantiation: cfl_predict_lbd_32x16_c Unexecuted instantiation: cfl_predict_lbd_32x32_c cfl_predict_lbd_4x4_ssse3 Line | Count | Source | 232 | 273k | int alpha_q3) { \ | 233 | 273k | cfl_predict_lbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, width, \ | 234 | 273k | height); \ | 235 | 273k | } |
cfl_predict_lbd_4x8_ssse3 Line | Count | Source | 232 | 96.4k | int alpha_q3) { \ | 233 | 96.4k | cfl_predict_lbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, width, \ | 234 | 96.4k | height); \ | 235 | 96.4k | } |
cfl_predict_lbd_4x16_ssse3 Line | Count | Source | 232 | 63.5k | int alpha_q3) { \ | 233 | 63.5k | cfl_predict_lbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, width, \ | 234 | 63.5k | height); \ | 235 | 63.5k | } |
cfl_predict_lbd_8x4_ssse3 Line | Count | Source | 232 | 139k | int alpha_q3) { \ | 233 | 139k | cfl_predict_lbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, width, \ | 234 | 139k | height); \ | 235 | 139k | } |
cfl_predict_lbd_8x8_ssse3 Line | Count | Source | 232 | 192k | int alpha_q3) { \ | 233 | 192k | cfl_predict_lbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, width, \ | 234 | 192k | height); \ | 235 | 192k | } |
cfl_predict_lbd_8x16_ssse3 Line | Count | Source | 232 | 62.0k | int alpha_q3) { \ | 233 | 62.0k | cfl_predict_lbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, width, \ | 234 | 62.0k | height); \ | 235 | 62.0k | } |
cfl_predict_lbd_8x32_ssse3 Line | Count | Source | 232 | 46.1k | int alpha_q3) { \ | 233 | 46.1k | cfl_predict_lbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, width, \ | 234 | 46.1k | height); \ | 235 | 46.1k | } |
cfl_predict_lbd_16x4_ssse3 Line | Count | Source | 232 | 106k | int alpha_q3) { \ | 233 | 106k | cfl_predict_lbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, width, \ | 234 | 106k | height); \ | 235 | 106k | } |
cfl_predict_lbd_16x8_ssse3 Line | Count | Source | 232 | 83.8k | int alpha_q3) { \ | 233 | 83.8k | cfl_predict_lbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, width, \ | 234 | 83.8k | height); \ | 235 | 83.8k | } |
cfl_predict_lbd_16x16_ssse3 Line | Count | Source | 232 | 113k | int alpha_q3) { \ | 233 | 113k | cfl_predict_lbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, width, \ | 234 | 113k | height); \ | 235 | 113k | } |
cfl_predict_lbd_16x32_ssse3 Line | Count | Source | 232 | 17.5k | int alpha_q3) { \ | 233 | 17.5k | cfl_predict_lbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, width, \ | 234 | 17.5k | height); \ | 235 | 17.5k | } |
Unexecuted instantiation: cfl_predict_lbd_32x8_ssse3 Unexecuted instantiation: cfl_predict_lbd_32x16_ssse3 Unexecuted instantiation: cfl_predict_lbd_32x32_ssse3 cfl_predict_lbd_32x8_avx2 Line | Count | Source | 232 | 34.0k | int alpha_q3) { \ | 233 | 34.0k | cfl_predict_lbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, width, \ | 234 | 34.0k | height); \ | 235 | 34.0k | } |
cfl_predict_lbd_32x16_avx2 Line | Count | Source | 232 | 20.0k | int alpha_q3) { \ | 233 | 20.0k | cfl_predict_lbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, width, \ | 234 | 20.0k | height); \ | 235 | 20.0k | } |
cfl_predict_lbd_32x32_avx2 Line | Count | Source | 232 | 36.1k | int alpha_q3) { \ | 233 | 36.1k | cfl_predict_lbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, width, \ | 234 | 36.1k | height); \ | 235 | 36.1k | } |
|
236 | | |
237 | | #if CONFIG_AV1_HIGHBITDEPTH |
238 | | #define CFL_PREDICT_hbd(arch, width, height) \ |
239 | | void cfl_predict_hbd_##width##x##height##_##arch( \ |
240 | | const int16_t *pred_buf_q3, uint16_t *dst, int dst_stride, int alpha_q3, \ |
241 | | int bd); \ |
242 | | void cfl_predict_hbd_##width##x##height##_##arch( \ |
243 | | const int16_t *pred_buf_q3, uint16_t *dst, int dst_stride, int alpha_q3, \ |
244 | 1.49M | int bd) { \ |
245 | 1.49M | cfl_predict_hbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, bd, width, \ |
246 | 1.49M | height); \ |
247 | 1.49M | } Unexecuted instantiation: cfl_predict_hbd_4x4_c Unexecuted instantiation: cfl_predict_hbd_4x8_c Unexecuted instantiation: cfl_predict_hbd_4x16_c Unexecuted instantiation: cfl_predict_hbd_8x4_c Unexecuted instantiation: cfl_predict_hbd_8x8_c Unexecuted instantiation: cfl_predict_hbd_8x16_c Unexecuted instantiation: cfl_predict_hbd_8x32_c Unexecuted instantiation: cfl_predict_hbd_16x4_c Unexecuted instantiation: cfl_predict_hbd_16x8_c Unexecuted instantiation: cfl_predict_hbd_16x16_c Unexecuted instantiation: cfl_predict_hbd_16x32_c Unexecuted instantiation: cfl_predict_hbd_32x8_c Unexecuted instantiation: cfl_predict_hbd_32x16_c Unexecuted instantiation: cfl_predict_hbd_32x32_c cfl_predict_hbd_4x4_ssse3 Line | Count | Source | 244 | 228k | int bd) { \ | 245 | 228k | cfl_predict_hbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, bd, width, \ | 246 | 228k | height); \ | 247 | 228k | } |
cfl_predict_hbd_4x8_ssse3 Line | Count | Source | 244 | 91.6k | int bd) { \ | 245 | 91.6k | cfl_predict_hbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, bd, width, \ | 246 | 91.6k | height); \ | 247 | 91.6k | } |
cfl_predict_hbd_4x16_ssse3 Line | Count | Source | 244 | 95.1k | int bd) { \ | 245 | 95.1k | cfl_predict_hbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, bd, width, \ | 246 | 95.1k | height); \ | 247 | 95.1k | } |
cfl_predict_hbd_8x4_ssse3 Line | Count | Source | 244 | 172k | int bd) { \ | 245 | 172k | cfl_predict_hbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, bd, width, \ | 246 | 172k | height); \ | 247 | 172k | } |
cfl_predict_hbd_8x8_ssse3 Line | Count | Source | 244 | 240k | int bd) { \ | 245 | 240k | cfl_predict_hbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, bd, width, \ | 246 | 240k | height); \ | 247 | 240k | } |
cfl_predict_hbd_8x16_ssse3 Line | Count | Source | 244 | 101k | int bd) { \ | 245 | 101k | cfl_predict_hbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, bd, width, \ | 246 | 101k | height); \ | 247 | 101k | } |
cfl_predict_hbd_8x32_ssse3 Line | Count | Source | 244 | 48.3k | int bd) { \ | 245 | 48.3k | cfl_predict_hbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, bd, width, \ | 246 | 48.3k | height); \ | 247 | 48.3k | } |
Unexecuted instantiation: cfl_predict_hbd_16x4_ssse3 Unexecuted instantiation: cfl_predict_hbd_16x8_ssse3 Unexecuted instantiation: cfl_predict_hbd_16x16_ssse3 Unexecuted instantiation: cfl_predict_hbd_16x32_ssse3 Unexecuted instantiation: cfl_predict_hbd_32x8_ssse3 Unexecuted instantiation: cfl_predict_hbd_32x16_ssse3 Unexecuted instantiation: cfl_predict_hbd_32x32_ssse3 cfl_predict_hbd_16x4_avx2 Line | Count | Source | 244 | 119k | int bd) { \ | 245 | 119k | cfl_predict_hbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, bd, width, \ | 246 | 119k | height); \ | 247 | 119k | } |
cfl_predict_hbd_16x8_avx2 Line | Count | Source | 244 | 118k | int bd) { \ | 245 | 118k | cfl_predict_hbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, bd, width, \ | 246 | 118k | height); \ | 247 | 118k | } |
cfl_predict_hbd_16x16_avx2 Line | Count | Source | 244 | 132k | int bd) { \ | 245 | 132k | cfl_predict_hbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, bd, width, \ | 246 | 132k | height); \ | 247 | 132k | } |
cfl_predict_hbd_16x32_avx2 Line | Count | Source | 244 | 23.6k | int bd) { \ | 245 | 23.6k | cfl_predict_hbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, bd, width, \ | 246 | 23.6k | height); \ | 247 | 23.6k | } |
cfl_predict_hbd_32x8_avx2 Line | Count | Source | 244 | 58.6k | int bd) { \ | 245 | 58.6k | cfl_predict_hbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, bd, width, \ | 246 | 58.6k | height); \ | 247 | 58.6k | } |
cfl_predict_hbd_32x16_avx2 Line | Count | Source | 244 | 24.0k | int bd) { \ | 245 | 24.0k | cfl_predict_hbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, bd, width, \ | 246 | 24.0k | height); \ | 247 | 24.0k | } |
cfl_predict_hbd_32x32_avx2 Line | Count | Source | 244 | 39.4k | int bd) { \ | 245 | 39.4k | cfl_predict_hbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, bd, width, \ | 246 | 39.4k | height); \ | 247 | 39.4k | } |
|
248 | | #endif |
249 | | |
250 | | // This wrapper exists because clang-format does not like calling macros with |
251 | | // lowercase letters. |
252 | | #define CFL_PREDICT_X(arch, width, height, bd) \ |
253 | | CFL_PREDICT_##bd(arch, width, height) |
254 | | |
255 | | #define CFL_PREDICT_FN(arch, bd) \ |
256 | | CFL_PREDICT_X(arch, 4, 4, bd) \ |
257 | | CFL_PREDICT_X(arch, 4, 8, bd) \ |
258 | | CFL_PREDICT_X(arch, 4, 16, bd) \ |
259 | | CFL_PREDICT_X(arch, 8, 4, bd) \ |
260 | | CFL_PREDICT_X(arch, 8, 8, bd) \ |
261 | | CFL_PREDICT_X(arch, 8, 16, bd) \ |
262 | | CFL_PREDICT_X(arch, 8, 32, bd) \ |
263 | | CFL_PREDICT_X(arch, 16, 4, bd) \ |
264 | | CFL_PREDICT_X(arch, 16, 8, bd) \ |
265 | | CFL_PREDICT_X(arch, 16, 16, bd) \ |
266 | | CFL_PREDICT_X(arch, 16, 32, bd) \ |
267 | | CFL_PREDICT_X(arch, 32, 8, bd) \ |
268 | | CFL_PREDICT_X(arch, 32, 16, bd) \ |
269 | | CFL_PREDICT_X(arch, 32, 32, bd) \ |
270 | 0 | cfl_predict_##bd##_fn cfl_get_predict_##bd##_fn_##arch(TX_SIZE tx_size) { \ |
271 | 0 | static const cfl_predict_##bd##_fn pred[TX_SIZES_ALL] = { \ |
272 | 0 | cfl_predict_##bd##_4x4_##arch, /* 4x4 */ \ |
273 | 0 | cfl_predict_##bd##_8x8_##arch, /* 8x8 */ \ |
274 | 0 | cfl_predict_##bd##_16x16_##arch, /* 16x16 */ \ |
275 | 0 | cfl_predict_##bd##_32x32_##arch, /* 32x32 */ \ |
276 | 0 | NULL, /* 64x64 (invalid CFL size) */ \ |
277 | 0 | cfl_predict_##bd##_4x8_##arch, /* 4x8 */ \ |
278 | 0 | cfl_predict_##bd##_8x4_##arch, /* 8x4 */ \ |
279 | 0 | cfl_predict_##bd##_8x16_##arch, /* 8x16 */ \ |
280 | 0 | cfl_predict_##bd##_16x8_##arch, /* 16x8 */ \ |
281 | 0 | cfl_predict_##bd##_16x32_##arch, /* 16x32 */ \ |
282 | 0 | cfl_predict_##bd##_32x16_##arch, /* 32x16 */ \ |
283 | 0 | NULL, /* 32x64 (invalid CFL size) */ \ |
284 | 0 | NULL, /* 64x32 (invalid CFL size) */ \ |
285 | 0 | cfl_predict_##bd##_4x16_##arch, /* 4x16 */ \ |
286 | 0 | cfl_predict_##bd##_16x4_##arch, /* 16x4 */ \ |
287 | 0 | cfl_predict_##bd##_8x32_##arch, /* 8x32 */ \ |
288 | 0 | cfl_predict_##bd##_32x8_##arch, /* 32x8 */ \ |
289 | 0 | NULL, /* 16x64 (invalid CFL size) */ \ |
290 | 0 | NULL, /* 64x16 (invalid CFL size) */ \ |
291 | 0 | }; \ |
292 | 0 | /* Modulo TX_SIZES_ALL to ensure that an attacker won't be able to */ \ |
293 | 0 | /* index the function pointer array out of bounds. */ \ |
294 | 0 | return pred[tx_size % TX_SIZES_ALL]; \ |
295 | 0 | } Unexecuted instantiation: cfl_get_predict_lbd_fn_c Unexecuted instantiation: cfl_get_predict_hbd_fn_c Unexecuted instantiation: cfl_get_predict_lbd_fn_ssse3 Unexecuted instantiation: cfl_get_predict_hbd_fn_ssse3 |
296 | | |
297 | | #endif // AOM_AV1_COMMON_CFL_H_ |