/src/ffmpeg/libavcodec/vvc/dsp_template.c
Line | Count | Source |
1 | | /* |
2 | | * VVC transform and residual DSP |
3 | | * |
4 | | * Copyright (C) 2021 Nuo Mi |
5 | | * |
6 | | * This file is part of FFmpeg. |
7 | | * |
8 | | * FFmpeg is free software; you can redistribute it and/or |
9 | | * modify it under the terms of the GNU Lesser General Public |
10 | | * License as published by the Free Software Foundation; either |
11 | | * version 2.1 of the License, or (at your option) any later version. |
12 | | * |
13 | | * FFmpeg is distributed in the hope that it will be useful, |
14 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | | * Lesser General Public License for more details. |
17 | | * |
18 | | * You should have received a copy of the GNU Lesser General Public |
19 | | * License along with FFmpeg; if not, write to the Free Software |
20 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
21 | | */ |
22 | | #include "libavutil/frame.h" |
23 | | #include "libavcodec/bit_depth_template.c" |
24 | | |
25 | | #include "dec.h" |
26 | | #include "data.h" |
27 | | |
28 | | #include "inter_template.c" |
29 | | #include "intra_template.c" |
30 | | #include "filter_template.c" |
31 | | |
32 | | static void FUNC(add_residual)(uint8_t *_dst, const int *res, |
33 | | const int w, const int h, const ptrdiff_t _stride) |
34 | 121M | { |
35 | 121M | pixel *dst = (pixel *)_dst; |
36 | | |
37 | 121M | const int stride = _stride / sizeof(pixel); |
38 | | |
39 | 866M | for (int y = 0; y < h; y++) { |
40 | 8.41G | for (int x = 0; x < w; x++) { |
41 | 7.66G | dst[x] = av_clip_pixel(dst[x] + *res); |
42 | 7.66G | res++; |
43 | 7.66G | } |
44 | 745M | dst += stride; |
45 | 745M | } |
46 | 121M | } Line | Count | Source | 34 | 30.9M | { | 35 | 30.9M | pixel *dst = (pixel *)_dst; | 36 | | | 37 | 30.9M | const int stride = _stride / sizeof(pixel); | 38 | | | 39 | 221M | for (int y = 0; y < h; y++) { | 40 | 1.93G | for (int x = 0; x < w; x++) { | 41 | 1.74G | dst[x] = av_clip_pixel(dst[x] + *res); | 42 | 1.74G | res++; | 43 | 1.74G | } | 44 | 190M | dst += stride; | 45 | 190M | } | 46 | 30.9M | } |
Line | Count | Source | 34 | 57.1M | { | 35 | 57.1M | pixel *dst = (pixel *)_dst; | 36 | | | 37 | 57.1M | const int stride = _stride / sizeof(pixel); | 38 | | | 39 | 404M | for (int y = 0; y < h; y++) { | 40 | 4.36G | for (int x = 0; x < w; x++) { | 41 | 4.01G | dst[x] = av_clip_pixel(dst[x] + *res); | 42 | 4.01G | res++; | 43 | 4.01G | } | 44 | 347M | dst += stride; | 45 | 347M | } | 46 | 57.1M | } |
Line | Count | Source | 34 | 33.2M | { | 35 | 33.2M | pixel *dst = (pixel *)_dst; | 36 | | | 37 | 33.2M | const int stride = _stride / sizeof(pixel); | 38 | | | 39 | 240M | for (int y = 0; y < h; y++) { | 40 | 2.11G | for (int x = 0; x < w; x++) { | 41 | 1.90G | dst[x] = av_clip_pixel(dst[x] + *res); | 42 | 1.90G | res++; | 43 | 1.90G | } | 44 | 206M | dst += stride; | 45 | 206M | } | 46 | 33.2M | } |
|
47 | | |
48 | | static void FUNC(pred_residual_joint)(int *dst, const int *src, const int w, const int h, |
49 | | const int c_sign, const int shift) |
50 | 318k | { |
51 | 318k | const int size = w * h; |
52 | 47.7M | for (int i = 0; i < size; i++) |
53 | 47.3M | dst[i] = (src[i] * c_sign) >> shift; |
54 | 318k | } dsp.c:pred_residual_joint_12 Line | Count | Source | 50 | 15.5k | { | 51 | 15.5k | const int size = w * h; | 52 | 424k | for (int i = 0; i < size; i++) | 53 | 408k | dst[i] = (src[i] * c_sign) >> shift; | 54 | 15.5k | } |
dsp.c:pred_residual_joint_10 Line | Count | Source | 50 | 299k | { | 51 | 299k | const int size = w * h; | 52 | 47.2M | for (int i = 0; i < size; i++) | 53 | 46.9M | dst[i] = (src[i] * c_sign) >> shift; | 54 | 299k | } |
dsp.c:pred_residual_joint_8 Line | Count | Source | 50 | 4.06k | { | 51 | 4.06k | const int size = w * h; | 52 | 70.3k | for (int i = 0; i < size; i++) | 53 | 66.2k | dst[i] = (src[i] * c_sign) >> shift; | 54 | 4.06k | } |
|
55 | | |
56 | | static void FUNC(transform_bdpcm)(int *coeffs, const int width, const int height, |
57 | | const int vertical, const int log2_transform_range) |
58 | 14.5k | { |
59 | 14.5k | int x, y; |
60 | | |
61 | 14.5k | if (vertical) { |
62 | 7.98k | coeffs += width; |
63 | 36.4k | for (y = 0; y < height - 1; y++) { |
64 | 173k | for (x = 0; x < width; x++) |
65 | 145k | coeffs[x] = av_clip_intp2(coeffs[x] + coeffs[x - width], log2_transform_range); |
66 | 28.4k | coeffs += width; |
67 | 28.4k | } |
68 | 7.98k | } else { |
69 | 33.0k | for (y = 0; y < height; y++) { |
70 | 105k | for (x = 1; x < width; x++) |
71 | 79.3k | coeffs[x] = av_clip_intp2(coeffs[x] + coeffs[x - 1], log2_transform_range); |
72 | 26.4k | coeffs += width; |
73 | 26.4k | } |
74 | 6.61k | } |
75 | 14.5k | } Line | Count | Source | 58 | 6.44k | { | 59 | 6.44k | int x, y; | 60 | | | 61 | 6.44k | if (vertical) { | 62 | 4.89k | coeffs += width; | 63 | 19.5k | for (y = 0; y < height - 1; y++) { | 64 | 73.4k | for (x = 0; x < width; x++) | 65 | 58.7k | coeffs[x] = av_clip_intp2(coeffs[x] + coeffs[x - width], log2_transform_range); | 66 | 14.6k | coeffs += width; | 67 | 14.6k | } | 68 | 4.89k | } else { | 69 | 7.79k | for (y = 0; y < height; y++) { | 70 | 24.9k | for (x = 1; x < width; x++) | 71 | 18.6k | coeffs[x] = av_clip_intp2(coeffs[x] + coeffs[x - 1], log2_transform_range); | 72 | 6.23k | coeffs += width; | 73 | 6.23k | } | 74 | 1.55k | } | 75 | 6.44k | } |
Line | Count | Source | 58 | 3.84k | { | 59 | 3.84k | int x, y; | 60 | | | 61 | 3.84k | if (vertical) { | 62 | 1.86k | coeffs += width; | 63 | 11.9k | for (y = 0; y < height - 1; y++) { | 64 | 81.8k | for (x = 0; x < width; x++) | 65 | 71.7k | coeffs[x] = av_clip_intp2(coeffs[x] + coeffs[x - width], log2_transform_range); | 66 | 10.0k | coeffs += width; | 67 | 10.0k | } | 68 | 1.97k | } else { | 69 | 9.89k | for (y = 0; y < height; y++) { | 70 | 31.6k | for (x = 1; x < width; x++) | 71 | 23.7k | coeffs[x] = av_clip_intp2(coeffs[x] + coeffs[x - 1], log2_transform_range); | 72 | 7.91k | coeffs += width; | 73 | 7.91k | } | 74 | 1.97k | } | 75 | 3.84k | } |
Line | Count | Source | 58 | 4.30k | { | 59 | 4.30k | int x, y; | 60 | | | 61 | 4.30k | if (vertical) { | 62 | 1.22k | coeffs += width; | 63 | 4.92k | for (y = 0; y < height - 1; y++) { | 64 | 18.5k | for (x = 0; x < width; x++) | 65 | 14.8k | coeffs[x] = av_clip_intp2(coeffs[x] + coeffs[x - width], log2_transform_range); | 66 | 3.70k | coeffs += width; | 67 | 3.70k | } | 68 | 3.07k | } else { | 69 | 15.3k | for (y = 0; y < height; y++) { | 70 | 49.2k | for (x = 1; x < width; x++) | 71 | 36.9k | coeffs[x] = av_clip_intp2(coeffs[x] + coeffs[x - 1], log2_transform_range); | 72 | 12.3k | coeffs += width; | 73 | 12.3k | } | 74 | 3.07k | } | 75 | 4.30k | } |
|
76 | | |
77 | | // 8.7.4.6 Residual modification process for blocks using colour space conversion |
78 | | static void FUNC(adaptive_color_transform)(int *y, int *u, int *v, const int width, const int height) |
79 | 12.2k | { |
80 | 12.2k | const int size = width * height; |
81 | 12.2k | const int bits = BIT_DEPTH + 1; |
82 | | |
83 | 408k | for (int i = 0; i < size; i++) { |
84 | 395k | const int y0 = av_clip_intp2(y[i], bits); |
85 | 395k | const int cg = av_clip_intp2(u[i], bits); |
86 | 395k | const int co = av_clip_intp2(v[i], bits); |
87 | 395k | const int t = y0 - (cg >> 1); |
88 | | |
89 | 395k | y[i] = cg + t; |
90 | 395k | u[i] = t - (co >> 1); |
91 | 395k | v[i] = co + u[i]; |
92 | 395k | } |
93 | 12.2k | } dsp.c:adaptive_color_transform_12 Line | Count | Source | 79 | 12.1k | { | 80 | 12.1k | const int size = width * height; | 81 | 12.1k | const int bits = BIT_DEPTH + 1; | 82 | | | 83 | 399k | for (int i = 0; i < size; i++) { | 84 | 387k | const int y0 = av_clip_intp2(y[i], bits); | 85 | 387k | const int cg = av_clip_intp2(u[i], bits); | 86 | 387k | const int co = av_clip_intp2(v[i], bits); | 87 | 387k | const int t = y0 - (cg >> 1); | 88 | | | 89 | 387k | y[i] = cg + t; | 90 | 387k | u[i] = t - (co >> 1); | 91 | 387k | v[i] = co + u[i]; | 92 | 387k | } | 93 | 12.1k | } |
Unexecuted instantiation: dsp.c:adaptive_color_transform_10 dsp.c:adaptive_color_transform_8 Line | Count | Source | 79 | 137 | { | 80 | 137 | const int size = width * height; | 81 | 137 | const int bits = BIT_DEPTH + 1; | 82 | | | 83 | 8.58k | for (int i = 0; i < size; i++) { | 84 | 8.44k | const int y0 = av_clip_intp2(y[i], bits); | 85 | 8.44k | const int cg = av_clip_intp2(u[i], bits); | 86 | 8.44k | const int co = av_clip_intp2(v[i], bits); | 87 | 8.44k | const int t = y0 - (cg >> 1); | 88 | | | 89 | 8.44k | y[i] = cg + t; | 90 | 8.44k | u[i] = t - (co >> 1); | 91 | 8.44k | v[i] = co + u[i]; | 92 | 8.44k | } | 93 | 137 | } |
|
94 | | |
95 | | static void FUNC(ff_vvc_itx_dsp_init)(VVCItxDSPContext *const itx) |
96 | 2.20M | { |
97 | 2.20M | #define VVC_ITX(TYPE, type, s) \ |
98 | 30.9M | itx->itx[VVC_##TYPE][VVC_##TX_SIZE_##s] = ff_vvc_inv_##type##_##s; \ |
99 | 2.20M | |
100 | 2.20M | #define VVC_ITX_COMMON(TYPE, type) \ |
101 | 6.62M | VVC_ITX(TYPE, type, 4); \ |
102 | 6.62M | VVC_ITX(TYPE, type, 8); \ |
103 | 6.62M | VVC_ITX(TYPE, type, 16); \ |
104 | 6.62M | VVC_ITX(TYPE, type, 32); |
105 | | |
106 | 2.20M | itx->add_residual = FUNC(add_residual); |
107 | 2.20M | itx->pred_residual_joint = FUNC(pred_residual_joint); |
108 | 2.20M | itx->transform_bdpcm = FUNC(transform_bdpcm); |
109 | 2.20M | VVC_ITX(DCT2, dct2, 2) |
110 | 2.20M | VVC_ITX(DCT2, dct2, 64) |
111 | 2.20M | VVC_ITX_COMMON(DCT2, dct2) |
112 | 2.20M | VVC_ITX_COMMON(DCT8, dct8) |
113 | 2.20M | VVC_ITX_COMMON(DST7, dst7) |
114 | | |
115 | 2.20M | itx->adaptive_color_transform = FUNC(adaptive_color_transform); |
116 | | |
117 | 2.20M | #undef VVC_ITX |
118 | 2.20M | #undef VVC_ITX_COMMON |
119 | 2.20M | } dsp.c:ff_vvc_itx_dsp_init_12 Line | Count | Source | 96 | 676k | { | 97 | 676k | #define VVC_ITX(TYPE, type, s) \ | 98 | 676k | itx->itx[VVC_##TYPE][VVC_##TX_SIZE_##s] = ff_vvc_inv_##type##_##s; \ | 99 | 676k | | 100 | 676k | #define VVC_ITX_COMMON(TYPE, type) \ | 101 | 676k | VVC_ITX(TYPE, type, 4); \ | 102 | 676k | VVC_ITX(TYPE, type, 8); \ | 103 | 676k | VVC_ITX(TYPE, type, 16); \ | 104 | 676k | VVC_ITX(TYPE, type, 32); | 105 | | | 106 | 676k | itx->add_residual = FUNC(add_residual); | 107 | 676k | itx->pred_residual_joint = FUNC(pred_residual_joint); | 108 | 676k | itx->transform_bdpcm = FUNC(transform_bdpcm); | 109 | 676k | VVC_ITX(DCT2, dct2, 2) | 110 | 676k | VVC_ITX(DCT2, dct2, 64) | 111 | 676k | VVC_ITX_COMMON(DCT2, dct2) | 112 | 676k | VVC_ITX_COMMON(DCT8, dct8) | 113 | 676k | VVC_ITX_COMMON(DST7, dst7) | 114 | | | 115 | 676k | itx->adaptive_color_transform = FUNC(adaptive_color_transform); | 116 | | | 117 | 676k | #undef VVC_ITX | 118 | 676k | #undef VVC_ITX_COMMON | 119 | 676k | } |
dsp.c:ff_vvc_itx_dsp_init_10 Line | Count | Source | 96 | 148k | { | 97 | 148k | #define VVC_ITX(TYPE, type, s) \ | 98 | 148k | itx->itx[VVC_##TYPE][VVC_##TX_SIZE_##s] = ff_vvc_inv_##type##_##s; \ | 99 | 148k | | 100 | 148k | #define VVC_ITX_COMMON(TYPE, type) \ | 101 | 148k | VVC_ITX(TYPE, type, 4); \ | 102 | 148k | VVC_ITX(TYPE, type, 8); \ | 103 | 148k | VVC_ITX(TYPE, type, 16); \ | 104 | 148k | VVC_ITX(TYPE, type, 32); | 105 | | | 106 | 148k | itx->add_residual = FUNC(add_residual); | 107 | 148k | itx->pred_residual_joint = FUNC(pred_residual_joint); | 108 | 148k | itx->transform_bdpcm = FUNC(transform_bdpcm); | 109 | 148k | VVC_ITX(DCT2, dct2, 2) | 110 | 148k | VVC_ITX(DCT2, dct2, 64) | 111 | 148k | VVC_ITX_COMMON(DCT2, dct2) | 112 | 148k | VVC_ITX_COMMON(DCT8, dct8) | 113 | 148k | VVC_ITX_COMMON(DST7, dst7) | 114 | | | 115 | 148k | itx->adaptive_color_transform = FUNC(adaptive_color_transform); | 116 | | | 117 | 148k | #undef VVC_ITX | 118 | 148k | #undef VVC_ITX_COMMON | 119 | 148k | } |
dsp.c:ff_vvc_itx_dsp_init_8 Line | Count | Source | 96 | 1.38M | { | 97 | 1.38M | #define VVC_ITX(TYPE, type, s) \ | 98 | 1.38M | itx->itx[VVC_##TYPE][VVC_##TX_SIZE_##s] = ff_vvc_inv_##type##_##s; \ | 99 | 1.38M | | 100 | 1.38M | #define VVC_ITX_COMMON(TYPE, type) \ | 101 | 1.38M | VVC_ITX(TYPE, type, 4); \ | 102 | 1.38M | VVC_ITX(TYPE, type, 8); \ | 103 | 1.38M | VVC_ITX(TYPE, type, 16); \ | 104 | 1.38M | VVC_ITX(TYPE, type, 32); | 105 | | | 106 | 1.38M | itx->add_residual = FUNC(add_residual); | 107 | 1.38M | itx->pred_residual_joint = FUNC(pred_residual_joint); | 108 | 1.38M | itx->transform_bdpcm = FUNC(transform_bdpcm); | 109 | 1.38M | VVC_ITX(DCT2, dct2, 2) | 110 | 1.38M | VVC_ITX(DCT2, dct2, 64) | 111 | 1.38M | VVC_ITX_COMMON(DCT2, dct2) | 112 | 1.38M | VVC_ITX_COMMON(DCT8, dct8) | 113 | 1.38M | VVC_ITX_COMMON(DST7, dst7) | 114 | | | 115 | 1.38M | itx->adaptive_color_transform = FUNC(adaptive_color_transform); | 116 | | | 117 | 1.38M | #undef VVC_ITX | 118 | 1.38M | #undef VVC_ITX_COMMON | 119 | 1.38M | } |
|