/src/ffmpeg/libavcodec/vvc/inter_template.c
Line | Count | Source |
1 | | /* |
2 | | * VVC inter prediction DSP |
3 | | * |
4 | | * Copyright (C) 2022 Nuo Mi |
5 | | * |
6 | | * This file is part of FFmpeg. |
7 | | * |
8 | | * FFmpeg is free software; you can redistribute it and/or |
9 | | * modify it under the terms of the GNU Lesser General Public |
10 | | * License as published by the Free Software Foundation; either |
11 | | * version 2.1 of the License, or (at your option) any later version. |
12 | | * |
13 | | * FFmpeg is distributed in the hope that it will be useful, |
14 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | | * Lesser General Public License for more details. |
17 | | * |
18 | | * You should have received a copy of the GNU Lesser General Public |
19 | | * License along with FFmpeg; if not, write to the Free Software |
20 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
21 | | */ |
22 | | |
23 | | #include "libavcodec/h26x/h2656_inter_template.c" |
24 | | #include "libavutil/imgutils.h" |
25 | | |
26 | 0 | #define TMP_STRIDE EDGE_EMU_BUFFER_STRIDE |
27 | | static void av_always_inline FUNC(put_scaled)(uint8_t *_dst, const ptrdiff_t _dst_stride, |
28 | | const uint8_t *const _src, ptrdiff_t _src_stride, const int src_height, |
29 | | const int _x, const int _y, const int dx, const int dy, |
30 | | const int height, const int8_t *hf, const int8_t *vf, const int width, const int is_uni, const int is_chroma) |
31 | 0 | { |
32 | 0 | int16_t tmp_array[TMP_STRIDE * MAX_PB_SIZE]; |
33 | 0 | int16_t *tmp = tmp_array; |
34 | 0 | pixel *dst = (pixel*)_dst; |
35 | 0 | int16_t *dst16 = (int16_t*)_dst; |
36 | 0 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); |
37 | 0 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); |
38 | 0 | const int shift = FFMAX(2, 14 - BIT_DEPTH); |
39 | 0 | const int offset = 1 << (shift - 1); |
40 | 0 | const int taps = is_chroma ? VVC_INTER_CHROMA_TAPS : VVC_INTER_LUMA_TAPS; |
41 | 0 | const int extra = is_chroma ? CHROMA_EXTRA : LUMA_EXTRA; |
42 | 0 | const int extra_before = is_chroma ? CHROMA_EXTRA_BEFORE : LUMA_EXTRA_BEFORE; |
43 | 0 | const int shift1 = 6 - is_chroma; |
44 | 0 | const int shift2 = 4 + is_chroma; |
45 | 0 | const int x0 = SCALED_INT(_x); |
46 | 0 | const int y0 = SCALED_INT(_y); |
47 | |
|
48 | 0 | for (int i = 0; i < width; i++) { |
49 | 0 | const int tx = _x + dx * i; |
50 | 0 | const int x = SCALED_INT(tx) - x0; |
51 | 0 | const int mx = av_zero_extend(tx >> shift1, shift2); |
52 | 0 | const int8_t *filter = hf + mx * taps; |
53 | 0 | const pixel *src = (pixel*)_src - extra_before * src_stride; |
54 | |
|
55 | 0 | for (int j = 0; j < src_height + extra; j++) { |
56 | 0 | tmp[j] = (is_chroma ? CHROMA_FILTER(src, 1) : LUMA_FILTER(src, 1)) >> (BIT_DEPTH - 8); |
57 | 0 | src += src_stride; |
58 | 0 | } |
59 | 0 | tmp += TMP_STRIDE; |
60 | 0 | } |
61 | |
|
62 | 0 | for (int i = 0; i < height; i++) { |
63 | 0 | const int ty = _y + dy * i; |
64 | 0 | const int x = SCALED_INT(ty) - y0; |
65 | 0 | const int mx = av_zero_extend(ty >> shift1, shift2); |
66 | 0 | const int8_t *filter = vf + mx * taps; |
67 | |
|
68 | 0 | tmp = tmp_array + extra_before; |
69 | 0 | for (int j = 0; j < width; j++) { |
70 | 0 | const int val = (is_chroma ? CHROMA_FILTER(tmp, 1) : LUMA_FILTER(tmp, 1)) >> 6; |
71 | 0 | if (is_uni) |
72 | 0 | dst[j] = av_clip_pixel((val + offset) >> shift); |
73 | 0 | else |
74 | 0 | dst16[j] = val; |
75 | 0 | tmp += TMP_STRIDE; |
76 | 0 | } |
77 | 0 | if (is_uni) |
78 | 0 | dst += dst_stride; |
79 | 0 | else |
80 | 0 | dst16 += dst_stride; |
81 | 0 | } |
82 | 0 | } Unexecuted instantiation: dsp.c:put_scaled_12 Unexecuted instantiation: dsp.c:put_scaled_10 Unexecuted instantiation: dsp.c:put_scaled_8 |
83 | | |
84 | | static void FUNC(put_luma_scaled)(int16_t *_dst, |
85 | | const uint8_t *_src, ptrdiff_t _src_stride, const int src_height, |
86 | | const int x, const int y, const int dx, const int dy, |
87 | | const int height, const int8_t *hf, const int8_t *vf, const int width) |
88 | 0 | { |
89 | 0 | FUNC(put_scaled)((uint8_t *)_dst, MAX_PB_SIZE * sizeof(pixel), _src, _src_stride, src_height, x, y, dx, dy, height, hf, vf, width, 0, 0); |
90 | 0 | } Unexecuted instantiation: dsp.c:put_luma_scaled_12 Unexecuted instantiation: dsp.c:put_luma_scaled_10 Unexecuted instantiation: dsp.c:put_luma_scaled_8 |
91 | | |
92 | | static void FUNC(put_chroma_scaled)(int16_t *_dst, |
93 | | const uint8_t *_src, ptrdiff_t _src_stride, const int src_height, |
94 | | const int x, const int y, const int dx, const int dy, |
95 | | const int height, const int8_t *hf, const int8_t *vf, const int width) |
96 | 0 | { |
97 | 0 | FUNC(put_scaled)((uint8_t *)_dst, MAX_PB_SIZE * sizeof(pixel), _src, _src_stride, src_height, x, y, dx, dy, height, hf, vf, width, 0, 1); |
98 | 0 | } Unexecuted instantiation: dsp.c:put_chroma_scaled_12 Unexecuted instantiation: dsp.c:put_chroma_scaled_10 Unexecuted instantiation: dsp.c:put_chroma_scaled_8 |
99 | | |
100 | | static void FUNC(put_uni_luma_scaled)(uint8_t *_dst, const ptrdiff_t _dst_stride, |
101 | | const uint8_t *_src, ptrdiff_t _src_stride, const int src_height, |
102 | | const int x, const int y, const int dx, const int dy, |
103 | | const int height, const int8_t *hf, const int8_t *vf, const int width) |
104 | 0 | { |
105 | 0 | FUNC(put_scaled)(_dst, _dst_stride, _src, _src_stride, src_height, x, y, dx, dy, height, hf, vf, width, 1, 0); |
106 | 0 | } Unexecuted instantiation: dsp.c:put_uni_luma_scaled_12 Unexecuted instantiation: dsp.c:put_uni_luma_scaled_10 Unexecuted instantiation: dsp.c:put_uni_luma_scaled_8 |
107 | | |
108 | | static void FUNC(put_uni_chroma_scaled)(uint8_t *_dst, const ptrdiff_t _dst_stride, |
109 | | const uint8_t *_src, ptrdiff_t _src_stride, const int src_height, |
110 | | const int x, const int y, const int dx, const int dy, |
111 | | const int height, const int8_t *hf, const int8_t *vf, const int width) |
112 | 0 | { |
113 | 0 | FUNC(put_scaled)(_dst, _dst_stride, _src, _src_stride, src_height, x, y, dx, dy, height, hf, vf, width, 1, 1); |
114 | 0 | } Unexecuted instantiation: dsp.c:put_uni_chroma_scaled_12 Unexecuted instantiation: dsp.c:put_uni_chroma_scaled_10 Unexecuted instantiation: dsp.c:put_uni_chroma_scaled_8 |
115 | | |
116 | | static void av_always_inline FUNC(put_uni_w_scaled)(uint8_t *_dst, const ptrdiff_t _dst_stride, |
117 | | const uint8_t *const _src, ptrdiff_t _src_stride, const int src_height, |
118 | | const int _x, const int _y, const int dx, const int dy, const int denom, const int wx, const int _ox, |
119 | | const int height, const int8_t *hf, const int8_t *vf, const int width, const int is_chroma) |
120 | 0 | { |
121 | 0 | int16_t tmp_array[TMP_STRIDE * MAX_PB_SIZE]; |
122 | 0 | int16_t *tmp = tmp_array; |
123 | 0 | pixel *dst = (pixel*)_dst; |
124 | 0 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); |
125 | 0 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); |
126 | 0 | const int shift = FFMAX(2, 14 - BIT_DEPTH); |
127 | 0 | const int offset = 1 << (shift - 1); |
128 | 0 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); |
129 | 0 | const int taps = is_chroma ? VVC_INTER_CHROMA_TAPS : VVC_INTER_LUMA_TAPS; |
130 | 0 | const int extra = is_chroma ? CHROMA_EXTRA : LUMA_EXTRA; |
131 | 0 | const int extra_before = is_chroma ? CHROMA_EXTRA_BEFORE : LUMA_EXTRA_BEFORE; |
132 | 0 | const int shift1 = 6 - is_chroma; |
133 | 0 | const int shift2 = 4 + is_chroma; |
134 | 0 | const int x0 = SCALED_INT(_x); |
135 | 0 | const int y0 = SCALED_INT(_y); |
136 | |
|
137 | 0 | for (int i = 0; i < width; i++) { |
138 | 0 | const int tx = _x + dx * i; |
139 | 0 | const int x = SCALED_INT(tx) - x0; |
140 | 0 | const int mx = av_zero_extend(tx >> shift1, shift2); |
141 | 0 | const int8_t *filter = hf + mx * taps; |
142 | 0 | const pixel *src = (pixel*)_src - extra_before * src_stride; |
143 | |
|
144 | 0 | for (int j = 0; j < src_height + extra; j++) { |
145 | 0 | tmp[j] = (is_chroma ? CHROMA_FILTER(src, 1) : LUMA_FILTER(src, 1)) >> (BIT_DEPTH - 8); |
146 | 0 | src += src_stride; |
147 | 0 | } |
148 | 0 | tmp += TMP_STRIDE; |
149 | 0 | } |
150 | |
|
151 | 0 | for (int i = 0; i < height; i++) { |
152 | 0 | const int ty = _y + dy * i; |
153 | 0 | const int x = SCALED_INT(ty) - y0; |
154 | 0 | const int mx = av_zero_extend(ty >> shift1, shift2); |
155 | 0 | const int8_t *filter = vf + mx * taps; |
156 | |
|
157 | 0 | tmp = tmp_array + extra_before; |
158 | 0 | for (int j = 0; j < width; j++) { |
159 | 0 | const int val = (is_chroma ? CHROMA_FILTER(tmp, 1) : LUMA_FILTER(tmp, 1)) >> 6; |
160 | 0 | dst[j] = av_clip_pixel(((wx * val + offset) >> shift) + ox); |
161 | 0 | tmp += TMP_STRIDE; |
162 | 0 | } |
163 | 0 | dst += dst_stride; |
164 | 0 | } |
165 | 0 | } Unexecuted instantiation: dsp.c:put_uni_w_scaled_12 Unexecuted instantiation: dsp.c:put_uni_w_scaled_10 Unexecuted instantiation: dsp.c:put_uni_w_scaled_8 |
166 | | |
167 | | static void FUNC(put_uni_luma_w_scaled)(uint8_t *_dst, const ptrdiff_t _dst_stride, |
168 | | const uint8_t *_src, ptrdiff_t _src_stride, const int src_height, |
169 | | const int x, const int y, const int dx, const int dy, const int denom, const int wx, const int ox, |
170 | | const int height, const int8_t *hf, const int8_t *vf, const int width) |
171 | 0 | { |
172 | 0 | FUNC(put_uni_w_scaled)(_dst, _dst_stride, _src, _src_stride, src_height, x, y, dx, dy, denom, wx, ox, height, hf, vf, width, 0); |
173 | 0 | } Unexecuted instantiation: dsp.c:put_uni_luma_w_scaled_12 Unexecuted instantiation: dsp.c:put_uni_luma_w_scaled_10 Unexecuted instantiation: dsp.c:put_uni_luma_w_scaled_8 |
174 | | |
175 | | static void FUNC(put_uni_chroma_w_scaled)(uint8_t *_dst, const ptrdiff_t _dst_stride, |
176 | | const uint8_t *_src, ptrdiff_t _src_stride, const int src_height, |
177 | | const int x, const int y, const int dx, const int dy, const int denom, const int wx, const int ox, |
178 | | const int height, const int8_t *hf, const int8_t *vf, const int width) |
179 | 0 | { |
180 | 0 | FUNC(put_uni_w_scaled)(_dst, _dst_stride, _src, _src_stride, src_height, x, y, dx, dy, denom, wx, ox, height, hf, vf, width, 1); |
181 | 0 | } Unexecuted instantiation: dsp.c:put_uni_chroma_w_scaled_12 Unexecuted instantiation: dsp.c:put_uni_chroma_w_scaled_10 Unexecuted instantiation: dsp.c:put_uni_chroma_w_scaled_8 |
182 | | |
183 | | #undef TMP_STRIDE |
184 | | |
185 | | static void FUNC(avg)(uint8_t *_dst, const ptrdiff_t _dst_stride, |
186 | | const int16_t *src0, const int16_t *src1, const int width, const int height) |
187 | 0 | { |
188 | 0 | pixel *dst = (pixel*)_dst; |
189 | 0 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); |
190 | 0 | const int shift = FFMAX(3, 15 - BIT_DEPTH); |
191 | 0 | const int offset = 1 << (shift - 1); |
192 | |
|
193 | 0 | for (int y = 0; y < height; y++) { |
194 | 0 | for (int x = 0; x < width; x++) |
195 | 0 | dst[x] = av_clip_pixel((src0[x] + src1[x] + offset) >> shift); |
196 | 0 | src0 += MAX_PB_SIZE; |
197 | 0 | src1 += MAX_PB_SIZE; |
198 | 0 | dst += dst_stride; |
199 | 0 | } |
200 | 0 | } Unexecuted instantiation: dsp.c:avg_12 Unexecuted instantiation: dsp.c:avg_10 Unexecuted instantiation: dsp.c:avg_8 |
201 | | |
202 | | static void FUNC(w_avg)(uint8_t *_dst, const ptrdiff_t _dst_stride, |
203 | | const int16_t *src0, const int16_t *src1, const int width, const int height, |
204 | | const int denom, const int w0, const int w1, const int o0, const int o1) |
205 | 0 | { |
206 | 0 | pixel *dst = (pixel*)_dst; |
207 | 0 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); |
208 | 0 | const int shift = denom + FFMAX(3, 15 - BIT_DEPTH); |
209 | 0 | const int offset = ((o0 + o1) * (1 << (BIT_DEPTH - 8)) + 1) * (1 << (shift - 1)); |
210 | |
|
211 | 0 | for (int y = 0; y < height; y++) { |
212 | 0 | for (int x = 0; x < width; x++) |
213 | 0 | dst[x] = av_clip_pixel((src0[x] * w0 + src1[x] * w1 + offset) >> shift); |
214 | 0 | src0 += MAX_PB_SIZE; |
215 | 0 | src1 += MAX_PB_SIZE; |
216 | 0 | dst += dst_stride; |
217 | 0 | } |
218 | 0 | } Unexecuted instantiation: dsp.c:w_avg_12 Unexecuted instantiation: dsp.c:w_avg_10 Unexecuted instantiation: dsp.c:w_avg_8 |
219 | | |
220 | | static void FUNC(put_ciip)(uint8_t *_dst, const ptrdiff_t _dst_stride, |
221 | | const int width, const int height, |
222 | | const uint8_t *_inter, const ptrdiff_t _inter_stride, const int intra_weight) |
223 | 0 | { |
224 | 0 | pixel *dst = (pixel *)_dst; |
225 | 0 | pixel *inter = (pixel *)_inter; |
226 | 0 | const size_t dst_stride = _dst_stride / sizeof(pixel); |
227 | 0 | const size_t inter_stride = _inter_stride / sizeof(pixel); |
228 | 0 | const int inter_weight = 4 - intra_weight; |
229 | |
|
230 | 0 | for (int y = 0; y < height; y++) { |
231 | 0 | for (int x = 0; x < width; x++) |
232 | 0 | dst[x] = (dst[x] * intra_weight + inter[x] * inter_weight + 2) >> 2; |
233 | 0 | dst += dst_stride; |
234 | 0 | inter += inter_stride; |
235 | 0 | } |
236 | 0 | } Unexecuted instantiation: dsp.c:put_ciip_12 Unexecuted instantiation: dsp.c:put_ciip_10 Unexecuted instantiation: dsp.c:put_ciip_8 |
237 | | |
238 | | static void FUNC(put_gpm)(uint8_t *_dst, ptrdiff_t dst_stride, |
239 | | const int width, const int height, |
240 | | const int16_t *src0, const int16_t *src1, |
241 | | const uint8_t *weights, const int step_x, const int step_y) |
242 | 3.21k | { |
243 | 3.21k | const int shift = FFMAX(5, 17 - BIT_DEPTH); |
244 | 3.21k | const int offset = 1 << (shift - 1); |
245 | 3.21k | pixel *dst = (pixel *)_dst; |
246 | | |
247 | 3.21k | dst_stride /= sizeof(pixel); |
248 | 20.3k | for (int y = 0; y < height; y++) { |
249 | 120k | for (int x = 0; x < width; x++) { |
250 | 103k | const uint8_t w = weights[x * step_x]; |
251 | 103k | dst[x] = av_clip_pixel((src0[x] * w + src1[x] * (8 - w) + offset) >> shift); |
252 | 103k | } |
253 | 17.1k | dst += dst_stride; |
254 | 17.1k | src0 += MAX_PB_SIZE; |
255 | 17.1k | src1 += MAX_PB_SIZE; |
256 | 17.1k | weights += step_y; |
257 | 17.1k | } |
258 | 3.21k | } Unexecuted instantiation: dsp.c:put_gpm_12 Unexecuted instantiation: dsp.c:put_gpm_10 Line | Count | Source | 242 | 3.21k | { | 243 | 3.21k | const int shift = FFMAX(5, 17 - BIT_DEPTH); | 244 | 3.21k | const int offset = 1 << (shift - 1); | 245 | 3.21k | pixel *dst = (pixel *)_dst; | 246 | | | 247 | 3.21k | dst_stride /= sizeof(pixel); | 248 | 20.3k | for (int y = 0; y < height; y++) { | 249 | 120k | for (int x = 0; x < width; x++) { | 250 | 103k | const uint8_t w = weights[x * step_x]; | 251 | 103k | dst[x] = av_clip_pixel((src0[x] * w + src1[x] * (8 - w) + offset) >> shift); | 252 | 103k | } | 253 | 17.1k | dst += dst_stride; | 254 | 17.1k | src0 += MAX_PB_SIZE; | 255 | 17.1k | src1 += MAX_PB_SIZE; | 256 | 17.1k | weights += step_y; | 257 | 17.1k | } | 258 | 3.21k | } |
|
259 | | |
260 | | //8.5.6.3.3 Luma integer sample fetching process, add one extra pad line |
261 | | static void FUNC(bdof_fetch_samples)(int16_t *_dst, const uint8_t *_src, const ptrdiff_t _src_stride, |
262 | | const int x_frac, const int y_frac, const int width, const int height) |
263 | 0 | { |
264 | 0 | const int x_off = (x_frac >> 3) - 1; |
265 | 0 | const int y_off = (y_frac >> 3) - 1; |
266 | 0 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); |
267 | 0 | const pixel *src = (pixel*)_src + (x_off) + y_off * src_stride; |
268 | 0 | int16_t *dst = _dst - 1 - MAX_PB_SIZE; |
269 | 0 | const int shift = 14 - BIT_DEPTH; |
270 | 0 | const int bdof_width = width + 2 * BDOF_BORDER_EXT; |
271 | | |
272 | | // top |
273 | 0 | for (int i = 0; i < bdof_width; i++) |
274 | 0 | dst[i] = src[i] << shift; |
275 | |
|
276 | 0 | dst += MAX_PB_SIZE; |
277 | 0 | src += src_stride; |
278 | |
|
279 | 0 | for (int i = 0; i < height; i++) { |
280 | 0 | dst[0] = src[0] << shift; |
281 | 0 | dst[1 + width] = src[1 + width] << shift; |
282 | 0 | dst += MAX_PB_SIZE; |
283 | 0 | src += src_stride; |
284 | 0 | } |
285 | 0 | for (int i = 0; i < bdof_width; i++) |
286 | 0 | dst[i] = src[i] << shift; |
287 | 0 | } Unexecuted instantiation: dsp.c:bdof_fetch_samples_12 Unexecuted instantiation: dsp.c:bdof_fetch_samples_10 Unexecuted instantiation: dsp.c:bdof_fetch_samples_8 |
288 | | |
289 | | //8.5.6.3.3 Luma integer sample fetching process |
290 | | static void FUNC(fetch_samples)(int16_t *_dst, const uint8_t *_src, const ptrdiff_t _src_stride, const int x_frac, const int y_frac) |
291 | 0 | { |
292 | 0 | FUNC(bdof_fetch_samples)(_dst, _src, _src_stride, x_frac, y_frac, AFFINE_MIN_BLOCK_SIZE, AFFINE_MIN_BLOCK_SIZE); |
293 | 0 | } Unexecuted instantiation: dsp.c:fetch_samples_12 Unexecuted instantiation: dsp.c:fetch_samples_10 Unexecuted instantiation: dsp.c:fetch_samples_8 |
294 | | |
295 | | static void FUNC(prof_grad_filter)(int16_t *gradient_h, int16_t *gradient_v, const ptrdiff_t gradient_stride, |
296 | | const int16_t *_src, const ptrdiff_t src_stride, const int width, const int height) |
297 | 0 | { |
298 | 0 | const int shift = 6; |
299 | 0 | const int16_t *src = _src; |
300 | |
|
301 | 0 | for (int y = 0; y < height; y++) { |
302 | 0 | const int16_t *p = src; |
303 | 0 | for (int x = 0; x < width; x++) { |
304 | 0 | gradient_h[x] = (p[1] >> shift) - (p[-1] >> shift); |
305 | 0 | gradient_v[x] = (p[src_stride] >> shift) - (p[-src_stride] >> shift); |
306 | 0 | p++; |
307 | 0 | } |
308 | 0 | gradient_h += gradient_stride; |
309 | 0 | gradient_v += gradient_stride; |
310 | 0 | src += src_stride; |
311 | 0 | } |
312 | 0 | } Unexecuted instantiation: dsp.c:prof_grad_filter_12 Unexecuted instantiation: dsp.c:prof_grad_filter_10 Unexecuted instantiation: dsp.c:prof_grad_filter_8 |
313 | | |
314 | | static void FUNC(apply_prof)(int16_t *dst, const int16_t *src, const int16_t *diff_mv_x, const int16_t *diff_mv_y) |
315 | 0 | { |
316 | 0 | const int limit = (1 << FFMAX(13, BIT_DEPTH + 1)); ///< dILimit |
317 | |
|
318 | 0 | int16_t gradient_h[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE]; |
319 | 0 | int16_t gradient_v[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE]; |
320 | 0 | FUNC(prof_grad_filter)(gradient_h, gradient_v, AFFINE_MIN_BLOCK_SIZE, src, MAX_PB_SIZE, AFFINE_MIN_BLOCK_SIZE, AFFINE_MIN_BLOCK_SIZE); |
321 | |
|
322 | 0 | for (int y = 0; y < AFFINE_MIN_BLOCK_SIZE; y++) { |
323 | 0 | for (int x = 0; x < AFFINE_MIN_BLOCK_SIZE; x++) { |
324 | 0 | const int o = y * AFFINE_MIN_BLOCK_SIZE + x; |
325 | 0 | const int di = gradient_h[o] * diff_mv_x[o] + gradient_v[o] * diff_mv_y[o]; |
326 | 0 | const int val = src[x] + av_clip(di, -limit, limit - 1); |
327 | 0 | dst[x] = val; |
328 | |
|
329 | 0 | } |
330 | 0 | src += MAX_PB_SIZE; |
331 | 0 | dst += MAX_PB_SIZE; |
332 | 0 | } |
333 | 0 | } Unexecuted instantiation: dsp.c:apply_prof_12 Unexecuted instantiation: dsp.c:apply_prof_10 Unexecuted instantiation: dsp.c:apply_prof_8 |
334 | | |
335 | | static void FUNC(apply_prof_uni)(uint8_t *_dst, const ptrdiff_t _dst_stride, const int16_t *src, const int16_t *diff_mv_x, const int16_t *diff_mv_y) |
336 | 0 | { |
337 | 0 | const int limit = (1 << FFMAX(13, BIT_DEPTH + 1)); ///< dILimit |
338 | 0 | pixel *dst = (pixel*)_dst; |
339 | 0 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); |
340 | 0 | const int shift = 14 - BIT_DEPTH; |
341 | 0 | #if BIT_DEPTH < 14 |
342 | 0 | const int offset = 1 << (shift - 1); |
343 | | #else |
344 | | const int offset = 0; |
345 | | #endif |
346 | 0 | int16_t gradient_h[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE]; |
347 | 0 | int16_t gradient_v[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE]; |
348 | |
|
349 | 0 | FUNC(prof_grad_filter)(gradient_h, gradient_v, AFFINE_MIN_BLOCK_SIZE, src, MAX_PB_SIZE, AFFINE_MIN_BLOCK_SIZE, AFFINE_MIN_BLOCK_SIZE); |
350 | |
|
351 | 0 | for (int y = 0; y < AFFINE_MIN_BLOCK_SIZE; y++) { |
352 | 0 | for (int x = 0; x < AFFINE_MIN_BLOCK_SIZE; x++) { |
353 | 0 | const int o = y * AFFINE_MIN_BLOCK_SIZE + x; |
354 | 0 | const int di = gradient_h[o] * diff_mv_x[o] + gradient_v[o] * diff_mv_y[o]; |
355 | 0 | const int val = src[x] + av_clip(di, -limit, limit - 1); |
356 | 0 | dst[x] = av_clip_pixel((val + offset) >> shift); |
357 | |
|
358 | 0 | } |
359 | 0 | src += MAX_PB_SIZE; |
360 | 0 | dst += dst_stride; |
361 | 0 | } |
362 | 0 | } Unexecuted instantiation: dsp.c:apply_prof_uni_12 Unexecuted instantiation: dsp.c:apply_prof_uni_10 Unexecuted instantiation: dsp.c:apply_prof_uni_8 |
363 | | |
364 | | static void FUNC(apply_prof_uni_w)(uint8_t *_dst, const ptrdiff_t _dst_stride, |
365 | | const int16_t *src, const int16_t *diff_mv_x, const int16_t *diff_mv_y, |
366 | | const int denom, const int wx, const int _ox) |
367 | 0 | { |
368 | 0 | const int limit = (1 << FFMAX(13, BIT_DEPTH + 1)); ///< dILimit |
369 | 0 | pixel *dst = (pixel*)_dst; |
370 | 0 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); |
371 | 0 | const int shift = denom + FFMAX(2, 14 - BIT_DEPTH); |
372 | 0 | const int offset = 1 << (shift - 1); |
373 | 0 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); |
374 | 0 | int16_t gradient_h[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE]; |
375 | 0 | int16_t gradient_v[AFFINE_MIN_BLOCK_SIZE * AFFINE_MIN_BLOCK_SIZE]; |
376 | |
|
377 | 0 | FUNC(prof_grad_filter)(gradient_h, gradient_v, AFFINE_MIN_BLOCK_SIZE, src, MAX_PB_SIZE, AFFINE_MIN_BLOCK_SIZE, AFFINE_MIN_BLOCK_SIZE); |
378 | |
|
379 | 0 | for (int y = 0; y < AFFINE_MIN_BLOCK_SIZE; y++) { |
380 | 0 | for (int x = 0; x < AFFINE_MIN_BLOCK_SIZE; x++) { |
381 | 0 | const int o = y * AFFINE_MIN_BLOCK_SIZE + x; |
382 | 0 | const int di = gradient_h[o] * diff_mv_x[o] + gradient_v[o] * diff_mv_y[o]; |
383 | 0 | const int val = src[x] + av_clip(di, -limit, limit - 1); |
384 | 0 | dst[x] = av_clip_pixel(((val * wx + offset) >> shift) + ox); |
385 | 0 | } |
386 | 0 | src += MAX_PB_SIZE; |
387 | 0 | dst += dst_stride; |
388 | 0 | } |
389 | 0 | } Unexecuted instantiation: dsp.c:apply_prof_uni_w_12 Unexecuted instantiation: dsp.c:apply_prof_uni_w_10 Unexecuted instantiation: dsp.c:apply_prof_uni_w_8 |
390 | | |
391 | | static void FUNC(derive_bdof_vx_vy)(const int16_t *_src0, const int16_t *_src1, |
392 | | const int pad_left, const int pad_top, const int pad_right, const int pad_bottom, |
393 | | const int16_t **gradient_h, const int16_t **gradient_v, |
394 | | int* vx, int* vy) |
395 | 0 | { |
396 | 0 | const int shift2 = 4; |
397 | 0 | const int shift3 = 1; |
398 | 0 | const int thres = 1 << 4; |
399 | 0 | int sgx2 = 0, sgy2 = 0, sgxgy = 0, sgxdi = 0, sgydi = 0; |
400 | |
|
401 | 0 | for (int y = -1; y < BDOF_MIN_BLOCK_SIZE + 1; y++) { |
402 | 0 | const int dy = y + (pad_top && y < 0) - (pad_bottom && y == BDOF_MIN_BLOCK_SIZE); // we pad for the first and last row |
403 | 0 | const int16_t *src0 = _src0 + dy * MAX_PB_SIZE; |
404 | 0 | const int16_t *src1 = _src1 + dy * MAX_PB_SIZE; |
405 | |
|
406 | 0 | for (int x = -1; x < BDOF_MIN_BLOCK_SIZE + 1; x++) { |
407 | 0 | const int dx = x + (pad_left && x < 0) - (pad_right && x == BDOF_MIN_BLOCK_SIZE); // we pad for the first and last col |
408 | 0 | const int diff = (src0[dx] >> shift2) - (src1[dx] >> shift2); |
409 | 0 | const int idx = BDOF_BLOCK_SIZE * dy + dx; |
410 | 0 | const int temph = (gradient_h[0][idx] + gradient_h[1][idx]) >> shift3; |
411 | 0 | const int tempv = (gradient_v[0][idx] + gradient_v[1][idx]) >> shift3; |
412 | |
|
413 | 0 | sgx2 += FFABS(temph); |
414 | 0 | sgy2 += FFABS(tempv); |
415 | 0 | sgxgy += VVC_SIGN(tempv) * temph; |
416 | 0 | sgxdi += -VVC_SIGN(temph) * diff; |
417 | 0 | sgydi += -VVC_SIGN(tempv) * diff; |
418 | 0 | } |
419 | 0 | } |
420 | 0 | *vx = sgx2 > 0 ? av_clip((sgxdi * (1 << 2)) >> av_log2(sgx2) , -thres + 1, thres - 1) : 0; |
421 | 0 | *vy = sgy2 > 0 ? av_clip(((sgydi * (1 << 2)) - ((*vx * sgxgy) >> 1)) >> av_log2(sgy2), -thres + 1, thres - 1) : 0; |
422 | 0 | } Unexecuted instantiation: dsp.c:derive_bdof_vx_vy_12 Unexecuted instantiation: dsp.c:derive_bdof_vx_vy_10 Unexecuted instantiation: dsp.c:derive_bdof_vx_vy_8 |
423 | | |
424 | | static void FUNC(apply_bdof_min_block)(pixel* dst, const ptrdiff_t dst_stride, const int16_t *src0, const int16_t *src1, |
425 | | const int16_t **gh, const int16_t **gv, const int vx, const int vy) |
426 | 0 | { |
427 | 0 | const int shift4 = 15 - BIT_DEPTH; |
428 | 0 | const int offset4 = 1 << (shift4 - 1); |
429 | |
|
430 | 0 | for (int y = 0; y < BDOF_MIN_BLOCK_SIZE; y++) { |
431 | 0 | for (int x = 0; x < BDOF_MIN_BLOCK_SIZE; x++) { |
432 | 0 | const int idx = y * BDOF_BLOCK_SIZE + x; |
433 | 0 | const int bdof_offset = vx * (gh[0][idx] - gh[1][idx]) + vy * (gv[0][idx] - gv[1][idx]); |
434 | 0 | dst[x] = av_clip_pixel((src0[x] + offset4 + src1[x] + bdof_offset) >> shift4); |
435 | 0 | } |
436 | 0 | dst += dst_stride; |
437 | 0 | src0 += MAX_PB_SIZE; |
438 | 0 | src1 += MAX_PB_SIZE; |
439 | 0 | } |
440 | 0 | } Unexecuted instantiation: dsp.c:apply_bdof_min_block_12 Unexecuted instantiation: dsp.c:apply_bdof_min_block_10 Unexecuted instantiation: dsp.c:apply_bdof_min_block_8 |
441 | | |
442 | | static void FUNC(apply_bdof)(uint8_t *_dst, const ptrdiff_t _dst_stride, const int16_t *_src0, const int16_t *_src1, |
443 | | const int block_w, const int block_h) |
444 | 0 | { |
445 | 0 | int16_t gradient_h[2][BDOF_BLOCK_SIZE * BDOF_BLOCK_SIZE]; |
446 | 0 | int16_t gradient_v[2][BDOF_BLOCK_SIZE * BDOF_BLOCK_SIZE]; |
447 | 0 | int vx, vy; |
448 | 0 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); |
449 | 0 | pixel* dst = (pixel*)_dst; |
450 | |
|
451 | 0 | FUNC(prof_grad_filter)(gradient_h[0], gradient_v[0], BDOF_BLOCK_SIZE, |
452 | 0 | _src0, MAX_PB_SIZE, block_w, block_h); |
453 | 0 | FUNC(prof_grad_filter)(gradient_h[1], gradient_v[1], BDOF_BLOCK_SIZE, |
454 | 0 | _src1, MAX_PB_SIZE, block_w, block_h); |
455 | |
|
456 | 0 | for (int y = 0; y < block_h; y += BDOF_MIN_BLOCK_SIZE) { |
457 | 0 | for (int x = 0; x < block_w; x += BDOF_MIN_BLOCK_SIZE) { |
458 | 0 | const int16_t* src0 = _src0 + y * MAX_PB_SIZE + x; |
459 | 0 | const int16_t* src1 = _src1 + y * MAX_PB_SIZE + x; |
460 | 0 | pixel *d = dst + x; |
461 | 0 | const int idx = BDOF_BLOCK_SIZE * y + x; |
462 | 0 | const int16_t* gh[] = { gradient_h[0] + idx, gradient_h[1] + idx }; |
463 | 0 | const int16_t* gv[] = { gradient_v[0] + idx, gradient_v[1] + idx }; |
464 | 0 | FUNC(derive_bdof_vx_vy)(src0, src1, !x, !y, x + BDOF_MIN_BLOCK_SIZE == block_w, y + BDOF_MIN_BLOCK_SIZE == block_h, gh, gv, &vx, &vy); |
465 | 0 | FUNC(apply_bdof_min_block)(d, dst_stride, src0, src1, gh, gv, vx, vy); |
466 | 0 | } |
467 | 0 | dst += BDOF_MIN_BLOCK_SIZE * dst_stride; |
468 | 0 | } |
469 | 0 | } Unexecuted instantiation: dsp.c:apply_bdof_12 Unexecuted instantiation: dsp.c:apply_bdof_10 Unexecuted instantiation: dsp.c:apply_bdof_8 |
470 | | |
/*
 * Two-tap filter of src[x] and src[x + stride].
 * Relies on identifiers named `filter` (two coefficients) and `x` (sample
 * index) being in scope at the point of use.
 * Arguments are parenthesized so that expression arguments expand safely.
 */
#define DMVR_FILTER(src, stride)                                               \
    (filter[0] * (src)[x] +                                                    \
     filter[1] * (src)[x + (stride)])

/*
 * Two-tap filter of two explicit sample values with an explicit coefficient
 * pair. Arguments are parenthesized so that expression arguments expand safely.
 */
#define DMVR_FILTER2(filter, src0, src1)                                       \
    ((filter)[0] * (src0) + (filter)[1] * (src1))
477 | | |
478 | | //8.5.3.2.2 Luma sample bilinear interpolation process |
479 | | static void FUNC(dmvr)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, |
480 | | const int height, const intptr_t mx, const intptr_t my, const int width) |
481 | 0 | { |
482 | | #if BIT_DEPTH != 10 |
483 | | const pixel *src = (const pixel *)_src; |
484 | | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); |
485 | | #if BIT_DEPTH > 10 |
486 | 0 | const int shift4 = BIT_DEPTH - 10; |
487 | | const int offset4 = 1 << (shift4 - 1); |
488 | 0 | #define DMVR_SHIFT(s) (((s) + offset4) >> shift4) |
489 | | #else |
490 | 0 | #define DMVR_SHIFT(s) ((s) << (10 - BIT_DEPTH)) |
491 | | #endif // BIT_DEPTH > 10 |
492 | | |
493 | 0 | for (int y = 0; y < height; y++) { |
494 | 0 | for (int x = 0; x < width; x++) |
495 | 0 | dst[x] = DMVR_SHIFT(src[x]); |
496 | 0 | src += src_stride; |
497 | 0 | dst += MAX_PB_SIZE; |
498 | 0 | } |
499 | | #undef DMVR_SHIFT |
500 | | #else |
501 | 0 | av_image_copy_plane((uint8_t*)dst, sizeof(int16_t) * MAX_PB_SIZE, _src, _src_stride, |
502 | | width * sizeof(pixel), height); |
503 | | #endif // BIT_DEPTH != 10 |
504 | 0 | } Unexecuted instantiation: dsp.c:dmvr_12 Unexecuted instantiation: dsp.c:dmvr_10 Unexecuted instantiation: dsp.c:dmvr_8 |
505 | | |
506 | | //8.5.3.2.2 Luma sample bilinear interpolation process |
507 | | static void FUNC(dmvr_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, |
508 | | const int height, const intptr_t mx, const intptr_t my, const int width) |
509 | 0 | { |
510 | 0 | const pixel *src = (const pixel*)_src; |
511 | 0 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); |
512 | 0 | const int8_t *filter = ff_vvc_inter_luma_dmvr_filters[mx]; |
513 | 0 | const int shift1 = BIT_DEPTH - 6; |
514 | 0 | const int offset1 = 1 << (shift1 - 1); |
515 | |
|
516 | 0 | for (int y = 0; y < height; y++) { |
517 | 0 | for (int x = 0; x < width; x++) |
518 | 0 | dst[x] = (DMVR_FILTER(src, 1) + offset1) >> shift1; |
519 | 0 | src += src_stride; |
520 | 0 | dst += MAX_PB_SIZE; |
521 | 0 | } |
522 | 0 | } Unexecuted instantiation: dsp.c:dmvr_h_12 Unexecuted instantiation: dsp.c:dmvr_h_10 Unexecuted instantiation: dsp.c:dmvr_h_8 |
523 | | |
524 | | //8.5.3.2.2 Luma sample bilinear interpolation process |
525 | | static void FUNC(dmvr_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, |
526 | | const int height, const intptr_t mx, const intptr_t my, const int width) |
527 | 0 | { |
528 | 0 | const pixel *src = (pixel*)_src; |
529 | 0 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); |
530 | 0 | const int8_t *filter = ff_vvc_inter_luma_dmvr_filters[my]; |
531 | 0 | const int shift1 = BIT_DEPTH - 6; |
532 | 0 | const int offset1 = 1 << (shift1 - 1); |
533 | |
|
534 | 0 | for (int y = 0; y < height; y++) { |
535 | 0 | for (int x = 0; x < width; x++) |
536 | 0 | dst[x] = (DMVR_FILTER(src, src_stride) + offset1) >> shift1; |
537 | 0 | src += src_stride; |
538 | 0 | dst += MAX_PB_SIZE; |
539 | 0 | } |
540 | |
|
541 | 0 | } Unexecuted instantiation: dsp.c:dmvr_v_12 Unexecuted instantiation: dsp.c:dmvr_v_10 Unexecuted instantiation: dsp.c:dmvr_v_8 |
542 | | |
543 | | //8.5.3.2.2 Luma sample bilinear interpolation process |
544 | | static void FUNC(dmvr_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, |
545 | | const int height, const intptr_t mx, const intptr_t my, const int width) |
546 | 0 | { |
547 | 0 | int16_t tmp_array[MAX_PB_SIZE * 2]; |
548 | 0 | int16_t *tmp0 = tmp_array; |
549 | 0 | int16_t *tmp1 = tmp_array + MAX_PB_SIZE; |
550 | 0 | const pixel *src = (const pixel*)_src; |
551 | 0 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); |
552 | 0 | const int8_t *filter_x = ff_vvc_inter_luma_dmvr_filters[mx]; |
553 | 0 | const int8_t *filter_y = ff_vvc_inter_luma_dmvr_filters[my]; |
554 | 0 | const int shift1 = BIT_DEPTH - 6; |
555 | 0 | const int offset1 = 1 << (shift1 - 1); |
556 | 0 | const int shift2 = 4; |
557 | 0 | const int offset2 = 1 << (shift2 - 1); |
558 | |
|
559 | 0 | src -= BILINEAR_EXTRA_BEFORE * src_stride; |
560 | 0 | for (int x = 0; x < width; x++) |
561 | 0 | tmp0[x] = (DMVR_FILTER2(filter_x, src[x], src[x + 1]) + offset1) >> shift1; |
562 | 0 | src += src_stride; |
563 | |
|
564 | 0 | for (int y = 1; y < height + BILINEAR_EXTRA; y++) { |
565 | 0 | for (int x = 0; x < width; x++) { |
566 | 0 | tmp1[x] = (DMVR_FILTER2(filter_x, src[x], src[x + 1]) + offset1) >> shift1; |
567 | 0 | dst[x] = (DMVR_FILTER2(filter_y, tmp0[x], tmp1[x]) + offset2) >> shift2; |
568 | 0 | } |
569 | 0 | src += src_stride; |
570 | 0 | dst += MAX_PB_SIZE; |
571 | 0 | FFSWAP(int16_t *, tmp0, tmp1); |
572 | 0 | } |
573 | 0 | } Unexecuted instantiation: dsp.c:dmvr_hv_12 Unexecuted instantiation: dsp.c:dmvr_hv_10 Unexecuted instantiation: dsp.c:dmvr_hv_8 |
574 | | |
// PEL_FUNC(dst, C, idx1, idx2, a): stores FUNC(a) into
// inter->dst[C][w][idx1][idx2] for every w in 0..6.
// Relies on a variable named `inter` being in scope at the expansion site.
// NOTE(review): w presumably selects a block-width class - confirm against
// the VVCInterDSPContext declaration.
575 | | #define PEL_FUNC(dst, C, idx1, idx2, a) \
576 | 56.8M | do { \
577 | 454M | for (int w = 0; w < 7; w++) \
578 | 397M | inter->dst[C][w][idx1][idx2] = FUNC(a); \
579 | 56.8M | } while (0) \
580 | | |
// DIR_FUNCS(d, C, c): fills the put_<d> and put_<d>_w tables for component C
// through PEL_FUNC. `c` is the component name pasted into the function names.
// Slot layout in both tables:
//   [0][0] = *_pixels, [0][1] = *_h, [1][0] = *_v, [1][1] = *_hv
// The only expansion visible in this file is DIR_FUNCS(uni, C, c) inside FUNCS.
581 | | #define DIR_FUNCS(d, C, c) \
582 | 4.73M | PEL_FUNC(put_##d, C, 0, 0, put_##d##_pixels); \
583 | 4.73M | PEL_FUNC(put_##d, C, 0, 1, put_##d##_##c##_h); \
584 | 4.73M | PEL_FUNC(put_##d, C, 1, 0, put_##d##_##c##_v); \
585 | 4.73M | PEL_FUNC(put_##d, C, 1, 1, put_##d##_##c##_hv); \
586 | 4.73M | PEL_FUNC(put_##d##_w, C, 0, 0, put_##d##_w_pixels); \
587 | 4.73M | PEL_FUNC(put_##d##_w, C, 0, 1, put_##d##_##c##_w_h); \
588 | 4.73M | PEL_FUNC(put_##d##_w, C, 1, 0, put_##d##_##c##_w_v); \
589 | 4.73M | PEL_FUNC(put_##d##_w, C, 1, 1, put_##d##_##c##_w_hv);
590 | | |
// FUNCS(C, c): fills the `put` table for component C (same [idx1][idx2] slot
// layout as DIR_FUNCS: pixels / h / v / hv), then expands DIR_FUNCS(uni, C, c)
// to fill the put_uni and put_uni_w tables as well.
591 | | #define FUNCS(C, c) \
592 | 4.73M | PEL_FUNC(put, C, 0, 0, put_pixels); \
593 | 4.73M | PEL_FUNC(put, C, 0, 1, put_##c##_h); \
594 | 4.73M | PEL_FUNC(put, C, 1, 0, put_##c##_v); \
595 | 4.73M | PEL_FUNC(put, C, 1, 1, put_##c##_hv); \
596 | 4.73M | DIR_FUNCS(uni, C, c); \
597 | | |
// Populates every function pointer of *inter that is assigned below with the
// FUNC()-named implementation of this template instantiation. The coverage
// listing shows instantiations with the suffixes _12, _10 and _8.
598 | | static void FUNC(ff_vvc_inter_dsp_init)(VVCInterDSPContext *const inter)
599 | 2.36M | {
600 | 2.36M | FUNCS(LUMA, luma); // put / put_uni / put_uni_w tables for LUMA
601 | 2.36M | FUNCS(CHROMA, chroma); // same tables for CHROMA
602 | |
// Every slot i of the scaled tables receives the same function.
603 | 18.9M | for (int i = 0; i < FF_ARRAY_ELEMS(inter->put_scaled[LUMA]); i++) {
604 | 16.5M | inter->put_scaled[LUMA][i] = FUNC(put_luma_scaled);
605 | 16.5M | inter->put_scaled[CHROMA][i] = FUNC(put_chroma_scaled);
606 | 16.5M | inter->put_uni_scaled[LUMA][i] = FUNC(put_uni_luma_scaled);
607 | 16.5M | inter->put_uni_scaled[CHROMA][i] = FUNC(put_uni_chroma_scaled);
608 | 16.5M | inter->put_uni_w_scaled[LUMA][i] = FUNC(put_uni_luma_w_scaled);
609 | 16.5M | inter->put_uni_w_scaled[CHROMA][i] = FUNC(put_uni_chroma_w_scaled);
610 | 16.5M | }
611 | |
612 | 2.36M | inter->avg = FUNC(avg);
613 | 2.36M | inter->w_avg = FUNC(w_avg);
614 | |
// dmvr[a][b]: [0][0] = dmvr, [0][1] = dmvr_h, [1][0] = dmvr_v, [1][1] = dmvr_hv.
615 | 2.36M | inter->dmvr[0][0] = FUNC(dmvr);
616 | 2.36M | inter->dmvr[0][1] = FUNC(dmvr_h);
617 | 2.36M | inter->dmvr[1][0] = FUNC(dmvr_v);
618 | 2.36M | inter->dmvr[1][1] = FUNC(dmvr_hv);
619 | |
620 | 2.36M | inter->put_ciip = FUNC(put_ciip);
621 | 2.36M | inter->put_gpm = FUNC(put_gpm);
622 | |
623 | 2.36M | inter->fetch_samples = FUNC(fetch_samples);
624 | 2.36M | inter->bdof_fetch_samples = FUNC(bdof_fetch_samples);
625 | 2.36M | inter->apply_prof = FUNC(apply_prof);
626 | 2.36M | inter->apply_prof_uni = FUNC(apply_prof_uni);
627 | 2.36M | inter->apply_prof_uni_w = FUNC(apply_prof_uni_w);
628 | 2.36M | inter->apply_bdof = FUNC(apply_bdof);
629 | 2.36M | inter->sad = vvc_sad; // not wrapped in FUNC(), unlike the entries above; presumably bit-depth independent - confirm
630 | 2.36M | } dsp.c:ff_vvc_inter_dsp_init_12 Line | Count | Source | 599 | 731k | { | 600 | 731k | FUNCS(LUMA, luma); | 601 | 731k | FUNCS(CHROMA, chroma); | 602 | | | 603 | 5.85M | for (int i = 0; i < FF_ARRAY_ELEMS(inter->put_scaled[LUMA]); i++) { | 604 | 5.12M | inter->put_scaled[LUMA][i] = FUNC(put_luma_scaled); | 605 | 5.12M | inter->put_scaled[CHROMA][i] = FUNC(put_chroma_scaled); | 606 | 5.12M | inter->put_uni_scaled[LUMA][i] = FUNC(put_uni_luma_scaled); | 607 | 5.12M | inter->put_uni_scaled[CHROMA][i] = FUNC(put_uni_chroma_scaled); | 608 | 5.12M | inter->put_uni_w_scaled[LUMA][i] = FUNC(put_uni_luma_w_scaled); | 609 | 5.12M | inter->put_uni_w_scaled[CHROMA][i] = FUNC(put_uni_chroma_w_scaled); | 610 | 5.12M | } | 611 | | | 612 | 731k | inter->avg = FUNC(avg); | 613 | 731k | inter->w_avg = FUNC(w_avg); | 614 | | | 615 | 731k | inter->dmvr[0][0] = FUNC(dmvr); | 616 | 731k | inter->dmvr[0][1] = FUNC(dmvr_h); | 617 | 731k | inter->dmvr[1][0] = FUNC(dmvr_v); | 618 | 731k | inter->dmvr[1][1] = FUNC(dmvr_hv); | 619 | | | 620 | 731k | inter->put_ciip = FUNC(put_ciip); | 621 | 731k | inter->put_gpm = FUNC(put_gpm); | 622 | | | 623 | 731k | inter->fetch_samples = FUNC(fetch_samples); | 624 | 731k | inter->bdof_fetch_samples = FUNC(bdof_fetch_samples); | 625 | 731k | inter->apply_prof = FUNC(apply_prof); | 626 | 731k | inter->apply_prof_uni = FUNC(apply_prof_uni); | 627 | 731k | inter->apply_prof_uni_w = FUNC(apply_prof_uni_w); | 628 | 731k | inter->apply_bdof = FUNC(apply_bdof); | 629 | 731k | inter->sad = vvc_sad; | 630 | 731k | } |
dsp.c:ff_vvc_inter_dsp_init_10 Line | Count | Source | 599 | 202k | { | 600 | 202k | FUNCS(LUMA, luma); | 601 | 202k | FUNCS(CHROMA, chroma); | 602 | | | 603 | 1.62M | for (int i = 0; i < FF_ARRAY_ELEMS(inter->put_scaled[LUMA]); i++) { | 604 | 1.41M | inter->put_scaled[LUMA][i] = FUNC(put_luma_scaled); | 605 | 1.41M | inter->put_scaled[CHROMA][i] = FUNC(put_chroma_scaled); | 606 | 1.41M | inter->put_uni_scaled[LUMA][i] = FUNC(put_uni_luma_scaled); | 607 | 1.41M | inter->put_uni_scaled[CHROMA][i] = FUNC(put_uni_chroma_scaled); | 608 | 1.41M | inter->put_uni_w_scaled[LUMA][i] = FUNC(put_uni_luma_w_scaled); | 609 | 1.41M | inter->put_uni_w_scaled[CHROMA][i] = FUNC(put_uni_chroma_w_scaled); | 610 | 1.41M | } | 611 | | | 612 | 202k | inter->avg = FUNC(avg); | 613 | 202k | inter->w_avg = FUNC(w_avg); | 614 | | | 615 | 202k | inter->dmvr[0][0] = FUNC(dmvr); | 616 | 202k | inter->dmvr[0][1] = FUNC(dmvr_h); | 617 | 202k | inter->dmvr[1][0] = FUNC(dmvr_v); | 618 | 202k | inter->dmvr[1][1] = FUNC(dmvr_hv); | 619 | | | 620 | 202k | inter->put_ciip = FUNC(put_ciip); | 621 | 202k | inter->put_gpm = FUNC(put_gpm); | 622 | | | 623 | 202k | inter->fetch_samples = FUNC(fetch_samples); | 624 | 202k | inter->bdof_fetch_samples = FUNC(bdof_fetch_samples); | 625 | 202k | inter->apply_prof = FUNC(apply_prof); | 626 | 202k | inter->apply_prof_uni = FUNC(apply_prof_uni); | 627 | 202k | inter->apply_prof_uni_w = FUNC(apply_prof_uni_w); | 628 | 202k | inter->apply_bdof = FUNC(apply_bdof); | 629 | 202k | inter->sad = vvc_sad; | 630 | 202k | } |
dsp.c:ff_vvc_inter_dsp_init_8 Line | Count | Source | 599 | 1.43M | { | 600 | 1.43M | FUNCS(LUMA, luma); | 601 | 1.43M | FUNCS(CHROMA, chroma); | 602 | | | 603 | 11.4M | for (int i = 0; i < FF_ARRAY_ELEMS(inter->put_scaled[LUMA]); i++) { | 604 | 10.0M | inter->put_scaled[LUMA][i] = FUNC(put_luma_scaled); | 605 | 10.0M | inter->put_scaled[CHROMA][i] = FUNC(put_chroma_scaled); | 606 | 10.0M | inter->put_uni_scaled[LUMA][i] = FUNC(put_uni_luma_scaled); | 607 | 10.0M | inter->put_uni_scaled[CHROMA][i] = FUNC(put_uni_chroma_scaled); | 608 | 10.0M | inter->put_uni_w_scaled[LUMA][i] = FUNC(put_uni_luma_w_scaled); | 609 | 10.0M | inter->put_uni_w_scaled[CHROMA][i] = FUNC(put_uni_chroma_w_scaled); | 610 | 10.0M | } | 611 | | | 612 | 1.43M | inter->avg = FUNC(avg); | 613 | 1.43M | inter->w_avg = FUNC(w_avg); | 614 | | | 615 | 1.43M | inter->dmvr[0][0] = FUNC(dmvr); | 616 | 1.43M | inter->dmvr[0][1] = FUNC(dmvr_h); | 617 | 1.43M | inter->dmvr[1][0] = FUNC(dmvr_v); | 618 | 1.43M | inter->dmvr[1][1] = FUNC(dmvr_hv); | 619 | | | 620 | 1.43M | inter->put_ciip = FUNC(put_ciip); | 621 | 1.43M | inter->put_gpm = FUNC(put_gpm); | 622 | | | 623 | 1.43M | inter->fetch_samples = FUNC(fetch_samples); | 624 | 1.43M | inter->bdof_fetch_samples = FUNC(bdof_fetch_samples); | 625 | 1.43M | inter->apply_prof = FUNC(apply_prof); | 626 | 1.43M | inter->apply_prof_uni = FUNC(apply_prof_uni); | 627 | 1.43M | inter->apply_prof_uni_w = FUNC(apply_prof_uni_w); | 628 | 1.43M | inter->apply_bdof = FUNC(apply_bdof); | 629 | 1.43M | inter->sad = vvc_sad; | 630 | 1.43M | } |
|
631 | | |
// Drop the table-filling helper macros once the init function is defined.
632 | | #undef FUNCS
633 | | #undef PEL_FUNC
634 | | #undef DMVR_FUNCS // NOTE(review): no DMVR_FUNCS definition is visible in this file, while DIR_FUNCS (defined above) is not undefined here - confirm whether DIR_FUNCS was intended