/work/svt-av1/Source/Lib/Codec/super_res.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright(c) 2019 Netflix, Inc. |
3 | | * Copyright (c) 2016, Alliance for Open Media. All rights reserved |
4 | | * |
5 | | * This source code is subject to the terms of the BSD 2 Clause License and |
6 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
7 | | * was not distributed with this source code in the LICENSE file, you can |
8 | | * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open |
9 | | * Media Patent License 1.0 was not distributed with this source code in the |
10 | | * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license. |
11 | | */ |
12 | | |
13 | | #include "resize.h" |
14 | | #include "utility.h" |
15 | | #include "super_res.h" |
16 | | #include "intra_prediction.h" |
17 | | |
18 | | #define FILTER_BITS 7 |
19 | | |
20 | | const int16_t svt_av1_resize_filter_normative[(1 << RS_SUBPEL_BITS)][UPSCALE_NORMATIVE_TAPS] = { |
21 | | #if UPSCALE_NORMATIVE_TAPS == 8 |
22 | | {0, 0, 0, 128, 0, 0, 0, 0}, {0, 0, -1, 128, 2, -1, 0, 0}, {0, 1, -3, 127, 4, -2, 1, 0}, |
23 | | {0, 1, -4, 127, 6, -3, 1, 0}, {0, 2, -6, 126, 8, -3, 1, 0}, {0, 2, -7, 125, 11, -4, 1, 0}, |
24 | | {-1, 2, -8, 125, 13, -5, 2, 0}, {-1, 3, -9, 124, 15, -6, 2, 0}, {-1, 3, -10, 123, 18, -6, 2, -1}, |
25 | | {-1, 3, -11, 122, 20, -7, 3, -1}, {-1, 4, -12, 121, 22, -8, 3, -1}, {-1, 4, -13, 120, 25, -9, 3, -1}, |
26 | | {-1, 4, -14, 118, 28, -9, 3, -1}, {-1, 4, -15, 117, 30, -10, 4, -1}, {-1, 5, -16, 116, 32, -11, 4, -1}, |
27 | | {-1, 5, -16, 114, 35, -12, 4, -1}, {-1, 5, -17, 112, 38, -12, 4, -1}, {-1, 5, -18, 111, 40, -13, 5, -1}, |
28 | | {-1, 5, -18, 109, 43, -14, 5, -1}, {-1, 6, -19, 107, 45, -14, 5, -1}, {-1, 6, -19, 105, 48, -15, 5, -1}, |
29 | | {-1, 6, -19, 103, 51, -16, 5, -1}, {-1, 6, -20, 101, 53, -16, 6, -1}, {-1, 6, -20, 99, 56, -17, 6, -1}, |
30 | | {-1, 6, -20, 97, 58, -17, 6, -1}, {-1, 6, -20, 95, 61, -18, 6, -1}, {-2, 7, -20, 93, 64, -18, 6, -2}, |
31 | | {-2, 7, -20, 91, 66, -19, 6, -1}, {-2, 7, -20, 88, 69, -19, 6, -1}, {-2, 7, -20, 86, 71, -19, 6, -1}, |
32 | | {-2, 7, -20, 84, 74, -20, 7, -2}, {-2, 7, -20, 81, 76, -20, 7, -1}, {-2, 7, -20, 79, 79, -20, 7, -2}, |
33 | | {-1, 7, -20, 76, 81, -20, 7, -2}, {-2, 7, -20, 74, 84, -20, 7, -2}, {-1, 6, -19, 71, 86, -20, 7, -2}, |
34 | | {-1, 6, -19, 69, 88, -20, 7, -2}, {-1, 6, -19, 66, 91, -20, 7, -2}, {-2, 6, -18, 64, 93, -20, 7, -2}, |
35 | | {-1, 6, -18, 61, 95, -20, 6, -1}, {-1, 6, -17, 58, 97, -20, 6, -1}, {-1, 6, -17, 56, 99, -20, 6, -1}, |
36 | | {-1, 6, -16, 53, 101, -20, 6, -1}, {-1, 5, -16, 51, 103, -19, 6, -1}, {-1, 5, -15, 48, 105, -19, 6, -1}, |
37 | | {-1, 5, -14, 45, 107, -19, 6, -1}, {-1, 5, -14, 43, 109, -18, 5, -1}, {-1, 5, -13, 40, 111, -18, 5, -1}, |
38 | | {-1, 4, -12, 38, 112, -17, 5, -1}, {-1, 4, -12, 35, 114, -16, 5, -1}, {-1, 4, -11, 32, 116, -16, 5, -1}, |
39 | | {-1, 4, -10, 30, 117, -15, 4, -1}, {-1, 3, -9, 28, 118, -14, 4, -1}, {-1, 3, -9, 25, 120, -13, 4, -1}, |
40 | | {-1, 3, -8, 22, 121, -12, 4, -1}, {-1, 3, -7, 20, 122, -11, 3, -1}, {-1, 2, -6, 18, 123, -10, 3, -1}, |
41 | | {0, 2, -6, 15, 124, -9, 3, -1}, {0, 2, -5, 13, 125, -8, 2, -1}, {0, 1, -4, 11, 125, -7, 2, 0}, |
42 | | {0, 1, -3, 8, 126, -6, 2, 0}, {0, 1, -3, 6, 127, -4, 1, 0}, {0, 1, -2, 4, 127, -3, 1, 0}, |
43 | | {0, 0, -1, 2, 128, -1, 0, 0}, |
44 | | #else |
45 | | #error "Invalid value of UPSCALE_NORMATIVE_TAPS" |
46 | | #endif // UPSCALE_NORMATIVE_TAPS == 8 |
47 | | }; |
48 | | |
49 | | // Calculates the scaled dimension given the original dimension and the scale |
50 | | // denominator. |
51 | 0 | void calculate_scaled_size_helper(uint16_t* dim, uint8_t denom) { |
52 | 0 | if (denom != SCALE_NUMERATOR && denom <= SCALE_DENOMINATOR_MAX) { |
53 | | // We need to ensure the constraint in "Appendix A" of the spec: |
54 | | // * FrameWidth is greater than or equal to 16 |
55 | | // * FrameHeight is greater than or equal to 16 |
56 | | // For this, we clamp the downscaled dimension to at least 16. One |
57 | | // exception: if original dimension itself was < 16, then we keep the |
58 | | // downscaled dimension to be same as the original, to ensure that resizing |
59 | | // is valid. |
60 | 0 | const int min_dim = AOMMIN(16, *dim); |
61 | | // Use this version if we need *dim to be even |
62 | | // *width = (*width * SCALE_NUMERATOR + denom) / (2 * denom); |
63 | | // *width <<= 1; |
64 | 0 | *dim = (uint16_t)((*dim * SCALE_NUMERATOR + denom / 2) / (denom)); |
65 | 0 | *dim = (uint16_t)AOMMAX(*dim, min_dim); |
66 | 0 | } else if (denom == SCALE_THREE_QUATER) { |
67 | | // reference scaling resize defines denom 17 as 3/4 |
68 | 0 | *dim = (uint16_t)((3 + (*dim * 3)) >> 2); |
69 | 0 | } |
70 | 0 | } |
71 | | |
72 | 0 | static int32_t av1_get_upscale_convolve_step(int in_length, int out_length) { |
73 | 0 | return ((in_length << RS_SCALE_SUBPEL_BITS) + out_length / 2) / out_length; |
74 | 0 | } |
75 | | |
76 | 0 | static int32_t get_upscale_convolve_x0(int in_length, int out_length, int32_t x_step_qn) { |
77 | 0 | const int err = out_length * x_step_qn - (in_length << RS_SCALE_SUBPEL_BITS); |
78 | 0 | const int32_t x0 = (-((out_length - in_length) << (RS_SCALE_SUBPEL_BITS - 1)) + out_length / 2) / out_length + |
79 | 0 | RS_SCALE_EXTRA_OFF - err / 2; |
80 | 0 | return (int32_t)((uint32_t)x0 & RS_SCALE_SUBPEL_MASK); |
81 | 0 | } |
82 | | |
83 | | static void av1_convolve_horiz_rs_c(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int w, int h, |
84 | 0 | const int16_t* x_filters, int x0_qn, int x_step_qn) { |
85 | 0 | src -= UPSCALE_NORMATIVE_TAPS / 2 - 1; |
86 | 0 | for (int y = 0; y < h; ++y) { |
87 | 0 | int x_qn = x0_qn; |
88 | 0 | for (int x = 0; x < w; ++x) { |
89 | 0 | const uint8_t* const src_x = &src[x_qn >> RS_SCALE_SUBPEL_BITS]; |
90 | 0 | const int x_filter_idx = (x_qn & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS; |
91 | 0 | assert(x_filter_idx <= RS_SUBPEL_MASK); |
92 | 0 | const int16_t* const x_filter = &x_filters[x_filter_idx * UPSCALE_NORMATIVE_TAPS]; |
93 | 0 | int sum = 0; |
94 | 0 | for (int k = 0; k < UPSCALE_NORMATIVE_TAPS; ++k) { |
95 | 0 | sum += src_x[k] * x_filter[k]; |
96 | 0 | } |
97 | 0 | dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); |
98 | 0 | x_qn += x_step_qn; |
99 | 0 | } |
100 | 0 | src += src_stride; |
101 | 0 | dst += dst_stride; |
102 | 0 | } |
103 | 0 | } |
104 | | |
105 | | static void av1_highbd_convolve_horiz_rs_c(const uint16_t* src, int src_stride, uint16_t* dst, int dst_stride, int w, |
106 | 0 | int h, const int16_t* x_filters, int x0_qn, int x_step_qn, int bd) { |
107 | 0 | src -= UPSCALE_NORMATIVE_TAPS / 2 - 1; |
108 | 0 | for (int y = 0; y < h; ++y) { |
109 | 0 | int x_qn = x0_qn; |
110 | 0 | for (int x = 0; x < w; ++x) { |
111 | 0 | const uint16_t* const src_x = &src[x_qn >> RS_SCALE_SUBPEL_BITS]; |
112 | 0 | const int x_filter_idx = (x_qn & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS; |
113 | 0 | assert(x_filter_idx <= RS_SUBPEL_MASK); |
114 | 0 | const int16_t* const x_filter = &x_filters[x_filter_idx * UPSCALE_NORMATIVE_TAPS]; |
115 | 0 | int sum = 0; |
116 | 0 | for (int k = 0; k < UPSCALE_NORMATIVE_TAPS; ++k) { |
117 | 0 | sum += src_x[k] * x_filter[k]; |
118 | 0 | } |
119 | 0 | dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); |
120 | 0 | x_qn += x_step_qn; |
121 | 0 | } |
122 | 0 | src += src_stride; |
123 | 0 | dst += dst_stride; |
124 | 0 | } |
125 | 0 | } |
126 | | |
127 | | void upscale_normative_rect(const uint8_t* const input, int height, int width, int in_stride, uint8_t* output, |
128 | | int height2, int width2, int out_stride, int x_step_qn, int x0_qn, int pad_left, |
129 | 0 | int pad_right) { |
130 | 0 | assert(width > 0); |
131 | 0 | assert(height > 0); |
132 | 0 | assert(width2 > 0); |
133 | 0 | assert(height2 > 0); |
134 | 0 | assert(height2 == height); |
135 | | |
136 | | /* Extend the left/right pixels of the tile column if needed |
137 | | (either because we can't sample from other tiles, or because we're at |
138 | | a frame edge). |
139 | | Save the overwritten pixels into tmp_left and tmp_right. |
140 | | Note: Because we pass input-1 to av1_convolve_horiz_rs, we need one extra |
141 | | column of border pixels compared to what we'd naively think.*/ |
142 | 0 | const int border_cols = UPSCALE_NORMATIVE_TAPS / 2 + 1; |
143 | 0 | uint8_t* tmp_left = NULL; |
144 | 0 | uint8_t* tmp_right = NULL; |
145 | 0 | uint8_t* const in_tl = (uint8_t*)(input - border_cols); |
146 | 0 | uint8_t* const in_tr = (uint8_t*)(input + width); |
147 | |
|
148 | 0 | if (pad_left) { |
149 | 0 | tmp_left = (uint8_t*)svt_aom_malloc(sizeof(*tmp_left) * border_cols * height); |
150 | 0 | for (int i = 0; i < height; i++) { |
151 | 0 | svt_memcpy(tmp_left + i * border_cols, in_tl + i * in_stride, border_cols); |
152 | 0 | memset(in_tl + i * in_stride, input[i * in_stride], border_cols); |
153 | 0 | } |
154 | 0 | } |
155 | 0 | if (pad_right) { |
156 | 0 | tmp_right = (uint8_t*)svt_aom_malloc(sizeof(*tmp_right) * border_cols * height); |
157 | 0 | for (int i = 0; i < height; i++) { |
158 | 0 | svt_memcpy(tmp_right + i * border_cols, in_tr + i * in_stride, border_cols); |
159 | 0 | memset(in_tr + i * in_stride, input[i * in_stride + width - 1], border_cols); |
160 | 0 | } |
161 | 0 | } |
162 | |
|
163 | 0 | av1_convolve_horiz_rs_c(input - 1, |
164 | 0 | in_stride, |
165 | 0 | output, |
166 | 0 | out_stride, |
167 | 0 | width2, |
168 | 0 | height2, |
169 | 0 | &svt_av1_resize_filter_normative[0][0], |
170 | 0 | x0_qn, |
171 | 0 | x_step_qn); |
172 | | |
173 | | /* Restore the left/right border pixels */ |
174 | 0 | if (pad_left) { |
175 | 0 | for (int i = 0; i < height; i++) { |
176 | 0 | svt_memcpy(in_tl + i * in_stride, tmp_left + i * border_cols, border_cols); |
177 | 0 | } |
178 | 0 | svt_aom_free(tmp_left); |
179 | 0 | } |
180 | 0 | if (pad_right) { |
181 | 0 | for (int i = 0; i < height; i++) { |
182 | 0 | svt_memcpy(in_tr + i * in_stride, tmp_right + i * border_cols, border_cols); |
183 | 0 | } |
184 | 0 | svt_aom_free(tmp_right); |
185 | 0 | } |
186 | 0 | } |
187 | | |
188 | | static void highbd_upscale_normative_rect(const uint8_t* const input, int height, int width, int in_stride, |
189 | | uint8_t* output, int height2, int width2, int out_stride, int x_step_qn, |
190 | 0 | int x0_qn, int pad_left, int pad_right, int bd) { |
191 | 0 | assert(width > 0); |
192 | 0 | assert(height > 0); |
193 | 0 | assert(width2 > 0); |
194 | 0 | assert(height2 > 0); |
195 | 0 | assert(height2 == height); |
196 | | |
197 | | /* Extend the left/right pixels of the tile column if needed |
198 | | (either because we can't sample from other tiles, or because we're at |
199 | | a frame edge). |
200 | | Save the overwritten pixels into tmp_left and tmp_right. |
201 | | Note: Because we pass input-1 to av1_convolve_horiz_rs, we need one extra |
202 | | column of border pixels compared to what we'd naively think.*/ |
203 | 0 | const int border_cols = UPSCALE_NORMATIVE_TAPS / 2 + 1; |
204 | 0 | const int border_size = border_cols * sizeof(uint16_t); |
205 | 0 | uint16_t* tmp_left = NULL; |
206 | 0 | uint16_t* tmp_right = NULL; |
207 | 0 | uint16_t* const input16 = (uint16_t*)input; //CONVERT_TO_SHORTPTR(input); |
208 | 0 | uint16_t* const in_tl = input16 - border_cols; |
209 | 0 | uint16_t* const in_tr = input16 + width; |
210 | 0 | if (pad_left) { |
211 | 0 | tmp_left = (uint16_t*)svt_aom_malloc(sizeof(*tmp_left) * border_cols * height); |
212 | 0 | for (int i = 0; i < height; i++) { |
213 | 0 | svt_memcpy(tmp_left + i * border_cols, in_tl + i * in_stride, border_size); |
214 | 0 | svt_aom_memset16(in_tl + i * in_stride, input16[i * in_stride], border_cols); |
215 | 0 | } |
216 | 0 | } |
217 | 0 | if (pad_right) { |
218 | 0 | tmp_right = (uint16_t*)svt_aom_malloc(sizeof(*tmp_right) * border_cols * height); |
219 | 0 | for (int i = 0; i < height; i++) { |
220 | 0 | svt_memcpy(tmp_right + i * border_cols, in_tr + i * in_stride, border_size); |
221 | 0 | svt_aom_memset16(in_tr + i * in_stride, input16[i * in_stride + width - 1], border_cols); |
222 | 0 | } |
223 | 0 | } |
224 | |
|
225 | 0 | av1_highbd_convolve_horiz_rs_c(((uint16_t*)(input)-1), |
226 | 0 | in_stride, |
227 | 0 | (uint16_t*)(output), |
228 | 0 | out_stride, |
229 | 0 | width2, |
230 | 0 | height2, |
231 | 0 | &svt_av1_resize_filter_normative[0][0], |
232 | 0 | x0_qn, |
233 | 0 | x_step_qn, |
234 | 0 | bd); |
235 | | |
236 | | /*Restore the left/right border pixels*/ |
237 | 0 | if (pad_left) { |
238 | 0 | for (int i = 0; i < height; i++) { |
239 | 0 | svt_memcpy(in_tl + i * in_stride, tmp_left + i * border_cols, border_size); |
240 | 0 | } |
241 | 0 | svt_aom_free(tmp_left); |
242 | 0 | } |
243 | 0 | if (pad_right) { |
244 | 0 | for (int i = 0; i < height; i++) { |
245 | 0 | svt_memcpy(in_tr + i * in_stride, tmp_right + i * border_cols, border_size); |
246 | 0 | } |
247 | 0 | svt_aom_free(tmp_right); |
248 | 0 | } |
249 | 0 | } |
250 | | |
251 | | void svt_av1_upscale_normative_rows(const Av1Common* cm, const uint8_t* src, int src_stride, uint8_t* dst, |
252 | 0 | int dst_stride, int rows, int sub_x, int bd, bool is_16bit_pipeline) { |
253 | 0 | int high_bd = bd > EB_EIGHT_BIT || is_16bit_pipeline; |
254 | 0 | const int downscaled_plane_width = ROUND_POWER_OF_TWO(cm->frm_size.frame_width, sub_x); |
255 | 0 | const int upscaled_plane_width = ROUND_POWER_OF_TWO(cm->frm_size.superres_upscaled_width, sub_x); |
256 | 0 | const int superres_denom = cm->frm_size.superres_denominator; |
257 | |
|
258 | 0 | TileInfo tile_col; |
259 | 0 | const int32_t x_step_qn = av1_get_upscale_convolve_step(downscaled_plane_width, upscaled_plane_width); |
260 | 0 | int32_t x0_qn = get_upscale_convolve_x0(downscaled_plane_width, upscaled_plane_width, x_step_qn); |
261 | 0 | for (int j = 0; j < cm->tiles_info.tile_cols; j++) { |
262 | 0 | svt_av1_tile_set_col(&tile_col, &cm->tiles_info, cm->mi_cols, j); |
263 | | |
264 | | /*Determine the limits of this tile column in both the source |
265 | | and destination images. |
266 | | Note: The actual location which we start sampling from is |
267 | | (downscaled_x0 - 1 + (x0_qn/2^14)), and this quantity increases |
268 | | by exactly dst_width * (x_step_qn/2^14) pixels each iteration.*/ |
269 | 0 | const int downscaled_x0 = tile_col.mi_col_start << (MI_SIZE_LOG2 - sub_x); |
270 | 0 | const int downscaled_x1 = tile_col.mi_col_end << (MI_SIZE_LOG2 - sub_x); |
271 | 0 | const int src_width = downscaled_x1 - downscaled_x0; |
272 | |
|
273 | 0 | const int upscaled_x0 = (downscaled_x0 * superres_denom) / SCALE_NUMERATOR; |
274 | 0 | int upscaled_x1; |
275 | 0 | if (j == cm->tiles_info.tile_cols - 1) { |
276 | | /*Note that we can't just use AOMMIN here - due to rounding, |
277 | | (downscaled_x1 * superres_denom) / SCALE_NUMERATOR may be less than |
278 | | upscaled_plane_width.*/ |
279 | 0 | upscaled_x1 = upscaled_plane_width; |
280 | 0 | } else { |
281 | 0 | upscaled_x1 = (downscaled_x1 * superres_denom) / SCALE_NUMERATOR; |
282 | 0 | } |
283 | |
|
284 | 0 | const uint8_t* const src_ptr = src + (downscaled_x0 << high_bd); |
285 | 0 | uint8_t* const dst_ptr = dst + (upscaled_x0 << high_bd); |
286 | 0 | const int dst_width = upscaled_x1 - upscaled_x0; |
287 | |
|
288 | 0 | const int pad_left = (j == 0); |
289 | 0 | const int pad_right = (j == cm->tiles_info.tile_cols - 1); |
290 | |
|
291 | 0 | if (high_bd) { |
292 | 0 | highbd_upscale_normative_rect(src_ptr, |
293 | 0 | rows, |
294 | 0 | src_width, |
295 | 0 | src_stride, |
296 | 0 | dst_ptr, |
297 | 0 | rows, |
298 | 0 | dst_width, |
299 | 0 | dst_stride, |
300 | 0 | x_step_qn, |
301 | 0 | x0_qn, |
302 | 0 | pad_left, |
303 | 0 | pad_right, |
304 | 0 | bd); |
305 | 0 | } else { |
306 | 0 | upscale_normative_rect(src_ptr, |
307 | 0 | rows, |
308 | 0 | src_width, |
309 | 0 | src_stride, |
310 | 0 | dst_ptr, |
311 | 0 | rows, |
312 | 0 | dst_width, |
313 | 0 | dst_stride, |
314 | 0 | x_step_qn, |
315 | 0 | x0_qn, |
316 | 0 | pad_left, |
317 | 0 | pad_right); |
318 | 0 | } |
319 | | |
320 | | /*Update the fractional pixel offset to prepare for the next tile col*/ |
321 | 0 | x0_qn += (dst_width * x_step_qn) - (src_width << RS_SCALE_SUBPEL_BITS); |
322 | 0 | } |
323 | 0 | } |