/src/aom/av1/common/av1_loopfilter.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2016, Alliance for Open Media. All rights reserved. |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | | */ |
11 | | |
12 | | #include <math.h> |
13 | | |
14 | | #include "config/aom_config.h" |
15 | | #include "config/aom_dsp_rtcd.h" |
16 | | |
17 | | #include "aom_dsp/aom_dsp_common.h" |
18 | | #include "aom_mem/aom_mem.h" |
19 | | #include "aom_ports/mem.h" |
20 | | #include "av1/common/av1_common_int.h" |
21 | | #include "av1/common/av1_loopfilter.h" |
22 | | #include "av1/common/reconinter.h" |
23 | | #include "av1/common/seg_common.h" |
24 | | |
// Three-valued selector (single / dual / quad) stored in one byte.
// NOTE(review): not referenced in the visible part of this file; presumably it
// tells the filtering loops how many adjacent edges one filter call covers --
// confirm against its users further down.
25 | | enum { |
26 | | USE_SINGLE, |
27 | | USE_DUAL, |
28 | | USE_QUAD, |
29 | | } UENUM1BYTE(USE_FILTER_TYPE); |
30 | | |
// Segmentation feature that carries the loop-filter level adjustment, indexed
// as [plane][dir]. Luma has a separate feature per direction index; each
// chroma plane uses the same feature for both.
31 | | static const SEG_LVL_FEATURES seg_lvl_lf_lut[MAX_MB_PLANE][2] = { |
32 | | { SEG_LVL_ALT_LF_Y_V, SEG_LVL_ALT_LF_Y_H }, |
33 | | { SEG_LVL_ALT_LF_U, SEG_LVL_ALT_LF_U }, |
34 | | { SEG_LVL_ALT_LF_V, SEG_LVL_ALT_LF_V } |
35 | | }; |
36 | | |
// Index into MB_MODE_INFO::delta_lf[] for a given [plane][dir_idx]; read by
// av1_get_filter_level() when delta_lf_multi is set. Luma uses entries 0 and 1
// (one per direction), the two chroma planes use entries 2 and 3.
37 | | static const int delta_lf_id_lut[MAX_MB_PLANE][2] = { { 0, 1 }, |
38 | | { 2, 2 }, |
39 | | { 3, 3 } }; |
40 | | |
// Maps a prediction mode (mbmi->mode) to an index into lf.mode_deltas[] and
// into the last dimension of lf_info.lvl. av1_loop_filter_init() asserts that
// the table has exactly MB_MODE_COUNT entries.
41 | | static const int mode_lf_lut[] = { |
42 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // INTRA_MODES |
43 | | 1, 1, 0, 1, // INTER_MODES (GLOBALMV == 0) |
44 | | 1, 1, 1, 1, 1, 1, 0, 1 // INTER_COMPOUND_MODES (GLOBAL_GLOBALMV == 0) |
45 | | }; |
46 | | |
// Fills lfthr[lvl].lim and lfthr[lvl].mblim for every level
// 0..MAX_LOOP_FILTER from the given sharpness setting.
//   lfi            filter info whose lfthr[] limit vectors are overwritten
//   sharpness_lvl  sharpness setting; values > 0 shrink and cap the inside limit
// Each limit is a single byte value replicated SIMD_WIDTH times with memset.
47 | 52.3k | static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) { |
48 | 52.3k | int lvl; |
49 | | |
50 | | // For each possible value for the loop filter fill out limits |
51 | 3.40M | for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) { |
52 | | // Sharpness control: shift lvl right by 0, 1 or 2 bits for sharpness 0, 1..4 and > 4. |
53 | 3.35M | int block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4)); |
54 | | /* With non-zero sharpness the inside limit is also capped at 9 - sharpness_lvl. */ |
55 | 3.35M | if (sharpness_lvl > 0) { |
56 | 1.12M | if (block_inside_limit > (9 - sharpness_lvl)) |
57 | 864k | block_inside_limit = (9 - sharpness_lvl); |
58 | 1.12M | } |
59 | | /* The inside limit is never allowed below 1. */ |
60 | 3.35M | if (block_inside_limit < 1) block_inside_limit = 1; |
61 | | /* lim = inside limit; mblim = 2 * (lvl + 2) + inside limit. */ |
62 | 3.35M | memset(lfi->lfthr[lvl].lim, block_inside_limit, SIMD_WIDTH); |
63 | 3.35M | memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit), |
64 | 3.35M | SIMD_WIDTH); |
65 | 3.35M | } |
66 | 52.3k | } |
67 | | |
// Returns the loop-filter level for one block, plane and edge direction.
//   cm       frame state; delta_q_info, lf (base levels, deltas) and seg are read
//   lfi_n    precomputed level table, read only when delta LF is not signalled
//   dir_idx  direction index; selects lf.filter_level[] for luma and the
//            direction entry of the lookup tables
//   plane    0, 1 or 2
//   mbmi     block whose segment_id, delta_lf*, ref_frame[0] and mode are read
// When cm->delta_q_info.delta_lf_present_flag is set, the level is computed
// here as base level + block delta, then the segment adjustment, then the
// scaled ref/mode deltas, clamping to [0, MAX_LOOP_FILTER] after each stage.
// Otherwise the value is read from lfi_n->lvl.
// NOTE(review): the assert on plane runs after plane has already been used to
// index delta_lf_id_lut, so it does not guard that access.
68 | | uint8_t av1_get_filter_level(const AV1_COMMON *cm, |
69 | | const loop_filter_info_n *lfi_n, const int dir_idx, |
70 | 212M | int plane, const MB_MODE_INFO *mbmi) { |
71 | 212M | const int segment_id = mbmi->segment_id; |
72 | 212M | if (cm->delta_q_info.delta_lf_present_flag) { |
73 | 104M | int8_t delta_lf; |
74 | 104M | if (cm->delta_q_info.delta_lf_multi) { |
75 | 14.2M | const int delta_lf_idx = delta_lf_id_lut[plane][dir_idx]; |
76 | 14.2M | delta_lf = mbmi->delta_lf[delta_lf_idx]; |
77 | 90.1M | } else { |
78 | 90.1M | delta_lf = mbmi->delta_lf_from_base; |
79 | 90.1M | } |
80 | 104M | int base_level; |
81 | 104M | if (plane == 0) |
82 | 53.0M | base_level = cm->lf.filter_level[dir_idx]; |
83 | 51.3M | else if (plane == 1) |
84 | 49.4M | base_level = cm->lf.filter_level_u; |
85 | 1.89M | else |
86 | 1.89M | base_level = cm->lf.filter_level_v; |
87 | 104M | int lvl_seg = clamp(delta_lf + base_level, 0, MAX_LOOP_FILTER); |
88 | 104M | assert(plane >= 0 && plane <= 2); |
89 | 101M | const int seg_lf_feature_id = seg_lvl_lf_lut[plane][dir_idx]; |
90 | 101M | if (segfeature_active(&cm->seg, segment_id, seg_lf_feature_id)) { |
91 | 81.8M | const int data = get_segdata(&cm->seg, segment_id, seg_lf_feature_id); |
92 | 81.8M | lvl_seg = clamp(lvl_seg + data, 0, MAX_LOOP_FILTER); |
93 | 81.8M | } |
94 | | /* Ref and mode deltas are multiplied by scale = 1 << (lvl_seg >> 5); the mode delta applies to non-intra blocks only. */ |
95 | 101M | if (cm->lf.mode_ref_delta_enabled) { |
96 | 49.2M | const int scale = 1 << (lvl_seg >> 5); |
97 | 49.2M | lvl_seg += cm->lf.ref_deltas[mbmi->ref_frame[0]] * scale; |
98 | 49.2M | if (mbmi->ref_frame[0] > INTRA_FRAME) |
99 | 8.97M | lvl_seg += cm->lf.mode_deltas[mode_lf_lut[mbmi->mode]] * scale; |
100 | 49.2M | lvl_seg = clamp(lvl_seg, 0, MAX_LOOP_FILTER); |
101 | 49.2M | } |
102 | 101M | return lvl_seg; |
103 | 107M | } else { |
104 | 107M | return lfi_n->lvl[plane][segment_id][dir_idx][mbmi->ref_frame[0]] |
105 | 107M | [mode_lf_lut[mbmi->mode]]; |
106 | 107M | } |
107 | 212M | } |
108 | | |
// Initialises the threshold vectors in cm->lf_info: the sharpness-dependent
// limits (via update_sharpness) and the hev_thr vector of every level.
//   cm  frame state; cm->lf.sharpness_level is read, cm->lf_info.lfthr written
109 | 16.8k | void av1_loop_filter_init(AV1_COMMON *cm) { |
110 | 16.8k | assert(MB_MODE_COUNT == NELEMENTS(mode_lf_lut)); |
111 | 16.8k | loop_filter_info_n *lfi = &cm->lf_info; |
112 | 16.8k | struct loopfilter *lf = &cm->lf; |
113 | 16.8k | int lvl; |
114 | | |
115 | | // init limits for given sharpness |
116 | 16.8k | update_sharpness(lfi, lf->sharpness_level); |
117 | | |
118 | | // init hev threshold const vectors: hev_thr is lvl >> 4 replicated SIMD_WIDTH times |
119 | 1.09M | for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) |
120 | 1.07M | memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH); |
121 | 16.8k | } |
122 | | |
// Per-frame setup of cm->lf_info.
//   cm           frame state; lf and seg are read, lf_info is written
//   plane_start  first plane to set up (asserted >= AOM_PLANE_Y)
//   plane_end    one past the last plane to set up (asserted <= MAX_MB_PLANE)
123 | | // Update the loop filter for the current frame: refresh the sharpness limits and |
124 | | // rebuild lf_info.lvl[plane][segment][dir][ref][mode] for the requested planes. |
125 | | // Call before loop_filter_rows(); av1_loop_filter_frame() calls this function directly. |
126 | | void av1_loop_filter_frame_init(AV1_COMMON *cm, int plane_start, |
127 | 35.5k | int plane_end) { |
128 | 35.5k | int filt_lvl[MAX_MB_PLANE], filt_lvl_r[MAX_MB_PLANE]; |
129 | 35.5k | int plane; |
130 | 35.5k | int seg_id; |
131 | | // The ref/mode deltas below are multiplied by scale = 1 << (lvl_seg >> 5): |
132 | | // the multiplier is 1 when the level is between 0 and 31; |
133 | | // 2 when the level is between 32 and 63 |
134 | 35.5k | loop_filter_info_n *const lfi = &cm->lf_info; |
135 | 35.5k | struct loopfilter *const lf = &cm->lf; |
136 | 35.5k | const struct segmentation *const seg = &cm->seg; |
137 | | |
138 | | // update sharpness limits |
139 | 35.5k | update_sharpness(lfi, lf->sharpness_level); |
140 | | /* filt_lvl holds the dir == 0 base level per plane, filt_lvl_r the dir == 1 level; they differ for luma only. */ |
141 | 35.5k | filt_lvl[0] = cm->lf.filter_level[0]; |
142 | 35.5k | filt_lvl[1] = cm->lf.filter_level_u; |
143 | 35.5k | filt_lvl[2] = cm->lf.filter_level_v; |
144 | | |
145 | 35.5k | filt_lvl_r[0] = cm->lf.filter_level[1]; |
146 | 35.5k | filt_lvl_r[1] = cm->lf.filter_level_u; |
147 | 35.5k | filt_lvl_r[2] = cm->lf.filter_level_v; |
148 | | |
149 | 35.5k | assert(plane_start >= AOM_PLANE_Y); |
150 | 35.5k | assert(plane_end <= MAX_MB_PLANE); |
151 | | /* Zero luma levels in both directions end the loop (chroma is not set up either); a zero chroma level skips only that plane. */ |
152 | 138k | for (plane = plane_start; plane < plane_end; plane++) { |
153 | 102k | if (plane == 0 && !filt_lvl[0] && !filt_lvl_r[0]) |
154 | 0 | break; |
155 | 102k | else if (plane == 1 && !filt_lvl[1]) |
156 | 5.86k | continue; |
157 | 97.0k | else if (plane == 2 && !filt_lvl[2]) |
158 | 10.8k | continue; |
159 | | |
160 | 775k | for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) { |
161 | 2.06M | for (int dir = 0; dir < 2; ++dir) { |
162 | 1.37M | int lvl_seg = (dir == 0) ? filt_lvl[plane] : filt_lvl_r[plane]; |
163 | 1.37M | const int seg_lf_feature_id = seg_lvl_lf_lut[plane][dir]; |
164 | 1.37M | if (segfeature_active(seg, seg_id, seg_lf_feature_id)) { |
165 | 134k | const int data = get_segdata(&cm->seg, seg_id, seg_lf_feature_id); |
166 | 134k | lvl_seg = clamp(lvl_seg + data, 0, MAX_LOOP_FILTER); |
167 | 134k | } |
168 | | |
169 | 1.37M | if (!lf->mode_ref_delta_enabled) { |
170 | | // we could get rid of this if we assume that deltas are set to |
171 | | // zero when not in use; encoder always uses deltas |
172 | 700k | memset(lfi->lvl[plane][seg_id][dir], lvl_seg, |
173 | 700k | sizeof(lfi->lvl[plane][seg_id][dir])); |
174 | 700k | } else { |
175 | 677k | int ref, mode; |
176 | 677k | const int scale = 1 << (lvl_seg >> 5); |
177 | 677k | const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale; |
178 | 677k | lfi->lvl[plane][seg_id][dir][INTRA_FRAME][0] = |
179 | 677k | clamp(intra_lvl, 0, MAX_LOOP_FILTER); |
180 | | |
181 | 5.41M | for (ref = LAST_FRAME; ref < REF_FRAMES; ++ref) { |
182 | 14.2M | for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) { |
183 | 9.47M | const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale + |
184 | 9.47M | lf->mode_deltas[mode] * scale; |
185 | 9.47M | lfi->lvl[plane][seg_id][dir][ref][mode] = |
186 | 9.47M | clamp(inter_lvl, 0, MAX_LOOP_FILTER); |
187 | 9.47M | } |
188 | 4.73M | } |
189 | 677k | } |
190 | 1.37M | } |
191 | 688k | } |
192 | 86.1k | } |
193 | 35.5k | } |
194 | | |
// Returns the transform size that applies at (mi_row, mi_col) in the given
// plane.
//   xd              may be NULL; when set, a lossless segment yields TX_4X4
//   mbmi            block covering the position (asserted non-NULL)
//   mi_row, mi_col  position in mode-info units
//   plane           AOM_PLANE_Y or a chroma plane
//   ss_x, ss_y      chroma subsampling, used only for chroma planes
// Luma starts from mbmi->tx_size, chroma from av1_get_max_uv_txsize(). For a
// luma inter block that is not skipped, the size is replaced by the
// inter_tx_size[] entry for the position inside the block.
195 | | static AOM_FORCE_INLINE TX_SIZE |
196 | | get_transform_size(const MACROBLOCKD *const xd, const MB_MODE_INFO *const mbmi, |
197 | | const int mi_row, const int mi_col, const int plane, |
198 | 165M | const int ss_x, const int ss_y) { |
199 | 165M | assert(mbmi != NULL); |
200 | 183M | if (xd && xd->lossless[mbmi->segment_id]) return TX_4X4; |
201 | | |
202 | 74.0M | TX_SIZE tx_size = (plane == AOM_PLANE_Y) |
203 | 74.0M | ? mbmi->tx_size |
204 | 74.0M | : av1_get_max_uv_txsize(mbmi->bsize, ss_x, ss_y); |
205 | 74.0M | assert(tx_size < TX_SIZES_ALL); |
206 | 105M | if ((plane == AOM_PLANE_Y) && is_inter_block(mbmi) && !mbmi->skip_txfm) { |
207 | 5.61M | const BLOCK_SIZE sb_type = mbmi->bsize; |
208 | 5.61M | const int blk_row = mi_row & (mi_size_high[sb_type] - 1); |
209 | 5.61M | const int blk_col = mi_col & (mi_size_wide[sb_type] - 1); |
210 | 5.61M | const TX_SIZE mb_tx_size = |
211 | 5.61M | mbmi->inter_tx_size[av1_get_txb_size_index(sb_type, blk_row, blk_col)]; |
212 | 5.61M | assert(mb_tx_size < TX_SIZES_ALL); |
213 | 5.67M | tx_size = mb_tx_size; |
214 | 5.67M | } |
215 | | |
216 | 105M | return tx_size; |
217 | 105M | } |
218 | | |
// Luma filter length indexed by dim, the smaller of the two neighbouring
// transforms' tx_size_{wide,high}_unit_log2 values (see set_lpf_parameters).
219 | | static const int tx_dim_to_filter_length[TX_SIZES] = { 4, 8, 14, 14, 14 }; |
220 | | |
// Decides whether and how the edge at plane position (x, y) is filtered.
//   params     output; filter_length is reset to 0 and set to a non-zero
//              length only when the edge is to be filtered, in which case
//              lfthr points at the thresholds of the chosen level
//   mode_step  offset in the mi grid from the current entry back to the
//              previous block across the edge
//   edge_dir   VERT_EDGE or the horizontal direction
//   x, y       position within the plane (compared against dst.width/height)
//   plane_ptr  plane geometry: dst size and subsampling
// Returns TX_4X4 when (x, y) is outside the plane, TX_INVALID when the current
// or previous mode info is missing, otherwise the current transform size.
221 | | // Return TX_SIZE from get_transform_size(), so it is plane and direction |
222 | | // aware |
223 | | static TX_SIZE set_lpf_parameters( |
224 | | AV1_DEBLOCKING_PARAMETERS *const params, const ptrdiff_t mode_step, |
225 | | const AV1_COMMON *const cm, const MACROBLOCKD *const xd, |
226 | | const EDGE_DIR edge_dir, const uint32_t x, const uint32_t y, |
227 | 122M | const int plane, const struct macroblockd_plane *const plane_ptr) { |
228 | | // reset to initial values |
229 | 122M | params->filter_length = 0; |
230 | | |
231 | | // no deblocking is required when (x, y) lies outside the plane |
232 | 122M | const uint32_t width = plane_ptr->dst.width; |
233 | 122M | const uint32_t height = plane_ptr->dst.height; |
234 | 125M | if ((width <= x) || (height <= y)) { |
235 | | // just return the smallest transform unit size |
236 | 2.37M | return TX_4X4; |
237 | 2.37M | } |
238 | | |
239 | 119M | const uint32_t scale_horz = plane_ptr->subsampling_x; |
240 | 119M | const uint32_t scale_vert = plane_ptr->subsampling_y; |
241 | | // for sub8x8 block, chroma prediction mode is obtained from the bottom/right |
242 | | // mi structure of the co-located 8x8 luma block. so for chroma plane, mi_row |
243 | | // and mi_col should map to the bottom/right mi structure, i.e, both mi_row |
244 | | // and mi_col should be odd number for chroma plane. |
245 | 119M | const int mi_row = scale_vert | ((y << scale_vert) >> MI_SIZE_LOG2); |
246 | 119M | const int mi_col = scale_horz | ((x << scale_horz) >> MI_SIZE_LOG2); |
247 | 119M | MB_MODE_INFO **mi = |
248 | 119M | cm->mi_params.mi_grid_base + mi_row * cm->mi_params.mi_stride + mi_col; |
249 | 119M | const MB_MODE_INFO *mbmi = mi[0]; |
250 | | // If current mbmi is not correctly set up, return an invalid value to stop |
251 | | // filtering. One example is that if this tile is not coded, then its mbmi |
252 | | // is not set up. |
253 | 119M | if (mbmi == NULL) return TX_INVALID; |
254 | | |
255 | 119M | const TX_SIZE ts = get_transform_size(xd, mi[0], mi_row, mi_col, plane, |
256 | 119M | scale_horz, scale_vert); |
257 | | |
258 | 119M | { |
259 | 119M | const uint32_t coord = (VERT_EDGE == edge_dir) ? (x) : (y); |
260 | 119M | const uint32_t transform_masks = |
261 | 119M | edge_dir == VERT_EDGE ? tx_size_wide[ts] - 1 : tx_size_high[ts] - 1; |
262 | 119M | const int32_t tu_edge = (coord & transform_masks) ? (0) : (1); |
263 | | /* Positions that are not on a transform-unit boundary are never filtered. */ |
264 | 119M | if (!tu_edge) return ts; |
265 | | |
266 | | // prepare outer edge parameters. deblock the edge if it's an edge of a TU |
267 | 119M | { |
268 | 119M | const uint32_t curr_level = |
269 | 119M | av1_get_filter_level(cm, &cm->lf_info, edge_dir, plane, mbmi); |
270 | 119M | const int curr_skipped = mbmi->skip_txfm && is_inter_block(mbmi); |
271 | 119M | uint32_t level = curr_level; |
272 | 131M | if (coord) { |
273 | 131M | { |
274 | 131M | const MB_MODE_INFO *const mi_prev = *(mi - mode_step); |
275 | 131M | if (mi_prev == NULL) return TX_INVALID; |
276 | 131M | const int pv_row = |
277 | 131M | (VERT_EDGE == edge_dir) ? (mi_row) : (mi_row - (1 << scale_vert)); |
278 | 131M | const int pv_col = |
279 | 131M | (VERT_EDGE == edge_dir) ? (mi_col - (1 << scale_horz)) : (mi_col); |
280 | 131M | const TX_SIZE pv_ts = get_transform_size( |
281 | 131M | xd, mi_prev, pv_row, pv_col, plane, scale_horz, scale_vert); |
282 | | |
283 | 131M | const uint32_t pv_lvl = |
284 | 131M | av1_get_filter_level(cm, &cm->lf_info, edge_dir, plane, mi_prev); |
285 | | |
286 | 131M | const int pv_skip_txfm = |
287 | 131M | mi_prev->skip_txfm && is_inter_block(mi_prev); |
288 | 131M | const BLOCK_SIZE bsize = get_plane_block_size( |
289 | 131M | mbmi->bsize, plane_ptr->subsampling_x, plane_ptr->subsampling_y); |
290 | 131M | assert(bsize < BLOCK_SIZES_ALL); |
291 | 136M | const int prediction_masks = edge_dir == VERT_EDGE |
292 | 136M | ? block_size_wide[bsize] - 1 |
293 | 136M | : block_size_high[bsize] - 1; |
294 | 136M | const int32_t pu_edge = !(coord & prediction_masks); |
295 | | // if the current and the previous blocks are skipped, |
296 | | // deblock the edge if the edge belongs to a PU's edge only. |
297 | 136M | if ((curr_level || pv_lvl) && |
298 | 136M | (!pv_skip_txfm || !curr_skipped || pu_edge)) { |
299 | 125M | const int dim = (VERT_EDGE == edge_dir) |
300 | 125M | ? AOMMIN(tx_size_wide_unit_log2[ts], |
301 | 125M | tx_size_wide_unit_log2[pv_ts]) |
302 | 125M | : AOMMIN(tx_size_high_unit_log2[ts], |
303 | 125M | tx_size_high_unit_log2[pv_ts]); |
304 | 125M | if (plane) { |
305 | 66.1M | params->filter_length = (dim == 0) ? 4 : 6; |
306 | 66.1M | } else { |
307 | 59.1M | assert(dim < TX_SIZES); |
308 | 67.5M | assert(dim >= 0); |
309 | 67.5M | params->filter_length = tx_dim_to_filter_length[dim]; |
310 | 67.5M | } |
311 | | |
312 | | // use the current block's level; fall back to the previous |
313 | | // block's level when the current level is 0 |
314 | 133M | level = (curr_level) ? (curr_level) : (pv_lvl); |
315 | 133M | } |
316 | 136M | } |
317 | 136M | } |
318 | | // prepare common parameters |
319 | 133M | if (params->filter_length) { |
320 | 120M | const loop_filter_thresh *const limits = cm->lf_info.lfthr + level; |
321 | 120M | params->lfthr = limits; |
322 | 120M | } |
323 | 133M | } |
324 | 133M | } |
325 | | |
326 | 0 | return ts; |
327 | 119M | } |
328 | | |
// Luma filter length for a vertical edge, looked up as [ts][pv_ts] (current
// and previous transform size; see set_one_param_for_line_luma). Each entry
// follows the narrower of the two transform widths: 4 -> 4, 8 -> 8, 16 and
// wider -> 14. Rows are labelled below; columns presumably follow the same
// TX_SIZE order -- confirm against the enum.
329 | | static const uint32_t vert_filter_length_luma[TX_SIZES_ALL][TX_SIZES_ALL] = { |
330 | | // TX_4X4 |
331 | | { |
332 | | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
333 | | }, |
334 | | // TX_8X8 |
335 | | { |
336 | | 4, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, |
337 | | }, |
338 | | // TX_16X16 |
339 | | { |
340 | | 4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14, |
341 | | }, |
342 | | // TX_32X32 |
343 | | { |
344 | | 4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14, |
345 | | }, |
346 | | // TX_64X64 |
347 | | { |
348 | | 4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14, |
349 | | }, |
350 | | // TX_4X8 |
351 | | { |
352 | | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
353 | | }, |
354 | | // TX_8X4 |
355 | | { |
356 | | 4, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, |
357 | | }, |
358 | | // TX_8X16 |
359 | | { |
360 | | 4, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, |
361 | | }, |
362 | | // TX_16X8 |
363 | | { |
364 | | 4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14, |
365 | | }, |
366 | | // TX_16X32 |
367 | | { |
368 | | 4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14, |
369 | | }, |
370 | | // TX_32X16 |
371 | | { |
372 | | 4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14, |
373 | | }, |
374 | | // TX_32X64 |
375 | | { |
376 | | 4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14, |
377 | | }, |
378 | | // TX_64X32 |
379 | | { |
380 | | 4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14, |
381 | | }, |
382 | | // TX_4X16 |
383 | | { |
384 | | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
385 | | }, |
386 | | // TX_16X4 |
387 | | { |
388 | | 4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14, |
389 | | }, |
390 | | // TX_8X32 |
391 | | { |
392 | | 4, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, |
393 | | }, |
394 | | // TX_32X8 |
395 | | { |
396 | | 4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14, |
397 | | }, |
398 | | // TX_16X64 |
399 | | { |
400 | | 4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14, |
401 | | }, |
402 | | // TX_64X16 |
403 | | { |
404 | | 4, 8, 14, 14, 14, 4, 8, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, 14, |
405 | | }, |
406 | | }; |
407 | | |
// Luma filter length for a horizontal edge, looked up as [ts][pv_ts] (current
// and previous transform size; see set_one_param_for_line_luma). Each entry
// follows the shorter of the two transform heights: 4 -> 4, 8 -> 8, 16 and
// taller -> 14. Rows are labelled below; columns presumably follow the same
// TX_SIZE order -- confirm against the enum.
408 | | static const uint32_t horz_filter_length_luma[TX_SIZES_ALL][TX_SIZES_ALL] = { |
409 | | // TX_4X4 |
410 | | { |
411 | | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
412 | | }, |
413 | | // TX_8X8 |
414 | | { |
415 | | 4, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, |
416 | | }, |
417 | | // TX_16X16 |
418 | | { |
419 | | 4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, |
420 | | }, |
421 | | // TX_32X32 |
422 | | { |
423 | | 4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, |
424 | | }, |
425 | | // TX_64X64 |
426 | | { |
427 | | 4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, |
428 | | }, |
429 | | // TX_4X8 |
430 | | { |
431 | | 4, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, |
432 | | }, |
433 | | // TX_8X4 |
434 | | { |
435 | | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
436 | | }, |
437 | | // TX_8X16 |
438 | | { |
439 | | 4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, |
440 | | }, |
441 | | // TX_16X8 |
442 | | { |
443 | | 4, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, |
444 | | }, |
445 | | // TX_16X32 |
446 | | { |
447 | | 4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, |
448 | | }, |
449 | | // TX_32X16 |
450 | | { |
451 | | 4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, |
452 | | }, |
453 | | // TX_32X64 |
454 | | { |
455 | | 4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, |
456 | | }, |
457 | | // TX_64X32 |
458 | | { |
459 | | 4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, |
460 | | }, |
461 | | // TX_4X16 |
462 | | { |
463 | | 4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, |
464 | | }, |
465 | | // TX_16X4 |
466 | | { |
467 | | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
468 | | }, |
469 | | // TX_8X32 |
470 | | { |
471 | | 4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, |
472 | | }, |
473 | | // TX_32X8 |
474 | | { |
475 | | 4, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, |
476 | | }, |
477 | | // TX_16X64 |
478 | | { |
479 | | 4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, |
480 | | }, |
481 | | // TX_64X16 |
482 | | { |
483 | | 4, 8, 14, 14, 14, 8, 4, 14, 8, 14, 14, 14, 14, 14, 4, 14, 8, 14, 14, |
484 | | }, |
485 | | }; |
486 | | |
// Chroma filter length for a vertical edge, looked up as [ts][pv_ts] (current
// and previous transform size). An entry is 4 when either transform is 4 wide
// and 6 otherwise. Rows are labelled below; columns presumably follow the same
// TX_SIZE order -- confirm against the enum.
487 | | static const uint32_t vert_filter_length_chroma[TX_SIZES_ALL][TX_SIZES_ALL] = { |
488 | | // TX_4X4 |
489 | | { |
490 | | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
491 | | }, |
492 | | // TX_8X8 |
493 | | { |
494 | | 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, |
495 | | }, |
496 | | // TX_16X16 |
497 | | { |
498 | | 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, |
499 | | }, |
500 | | // TX_32X32 |
501 | | { |
502 | | 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, |
503 | | }, |
504 | | // TX_64X64 |
505 | | { |
506 | | 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, |
507 | | }, |
508 | | // TX_4X8 |
509 | | { |
510 | | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
511 | | }, |
512 | | // TX_8X4 |
513 | | { |
514 | | 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, |
515 | | }, |
516 | | // TX_8X16 |
517 | | { |
518 | | 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, |
519 | | }, |
520 | | // TX_16X8 |
521 | | { |
522 | | 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, |
523 | | }, |
524 | | // TX_16X32 |
525 | | { |
526 | | 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, |
527 | | }, |
528 | | // TX_32X16 |
529 | | { |
530 | | 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, |
531 | | }, |
532 | | // TX_32X64 |
533 | | { |
534 | | 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, |
535 | | }, |
536 | | // TX_64X32 |
537 | | { |
538 | | 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, |
539 | | }, |
540 | | // TX_4X16 |
541 | | { |
542 | | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
543 | | }, |
544 | | // TX_16X4 |
545 | | { |
546 | | 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, |
547 | | }, |
548 | | // TX_8X32 |
549 | | { |
550 | | 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, |
551 | | }, |
552 | | // TX_32X8 |
553 | | { |
554 | | 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, |
555 | | }, |
556 | | // TX_16X64 |
557 | | { |
558 | | 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, |
559 | | }, |
560 | | // TX_64X16 |
561 | | { |
562 | | 4, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, |
563 | | }, |
564 | | }; |
565 | | |
// Chroma filter length for a horizontal edge, looked up as [ts][pv_ts]
// (current and previous transform size). An entry is 4 when either transform
// is 4 high and 6 otherwise. Rows are labelled below; columns presumably
// follow the same TX_SIZE order -- confirm against the enum.
566 | | static const uint32_t horz_filter_length_chroma[TX_SIZES_ALL][TX_SIZES_ALL] = { |
567 | | // TX_4X4 |
568 | | { |
569 | | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
570 | | }, |
571 | | // TX_8X8 |
572 | | { |
573 | | 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, |
574 | | }, |
575 | | // TX_16X16 |
576 | | { |
577 | | 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, |
578 | | }, |
579 | | // TX_32X32 |
580 | | { |
581 | | 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, |
582 | | }, |
583 | | // TX_64X64 |
584 | | { |
585 | | 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, |
586 | | }, |
587 | | // TX_4X8 |
588 | | { |
589 | | 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, |
590 | | }, |
591 | | // TX_8X4 |
592 | | { |
593 | | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
594 | | }, |
595 | | // TX_8X16 |
596 | | { |
597 | | 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, |
598 | | }, |
599 | | // TX_16X8 |
600 | | { |
601 | | 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, |
602 | | }, |
603 | | // TX_16X32 |
604 | | { |
605 | | 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, |
606 | | }, |
607 | | // TX_32X16 |
608 | | { |
609 | | 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, |
610 | | }, |
611 | | // TX_32X64 |
612 | | { |
613 | | 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, |
614 | | }, |
615 | | // TX_64X32 |
616 | | { |
617 | | 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, |
618 | | }, |
619 | | // TX_4X16 |
620 | | { |
621 | | 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, |
622 | | }, |
623 | | // TX_16X4 |
624 | | { |
625 | | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
626 | | }, |
627 | | // TX_8X32 |
628 | | { |
629 | | 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, |
630 | | }, |
631 | | // TX_32X8 |
632 | | { |
633 | | 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, |
634 | | }, |
635 | | // TX_16X64 |
636 | | { |
637 | | 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, |
638 | | }, |
639 | | // TX_64X16 |
640 | | { |
641 | | 4, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6, 6, |
642 | | }, |
643 | | }; |
644 | | |
// Computes the luma deblocking parameters for one transform-block edge at
// (mi_row, mi_col), which the caller guarantees lies on a transform boundary
// (checked by assert in debug builds).
//   params          output; filter_length is reset to 0 and, when the edge is
//                   to be filtered, set from the luma length tables together
//                   with lfthr
//   tx_size         output; receives the current transform size
//   coord           position along the scan direction, in mi units; 0 means
//                   there is no previous block to compare against
//   is_first_block  true for the first edge of the line: the previous
//                   transform size is then looked up instead of taken from
//                   prev_tx_size, and *min_dim is seeded from the previous block
//   prev_tx_size    transform size of the preceding edge (used when
//                   !is_first_block)
//   mode_step       offset in the mi grid back to the previous block
//   min_dim         in/out; lowered to the smallest block dimension seen along
//                   the edge direction. When is_first_block is true and coord
//                   is 0 it is only lowered, so the caller must initialise it.
// The level is the current block's level, or the previous block's when the
// current one is 0. Inside a prediction block (mi_prev == mbmi) the edge is
// filtered only if the block is not a skipped inter block.
645 | | static AOM_FORCE_INLINE void set_one_param_for_line_luma( |
646 | | AV1_DEBLOCKING_PARAMETERS *const params, TX_SIZE *tx_size, |
647 | | const AV1_COMMON *const cm, const MACROBLOCKD *const xd, |
648 | | const EDGE_DIR edge_dir, uint32_t mi_col, uint32_t mi_row, |
649 | | const struct macroblockd_plane *const plane_ptr, int coord, |
650 | | bool is_first_block, TX_SIZE prev_tx_size, const ptrdiff_t mode_step, |
651 | 0 | int *min_dim) { |
652 | 0 | (void)plane_ptr; |
653 | 0 | assert(mi_col << MI_SIZE_LOG2 < (uint32_t)plane_ptr->dst.width && |
654 | 0 | mi_row << MI_SIZE_LOG2 < (uint32_t)plane_ptr->dst.height); |
655 | 0 | const int is_vert = edge_dir == VERT_EDGE; |
656 | | // reset to initial values |
657 | 0 | params->filter_length = 0; |
658 |

659 | 0 | MB_MODE_INFO **mi = |
660 | 0 | cm->mi_params.mi_grid_base + mi_row * cm->mi_params.mi_stride + mi_col; |
661 | 0 | const MB_MODE_INFO *mbmi = mi[0]; |
662 | 0 | assert(mbmi); |
663 | | |
664 | 0 | const TX_SIZE ts = |
665 | 0 | get_transform_size(xd, mi[0], mi_row, mi_col, AOM_PLANE_Y, 0, 0); |
666 |

667 | 0 | #ifndef NDEBUG |
668 | 0 | const uint32_t transform_masks = |
669 | 0 | is_vert ? tx_size_wide[ts] - 1 : tx_size_high[ts] - 1; |
670 | 0 | const int32_t tu_edge = ((coord * MI_SIZE) & transform_masks) ? (0) : (1); |
671 | 0 | assert(tu_edge); |
672 | 0 | #endif // NDEBUG |
673 | | // If we are not the first block, then coord is always true, so |
674 | | // !is_first_block is technically redundant. But we are keeping it here so the |
675 | | // compiler can compile away this conditional if we pass in is_first_block := |
676 | | // false |
677 | 0 | bool curr_skipped = false; |
678 | 0 | if (!is_first_block || coord) { |
679 | 0 | const MB_MODE_INFO *const mi_prev = *(mi - mode_step); |
680 | 0 | assert(mi_prev); |
681 | 0 | const int pv_row = is_vert ? mi_row : (mi_row - 1); |
682 | 0 | const int pv_col = is_vert ? (mi_col - 1) : mi_col; |
683 | 0 | const TX_SIZE pv_ts = |
684 | 0 | is_first_block |
685 | 0 | ? get_transform_size(xd, mi_prev, pv_row, pv_col, AOM_PLANE_Y, 0, 0) |
686 | 0 | : prev_tx_size; |
687 | 0 | if (is_first_block) { |
688 | 0 | *min_dim = is_vert ? block_size_high[mi_prev->bsize] |
689 | 0 | : block_size_wide[mi_prev->bsize]; |
690 | 0 | } |
691 | 0 | uint8_t level = |
692 | 0 | av1_get_filter_level(cm, &cm->lf_info, edge_dir, AOM_PLANE_Y, mbmi); |
693 | 0 | if (!level) { |
694 | 0 | level = av1_get_filter_level(cm, &cm->lf_info, edge_dir, AOM_PLANE_Y, |
695 | 0 | mi_prev); |
696 | 0 | } |
697 |

698 | 0 | const int32_t pu_edge = mi_prev != mbmi; |
699 | | |
700 | | // The quad loop filter assumes that all the transform blocks within a |
701 | | // 8x16/16x8/16x16 prediction block are of the same size. |
702 | 0 | assert(IMPLIES( |
703 | 0 | !pu_edge && (mbmi->bsize >= BLOCK_8X16 && mbmi->bsize <= BLOCK_16X16), |
704 | 0 | pv_ts == ts)); |
705 | | |
706 | 0 | if (!pu_edge) { |
707 | 0 | curr_skipped = mbmi->skip_txfm && is_inter_block(mbmi); |
708 | 0 | } |
709 | 0 | if ((pu_edge || !curr_skipped) && level) { |
710 | 0 | params->filter_length = is_vert ? vert_filter_length_luma[ts][pv_ts] |
711 | 0 | : horz_filter_length_luma[ts][pv_ts]; |
712 | | |
713 | | // prepare common parameters |
714 | 0 | const loop_filter_thresh *const limits = cm->lf_info.lfthr + level; |
715 | 0 | params->lfthr = limits; |
716 | 0 | } |
717 | 0 | } |
718 | 0 | const int block_dim = |
719 | 0 | is_vert ? block_size_high[mbmi->bsize] : block_size_wide[mbmi->bsize]; |
720 | 0 | *min_dim = AOMMIN(*min_dim, block_dim); |
721 |

722 | 0 | *tx_size = ts; |
723 | 0 | } |
724 | | |
// Fills params_buf and tx_buf for one luma row/column of edges.
//   params_buf, tx_buf  outputs, written at offsets that advance by the
//                       transform width (vertical edges) or height
//                       (horizontal edges) in mi units; entries in between
//                       are left untouched
//   mi_col, mi_row      starting position; the one along the scan direction
//                       is advanced locally
//   mi_range            exclusive end of the scan in mi units
//   mode_step, min_dim  forwarded to set_one_param_for_line_luma
725 | | // Similar to set_lpf_parameters, but does so one row/col at a time to reduce |
726 | | // calls to \ref get_transform_size and \ref av1_get_filter_level |
727 | | static AOM_FORCE_INLINE void set_lpf_parameters_for_line_luma( |
728 | | AV1_DEBLOCKING_PARAMETERS *const params_buf, TX_SIZE *tx_buf, |
729 | | const AV1_COMMON *const cm, const MACROBLOCKD *const xd, |
730 | | const EDGE_DIR edge_dir, uint32_t mi_col, uint32_t mi_row, |
731 | | const struct macroblockd_plane *const plane_ptr, const uint32_t mi_range, |
732 | 0 | const ptrdiff_t mode_step, int *min_dim) { |
733 | 0 | const int is_vert = edge_dir == VERT_EDGE; |
734 |

735 | 0 | AV1_DEBLOCKING_PARAMETERS *params = params_buf; |
736 | 0 | TX_SIZE *tx_size = tx_buf; |
737 | 0 | uint32_t *counter_ptr = is_vert ? &mi_col : &mi_row; |
738 | 0 | TX_SIZE prev_tx_size = TX_INVALID; |
739 | | |
740 | | // Unroll the first iteration of the loop (is_first_block == true, so prev_tx_size is not used) |
741 | 0 | set_one_param_for_line_luma(params, tx_size, cm, xd, edge_dir, mi_col, mi_row, |
742 | 0 | plane_ptr, *counter_ptr, true, prev_tx_size, |
743 | 0 | mode_step, min_dim); |
744 | | |
745 | | // Advance position and both output cursors by the transform size just written |
746 | 0 | int advance_units = |
747 | 0 | is_vert ? tx_size_wide_unit[*tx_size] : tx_size_high_unit[*tx_size]; |
748 | 0 | prev_tx_size = *tx_size; |
749 | 0 | *counter_ptr += advance_units; |
750 | 0 | params += advance_units; |
751 | 0 | tx_size += advance_units; |
752 |

753 | 0 | while (*counter_ptr < mi_range) { |
754 | 0 | set_one_param_for_line_luma(params, tx_size, cm, xd, edge_dir, mi_col, |
755 | 0 | mi_row, plane_ptr, *counter_ptr, false, |
756 | 0 | prev_tx_size, mode_step, min_dim); |
757 | | |
758 | | // Advance |
759 | 0 | advance_units = |
760 | 0 | is_vert ? tx_size_wide_unit[*tx_size] : tx_size_high_unit[*tx_size]; |
761 | 0 | prev_tx_size = *tx_size; |
762 | 0 | *counter_ptr += advance_units; |
763 | 0 | params += advance_units; |
764 | 0 | tx_size += advance_units; |
765 | 0 | } |
766 | 0 | } |
767 | | |
768 | | static AOM_FORCE_INLINE void set_one_param_for_line_chroma( |
769 | | AV1_DEBLOCKING_PARAMETERS *const params, TX_SIZE *tx_size, |
770 | | const AV1_COMMON *const cm, const MACROBLOCKD *const xd, |
771 | | const EDGE_DIR edge_dir, uint32_t mi_col, uint32_t mi_row, int coord, |
772 | | bool is_first_block, TX_SIZE prev_tx_size, |
773 | | const struct macroblockd_plane *const plane_ptr, const ptrdiff_t mode_step, |
774 | | const int scale_horz, const int scale_vert, int *min_dim, int plane, |
775 | 0 | int joint_filter_chroma) { |
776 | 0 | const int is_vert = edge_dir == VERT_EDGE; |
777 | 0 | (void)plane_ptr; |
778 | 0 | assert((mi_col << MI_SIZE_LOG2) < |
779 | 0 | (uint32_t)(plane_ptr->dst.width << scale_horz) && |
780 | 0 | (mi_row << MI_SIZE_LOG2) < |
781 | 0 | (uint32_t)(plane_ptr->dst.height << scale_vert)); |
782 | | // reset to initial values |
783 | 0 | params->filter_length = 0; |
784 | | |
785 | | // for sub8x8 block, chroma prediction mode is obtained from the |
786 | | // bottom/right mi structure of the co-located 8x8 luma block. so for chroma |
787 | | // plane, mi_row and mi_col should map to the bottom/right mi structure, |
788 | | // i.e, both mi_row and mi_col should be odd number for chroma plane. |
789 | 0 | mi_row |= scale_vert; |
790 | 0 | mi_col |= scale_horz; |
791 | 0 | MB_MODE_INFO **mi = |
792 | 0 | cm->mi_params.mi_grid_base + mi_row * cm->mi_params.mi_stride + mi_col; |
793 | 0 | const MB_MODE_INFO *mbmi = mi[0]; |
794 | 0 | assert(mbmi); |
795 | | |
796 | 0 | const TX_SIZE ts = get_transform_size(xd, mi[0], mi_row, mi_col, plane, |
797 | 0 | scale_horz, scale_vert); |
798 | 0 | *tx_size = ts; |
799 | |
|
800 | 0 | #ifndef NDEBUG |
801 | 0 | const uint32_t transform_masks = |
802 | 0 | is_vert ? tx_size_wide[ts] - 1 : tx_size_high[ts] - 1; |
803 | 0 | const int32_t tu_edge = ((coord * MI_SIZE) & transform_masks) ? (0) : (1); |
804 | 0 | assert(tu_edge); |
805 | 0 | #endif // NDEBUG |
806 | | |
807 | | // If we are not the first block, then coord is always true, so |
808 | | // !is_first_block is technically redundant. But we are keeping it here so the |
809 | | // compiler can compile away this conditional if we pass in is_first_block := |
810 | | // false |
811 | 0 | bool curr_skipped = false; |
812 | 0 | if (!is_first_block || coord) { |
813 | 0 | const MB_MODE_INFO *const mi_prev = *(mi - mode_step); |
814 | 0 | assert(mi_prev); |
815 | 0 | const int pv_row = is_vert ? (mi_row) : (mi_row - (1 << scale_vert)); |
816 | 0 | const int pv_col = is_vert ? (mi_col - (1 << scale_horz)) : (mi_col); |
817 | 0 | const TX_SIZE pv_ts = |
818 | 0 | is_first_block ? get_transform_size(xd, mi_prev, pv_row, pv_col, plane, |
819 | 0 | scale_horz, scale_vert) |
820 | 0 | : prev_tx_size; |
821 | 0 | if (is_first_block) { |
822 | 0 | *min_dim = is_vert ? tx_size_high[pv_ts] : tx_size_wide[pv_ts]; |
823 | 0 | } |
824 | |
|
825 | 0 | uint8_t level = |
826 | 0 | av1_get_filter_level(cm, &cm->lf_info, edge_dir, plane, mbmi); |
827 | 0 | if (!level) { |
828 | 0 | level = av1_get_filter_level(cm, &cm->lf_info, edge_dir, plane, mi_prev); |
829 | 0 | } |
830 | 0 | #ifndef NDEBUG |
831 | 0 | if (joint_filter_chroma) { |
832 | 0 | uint8_t v_level = |
833 | 0 | av1_get_filter_level(cm, &cm->lf_info, edge_dir, AOM_PLANE_V, mbmi); |
834 | 0 | if (!v_level) { |
835 | 0 | v_level = av1_get_filter_level(cm, &cm->lf_info, edge_dir, AOM_PLANE_V, |
836 | 0 | mi_prev); |
837 | 0 | } |
838 | 0 | assert(level == v_level); |
839 | 0 | } |
840 | | #else |
841 | | (void)joint_filter_chroma; |
842 | | #endif // NDEBUG |
843 | 0 | const int32_t pu_edge = mi_prev != mbmi; |
844 | |
|
845 | 0 | if (!pu_edge) { |
846 | 0 | curr_skipped = mbmi->skip_txfm && is_inter_block(mbmi); |
847 | 0 | } |
848 | | // For realtime mode, u and v have the same level |
849 | 0 | if ((!curr_skipped || pu_edge) && level) { |
850 | 0 | params->filter_length = is_vert ? vert_filter_length_chroma[ts][pv_ts] |
851 | 0 | : horz_filter_length_chroma[ts][pv_ts]; |
852 | |
|
853 | 0 | const loop_filter_thresh *const limits = cm->lf_info.lfthr; |
854 | 0 | params->lfthr = limits + level; |
855 | 0 | } |
856 | 0 | } |
857 | 0 | const int tx_dim = is_vert ? tx_size_high[ts] : tx_size_wide[ts]; |
858 | 0 | *min_dim = AOMMIN(*min_dim, tx_dim); |
859 | 0 | } |
860 | | |
861 | | static AOM_FORCE_INLINE void set_lpf_parameters_for_line_chroma( |
862 | | AV1_DEBLOCKING_PARAMETERS *const params_buf, TX_SIZE *tx_buf, |
863 | | const AV1_COMMON *const cm, const MACROBLOCKD *const xd, |
864 | | const EDGE_DIR edge_dir, uint32_t mi_col, uint32_t mi_row, |
865 | | const struct macroblockd_plane *const plane_ptr, const uint32_t mi_range, |
866 | | const ptrdiff_t mode_step, const int scale_horz, const int scale_vert, |
867 | 0 | int *min_dim, int plane, int joint_filter_chroma) { |
868 | 0 | const int is_vert = edge_dir == VERT_EDGE; |
869 | |
|
870 | 0 | AV1_DEBLOCKING_PARAMETERS *params = params_buf; |
871 | 0 | TX_SIZE *tx_size = tx_buf; |
872 | 0 | uint32_t *counter_ptr = is_vert ? &mi_col : &mi_row; |
873 | 0 | const uint32_t scale = is_vert ? scale_horz : scale_vert; |
874 | 0 | TX_SIZE prev_tx_size = TX_INVALID; |
875 | | |
876 | | // Unroll the first iteration of the loop |
877 | 0 | set_one_param_for_line_chroma(params, tx_size, cm, xd, edge_dir, mi_col, |
878 | 0 | mi_row, *counter_ptr, true, prev_tx_size, |
879 | 0 | plane_ptr, mode_step, scale_horz, scale_vert, |
880 | 0 | min_dim, plane, joint_filter_chroma); |
881 | | |
882 | | // Advance |
883 | 0 | int advance_units = |
884 | 0 | is_vert ? tx_size_wide_unit[*tx_size] : tx_size_high_unit[*tx_size]; |
885 | 0 | prev_tx_size = *tx_size; |
886 | 0 | *counter_ptr += advance_units << scale; |
887 | 0 | params += advance_units; |
888 | 0 | tx_size += advance_units; |
889 | |
|
890 | 0 | while (*counter_ptr < mi_range) { |
891 | 0 | set_one_param_for_line_chroma(params, tx_size, cm, xd, edge_dir, mi_col, |
892 | 0 | mi_row, *counter_ptr, false, prev_tx_size, |
893 | 0 | plane_ptr, mode_step, scale_horz, scale_vert, |
894 | 0 | min_dim, plane, joint_filter_chroma); |
895 | | |
896 | | // Advance |
897 | 0 | advance_units = |
898 | 0 | is_vert ? tx_size_wide_unit[*tx_size] : tx_size_high_unit[*tx_size]; |
899 | 0 | prev_tx_size = *tx_size; |
900 | 0 | *counter_ptr += advance_units << scale; |
901 | 0 | params += advance_units; |
902 | 0 | tx_size += advance_units; |
903 | 0 | } |
904 | 0 | } |
905 | | |
906 | | static AOM_INLINE void filter_vert(uint8_t *dst, int dst_stride, |
907 | | const AV1_DEBLOCKING_PARAMETERS *params, |
908 | | const SequenceHeader *seq_params, |
909 | 66.3M | USE_FILTER_TYPE use_filter_type) { |
910 | 66.3M | const loop_filter_thresh *limits = params->lfthr; |
911 | 66.3M | #if CONFIG_AV1_HIGHBITDEPTH |
912 | 66.3M | const int use_highbitdepth = seq_params->use_highbitdepth; |
913 | 66.3M | const aom_bit_depth_t bit_depth = seq_params->bit_depth; |
914 | 66.3M | if (use_highbitdepth) { |
915 | 47.8M | uint16_t *dst_shortptr = CONVERT_TO_SHORTPTR(dst); |
916 | 47.8M | if (use_filter_type == USE_QUAD) { |
917 | 0 | switch (params->filter_length) { |
918 | | // apply 4-tap filtering |
919 | 0 | case 4: |
920 | 0 | aom_highbd_lpf_vertical_4_dual( |
921 | 0 | dst_shortptr, dst_stride, limits->mblim, limits->lim, |
922 | 0 | limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr, |
923 | 0 | bit_depth); |
924 | 0 | aom_highbd_lpf_vertical_4_dual( |
925 | 0 | dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride, |
926 | 0 | limits->mblim, limits->lim, limits->hev_thr, limits->mblim, |
927 | 0 | limits->lim, limits->hev_thr, bit_depth); |
928 | 0 | break; |
929 | 0 | case 6: // apply 6-tap filter for chroma plane only |
930 | 0 | aom_highbd_lpf_vertical_6_dual( |
931 | 0 | dst_shortptr, dst_stride, limits->mblim, limits->lim, |
932 | 0 | limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr, |
933 | 0 | bit_depth); |
934 | 0 | aom_highbd_lpf_vertical_6_dual( |
935 | 0 | dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride, |
936 | 0 | limits->mblim, limits->lim, limits->hev_thr, limits->mblim, |
937 | 0 | limits->lim, limits->hev_thr, bit_depth); |
938 | 0 | break; |
939 | | // apply 8-tap filtering |
940 | 0 | case 8: |
941 | 0 | aom_highbd_lpf_vertical_8_dual( |
942 | 0 | dst_shortptr, dst_stride, limits->mblim, limits->lim, |
943 | 0 | limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr, |
944 | 0 | bit_depth); |
945 | 0 | aom_highbd_lpf_vertical_8_dual( |
946 | 0 | dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride, |
947 | 0 | limits->mblim, limits->lim, limits->hev_thr, limits->mblim, |
948 | 0 | limits->lim, limits->hev_thr, bit_depth); |
949 | 0 | break; |
950 | | // apply 14-tap filtering |
951 | 0 | case 14: |
952 | 0 | aom_highbd_lpf_vertical_14_dual( |
953 | 0 | dst_shortptr, dst_stride, limits->mblim, limits->lim, |
954 | 0 | limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr, |
955 | 0 | bit_depth); |
956 | 0 | aom_highbd_lpf_vertical_14_dual( |
957 | 0 | dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride, |
958 | 0 | limits->mblim, limits->lim, limits->hev_thr, limits->mblim, |
959 | 0 | limits->lim, limits->hev_thr, bit_depth); |
960 | 0 | break; |
961 | | // no filtering |
962 | 0 | default: break; |
963 | 0 | } |
964 | 47.8M | } else if (use_filter_type == USE_DUAL) { |
965 | 0 | switch (params->filter_length) { |
966 | | // apply 4-tap filtering |
967 | 0 | case 4: |
968 | 0 | aom_highbd_lpf_vertical_4_dual( |
969 | 0 | dst_shortptr, dst_stride, limits->mblim, limits->lim, |
970 | 0 | limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr, |
971 | 0 | bit_depth); |
972 | 0 | break; |
973 | 0 | case 6: // apply 6-tap filter for chroma plane only |
974 | 0 | aom_highbd_lpf_vertical_6_dual( |
975 | 0 | dst_shortptr, dst_stride, limits->mblim, limits->lim, |
976 | 0 | limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr, |
977 | 0 | bit_depth); |
978 | 0 | break; |
979 | | // apply 8-tap filtering |
980 | 0 | case 8: |
981 | 0 | aom_highbd_lpf_vertical_8_dual( |
982 | 0 | dst_shortptr, dst_stride, limits->mblim, limits->lim, |
983 | 0 | limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr, |
984 | 0 | bit_depth); |
985 | 0 | break; |
986 | | // apply 14-tap filtering |
987 | 0 | case 14: |
988 | 0 | aom_highbd_lpf_vertical_14_dual( |
989 | 0 | dst_shortptr, dst_stride, limits->mblim, limits->lim, |
990 | 0 | limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr, |
991 | 0 | bit_depth); |
992 | 0 | break; |
993 | | // no filtering |
994 | 0 | default: break; |
995 | 0 | } |
996 | 47.8M | } else { |
997 | 47.8M | assert(use_filter_type == USE_SINGLE); |
998 | 47.9M | switch (params->filter_length) { |
999 | | // apply 4-tap filtering |
1000 | 28.0M | case 4: |
1001 | 28.0M | aom_highbd_lpf_vertical_4(dst_shortptr, dst_stride, limits->mblim, |
1002 | 28.0M | limits->lim, limits->hev_thr, bit_depth); |
1003 | 28.0M | break; |
1004 | 5.58M | case 6: // apply 6-tap filter for chroma plane only |
1005 | 5.58M | aom_highbd_lpf_vertical_6(dst_shortptr, dst_stride, limits->mblim, |
1006 | 5.58M | limits->lim, limits->hev_thr, bit_depth); |
1007 | 5.58M | break; |
1008 | | // apply 8-tap filtering |
1009 | 340k | case 8: |
1010 | 340k | aom_highbd_lpf_vertical_8(dst_shortptr, dst_stride, limits->mblim, |
1011 | 340k | limits->lim, limits->hev_thr, bit_depth); |
1012 | 340k | break; |
1013 | | // apply 14-tap filtering |
1014 | 6.19M | case 14: |
1015 | 6.19M | aom_highbd_lpf_vertical_14(dst_shortptr, dst_stride, limits->mblim, |
1016 | 6.19M | limits->lim, limits->hev_thr, bit_depth); |
1017 | 6.19M | break; |
1018 | | // no filtering |
1019 | 12.8M | default: break; |
1020 | 47.9M | } |
1021 | 47.9M | } |
1022 | 39.7M | return; |
1023 | 47.8M | } |
1024 | 18.4M | #endif // CONFIG_AV1_HIGHBITDEPTH |
1025 | 18.4M | if (use_filter_type == USE_QUAD) { |
1026 | | // Only one set of loop filter parameters (mblim, lim and hev_thr) is |
1027 | | // passed as argument to quad loop filter because quad loop filter is |
1028 | | // called for those cases where all the 4 set of loop filter parameters |
1029 | | // are equal. |
1030 | 0 | switch (params->filter_length) { |
1031 | | // apply 4-tap filtering |
1032 | 0 | case 4: |
1033 | 0 | aom_lpf_vertical_4_quad(dst, dst_stride, limits->mblim, limits->lim, |
1034 | 0 | limits->hev_thr); |
1035 | 0 | break; |
1036 | 0 | case 6: // apply 6-tap filter for chroma plane only |
1037 | 0 | aom_lpf_vertical_6_quad(dst, dst_stride, limits->mblim, limits->lim, |
1038 | 0 | limits->hev_thr); |
1039 | 0 | break; |
1040 | | // apply 8-tap filtering |
1041 | 0 | case 8: |
1042 | 0 | aom_lpf_vertical_8_quad(dst, dst_stride, limits->mblim, limits->lim, |
1043 | 0 | limits->hev_thr); |
1044 | 0 | break; |
1045 | | // apply 14-tap filtering |
1046 | 0 | case 14: |
1047 | 0 | aom_lpf_vertical_14_quad(dst, dst_stride, limits->mblim, limits->lim, |
1048 | 0 | limits->hev_thr); |
1049 | 0 | break; |
1050 | | // no filtering |
1051 | 0 | default: break; |
1052 | 0 | } |
1053 | 18.4M | } else if (use_filter_type == USE_DUAL) { |
1054 | 0 | switch (params->filter_length) { |
1055 | | // apply 4-tap filtering |
1056 | 0 | case 4: |
1057 | 0 | aom_lpf_vertical_4_dual(dst, dst_stride, limits->mblim, limits->lim, |
1058 | 0 | limits->hev_thr, limits->mblim, limits->lim, |
1059 | 0 | limits->hev_thr); |
1060 | 0 | break; |
1061 | 0 | case 6: // apply 6-tap filter for chroma plane only |
1062 | 0 | aom_lpf_vertical_6_dual(dst, dst_stride, limits->mblim, limits->lim, |
1063 | 0 | limits->hev_thr, limits->mblim, limits->lim, |
1064 | 0 | limits->hev_thr); |
1065 | 0 | break; |
1066 | | // apply 8-tap filtering |
1067 | 0 | case 8: |
1068 | 0 | aom_lpf_vertical_8_dual(dst, dst_stride, limits->mblim, limits->lim, |
1069 | 0 | limits->hev_thr, limits->mblim, limits->lim, |
1070 | 0 | limits->hev_thr); |
1071 | 0 | break; |
1072 | | // apply 14-tap filtering |
1073 | 0 | case 14: |
1074 | 0 | aom_lpf_vertical_14_dual(dst, dst_stride, limits->mblim, limits->lim, |
1075 | 0 | limits->hev_thr, limits->mblim, limits->lim, |
1076 | 0 | limits->hev_thr); |
1077 | 0 | break; |
1078 | | // no filtering |
1079 | 0 | default: break; |
1080 | 0 | } |
1081 | 18.4M | } else { |
1082 | 18.4M | assert(use_filter_type == USE_SINGLE); |
1083 | 18.6M | switch (params->filter_length) { |
1084 | | // apply 4-tap filtering |
1085 | 2.30M | case 4: |
1086 | 2.30M | aom_lpf_vertical_4(dst, dst_stride, limits->mblim, limits->lim, |
1087 | 2.30M | limits->hev_thr); |
1088 | 2.30M | break; |
1089 | 8.48M | case 6: // apply 6-tap filter for chroma plane only |
1090 | 8.48M | aom_lpf_vertical_6(dst, dst_stride, limits->mblim, limits->lim, |
1091 | 8.48M | limits->hev_thr); |
1092 | 8.48M | break; |
1093 | | // apply 8-tap filtering |
1094 | 2.10M | case 8: |
1095 | 2.10M | aom_lpf_vertical_8(dst, dst_stride, limits->mblim, limits->lim, |
1096 | 2.10M | limits->hev_thr); |
1097 | 2.10M | break; |
1098 | | // apply 14-tap filtering |
1099 | 6.51M | case 14: |
1100 | 6.51M | aom_lpf_vertical_14(dst, dst_stride, limits->mblim, limits->lim, |
1101 | 6.51M | limits->hev_thr); |
1102 | 6.51M | break; |
1103 | | // no filtering |
1104 | 2.08M | default: break; |
1105 | 18.6M | } |
1106 | 18.6M | } |
1107 | | #if !CONFIG_AV1_HIGHBITDEPTH |
1108 | | (void)seq_params; |
1109 | | #endif // !CONFIG_AV1_HIGHBITDEPTH |
1110 | 18.4M | } |
1111 | | |
1112 | | static AOM_INLINE void filter_vert_chroma( |
1113 | | uint8_t *u_dst, uint8_t *v_dst, int dst_stride, |
1114 | | const AV1_DEBLOCKING_PARAMETERS *params, const SequenceHeader *seq_params, |
1115 | 0 | USE_FILTER_TYPE use_filter_type) { |
1116 | 0 | const loop_filter_thresh *u_limits = params->lfthr; |
1117 | 0 | const loop_filter_thresh *v_limits = params->lfthr; |
1118 | 0 | #if CONFIG_AV1_HIGHBITDEPTH |
1119 | 0 | const int use_highbitdepth = seq_params->use_highbitdepth; |
1120 | 0 | const aom_bit_depth_t bit_depth = seq_params->bit_depth; |
1121 | 0 | if (use_highbitdepth) { |
1122 | 0 | uint16_t *u_dst_shortptr = CONVERT_TO_SHORTPTR(u_dst); |
1123 | 0 | uint16_t *v_dst_shortptr = CONVERT_TO_SHORTPTR(v_dst); |
1124 | 0 | if (use_filter_type == USE_QUAD) { |
1125 | 0 | switch (params->filter_length) { |
1126 | | // apply 4-tap filtering |
1127 | 0 | case 4: |
1128 | 0 | aom_highbd_lpf_vertical_4_dual( |
1129 | 0 | u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim, |
1130 | 0 | u_limits->hev_thr, u_limits->mblim, u_limits->lim, |
1131 | 0 | u_limits->hev_thr, bit_depth); |
1132 | 0 | aom_highbd_lpf_vertical_4_dual( |
1133 | 0 | u_dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride, |
1134 | 0 | u_limits->mblim, u_limits->lim, u_limits->hev_thr, |
1135 | 0 | u_limits->mblim, u_limits->lim, u_limits->hev_thr, bit_depth); |
1136 | 0 | aom_highbd_lpf_vertical_4_dual( |
1137 | 0 | v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim, |
1138 | 0 | v_limits->hev_thr, v_limits->mblim, v_limits->lim, |
1139 | 0 | v_limits->hev_thr, bit_depth); |
1140 | 0 | aom_highbd_lpf_vertical_4_dual( |
1141 | 0 | v_dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride, |
1142 | 0 | v_limits->mblim, v_limits->lim, v_limits->hev_thr, |
1143 | 0 | v_limits->mblim, v_limits->lim, v_limits->hev_thr, bit_depth); |
1144 | 0 | break; |
1145 | 0 | case 6: // apply 6-tap filter for chroma plane only |
1146 | 0 | aom_highbd_lpf_vertical_6_dual( |
1147 | 0 | u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim, |
1148 | 0 | u_limits->hev_thr, u_limits->mblim, u_limits->lim, |
1149 | 0 | u_limits->hev_thr, bit_depth); |
1150 | 0 | aom_highbd_lpf_vertical_6_dual( |
1151 | 0 | u_dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride, |
1152 | 0 | u_limits->mblim, u_limits->lim, u_limits->hev_thr, |
1153 | 0 | u_limits->mblim, u_limits->lim, u_limits->hev_thr, bit_depth); |
1154 | 0 | aom_highbd_lpf_vertical_6_dual( |
1155 | 0 | v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim, |
1156 | 0 | v_limits->hev_thr, v_limits->mblim, v_limits->lim, |
1157 | 0 | v_limits->hev_thr, bit_depth); |
1158 | 0 | aom_highbd_lpf_vertical_6_dual( |
1159 | 0 | v_dst_shortptr + (2 * MI_SIZE * dst_stride), dst_stride, |
1160 | 0 | v_limits->mblim, v_limits->lim, v_limits->hev_thr, |
1161 | 0 | v_limits->mblim, v_limits->lim, v_limits->hev_thr, bit_depth); |
1162 | 0 | break; |
1163 | 0 | case 8: |
1164 | 0 | case 14: assert(0); |
1165 | | // no filtering |
1166 | 0 | default: break; |
1167 | 0 | } |
1168 | 0 | } else if (use_filter_type == USE_DUAL) { |
1169 | 0 | switch (params->filter_length) { |
1170 | | // apply 4-tap filtering |
1171 | 0 | case 4: |
1172 | 0 | aom_highbd_lpf_vertical_4_dual( |
1173 | 0 | u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim, |
1174 | 0 | u_limits->hev_thr, u_limits->mblim, u_limits->lim, |
1175 | 0 | u_limits->hev_thr, bit_depth); |
1176 | 0 | aom_highbd_lpf_vertical_4_dual( |
1177 | 0 | v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim, |
1178 | 0 | v_limits->hev_thr, v_limits->mblim, v_limits->lim, |
1179 | 0 | v_limits->hev_thr, bit_depth); |
1180 | 0 | break; |
1181 | 0 | case 6: // apply 6-tap filter for chroma plane only |
1182 | 0 | aom_highbd_lpf_vertical_6_dual( |
1183 | 0 | u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim, |
1184 | 0 | u_limits->hev_thr, u_limits->mblim, u_limits->lim, |
1185 | 0 | u_limits->hev_thr, bit_depth); |
1186 | 0 | aom_highbd_lpf_vertical_6_dual( |
1187 | 0 | v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim, |
1188 | 0 | v_limits->hev_thr, v_limits->mblim, v_limits->lim, |
1189 | 0 | v_limits->hev_thr, bit_depth); |
1190 | 0 | break; |
1191 | 0 | case 8: |
1192 | 0 | case 14: assert(0); |
1193 | | // no filtering |
1194 | 0 | default: break; |
1195 | 0 | } |
1196 | 0 | } else { |
1197 | 0 | assert(use_filter_type == USE_SINGLE); |
1198 | 0 | switch (params->filter_length) { |
1199 | | // apply 4-tap filtering |
1200 | 0 | case 4: |
1201 | 0 | aom_highbd_lpf_vertical_4(u_dst_shortptr, dst_stride, u_limits->mblim, |
1202 | 0 | u_limits->lim, u_limits->hev_thr, |
1203 | 0 | bit_depth); |
1204 | 0 | aom_highbd_lpf_vertical_4(v_dst_shortptr, dst_stride, v_limits->mblim, |
1205 | 0 | v_limits->lim, v_limits->hev_thr, |
1206 | 0 | bit_depth); |
1207 | 0 | break; |
1208 | 0 | case 6: // apply 6-tap filter for chroma plane only |
1209 | 0 | aom_highbd_lpf_vertical_6(u_dst_shortptr, dst_stride, u_limits->mblim, |
1210 | 0 | u_limits->lim, u_limits->hev_thr, |
1211 | 0 | bit_depth); |
1212 | 0 | aom_highbd_lpf_vertical_6(v_dst_shortptr, dst_stride, v_limits->mblim, |
1213 | 0 | v_limits->lim, v_limits->hev_thr, |
1214 | 0 | bit_depth); |
1215 | 0 | break; |
1216 | 0 | case 8: |
1217 | 0 | case 14: assert(0); break; |
1218 | | // no filtering |
1219 | 0 | default: break; |
1220 | 0 | } |
1221 | 0 | } |
1222 | 0 | return; |
1223 | 0 | } |
1224 | 0 | #endif // CONFIG_AV1_HIGHBITDEPTH |
1225 | 0 | if (use_filter_type == USE_QUAD) { |
1226 | | // Only one set of loop filter parameters (mblim, lim and hev_thr) is |
1227 | | // passed as argument to quad loop filter because quad loop filter is |
1228 | | // called for those cases where all the 4 set of loop filter parameters |
1229 | | // are equal. |
1230 | 0 | switch (params->filter_length) { |
1231 | | // apply 4-tap filtering |
1232 | 0 | case 4: |
1233 | 0 | aom_lpf_vertical_4_quad(u_dst, dst_stride, u_limits->mblim, |
1234 | 0 | u_limits->lim, u_limits->hev_thr); |
1235 | 0 | aom_lpf_vertical_4_quad(v_dst, dst_stride, v_limits->mblim, |
1236 | 0 | v_limits->lim, v_limits->hev_thr); |
1237 | 0 | break; |
1238 | 0 | case 6: // apply 6-tap filter for chroma plane only |
1239 | 0 | aom_lpf_vertical_6_quad(u_dst, dst_stride, u_limits->mblim, |
1240 | 0 | u_limits->lim, u_limits->hev_thr); |
1241 | 0 | aom_lpf_vertical_6_quad(v_dst, dst_stride, v_limits->mblim, |
1242 | 0 | v_limits->lim, v_limits->hev_thr); |
1243 | 0 | break; |
1244 | 0 | case 8: |
1245 | 0 | case 14: assert(0); |
1246 | | // no filtering |
1247 | 0 | default: break; |
1248 | 0 | } |
1249 | 0 | } else if (use_filter_type == USE_DUAL) { |
1250 | 0 | switch (params->filter_length) { |
1251 | | // apply 4-tap filtering |
1252 | 0 | case 4: |
1253 | 0 | aom_lpf_vertical_4_dual(u_dst, dst_stride, u_limits->mblim, |
1254 | 0 | u_limits->lim, u_limits->hev_thr, |
1255 | 0 | u_limits->mblim, u_limits->lim, |
1256 | 0 | u_limits->hev_thr); |
1257 | 0 | aom_lpf_vertical_4_dual(v_dst, dst_stride, v_limits->mblim, |
1258 | 0 | v_limits->lim, v_limits->hev_thr, |
1259 | 0 | v_limits->mblim, v_limits->lim, |
1260 | 0 | v_limits->hev_thr); |
1261 | 0 | break; |
1262 | 0 | case 6: // apply 6-tap filter for chroma plane only |
1263 | 0 | aom_lpf_vertical_6_dual(u_dst, dst_stride, u_limits->mblim, |
1264 | 0 | u_limits->lim, u_limits->hev_thr, |
1265 | 0 | u_limits->mblim, u_limits->lim, |
1266 | 0 | u_limits->hev_thr); |
1267 | 0 | aom_lpf_vertical_6_dual(v_dst, dst_stride, v_limits->mblim, |
1268 | 0 | v_limits->lim, v_limits->hev_thr, |
1269 | 0 | v_limits->mblim, v_limits->lim, |
1270 | 0 | v_limits->hev_thr); |
1271 | 0 | break; |
1272 | 0 | case 8: |
1273 | 0 | case 14: assert(0); |
1274 | | // no filtering |
1275 | 0 | default: break; |
1276 | 0 | } |
1277 | 0 | } else { |
1278 | 0 | assert(use_filter_type == USE_SINGLE); |
1279 | 0 | switch (params->filter_length) { |
1280 | | // apply 4-tap filtering |
1281 | 0 | case 4: |
1282 | 0 | aom_lpf_vertical_4(u_dst, dst_stride, u_limits->mblim, u_limits->lim, |
1283 | 0 | u_limits->hev_thr); |
1284 | 0 | aom_lpf_vertical_4(v_dst, dst_stride, v_limits->mblim, v_limits->lim, |
1285 | 0 | u_limits->hev_thr); |
1286 | 0 | break; |
1287 | 0 | case 6: // apply 6-tap filter for chroma plane only |
1288 | 0 | aom_lpf_vertical_6(u_dst, dst_stride, u_limits->mblim, u_limits->lim, |
1289 | 0 | u_limits->hev_thr); |
1290 | 0 | aom_lpf_vertical_6(v_dst, dst_stride, v_limits->mblim, v_limits->lim, |
1291 | 0 | v_limits->hev_thr); |
1292 | 0 | break; |
1293 | 0 | case 8: |
1294 | 0 | case 14: assert(0); break; |
1295 | | // no filtering |
1296 | 0 | default: break; |
1297 | 0 | } |
1298 | 0 | } |
1299 | | #if !CONFIG_AV1_HIGHBITDEPTH |
1300 | | (void)seq_params; |
1301 | | #endif // !CONFIG_AV1_HIGHBITDEPTH |
1302 | 0 | } |
1303 | | |
1304 | | void av1_filter_block_plane_vert(const AV1_COMMON *const cm, |
1305 | | const MACROBLOCKD *const xd, const int plane, |
1306 | | const MACROBLOCKD_PLANE *const plane_ptr, |
1307 | 689k | const uint32_t mi_row, const uint32_t mi_col) { |
1308 | 689k | const uint32_t scale_horz = plane_ptr->subsampling_x; |
1309 | 689k | const uint32_t scale_vert = plane_ptr->subsampling_y; |
1310 | 689k | uint8_t *const dst_ptr = plane_ptr->dst.buf; |
1311 | 689k | const int dst_stride = plane_ptr->dst.stride; |
1312 | 689k | const int plane_mi_rows = |
1313 | 689k | ROUND_POWER_OF_TWO(cm->mi_params.mi_rows, scale_vert); |
1314 | 689k | const int plane_mi_cols = |
1315 | 689k | ROUND_POWER_OF_TWO(cm->mi_params.mi_cols, scale_horz); |
1316 | 689k | const int y_range = AOMMIN((int)(plane_mi_rows - (mi_row >> scale_vert)), |
1317 | 689k | (MAX_MIB_SIZE >> scale_vert)); |
1318 | 689k | const int x_range = AOMMIN((int)(plane_mi_cols - (mi_col >> scale_horz)), |
1319 | 689k | (MAX_MIB_SIZE >> scale_horz)); |
1320 | | |
1321 | 11.6M | for (int y = 0; y < y_range; y++) { |
1322 | 10.9M | uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride; |
1323 | 65.3M | for (int x = 0; x < x_range;) { |
1324 | | // inner loop always filter vertical edges in a MI block. If MI size |
1325 | | // is 8x8, it will filter the vertical edge aligned with a 8x8 block. |
1326 | | // If 4x4 transform is used, it will then filter the internal edge |
1327 | | // aligned with a 4x4 block |
1328 | 54.4M | const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE; |
1329 | 54.4M | const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE; |
1330 | 54.4M | uint32_t advance_units; |
1331 | 54.4M | TX_SIZE tx_size; |
1332 | 54.4M | AV1_DEBLOCKING_PARAMETERS params; |
1333 | 54.4M | memset(¶ms, 0, sizeof(params)); |
1334 | | |
1335 | 54.4M | tx_size = |
1336 | 54.4M | set_lpf_parameters(¶ms, ((ptrdiff_t)1 << scale_horz), cm, xd, |
1337 | 54.4M | VERT_EDGE, curr_x, curr_y, plane, plane_ptr); |
1338 | 54.4M | if (tx_size == TX_INVALID) { |
1339 | 0 | params.filter_length = 0; |
1340 | 0 | tx_size = TX_4X4; |
1341 | 0 | } |
1342 | | |
1343 | 54.4M | filter_vert(p, dst_stride, ¶ms, cm->seq_params, USE_SINGLE); |
1344 | | |
1345 | | // advance the destination pointer |
1346 | 54.4M | advance_units = tx_size_wide_unit[tx_size]; |
1347 | 54.4M | x += advance_units; |
1348 | 54.4M | p += advance_units * MI_SIZE; |
1349 | 54.4M | } |
1350 | 10.9M | } |
1351 | 689k | } |
1352 | | |
1353 | | void av1_filter_block_plane_vert_opt( |
1354 | | const AV1_COMMON *const cm, const MACROBLOCKD *const xd, |
1355 | | const MACROBLOCKD_PLANE *const plane_ptr, const uint32_t mi_row, |
1356 | | const uint32_t mi_col, AV1_DEBLOCKING_PARAMETERS *params_buf, |
1357 | 0 | TX_SIZE *tx_buf, int num_mis_in_lpf_unit_height_log2) { |
1358 | 0 | uint8_t *const dst_ptr = plane_ptr->dst.buf; |
1359 | 0 | const int dst_stride = plane_ptr->dst.stride; |
1360 | | // Ensure that mi_cols/mi_rows are calculated based on frame dimension aligned |
1361 | | // to MI_SIZE. |
1362 | 0 | const int plane_mi_cols = |
1363 | 0 | CEIL_POWER_OF_TWO(plane_ptr->dst.width, MI_SIZE_LOG2); |
1364 | 0 | const int plane_mi_rows = |
1365 | 0 | CEIL_POWER_OF_TWO(plane_ptr->dst.height, MI_SIZE_LOG2); |
1366 | | // Whenever 'pipeline_lpf_mt_with_enc' is enabled, height of the unit to |
1367 | | // filter (i.e., y_range) is calculated based on the size of the superblock |
1368 | | // used. |
1369 | 0 | const int y_range = AOMMIN((int)(plane_mi_rows - mi_row), |
1370 | 0 | (1 << num_mis_in_lpf_unit_height_log2)); |
1371 | | // Width of the unit to filter (i.e., x_range) should always be calculated |
1372 | | // based on maximum superblock size as this function is called for mi_col = 0, |
1373 | | // MAX_MIB_SIZE, 2 * MAX_MIB_SIZE etc. |
1374 | 0 | const int x_range = AOMMIN((int)(plane_mi_cols - mi_col), MAX_MIB_SIZE); |
1375 | 0 | const ptrdiff_t mode_step = 1; |
1376 | 0 | for (int y = 0; y < y_range; y++) { |
1377 | 0 | const uint32_t curr_y = mi_row + y; |
1378 | 0 | const uint32_t x_start = mi_col; |
1379 | 0 | const uint32_t x_end = mi_col + x_range; |
1380 | 0 | int min_block_height = block_size_high[BLOCK_128X128]; |
1381 | 0 | set_lpf_parameters_for_line_luma(params_buf, tx_buf, cm, xd, VERT_EDGE, |
1382 | 0 | x_start, curr_y, plane_ptr, x_end, |
1383 | 0 | mode_step, &min_block_height); |
1384 | |
|
1385 | 0 | AV1_DEBLOCKING_PARAMETERS *params = params_buf; |
1386 | 0 | TX_SIZE *tx_size = tx_buf; |
1387 | 0 | USE_FILTER_TYPE use_filter_type = USE_SINGLE; |
1388 | |
|
1389 | 0 | uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride; |
1390 | |
|
1391 | 0 | if ((y & 3) == 0 && (y + 3) < y_range && min_block_height >= 16) { |
1392 | | // If we are on a row which is a multiple of 4, and the minimum height is |
1393 | | // 16 pixels, then the current and right 3 cols must contain the same |
1394 | | // prediction block. This is because dim 16 can only happen every unit of |
1395 | | // 4 mi's. |
1396 | 0 | use_filter_type = USE_QUAD; |
1397 | 0 | y += 3; |
1398 | 0 | } else if ((y + 1) < y_range && min_block_height >= 8) { |
1399 | 0 | use_filter_type = USE_DUAL; |
1400 | 0 | y += 1; |
1401 | 0 | } |
1402 | |
|
1403 | 0 | for (int x = 0; x < x_range;) { |
1404 | 0 | if (*tx_size == TX_INVALID) { |
1405 | 0 | params->filter_length = 0; |
1406 | 0 | *tx_size = TX_4X4; |
1407 | 0 | } |
1408 | |
|
1409 | 0 | filter_vert(p, dst_stride, params, cm->seq_params, use_filter_type); |
1410 | | |
1411 | | // advance the destination pointer |
1412 | 0 | const uint32_t advance_units = tx_size_wide_unit[*tx_size]; |
1413 | 0 | x += advance_units; |
1414 | 0 | p += advance_units * MI_SIZE; |
1415 | 0 | params += advance_units; |
1416 | 0 | tx_size += advance_units; |
1417 | 0 | } |
1418 | 0 | } |
1419 | 0 | } |
1420 | | |
1421 | | void av1_filter_block_plane_vert_opt_chroma( |
1422 | | const AV1_COMMON *const cm, const MACROBLOCKD *const xd, |
1423 | | const MACROBLOCKD_PLANE *const plane_ptr, const uint32_t mi_row, |
1424 | | const uint32_t mi_col, AV1_DEBLOCKING_PARAMETERS *params_buf, |
1425 | | TX_SIZE *tx_buf, int plane, bool joint_filter_chroma, |
1426 | 0 | int num_mis_in_lpf_unit_height_log2) { |
1427 | 0 | const uint32_t scale_horz = plane_ptr->subsampling_x; |
1428 | 0 | const uint32_t scale_vert = plane_ptr->subsampling_y; |
1429 | 0 | const int dst_stride = plane_ptr->dst.stride; |
1430 | | // Ensure that mi_cols/mi_rows are calculated based on frame dimension aligned |
1431 | | // to MI_SIZE. |
1432 | 0 | const int mi_cols = |
1433 | 0 | ((plane_ptr->dst.width << scale_horz) + MI_SIZE - 1) >> MI_SIZE_LOG2; |
1434 | 0 | const int mi_rows = |
1435 | 0 | ((plane_ptr->dst.height << scale_vert) + MI_SIZE - 1) >> MI_SIZE_LOG2; |
1436 | 0 | const int plane_mi_rows = ROUND_POWER_OF_TWO(mi_rows, scale_vert); |
1437 | 0 | const int plane_mi_cols = ROUND_POWER_OF_TWO(mi_cols, scale_horz); |
1438 | 0 | const int y_range = |
1439 | 0 | AOMMIN((int)(plane_mi_rows - (mi_row >> scale_vert)), |
1440 | 0 | ((1 << num_mis_in_lpf_unit_height_log2) >> scale_vert)); |
1441 | 0 | const int x_range = AOMMIN((int)(plane_mi_cols - (mi_col >> scale_horz)), |
1442 | 0 | (MAX_MIB_SIZE >> scale_horz)); |
1443 | 0 | const ptrdiff_t mode_step = (ptrdiff_t)1 << scale_horz; |
1444 | |
|
1445 | 0 | for (int y = 0; y < y_range; y++) { |
1446 | 0 | const uint32_t curr_y = mi_row + (y << scale_vert); |
1447 | 0 | const uint32_t x_start = mi_col + (0 << scale_horz); |
1448 | 0 | const uint32_t x_end = mi_col + (x_range << scale_horz); |
1449 | 0 | int min_height = tx_size_high[TX_64X64]; |
1450 | 0 | set_lpf_parameters_for_line_chroma(params_buf, tx_buf, cm, xd, VERT_EDGE, |
1451 | 0 | x_start, curr_y, plane_ptr, x_end, |
1452 | 0 | mode_step, scale_horz, scale_vert, |
1453 | 0 | &min_height, plane, joint_filter_chroma); |
1454 | |
|
1455 | 0 | AV1_DEBLOCKING_PARAMETERS *params = params_buf; |
1456 | 0 | TX_SIZE *tx_size = tx_buf; |
1457 | 0 | int use_filter_type = USE_SINGLE; |
1458 | 0 | int y_inc = 0; |
1459 | |
|
1460 | 0 | if ((y & 3) == 0 && (y + 3) < y_range && min_height >= 16) { |
1461 | | // If we are on a row which is a multiple of 4, and the minimum height is |
1462 | | // 16 pixels, then the current and below 3 rows must contain the same tx |
1463 | | // block. This is because dim 16 can only happen every unit of 4 mi's. |
1464 | 0 | use_filter_type = USE_QUAD; |
1465 | 0 | y_inc = 3; |
1466 | 0 | } else if (y % 2 == 0 && (y + 1) < y_range && min_height >= 8) { |
1467 | | // If we are on an even row, and the minimum height is 8 pixels, then the |
1468 | | // current and below rows must contain the same tx block. This is because |
1469 | | // dim 4 can only happen every unit of 2**0, and 8 every unit of 2**1, |
1470 | | // etc. |
1471 | 0 | use_filter_type = USE_DUAL; |
1472 | 0 | y_inc = 1; |
1473 | 0 | } |
1474 | |
|
1475 | 0 | for (int x = 0; x < x_range;) { |
1476 | | // inner loop always filter vertical edges in a MI block. If MI size |
1477 | | // is 8x8, it will filter the vertical edge aligned with a 8x8 block. |
1478 | | // If 4x4 transform is used, it will then filter the internal edge |
1479 | | // aligned with a 4x4 block |
1480 | 0 | if (*tx_size == TX_INVALID) { |
1481 | 0 | params->filter_length = 0; |
1482 | 0 | *tx_size = TX_4X4; |
1483 | 0 | } |
1484 | |
|
1485 | 0 | const int offset = y * MI_SIZE * dst_stride + x * MI_SIZE; |
1486 | 0 | if (joint_filter_chroma) { |
1487 | 0 | uint8_t *u_dst = plane_ptr[0].dst.buf + offset; |
1488 | 0 | uint8_t *v_dst = plane_ptr[1].dst.buf + offset; |
1489 | 0 | filter_vert_chroma(u_dst, v_dst, dst_stride, params, cm->seq_params, |
1490 | 0 | use_filter_type); |
1491 | 0 | } else { |
1492 | 0 | uint8_t *dst_ptr = plane_ptr->dst.buf + offset; |
1493 | 0 | filter_vert(dst_ptr, dst_stride, params, cm->seq_params, |
1494 | 0 | use_filter_type); |
1495 | 0 | } |
1496 | | |
1497 | | // advance the destination pointer |
1498 | 0 | const uint32_t advance_units = tx_size_wide_unit[*tx_size]; |
1499 | 0 | x += advance_units; |
1500 | 0 | params += advance_units; |
1501 | 0 | tx_size += advance_units; |
1502 | 0 | } |
1503 | 0 | y += y_inc; |
1504 | 0 | } |
1505 | 0 | } |
1506 | | |
1507 | | static AOM_INLINE void filter_horz(uint8_t *dst, int dst_stride, |
1508 | | const AV1_DEBLOCKING_PARAMETERS *params, |
1509 | | const SequenceHeader *seq_params, |
1510 | 81.1M | USE_FILTER_TYPE use_filter_type) { |
1511 | 81.1M | const loop_filter_thresh *limits = params->lfthr; |
1512 | 81.1M | #if CONFIG_AV1_HIGHBITDEPTH |
1513 | 81.1M | const int use_highbitdepth = seq_params->use_highbitdepth; |
1514 | 81.1M | const aom_bit_depth_t bit_depth = seq_params->bit_depth; |
1515 | 81.1M | if (use_highbitdepth) { |
1516 | 57.1M | uint16_t *dst_shortptr = CONVERT_TO_SHORTPTR(dst); |
1517 | 57.1M | if (use_filter_type == USE_QUAD) { |
1518 | 0 | switch (params->filter_length) { |
1519 | | // apply 4-tap filtering |
1520 | 0 | case 4: |
1521 | 0 | aom_highbd_lpf_horizontal_4_dual( |
1522 | 0 | dst_shortptr, dst_stride, limits->mblim, limits->lim, |
1523 | 0 | limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr, |
1524 | 0 | bit_depth); |
1525 | 0 | aom_highbd_lpf_horizontal_4_dual( |
1526 | 0 | dst_shortptr + (2 * MI_SIZE), dst_stride, limits->mblim, |
1527 | 0 | limits->lim, limits->hev_thr, limits->mblim, limits->lim, |
1528 | 0 | limits->hev_thr, bit_depth); |
1529 | 0 | break; |
1530 | 0 | case 6: // apply 6-tap filter for chroma plane only |
1531 | 0 | aom_highbd_lpf_horizontal_6_dual( |
1532 | 0 | dst_shortptr, dst_stride, limits->mblim, limits->lim, |
1533 | 0 | limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr, |
1534 | 0 | bit_depth); |
1535 | 0 | aom_highbd_lpf_horizontal_6_dual( |
1536 | 0 | dst_shortptr + (2 * MI_SIZE), dst_stride, limits->mblim, |
1537 | 0 | limits->lim, limits->hev_thr, limits->mblim, limits->lim, |
1538 | 0 | limits->hev_thr, bit_depth); |
1539 | 0 | break; |
1540 | | // apply 8-tap filtering |
1541 | 0 | case 8: |
1542 | 0 | aom_highbd_lpf_horizontal_8_dual( |
1543 | 0 | dst_shortptr, dst_stride, limits->mblim, limits->lim, |
1544 | 0 | limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr, |
1545 | 0 | bit_depth); |
1546 | 0 | aom_highbd_lpf_horizontal_8_dual( |
1547 | 0 | dst_shortptr + (2 * MI_SIZE), dst_stride, limits->mblim, |
1548 | 0 | limits->lim, limits->hev_thr, limits->mblim, limits->lim, |
1549 | 0 | limits->hev_thr, bit_depth); |
1550 | 0 | break; |
1551 | | // apply 14-tap filtering |
1552 | 0 | case 14: |
1553 | 0 | aom_highbd_lpf_horizontal_14_dual( |
1554 | 0 | dst_shortptr, dst_stride, limits->mblim, limits->lim, |
1555 | 0 | limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr, |
1556 | 0 | bit_depth); |
1557 | 0 | aom_highbd_lpf_horizontal_14_dual( |
1558 | 0 | dst_shortptr + (2 * MI_SIZE), dst_stride, limits->mblim, |
1559 | 0 | limits->lim, limits->hev_thr, limits->mblim, limits->lim, |
1560 | 0 | limits->hev_thr, bit_depth); |
1561 | 0 | break; |
1562 | | // no filtering |
1563 | 0 | default: break; |
1564 | 0 | } |
1565 | 57.1M | } else if (use_filter_type == USE_DUAL) { |
1566 | 0 | switch (params->filter_length) { |
1567 | | // apply 4-tap filtering |
1568 | 0 | case 4: |
1569 | 0 | aom_highbd_lpf_horizontal_4_dual( |
1570 | 0 | dst_shortptr, dst_stride, limits->mblim, limits->lim, |
1571 | 0 | limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr, |
1572 | 0 | bit_depth); |
1573 | 0 | break; |
1574 | 0 | case 6: // apply 6-tap filter for chroma plane only |
1575 | 0 | aom_highbd_lpf_horizontal_6_dual( |
1576 | 0 | dst_shortptr, dst_stride, limits->mblim, limits->lim, |
1577 | 0 | limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr, |
1578 | 0 | bit_depth); |
1579 | 0 | break; |
1580 | | // apply 8-tap filtering |
1581 | 0 | case 8: |
1582 | 0 | aom_highbd_lpf_horizontal_8_dual( |
1583 | 0 | dst_shortptr, dst_stride, limits->mblim, limits->lim, |
1584 | 0 | limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr, |
1585 | 0 | bit_depth); |
1586 | 0 | break; |
1587 | | // apply 14-tap filtering |
1588 | 0 | case 14: |
1589 | 0 | aom_highbd_lpf_horizontal_14_dual( |
1590 | 0 | dst_shortptr, dst_stride, limits->mblim, limits->lim, |
1591 | 0 | limits->hev_thr, limits->mblim, limits->lim, limits->hev_thr, |
1592 | 0 | bit_depth); |
1593 | 0 | break; |
1594 | | // no filtering |
1595 | 0 | default: break; |
1596 | 0 | } |
1597 | 57.1M | } else { |
1598 | 57.1M | assert(use_filter_type == USE_SINGLE); |
1599 | 57.0M | switch (params->filter_length) { |
1600 | | // apply 4-tap filtering |
1601 | 33.2M | case 4: |
1602 | 33.2M | aom_highbd_lpf_horizontal_4(dst_shortptr, dst_stride, limits->mblim, |
1603 | 33.2M | limits->lim, limits->hev_thr, bit_depth); |
1604 | 33.2M | break; |
1605 | 7.14M | case 6: // apply 6-tap filter for chroma plane only |
1606 | 7.14M | aom_highbd_lpf_horizontal_6(dst_shortptr, dst_stride, limits->mblim, |
1607 | 7.14M | limits->lim, limits->hev_thr, bit_depth); |
1608 | 7.14M | break; |
1609 | | // apply 8-tap filtering |
1610 | 3.14M | case 8: |
1611 | 3.14M | aom_highbd_lpf_horizontal_8(dst_shortptr, dst_stride, limits->mblim, |
1612 | 3.14M | limits->lim, limits->hev_thr, bit_depth); |
1613 | 3.14M | break; |
1614 | | // apply 14-tap filtering |
1615 | 8.08M | case 14: |
1616 | 8.08M | aom_highbd_lpf_horizontal_14(dst_shortptr, dst_stride, limits->mblim, |
1617 | 8.08M | limits->lim, limits->hev_thr, bit_depth); |
1618 | 8.08M | break; |
1619 | | // no filtering |
1620 | 12.7M | default: break; |
1621 | 57.0M | } |
1622 | 57.0M | } |
1623 | 51.5M | return; |
1624 | 57.1M | } |
1625 | 24.0M | #endif // CONFIG_AV1_HIGHBITDEPTH |
1626 | 24.0M | if (use_filter_type == USE_QUAD) { |
1627 | | // Only one set of loop filter parameters (mblim, lim and hev_thr) is |
1628 | | // passed as argument to quad loop filter because quad loop filter is |
1629 | | // called for those cases where all the 4 set of loop filter parameters |
1630 | | // are equal. |
1631 | 0 | switch (params->filter_length) { |
1632 | | // apply 4-tap filtering |
1633 | 0 | case 4: |
1634 | 0 | aom_lpf_horizontal_4_quad(dst, dst_stride, limits->mblim, limits->lim, |
1635 | 0 | limits->hev_thr); |
1636 | 0 | break; |
1637 | 0 | case 6: // apply 6-tap filter for chroma plane only |
1638 | 0 | aom_lpf_horizontal_6_quad(dst, dst_stride, limits->mblim, limits->lim, |
1639 | 0 | limits->hev_thr); |
1640 | 0 | break; |
1641 | | // apply 8-tap filtering |
1642 | 0 | case 8: |
1643 | 0 | aom_lpf_horizontal_8_quad(dst, dst_stride, limits->mblim, limits->lim, |
1644 | 0 | limits->hev_thr); |
1645 | 0 | break; |
1646 | | // apply 14-tap filtering |
1647 | 0 | case 14: |
1648 | 0 | aom_lpf_horizontal_14_quad(dst, dst_stride, limits->mblim, limits->lim, |
1649 | 0 | limits->hev_thr); |
1650 | 0 | break; |
1651 | | // no filtering |
1652 | 0 | default: break; |
1653 | 0 | } |
1654 | 24.0M | } else if (use_filter_type == USE_DUAL) { |
1655 | 0 | switch (params->filter_length) { |
1656 | | // apply 4-tap filtering |
1657 | 0 | case 4: |
1658 | 0 | aom_lpf_horizontal_4_dual(dst, dst_stride, limits->mblim, limits->lim, |
1659 | 0 | limits->hev_thr, limits->mblim, limits->lim, |
1660 | 0 | limits->hev_thr); |
1661 | 0 | break; |
1662 | 0 | case 6: // apply 6-tap filter for chroma plane only |
1663 | 0 | aom_lpf_horizontal_6_dual(dst, dst_stride, limits->mblim, limits->lim, |
1664 | 0 | limits->hev_thr, limits->mblim, limits->lim, |
1665 | 0 | limits->hev_thr); |
1666 | 0 | break; |
1667 | | // apply 8-tap filtering |
1668 | 0 | case 8: |
1669 | 0 | aom_lpf_horizontal_8_dual(dst, dst_stride, limits->mblim, limits->lim, |
1670 | 0 | limits->hev_thr, limits->mblim, limits->lim, |
1671 | 0 | limits->hev_thr); |
1672 | 0 | break; |
1673 | | // apply 14-tap filtering |
1674 | 0 | case 14: |
1675 | 0 | aom_lpf_horizontal_14_dual(dst, dst_stride, limits->mblim, limits->lim, |
1676 | 0 | limits->hev_thr, limits->mblim, limits->lim, |
1677 | 0 | limits->hev_thr); |
1678 | 0 | break; |
1679 | | // no filtering |
1680 | 0 | default: break; |
1681 | 0 | } |
1682 | 24.0M | } else { |
1683 | 24.0M | assert(use_filter_type == USE_SINGLE); |
1684 | 25.6M | switch (params->filter_length) { |
1685 | | // apply 4-tap filtering |
1686 | 4.64M | case 4: |
1687 | 4.64M | aom_lpf_horizontal_4(dst, dst_stride, limits->mblim, limits->lim, |
1688 | 4.64M | limits->hev_thr); |
1689 | 4.64M | break; |
1690 | 9.91M | case 6: // apply 6-tap filter for chroma plane only |
1691 | 9.91M | aom_lpf_horizontal_6(dst, dst_stride, limits->mblim, limits->lim, |
1692 | 9.91M | limits->hev_thr); |
1693 | 9.91M | break; |
1694 | | // apply 8-tap filtering |
1695 | 4.19M | case 8: |
1696 | 4.19M | aom_lpf_horizontal_8(dst, dst_stride, limits->mblim, limits->lim, |
1697 | 4.19M | limits->hev_thr); |
1698 | 4.19M | break; |
1699 | | // apply 14-tap filtering |
1700 | 8.24M | case 14: |
1701 | 8.24M | aom_lpf_horizontal_14(dst, dst_stride, limits->mblim, limits->lim, |
1702 | 8.24M | limits->hev_thr); |
1703 | 8.24M | break; |
1704 | | // no filtering |
1705 | 2.35M | default: break; |
1706 | 25.6M | } |
1707 | 25.6M | } |
1708 | | #if !CONFIG_AV1_HIGHBITDEPTH |
1709 | | (void)seq_params; |
1710 | | #endif // !CONFIG_AV1_HIGHBITDEPTH |
1711 | 24.0M | } |
1712 | | |
1713 | | static AOM_INLINE void filter_horz_chroma( |
1714 | | uint8_t *u_dst, uint8_t *v_dst, int dst_stride, |
1715 | | const AV1_DEBLOCKING_PARAMETERS *params, const SequenceHeader *seq_params, |
1716 | 0 | USE_FILTER_TYPE use_filter_type) { |
1717 | 0 | const loop_filter_thresh *u_limits = params->lfthr; |
1718 | 0 | const loop_filter_thresh *v_limits = params->lfthr; |
1719 | 0 | #if CONFIG_AV1_HIGHBITDEPTH |
1720 | 0 | const int use_highbitdepth = seq_params->use_highbitdepth; |
1721 | 0 | const aom_bit_depth_t bit_depth = seq_params->bit_depth; |
1722 | 0 | if (use_highbitdepth) { |
1723 | 0 | uint16_t *u_dst_shortptr = CONVERT_TO_SHORTPTR(u_dst); |
1724 | 0 | uint16_t *v_dst_shortptr = CONVERT_TO_SHORTPTR(v_dst); |
1725 | 0 | if (use_filter_type == USE_QUAD) { |
1726 | 0 | switch (params->filter_length) { |
1727 | | // apply 4-tap filtering |
1728 | 0 | case 4: |
1729 | 0 | aom_highbd_lpf_horizontal_4_dual( |
1730 | 0 | u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim, |
1731 | 0 | u_limits->hev_thr, u_limits->mblim, u_limits->lim, |
1732 | 0 | u_limits->hev_thr, bit_depth); |
1733 | 0 | aom_highbd_lpf_horizontal_4_dual( |
1734 | 0 | u_dst_shortptr + (2 * MI_SIZE), dst_stride, u_limits->mblim, |
1735 | 0 | u_limits->lim, u_limits->hev_thr, u_limits->mblim, u_limits->lim, |
1736 | 0 | u_limits->hev_thr, bit_depth); |
1737 | 0 | aom_highbd_lpf_horizontal_4_dual( |
1738 | 0 | v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim, |
1739 | 0 | v_limits->hev_thr, v_limits->mblim, v_limits->lim, |
1740 | 0 | v_limits->hev_thr, bit_depth); |
1741 | 0 | aom_highbd_lpf_horizontal_4_dual( |
1742 | 0 | v_dst_shortptr + (2 * MI_SIZE), dst_stride, v_limits->mblim, |
1743 | 0 | v_limits->lim, v_limits->hev_thr, v_limits->mblim, v_limits->lim, |
1744 | 0 | v_limits->hev_thr, bit_depth); |
1745 | 0 | break; |
1746 | 0 | case 6: // apply 6-tap filter for chroma plane only |
1747 | 0 | aom_highbd_lpf_horizontal_6_dual( |
1748 | 0 | u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim, |
1749 | 0 | u_limits->hev_thr, u_limits->mblim, u_limits->lim, |
1750 | 0 | u_limits->hev_thr, bit_depth); |
1751 | 0 | aom_highbd_lpf_horizontal_6_dual( |
1752 | 0 | u_dst_shortptr + (2 * MI_SIZE), dst_stride, u_limits->mblim, |
1753 | 0 | u_limits->lim, u_limits->hev_thr, u_limits->mblim, u_limits->lim, |
1754 | 0 | u_limits->hev_thr, bit_depth); |
1755 | 0 | aom_highbd_lpf_horizontal_6_dual( |
1756 | 0 | v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim, |
1757 | 0 | v_limits->hev_thr, v_limits->mblim, v_limits->lim, |
1758 | 0 | v_limits->hev_thr, bit_depth); |
1759 | 0 | aom_highbd_lpf_horizontal_6_dual( |
1760 | 0 | v_dst_shortptr + (2 * MI_SIZE), dst_stride, v_limits->mblim, |
1761 | 0 | v_limits->lim, v_limits->hev_thr, v_limits->mblim, v_limits->lim, |
1762 | 0 | v_limits->hev_thr, bit_depth); |
1763 | 0 | break; |
1764 | 0 | case 8: |
1765 | 0 | case 14: assert(0); |
1766 | | // no filtering |
1767 | 0 | default: break; |
1768 | 0 | } |
1769 | 0 | } else if (use_filter_type == USE_DUAL) { |
1770 | 0 | switch (params->filter_length) { |
1771 | | // apply 4-tap filtering |
1772 | 0 | case 4: |
1773 | 0 | aom_highbd_lpf_horizontal_4_dual( |
1774 | 0 | u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim, |
1775 | 0 | u_limits->hev_thr, u_limits->mblim, u_limits->lim, |
1776 | 0 | u_limits->hev_thr, bit_depth); |
1777 | 0 | aom_highbd_lpf_horizontal_4_dual( |
1778 | 0 | v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim, |
1779 | 0 | v_limits->hev_thr, v_limits->mblim, v_limits->lim, |
1780 | 0 | v_limits->hev_thr, bit_depth); |
1781 | 0 | break; |
1782 | 0 | case 6: // apply 6-tap filter for chroma plane only |
1783 | 0 | aom_highbd_lpf_horizontal_6_dual( |
1784 | 0 | u_dst_shortptr, dst_stride, u_limits->mblim, u_limits->lim, |
1785 | 0 | u_limits->hev_thr, u_limits->mblim, u_limits->lim, |
1786 | 0 | u_limits->hev_thr, bit_depth); |
1787 | 0 | aom_highbd_lpf_horizontal_6_dual( |
1788 | 0 | v_dst_shortptr, dst_stride, v_limits->mblim, v_limits->lim, |
1789 | 0 | v_limits->hev_thr, v_limits->mblim, v_limits->lim, |
1790 | 0 | v_limits->hev_thr, bit_depth); |
1791 | 0 | break; |
1792 | 0 | case 8: |
1793 | 0 | case 14: assert(0); |
1794 | | // no filtering |
1795 | 0 | default: break; |
1796 | 0 | } |
1797 | 0 | } else { |
1798 | 0 | assert(use_filter_type == USE_SINGLE); |
1799 | 0 | switch (params->filter_length) { |
1800 | | // apply 4-tap filtering |
1801 | 0 | case 4: |
1802 | 0 | aom_highbd_lpf_horizontal_4(u_dst_shortptr, dst_stride, |
1803 | 0 | u_limits->mblim, u_limits->lim, |
1804 | 0 | u_limits->hev_thr, bit_depth); |
1805 | 0 | aom_highbd_lpf_horizontal_4(v_dst_shortptr, dst_stride, |
1806 | 0 | v_limits->mblim, v_limits->lim, |
1807 | 0 | v_limits->hev_thr, bit_depth); |
1808 | 0 | break; |
1809 | 0 | case 6: // apply 6-tap filter for chroma plane only |
1810 | 0 | aom_highbd_lpf_horizontal_6(u_dst_shortptr, dst_stride, |
1811 | 0 | u_limits->mblim, u_limits->lim, |
1812 | 0 | u_limits->hev_thr, bit_depth); |
1813 | 0 | aom_highbd_lpf_horizontal_6(v_dst_shortptr, dst_stride, |
1814 | 0 | v_limits->mblim, v_limits->lim, |
1815 | 0 | v_limits->hev_thr, bit_depth); |
1816 | 0 | break; |
1817 | 0 | case 8: |
1818 | 0 | case 14: assert(0); break; |
1819 | | // no filtering |
1820 | 0 | default: break; |
1821 | 0 | } |
1822 | 0 | } |
1823 | 0 | return; |
1824 | 0 | } |
1825 | 0 | #endif // CONFIG_AV1_HIGHBITDEPTH |
1826 | 0 | if (use_filter_type == USE_QUAD) { |
1827 | | // Only one set of loop filter parameters (mblim, lim and hev_thr) is |
1828 | | // passed as argument to quad loop filter because quad loop filter is |
1829 | | // called for those cases where all the 4 set of loop filter parameters |
1830 | | // are equal. |
1831 | 0 | switch (params->filter_length) { |
1832 | | // apply 4-tap filtering |
1833 | 0 | case 4: |
1834 | 0 | aom_lpf_horizontal_4_quad(u_dst, dst_stride, u_limits->mblim, |
1835 | 0 | u_limits->lim, u_limits->hev_thr); |
1836 | 0 | aom_lpf_horizontal_4_quad(v_dst, dst_stride, v_limits->mblim, |
1837 | 0 | v_limits->lim, v_limits->hev_thr); |
1838 | 0 | break; |
1839 | 0 | case 6: // apply 6-tap filter for chroma plane only |
1840 | 0 | aom_lpf_horizontal_6_quad(u_dst, dst_stride, u_limits->mblim, |
1841 | 0 | u_limits->lim, u_limits->hev_thr); |
1842 | 0 | aom_lpf_horizontal_6_quad(v_dst, dst_stride, v_limits->mblim, |
1843 | 0 | v_limits->lim, v_limits->hev_thr); |
1844 | 0 | break; |
1845 | 0 | case 8: |
1846 | 0 | case 14: assert(0); |
1847 | | // no filtering |
1848 | 0 | default: break; |
1849 | 0 | } |
1850 | 0 | } else if (use_filter_type == USE_DUAL) { |
1851 | 0 | switch (params->filter_length) { |
1852 | | // apply 4-tap filtering |
1853 | 0 | case 4: |
1854 | 0 | aom_lpf_horizontal_4_dual(u_dst, dst_stride, u_limits->mblim, |
1855 | 0 | u_limits->lim, u_limits->hev_thr, |
1856 | 0 | u_limits->mblim, u_limits->lim, |
1857 | 0 | u_limits->hev_thr); |
1858 | 0 | aom_lpf_horizontal_4_dual(v_dst, dst_stride, v_limits->mblim, |
1859 | 0 | v_limits->lim, v_limits->hev_thr, |
1860 | 0 | v_limits->mblim, v_limits->lim, |
1861 | 0 | v_limits->hev_thr); |
1862 | 0 | break; |
1863 | 0 | case 6: // apply 6-tap filter for chroma plane only |
1864 | 0 | aom_lpf_horizontal_6_dual(u_dst, dst_stride, u_limits->mblim, |
1865 | 0 | u_limits->lim, u_limits->hev_thr, |
1866 | 0 | u_limits->mblim, u_limits->lim, |
1867 | 0 | u_limits->hev_thr); |
1868 | 0 | aom_lpf_horizontal_6_dual(v_dst, dst_stride, v_limits->mblim, |
1869 | 0 | v_limits->lim, v_limits->hev_thr, |
1870 | 0 | v_limits->mblim, v_limits->lim, |
1871 | 0 | v_limits->hev_thr); |
1872 | 0 | break; |
1873 | 0 | case 8: |
1874 | 0 | case 14: assert(0); |
1875 | | // no filtering |
1876 | 0 | default: break; |
1877 | 0 | } |
1878 | 0 | } else { |
1879 | 0 | assert(use_filter_type == USE_SINGLE); |
1880 | 0 | switch (params->filter_length) { |
1881 | | // apply 4-tap filtering |
1882 | 0 | case 4: |
1883 | 0 | aom_lpf_horizontal_4(u_dst, dst_stride, u_limits->mblim, u_limits->lim, |
1884 | 0 | u_limits->hev_thr); |
1885 | 0 | aom_lpf_horizontal_4(v_dst, dst_stride, v_limits->mblim, v_limits->lim, |
1886 | 0 | u_limits->hev_thr); |
1887 | 0 | break; |
1888 | 0 | case 6: // apply 6-tap filter for chroma plane only |
1889 | 0 | aom_lpf_horizontal_6(u_dst, dst_stride, u_limits->mblim, u_limits->lim, |
1890 | 0 | u_limits->hev_thr); |
1891 | 0 | aom_lpf_horizontal_6(v_dst, dst_stride, v_limits->mblim, v_limits->lim, |
1892 | 0 | v_limits->hev_thr); |
1893 | 0 | break; |
1894 | 0 | case 8: |
1895 | 0 | case 14: assert(0); break; |
1896 | | // no filtering |
1897 | 0 | default: break; |
1898 | 0 | } |
1899 | 0 | } |
1900 | | #if !CONFIG_AV1_HIGHBITDEPTH |
1901 | | (void)seq_params; |
1902 | | #endif // !CONFIG_AV1_HIGHBITDEPTH |
1903 | 0 | } |
1904 | | |
1905 | | void av1_filter_block_plane_horz(const AV1_COMMON *const cm, |
1906 | | const MACROBLOCKD *const xd, const int plane, |
1907 | | const MACROBLOCKD_PLANE *const plane_ptr, |
1908 | 693k | const uint32_t mi_row, const uint32_t mi_col) { |
1909 | 693k | const uint32_t scale_horz = plane_ptr->subsampling_x; |
1910 | 693k | const uint32_t scale_vert = plane_ptr->subsampling_y; |
1911 | 693k | uint8_t *const dst_ptr = plane_ptr->dst.buf; |
1912 | 693k | const int dst_stride = plane_ptr->dst.stride; |
1913 | 693k | const int plane_mi_rows = |
1914 | 693k | ROUND_POWER_OF_TWO(cm->mi_params.mi_rows, scale_vert); |
1915 | 693k | const int plane_mi_cols = |
1916 | 693k | ROUND_POWER_OF_TWO(cm->mi_params.mi_cols, scale_horz); |
1917 | 693k | const int y_range = AOMMIN((int)(plane_mi_rows - (mi_row >> scale_vert)), |
1918 | 693k | (MAX_MIB_SIZE >> scale_vert)); |
1919 | 693k | const int x_range = AOMMIN((int)(plane_mi_cols - (mi_col >> scale_horz)), |
1920 | 693k | (MAX_MIB_SIZE >> scale_horz)); |
1921 | 14.4M | for (int x = 0; x < x_range; x++) { |
1922 | 13.7M | uint8_t *p = dst_ptr + x * MI_SIZE; |
1923 | 88.6M | for (int y = 0; y < y_range;) { |
1924 | | // inner loop always filter vertical edges in a MI block. If MI size |
1925 | | // is 8x8, it will first filter the vertical edge aligned with a 8x8 |
1926 | | // block. If 4x4 transform is used, it will then filter the internal |
1927 | | // edge aligned with a 4x4 block |
1928 | 74.8M | const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE; |
1929 | 74.8M | const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE; |
1930 | 74.8M | uint32_t advance_units; |
1931 | 74.8M | TX_SIZE tx_size; |
1932 | 74.8M | AV1_DEBLOCKING_PARAMETERS params; |
1933 | 74.8M | memset(¶ms, 0, sizeof(params)); |
1934 | | |
1935 | 74.8M | tx_size = set_lpf_parameters( |
1936 | 74.8M | ¶ms, (cm->mi_params.mi_stride << scale_vert), cm, xd, HORZ_EDGE, |
1937 | 74.8M | curr_x, curr_y, plane, plane_ptr); |
1938 | 74.8M | if (tx_size == TX_INVALID) { |
1939 | 0 | params.filter_length = 0; |
1940 | 0 | tx_size = TX_4X4; |
1941 | 0 | } |
1942 | | |
1943 | 74.8M | filter_horz(p, dst_stride, ¶ms, cm->seq_params, USE_SINGLE); |
1944 | | |
1945 | | // advance the destination pointer |
1946 | 74.8M | advance_units = tx_size_high_unit[tx_size]; |
1947 | 74.8M | y += advance_units; |
1948 | 74.8M | p += advance_units * dst_stride * MI_SIZE; |
1949 | 74.8M | } |
1950 | 13.7M | } |
1951 | 693k | } |
1952 | | |
1953 | | void av1_filter_block_plane_horz_opt( |
1954 | | const AV1_COMMON *const cm, const MACROBLOCKD *const xd, |
1955 | | const MACROBLOCKD_PLANE *const plane_ptr, const uint32_t mi_row, |
1956 | | const uint32_t mi_col, AV1_DEBLOCKING_PARAMETERS *params_buf, |
1957 | 0 | TX_SIZE *tx_buf, int num_mis_in_lpf_unit_height_log2) { |
1958 | 0 | uint8_t *const dst_ptr = plane_ptr->dst.buf; |
1959 | 0 | const int dst_stride = plane_ptr->dst.stride; |
1960 | | // Ensure that mi_cols/mi_rows are calculated based on frame dimension aligned |
1961 | | // to MI_SIZE. |
1962 | 0 | const int plane_mi_cols = |
1963 | 0 | CEIL_POWER_OF_TWO(plane_ptr->dst.width, MI_SIZE_LOG2); |
1964 | 0 | const int plane_mi_rows = |
1965 | 0 | CEIL_POWER_OF_TWO(plane_ptr->dst.height, MI_SIZE_LOG2); |
1966 | 0 | const int y_range = AOMMIN((int)(plane_mi_rows - mi_row), |
1967 | 0 | (1 << num_mis_in_lpf_unit_height_log2)); |
1968 | 0 | const int x_range = AOMMIN((int)(plane_mi_cols - mi_col), MAX_MIB_SIZE); |
1969 | |
|
1970 | 0 | const ptrdiff_t mode_step = cm->mi_params.mi_stride; |
1971 | 0 | for (int x = 0; x < x_range; x++) { |
1972 | 0 | const uint32_t curr_x = mi_col + x; |
1973 | 0 | const uint32_t y_start = mi_row; |
1974 | 0 | const uint32_t y_end = mi_row + y_range; |
1975 | 0 | int min_block_width = block_size_high[BLOCK_128X128]; |
1976 | 0 | set_lpf_parameters_for_line_luma(params_buf, tx_buf, cm, xd, HORZ_EDGE, |
1977 | 0 | curr_x, y_start, plane_ptr, y_end, |
1978 | 0 | mode_step, &min_block_width); |
1979 | |
|
1980 | 0 | AV1_DEBLOCKING_PARAMETERS *params = params_buf; |
1981 | 0 | TX_SIZE *tx_size = tx_buf; |
1982 | 0 | USE_FILTER_TYPE filter_type = USE_SINGLE; |
1983 | |
|
1984 | 0 | uint8_t *p = dst_ptr + x * MI_SIZE; |
1985 | |
|
1986 | 0 | if ((x & 3) == 0 && (x + 3) < x_range && min_block_width >= 16) { |
1987 | | // If we are on a col which is a multiple of 4, and the minimum width is |
1988 | | // 16 pixels, then the current and right 3 cols must contain the same |
1989 | | // prediction block. This is because dim 16 can only happen every unit of |
1990 | | // 4 mi's. |
1991 | 0 | filter_type = USE_QUAD; |
1992 | 0 | x += 3; |
1993 | 0 | } else if ((x + 1) < x_range && min_block_width >= 8) { |
1994 | 0 | filter_type = USE_DUAL; |
1995 | 0 | x += 1; |
1996 | 0 | } |
1997 | |
|
1998 | 0 | for (int y = 0; y < y_range;) { |
1999 | 0 | if (*tx_size == TX_INVALID) { |
2000 | 0 | params->filter_length = 0; |
2001 | 0 | *tx_size = TX_4X4; |
2002 | 0 | } |
2003 | |
|
2004 | 0 | filter_horz(p, dst_stride, params, cm->seq_params, filter_type); |
2005 | | |
2006 | | // advance the destination pointer |
2007 | 0 | const uint32_t advance_units = tx_size_high_unit[*tx_size]; |
2008 | 0 | y += advance_units; |
2009 | 0 | p += advance_units * dst_stride * MI_SIZE; |
2010 | 0 | params += advance_units; |
2011 | 0 | tx_size += advance_units; |
2012 | 0 | } |
2013 | 0 | } |
2014 | 0 | } |
2015 | | |
2016 | | void av1_filter_block_plane_horz_opt_chroma( |
2017 | | const AV1_COMMON *const cm, const MACROBLOCKD *const xd, |
2018 | | const MACROBLOCKD_PLANE *const plane_ptr, const uint32_t mi_row, |
2019 | | const uint32_t mi_col, AV1_DEBLOCKING_PARAMETERS *params_buf, |
2020 | | TX_SIZE *tx_buf, int plane, bool joint_filter_chroma, |
2021 | 0 | int num_mis_in_lpf_unit_height_log2) { |
2022 | 0 | const uint32_t scale_horz = plane_ptr->subsampling_x; |
2023 | 0 | const uint32_t scale_vert = plane_ptr->subsampling_y; |
2024 | 0 | const int dst_stride = plane_ptr->dst.stride; |
2025 | | // Ensure that mi_cols/mi_rows are calculated based on frame dimension aligned |
2026 | | // to MI_SIZE. |
2027 | 0 | const int mi_cols = |
2028 | 0 | ((plane_ptr->dst.width << scale_horz) + MI_SIZE - 1) >> MI_SIZE_LOG2; |
2029 | 0 | const int mi_rows = |
2030 | 0 | ((plane_ptr->dst.height << scale_vert) + MI_SIZE - 1) >> MI_SIZE_LOG2; |
2031 | 0 | const int plane_mi_rows = ROUND_POWER_OF_TWO(mi_rows, scale_vert); |
2032 | 0 | const int plane_mi_cols = ROUND_POWER_OF_TWO(mi_cols, scale_horz); |
2033 | 0 | const int y_range = |
2034 | 0 | AOMMIN((int)(plane_mi_rows - (mi_row >> scale_vert)), |
2035 | 0 | ((1 << num_mis_in_lpf_unit_height_log2) >> scale_vert)); |
2036 | 0 | const int x_range = AOMMIN((int)(plane_mi_cols - (mi_col >> scale_horz)), |
2037 | 0 | (MAX_MIB_SIZE >> scale_horz)); |
2038 | 0 | const ptrdiff_t mode_step = cm->mi_params.mi_stride << scale_vert; |
2039 | 0 | for (int x = 0; x < x_range; x++) { |
2040 | 0 | const uint32_t y_start = mi_row + (0 << scale_vert); |
2041 | 0 | const uint32_t curr_x = mi_col + (x << scale_horz); |
2042 | 0 | const uint32_t y_end = mi_row + (y_range << scale_vert); |
2043 | 0 | int min_width = tx_size_wide[TX_64X64]; |
2044 | 0 | set_lpf_parameters_for_line_chroma(params_buf, tx_buf, cm, xd, HORZ_EDGE, |
2045 | 0 | curr_x, y_start, plane_ptr, y_end, |
2046 | 0 | mode_step, scale_horz, scale_vert, |
2047 | 0 | &min_width, plane, joint_filter_chroma); |
2048 | |
|
2049 | 0 | AV1_DEBLOCKING_PARAMETERS *params = params_buf; |
2050 | 0 | TX_SIZE *tx_size = tx_buf; |
2051 | 0 | USE_FILTER_TYPE use_filter_type = USE_SINGLE; |
2052 | 0 | int x_inc = 0; |
2053 | |
|
2054 | 0 | if ((x & 3) == 0 && (x + 3) < x_range && min_width >= 16) { |
2055 | | // If we are on a col which is a multiple of 4, and the minimum width is |
2056 | | // 16 pixels, then the current and right 3 cols must contain the same tx |
2057 | | // block. This is because dim 16 can only happen every unit of 4 mi's. |
2058 | 0 | use_filter_type = USE_QUAD; |
2059 | 0 | x_inc = 3; |
2060 | 0 | } else if (x % 2 == 0 && (x + 1) < x_range && min_width >= 8) { |
2061 | | // If we are on an even col, and the minimum width is 8 pixels, then the |
2062 | | // current and left cols must contain the same tx block. This is because |
2063 | | // dim 4 can only happen every unit of 2**0, and 8 every unit of 2**1, |
2064 | | // etc. |
2065 | 0 | use_filter_type = USE_DUAL; |
2066 | 0 | x_inc = 1; |
2067 | 0 | } |
2068 | |
|
2069 | 0 | for (int y = 0; y < y_range;) { |
2070 | | // inner loop always filter vertical edges in a MI block. If MI size |
2071 | | // is 8x8, it will first filter the vertical edge aligned with a 8x8 |
2072 | | // block. If 4x4 transform is used, it will then filter the internal |
2073 | | // edge aligned with a 4x4 block |
2074 | 0 | if (*tx_size == TX_INVALID) { |
2075 | 0 | params->filter_length = 0; |
2076 | 0 | *tx_size = TX_4X4; |
2077 | 0 | } |
2078 | |
|
2079 | 0 | const int offset = y * MI_SIZE * dst_stride + x * MI_SIZE; |
2080 | 0 | if (joint_filter_chroma) { |
2081 | 0 | uint8_t *u_dst = plane_ptr[0].dst.buf + offset; |
2082 | 0 | uint8_t *v_dst = plane_ptr[1].dst.buf + offset; |
2083 | 0 | filter_horz_chroma(u_dst, v_dst, dst_stride, params, cm->seq_params, |
2084 | 0 | use_filter_type); |
2085 | 0 | } else { |
2086 | 0 | uint8_t *dst_ptr = plane_ptr->dst.buf + offset; |
2087 | 0 | filter_horz(dst_ptr, dst_stride, params, cm->seq_params, |
2088 | 0 | use_filter_type); |
2089 | 0 | } |
2090 | | |
2091 | | // advance the destination pointer |
2092 | 0 | const int advance_units = tx_size_high_unit[*tx_size]; |
2093 | 0 | y += advance_units; |
2094 | 0 | params += advance_units; |
2095 | 0 | tx_size += advance_units; |
2096 | 0 | } |
2097 | 0 | x += x_inc; |
2098 | 0 | } |
2099 | 0 | } |