/src/aom/av1/encoder/picklpf.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2016, Alliance for Open Media. All rights reserved. |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | | */ |
11 | | |
12 | | #include <assert.h> |
13 | | #include <limits.h> |
14 | | |
15 | | #include "config/aom_scale_rtcd.h" |
16 | | |
17 | | #include "aom_dsp/aom_dsp_common.h" |
18 | | #include "aom_dsp/psnr.h" |
19 | | #include "aom_mem/aom_mem.h" |
20 | | #include "aom_ports/mem.h" |
21 | | |
22 | | #include "av1/common/av1_common_int.h" |
23 | | #include "av1/common/av1_loopfilter.h" |
24 | | #include "av1/common/quant_common.h" |
25 | | |
26 | | #include "av1/encoder/av1_quantize.h" |
27 | | #include "av1/encoder/encoder.h" |
28 | | #include "av1/encoder/picklpf.h" |
29 | | |
30 | | // AV1 loop filter applies to the whole frame according to mi_rows and mi_cols, |
31 | | // which are calculated based on aligned width and aligned height, |
32 | | // In addition, if super res is enabled, it copies the whole frame |
33 | | // according to the aligned width and height (av1_superres_upscale()). |
34 | | // So we need to copy the whole filtered region, instead of the cropped region. |
35 | | // For example, input image size is: 160x90. |
36 | | // Then src->y_crop_width = 160, src->y_crop_height = 90. |
37 | | // The aligned frame size is: src->y_width = 160, src->y_height = 96. |
38 | | // AV1 aligns frame size to a multiple of 8, if there is |
39 | | // chroma subsampling, it is able to ensure the chroma is also |
40 | | // an integer number of mi units. mi unit is 4x4, 8 = 4 * 2, and 2 luma mi |
41 | | // units correspond to 1 chroma mi unit if there is subsampling. |
42 | | // See: aom_realloc_frame_buffer() in yv12config.c. |
43 | | static void yv12_copy_plane(const YV12_BUFFER_CONFIG *src_bc, |
44 | 0 | YV12_BUFFER_CONFIG *dst_bc, int plane) { |
45 | 0 | switch (plane) { |
46 | 0 | case 0: aom_yv12_copy_y(src_bc, dst_bc, 0); break; |
47 | 0 | case 1: aom_yv12_copy_u(src_bc, dst_bc, 0); break; |
48 | 0 | case 2: aom_yv12_copy_v(src_bc, dst_bc, 0); break; |
49 | 0 | default: assert(plane >= 0 && plane <= 2); break; |
50 | 0 | } |
51 | 0 | } |
52 | | |
53 | 0 | static int get_max_filter_level(const AV1_COMP *cpi) { |
54 | 0 | if (is_stat_consumption_stage_twopass(cpi)) { |
55 | 0 | return cpi->ppi->twopass.section_intra_rating > 8 ? MAX_LOOP_FILTER * 3 / 4 |
56 | 0 | : MAX_LOOP_FILTER; |
57 | 0 | } else { |
58 | 0 | return MAX_LOOP_FILTER; |
59 | 0 | } |
60 | 0 | } |
61 | | |
62 | | static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd, |
63 | | AV1_COMP *const cpi, int filt_level, |
64 | 0 | int partial_frame, int plane, int dir) { |
65 | 0 | MultiThreadInfo *const mt_info = &cpi->mt_info; |
66 | 0 | int num_workers = mt_info->num_mod_workers[MOD_LPF]; |
67 | 0 | AV1_COMMON *const cm = &cpi->common; |
68 | 0 | int64_t filt_err; |
69 | |
|
70 | 0 | assert(plane >= 0 && plane <= 2); |
71 | 0 | int filter_level[2] = { filt_level, filt_level }; |
72 | 0 | if (plane == 0 && dir == 0) filter_level[1] = cm->lf.filter_level[1]; |
73 | 0 | if (plane == 0 && dir == 1) filter_level[0] = cm->lf.filter_level[0]; |
74 | | |
75 | | // set base filters for use of get_filter_level (av1_loopfilter.c) when in |
76 | | // DELTA_LF mode |
77 | 0 | switch (plane) { |
78 | 0 | case 0: |
79 | 0 | cm->lf.filter_level[0] = filter_level[0]; |
80 | 0 | cm->lf.filter_level[1] = filter_level[1]; |
81 | 0 | break; |
82 | 0 | case 1: cm->lf.filter_level_u = filter_level[0]; break; |
83 | 0 | case 2: cm->lf.filter_level_v = filter_level[0]; break; |
84 | 0 | } |
85 | | |
86 | | // lpf_opt_level = 1 : Enables dual/quad loop-filtering. |
87 | 0 | int lpf_opt_level = is_inter_tx_size_search_level_one(&cpi->sf.tx_sf); |
88 | |
|
89 | 0 | av1_loop_filter_frame_mt(&cm->cur_frame->buf, cm, &cpi->td.mb.e_mbd, plane, |
90 | 0 | plane + 1, partial_frame, mt_info->workers, |
91 | 0 | num_workers, &mt_info->lf_row_sync, lpf_opt_level); |
92 | |
|
93 | 0 | filt_err = aom_get_sse_plane(sd, &cm->cur_frame->buf, plane, |
94 | 0 | cm->seq_params->use_highbitdepth); |
95 | | |
96 | | // Re-instate the unfiltered frame |
97 | 0 | yv12_copy_plane(&cpi->last_frame_uf, &cm->cur_frame->buf, plane); |
98 | |
|
99 | 0 | return filt_err; |
100 | 0 | } |
101 | | |
102 | | static int search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi, |
103 | | int partial_frame, |
104 | | const int *last_frame_filter_level, int plane, |
105 | 0 | int dir) { |
106 | 0 | const AV1_COMMON *const cm = &cpi->common; |
107 | 0 | const int min_filter_level = 0; |
108 | 0 | const int max_filter_level = get_max_filter_level(cpi); |
109 | 0 | int filt_direction = 0; |
110 | 0 | int64_t best_err; |
111 | 0 | int filt_best; |
112 | | |
113 | | // Start the search at the previous frame filter level unless it is now out of |
114 | | // range. |
115 | 0 | int lvl; |
116 | 0 | switch (plane) { |
117 | 0 | case 0: |
118 | 0 | switch (dir) { |
119 | 0 | case 2: |
120 | 0 | lvl = (last_frame_filter_level[0] + last_frame_filter_level[1] + 1) >> |
121 | 0 | 1; |
122 | 0 | break; |
123 | 0 | case 0: |
124 | 0 | case 1: lvl = last_frame_filter_level[dir]; break; |
125 | 0 | default: assert(dir >= 0 && dir <= 2); return 0; |
126 | 0 | } |
127 | 0 | break; |
128 | 0 | case 1: lvl = last_frame_filter_level[2]; break; |
129 | 0 | case 2: lvl = last_frame_filter_level[3]; break; |
130 | 0 | default: assert(plane >= 0 && plane <= 2); return 0; |
131 | 0 | } |
132 | 0 | int filt_mid = clamp(lvl, min_filter_level, max_filter_level); |
133 | 0 | int filter_step = filt_mid < 16 ? 4 : filt_mid / 4; |
134 | | // Sum squared error at each filter level |
135 | 0 | int64_t ss_err[MAX_LOOP_FILTER + 1]; |
136 | |
|
137 | 0 | const int use_coarse_search = cpi->sf.lpf_sf.use_coarse_filter_level_search; |
138 | 0 | assert(use_coarse_search <= 1); |
139 | 0 | static const int min_filter_step_lookup[2] = { 0, 2 }; |
140 | | // min_filter_step_thesh determines the stopping criteria for the search. |
141 | | // The search is terminated when filter_step equals min_filter_step_thesh. |
142 | 0 | const int min_filter_step_thesh = min_filter_step_lookup[use_coarse_search]; |
143 | | |
144 | | // Set each entry to -1 |
145 | 0 | memset(ss_err, 0xFF, sizeof(ss_err)); |
146 | 0 | yv12_copy_plane(&cm->cur_frame->buf, &cpi->last_frame_uf, plane); |
147 | 0 | best_err = try_filter_frame(sd, cpi, filt_mid, partial_frame, plane, dir); |
148 | 0 | filt_best = filt_mid; |
149 | 0 | ss_err[filt_mid] = best_err; |
150 | |
|
151 | 0 | while (filter_step > min_filter_step_thesh) { |
152 | 0 | const int filt_high = AOMMIN(filt_mid + filter_step, max_filter_level); |
153 | 0 | const int filt_low = AOMMAX(filt_mid - filter_step, min_filter_level); |
154 | | |
155 | | // Bias against raising loop filter in favor of lowering it. |
156 | 0 | int64_t bias = (best_err >> (15 - (filt_mid / 8))) * filter_step; |
157 | |
|
158 | 0 | if ((is_stat_consumption_stage_twopass(cpi)) && |
159 | 0 | (cpi->ppi->twopass.section_intra_rating < 20)) |
160 | 0 | bias = (bias * cpi->ppi->twopass.section_intra_rating) / 20; |
161 | | |
162 | | // yx, bias less for large block size |
163 | 0 | if (cm->features.tx_mode != ONLY_4X4) bias >>= 1; |
164 | |
|
165 | 0 | if (filt_direction <= 0 && filt_low != filt_mid) { |
166 | | // Get Low filter error score |
167 | 0 | if (ss_err[filt_low] < 0) { |
168 | 0 | ss_err[filt_low] = |
169 | 0 | try_filter_frame(sd, cpi, filt_low, partial_frame, plane, dir); |
170 | 0 | } |
171 | | // If value is close to the best so far then bias towards a lower loop |
172 | | // filter value. |
173 | 0 | if (ss_err[filt_low] < (best_err + bias)) { |
174 | | // Was it actually better than the previous best? |
175 | 0 | if (ss_err[filt_low] < best_err) { |
176 | 0 | best_err = ss_err[filt_low]; |
177 | 0 | } |
178 | 0 | filt_best = filt_low; |
179 | 0 | } |
180 | 0 | } |
181 | | |
182 | | // Now look at filt_high |
183 | 0 | if (filt_direction >= 0 && filt_high != filt_mid) { |
184 | 0 | if (ss_err[filt_high] < 0) { |
185 | 0 | ss_err[filt_high] = |
186 | 0 | try_filter_frame(sd, cpi, filt_high, partial_frame, plane, dir); |
187 | 0 | } |
188 | | // If value is significantly better than previous best, bias added against |
189 | | // raising filter value |
190 | 0 | if (ss_err[filt_high] < (best_err - bias)) { |
191 | 0 | best_err = ss_err[filt_high]; |
192 | 0 | filt_best = filt_high; |
193 | 0 | } |
194 | 0 | } |
195 | | |
196 | | // Half the step distance if the best filter value was the same as last time |
197 | 0 | if (filt_best == filt_mid) { |
198 | 0 | filter_step /= 2; |
199 | 0 | filt_direction = 0; |
200 | 0 | } else { |
201 | 0 | filt_direction = (filt_best < filt_mid) ? -1 : 1; |
202 | 0 | filt_mid = filt_best; |
203 | 0 | } |
204 | 0 | } |
205 | |
|
206 | 0 | return filt_best; |
207 | 0 | } |
208 | | |
209 | | void av1_pick_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi, |
210 | 0 | LPF_PICK_METHOD method) { |
211 | 0 | AV1_COMMON *const cm = &cpi->common; |
212 | 0 | const SequenceHeader *const seq_params = cm->seq_params; |
213 | 0 | const int num_planes = av1_num_planes(cm); |
214 | 0 | struct loopfilter *const lf = &cm->lf; |
215 | 0 | int disable_filter_rt_screen = 0; |
216 | 0 | (void)sd; |
217 | | |
218 | | // Enable loop filter sharpness only for allintra encoding mode, |
219 | | // as frames do not have to serve as references to others |
220 | 0 | lf->sharpness_level = |
221 | 0 | cpi->oxcf.mode == ALLINTRA ? cpi->oxcf.algo_cfg.sharpness : 0; |
222 | |
|
223 | 0 | if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN && |
224 | 0 | cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ && |
225 | 0 | cpi->sf.rt_sf.skip_lf_screen) |
226 | 0 | disable_filter_rt_screen = av1_cyclic_refresh_disable_lf_cdef(cpi); |
227 | |
|
228 | 0 | if (disable_filter_rt_screen || |
229 | 0 | cpi->oxcf.algo_cfg.loopfilter_control == LOOPFILTER_NONE || |
230 | 0 | (cpi->oxcf.algo_cfg.loopfilter_control == LOOPFILTER_REFERENCE && |
231 | 0 | cpi->ppi->rtc_ref.non_reference_frame)) { |
232 | 0 | lf->filter_level[0] = 0; |
233 | 0 | lf->filter_level[1] = 0; |
234 | 0 | return; |
235 | 0 | } |
236 | | |
237 | 0 | if (method == LPF_PICK_MINIMAL_LPF) { |
238 | 0 | lf->filter_level[0] = 0; |
239 | 0 | lf->filter_level[1] = 0; |
240 | 0 | } else if (method >= LPF_PICK_FROM_Q) { |
241 | 0 | const int min_filter_level = 0; |
242 | 0 | const int max_filter_level = get_max_filter_level(cpi); |
243 | 0 | const int q = av1_ac_quant_QTX(cm->quant_params.base_qindex, 0, |
244 | 0 | seq_params->bit_depth); |
245 | | // based on tests result for rtc test set |
246 | | // 0.04590 boosted or 0.02295 non-booseted in 18-bit fixed point |
247 | 0 | const int strength_boost_q_treshold = 0; |
248 | 0 | int inter_frame_multiplier = |
249 | 0 | (q > strength_boost_q_treshold || |
250 | 0 | (cpi->sf.rt_sf.use_nonrd_pick_mode && |
251 | 0 | cpi->common.width * cpi->common.height > 352 * 288)) |
252 | 0 | ? 12034 |
253 | 0 | : 6017; |
254 | | // Increase strength on base TL0 for temporal layers, for low-resoln, |
255 | | // based on frame source_sad. |
256 | 0 | if (cpi->svc.number_temporal_layers > 1 && |
257 | 0 | cpi->svc.temporal_layer_id == 0 && |
258 | 0 | cpi->common.width * cpi->common.height <= 352 * 288 && |
259 | 0 | cpi->sf.rt_sf.use_nonrd_pick_mode) { |
260 | 0 | if (cpi->rc.frame_source_sad > 100000) |
261 | 0 | inter_frame_multiplier = inter_frame_multiplier << 1; |
262 | 0 | else if (cpi->rc.frame_source_sad > 50000) |
263 | 0 | inter_frame_multiplier = 3 * (inter_frame_multiplier >> 1); |
264 | 0 | } else if (cpi->sf.rt_sf.use_fast_fixed_part) { |
265 | 0 | inter_frame_multiplier = inter_frame_multiplier << 1; |
266 | 0 | } |
267 | | // These values were determined by linear fitting the result of the |
268 | | // searched level for 8 bit depth: |
269 | | // Keyframes: filt_guess = q * 0.06699 - 1.60817 |
270 | | // Other frames: filt_guess = q * inter_frame_multiplier + 2.48225 |
271 | | // |
272 | | // And high bit depth separately: |
273 | | // filt_guess = q * 0.316206 + 3.87252 |
274 | 0 | int filt_guess; |
275 | 0 | switch (seq_params->bit_depth) { |
276 | 0 | case AOM_BITS_8: |
277 | 0 | filt_guess = |
278 | 0 | (cm->current_frame.frame_type == KEY_FRAME) |
279 | 0 | ? ROUND_POWER_OF_TWO(q * 17563 - 421574, 18) |
280 | 0 | : ROUND_POWER_OF_TWO(q * inter_frame_multiplier + 650707, 18); |
281 | 0 | break; |
282 | 0 | case AOM_BITS_10: |
283 | 0 | filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 4060632, 20); |
284 | 0 | break; |
285 | 0 | case AOM_BITS_12: |
286 | 0 | filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 16242526, 22); |
287 | 0 | break; |
288 | 0 | default: |
289 | 0 | assert(0 && |
290 | 0 | "bit_depth should be AOM_BITS_8, AOM_BITS_10 " |
291 | 0 | "or AOM_BITS_12"); |
292 | 0 | return; |
293 | 0 | } |
294 | 0 | if (seq_params->bit_depth != AOM_BITS_8 && |
295 | 0 | cm->current_frame.frame_type == KEY_FRAME) |
296 | 0 | filt_guess -= 4; |
297 | | // TODO(chengchen): retrain the model for Y, U, V filter levels |
298 | 0 | lf->filter_level[0] = clamp(filt_guess, min_filter_level, max_filter_level); |
299 | 0 | lf->filter_level[1] = clamp(filt_guess, min_filter_level, max_filter_level); |
300 | 0 | lf->filter_level_u = clamp(filt_guess, min_filter_level, max_filter_level); |
301 | 0 | lf->filter_level_v = clamp(filt_guess, min_filter_level, max_filter_level); |
302 | 0 | if (cpi->oxcf.algo_cfg.loopfilter_control == LOOPFILTER_SELECTIVELY && |
303 | 0 | !frame_is_intra_only(cm) && !cpi->rc.high_source_sad) { |
304 | 0 | if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) { |
305 | 0 | lf->filter_level[0] = 0; |
306 | 0 | lf->filter_level[1] = 0; |
307 | 0 | } else { |
308 | 0 | const int num4x4 = (cm->width >> 2) * (cm->height >> 2); |
309 | 0 | const int newmv_thresh = 7; |
310 | 0 | const int distance_since_key_thresh = 5; |
311 | 0 | if ((cpi->td.rd_counts.newmv_or_intra_blocks * 100 / num4x4) < |
312 | 0 | newmv_thresh && |
313 | 0 | cpi->rc.frames_since_key > distance_since_key_thresh) { |
314 | 0 | lf->filter_level[0] = 0; |
315 | 0 | lf->filter_level[1] = 0; |
316 | 0 | } |
317 | 0 | } |
318 | 0 | } |
319 | 0 | } else { |
320 | 0 | int last_frame_filter_level[4] = { 0 }; |
321 | 0 | if (!frame_is_intra_only(cm)) { |
322 | 0 | last_frame_filter_level[0] = cpi->ppi->filter_level[0]; |
323 | 0 | last_frame_filter_level[1] = cpi->ppi->filter_level[1]; |
324 | 0 | last_frame_filter_level[2] = cpi->ppi->filter_level_u; |
325 | 0 | last_frame_filter_level[3] = cpi->ppi->filter_level_v; |
326 | 0 | } |
327 | | // The frame buffer last_frame_uf is used to store the non-loop filtered |
328 | | // reconstructed frame in search_filter_level(). |
329 | 0 | if (aom_realloc_frame_buffer( |
330 | 0 | &cpi->last_frame_uf, cm->width, cm->height, |
331 | 0 | seq_params->subsampling_x, seq_params->subsampling_y, |
332 | 0 | seq_params->use_highbitdepth, cpi->oxcf.border_in_pixels, |
333 | 0 | cm->features.byte_alignment, NULL, NULL, NULL, false, 0)) |
334 | 0 | aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR, |
335 | 0 | "Failed to allocate last frame buffer"); |
336 | |
|
337 | 0 | lf->filter_level[0] = lf->filter_level[1] = |
338 | 0 | search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, |
339 | 0 | last_frame_filter_level, 0, 2); |
340 | 0 | if (method != LPF_PICK_FROM_FULL_IMAGE_NON_DUAL) { |
341 | 0 | lf->filter_level[0] = |
342 | 0 | search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, |
343 | 0 | last_frame_filter_level, 0, 0); |
344 | 0 | lf->filter_level[1] = |
345 | 0 | search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, |
346 | 0 | last_frame_filter_level, 0, 1); |
347 | 0 | } |
348 | |
|
349 | 0 | if (num_planes > 1) { |
350 | 0 | lf->filter_level_u = |
351 | 0 | search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, |
352 | 0 | last_frame_filter_level, 1, 0); |
353 | 0 | lf->filter_level_v = |
354 | 0 | search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, |
355 | 0 | last_frame_filter_level, 2, 0); |
356 | 0 | } |
357 | 0 | } |
358 | 0 | } |