Coverage Report

Created: 2022-08-24 06:17

/src/aom/av1/encoder/var_based_part.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2019, Alliance for Open Media. All rights reserved
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <limits.h>
13
#include <math.h>
14
#include <stdbool.h>
15
#include <stdio.h>
16
17
#include "config/aom_config.h"
18
#include "config/aom_dsp_rtcd.h"
19
#include "config/av1_rtcd.h"
20
21
#include "aom_dsp/aom_dsp_common.h"
22
#include "aom_dsp/binary_codes_writer.h"
23
#include "aom_ports/mem.h"
24
#include "aom_ports/aom_timer.h"
25
26
#include "av1/common/reconinter.h"
27
#include "av1/common/blockd.h"
28
29
#include "av1/encoder/encodeframe.h"
30
#include "av1/encoder/var_based_part.h"
31
#include "av1/encoder/reconinter_enc.h"
32
33
extern const uint8_t AV1_VAR_OFFS[];
34
35
typedef struct {
36
  VPVariance *part_variances;
37
  VPartVar *split[4];
38
} variance_node;
39
40
static AOM_INLINE void tree_to_node(void *data, BLOCK_SIZE bsize,
41
0
                                    variance_node *node) {
42
0
  int i;
43
0
  node->part_variances = NULL;
44
0
  switch (bsize) {
45
0
    case BLOCK_128X128: {
46
0
      VP128x128 *vt = (VP128x128 *)data;
47
0
      node->part_variances = &vt->part_variances;
48
0
      for (i = 0; i < 4; i++)
49
0
        node->split[i] = &vt->split[i].part_variances.none;
50
0
      break;
51
0
    }
52
0
    case BLOCK_64X64: {
53
0
      VP64x64 *vt = (VP64x64 *)data;
54
0
      node->part_variances = &vt->part_variances;
55
0
      for (i = 0; i < 4; i++)
56
0
        node->split[i] = &vt->split[i].part_variances.none;
57
0
      break;
58
0
    }
59
0
    case BLOCK_32X32: {
60
0
      VP32x32 *vt = (VP32x32 *)data;
61
0
      node->part_variances = &vt->part_variances;
62
0
      for (i = 0; i < 4; i++)
63
0
        node->split[i] = &vt->split[i].part_variances.none;
64
0
      break;
65
0
    }
66
0
    case BLOCK_16X16: {
67
0
      VP16x16 *vt = (VP16x16 *)data;
68
0
      node->part_variances = &vt->part_variances;
69
0
      for (i = 0; i < 4; i++)
70
0
        node->split[i] = &vt->split[i].part_variances.none;
71
0
      break;
72
0
    }
73
0
    case BLOCK_8X8: {
74
0
      VP8x8 *vt = (VP8x8 *)data;
75
0
      node->part_variances = &vt->part_variances;
76
0
      for (i = 0; i < 4; i++)
77
0
        node->split[i] = &vt->split[i].part_variances.none;
78
0
      break;
79
0
    }
80
0
    default: {
81
0
      VP4x4 *vt = (VP4x4 *)data;
82
0
      assert(bsize == BLOCK_4X4);
83
0
      node->part_variances = &vt->part_variances;
84
0
      for (i = 0; i < 4; i++) node->split[i] = &vt->split[i];
85
0
      break;
86
0
    }
87
0
  }
88
0
}
89
90
// Set variance values given sum square error, sum error, count.
91
static AOM_INLINE void fill_variance(uint32_t s2, int32_t s, int c,
92
0
                                     VPartVar *v) {
93
0
  v->sum_square_error = s2;
94
0
  v->sum_error = s;
95
0
  v->log2_count = c;
96
0
}
97
98
0
// Convert the accumulated statistics in `v` into a scaled variance:
// 256 * (sum_square_error - sum_error^2 / count) / count, where
// count == 2^log2_count.
static AOM_INLINE void get_variance(VPartVar *v) {
  const int64_t squared_sum = (int64_t)v->sum_error * v->sum_error;
  const uint32_t mean_sq = (uint32_t)(squared_sum >> v->log2_count);
  v->variance = (int)(256 * (v->sum_square_error - mean_sq) >> v->log2_count);
}
105
106
// Merge the statistics of two equally-sized partitions `a` and `b` into
// `r`; the combined entry covers twice the samples (log2_count + 1).
static AOM_INLINE void sum_2_variances(const VPartVar *a, const VPartVar *b,
                                       VPartVar *r) {
  assert(a->log2_count == b->log2_count);
  const uint32_t sse_total = a->sum_square_error + b->sum_square_error;
  const int32_t err_total = a->sum_error + b->sum_error;
  fill_variance(sse_total, err_total, a->log2_count + 1, r);
}
112
113
0
// Aggregate the four child "none" variances of the node `data` (at block
// size `bsize`) into its horizontal-, vertical- and whole-block partition
// statistics.
static AOM_INLINE void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
  variance_node node;
  memset(&node, 0, sizeof(node));
  tree_to_node(data, bsize, &node);
  VPVariance *const pv = node.part_variances;
  // Children are laid out 0:top-left, 1:top-right, 2:bottom-left,
  // 3:bottom-right.
  sum_2_variances(node.split[0], node.split[1], &pv->horz[0]);
  sum_2_variances(node.split[2], node.split[3], &pv->horz[1]);
  sum_2_variances(node.split[0], node.split[2], &pv->vert[0]);
  sum_2_variances(node.split[1], node.split[3], &pv->vert[1]);
  sum_2_variances(&pv->vert[0], &pv->vert[1], &pv->none);
}
124
125
static AOM_INLINE void set_block_size(AV1_COMP *const cpi, MACROBLOCK *const x,
126
                                      MACROBLOCKD *const xd, int mi_row,
127
0
                                      int mi_col, BLOCK_SIZE bsize) {
128
0
  if (cpi->common.mi_params.mi_cols > mi_col &&
129
0
      cpi->common.mi_params.mi_rows > mi_row) {
130
0
    set_mode_info_offsets(&cpi->common.mi_params, &cpi->mbmi_ext_info, x, xd,
131
0
                          mi_row, mi_col);
132
0
    xd->mi[0]->bsize = bsize;
133
0
  }
134
0
}
135
136
// Try to choose a partition for the block at (mi_row, mi_col) of size
// `bsize` from the variance tree in `data`.
// Returns 1 when a partition (none, vertical or horizontal split into two
// halves) was selected and written via set_block_size(); returns 0 when the
// caller should recurse and split the block further. `force_split == 1`
// unconditionally requests a split. `bsize_min` is the smallest size the
// variance tree carries statistics for at this sampling level.
static int set_vt_partitioning(AV1_COMP *cpi, MACROBLOCK *const x,
                               MACROBLOCKD *const xd,
                               const TileInfo *const tile, void *data,
                               BLOCK_SIZE bsize, int mi_row, int mi_col,
                               int64_t threshold, BLOCK_SIZE bsize_min,
                               int force_split) {
  AV1_COMMON *const cm = &cpi->common;
  variance_node vt;
  const int block_width = mi_size_wide[bsize];
  const int block_height = mi_size_high[bsize];
  // How much of the block must fit inside the tile for each decision.
  int bs_width_check = block_width;
  int bs_height_check = block_height;
  int bs_width_vert_check = block_width >> 1;
  int bs_height_horiz_check = block_height >> 1;
  // On the right and bottom boundary we only need to check
  // if half the bsize fits, because boundary is extended
  // up to 64. So do this check only for sb_size = 64X64.
  if (cm->seq_params->sb_size == BLOCK_64X64) {
    if (tile->mi_col_end == cm->mi_params.mi_cols) {
      bs_width_check = (block_width >> 1) + 1;
      bs_width_vert_check = (block_width >> 2) + 1;
    }
    if (tile->mi_row_end == cm->mi_params.mi_rows) {
      bs_height_check = (block_height >> 1) + 1;
      bs_height_horiz_check = (block_height >> 2) + 1;
    }
  }

  // Only square block sizes are handled here.
  assert(block_height == block_width);
  tree_to_node(data, bsize, &vt);

  // Caller already decided this level must split.
  if (force_split == 1) return 0;

  // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if
  // variance is below threshold, otherwise split will be selected.
  // No check for vert/horiz split as too few samples for variance.
  if (bsize == bsize_min) {
    // Variance already computed to set the force_split.
    if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none);
    if (mi_col + bs_width_check <= tile->mi_col_end &&
        mi_row + bs_height_check <= tile->mi_row_end &&
        vt.part_variances->none.variance < threshold) {
      set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
      return 1;
    }
    return 0;
  } else if (bsize > bsize_min) {
    // Variance already computed to set the force_split.
    if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none);
    // For key frame: take split for bsize above 32X32 or very high variance.
    if (frame_is_intra_only(cm) &&
        (bsize > BLOCK_32X32 ||
         vt.part_variances->none.variance > (threshold << 4))) {
      return 0;
    }
    // If variance is low, take the bsize (no split).
    if (mi_col + bs_width_check <= tile->mi_col_end &&
        mi_row + bs_height_check <= tile->mi_row_end &&
        vt.part_variances->none.variance < threshold) {
      set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
      return 1;
    }
    // Check vertical split.
    if (mi_row + bs_height_check <= tile->mi_row_end &&
        mi_col + bs_width_vert_check <= tile->mi_col_end) {
      BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_VERT);
      get_variance(&vt.part_variances->vert[0]);
      get_variance(&vt.part_variances->vert[1]);
      // Both halves must be low-variance and the subsize must be valid for
      // the chroma subsampling in use.
      if (vt.part_variances->vert[0].variance < threshold &&
          vt.part_variances->vert[1].variance < threshold &&
          get_plane_block_size(subsize, xd->plane[1].subsampling_x,
                               xd->plane[1].subsampling_y) < BLOCK_INVALID) {
        set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
        set_block_size(cpi, x, xd, mi_row, mi_col + block_width / 2, subsize);
        return 1;
      }
    }
    // Check horizontal split.
    if (mi_col + bs_width_check <= tile->mi_col_end &&
        mi_row + bs_height_horiz_check <= tile->mi_row_end) {
      BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_HORZ);
      get_variance(&vt.part_variances->horz[0]);
      get_variance(&vt.part_variances->horz[1]);
      if (vt.part_variances->horz[0].variance < threshold &&
          vt.part_variances->horz[1].variance < threshold &&
          get_plane_block_size(subsize, xd->plane[1].subsampling_x,
                               xd->plane[1].subsampling_y) < BLOCK_INVALID) {
        set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
        set_block_size(cpi, x, xd, mi_row + block_height / 2, mi_col, subsize);
        return 1;
      }
    }
    return 0;
  }
  // bsize < bsize_min: nothing to decide at this level.
  return 0;
}
232
233
static AOM_INLINE void fill_variance_8x8avg(const uint8_t *s, int sp,
234
                                            const uint8_t *d, int dp,
235
                                            int x16_idx, int y16_idx,
236
                                            VP16x16 *vst,
237
#if CONFIG_AV1_HIGHBITDEPTH
238
                                            int highbd_flag,
239
#endif
240
                                            int pixels_wide, int pixels_high,
241
0
                                            int is_key_frame) {
242
0
  int k;
243
0
  for (k = 0; k < 4; k++) {
244
0
    int x8_idx = x16_idx + ((k & 1) << 3);
245
0
    int y8_idx = y16_idx + ((k >> 1) << 3);
246
0
    unsigned int sse = 0;
247
0
    int sum = 0;
248
0
    if (x8_idx < pixels_wide && y8_idx < pixels_high) {
249
0
      int s_avg;
250
0
      int d_avg = 128;
251
0
#if CONFIG_AV1_HIGHBITDEPTH
252
0
      if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
253
0
        s_avg = aom_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp);
254
0
        if (!is_key_frame)
255
0
          d_avg = aom_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp);
256
0
      } else {
257
0
        s_avg = aom_avg_8x8(s + y8_idx * sp + x8_idx, sp);
258
0
        if (!is_key_frame) d_avg = aom_avg_8x8(d + y8_idx * dp + x8_idx, dp);
259
0
      }
260
#else
261
      s_avg = aom_avg_8x8(s + y8_idx * sp + x8_idx, sp);
262
      if (!is_key_frame) d_avg = aom_avg_8x8(d + y8_idx * dp + x8_idx, dp);
263
#endif
264
0
      sum = s_avg - d_avg;
265
0
      sse = sum * sum;
266
0
    }
267
0
    fill_variance(sse, sum, 0, &vst->split[k].part_variances.none);
268
0
  }
269
0
}
270
271
// For the 16x16 block at (x16_idx, y16_idx), compute the per-8x8 min/max
// of |source - reference| and return the spread between the largest and
// smallest of those four ranges. Out-of-frame sub-blocks are skipped.
static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d,
                              int dp, int x16_idx, int y16_idx,
#if CONFIG_AV1_HIGHBITDEPTH
                              int highbd_flag,
#endif
                              int pixels_wide, int pixels_high) {
  int minmax_max = 0;
  int minmax_min = 255;
  // Loop over the 4 8x8 subblocks.
  for (int blk = 0; blk < 4; blk++) {
    const int x8_idx = x16_idx + ((blk & 1) << 3);
    const int y8_idx = y16_idx + ((blk >> 1) << 3);
    if (x8_idx >= pixels_wide || y8_idx >= pixels_high) continue;
    int min = 0;
    int max = 0;
    const uint8_t *const src = s + y8_idx * sp + x8_idx;
    const uint8_t *const ref = d + y8_idx * dp + x8_idx;
#if CONFIG_AV1_HIGHBITDEPTH
    if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
      aom_highbd_minmax_8x8(src, sp, ref, dp, &min, &max);
    } else {
      aom_minmax_8x8(src, sp, ref, dp, &min, &max);
    }
#else
    aom_minmax_8x8(src, sp, ref, dp, &min, &max);
#endif
    const int range = max - min;
    if (range > minmax_max) minmax_max = range;
    if (range < minmax_min) minmax_min = range;
  }
  return (minmax_max - minmax_min);
}
305
306
// 4x4 counterpart of fill_variance_8x8avg: fill the four 4x4 leaves of the
// 8x8 node `vst` with (source average - last-source average) statistics.
// On key frames the reference average stays at the fixed 128; sub-blocks
// outside the visible frame area contribute zero.
static AOM_INLINE void fill_variance_4x4avg(const uint8_t *s, int sp,
                                            const uint8_t *d, int dp,
                                            int x8_idx, int y8_idx, VP8x8 *vst,
#if CONFIG_AV1_HIGHBITDEPTH
                                            int highbd_flag,
#endif
                                            int pixels_wide, int pixels_high,
                                            int is_key_frame) {
  for (int blk = 0; blk < 4; blk++) {
    const int x4_idx = x8_idx + ((blk & 1) << 2);
    const int y4_idx = y8_idx + ((blk >> 1) << 2);
    unsigned int sse = 0;
    int sum = 0;
    if (x4_idx < pixels_wide && y4_idx < pixels_high) {
      const uint8_t *const src = s + y4_idx * sp + x4_idx;
      const uint8_t *const ref = d + y4_idx * dp + x4_idx;
      int s_avg;
      int d_avg = 128;
#if CONFIG_AV1_HIGHBITDEPTH
      if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
        s_avg = aom_highbd_avg_4x4(src, sp);
        if (!is_key_frame) d_avg = aom_highbd_avg_4x4(ref, dp);
      } else {
        s_avg = aom_avg_4x4(src, sp);
        if (!is_key_frame) d_avg = aom_avg_4x4(ref, dp);
      }
#else
      s_avg = aom_avg_4x4(src, sp);
      if (!is_key_frame) d_avg = aom_avg_4x4(ref, dp);
#endif
      sum = s_avg - d_avg;
      sse = sum * sum;
    }
    fill_variance(sse, sum, 0, &vst->split[blk].part_variances.none);
  }
}
343
344
// TODO(kyslov) Bring back threshold adjustment based on content state
//
// Scale the base partition threshold: non-reference frames get a 1.5x
// larger threshold, and speeds >= 8 an additional 1.25x on top of that.
// `width`/`height` are currently unused (kept for the planned content-state
// adjustment above).
static int64_t scale_part_thresh_content(int64_t threshold_base, int speed,
                                         int width, int height,
                                         int non_reference_frame) {
  (void)width;
  (void)height;
  int64_t threshold =
      non_reference_frame ? (3 * threshold_base) >> 1 : threshold_base;
  if (speed >= 8) threshold = (5 * threshold) >> 2;
  return threshold;
}
357
358
// Derive the variance-partitioning thresholds[0..4] (64x64 .. 8x8 levels)
// from the quantizer `q`, the frame type and the run-time speed features.
// `content_lowsumdiff` enables a noise-based boost, `source_sad` and
// `segment_id` condition the large-partition-block boosts.
//
// Bug fix: the `weight` ramp used `(2 * win)` which is INTEGER division —
// with the numerator in [0, 2*win] the quotient was 0 for the whole window
// (1 only at the exact top edge), collapsing the intended linear 0..1 blend
// to a step. Use 2.0 * win so the division is done in double.
static AOM_INLINE void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[],
                                          int q, int content_lowsumdiff,
                                          int source_sad, int segment_id) {
  AV1_COMMON *const cm = &cpi->common;
  const int is_key_frame = frame_is_intra_only(cm);
  // Key frames use a much larger multiplier on the AC quant step.
  const int threshold_multiplier = is_key_frame ? 120 : 1;
  const int ac_q = av1_ac_quant_QTX(q, 0, cm->seq_params->bit_depth);
  int64_t threshold_base = (int64_t)(threshold_multiplier * ac_q);
  const int current_qindex = cm->quant_params.base_qindex;
  const int threshold_left_shift = cpi->sf.rt_sf.var_part_split_threshold_shift;

  if (is_key_frame) {
    if (cpi->sf.rt_sf.force_large_partition_blocks_intra) {
      const int shift_steps =
          threshold_left_shift - (cpi->oxcf.mode == ALLINTRA ? 7 : 8);
      assert(shift_steps >= 0);
      threshold_base <<= shift_steps;
    }
    thresholds[0] = threshold_base;
    thresholds[1] = threshold_base;
    if (cm->width * cm->height < 1280 * 720) {
      thresholds[2] = threshold_base / 3;
      thresholds[3] = threshold_base >> 1;
    } else {
      int shift_val = 2;
      if (cpi->sf.rt_sf.force_large_partition_blocks_intra) {
        shift_val = 0;
      }

      thresholds[2] = threshold_base >> shift_val;
      thresholds[3] = threshold_base >> shift_val;
    }
    thresholds[4] = threshold_base << 2;
    return;
  }

  // Increase partition thresholds for noisy content. Apply it only for
  // superblocks where sumdiff is low, as we assume the sumdiff of superblock
  // whose only change is due to noise will be low (i.e, noise will average
  // out over large block).
  if (cpi->noise_estimate.enabled && content_lowsumdiff &&
      (cm->width * cm->height > 640 * 480) &&
      cm->current_frame.frame_number > 60) {
    NOISE_LEVEL noise_level =
        av1_noise_estimate_extract_level(&cpi->noise_estimate);
    if (noise_level == kHigh)
      threshold_base = (5 * threshold_base) >> 1;
    else if (noise_level == kMedium &&
             !cpi->sf.rt_sf.force_large_partition_blocks)
      threshold_base = (5 * threshold_base) >> 2;
  }
  // TODO(kyslov) Enable var based partition adjusment on temporal denoising
#if 0  // CONFIG_AV1_TEMPORAL_DENOISING
  if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
      cpi->oxcf.speed > 5 && cpi->denoiser.denoising_level >= kDenLow)
      threshold_base =
          av1_scale_part_thresh(threshold_base, cpi->denoiser.denoising_level,
                                content_state, cpi->svc.temporal_layer_id);
  else
    threshold_base =
        scale_part_thresh_content(threshold_base, cpi->oxcf.speed, cm->width,
                                  cm->height, cpi->svc.non_reference_frame);
#else
  // Increase base variance threshold based on content_state/sum_diff level.
  threshold_base =
      scale_part_thresh_content(threshold_base, cpi->oxcf.speed, cm->width,
                                cm->height, cpi->svc.non_reference_frame);
#endif
  thresholds[0] = threshold_base >> 1;
  thresholds[1] = threshold_base;
  thresholds[3] = threshold_base << threshold_left_shift;
  if (cm->width >= 1280 && cm->height >= 720)
    thresholds[3] = thresholds[3] << 1;
  if (cm->width * cm->height <= 352 * 288) {
    // Low resolution: blend thresholds between a low-QP and a high-QP
    // operating point according to the current qindex.
    if (current_qindex >= QINDEX_HIGH_THR) {
      threshold_base = (5 * threshold_base) >> 1;
      thresholds[1] = threshold_base >> 3;
      thresholds[2] = threshold_base << 2;
      thresholds[3] = threshold_base << 5;
    } else if (current_qindex < QINDEX_LOW_THR) {
      thresholds[1] = threshold_base >> 3;
      thresholds[2] = threshold_base >> 1;
      thresholds[3] = threshold_base << 3;
    } else {
      int64_t qi_diff_low = current_qindex - QINDEX_LOW_THR;
      int64_t qi_diff_high = QINDEX_HIGH_THR - current_qindex;
      int64_t threshold_diff = QINDEX_HIGH_THR - QINDEX_LOW_THR;
      int64_t threshold_base_high = (5 * threshold_base) >> 1;

      threshold_diff = threshold_diff > 0 ? threshold_diff : 1;
      threshold_base =
          (qi_diff_low * threshold_base_high + qi_diff_high * threshold_base) /
          threshold_diff;
      thresholds[1] = threshold_base >> 3;
      thresholds[2] = ((qi_diff_low * threshold_base) +
                       qi_diff_high * (threshold_base >> 1)) /
                      threshold_diff;
      thresholds[3] = ((qi_diff_low * (threshold_base << 5)) +
                       qi_diff_high * (threshold_base << 3)) /
                      threshold_diff;
    }
  } else if (cm->width < 1280 && cm->height < 720) {
    thresholds[2] = (5 * threshold_base) >> 2;
  } else if (cm->width < 1920 && cm->height < 1080) {
    thresholds[2] = threshold_base << 1;
  } else {
    thresholds[2] = (5 * threshold_base) >> 1;
  }
  if (cpi->sf.rt_sf.force_large_partition_blocks) {
    double weight;
    const int win = 20;
    // Linear ramp from 1.0 (at QINDEX_LARGE_BLOCK_THR - win) down to 0.0
    // (at QINDEX_LARGE_BLOCK_THR + win).
    if (current_qindex < QINDEX_LARGE_BLOCK_THR - win)
      weight = 1.0;
    else if (current_qindex > QINDEX_LARGE_BLOCK_THR + win)
      weight = 0.0;
    else
      // Use 2.0 * win: with integer division (2 * win) the quotient was
      // truncated to 0 across the whole window, defeating the blend below.
      weight =
          1.0 - (current_qindex - QINDEX_LARGE_BLOCK_THR + win) / (2.0 * win);
    if (cm->width * cm->height > 640 * 480) {
      for (int i = 0; i < 4; i++) {
        thresholds[i] <<= 1;
      }
    }
    if (cm->width * cm->height <= 352 * 288) {
      thresholds[3] = INT32_MAX;
      if (segment_id == 0) {
        thresholds[1] <<= 2;
        thresholds[2] <<= (source_sad == kLowSad) ? 5 : 4;
      } else {
        thresholds[1] <<= 1;
        thresholds[2] <<= 3;
      }
      // Condition the increase of partition thresholds on the segment
      // and the content. Avoid the increase for superblocks which have
      // high source sad, unless the whole frame has very high motion
      // (i.e, cpi->rc.avg_source_sad is very large, in which case all blocks
      // have high source sad).
    } else if (cm->width * cm->height > 640 * 480 && segment_id == 0 &&
               (source_sad != kHighSad || cpi->rc.avg_source_sad > 50000)) {
      thresholds[0] = (3 * thresholds[0]) >> 1;
      thresholds[3] = INT32_MAX;
      if (current_qindex > QINDEX_LARGE_BLOCK_THR) {
        thresholds[1] =
            (int)((1 - weight) * (thresholds[1] << 1) + weight * thresholds[1]);
        thresholds[2] =
            (int)((1 - weight) * (thresholds[2] << 1) + weight * thresholds[2]);
      }
    } else if (current_qindex > QINDEX_LARGE_BLOCK_THR && segment_id == 0 &&
               (source_sad != kHighSad || cpi->rc.avg_source_sad > 50000)) {
      thresholds[1] =
          (int)((1 - weight) * (thresholds[1] << 2) + weight * thresholds[1]);
      thresholds[2] =
          (int)((1 - weight) * (thresholds[2] << 4) + weight * thresholds[2]);
      thresholds[3] = INT32_MAX;
    }
  }
}
515
516
// Set temporal variance low flag for superblock 64x64.
517
// Only first 25 in the array are used in this case.
518
static AOM_INLINE void set_low_temp_var_flag_64x64(
519
    CommonModeInfoParams *mi_params, PartitionSearchInfo *part_info,
520
    MACROBLOCKD *xd, VP64x64 *vt, const int64_t thresholds[], int mi_col,
521
0
    int mi_row) {
522
0
  if (xd->mi[0]->bsize == BLOCK_64X64) {
523
0
    if ((vt->part_variances).none.variance < (thresholds[0] >> 1))
524
0
      part_info->variance_low[0] = 1;
525
0
  } else if (xd->mi[0]->bsize == BLOCK_64X32) {
526
0
    for (int i = 0; i < 2; i++) {
527
0
      if (vt->part_variances.horz[i].variance < (thresholds[0] >> 2))
528
0
        part_info->variance_low[i + 1] = 1;
529
0
    }
530
0
  } else if (xd->mi[0]->bsize == BLOCK_32X64) {
531
0
    for (int i = 0; i < 2; i++) {
532
0
      if (vt->part_variances.vert[i].variance < (thresholds[0] >> 2))
533
0
        part_info->variance_low[i + 3] = 1;
534
0
    }
535
0
  } else {
536
0
    static const int idx[4][2] = { { 0, 0 }, { 0, 8 }, { 8, 0 }, { 8, 8 } };
537
0
    for (int i = 0; i < 4; i++) {
538
0
      const int idx_str =
539
0
          mi_params->mi_stride * (mi_row + idx[i][0]) + mi_col + idx[i][1];
540
0
      MB_MODE_INFO **this_mi = mi_params->mi_grid_base + idx_str;
541
542
0
      if (mi_params->mi_cols <= mi_col + idx[i][1] ||
543
0
          mi_params->mi_rows <= mi_row + idx[i][0])
544
0
        continue;
545
546
0
      if (*this_mi == NULL) continue;
547
548
0
      if ((*this_mi)->bsize == BLOCK_32X32) {
549
0
        int64_t threshold_32x32 = (5 * thresholds[1]) >> 3;
550
0
        if (vt->split[i].part_variances.none.variance < threshold_32x32)
551
0
          part_info->variance_low[i + 5] = 1;
552
0
      } else {
553
        // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block
554
        // inside.
555
0
        if ((*this_mi)->bsize == BLOCK_16X16 ||
556
0
            (*this_mi)->bsize == BLOCK_32X16 ||
557
0
            (*this_mi)->bsize == BLOCK_16X32) {
558
0
          for (int j = 0; j < 4; j++) {
559
0
            if (vt->split[i].split[j].part_variances.none.variance <
560
0
                (thresholds[2] >> 8))
561
0
              part_info->variance_low[(i << 2) + j + 9] = 1;
562
0
          }
563
0
        }
564
0
      }
565
0
    }
566
0
  }
567
0
}
568
569
// Set the temporal-variance-low flags for a 128x128 superblock
// (counterpart of set_low_temp_var_flag_64x64). variance_low layout:
// [0] 128x128, [1..2] 128x64 halves, [3..4] 64x128 halves, [5..8] 64x64
// quarters, [9..16] 64x32, [17..24] 32x64, [25..40] 32x32, [41..104] 16x16.
static AOM_INLINE void set_low_temp_var_flag_128x128(
    CommonModeInfoParams *mi_params, PartitionSearchInfo *part_info,
    MACROBLOCKD *xd, VP128x128 *vt, const int64_t thresholds[], int mi_col,
    int mi_row) {
  if (xd->mi[0]->bsize == BLOCK_128X128) {
    if (vt->part_variances.none.variance < (thresholds[0] >> 1))
      part_info->variance_low[0] = 1;
  } else if (xd->mi[0]->bsize == BLOCK_128X64) {
    for (int i = 0; i < 2; i++) {
      if (vt->part_variances.horz[i].variance < (thresholds[0] >> 2))
        part_info->variance_low[i + 1] = 1;
    }
  } else if (xd->mi[0]->bsize == BLOCK_64X128) {
    for (int i = 0; i < 2; i++) {
      if (vt->part_variances.vert[i].variance < (thresholds[0] >> 2))
        part_info->variance_low[i + 3] = 1;
    }
  } else {
    // mi offsets of the four 64x64 (resp. 32x32) children inside their
    // parent block.
    static const int idx64[4][2] = {
      { 0, 0 }, { 0, 16 }, { 16, 0 }, { 16, 16 }
    };
    static const int idx32[4][2] = { { 0, 0 }, { 0, 8 }, { 8, 0 }, { 8, 8 } };
    for (int i = 0; i < 4; i++) {
      const int idx_str =
          mi_params->mi_stride * (mi_row + idx64[i][0]) + mi_col + idx64[i][1];
      MB_MODE_INFO **mi_64 = mi_params->mi_grid_base + idx_str;
      // NOTE(review): *mi_64 is dereferenced before the mi_cols/mi_rows
      // bounds check below, unlike the 64x64 variant which checks bounds
      // first. Presumably the mi grid allocation covers the full aligned
      // superblock area, so the read stays in-bounds — confirm.
      if (*mi_64 == NULL) continue;
      if (mi_params->mi_cols <= mi_col + idx64[i][1] ||
          mi_params->mi_rows <= mi_row + idx64[i][0])
        continue;
      const int64_t threshold_64x64 = (5 * thresholds[1]) >> 3;
      if ((*mi_64)->bsize == BLOCK_64X64) {
        if (vt->split[i].part_variances.none.variance < threshold_64x64)
          part_info->variance_low[5 + i] = 1;
      } else if ((*mi_64)->bsize == BLOCK_64X32) {
        for (int j = 0; j < 2; j++)
          if (vt->split[i].part_variances.horz[j].variance <
              (threshold_64x64 >> 1))
            part_info->variance_low[9 + (i << 1) + j] = 1;
      } else if ((*mi_64)->bsize == BLOCK_32X64) {
        for (int j = 0; j < 2; j++)
          if (vt->split[i].part_variances.vert[j].variance <
              (threshold_64x64 >> 1))
            part_info->variance_low[17 + (i << 1) + j] = 1;
      } else {
        // 64x64 child was itself split: descend into its 32x32 children.
        for (int k = 0; k < 4; k++) {
          const int idx_str1 = mi_params->mi_stride * idx32[k][0] + idx32[k][1];
          MB_MODE_INFO **mi_32 = mi_params->mi_grid_base + idx_str + idx_str1;
          if (*mi_32 == NULL) continue;

          if (mi_params->mi_cols <= mi_col + idx64[i][1] + idx32[k][1] ||
              mi_params->mi_rows <= mi_row + idx64[i][0] + idx32[k][0])
            continue;
          const int64_t threshold_32x32 = (5 * thresholds[2]) >> 3;
          if ((*mi_32)->bsize == BLOCK_32X32) {
            if (vt->split[i].split[k].part_variances.none.variance <
                threshold_32x32)
              part_info->variance_low[25 + (i << 2) + k] = 1;
          } else {
            // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block
            // inside.
            if ((*mi_32)->bsize == BLOCK_16X16 ||
                (*mi_32)->bsize == BLOCK_32X16 ||
                (*mi_32)->bsize == BLOCK_16X32) {
              for (int j = 0; j < 4; j++) {
                if (vt->split[i]
                        .split[k]
                        .split[j]
                        .part_variances.none.variance < (thresholds[3] >> 8))
                  part_info->variance_low[41 + (i << 4) + (k << 2) + j] = 1;
              }
            }
          }
        }
      }
    }
  }
}
647
648
static AOM_INLINE void set_low_temp_var_flag(
649
    AV1_COMP *cpi, PartitionSearchInfo *part_info, MACROBLOCKD *xd,
650
    VP128x128 *vt, int64_t thresholds[], MV_REFERENCE_FRAME ref_frame_partition,
651
0
    int mi_col, int mi_row) {
652
0
  AV1_COMMON *const cm = &cpi->common;
653
  // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected.
654
  // If the temporal variance is small set the flag
655
  // variance_low for the block. The variance threshold can be adjusted, the
656
  // higher the more aggressive.
657
0
  if (ref_frame_partition == LAST_FRAME) {
658
0
    const int is_small_sb = (cm->seq_params->sb_size == BLOCK_64X64);
659
0
    if (is_small_sb)
660
0
      set_low_temp_var_flag_64x64(&cm->mi_params, part_info, xd,
661
0
                                  &(vt->split[0]), thresholds, mi_col, mi_row);
662
0
    else
663
0
      set_low_temp_var_flag_128x128(&cm->mi_params, part_info, xd, vt,
664
0
                                    thresholds, mi_col, mi_row);
665
0
  }
666
0
}
667
668
// Lookup table of variance_low[] indices for the sixteen 16x16 blocks of a
// 64x64 area, addressed by the block's (row, col) position; used by
// av1_get_force_skip_low_temp_var_small_sb below.
static const int pos_shift_16x16[4][4] = {
  { 9, 10, 13, 14 }, { 11, 12, 15, 16 }, { 17, 18, 21, 22 }, { 19, 20, 23, 24 }
};
671
672
int av1_get_force_skip_low_temp_var_small_sb(const uint8_t *variance_low,
673
                                             int mi_row, int mi_col,
674
0
                                             BLOCK_SIZE bsize) {
675
  // Relative indices of MB inside the superblock.
676
0
  const int mi_x = mi_row & 0xF;
677
0
  const int mi_y = mi_col & 0xF;
678
  // Relative indices of 16x16 block inside the superblock.
679
0
  const int i = mi_x >> 2;
680
0
  const int j = mi_y >> 2;
681
0
  int force_skip_low_temp_var = 0;
682
  // Set force_skip_low_temp_var based on the block size and block offset.
683
0
  switch (bsize) {
684
0
    case BLOCK_64X64: force_skip_low_temp_var = variance_low[0]; break;
685
0
    case BLOCK_64X32:
686
0
      if (!mi_y && !mi_x) {
687
0
        force_skip_low_temp_var = variance_low[1];
688
0
      } else if (!mi_y && mi_x) {
689
0
        force_skip_low_temp_var = variance_low[2];
690
0
      }
691
0
      break;
692
0
    case BLOCK_32X64:
693
0
      if (!mi_y && !mi_x) {
694
0
        force_skip_low_temp_var = variance_low[3];
695
0
      } else if (mi_y && !mi_x) {
696
0
        force_skip_low_temp_var = variance_low[4];
697
0
      }
698
0
      break;
699
0
    case BLOCK_32X32:
700
0
      if (!mi_y && !mi_x) {
701
0
        force_skip_low_temp_var = variance_low[5];
702
0
      } else if (mi_y && !mi_x) {
703
0
        force_skip_low_temp_var = variance_low[6];
704
0
      } else if (!mi_y && mi_x) {
705
0
        force_skip_low_temp_var = variance_low[7];
706
0
      } else if (mi_y && mi_x) {
707
0
        force_skip_low_temp_var = variance_low[8];
708
0
      }
709
0
      break;
710
0
    case BLOCK_32X16:
711
0
    case BLOCK_16X32:
712
0
    case BLOCK_16X16:
713
0
      force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]];
714
0
      break;
715
0
    default: break;
716
0
  }
717
718
0
  return force_skip_low_temp_var;
719
0
}
720
721
// Returns the cached "low temporal variance" flag for the block at
// (mi_row, mi_col) of size bsize inside a 128x128 superblock.
// variance_low[] layout (filled during variance partitioning):
//   [0]      128x128
//   [1..2]   128x64 halves      [3..4]   64x128 halves
//   [5..8]   64x64 quadrants    [9..16]  64x32      [17..24] 32x64
//   [25..40] 32x32              [41..]   16x16-level positions
// Returns 0 for positions with no cached flag.
int av1_get_force_skip_low_temp_var(const uint8_t *variance_low, int mi_row,
                                    int mi_col, BLOCK_SIZE bsize) {
  int force_skip_low_temp_var = 0;
  int x, y;
  // Index of the 64x64 quadrant inside the 128x128 superblock.
  x = (mi_col & 0x1F) >> 4;
  // y = (mi_row & 0x1F) >> 4;
  // const int idx64 = (y << 1) + x;
  // NOTE(review): for mi_row aligned to the 64x64 grid (multiple of 16 mi),
  // (mi_row & 0x17) >> 3 equals (y << 1) of the commented-out form above, so
  // idx64 = y + x reproduces (y << 1) + x. Same trick applies to the 0xB and
  // 0x5 masks below at 32x32 / 16x16 alignment — confirm before changing.
  y = (mi_row & 0x17) >> 3;
  const int idx64 = y + x;

  // Index of the 32x32 block inside its 64x64 parent.
  x = (mi_col & 0xF) >> 3;
  // y = (mi_row & 0xF) >> 3;
  // const int idx32 = (y << 1) + x;
  y = (mi_row & 0xB) >> 2;
  const int idx32 = y + x;

  // Index of the 16x16 block inside its 32x32 parent.
  x = (mi_col & 0x7) >> 2;
  // y = (mi_row & 0x7) >> 2;
  // const int idx16 = (y << 1) + x;
  y = (mi_row & 0x5) >> 1;
  const int idx16 = y + x;
  // Set force_skip_low_temp_var based on the block size and block offset.
  switch (bsize) {
    case BLOCK_128X128: force_skip_low_temp_var = variance_low[0]; break;
    case BLOCK_128X64:
      assert((mi_col & 0x1F) == 0);
      force_skip_low_temp_var = variance_low[1 + ((mi_row & 0x1F) != 0)];
      break;
    case BLOCK_64X128:
      assert((mi_row & 0x1F) == 0);
      force_skip_low_temp_var = variance_low[3 + ((mi_col & 0x1F) != 0)];
      break;
    case BLOCK_64X64:
      // Location of this 64x64 block inside the 128x128 superblock
      force_skip_low_temp_var = variance_low[5 + idx64];
      break;
    case BLOCK_64X32:
      x = (mi_col & 0x1F) >> 4;
      y = (mi_row & 0x1F) >> 3;
      /*
      Index layout of the eight 64x32 blocks (see idx64x32 below):
      .---------------.---------------.
      | x=0,y=0,idx=0 | x=0,y=0,idx=2 |
      :---------------+---------------:
      | x=0,y=1,idx=1 | x=1,y=1,idx=3 |
      :---------------+---------------:
      | x=0,y=2,idx=4 | x=1,y=2,idx=6 |
      :---------------+---------------:
      | x=0,y=3,idx=5 | x=1,y=3,idx=7 |
      '---------------'---------------'
      */
      const int idx64x32 = (x << 1) + (y % 2) + ((y >> 1) << 2);
      force_skip_low_temp_var = variance_low[9 + idx64x32];
      break;
    case BLOCK_32X64:
      x = (mi_col & 0x1F) >> 3;
      y = (mi_row & 0x1F) >> 4;
      const int idx32x64 = (y << 2) + x;
      force_skip_low_temp_var = variance_low[17 + idx32x64];
      break;
    case BLOCK_32X32:
      force_skip_low_temp_var = variance_low[25 + (idx64 << 2) + idx32];
      break;
    case BLOCK_32X16:
    case BLOCK_16X32:
    case BLOCK_16X16:
      // 16 entries per 64x64, 4 per 32x32, starting at offset 41.
      force_skip_low_temp_var =
          variance_low[41 + (idx64 << 4) + (idx32 << 2) + idx16];
      break;
    default: break;
  }
  return force_skip_low_temp_var;
}
793
794
// Refreshes the variance-partition thresholds in cpi->vbp_info for
// quantizer index q. Does nothing unless the active speed features select
// variance-based partitioning.
void av1_set_variance_partition_thresholds(AV1_COMP *cpi, int q,
                                           int content_lowsumdiff) {
  const SPEED_FEATURES *const speed_features = &cpi->sf;
  if (speed_features->part_sf.partition_search_type == VAR_BASED_PARTITION) {
    set_vbp_thresholds(cpi, cpi->vbp_info.thresholds, q, content_lowsumdiff, 0,
                       0);
    // threshold_minmax is derived from q here and never adjusted locally.
    cpi->vbp_info.threshold_minmax = 15 + (q >> 3);
  }
}
806
807
static AOM_INLINE void chroma_check(AV1_COMP *cpi, MACROBLOCK *x,
808
                                    BLOCK_SIZE bsize, unsigned int y_sad,
809
0
                                    int is_key_frame) {
810
0
  int i;
811
0
  MACROBLOCKD *xd = &x->e_mbd;
812
813
0
  if (is_key_frame || cpi->oxcf.tool_cfg.enable_monochrome) return;
814
815
0
  for (i = 1; i <= 2; ++i) {
816
0
    unsigned int uv_sad = UINT_MAX;
817
0
    struct macroblock_plane *p = &x->plane[i];
818
0
    struct macroblockd_plane *pd = &xd->plane[i];
819
0
    const BLOCK_SIZE bs =
820
0
        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
821
822
0
    if (bs != BLOCK_INVALID)
823
0
      uv_sad = cpi->ppi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, pd->dst.buf,
824
0
                                        pd->dst.stride);
825
826
0
    if (uv_sad > (y_sad >> 1))
827
0
      x->color_sensitivity_sb[i - 1] = 1;
828
0
    else if (uv_sad < (y_sad >> 3))
829
0
      x->color_sensitivity_sb[i - 1] = 0;
830
    // Borderline case: to be refined at coding block level in nonrd_pickmode,
831
    // for coding block size < sb_size.
832
0
    else
833
0
      x->color_sensitivity_sb[i - 1] = 2;
834
0
  }
835
0
}
836
837
// Fills the leaf levels of the variance tree for one superblock: for inter
// frames it accumulates 8x8-average variances into each 16x16 node (and the
// per-32x32 avg/min/max stats), setting force_split[] at every level when a
// 16x16 variance exceeds thresholds[3]; for key frames it instead descends
// to 4x4 averages and marks variance4x4downsample[] so the caller finishes
// those 16x16 nodes later.
// force_split[] index layout: 0 = whole SB, 1..4 = 64x64 units,
// 5..20 = 32x32 units, 21..84 = 16x16 units.
// src/dst are the luma source and prediction (or flat reference on key
// frames) with their strides.
static void fill_variance_tree_leaves(
    AV1_COMP *cpi, MACROBLOCK *x, VP128x128 *vt, VP16x16 *vt2,
    unsigned char *force_split, int avg_16x16[][4], int maxvar_16x16[][4],
    int minvar_16x16[][4], int *variance4x4downsample, int64_t *thresholds,
    uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride) {
  AV1_COMMON *cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  const int is_key_frame = frame_is_intra_only(cm);
  const int is_small_sb = (cm->seq_params->sb_size == BLOCK_64X64);
  const int num_64x64_blocks = is_small_sb ? 1 : 4;
  // TODO(kyslov) Bring back compute_minmax_variance with content type detection
  const int compute_minmax_variance = 0;
  const int segment_id = xd->mi[0]->segment_id;
  int pixels_wide = 128, pixels_high = 128;

  if (is_small_sb) {
    pixels_wide = 64;
    pixels_high = 64;
  }
  // Clamp to the visible picture area at the right/bottom frame borders
  // (mb_to_*_edge is negative there, in 1/8-pel-related units; >> 3 yields
  // pixels).
  if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3);
  if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3);
  for (int m = 0; m < num_64x64_blocks; m++) {
    // Pixel offset of this 64x64 unit inside the superblock.
    const int x64_idx = ((m & 1) << 6);
    const int y64_idx = ((m >> 1) << 6);
    const int m2 = m << 2;
    force_split[m + 1] = 0;

    for (int i = 0; i < 4; i++) {
      const int x32_idx = x64_idx + ((i & 1) << 5);
      const int y32_idx = y64_idx + ((i >> 1) << 5);
      const int i2 = (m2 + i) << 2;
      force_split[5 + m2 + i] = 0;
      avg_16x16[m][i] = 0;
      maxvar_16x16[m][i] = 0;
      minvar_16x16[m][i] = INT_MAX;
      for (int j = 0; j < 4; j++) {
        const int x16_idx = x32_idx + ((j & 1) << 4);
        const int y16_idx = y32_idx + ((j >> 1) << 4);
        const int split_index = 21 + i2 + j;
        VP16x16 *vst = &vt->split[m].split[i].split[j];
        force_split[split_index] = 0;
        variance4x4downsample[i2 + j] = 0;
        if (!is_key_frame) {
          // Inter frame: build the 16x16 node from 8x8 averages and track
          // per-32x32 sum/min/max of the 16x16 variances.
          fill_variance_8x8avg(src, src_stride, dst, dst_stride, x16_idx,
                               y16_idx, vst,
#if CONFIG_AV1_HIGHBITDEPTH
                               xd->cur_buf->flags,
#endif
                               pixels_wide, pixels_high, is_key_frame);
          fill_variance_tree(&vt->split[m].split[i].split[j], BLOCK_16X16);
          get_variance(&vt->split[m].split[i].split[j].part_variances.none);
          avg_16x16[m][i] +=
              vt->split[m].split[i].split[j].part_variances.none.variance;
          if (vt->split[m].split[i].split[j].part_variances.none.variance <
              minvar_16x16[m][i])
            minvar_16x16[m][i] =
                vt->split[m].split[i].split[j].part_variances.none.variance;
          if (vt->split[m].split[i].split[j].part_variances.none.variance >
              maxvar_16x16[m][i])
            maxvar_16x16[m][i] =
                vt->split[m].split[i].split[j].part_variances.none.variance;
          if (vt->split[m].split[i].split[j].part_variances.none.variance >
              thresholds[3]) {
            // 16X16 variance is above threshold for split, so force split to
            // 8x8 for this 16x16 block (this also forces splits for upper
            // levels).
            force_split[split_index] = 1;
            force_split[5 + m2 + i] = 1;
            force_split[m + 1] = 1;
            force_split[0] = 1;
          } else if (!cyclic_refresh_segment_id_boosted(segment_id) &&
                     compute_minmax_variance &&
                     vt->split[m]
                             .split[i]
                             .split[j]
                             .part_variances.none.variance > thresholds[2]) {
            // We have some nominal amount of 16x16 variance (based on average),
            // compute the minmax over the 8x8 sub-blocks, and if above
            // threshold, force split to 8x8 block for this 16x16 block.
            // NOTE: dead code while compute_minmax_variance == 0 above.
            int minmax = compute_minmax_8x8(src, src_stride, dst, dst_stride,
                                            x16_idx, y16_idx,
#if CONFIG_AV1_HIGHBITDEPTH
                                            xd->cur_buf->flags,
#endif
                                            pixels_wide, pixels_high);
            int thresh_minmax = (int)cpi->vbp_info.threshold_minmax;
            if (minmax > thresh_minmax) {
              force_split[split_index] = 1;
              force_split[5 + m2 + i] = 1;
              force_split[m + 1] = 1;
              force_split[0] = 1;
            }
          }
        }
        if (is_key_frame) {
          force_split[split_index] = 0;
          // Go down to 4x4 down-sampling for variance.
          variance4x4downsample[i2 + j] = 1;
          for (int k = 0; k < 4; k++) {
            int x8_idx = x16_idx + ((k & 1) << 3);
            int y8_idx = y16_idx + ((k >> 1) << 3);
            // Inside is_key_frame this always selects &vst->split[k]; the
            // vt2 alternative appears to be a leftover from the shared
            // template — TODO confirm.
            VP8x8 *vst2 = is_key_frame ? &vst->split[k] : &vt2[i2 + j].split[k];
            fill_variance_4x4avg(src, src_stride, dst, dst_stride, x8_idx,
                                 y8_idx, vst2,
#if CONFIG_AV1_HIGHBITDEPTH
                                 xd->cur_buf->flags,
#endif
                                 pixels_wide, pixels_high, is_key_frame);
          }
        }
      }
    }
  }
}
951
952
// Prepares the prediction planes used by variance partitioning for one
// superblock: sets up LAST_FRAME (and possibly GOLDEN_FRAME) as the
// prediction reference, estimates the luma SAD of the superblock, and
// builds the inter predictor if a non-zero MV was estimated.
// Outputs: *y_sad (luma SAD vs the chosen reference), *y_sad_g (luma SAD
// vs GOLDEN, or UINT_MAX if GOLDEN was not evaluated), and
// *ref_frame_partition (the reference actually selected).
static void setup_planes(AV1_COMP *cpi, MACROBLOCK *x, unsigned int *y_sad,
                         unsigned int *y_sad_g,
                         MV_REFERENCE_FRAME *ref_frame_partition, int mi_row,
                         int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  const int num_planes = av1_num_planes(cm);
  const int is_small_sb = (cm->seq_params->sb_size == BLOCK_64X64);
  BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128;
  // TODO(kyslov): we are assuming that the ref is LAST_FRAME! Check if it
  // is!!
  MB_MODE_INFO *mi = xd->mi[0];
  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);
  assert(yv12 != NULL);
  const YV12_BUFFER_CONFIG *yv12_g = NULL;

  // For non-SVC GOLDEN is another temporal reference. Check if it should be
  // used as reference for partitioning.
  if (!cpi->ppi->use_svc && (cpi->ref_frame_flags & AOM_GOLD_FLAG)) {
    yv12_g = get_ref_frame_yv12_buf(cm, GOLDEN_FRAME);
    if (yv12_g && yv12_g != yv12) {
      av1_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
                           get_ref_scale_factors(cm, GOLDEN_FRAME), num_planes);
      *y_sad_g = cpi->ppi->fn_ptr[bsize].sdf(
          x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
          xd->plane[0].pre[0].stride);
    }
  }

  // Default to LAST_FRAME with a zero MV and bilinear filtering.
  av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
                       get_ref_scale_factors(cm, LAST_FRAME), num_planes);
  mi->ref_frame[0] = LAST_FRAME;
  mi->ref_frame[1] = NONE_FRAME;
  mi->bsize = cm->seq_params->sb_size;
  mi->mv[0].as_int = 0;
  mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
  // Optionally run coarse motion estimation (interior superblocks only).
  if (cpi->sf.rt_sf.estimate_motion_for_var_based_partition) {
    if (xd->mb_to_right_edge >= 0 && xd->mb_to_bottom_edge >= 0) {
      const MV dummy_mv = { 0, 0 };
      *y_sad = av1_int_pro_motion_estimation(cpi, x, cm->seq_params->sb_size,
                                             mi_row, mi_col, &dummy_mv);
    }
  }
  // Fall back to a zero-MV SAD when motion estimation was skipped or failed.
  if (*y_sad == UINT_MAX) {
    *y_sad = cpi->ppi->fn_ptr[bsize].sdf(
        x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
        xd->plane[0].pre[0].stride);
  }

  // Pick the ref frame for partitioning; use the golden frame only if it
  // has clearly lower SAD (10% margin). When *y_sad_g is UINT_MAX (GOLDEN
  // not evaluated) this test is always false.
  if (*y_sad_g < 0.9 * *y_sad) {
    av1_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
                         get_ref_scale_factors(cm, GOLDEN_FRAME), num_planes);
    mi->ref_frame[0] = GOLDEN_FRAME;
    mi->mv[0].as_int = 0;
    *y_sad = *y_sad_g;
    *ref_frame_partition = GOLDEN_FRAME;
    x->nonrd_prune_ref_frame_search = 0;
  } else {
    *ref_frame_partition = LAST_FRAME;
    x->nonrd_prune_ref_frame_search =
        cpi->sf.rt_sf.nonrd_prune_ref_frame_search;
  }

  // Only calculate the predictor for non-zero MV.
  if (mi->mv[0].as_int != 0) {
    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL,
                                  cm->seq_params->sb_size, AOM_PLANE_Y,
                                  AOM_PLANE_Y);
  }
}
1025
1026
// Chooses the partitioning of one superblock (at mi_row, mi_col) from the
// variance tree: builds leaf variances via fill_variance_tree_leaves(),
// propagates force-split decisions up the tree, then walks back down with
// set_vt_partitioning() assigning block sizes from 128x128 (or 64x64 for
// small superblocks) down to 8x8. Also updates the low-temp-var flags and
// chroma color sensitivity used by the non-RD pick-mode path.
// Always returns 0.
int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
                                      ThreadData *td, MACROBLOCK *x, int mi_row,
                                      int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  const int64_t *const vbp_thresholds = cpi->vbp_info.thresholds;

  int i, j, k, m;
  VP128x128 *vt;
  VP16x16 *vt2 = NULL;
  unsigned char force_split[85];
  int avg_64x64;
  int max_var_32x32[4];
  int min_var_32x32[4];
  int var_32x32;
  int var_64x64;
  int min_var_64x64 = INT_MAX;
  int max_var_64x64 = 0;
  int avg_16x16[4][4];
  int maxvar_16x16[4][4];
  int minvar_16x16[4][4];
  int64_t threshold_4x4avg;
  uint8_t *s;
  const uint8_t *d;
  int sp;
  int dp;
  NOISE_LEVEL noise_level = kLow;

  int is_key_frame =
      (frame_is_intra_only(cm) ||
       (cpi->ppi->use_svc &&
        cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame));

  assert(cm->seq_params->sb_size == BLOCK_64X64 ||
         cm->seq_params->sb_size == BLOCK_128X128);
  const int is_small_sb = (cm->seq_params->sb_size == BLOCK_64X64);
  const int num_64x64_blocks = is_small_sb ? 1 : 4;

  unsigned int y_sad = UINT_MAX;
  unsigned int y_sad_g = UINT_MAX;
  BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128;

  // Ref frame used in partitioning.
  MV_REFERENCE_FRAME ref_frame_partition = LAST_FRAME;

  CHECK_MEM_ERROR(cm, vt, aom_malloc(sizeof(*vt)));

  // The 64x64-level nodes are per-thread scratch, not owned by vt.
  vt->split = td->vt64x64;

  // Local copy: may be overridden below for cyclic-refresh segments.
  int64_t thresholds[5] = { vbp_thresholds[0], vbp_thresholds[1],
                            vbp_thresholds[2], vbp_thresholds[3],
                            vbp_thresholds[4] };

  const int low_res = (cm->width <= 352 && cm->height <= 288);
  int variance4x4downsample[64];
  const int segment_id = xd->mi[0]->segment_id;

  if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
      cyclic_refresh_segment_id_boosted(segment_id)) {
    // Boosted segment: recompute thresholds from the segment's qindex.
    const int q =
        av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex);
    set_vbp_thresholds(cpi, thresholds, q, x->content_state_sb.low_sumdiff,
                       x->content_state_sb.source_sad, 1);
  } else {
    set_vbp_thresholds(cpi, thresholds, cm->quant_params.base_qindex,
                       x->content_state_sb.low_sumdiff,
                       x->content_state_sb.source_sad, 0);
  }

  // For non keyframes, disable 4x4 average for low resolution when speed = 8
  // (currently always INT64_MAX, so the vt2 path below is never taken).
  threshold_4x4avg = INT64_MAX;

  s = x->plane[0].src.buf;
  sp = x->plane[0].src.stride;

  // force_split[] index layout: 0 for the whole superblock, 1-4 for the
  // 64x64 units, 5-20 for the 32x32 units, 21-84 for the 16x16 units.
  force_split[0] = 0;
  memset(x->part_search_info.variance_low, 0,
         sizeof(x->part_search_info.variance_low));

  // Check if LAST frame is NULL or if the resolution of LAST is
  // different than the current frame resolution, and if so, treat this frame
  // as a key frame, for the purpose of the superblock partitioning.
  // LAST == NULL can happen in cases where enhancement spatial layers are
  // enabled dynamically and the only reference is the spatial(GOLDEN).
  // TODO(marpan): Check use of scaled references for the different resoln.
  if (!frame_is_intra_only(cm)) {
    const YV12_BUFFER_CONFIG *const ref =
        get_ref_frame_yv12_buf(cm, LAST_FRAME);
    if (ref == NULL || ref->y_crop_height != cm->height ||
        ref->y_crop_width != cm->width) {
      is_key_frame = 1;
    }
  }

  if (!is_key_frame) {
    // Inter frame: variance is computed against the motion-compensated
    // (or zero-MV reference) prediction.
    setup_planes(cpi, x, &y_sad, &y_sad_g, &ref_frame_partition, mi_row,
                 mi_col);

    MB_MODE_INFO *mi = xd->mi[0];
    // Use reference SB directly for zero mv.
    if (mi->mv[0].as_int != 0) {
      d = xd->plane[0].dst.buf;
      dp = xd->plane[0].dst.stride;
    } else {
      d = xd->plane[0].pre[0].buf;
      dp = xd->plane[0].pre[0].stride;
    }
  } else {
    // Key frame: variance is measured against a flat reference.
    d = AV1_VAR_OFFS;
    dp = 0;
  }
  if (cpi->noise_estimate.enabled)
    noise_level = av1_noise_estimate_extract_level(&cpi->noise_estimate);

  if (low_res && threshold_4x4avg < INT64_MAX)
    CHECK_MEM_ERROR(cm, vt2, aom_malloc(sizeof(*vt2)));
  // Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances
  // for splits.
  fill_variance_tree_leaves(cpi, x, vt, vt2, force_split, avg_16x16,
                            maxvar_16x16, minvar_16x16, variance4x4downsample,
                            thresholds, s, sp, d, dp);

  // Bottom-up pass: finish 16x16 nodes built from 4x4 averages, then
  // aggregate 32x32 and 64x64 nodes and propagate force-split decisions.
  avg_64x64 = 0;
  for (m = 0; m < num_64x64_blocks; ++m) {
    max_var_32x32[m] = 0;
    min_var_32x32[m] = INT_MAX;
    const int m2 = m << 2;
    for (i = 0; i < 4; i++) {
      const int i2 = (m2 + i) << 2;
      for (j = 0; j < 4; j++) {
        const int split_index = 21 + i2 + j;
        if (variance4x4downsample[i2 + j] == 1) {
          VP16x16 *vtemp =
              (!is_key_frame) ? &vt2[i2 + j] : &vt->split[m].split[i].split[j];
          for (k = 0; k < 4; k++)
            fill_variance_tree(&vtemp->split[k], BLOCK_8X8);
          fill_variance_tree(vtemp, BLOCK_16X16);
          // If variance of this 16x16 block is above the threshold, force block
          // to split. This also forces a split on the upper levels.
          get_variance(&vtemp->part_variances.none);
          if (vtemp->part_variances.none.variance > thresholds[3]) {
            force_split[split_index] = 1;
            force_split[5 + m2 + i] = 1;
            force_split[m + 1] = 1;
            force_split[0] = 1;
          }
        }
      }
      fill_variance_tree(&vt->split[m].split[i], BLOCK_32X32);
      // If variance of this 32x32 block is above the threshold, or if its above
      // (some threshold of) the average variance over the sub-16x16 blocks,
      // then force this block to split. This also forces a split on the upper
      // (64x64) level.
      if (!force_split[5 + m2 + i]) {
        get_variance(&vt->split[m].split[i].part_variances.none);
        var_32x32 = vt->split[m].split[i].part_variances.none.variance;
        max_var_32x32[m] = AOMMAX(var_32x32, max_var_32x32[m]);
        min_var_32x32[m] = AOMMIN(var_32x32, min_var_32x32[m]);
        if (vt->split[m].split[i].part_variances.none.variance >
                thresholds[2] ||
            (!is_key_frame &&
             vt->split[m].split[i].part_variances.none.variance >
                 (thresholds[2] >> 1) &&
             vt->split[m].split[i].part_variances.none.variance >
                 (avg_16x16[m][i] >> 1))) {
          force_split[5 + m2 + i] = 1;
          force_split[m + 1] = 1;
          force_split[0] = 1;
        } else if (!is_key_frame && cm->height <= 360 &&
                   (maxvar_16x16[m][i] - minvar_16x16[m][i]) >
                       (thresholds[2] >> 1) &&
                   maxvar_16x16[m][i] > thresholds[2]) {
          force_split[5 + m2 + i] = 1;
          force_split[m + 1] = 1;
          force_split[0] = 1;
        }
      }
    }
    if (!force_split[1 + m]) {
      fill_variance_tree(&vt->split[m], BLOCK_64X64);
      get_variance(&vt->split[m].part_variances.none);
      var_64x64 = vt->split[m].part_variances.none.variance;
      max_var_64x64 = AOMMAX(var_64x64, max_var_64x64);
      min_var_64x64 = AOMMIN(var_64x64, min_var_64x64);
      // If the difference of the max-min variances of sub-blocks or max
      // variance of a sub-block is above some threshold, then force this
      // block to split. Only checking this for noise level >= medium, if
      // encoder is in SVC or if we already forced large blocks.

      if (!is_key_frame &&
          (max_var_32x32[m] - min_var_32x32[m]) > 3 * (thresholds[1] >> 3) &&
          max_var_32x32[m] > thresholds[1] >> 1 &&
          (noise_level >= kMedium || cpi->ppi->use_svc ||
           cpi->sf.rt_sf.force_large_partition_blocks)) {
        force_split[1 + m] = 1;
        force_split[0] = 1;
      }
      avg_64x64 += var_64x64;
    }
    // A 64x64 superblock cannot be coded as a single 128x128, so always
    // force the top-level split for small superblocks.
    if (is_small_sb) force_split[0] = 1;
  }

  if (!force_split[0]) {
    fill_variance_tree(vt, BLOCK_128X128);
    get_variance(&vt->part_variances.none);
    // Split the 128x128 if its variance exceeds ~9/32 above the average
    // of the 64x64 variances, or if the 64x64 min/max spread is large.
    if (!is_key_frame &&
        vt->part_variances.none.variance > (9 * avg_64x64) >> 5)
      force_split[0] = 1;

    if (!is_key_frame &&
        (max_var_64x64 - min_var_64x64) > 3 * (thresholds[0] >> 3) &&
        max_var_64x64 > thresholds[0] >> 1)
      force_split[0] = 1;
  }

  // Top-down pass: try each level; on failure (or forced split) descend.
  if (mi_col + 32 > tile->mi_col_end || mi_row + 32 > tile->mi_row_end ||
      !set_vt_partitioning(cpi, x, xd, tile, vt, BLOCK_128X128, mi_row, mi_col,
                           thresholds[0], BLOCK_16X16, force_split[0])) {
    for (m = 0; m < num_64x64_blocks; ++m) {
      // mi-unit offsets of the 64x64 unit within the superblock.
      const int x64_idx = ((m & 1) << 4);
      const int y64_idx = ((m >> 1) << 4);
      const int m2 = m << 2;

      // Now go through the entire structure, splitting every block size until
      // we get to one that's got a variance lower than our threshold.
      if (!set_vt_partitioning(cpi, x, xd, tile, &vt->split[m], BLOCK_64X64,
                               mi_row + y64_idx, mi_col + x64_idx,
                               thresholds[1], BLOCK_16X16,
                               force_split[1 + m])) {
        for (i = 0; i < 4; ++i) {
          const int x32_idx = ((i & 1) << 3);
          const int y32_idx = ((i >> 1) << 3);
          const int i2 = (m2 + i) << 2;
          if (!set_vt_partitioning(cpi, x, xd, tile, &vt->split[m].split[i],
                                   BLOCK_32X32, (mi_row + y64_idx + y32_idx),
                                   (mi_col + x64_idx + x32_idx), thresholds[2],
                                   BLOCK_16X16, force_split[5 + m2 + i])) {
            for (j = 0; j < 4; ++j) {
              const int x16_idx = ((j & 1) << 2);
              const int y16_idx = ((j >> 1) << 2);
              const int split_index = 21 + i2 + j;
              // For inter frames: if variance4x4downsample[] == 1 for this
              // 16x16 block, then the variance is based on 4x4 down-sampling,
              // so use vt2 in set_vt_partioning(), otherwise use vt.
              VP16x16 *vtemp =
                  (!is_key_frame && variance4x4downsample[i2 + j] == 1)
                      ? &vt2[i2 + j]
                      : &vt->split[m].split[i].split[j];
              if (!set_vt_partitioning(cpi, x, xd, tile, vtemp, BLOCK_16X16,
                                       mi_row + y64_idx + y32_idx + y16_idx,
                                       mi_col + x64_idx + x32_idx + x16_idx,
                                       thresholds[3], BLOCK_8X8,
                                       force_split[split_index])) {
                for (k = 0; k < 4; ++k) {
                  const int x8_idx = (k & 1) << 1;
                  const int y8_idx = (k >> 1) << 1;
                  set_block_size(
                      cpi, x, xd,
                      (mi_row + y64_idx + y32_idx + y16_idx + y8_idx),
                      (mi_col + x64_idx + x32_idx + x16_idx + x8_idx),
                      BLOCK_8X8);
                }
              }
            }
          }
        }
      }
    }
  }

  if (cpi->sf.rt_sf.short_circuit_low_temp_var) {
    set_low_temp_var_flag(cpi, &x->part_search_info, xd, vt, thresholds,
                          ref_frame_partition, mi_col, mi_row);
  }
  chroma_check(cpi, x, bsize, y_sad, is_key_frame);

  if (vt2) aom_free(vt2);
  if (vt) aom_free(vt);
  return 0;
}