Coverage Report

Created: 2026-04-01 07:24

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/aom/av1/encoder/var_based_part.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2019, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <limits.h>
13
#include <math.h>
14
#include <stdbool.h>
15
#include <stdio.h>
16
17
#include "config/aom_config.h"
18
#include "config/aom_dsp_rtcd.h"
19
#include "config/av1_rtcd.h"
20
21
#include "aom_dsp/aom_dsp_common.h"
22
#include "aom_dsp/binary_codes_writer.h"
23
#include "aom_ports/mem.h"
24
#include "aom_ports/aom_timer.h"
25
26
#include "av1/common/reconinter.h"
27
#include "av1/common/blockd.h"
28
#include "av1/common/quant_common.h"
29
30
#include "av1/encoder/encodeframe.h"
31
#include "av1/encoder/encodeframe_utils.h"
32
#include "av1/encoder/var_based_part.h"
33
#include "av1/encoder/reconinter_enc.h"
34
#include "av1/encoder/rdopt_utils.h"
35
36
// Possible values for the force_split variable while evaluating variance based
37
// partitioning.
38
enum {
39
  // Evaluate all partition types
40
  PART_EVAL_ALL = 0,
41
  // Force PARTITION_SPLIT
42
  PART_EVAL_ONLY_SPLIT = 1,
43
  // Force PARTITION_NONE
44
  PART_EVAL_ONLY_NONE = 2
45
} UENUM1BYTE(PART_EVAL_STATUS);
46
47
typedef struct {
48
  VPVariance *part_variances;
49
  VPartVar *split[4];
50
} variance_node;
51
52
static inline void tree_to_node(void *data, BLOCK_SIZE bsize,
53
0
                                variance_node *node) {
54
0
  node->part_variances = NULL;
55
0
  switch (bsize) {
56
0
    case BLOCK_128X128: {
57
0
      VP128x128 *vt = (VP128x128 *)data;
58
0
      node->part_variances = &vt->part_variances;
59
0
      for (int split_idx = 0; split_idx < 4; split_idx++)
60
0
        node->split[split_idx] = &vt->split[split_idx].part_variances.none;
61
0
      break;
62
0
    }
63
0
    case BLOCK_64X64: {
64
0
      VP64x64 *vt = (VP64x64 *)data;
65
0
      node->part_variances = &vt->part_variances;
66
0
      for (int split_idx = 0; split_idx < 4; split_idx++)
67
0
        node->split[split_idx] = &vt->split[split_idx].part_variances.none;
68
0
      break;
69
0
    }
70
0
    case BLOCK_32X32: {
71
0
      VP32x32 *vt = (VP32x32 *)data;
72
0
      node->part_variances = &vt->part_variances;
73
0
      for (int split_idx = 0; split_idx < 4; split_idx++)
74
0
        node->split[split_idx] = &vt->split[split_idx].part_variances.none;
75
0
      break;
76
0
    }
77
0
    case BLOCK_16X16: {
78
0
      VP16x16 *vt = (VP16x16 *)data;
79
0
      node->part_variances = &vt->part_variances;
80
0
      for (int split_idx = 0; split_idx < 4; split_idx++)
81
0
        node->split[split_idx] = &vt->split[split_idx].part_variances.none;
82
0
      break;
83
0
    }
84
0
    case BLOCK_8X8: {
85
0
      VP8x8 *vt = (VP8x8 *)data;
86
0
      node->part_variances = &vt->part_variances;
87
0
      for (int split_idx = 0; split_idx < 4; split_idx++)
88
0
        node->split[split_idx] = &vt->split[split_idx].part_variances.none;
89
0
      break;
90
0
    }
91
0
    default: {
92
0
      VP4x4 *vt = (VP4x4 *)data;
93
0
      assert(bsize == BLOCK_4X4);
94
0
      node->part_variances = &vt->part_variances;
95
0
      for (int split_idx = 0; split_idx < 4; split_idx++)
96
0
        node->split[split_idx] = &vt->split[split_idx];
97
0
      break;
98
0
    }
99
0
  }
100
0
}
101
102
// Set variance values given sum square error, sum error, count.
103
0
static inline void fill_variance(uint32_t s2, int32_t s, int c, VPartVar *v) {
104
0
  v->sum_square_error = s2;
105
0
  v->sum_error = s;
106
0
  v->log2_count = c;
107
0
}
108
109
0
static inline void get_variance(VPartVar *v) {
110
0
  v->variance =
111
0
      (int)(256 * (v->sum_square_error -
112
0
                   (uint32_t)(((int64_t)v->sum_error * v->sum_error) >>
113
0
                              v->log2_count)) >>
114
0
            v->log2_count);
115
0
}
116
117
static inline void sum_2_variances(const VPartVar *a, const VPartVar *b,
118
0
                                   VPartVar *r) {
119
0
  assert(a->log2_count == b->log2_count);
120
0
  fill_variance(a->sum_square_error + b->sum_square_error,
121
0
                a->sum_error + b->sum_error, a->log2_count + 1, r);
122
0
}
123
124
0
static inline void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
125
0
  variance_node node;
126
0
  memset(&node, 0, sizeof(node));
127
0
  tree_to_node(data, bsize, &node);
128
0
  sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]);
129
0
  sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]);
130
0
  sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]);
131
0
  sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]);
132
0
  sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1],
133
0
                  &node.part_variances->none);
134
0
}
135
136
static inline void set_block_size(AV1_COMP *const cpi, int mi_row, int mi_col,
137
0
                                  BLOCK_SIZE bsize) {
138
0
  if (cpi->common.mi_params.mi_cols > mi_col &&
139
0
      cpi->common.mi_params.mi_rows > mi_row) {
140
0
    CommonModeInfoParams *mi_params = &cpi->common.mi_params;
141
0
    const int mi_grid_idx = get_mi_grid_idx(mi_params, mi_row, mi_col);
142
0
    const int mi_alloc_idx = get_alloc_mi_idx(mi_params, mi_row, mi_col);
143
0
    MB_MODE_INFO *mi = mi_params->mi_grid_base[mi_grid_idx] =
144
0
        &mi_params->mi_alloc[mi_alloc_idx];
145
0
    mi->bsize = bsize;
146
0
  }
147
0
}
148
149
static int set_vt_partitioning(AV1_COMP *cpi, MACROBLOCKD *const xd,
150
                               const TileInfo *const tile, void *data,
151
                               BLOCK_SIZE bsize, int mi_row, int mi_col,
152
                               int64_t threshold, BLOCK_SIZE bsize_min,
153
0
                               PART_EVAL_STATUS force_split) {
154
0
  AV1_COMMON *const cm = &cpi->common;
155
0
  variance_node vt;
156
0
  const int block_width = mi_size_wide[bsize];
157
0
  const int block_height = mi_size_high[bsize];
158
0
  int bs_width_check = block_width;
159
0
  int bs_height_check = block_height;
160
0
  int bs_width_vert_check = block_width >> 1;
161
0
  int bs_height_horiz_check = block_height >> 1;
162
  // On the right and bottom boundary we only need to check
163
  // if half the bsize fits, because boundary is extended
164
  // up to 64. So do this check only for sb_size = 64X64.
165
0
  if (cm->seq_params->sb_size == BLOCK_64X64) {
166
0
    if (tile->mi_col_end == cm->mi_params.mi_cols) {
167
0
      bs_width_check = (block_width >> 1) + 1;
168
0
      bs_width_vert_check = (block_width >> 2) + 1;
169
0
    }
170
0
    if (tile->mi_row_end == cm->mi_params.mi_rows) {
171
0
      bs_height_check = (block_height >> 1) + 1;
172
0
      bs_height_horiz_check = (block_height >> 2) + 1;
173
0
    }
174
0
  }
175
176
0
  assert(block_height == block_width);
177
0
  tree_to_node(data, bsize, &vt);
178
179
0
  if (mi_col + bs_width_check <= tile->mi_col_end &&
180
0
      mi_row + bs_height_check <= tile->mi_row_end &&
181
0
      force_split == PART_EVAL_ONLY_NONE) {
182
0
    set_block_size(cpi, mi_row, mi_col, bsize);
183
0
    return 1;
184
0
  }
185
0
  if (force_split == PART_EVAL_ONLY_SPLIT) return 0;
186
187
  // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if
188
  // variance is below threshold, otherwise split will be selected.
189
  // No check for vert/horiz split as too few samples for variance.
190
0
  if (bsize == bsize_min) {
191
    // Variance already computed to set the force_split.
192
0
    if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none);
193
0
    if (mi_col + bs_width_check <= tile->mi_col_end &&
194
0
        mi_row + bs_height_check <= tile->mi_row_end &&
195
0
        vt.part_variances->none.variance < threshold) {
196
0
      set_block_size(cpi, mi_row, mi_col, bsize);
197
0
      return 1;
198
0
    }
199
0
    return 0;
200
0
  } else if (bsize > bsize_min) {
201
    // Variance already computed to set the force_split.
202
0
    if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none);
203
    // For key frame: take split for bsize above 32X32 or very high variance.
204
0
    if (frame_is_intra_only(cm) &&
205
0
        (bsize > BLOCK_32X32 ||
206
0
         vt.part_variances->none.variance > (threshold << 4))) {
207
0
      return 0;
208
0
    }
209
    // If variance is low, take the bsize (no split).
210
0
    if (mi_col + bs_width_check <= tile->mi_col_end &&
211
0
        mi_row + bs_height_check <= tile->mi_row_end &&
212
0
        vt.part_variances->none.variance < threshold) {
213
0
      set_block_size(cpi, mi_row, mi_col, bsize);
214
0
      return 1;
215
0
    }
216
    // Check vertical split.
217
0
    if (mi_row + bs_height_check <= tile->mi_row_end &&
218
0
        mi_col + bs_width_vert_check <= tile->mi_col_end) {
219
0
      BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_VERT);
220
0
      BLOCK_SIZE plane_bsize =
221
0
          get_plane_block_size(subsize, xd->plane[AOM_PLANE_U].subsampling_x,
222
0
                               xd->plane[AOM_PLANE_U].subsampling_y);
223
0
      get_variance(&vt.part_variances->vert[0]);
224
0
      get_variance(&vt.part_variances->vert[1]);
225
0
      if (vt.part_variances->vert[0].variance < threshold &&
226
0
          vt.part_variances->vert[1].variance < threshold &&
227
0
          plane_bsize < BLOCK_INVALID) {
228
0
        set_block_size(cpi, mi_row, mi_col, subsize);
229
0
        set_block_size(cpi, mi_row, mi_col + block_width / 2, subsize);
230
0
        return 1;
231
0
      }
232
0
    }
233
    // Check horizontal split.
234
0
    if (mi_col + bs_width_check <= tile->mi_col_end &&
235
0
        mi_row + bs_height_horiz_check <= tile->mi_row_end) {
236
0
      BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_HORZ);
237
0
      BLOCK_SIZE plane_bsize =
238
0
          get_plane_block_size(subsize, xd->plane[AOM_PLANE_U].subsampling_x,
239
0
                               xd->plane[AOM_PLANE_U].subsampling_y);
240
0
      get_variance(&vt.part_variances->horz[0]);
241
0
      get_variance(&vt.part_variances->horz[1]);
242
0
      if (vt.part_variances->horz[0].variance < threshold &&
243
0
          vt.part_variances->horz[1].variance < threshold &&
244
0
          plane_bsize < BLOCK_INVALID) {
245
0
        set_block_size(cpi, mi_row, mi_col, subsize);
246
0
        set_block_size(cpi, mi_row + block_height / 2, mi_col, subsize);
247
0
        return 1;
248
0
      }
249
0
    }
250
0
    return 0;
251
0
  }
252
0
  return 0;
253
0
}
254
255
static inline int all_blks_inside(int x16_idx, int y16_idx, int pixels_wide,
256
0
                                  int pixels_high) {
257
0
  int all_inside = 1;
258
0
  for (int idx = 0; idx < 4; idx++) {
259
0
    all_inside &= ((x16_idx + GET_BLK_IDX_X(idx, 3)) < pixels_wide);
260
0
    all_inside &= ((y16_idx + GET_BLK_IDX_Y(idx, 3)) < pixels_high);
261
0
  }
262
0
  return all_inside;
263
0
}
264
265
#if CONFIG_AV1_HIGHBITDEPTH
266
// TODO(yunqingwang): Perform average of four 8x8 blocks similar to lowbd
267
static inline void fill_variance_8x8avg_highbd(
268
    const uint8_t *src_buf, int src_stride, const uint8_t *dst_buf,
269
    int dst_stride, int x16_idx, int y16_idx, VP16x16 *vst, int pixels_wide,
270
0
    int pixels_high) {
271
0
  for (int idx = 0; idx < 4; idx++) {
272
0
    const int x8_idx = x16_idx + GET_BLK_IDX_X(idx, 3);
273
0
    const int y8_idx = y16_idx + GET_BLK_IDX_Y(idx, 3);
274
0
    unsigned int sse = 0;
275
0
    int sum = 0;
276
0
    if (x8_idx < pixels_wide && y8_idx < pixels_high) {
277
0
      int src_avg = aom_highbd_avg_8x8(src_buf + y8_idx * src_stride + x8_idx,
278
0
                                       src_stride);
279
0
      int dst_avg = aom_highbd_avg_8x8(dst_buf + y8_idx * dst_stride + x8_idx,
280
0
                                       dst_stride);
281
282
0
      sum = src_avg - dst_avg;
283
0
      sse = sum * sum;
284
0
    }
285
0
    fill_variance(sse, sum, 0, &vst->split[idx].part_variances.none);
286
0
  }
287
0
}
288
#endif
289
290
static inline void fill_variance_8x8avg_lowbd(
291
    const uint8_t *src_buf, int src_stride, const uint8_t *dst_buf,
292
    int dst_stride, int x16_idx, int y16_idx, VP16x16 *vst, int pixels_wide,
293
0
    int pixels_high) {
294
0
  unsigned int sse[4] = { 0 };
295
0
  int sum[4] = { 0 };
296
297
0
  if (all_blks_inside(x16_idx, y16_idx, pixels_wide, pixels_high)) {
298
0
    int src_avg[4];
299
0
    int dst_avg[4];
300
0
    aom_avg_8x8_quad(src_buf, src_stride, x16_idx, y16_idx, src_avg);
301
0
    aom_avg_8x8_quad(dst_buf, dst_stride, x16_idx, y16_idx, dst_avg);
302
0
    for (int idx = 0; idx < 4; idx++) {
303
0
      sum[idx] = src_avg[idx] - dst_avg[idx];
304
0
      sse[idx] = sum[idx] * sum[idx];
305
0
    }
306
0
  } else {
307
0
    for (int idx = 0; idx < 4; idx++) {
308
0
      const int x8_idx = x16_idx + GET_BLK_IDX_X(idx, 3);
309
0
      const int y8_idx = y16_idx + GET_BLK_IDX_Y(idx, 3);
310
0
      if (x8_idx < pixels_wide && y8_idx < pixels_high) {
311
0
        int src_avg =
312
0
            aom_avg_8x8(src_buf + y8_idx * src_stride + x8_idx, src_stride);
313
0
        int dst_avg =
314
0
            aom_avg_8x8(dst_buf + y8_idx * dst_stride + x8_idx, dst_stride);
315
0
        sum[idx] = src_avg - dst_avg;
316
0
        sse[idx] = sum[idx] * sum[idx];
317
0
      }
318
0
    }
319
0
  }
320
321
0
  for (int idx = 0; idx < 4; idx++) {
322
0
    fill_variance(sse[idx], sum[idx], 0, &vst->split[idx].part_variances.none);
323
0
  }
324
0
}
325
326
// Obtain parameters required to calculate variance (such as sum, sse, etc,.)
327
// at 8x8 sub-block level for a given 16x16 block.
328
// The function can be called only when is_key_frame is false since sum is
329
// computed between source and reference frames.
330
static inline void fill_variance_8x8avg(const uint8_t *src_buf, int src_stride,
331
                                        const uint8_t *dst_buf, int dst_stride,
332
                                        int x16_idx, int y16_idx, VP16x16 *vst,
333
                                        int highbd_flag, int pixels_wide,
334
0
                                        int pixels_high) {
335
0
#if CONFIG_AV1_HIGHBITDEPTH
336
0
  if (highbd_flag) {
337
0
    fill_variance_8x8avg_highbd(src_buf, src_stride, dst_buf, dst_stride,
338
0
                                x16_idx, y16_idx, vst, pixels_wide,
339
0
                                pixels_high);
340
0
    return;
341
0
  }
342
#else
343
  (void)highbd_flag;
344
#endif  // CONFIG_AV1_HIGHBITDEPTH
345
0
  fill_variance_8x8avg_lowbd(src_buf, src_stride, dst_buf, dst_stride, x16_idx,
346
0
                             y16_idx, vst, pixels_wide, pixels_high);
347
0
}
348
349
static int compute_minmax_8x8(const uint8_t *src_buf, int src_stride,
350
                              const uint8_t *dst_buf, int dst_stride,
351
                              int x16_idx, int y16_idx,
352
#if CONFIG_AV1_HIGHBITDEPTH
353
                              int highbd_flag,
354
#endif
355
0
                              int pixels_wide, int pixels_high) {
356
0
  int minmax_max = 0;
357
0
  int minmax_min = 255;
358
  // Loop over the 4 8x8 subblocks.
359
0
  for (int idx = 0; idx < 4; idx++) {
360
0
    const int x8_idx = x16_idx + GET_BLK_IDX_X(idx, 3);
361
0
    const int y8_idx = y16_idx + GET_BLK_IDX_Y(idx, 3);
362
0
    int min = 0;
363
0
    int max = 0;
364
0
    if (x8_idx < pixels_wide && y8_idx < pixels_high) {
365
0
#if CONFIG_AV1_HIGHBITDEPTH
366
0
      if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
367
0
        aom_highbd_minmax_8x8(
368
0
            src_buf + y8_idx * src_stride + x8_idx, src_stride,
369
0
            dst_buf + y8_idx * dst_stride + x8_idx, dst_stride, &min, &max);
370
0
      } else {
371
0
        aom_minmax_8x8(src_buf + y8_idx * src_stride + x8_idx, src_stride,
372
0
                       dst_buf + y8_idx * dst_stride + x8_idx, dst_stride, &min,
373
0
                       &max);
374
0
      }
375
#else
376
      aom_minmax_8x8(src_buf + y8_idx * src_stride + x8_idx, src_stride,
377
                     dst_buf + y8_idx * dst_stride + x8_idx, dst_stride, &min,
378
                     &max);
379
#endif
380
0
      if ((max - min) > minmax_max) minmax_max = (max - min);
381
0
      if ((max - min) < minmax_min) minmax_min = (max - min);
382
0
    }
383
0
  }
384
0
  return (minmax_max - minmax_min);
385
0
}
386
387
// Function to compute average and variance of 4x4 sub-block.
388
// The function can be called only when is_key_frame is true since sum is
389
// computed using source frame only.
390
static inline void fill_variance_4x4avg(const uint8_t *src_buf, int src_stride,
391
                                        int x8_idx, int y8_idx, VP8x8 *vst,
392
#if CONFIG_AV1_HIGHBITDEPTH
393
                                        int highbd_flag,
394
#endif
395
                                        int pixels_wide, int pixels_high,
396
0
                                        int border_offset_4x4) {
397
0
  for (int idx = 0; idx < 4; idx++) {
398
0
    const int x4_idx = x8_idx + GET_BLK_IDX_X(idx, 2);
399
0
    const int y4_idx = y8_idx + GET_BLK_IDX_Y(idx, 2);
400
0
    unsigned int sse = 0;
401
0
    int sum = 0;
402
0
    if (x4_idx < pixels_wide - border_offset_4x4 &&
403
0
        y4_idx < pixels_high - border_offset_4x4) {
404
0
      int src_avg;
405
0
      int dst_avg = 128;
406
0
#if CONFIG_AV1_HIGHBITDEPTH
407
0
      if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
408
0
        src_avg = aom_highbd_avg_4x4(src_buf + y4_idx * src_stride + x4_idx,
409
0
                                     src_stride);
410
0
      } else {
411
0
        src_avg =
412
0
            aom_avg_4x4(src_buf + y4_idx * src_stride + x4_idx, src_stride);
413
0
      }
414
#else
415
      src_avg = aom_avg_4x4(src_buf + y4_idx * src_stride + x4_idx, src_stride);
416
#endif
417
418
0
      sum = src_avg - dst_avg;
419
0
      sse = sum * sum;
420
0
    }
421
0
    fill_variance(sse, sum, 0, &vst->split[idx].part_variances.none);
422
0
  }
423
0
}
424
425
static int64_t scale_part_thresh_content(int64_t threshold_base, int speed,
426
                                         int non_reference_frame,
427
0
                                         int is_static) {
428
0
  int64_t threshold = threshold_base;
429
0
  if (non_reference_frame && !is_static) threshold = (3 * threshold) >> 1;
430
0
  if (speed >= 8) {
431
0
    return (5 * threshold) >> 2;
432
0
  }
433
0
  return threshold;
434
0
}
435
436
// Tune thresholds less or more aggressively to prefer larger partitions
437
static inline void tune_thresh_based_on_qindex(
438
    AV1_COMP *cpi, int64_t thresholds[], uint64_t block_sad, int current_qindex,
439
    int num_pixels, bool is_segment_id_boosted, int source_sad_nonrd,
440
0
    int lighting_change) {
441
0
  double weight;
442
0
  if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 3) {
443
0
    const int win = 20;
444
0
    if (current_qindex < QINDEX_LARGE_BLOCK_THR - win)
445
0
      weight = 1.0;
446
0
    else if (current_qindex > QINDEX_LARGE_BLOCK_THR + win)
447
0
      weight = 0.0;
448
0
    else
449
0
      weight =
450
0
          1.0 - (current_qindex - QINDEX_LARGE_BLOCK_THR + win) / (2 * win);
451
0
    if (num_pixels > RESOLUTION_480P) {
452
0
      for (int i = 0; i < 4; i++) {
453
0
        thresholds[i] <<= 1;
454
0
      }
455
0
    }
456
0
    if (num_pixels <= RESOLUTION_288P) {
457
0
      thresholds[3] = INT64_MAX;
458
0
      if (is_segment_id_boosted == false) {
459
0
        thresholds[1] <<= 2;
460
0
        thresholds[2] <<= (source_sad_nonrd <= kLowSad) ? 5 : 4;
461
0
      } else {
462
0
        thresholds[1] <<= 1;
463
0
        thresholds[2] <<= 3;
464
0
      }
465
      // Allow for split to 8x8 for superblocks where part of it has
466
      // moving boundary. So allow for sb with source_sad above threshold,
467
      // and avoid very large source_sad or high source content, to avoid
468
      // too many 8x8 within superblock.
469
0
      uint64_t avg_source_sad_thresh = 25000;
470
0
      uint64_t block_sad_low = 25000;
471
0
      uint64_t block_sad_high = 50000;
472
0
      if (cpi->svc.temporal_layer_id == 0 &&
473
0
          cpi->svc.number_temporal_layers > 1) {
474
        // Increase the sad thresholds for base TL0, as reference/LAST is
475
        // 2/4 frames behind (for 2/3 #TL).
476
0
        avg_source_sad_thresh = 40000;
477
0
        block_sad_high = 70000;
478
0
      }
479
0
      if (is_segment_id_boosted == false &&
480
0
          cpi->rc.avg_source_sad < avg_source_sad_thresh &&
481
0
          block_sad > block_sad_low && block_sad < block_sad_high &&
482
0
          !lighting_change) {
483
0
        thresholds[2] = (3 * thresholds[2]) >> 2;
484
0
        thresholds[3] = thresholds[2] << 3;
485
0
      }
486
      // Condition the increase of partition thresholds on the segment
487
      // and the content. Avoid the increase for superblocks which have
488
      // high source sad, unless the whole frame has very high motion
489
      // (i.e, cpi->rc.avg_source_sad is very large, in which case all blocks
490
      // have high source sad).
491
0
    } else if (num_pixels > RESOLUTION_480P && is_segment_id_boosted == false &&
492
0
               (source_sad_nonrd != kHighSad ||
493
0
                cpi->rc.avg_source_sad > 50000)) {
494
0
      thresholds[0] = (3 * thresholds[0]) >> 1;
495
0
      thresholds[3] = INT64_MAX;
496
0
      if (current_qindex > QINDEX_LARGE_BLOCK_THR) {
497
0
        thresholds[1] =
498
0
            (int)((1 - weight) * (thresholds[1] << 1) + weight * thresholds[1]);
499
0
        thresholds[2] =
500
0
            (int)((1 - weight) * (thresholds[2] << 1) + weight * thresholds[2]);
501
0
      }
502
0
    } else if (current_qindex > QINDEX_LARGE_BLOCK_THR &&
503
0
               is_segment_id_boosted == false &&
504
0
               (source_sad_nonrd != kHighSad ||
505
0
                cpi->rc.avg_source_sad > 50000)) {
506
0
      thresholds[1] =
507
0
          (int)((1 - weight) * (thresholds[1] << 2) + weight * thresholds[1]);
508
0
      thresholds[2] =
509
0
          (int)((1 - weight) * (thresholds[2] << 4) + weight * thresholds[2]);
510
0
      thresholds[3] = INT64_MAX;
511
0
    }
512
0
  } else if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 2) {
513
0
    thresholds[1] <<= (source_sad_nonrd <= kLowSad) ? 2 : 0;
514
0
    thresholds[2] =
515
0
        (source_sad_nonrd <= kLowSad) ? (3 * thresholds[2]) : thresholds[2];
516
0
  } else if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 1) {
517
0
    const int fac = (source_sad_nonrd <= kLowSad) ? 2 : 1;
518
0
    if (current_qindex < QINDEX_LARGE_BLOCK_THR - 45)
519
0
      weight = 1.0;
520
0
    else if (current_qindex > QINDEX_LARGE_BLOCK_THR + 45)
521
0
      weight = 0.0;
522
0
    else
523
0
      weight = 1.0 - (current_qindex - QINDEX_LARGE_BLOCK_THR + 45) / (2 * 45);
524
0
    thresholds[1] =
525
0
        (int)((1 - weight) * (thresholds[1] << 1) + weight * thresholds[1]);
526
0
    thresholds[2] =
527
0
        (int)((1 - weight) * (thresholds[2] << 1) + weight * thresholds[2]);
528
0
    thresholds[3] =
529
0
        (int)((1 - weight) * (thresholds[3] << fac) + weight * thresholds[3]);
530
0
  }
531
0
  if (cpi->sf.part_sf.disable_8x8_part_based_on_qidx && (current_qindex < 128))
532
0
    thresholds[3] = INT64_MAX;
533
0
}
534
535
static void set_vbp_thresholds_key_frame(AV1_COMP *cpi, int64_t thresholds[],
536
                                         int64_t threshold_base,
537
                                         int threshold_left_shift,
538
0
                                         int num_pixels) {
539
0
  if (cpi->sf.rt_sf.force_large_partition_blocks_intra) {
540
0
    const int shift_steps =
541
0
        threshold_left_shift - (cpi->oxcf.mode == ALLINTRA ? 7 : 8);
542
0
    assert(shift_steps >= 0);
543
0
    threshold_base <<= shift_steps;
544
0
  }
545
0
  thresholds[0] = threshold_base;
546
0
  thresholds[1] = threshold_base;
547
0
  if (num_pixels < RESOLUTION_720P) {
548
0
    thresholds[2] = threshold_base / 3;
549
0
    thresholds[3] = threshold_base >> 1;
550
0
  } else {
551
0
    int shift_val = 2;
552
0
    if (cpi->sf.rt_sf.force_large_partition_blocks_intra) {
553
0
      shift_val = (cpi->oxcf.mode == ALLINTRA ? 1 : 0);
554
0
    }
555
556
0
    thresholds[2] = threshold_base >> shift_val;
557
0
    thresholds[3] = threshold_base >> shift_val;
558
0
  }
559
0
  thresholds[4] = threshold_base << 2;
560
0
}
561
562
static inline void tune_thresh_based_on_resolution(
563
    AV1_COMP *cpi, int64_t thresholds[], int64_t threshold_base,
564
0
    int current_qindex, int source_sad_rd, int num_pixels) {
565
0
  if (num_pixels >= RESOLUTION_720P) thresholds[3] = thresholds[3] << 1;
566
0
  if (num_pixels <= RESOLUTION_288P) {
567
0
    const int qindex_thr[5][2] = {
568
0
      { 200, 220 }, { 140, 170 }, { 120, 150 }, { 200, 210 }, { 170, 220 },
569
0
    };
570
0
    int th_idx = 0;
571
0
    if (cpi->sf.rt_sf.var_part_based_on_qidx >= 1)
572
0
      th_idx =
573
0
          (source_sad_rd <= kLowSad) ? cpi->sf.rt_sf.var_part_based_on_qidx : 0;
574
0
    if (cpi->sf.rt_sf.var_part_based_on_qidx >= 3)
575
0
      th_idx = cpi->sf.rt_sf.var_part_based_on_qidx;
576
0
    const int qindex_low_thr = qindex_thr[th_idx][0];
577
0
    const int qindex_high_thr = qindex_thr[th_idx][1];
578
0
    if (current_qindex >= qindex_high_thr) {
579
0
      threshold_base = (5 * threshold_base) >> 1;
580
0
      thresholds[1] = threshold_base >> 3;
581
0
      thresholds[2] = threshold_base << 2;
582
0
      thresholds[3] = threshold_base << 5;
583
0
    } else if (current_qindex < qindex_low_thr) {
584
0
      thresholds[1] = threshold_base >> 3;
585
0
      thresholds[2] = threshold_base >> 1;
586
0
      thresholds[3] = threshold_base << 3;
587
0
    } else {
588
0
      int64_t qi_diff_low = current_qindex - qindex_low_thr;
589
0
      int64_t qi_diff_high = qindex_high_thr - current_qindex;
590
0
      int64_t threshold_diff = qindex_high_thr - qindex_low_thr;
591
0
      int64_t threshold_base_high = (5 * threshold_base) >> 1;
592
593
0
      threshold_diff = threshold_diff > 0 ? threshold_diff : 1;
594
0
      threshold_base =
595
0
          (qi_diff_low * threshold_base_high + qi_diff_high * threshold_base) /
596
0
          threshold_diff;
597
0
      thresholds[1] = threshold_base >> 3;
598
0
      thresholds[2] = ((qi_diff_low * threshold_base) +
599
0
                       qi_diff_high * (threshold_base >> 1)) /
600
0
                      threshold_diff;
601
0
      thresholds[3] = ((qi_diff_low * (threshold_base << 5)) +
602
0
                       qi_diff_high * (threshold_base << 3)) /
603
0
                      threshold_diff;
604
0
    }
605
0
  } else if (num_pixels < RESOLUTION_720P) {
606
0
    thresholds[2] = (5 * threshold_base) >> 2;
607
0
  } else if (num_pixels < RESOLUTION_1080P) {
608
0
    thresholds[2] = threshold_base << 1;
609
0
  } else {
610
    // num_pixels >= RESOLUTION_1080P
611
0
    if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) {
612
0
      if (num_pixels < RESOLUTION_1440P) {
613
0
        thresholds[2] = (5 * threshold_base) >> 1;
614
0
      } else {
615
0
        thresholds[2] = (7 * threshold_base) >> 1;
616
0
      }
617
0
    } else {
618
0
      if (cpi->oxcf.speed > 7) {
619
0
        thresholds[2] = 6 * threshold_base;
620
0
      } else {
621
0
        thresholds[2] = 3 * threshold_base;
622
0
      }
623
0
    }
624
0
  }
625
0
}
626
627
// Increase the base partition threshold, based on content and noise level.
628
static inline int64_t tune_base_thresh_content(AV1_COMP *cpi,
629
                                               int64_t threshold_base,
630
                                               int content_lowsumdiff,
631
                                               int source_sad_nonrd,
632
0
                                               int num_pixels) {
633
0
  AV1_COMMON *const cm = &cpi->common;
634
0
  int64_t updated_thresh_base = threshold_base;
635
0
  if (cpi->noise_estimate.enabled && content_lowsumdiff &&
636
0
      num_pixels > RESOLUTION_480P && cm->current_frame.frame_number > 60) {
637
0
    NOISE_LEVEL noise_level =
638
0
        av1_noise_estimate_extract_level(&cpi->noise_estimate);
639
0
    if (noise_level == kHigh)
640
0
      updated_thresh_base = (5 * updated_thresh_base) >> 1;
641
0
    else if (noise_level == kMedium &&
642
0
             !cpi->sf.rt_sf.prefer_large_partition_blocks)
643
0
      updated_thresh_base = (5 * updated_thresh_base) >> 2;
644
0
  }
645
0
  updated_thresh_base = scale_part_thresh_content(
646
0
      updated_thresh_base, cpi->oxcf.speed,
647
0
      cpi->ppi->rtc_ref.non_reference_frame, cpi->rc.frame_source_sad == 0);
648
0
  if (cpi->oxcf.speed >= 11 && source_sad_nonrd > kLowSad &&
649
0
      cpi->rc.high_motion_content_screen_rtc)
650
0
    updated_thresh_base = updated_thresh_base << 4;
651
0
  return updated_thresh_base;
652
0
}
653
654
static inline void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[],
655
                                      uint64_t blk_sad, int qindex,
656
                                      int content_lowsumdiff,
657
                                      int source_sad_nonrd, int source_sad_rd,
658
                                      bool is_segment_id_boosted,
659
0
                                      int lighting_change) {
660
0
  AV1_COMMON *const cm = &cpi->common;
661
0
  const int is_key_frame = frame_is_intra_only(cm);
662
0
  const int threshold_multiplier = is_key_frame ? 120 : 1;
663
0
  const int ac_q = av1_ac_quant_QTX(qindex, 0, cm->seq_params->bit_depth);
664
0
  int64_t threshold_base = (int64_t)(threshold_multiplier * ac_q);
665
0
  const int current_qindex = cm->quant_params.base_qindex;
666
0
  const int threshold_left_shift = cpi->sf.rt_sf.var_part_split_threshold_shift;
667
0
  const int num_pixels = cm->width * cm->height;
668
669
0
  if (is_key_frame) {
670
0
    set_vbp_thresholds_key_frame(cpi, thresholds, threshold_base,
671
0
                                 threshold_left_shift, num_pixels);
672
0
    return;
673
0
  }
674
675
0
  threshold_base = tune_base_thresh_content(
676
0
      cpi, threshold_base, content_lowsumdiff, source_sad_nonrd, num_pixels);
677
0
  thresholds[0] = threshold_base >> 1;
678
0
  thresholds[1] = threshold_base;
679
0
  thresholds[3] = threshold_base << threshold_left_shift;
680
681
0
  tune_thresh_based_on_resolution(cpi, thresholds, threshold_base,
682
0
                                  current_qindex, source_sad_rd, num_pixels);
683
684
0
  tune_thresh_based_on_qindex(cpi, thresholds, blk_sad, current_qindex,
685
0
                              num_pixels, is_segment_id_boosted,
686
0
                              source_sad_nonrd, lighting_change);
687
0
}
688
689
// Set temporal variance low flag for superblock 64x64.
690
// Only first 25 in the array are used in this case.
691
static inline void set_low_temp_var_flag_64x64(CommonModeInfoParams *mi_params,
692
                                               PartitionSearchInfo *part_info,
693
                                               MACROBLOCKD *xd, VP64x64 *vt,
694
                                               const int64_t thresholds[],
695
0
                                               int mi_col, int mi_row) {
696
0
  if (xd->mi[0]->bsize == BLOCK_64X64) {
697
0
    if ((vt->part_variances).none.variance < (thresholds[0] >> 1))
698
0
      part_info->variance_low[0] = 1;
699
0
  } else if (xd->mi[0]->bsize == BLOCK_64X32) {
700
0
    for (int part_idx = 0; part_idx < 2; part_idx++) {
701
0
      if (vt->part_variances.horz[part_idx].variance < (thresholds[0] >> 2))
702
0
        part_info->variance_low[part_idx + 1] = 1;
703
0
    }
704
0
  } else if (xd->mi[0]->bsize == BLOCK_32X64) {
705
0
    for (int part_idx = 0; part_idx < 2; part_idx++) {
706
0
      if (vt->part_variances.vert[part_idx].variance < (thresholds[0] >> 2))
707
0
        part_info->variance_low[part_idx + 3] = 1;
708
0
    }
709
0
  } else {
710
0
    static const int idx[4][2] = { { 0, 0 }, { 0, 8 }, { 8, 0 }, { 8, 8 } };
711
0
    for (int lvl1_idx = 0; lvl1_idx < 4; lvl1_idx++) {
712
0
      const int idx_str = mi_params->mi_stride * (mi_row + idx[lvl1_idx][0]) +
713
0
                          mi_col + idx[lvl1_idx][1];
714
0
      MB_MODE_INFO **this_mi = mi_params->mi_grid_base + idx_str;
715
716
0
      if (mi_params->mi_cols <= mi_col + idx[lvl1_idx][1] ||
717
0
          mi_params->mi_rows <= mi_row + idx[lvl1_idx][0])
718
0
        continue;
719
720
0
      if (*this_mi == NULL) continue;
721
722
0
      if ((*this_mi)->bsize == BLOCK_32X32) {
723
0
        int64_t threshold_32x32 = (5 * thresholds[1]) >> 3;
724
0
        if (vt->split[lvl1_idx].part_variances.none.variance < threshold_32x32)
725
0
          part_info->variance_low[lvl1_idx + 5] = 1;
726
0
      } else {
727
        // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block
728
        // inside.
729
0
        if ((*this_mi)->bsize == BLOCK_16X16 ||
730
0
            (*this_mi)->bsize == BLOCK_32X16 ||
731
0
            (*this_mi)->bsize == BLOCK_16X32) {
732
0
          for (int lvl2_idx = 0; lvl2_idx < 4; lvl2_idx++) {
733
0
            if (vt->split[lvl1_idx]
734
0
                    .split[lvl2_idx]
735
0
                    .part_variances.none.variance < (thresholds[2] >> 8))
736
0
              part_info->variance_low[(lvl1_idx << 2) + lvl2_idx + 9] = 1;
737
0
          }
738
0
        }
739
0
      }
740
0
    }
741
0
  }
742
0
}
743
744
static inline void set_low_temp_var_flag_128x128(
745
    CommonModeInfoParams *mi_params, PartitionSearchInfo *part_info,
746
    MACROBLOCKD *xd, VP128x128 *vt, const int64_t thresholds[], int mi_col,
747
0
    int mi_row) {
748
0
  if (xd->mi[0]->bsize == BLOCK_128X128) {
749
0
    if (vt->part_variances.none.variance < (thresholds[0] >> 1))
750
0
      part_info->variance_low[0] = 1;
751
0
  } else if (xd->mi[0]->bsize == BLOCK_128X64) {
752
0
    for (int part_idx = 0; part_idx < 2; part_idx++) {
753
0
      if (vt->part_variances.horz[part_idx].variance < (thresholds[0] >> 2))
754
0
        part_info->variance_low[part_idx + 1] = 1;
755
0
    }
756
0
  } else if (xd->mi[0]->bsize == BLOCK_64X128) {
757
0
    for (int part_idx = 0; part_idx < 2; part_idx++) {
758
0
      if (vt->part_variances.vert[part_idx].variance < (thresholds[0] >> 2))
759
0
        part_info->variance_low[part_idx + 3] = 1;
760
0
    }
761
0
  } else {
762
0
    static const int idx64[4][2] = {
763
0
      { 0, 0 }, { 0, 16 }, { 16, 0 }, { 16, 16 }
764
0
    };
765
0
    static const int idx32[4][2] = { { 0, 0 }, { 0, 8 }, { 8, 0 }, { 8, 8 } };
766
0
    for (int lvl1_idx = 0; lvl1_idx < 4; lvl1_idx++) {
767
0
      const int idx_str = mi_params->mi_stride * (mi_row + idx64[lvl1_idx][0]) +
768
0
                          mi_col + idx64[lvl1_idx][1];
769
0
      MB_MODE_INFO **mi_64 = mi_params->mi_grid_base + idx_str;
770
0
      if (*mi_64 == NULL) continue;
771
0
      if (mi_params->mi_cols <= mi_col + idx64[lvl1_idx][1] ||
772
0
          mi_params->mi_rows <= mi_row + idx64[lvl1_idx][0])
773
0
        continue;
774
0
      const int64_t threshold_64x64 = (5 * thresholds[1]) >> 3;
775
0
      if ((*mi_64)->bsize == BLOCK_64X64) {
776
0
        if (vt->split[lvl1_idx].part_variances.none.variance < threshold_64x64)
777
0
          part_info->variance_low[5 + lvl1_idx] = 1;
778
0
      } else if ((*mi_64)->bsize == BLOCK_64X32) {
779
0
        for (int part_idx = 0; part_idx < 2; part_idx++)
780
0
          if (vt->split[lvl1_idx].part_variances.horz[part_idx].variance <
781
0
              (threshold_64x64 >> 1))
782
0
            part_info->variance_low[9 + (lvl1_idx << 1) + part_idx] = 1;
783
0
      } else if ((*mi_64)->bsize == BLOCK_32X64) {
784
0
        for (int part_idx = 0; part_idx < 2; part_idx++)
785
0
          if (vt->split[lvl1_idx].part_variances.vert[part_idx].variance <
786
0
              (threshold_64x64 >> 1))
787
0
            part_info->variance_low[17 + (lvl1_idx << 1) + part_idx] = 1;
788
0
      } else {
789
0
        for (int lvl2_idx = 0; lvl2_idx < 4; lvl2_idx++) {
790
0
          const int idx_str1 =
791
0
              mi_params->mi_stride * idx32[lvl2_idx][0] + idx32[lvl2_idx][1];
792
0
          MB_MODE_INFO **mi_32 = mi_params->mi_grid_base + idx_str + idx_str1;
793
0
          if (*mi_32 == NULL) continue;
794
795
0
          if (mi_params->mi_cols <=
796
0
                  mi_col + idx64[lvl1_idx][1] + idx32[lvl2_idx][1] ||
797
0
              mi_params->mi_rows <=
798
0
                  mi_row + idx64[lvl1_idx][0] + idx32[lvl2_idx][0])
799
0
            continue;
800
0
          const int64_t threshold_32x32 = (5 * thresholds[2]) >> 3;
801
0
          if ((*mi_32)->bsize == BLOCK_32X32) {
802
0
            if (vt->split[lvl1_idx]
803
0
                    .split[lvl2_idx]
804
0
                    .part_variances.none.variance < threshold_32x32)
805
0
              part_info->variance_low[25 + (lvl1_idx << 2) + lvl2_idx] = 1;
806
0
          } else {
807
            // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block
808
            // inside.
809
0
            if ((*mi_32)->bsize == BLOCK_16X16 ||
810
0
                (*mi_32)->bsize == BLOCK_32X16 ||
811
0
                (*mi_32)->bsize == BLOCK_16X32) {
812
0
              for (int lvl3_idx = 0; lvl3_idx < 4; lvl3_idx++) {
813
0
                VPartVar *none_var = &vt->split[lvl1_idx]
814
0
                                          .split[lvl2_idx]
815
0
                                          .split[lvl3_idx]
816
0
                                          .part_variances.none;
817
0
                if (none_var->variance < (thresholds[3] >> 8))
818
0
                  part_info->variance_low[41 + (lvl1_idx << 4) +
819
0
                                          (lvl2_idx << 2) + lvl3_idx] = 1;
820
0
              }
821
0
            }
822
0
          }
823
0
        }
824
0
      }
825
0
    }
826
0
  }
827
0
}
828
829
static inline void set_low_temp_var_flag(
830
    AV1_COMP *cpi, PartitionSearchInfo *part_info, MACROBLOCKD *xd,
831
    VP128x128 *vt, int64_t thresholds[], MV_REFERENCE_FRAME ref_frame_partition,
832
0
    int mi_col, int mi_row, const bool is_small_sb) {
833
0
  AV1_COMMON *const cm = &cpi->common;
834
  // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected.
835
  // If the temporal variance is small set the flag
836
  // variance_low for the block. The variance threshold can be adjusted, the
837
  // higher the more aggressive.
838
0
  if (ref_frame_partition == LAST_FRAME) {
839
0
    if (is_small_sb)
840
0
      set_low_temp_var_flag_64x64(&cm->mi_params, part_info, xd,
841
0
                                  &(vt->split[0]), thresholds, mi_col, mi_row);
842
0
    else
843
0
      set_low_temp_var_flag_128x128(&cm->mi_params, part_info, xd, vt,
844
0
                                    thresholds, mi_col, mi_row);
845
0
  }
846
0
}
847
848
// Maps the (row, col) position of a 16x16 block inside a 64x64 area to its
// index in the variance_low[] flag array (entries 9..24, matching the
// 16x16-level flags written by set_low_temp_var_flag_64x64()).
static const int pos_shift_16x16[4][4] = {
  { 9, 10, 13, 14 }, { 11, 12, 15, 16 }, { 17, 18, 21, 22 }, { 19, 20, 23, 24 }
};
851
852
int av1_get_force_skip_low_temp_var_small_sb(const uint8_t *variance_low,
853
                                             int mi_row, int mi_col,
854
0
                                             BLOCK_SIZE bsize) {
855
  // Relative indices of MB inside the superblock.
856
0
  const int mi_x = mi_row & 0xF;
857
0
  const int mi_y = mi_col & 0xF;
858
  // Relative indices of 16x16 block inside the superblock.
859
0
  const int i = mi_x >> 2;
860
0
  const int j = mi_y >> 2;
861
0
  int force_skip_low_temp_var = 0;
862
  // Set force_skip_low_temp_var based on the block size and block offset.
863
0
  switch (bsize) {
864
0
    case BLOCK_64X64: force_skip_low_temp_var = variance_low[0]; break;
865
0
    case BLOCK_64X32:
866
0
      if (!mi_y && !mi_x) {
867
0
        force_skip_low_temp_var = variance_low[1];
868
0
      } else if (!mi_y && mi_x) {
869
0
        force_skip_low_temp_var = variance_low[2];
870
0
      }
871
0
      break;
872
0
    case BLOCK_32X64:
873
0
      if (!mi_y && !mi_x) {
874
0
        force_skip_low_temp_var = variance_low[3];
875
0
      } else if (mi_y && !mi_x) {
876
0
        force_skip_low_temp_var = variance_low[4];
877
0
      }
878
0
      break;
879
0
    case BLOCK_32X32:
880
0
      if (!mi_y && !mi_x) {
881
0
        force_skip_low_temp_var = variance_low[5];
882
0
      } else if (mi_y && !mi_x) {
883
0
        force_skip_low_temp_var = variance_low[6];
884
0
      } else if (!mi_y && mi_x) {
885
0
        force_skip_low_temp_var = variance_low[7];
886
0
      } else if (mi_y && mi_x) {
887
0
        force_skip_low_temp_var = variance_low[8];
888
0
      }
889
0
      break;
890
0
    case BLOCK_32X16:
891
0
    case BLOCK_16X32:
892
0
    case BLOCK_16X16:
893
0
      force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]];
894
0
      break;
895
0
    default: break;
896
0
  }
897
898
0
  return force_skip_low_temp_var;
899
0
}
900
901
// Returns the force-skip (low temporal variance) flag for a block inside a
// 128x128 superblock, reading the variance_low[] array filled by
// set_low_temp_var_flag_128x128().
int av1_get_force_skip_low_temp_var(const uint8_t *variance_low, int mi_row,
                                    int mi_col, BLOCK_SIZE bsize) {
  int force_skip_low_temp_var = 0;
  int x, y;
  // Index of the enclosing 64x64 block within the 128x128 superblock.
  x = (mi_col & 0x1F) >> 4;
  // y = (mi_row & 0x1F) >> 4;
  // const int idx64 = (y << 1) + x;
  // The 0x17 mask drops bit 3, so after >>3 only bit 4 survives: y is 0 or
  // 2, making idx64 == 2*row64 + col64 -- equivalent to the commented-out
  // form above without the extra shift.
  y = (mi_row & 0x17) >> 3;
  const int idx64 = y + x;

  // Index of the enclosing 32x32 block within its 64x64 block; same mask
  // trick (0xB drops bit 2, so y is 0 or 2).
  x = (mi_col & 0xF) >> 3;
  // y = (mi_row & 0xF) >> 3;
  // const int idx32 = (y << 1) + x;
  y = (mi_row & 0xB) >> 2;
  const int idx32 = y + x;

  // Index of the enclosing 16x16 block within its 32x32 block; same mask
  // trick (0x5 drops bit 1, so y is 0 or 2).
  x = (mi_col & 0x7) >> 2;
  // y = (mi_row & 0x7) >> 2;
  // const int idx16 = (y << 1) + x;
  y = (mi_row & 0x5) >> 1;
  const int idx16 = y + x;
  // Set force_skip_low_temp_var based on the block size and block offset.
  switch (bsize) {
    case BLOCK_128X128: force_skip_low_temp_var = variance_low[0]; break;
    case BLOCK_128X64:
      assert((mi_col & 0x1F) == 0);
      // Flags 1 (top half) and 2 (bottom half).
      force_skip_low_temp_var = variance_low[1 + ((mi_row & 0x1F) != 0)];
      break;
    case BLOCK_64X128:
      assert((mi_row & 0x1F) == 0);
      // Flags 3 (left half) and 4 (right half).
      force_skip_low_temp_var = variance_low[3 + ((mi_col & 0x1F) != 0)];
      break;
    case BLOCK_64X64:
      // Location of this 64x64 block inside the 128x128 superblock
      force_skip_low_temp_var = variance_low[5 + idx64];
      break;
    case BLOCK_64X32:
      x = (mi_col & 0x1F) >> 4;
      y = (mi_row & 0x1F) >> 3;
      /*
      .---------------.---------------.
      | x=0,y=0,idx=0 | x=1,y=0,idx=2 |
      :---------------+---------------:
      | x=0,y=1,idx=1 | x=1,y=1,idx=3 |
      :---------------+---------------:
      | x=0,y=2,idx=4 | x=1,y=2,idx=6 |
      :---------------+---------------:
      | x=0,y=3,idx=5 | x=1,y=3,idx=7 |
      '---------------'---------------'
      */
      const int idx64x32 = (x << 1) + (y % 2) + ((y >> 1) << 2);
      force_skip_low_temp_var = variance_low[9 + idx64x32];
      break;
    case BLOCK_32X64:
      x = (mi_col & 0x1F) >> 3;
      y = (mi_row & 0x1F) >> 4;
      const int idx32x64 = (y << 2) + x;
      force_skip_low_temp_var = variance_low[17 + idx32x64];
      break;
    case BLOCK_32X32:
      // Flags 25..40: four 32x32 blocks per 64x64 block.
      force_skip_low_temp_var = variance_low[25 + (idx64 << 2) + idx32];
      break;
    case BLOCK_32X16:
    case BLOCK_16X32:
    case BLOCK_16X16:
      // Flags 41..104: sixteen 16x16 blocks per 64x64 block.
      force_skip_low_temp_var =
          variance_low[41 + (idx64 << 4) + (idx32 << 2) + idx16];
      break;
    default: break;
  }
  return force_skip_low_temp_var;
}
973
974
// Initializes the frame-level variance-partition thresholds (and the minmax
// threshold) for the given qindex. No-op unless variance-based partitioning
// is the selected partition search type.
void av1_set_variance_partition_thresholds(AV1_COMP *cpi, int qindex,
                                           int content_lowsumdiff) {
  SPEED_FEATURES *const sf = &cpi->sf;
  if (sf->part_sf.partition_search_type != VAR_BASED_PARTITION) return;

  // Frame-level call: block SAD / source SAD / segment / lighting inputs
  // are not available here, so pass zeros.
  set_vbp_thresholds(cpi, cpi->vbp_info.thresholds, 0, qindex,
                     content_lowsumdiff, 0, 0, 0, 0);
  // The threshold below is not changed locally.
  cpi->vbp_info.threshold_minmax = 15 + (qindex >> 3);
}
986
987
// Computes the superblock chroma (U/V) SADs against LAST (and, when their
// luma SADs are valid, GOLDEN and ALTREF), and sets the per-plane
// color-sensitivity flags on |x| by comparing each chroma SAD to shifted /
// scaled versions of the corresponding luma SAD. |uv_sad| receives the two
// U/V SADs versus LAST. No-op on key frames and monochrome.
static inline void chroma_check(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
                                unsigned int y_sad, unsigned int y_sad_g,
                                unsigned int y_sad_alt, bool is_key_frame,
                                bool zero_motion, unsigned int *uv_sad) {
  MACROBLOCKD *xd = &x->e_mbd;
  const int source_sad_nonrd = x->content_state_sb.source_sad_nonrd;
  // Chroma SAD must exceed y_sad >> shift_upper_limit to flag the plane as
  // color-sensitive (1), and fall below y_sad >> shift_lower_limit to flag
  // it as insensitive (0); in between it is left borderline (2).
  int shift_upper_limit = 1;
  int shift_lower_limit = 3;
  int fac_uv = 6;
  if (is_key_frame || cpi->oxcf.tool_cfg.enable_monochrome) return;

  // Use lower threshold (more conservative in setting color flag) for
  // higher resolutions non-screen, which tend to have more camera noise.
  // Since this may be used to skip compound mode in nonrd pickmode, which
  // is generally more effective for higher resolutions, better to be more
  // conservative.
  if (cpi->oxcf.tune_cfg.content != AOM_CONTENT_SCREEN) {
    if (cpi->common.width * cpi->common.height >= RESOLUTION_1080P)
      fac_uv = 3;
    else
      fac_uv = 5;
  }
  // Screen content / high-motion cases adjust the shift limits.
  if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
      cpi->rc.high_source_sad) {
    shift_lower_limit = 7;
  } else if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
             cpi->rc.percent_blocks_with_motion > 90 &&
             cpi->rc.frame_source_sad > 10000 && source_sad_nonrd > kLowSad) {
    shift_lower_limit = 8;
    shift_upper_limit = 3;
  } else if (source_sad_nonrd >= kMedSad && x->source_variance > 500 &&
             cpi->common.width * cpi->common.height >= 640 * 360) {
    shift_upper_limit = 2;
    shift_lower_limit = source_sad_nonrd > kMedSad ? 5 : 4;
  }

  MB_MODE_INFO *mi = xd->mi[0];
  const AV1_COMMON *const cm = &cpi->common;
  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);
  const YV12_BUFFER_CONFIG *yv12_g = get_ref_frame_yv12_buf(cm, GOLDEN_FRAME);
  const YV12_BUFFER_CONFIG *yv12_alt = get_ref_frame_yv12_buf(cm, ALTREF_FRAME);
  const struct scale_factors *const sf =
      get_ref_scale_factors_const(cm, LAST_FRAME);
  struct buf_2d dst;
  unsigned int uv_sad_g = 0;
  unsigned int uv_sad_alt = 0;

  for (int plane = AOM_PLANE_U; plane < MAX_MB_PLANE; ++plane) {
    struct macroblock_plane *p = &x->plane[plane];
    struct macroblockd_plane *pd = &xd->plane[plane];
    const BLOCK_SIZE bs =
        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);

    if (bs != BLOCK_INVALID) {
      // For last:
      if (zero_motion) {
        if (mi->ref_frame[0] == LAST_FRAME) {
          // Prediction planes already point at LAST; use them directly.
          uv_sad[plane - 1] = cpi->ppi->fn_ptr[bs].sdf(
              p->src.buf, p->src.stride, pd->pre[0].buf, pd->pre[0].stride);
        } else {
          // Prediction planes point at another ref; set up LAST explicitly.
          uint8_t *src = (plane == 1) ? yv12->u_buffer : yv12->v_buffer;
          setup_pred_plane(&dst, xd->mi[0]->bsize, src, yv12->uv_crop_width,
                           yv12->uv_crop_height, yv12->uv_stride, xd->mi_row,
                           xd->mi_col, sf, xd->plane[plane].subsampling_x,
                           xd->plane[plane].subsampling_y);

          uv_sad[plane - 1] = cpi->ppi->fn_ptr[bs].sdf(
              p->src.buf, p->src.stride, dst.buf, dst.stride);
        }
      } else {
        // Non-zero motion: compare against the reconstructed destination.
        uv_sad[plane - 1] = cpi->ppi->fn_ptr[bs].sdf(
            p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride);
      }

      // For golden: only when a valid luma SAD versus GOLDEN exists.
      if (y_sad_g != UINT_MAX) {
        uint8_t *src = (plane == 1) ? yv12_g->u_buffer : yv12_g->v_buffer;
        setup_pred_plane(&dst, xd->mi[0]->bsize, src, yv12_g->uv_crop_width,
                         yv12_g->uv_crop_height, yv12_g->uv_stride, xd->mi_row,
                         xd->mi_col, sf, xd->plane[plane].subsampling_x,
                         xd->plane[plane].subsampling_y);
        uv_sad_g = cpi->ppi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, dst.buf,
                                            dst.stride);
      }

      // For altref: only when a valid luma SAD versus ALTREF exists.
      if (y_sad_alt != UINT_MAX) {
        uint8_t *src = (plane == 1) ? yv12_alt->u_buffer : yv12_alt->v_buffer;
        setup_pred_plane(&dst, xd->mi[0]->bsize, src, yv12_alt->uv_crop_width,
                         yv12_alt->uv_crop_height, yv12_alt->uv_stride,
                         xd->mi_row, xd->mi_col, sf,
                         xd->plane[plane].subsampling_x,
                         xd->plane[plane].subsampling_y);
        uv_sad_alt = cpi->ppi->fn_ptr[bs].sdf(p->src.buf, p->src.stride,
                                              dst.buf, dst.stride);
      }
    }

    // Classify this plane: 1 = color sensitive, 0 = not, 2 = borderline.
    // NOTE(review): when bs == BLOCK_INVALID, uv_sad[plane - 1] and the
    // golden/altref SADs keep their previous values -- presumably benign,
    // but worth confirming against the callers.
    if (uv_sad[plane - 1] > (y_sad >> shift_upper_limit))
      x->color_sensitivity_sb[COLOR_SENS_IDX(plane)] = 1;
    else if (uv_sad[plane - 1] < (y_sad >> shift_lower_limit))
      x->color_sensitivity_sb[COLOR_SENS_IDX(plane)] = 0;
    // Borderline case: to be refined at coding block level in nonrd_pickmode,
    // for coding block size < sb_size.
    else
      x->color_sensitivity_sb[COLOR_SENS_IDX(plane)] = 2;

    x->color_sensitivity_sb_g[COLOR_SENS_IDX(plane)] =
        uv_sad_g > y_sad_g / fac_uv;
    x->color_sensitivity_sb_alt[COLOR_SENS_IDX(plane)] =
        uv_sad_alt > y_sad_alt / fac_uv;
  }
}
1100
1101
// Fills the leaves of the variance partition tree |vt| for the current
// superblock: computes down-sampled variances (4x4 averages on key frames,
// 8x8 source/prediction differences otherwise), accumulates per-32x32
// average/min/max 16x16 variances, and forces PARTITION_SPLIT at every
// level above a 16x16 block whose variance exceeds thresholds[3].
static void fill_variance_tree_leaves(
    AV1_COMP *cpi, MACROBLOCK *x, VP128x128 *vt, PART_EVAL_STATUS *force_split,
    int avg_16x16[][4], int maxvar_16x16[][4], int minvar_16x16[][4],
    int64_t *thresholds, const uint8_t *src_buf, int src_stride,
    const uint8_t *dst_buf, int dst_stride, bool is_key_frame,
    const bool is_small_sb) {
  MACROBLOCKD *xd = &x->e_mbd;
  // Small superblock (64x64) has a single 64x64 node; 128x128 has four.
  const int num_64x64_blocks = is_small_sb ? 1 : 4;
  // TODO(kyslov) Bring back compute_minmax_variance with content type detection
  const int compute_minmax_variance = 0;
  const int segment_id = xd->mi[0]->segment_id;
  int pixels_wide = 128, pixels_high = 128;
  int border_offset_4x4 = 0;
  int temporal_denoising = cpi->sf.rt_sf.use_rtc_tf;
  // dst_buf pointer is not used for is_key_frame, so it should be NULL.
  assert(IMPLIES(is_key_frame, dst_buf == NULL));
  if (is_small_sb) {
    pixels_wide = 64;
    pixels_high = 64;
  }
  // Clip the active area to the frame: mb_to_*_edge is negative (in 1/8 pel
  // units) when the superblock extends past the frame boundary.
  if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3);
  if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3);
#if CONFIG_AV1_TEMPORAL_DENOISING
  temporal_denoising |= cpi->oxcf.noise_sensitivity;
#endif
  // For temporal filtering or temporal denoiser enabled: since the source
  // is modified we need to avoid 4x4 avg along superblock boundary, since
  // simd code will load 8 pixels for 4x4 avg and so can access source
  // data outside superblock (while its being modified by temporal filter).
  // Temporal filtering is never done on key frames.
  if (!is_key_frame && temporal_denoising) border_offset_4x4 = 4;
  for (int blk64_idx = 0; blk64_idx < num_64x64_blocks; blk64_idx++) {
    const int x64_idx = GET_BLK_IDX_X(blk64_idx, 6);
    const int y64_idx = GET_BLK_IDX_Y(blk64_idx, 6);
    const int blk64_scale_idx = blk64_idx << 2;
    force_split[blk64_idx + 1] = PART_EVAL_ALL;

    // Walk each 32x32 block within this 64x64.
    for (int lvl1_idx = 0; lvl1_idx < 4; lvl1_idx++) {
      const int x32_idx = x64_idx + GET_BLK_IDX_X(lvl1_idx, 5);
      const int y32_idx = y64_idx + GET_BLK_IDX_Y(lvl1_idx, 5);
      const int lvl1_scale_idx = (blk64_scale_idx + lvl1_idx) << 2;
      force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ALL;
      avg_16x16[blk64_idx][lvl1_idx] = 0;
      maxvar_16x16[blk64_idx][lvl1_idx] = 0;
      minvar_16x16[blk64_idx][lvl1_idx] = INT_MAX;
      // Walk each 16x16 block within this 32x32.
      for (int lvl2_idx = 0; lvl2_idx < 4; lvl2_idx++) {
        const int x16_idx = x32_idx + GET_BLK_IDX_X(lvl2_idx, 4);
        const int y16_idx = y32_idx + GET_BLK_IDX_Y(lvl2_idx, 4);
        const int split_index = 21 + lvl1_scale_idx + lvl2_idx;
        VP16x16 *vst = &vt->split[blk64_idx].split[lvl1_idx].split[lvl2_idx];
        force_split[split_index] = PART_EVAL_ALL;
        if (is_key_frame) {
          // Go down to 4x4 down-sampling for variance.
          for (int lvl3_idx = 0; lvl3_idx < 4; lvl3_idx++) {
            const int x8_idx = x16_idx + GET_BLK_IDX_X(lvl3_idx, 3);
            const int y8_idx = y16_idx + GET_BLK_IDX_Y(lvl3_idx, 3);
            VP8x8 *vst2 = &vst->split[lvl3_idx];
            fill_variance_4x4avg(src_buf, src_stride, x8_idx, y8_idx, vst2,
#if CONFIG_AV1_HIGHBITDEPTH
                                 xd->cur_buf->flags,
#endif
                                 pixels_wide, pixels_high, border_offset_4x4);
          }
        } else {
          // Inter frame: variance of source vs. prediction at 8x8 averages.
          fill_variance_8x8avg(src_buf, src_stride, dst_buf, dst_stride,
                               x16_idx, y16_idx, vst, is_cur_buf_hbd(xd),
                               pixels_wide, pixels_high);

          fill_variance_tree(vst, BLOCK_16X16);
          VPartVar *none_var = &vt->split[blk64_idx]
                                    .split[lvl1_idx]
                                    .split[lvl2_idx]
                                    .part_variances.none;
          get_variance(none_var);
          const int val_none_var = none_var->variance;
          // Track per-32x32 average/min/max of the 16x16 variances.
          avg_16x16[blk64_idx][lvl1_idx] += val_none_var;
          minvar_16x16[blk64_idx][lvl1_idx] =
              AOMMIN(minvar_16x16[blk64_idx][lvl1_idx], val_none_var);
          maxvar_16x16[blk64_idx][lvl1_idx] =
              AOMMAX(maxvar_16x16[blk64_idx][lvl1_idx], val_none_var);
          if (val_none_var > thresholds[3]) {
            // 16X16 variance is above threshold for split, so force split to
            // 8x8 for this 16x16 block (this also forces splits for upper
            // levels).
            force_split[split_index] = PART_EVAL_ONLY_SPLIT;
            force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ONLY_SPLIT;
            force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT;
            force_split[0] = PART_EVAL_ONLY_SPLIT;
          } else if (!cyclic_refresh_segment_id_boosted(segment_id) &&
                     compute_minmax_variance && val_none_var > thresholds[2]) {
            // We have some nominal amount of 16x16 variance (based on average),
            // compute the minmax over the 8x8 sub-blocks, and if above
            // threshold, force split to 8x8 block for this 16x16 block.
            // NOTE: currently dead code -- compute_minmax_variance is 0.
            int minmax = compute_minmax_8x8(src_buf, src_stride, dst_buf,
                                            dst_stride, x16_idx, y16_idx,
#if CONFIG_AV1_HIGHBITDEPTH
                                            xd->cur_buf->flags,
#endif
                                            pixels_wide, pixels_high);
            const int thresh_minmax = (int)cpi->vbp_info.threshold_minmax;
            if (minmax > thresh_minmax) {
              force_split[split_index] = PART_EVAL_ONLY_SPLIT;
              force_split[5 + blk64_scale_idx + lvl1_idx] =
                  PART_EVAL_ONLY_SPLIT;
              force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT;
              force_split[0] = PART_EVAL_ONLY_SPLIT;
            }
          }
        }
      }
    }
  }
}
1214
1215
static inline void set_ref_frame_for_partition(
1216
    AV1_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
1217
    MV_REFERENCE_FRAME *ref_frame_partition, MB_MODE_INFO *mi,
1218
    unsigned int *y_sad, unsigned int *y_sad_g, unsigned int *y_sad_alt,
1219
    const YV12_BUFFER_CONFIG *yv12_g, const YV12_BUFFER_CONFIG *yv12_alt,
1220
0
    int mi_row, int mi_col, int num_planes) {
1221
0
  AV1_COMMON *const cm = &cpi->common;
1222
0
  const double fac =
1223
0
      (cpi->svc.spatial_layer_id > 0 && cpi->svc.has_lower_quality_layer) ? 1.0
1224
0
                                                                          : 0.9;
1225
0
  const bool is_set_golden_ref_frame =
1226
0
      *y_sad_g < fac * *y_sad && *y_sad_g < *y_sad_alt;
1227
0
  const bool is_set_altref_ref_frame =
1228
0
      *y_sad_alt < fac * *y_sad && *y_sad_alt < *y_sad_g;
1229
1230
0
  if (is_set_golden_ref_frame) {
1231
0
    av1_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
1232
0
                         get_ref_scale_factors(cm, GOLDEN_FRAME), num_planes);
1233
0
    mi->ref_frame[0] = GOLDEN_FRAME;
1234
0
    mi->mv[0].as_int = 0;
1235
0
    *y_sad = *y_sad_g;
1236
0
    *ref_frame_partition = GOLDEN_FRAME;
1237
0
    x->nonrd_prune_ref_frame_search = 0;
1238
0
    x->sb_me_partition = 0;
1239
0
  } else if (is_set_altref_ref_frame) {
1240
0
    av1_setup_pre_planes(xd, 0, yv12_alt, mi_row, mi_col,
1241
0
                         get_ref_scale_factors(cm, ALTREF_FRAME), num_planes);
1242
0
    mi->ref_frame[0] = ALTREF_FRAME;
1243
0
    mi->mv[0].as_int = 0;
1244
0
    *y_sad = *y_sad_alt;
1245
0
    *ref_frame_partition = ALTREF_FRAME;
1246
0
    x->nonrd_prune_ref_frame_search = 0;
1247
0
    x->sb_me_partition = 0;
1248
0
  } else {
1249
0
    *ref_frame_partition = LAST_FRAME;
1250
0
    x->nonrd_prune_ref_frame_search =
1251
0
        cpi->sf.rt_sf.nonrd_prune_ref_frame_search;
1252
0
  }
1253
0
}
1254
1255
static AOM_FORCE_INLINE int mv_distance(const FULLPEL_MV *mv0,
1256
0
                                        const FULLPEL_MV *mv1) {
1257
0
  return abs(mv0->row - mv1->row) + abs(mv0->col - mv1->col);
1258
0
}
1259
1260
// Evaluates the motion vectors of the above and left neighbour superblocks
// (when inter-coded against LAST_FRAME) as candidates for this superblock:
// if a neighbour MV yields a sufficiently lower luma SAD than the current
// best (*y_sad), adopt it into mi->mv[0] and update *y_sad.
static inline void evaluate_neighbour_mvs(AV1_COMP *cpi, MACROBLOCK *x,
                                          unsigned int *y_sad, bool is_small_sb,
                                          int est_motion) {
  const int source_sad_nonrd = x->content_state_sb.source_sad_nonrd;
  // TODO(yunqingwang@google.com): test if this condition works with other
  // speeds.
  if (est_motion > 2 && source_sad_nonrd > kMedSad) return;

  MACROBLOCKD *xd = &x->e_mbd;
  BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128;
  MB_MODE_INFO *mi = xd->mi[0];

  // SADs default to UINT_MAX so an unevaluated neighbour never wins below.
  unsigned int above_y_sad = UINT_MAX;
  unsigned int left_y_sad = UINT_MAX;
  FULLPEL_MV above_mv = kZeroFullMv;
  FULLPEL_MV left_mv = kZeroFullMv;
  SubpelMvLimits subpel_mv_limits;
  const MV dummy_mv = { 0, 0 };
  av1_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, &dummy_mv);

  // Current best MV
  FULLPEL_MV best_mv = get_fullmv_from_mv(&mi->mv[0].as_mv);
  // Acceptance factor: a neighbour must beat multi/8 of the current SAD.
  const int multi = (est_motion > 2 && source_sad_nonrd > kLowSad) ? 7 : 8;

  if (xd->up_available) {
    const MB_MODE_INFO *above_mbmi = xd->above_mbmi;
    // Only consider inter-coded neighbours predicted from LAST_FRAME.
    if (above_mbmi->mode >= INTRA_MODE_END &&
        above_mbmi->ref_frame[0] == LAST_FRAME) {
      MV temp = above_mbmi->mv[0].as_mv;
      clamp_mv(&temp, &subpel_mv_limits);
      above_mv = get_fullmv_from_mv(&temp);

      // Skip the SAD if the candidate equals the current best MV.
      if (mv_distance(&best_mv, &above_mv) > 0) {
        uint8_t const *ref_buf =
            get_buf_from_fullmv(&xd->plane[0].pre[0], &above_mv);
        above_y_sad = cpi->ppi->fn_ptr[bsize].sdf(
            x->plane[0].src.buf, x->plane[0].src.stride, ref_buf,
            xd->plane[0].pre[0].stride);
      }
    }
  }
  if (xd->left_available) {
    const MB_MODE_INFO *left_mbmi = xd->left_mbmi;
    if (left_mbmi->mode >= INTRA_MODE_END &&
        left_mbmi->ref_frame[0] == LAST_FRAME) {
      MV temp = left_mbmi->mv[0].as_mv;
      clamp_mv(&temp, &subpel_mv_limits);
      left_mv = get_fullmv_from_mv(&temp);

      // Skip the SAD if the left MV duplicates the best or the above MV.
      if (mv_distance(&best_mv, &left_mv) > 0 &&
          mv_distance(&above_mv, &left_mv) > 0) {
        uint8_t const *ref_buf =
            get_buf_from_fullmv(&xd->plane[0].pre[0], &left_mv);
        left_y_sad = cpi->ppi->fn_ptr[bsize].sdf(
            x->plane[0].src.buf, x->plane[0].src.stride, ref_buf,
            xd->plane[0].pre[0].stride);
      }
    }
  }

  // Adopt whichever neighbour MV improves the SAD by the required margin;
  // the second test uses the possibly-updated *y_sad from the first.
  if (above_y_sad < ((multi * *y_sad) >> 3) && above_y_sad < left_y_sad) {
    *y_sad = above_y_sad;
    mi->mv[0].as_mv = get_mv_from_fullmv(&above_mv);
    clamp_mv(&mi->mv[0].as_mv, &subpel_mv_limits);
  }
  if (left_y_sad < ((multi * *y_sad) >> 3) && left_y_sad < above_y_sad) {
    *y_sad = left_y_sad;
    mi->mv[0].as_mv = get_mv_from_fullmv(&left_mv);
    clamp_mv(&mi->mv[0].as_mv, &subpel_mv_limits);
  }
}
1331
1332
// Runs coarse (integral-projection) motion estimation over the whole
// superblock against the already-set-up LAST reference, writing the resulting
// SAD to *y_sad and the resulting MV into xd->mi[0]->mv[0].
// For screen content it may additionally latch the estimated motion for reuse
// in nonrd_pickmode (x->sb_me_partition / x->sb_me_mv) and detect pure
// horizontal/vertical scroll (x->sb_col_scroll / x->sb_row_scroll).
static void do_int_pro_motion_estimation(AV1_COMP *cpi, MACROBLOCK *x,
                                         unsigned int *y_sad, int mi_row,
                                         int mi_col, int source_sad_nonrd) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mi = xd->mi[0];
  const int is_screen = cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN;
  // Widen the column search for screen content only when the sad is high and
  // there is evidence of column scrolling (base temporal layer, or repeated
  // column-scroll detections on TL0), and no high-motion flag is set.
  const int increase_col_sw = source_sad_nonrd > kMedSad &&
                              !cpi->rc.high_motion_content_screen_rtc &&
                              (cpi->svc.temporal_layer_id == 0 ||
                               cpi->rc.num_col_blscroll_last_tl0 > 2);
  // Non-screen content searches half the superblock width/height.
  int me_search_size_col = is_screen
                               ? increase_col_sw ? 512 : 96
                               : block_size_wide[cm->seq_params->sb_size] >> 1;
  // For screen use larger search size row motion to capture
  // vertical scroll, which can be larger motion.
  int me_search_size_row = is_screen
                               ? source_sad_nonrd > kMedSad ? 512 : 192
                               : block_size_high[cm->seq_params->sb_size] >> 1;
  // Double the search range for 4K+ content on the base temporal layer of a
  // multi-layer stream, where frame distance (and hence motion) is larger.
  if (cm->width * cm->height >= 3840 * 2160 &&
      cpi->svc.temporal_layer_id == 0 && cpi->svc.number_temporal_layers > 1) {
    me_search_size_row = me_search_size_row << 1;
    me_search_size_col = me_search_size_col << 1;
  }
  unsigned int y_sad_zero;
  *y_sad = av1_int_pro_motion_estimation(
      cpi, x, cm->seq_params->sb_size, mi_row, mi_col, &kZeroMv, &y_sad_zero,
      me_search_size_col, me_search_size_row);
  // The logic below selects whether the motion estimated in the
  // int_pro_motion() will be used in nonrd_pickmode. Only do this
  // for screen for now.
  if (is_screen) {
    unsigned int thresh_sad =
        (cm->seq_params->sb_size == BLOCK_128X128) ? 50000 : 20000;
    // Accept the estimated motion only if it at least halves the zero-MV SAD
    // and is below an absolute threshold.
    if (*y_sad < (y_sad_zero >> 1) && *y_sad < thresh_sad) {
      x->sb_me_partition = 1;
      x->sb_me_mv.as_int = mi->mv[0].as_int;
      // Scroll detection (base temporal layer only): motion purely along one
      // axis with magnitude above 16 (units of 1/8 pel here — TODO confirm).
      if (cpi->svc.temporal_layer_id == 0) {
        if (abs(mi->mv[0].as_mv.col) > 16 && abs(mi->mv[0].as_mv.row) == 0)
          x->sb_col_scroll = 1;
        else if (abs(mi->mv[0].as_mv.row) > 16 && abs(mi->mv[0].as_mv.col) == 0)
          x->sb_row_scroll = 1;
      }
    } else {
      x->sb_me_partition = 0;
      // Fall back to using zero motion.
      *y_sad = y_sad_zero;
      mi->mv[0].as_int = 0;
    }
  }
}
1383
1384
// Sets up the reference planes for variance-based partitioning and computes
// the superblock-level luma SADs against LAST (*y_sad / *y_sad_last), GOLDEN
// (*y_sad_g) and ALTREF (*y_sad_alt). Also selects the reference used for
// partitioning (*ref_frame_partition), optionally runs coarse motion
// estimation against LAST, and builds the luma inter predictor when the
// chosen MV is non-zero. All *y_sad_* outputs are expected to be initialized
// by the caller (UINT_MAX) and are only overwritten when computed here.
static void setup_planes(AV1_COMP *cpi, MACROBLOCK *x, unsigned int *y_sad,
                         unsigned int *y_sad_g, unsigned int *y_sad_alt,
                         unsigned int *y_sad_last,
                         MV_REFERENCE_FRAME *ref_frame_partition,
                         struct scale_factors *sf_no_scale, int mi_row,
                         int mi_col, bool is_small_sb, bool scaled_ref_last) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  const int num_planes = av1_num_planes(cm);
  bool scaled_ref_golden = false;
  bool scaled_ref_alt = false;
  BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128;
  MB_MODE_INFO *mi = xd->mi[0];
  // LAST reference buffer: the caller tells us (scaled_ref_last) whether the
  // pre-scaled copy must be used because LAST has a different resolution.
  const YV12_BUFFER_CONFIG *yv12 =
      scaled_ref_last ? av1_get_scaled_ref_frame(cpi, LAST_FRAME)
                      : get_ref_frame_yv12_buf(cm, LAST_FRAME);
  assert(yv12 != NULL);
  const YV12_BUFFER_CONFIG *yv12_g = NULL;
  const YV12_BUFFER_CONFIG *yv12_alt = NULL;
  // Check if LAST is a reference. For spatial layers always use it as
  // reference scaling.
  int use_last_ref = (cpi->ref_frame_flags & AOM_LAST_FLAG) ||
                     cpi->svc.number_spatial_layers > 1;
  int use_golden_ref = cpi->ref_frame_flags & AOM_GOLD_FLAG;
  int use_alt_ref = cpi->ppi->rtc_ref.set_ref_frame_config ||
                    cpi->sf.rt_sf.use_nonrd_altref_frame ||
                    (cpi->sf.rt_sf.use_comp_ref_nonrd &&
                     cpi->sf.rt_sf.ref_frame_comp_nonrd[2] == 1);

  // Check if GOLDEN should be used as reference for partitioning.
  // Allow for spatial layers if lower layer has same resolution.
  // Skipped when the source is static (kZeroSad) and LAST is usable.
  if ((cpi->svc.number_spatial_layers == 1 ||
       cpi->svc.has_lower_quality_layer) &&
      use_golden_ref &&
      (x->content_state_sb.source_sad_nonrd != kZeroSad || !use_last_ref)) {
    yv12_g = get_ref_frame_yv12_buf(cm, GOLDEN_FRAME);
    // Resolution mismatch: fall back to the pre-scaled GOLDEN copy.
    if (yv12_g && (yv12_g->y_crop_height != cm->height ||
                   yv12_g->y_crop_width != cm->width)) {
      yv12_g = av1_get_scaled_ref_frame(cpi, GOLDEN_FRAME);
      scaled_ref_golden = true;
    }
    if (yv12_g && (yv12_g != yv12 || !use_last_ref)) {
      // Scale factors are NULL for the already-scaled copy.
      av1_setup_pre_planes(
          xd, 0, yv12_g, mi_row, mi_col,
          scaled_ref_golden ? NULL : get_ref_scale_factors(cm, GOLDEN_FRAME),
          num_planes);
      *y_sad_g = cpi->ppi->fn_ptr[bsize].sdf(
          x->plane[AOM_PLANE_Y].src.buf, x->plane[AOM_PLANE_Y].src.stride,
          xd->plane[AOM_PLANE_Y].pre[0].buf,
          xd->plane[AOM_PLANE_Y].pre[0].stride);
    }
  }

  // Check if ALTREF should be used as reference for partitioning.
  // Allow for spatial layers if lower layer has same resolution.
  if ((cpi->svc.number_spatial_layers == 1 ||
       cpi->svc.has_lower_quality_layer) &&
      use_alt_ref && (cpi->ref_frame_flags & AOM_ALT_FLAG) &&
      (x->content_state_sb.source_sad_nonrd != kZeroSad || !use_last_ref)) {
    yv12_alt = get_ref_frame_yv12_buf(cm, ALTREF_FRAME);
    // Resolution mismatch: fall back to the pre-scaled ALTREF copy.
    if (yv12_alt && (yv12_alt->y_crop_height != cm->height ||
                     yv12_alt->y_crop_width != cm->width)) {
      yv12_alt = av1_get_scaled_ref_frame(cpi, ALTREF_FRAME);
      scaled_ref_alt = true;
    }
    if (yv12_alt && (yv12_alt != yv12 || !use_last_ref)) {
      av1_setup_pre_planes(
          xd, 0, yv12_alt, mi_row, mi_col,
          scaled_ref_alt ? NULL : get_ref_scale_factors(cm, ALTREF_FRAME),
          num_planes);
      *y_sad_alt = cpi->ppi->fn_ptr[bsize].sdf(
          x->plane[AOM_PLANE_Y].src.buf, x->plane[AOM_PLANE_Y].src.stride,
          xd->plane[AOM_PLANE_Y].pre[0].buf,
          xd->plane[AOM_PLANE_Y].pre[0].stride);
    }
  }

  if (use_last_ref) {
    const int source_sad_nonrd = x->content_state_sb.source_sad_nonrd;
    // NOTE: this must come after the GOLDEN/ALTREF setup above, since
    // av1_setup_pre_planes() overwrites xd->plane[].pre[0].
    av1_setup_pre_planes(
        xd, 0, yv12, mi_row, mi_col,
        scaled_ref_last ? NULL : get_ref_scale_factors(cm, LAST_FRAME),
        num_planes);
    mi->ref_frame[0] = LAST_FRAME;
    mi->ref_frame[1] = NONE_FRAME;
    mi->bsize = cm->seq_params->sb_size;
    mi->mv[0].as_int = 0;
    mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);

    int est_motion = cpi->sf.rt_sf.estimate_motion_for_var_based_partition;
    // TODO(b/290596301): Look into adjusting this condition.
    // There is regression on color content when
    // estimate_motion_for_var_based_partition = 3 and high motion,
    // so for now force it to 2 based on superblock sad.
    if (est_motion > 2 && source_sad_nonrd > kMedSad) est_motion = 2;

    // Coarse ME only for interior superblocks with some texture and motion;
    // on success it sets *y_sad and mi->mv[0].
    if ((est_motion == 1 || est_motion == 2) && xd->mb_to_right_edge >= 0 &&
        xd->mb_to_bottom_edge >= 0 && x->source_variance > 100 &&
        source_sad_nonrd > kLowSad) {
      do_int_pro_motion_estimation(cpi, x, y_sad, mi_row, mi_col,
                                   source_sad_nonrd);
    }

    // If ME was skipped (*y_sad still at its UINT_MAX sentinel), compute the
    // zero-MV SAD directly.
    if (*y_sad == UINT_MAX) {
      *y_sad = cpi->ppi->fn_ptr[bsize].sdf(
          x->plane[AOM_PLANE_Y].src.buf, x->plane[AOM_PLANE_Y].src.stride,
          xd->plane[AOM_PLANE_Y].pre[0].buf,
          xd->plane[AOM_PLANE_Y].pre[0].stride);
    }

    // Evaluate if neighbours' MVs give better predictions. Zero MV is tested
    // already, so only non-zero MVs are tested here. Here the neighbour blocks
    // are the first block above or left to this superblock.
    if (est_motion >= 2 && (xd->up_available || xd->left_available))
      evaluate_neighbour_mvs(cpi, x, y_sad, is_small_sb, est_motion);

    *y_sad_last = *y_sad;
  }

  // Pick the ref frame for partitioning, use golden or altref frame only if
  // its lower sad, bias to LAST with factor 0.9.
  set_ref_frame_for_partition(cpi, x, xd, ref_frame_partition, mi, y_sad,
                              y_sad_g, y_sad_alt, yv12_g, yv12_alt, mi_row,
                              mi_col, num_planes);

  // Only calculate the predictor for non-zero MV.
  if (mi->mv[0].as_int != 0) {
    if (!scaled_ref_last) {
      set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
    } else {
      // Reference was pre-scaled to frame resolution, so use identity scale
      // factors for prediction.
      xd->block_ref_scale_factors[0] = sf_no_scale;
      xd->block_ref_scale_factors[1] = sf_no_scale;
    }
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL,
                                  cm->seq_params->sb_size, AOM_PLANE_Y,
                                  num_planes - 1);
  }
}
1522
1523
// Decides whether to split or merge a 16x16 partition block in variance based
1524
// partitioning based on the 8x8 sub-block variances.
1525
static inline PART_EVAL_STATUS get_part_eval_based_on_sub_blk_var(
1526
0
    VP16x16 *var_16x16_info, int64_t threshold16) {
1527
0
  int max_8x8_var = 0, min_8x8_var = INT_MAX;
1528
0
  for (int split_idx = 0; split_idx < 4; split_idx++) {
1529
0
    get_variance(&var_16x16_info->split[split_idx].part_variances.none);
1530
0
    int this_8x8_var =
1531
0
        var_16x16_info->split[split_idx].part_variances.none.variance;
1532
0
    max_8x8_var = AOMMAX(this_8x8_var, max_8x8_var);
1533
0
    min_8x8_var = AOMMIN(this_8x8_var, min_8x8_var);
1534
0
  }
1535
  // If the difference between maximum and minimum sub-block variances is high,
1536
  // then only evaluate PARTITION_SPLIT for the 16x16 block. Otherwise, evaluate
1537
  // only PARTITION_NONE. The shift factor for threshold16 has been derived
1538
  // empirically.
1539
0
  return ((max_8x8_var - min_8x8_var) > (threshold16 << 2))
1540
0
             ? PART_EVAL_ONLY_SPLIT
1541
0
             : PART_EVAL_ONLY_NONE;
1542
0
}
1543
1544
// Returns true when the zero-MV skip should be forced for the superblock,
// given the aggressiveness level of the speed feature and the measured
// source SAD class. Each level maps to the highest SAD class that still
// allows the skip:
//   level >= 3 : up to kLowSad
//   level == 2 : up to kVeryLowSad
//   level == 1 : only kZeroSad
//   level <= 0 : feature disabled, never skip
static inline bool is_set_force_zeromv_skip_based_on_src_sad(
    int set_zeromv_skip_based_on_source_sad, SOURCE_SAD source_sad_nonrd) {
  if (set_zeromv_skip_based_on_source_sad >= 3)
    return source_sad_nonrd <= kLowSad;
  if (set_zeromv_skip_based_on_source_sad == 2)
    return source_sad_nonrd <= kVeryLowSad;
  if (set_zeromv_skip_based_on_source_sad == 1)
    return source_sad_nonrd == kZeroSad;
  return false;
}
1557
1558
// Attempts an early exit for a static superblock: when the source SAD class
// permits it (per the set_zeromv_skip_based_on_source_sad speed feature) and
// the luma/chroma SADs against LAST are below the exit thresholds, the SB
// partition is finalized here at sb_size and force_zeromv_skip_for_sb is set.
// Returns true iff the partition was finalized — in that case `vt` has been
// freed here and the caller must return immediately without using it.
static inline bool set_force_zeromv_skip_for_sb(
    AV1_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, VP128x128 *vt,
    unsigned int *uv_sad, int mi_row, int mi_col, unsigned int y_sad,
    BLOCK_SIZE bsize) {
  AV1_COMMON *const cm = &cpi->common;
  if (!is_set_force_zeromv_skip_based_on_src_sad(
          cpi->sf.rt_sf.set_zeromv_skip_based_on_source_sad,
          x->content_state_sb.source_sad_nonrd))
    return false;
  // Optionally double the exit thresholds (speed feature).
  int shift = cpi->sf.rt_sf.increase_source_sad_thresh ? 1 : 0;
  const int block_width = mi_size_wide[cm->seq_params->sb_size];
  const int block_height = mi_size_high[cm->seq_params->sb_size];
  const unsigned int thresh_exit_part_y =
      cpi->zeromv_skip_thresh_exit_part[bsize] << shift;
  unsigned int thresh_exit_part_uv =
      CALC_CHROMA_THRESH_FOR_ZEROMV_SKIP(thresh_exit_part_y) << shift;
  // Be more aggressive in UV threshold if source_sad >= VeryLowSad
  // to suppreess visual artifact caused by the speed feature:
  // set_zeromv_skip_based_on_source_sad = 2. For now only for
  // part_early_exit_zeromv = 1.
  if (x->content_state_sb.source_sad_nonrd >= kVeryLowSad &&
      cpi->sf.rt_sf.part_early_exit_zeromv == 1)
    thresh_exit_part_uv = thresh_exit_part_uv >> 3;
  // Only take the early exit for full (non-boundary) superblocks whose luma
  // and both chroma SADs are all below their thresholds.
  if (mi_col + block_width <= tile->mi_col_end &&
      mi_row + block_height <= tile->mi_row_end && y_sad < thresh_exit_part_y &&
      uv_sad[0] < thresh_exit_part_uv && uv_sad[1] < thresh_exit_part_uv) {
    set_block_size(cpi, mi_row, mi_col, bsize);
    x->force_zeromv_skip_for_sb = 1;
    // The variance tree is no longer needed on this path; free it here since
    // the caller exits without reaching its own aom_free(vt).
    aom_free(vt);
    // Partition shape is set here at SB level.
    // Exit needs to happen from av1_choose_var_based_partitioning().
    return true;
  } else if (x->content_state_sb.source_sad_nonrd == kZeroSad &&
             cpi->sf.rt_sf.part_early_exit_zeromv >= 2)
    // Weaker mode: mark the SB as a zero-sad candidate but continue with
    // normal variance-based partitioning.
    x->force_zeromv_skip_for_sb = 2;
  return false;
}
1595
1596
// Top-level entry for variance-based partitioning of one superblock
// (real-time path). Builds a variance tree over the SB (8x8 leaves for inter
// frames, 4x4 for key frames), decides at each level (128x128 / 64x64 /
// 32x32 / 16x16) whether to split based on thresholds derived from QP and
// content state, and writes the resulting partition into the mi grid.
// Always returns 0.
int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
                                      ThreadData *td, MACROBLOCK *x, int mi_row,
                                      int mi_col) {
#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, choose_var_based_partitioning_time);
#endif
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  const int64_t *const vbp_thresholds = cpi->vbp_info.thresholds;
  // force_split layout: [0]=128x128, [1..4]=64x64, [5..20]=32x32,
  // [21..84]=16x16.
  PART_EVAL_STATUS force_split[85];
  int avg_64x64;
  int max_var_32x32[4];
  int min_var_32x32[4];
  int var_32x32;
  int var_64x64;
  int min_var_64x64 = INT_MAX;
  int max_var_64x64 = 0;
  int avg_16x16[4][4];
  int maxvar_16x16[4][4];
  int minvar_16x16[4][4];
  const uint8_t *src_buf;
  const uint8_t *dst_buf;
  int dst_stride;
  unsigned int uv_sad[MAX_MB_PLANE - 1];
  NOISE_LEVEL noise_level = kLow;
  bool is_zero_motion = true;
  bool scaled_ref_last = false;
  struct scale_factors sf_no_scale;
  // Identity scale factors, used when prediction must run on a pre-scaled
  // LAST reference inside setup_planes().
  av1_setup_scale_factors_for_frame(&sf_no_scale, cm->width, cm->height,
                                    cm->width, cm->height);

  bool is_key_frame =
      (frame_is_intra_only(cm) ||
       (cpi->ppi->use_svc &&
        cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame));

  assert(cm->seq_params->sb_size == BLOCK_64X64 ||
         cm->seq_params->sb_size == BLOCK_128X128);
  const bool is_small_sb = (cm->seq_params->sb_size == BLOCK_64X64);
  const int num_64x64_blocks = is_small_sb ? 1 : 4;

  // UINT_MAX acts as a "not yet computed" sentinel for the SADs below.
  unsigned int y_sad = UINT_MAX;
  unsigned int y_sad_g = UINT_MAX;
  unsigned int y_sad_alt = UINT_MAX;
  unsigned int y_sad_last = UINT_MAX;
  BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128;

  // Force skip encoding for all superblocks on slide change for
  // non_reference_frames.
  if (cpi->sf.rt_sf.skip_encoding_non_reference_slide_change &&
      cpi->rc.high_source_sad && cpi->ppi->rtc_ref.non_reference_frame) {
    MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
                        get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
    av1_set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize);
    x->force_zeromv_skip_for_sb = 1;
    return 0;
  }

  // Ref frame used in partitioning.
  MV_REFERENCE_FRAME ref_frame_partition = LAST_FRAME;

  // Local copy of the thresholds, adjusted per-SB by set_vbp_thresholds()
  // below. Index 0..4 maps 128x128 down to 8x8.
  int64_t thresholds[5] = { vbp_thresholds[0], vbp_thresholds[1],
                            vbp_thresholds[2], vbp_thresholds[3],
                            vbp_thresholds[4] };

  const int segment_id = xd->mi[0]->segment_id;
  // Per-64x64 source SAD for this SB, if tracked (top spatial layer only).
  uint64_t blk_sad = 0;
  if (cpi->src_sad_blk_64x64 != NULL &&
      cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1) {
    const int sb_size_by_mb = (cm->seq_params->sb_size == BLOCK_128X128)
                                  ? (cm->seq_params->mib_size >> 1)
                                  : cm->seq_params->mib_size;
    const int sb_cols =
        (cm->mi_params.mi_cols + sb_size_by_mb - 1) / sb_size_by_mb;
    const int sbi_col = mi_col / sb_size_by_mb;
    const int sbi_row = mi_row / sb_size_by_mb;
    blk_sad = cpi->src_sad_blk_64x64[sbi_col + sbi_row * sb_cols];
  }

  const bool is_segment_id_boosted =
      cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
      cyclic_refresh_segment_id_boosted(segment_id);
  // Effective qindex for this SB: base qindex plus any delta-q, then
  // segment-adjusted when cyclic refresh boosting or ROI delta-qp applies.
  const int sb_qindex =
      clamp(cm->delta_q_info.delta_q_present_flag
                ? cm->quant_params.base_qindex + x->delta_qindex
                : cm->quant_params.base_qindex,
            0, QINDEX_RANGE - 1);
  const int qindex = is_segment_id_boosted || cpi->roi.delta_qp_enabled
                         ? av1_get_qindex(&cm->seg, segment_id, sb_qindex)
                         : sb_qindex;
  set_vbp_thresholds(
      cpi, thresholds, blk_sad, qindex, x->content_state_sb.low_sumdiff,
      x->content_state_sb.source_sad_nonrd, x->content_state_sb.source_sad_rd,
      is_segment_id_boosted, x->content_state_sb.lighting_change);

  src_buf = x->plane[AOM_PLANE_Y].src.buf;
  int src_stride = x->plane[AOM_PLANE_Y].src.stride;

  // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks,
  // 5-20 for the 16x16 blocks.
  force_split[0] = PART_EVAL_ALL;
  memset(x->part_search_info.variance_low, 0,
         sizeof(x->part_search_info.variance_low));

  // Check if LAST frame is NULL, and if so, treat this frame
  // as a key frame, for the purpose of the superblock partitioning.
  // LAST == NULL can happen in cases where enhancement spatial layers are
  // enabled dyanmically and the only reference is the spatial(GOLDEN).
  // If LAST frame has a different resolution: set the scaled_ref_last flag
  // and check if ref_scaled is NULL.
  if (!frame_is_intra_only(cm)) {
    const YV12_BUFFER_CONFIG *ref = get_ref_frame_yv12_buf(cm, LAST_FRAME);
    if (ref == NULL) {
      is_key_frame = true;
    } else if (ref->y_crop_height != cm->height ||
               ref->y_crop_width != cm->width) {
      scaled_ref_last = true;
      const YV12_BUFFER_CONFIG *ref_scaled =
          av1_get_scaled_ref_frame(cpi, LAST_FRAME);
      if (ref_scaled == NULL) is_key_frame = true;
    }
  }

  x->source_variance = UINT_MAX;
  // For nord_pickmode: compute source_variance, only for superblocks with
  // some motion for now. This input can then be used to bias the partitioning
  // or the chroma_check.
  if (cpi->sf.rt_sf.use_nonrd_pick_mode &&
      x->content_state_sb.source_sad_nonrd > kLowSad)
    x->source_variance = av1_get_perpixel_variance_facade(
        cpi, xd, &x->plane[0].src, cm->seq_params->sb_size, AOM_PLANE_Y);

  if (!is_key_frame) {
    setup_planes(cpi, x, &y_sad, &y_sad_g, &y_sad_alt, &y_sad_last,
                 &ref_frame_partition, &sf_no_scale, mi_row, mi_col,
                 is_small_sb, scaled_ref_last);

    MB_MODE_INFO *mi = xd->mi[0];
    // Use reference SB directly for zero mv.
    if (mi->mv[0].as_int != 0) {
      // Non-zero MV: variance is computed against the built predictor.
      dst_buf = xd->plane[AOM_PLANE_Y].dst.buf;
      dst_stride = xd->plane[AOM_PLANE_Y].dst.stride;
      is_zero_motion = false;
    } else {
      dst_buf = xd->plane[AOM_PLANE_Y].pre[0].buf;
      dst_stride = xd->plane[AOM_PLANE_Y].pre[0].stride;
    }
  } else {
    // Key frame: variances are computed on the source alone.
    dst_buf = NULL;
    dst_stride = 0;
  }

  // check and set the color sensitivity of sb.
  av1_zero(uv_sad);
  chroma_check(cpi, x, bsize, y_sad_last, y_sad_g, y_sad_alt, is_key_frame,
               is_zero_motion, uv_sad);

  x->force_zeromv_skip_for_sb = 0;

  VP128x128 *vt;
  AOM_CHECK_MEM_ERROR(xd->error_info, vt, aom_malloc(sizeof(*vt)));
  // The 64x64 sub-trees live in thread-local storage; only the top node is
  // heap-allocated here.
  vt->split = td->vt64x64;

  // If the superblock is completely static (zero source sad) and
  // the y_sad (relative to LAST ref) is very small, take the sb_size partition
  // and exit, and force zeromv_last skip mode for nonrd_pickmode.
  // Only do this on the base segment (so the QP-boosted segment, if applied,
  // can still continue cleaning/ramping up the quality).
  // Condition on color uv_sad is also added.
  if (!is_key_frame && cpi->sf.rt_sf.part_early_exit_zeromv &&
      cpi->rc.frames_since_key > 30 && segment_id == CR_SEGMENT_ID_BASE &&
      ref_frame_partition == LAST_FRAME && xd->mi[0]->mv[0].as_int == 0) {
    // Exit here, if zero mv skip flag is set at SB level.
    // NOTE: on the true path, vt has already been freed by the callee.
    if (set_force_zeromv_skip_for_sb(cpi, x, tile, vt, uv_sad, mi_row, mi_col,
                                     y_sad, bsize))
      return 0;
  }

  if (cpi->noise_estimate.enabled)
    noise_level = av1_noise_estimate_extract_level(&cpi->noise_estimate);

  // Fill in the entire tree of 8x8 (for inter frames) or 4x4 (for key frames)
  // variances for splits.
  fill_variance_tree_leaves(cpi, x, vt, force_split, avg_16x16, maxvar_16x16,
                            minvar_16x16, thresholds, src_buf, src_stride,
                            dst_buf, dst_stride, is_key_frame, is_small_sb);

  avg_64x64 = 0;
  for (int blk64_idx = 0; blk64_idx < num_64x64_blocks; ++blk64_idx) {
    max_var_32x32[blk64_idx] = 0;
    min_var_32x32[blk64_idx] = INT_MAX;
    const int blk64_scale_idx = blk64_idx << 2;
    for (int lvl1_idx = 0; lvl1_idx < 4; lvl1_idx++) {
      const int lvl1_scale_idx = (blk64_scale_idx + lvl1_idx) << 2;
      for (int lvl2_idx = 0; lvl2_idx < 4; lvl2_idx++) {
        // For inter frames the 16x16 level has already been handled in
        // fill_variance_tree_leaves() (which receives force_split and the
        // avg/min/max 16x16 arrays), so this level is only aggregated here
        // for key frames.
        if (!is_key_frame) continue;
        VP16x16 *vtemp = &vt->split[blk64_idx].split[lvl1_idx].split[lvl2_idx];
        for (int lvl3_idx = 0; lvl3_idx < 4; lvl3_idx++)
          fill_variance_tree(&vtemp->split[lvl3_idx], BLOCK_8X8);
        fill_variance_tree(vtemp, BLOCK_16X16);
        // If variance of this 16x16 block is above the threshold, force block
        // to split. This also forces a split on the upper levels.
        get_variance(&vtemp->part_variances.none);
        if (vtemp->part_variances.none.variance > thresholds[3]) {
          const int split_index = 21 + lvl1_scale_idx + lvl2_idx;
          force_split[split_index] =
              cpi->sf.rt_sf.vbp_prune_16x16_split_using_min_max_sub_blk_var
                  ? get_part_eval_based_on_sub_blk_var(vtemp, thresholds[3])
                  : PART_EVAL_ONLY_SPLIT;
          force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ONLY_SPLIT;
          force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT;
          force_split[0] = PART_EVAL_ONLY_SPLIT;
        }
      }
      fill_variance_tree(&vt->split[blk64_idx].split[lvl1_idx], BLOCK_32X32);
      // If variance of this 32x32 block is above the threshold, or if its above
      // (some threshold of) the average variance over the sub-16x16 blocks,
      // then force this block to split. This also forces a split on the upper
      // (64x64) level.
      uint64_t frame_sad_thresh = 20000;
      const int is_360p_or_smaller = cm->width * cm->height <= RESOLUTION_360P;
      if (cpi->svc.number_temporal_layers > 2 &&
          cpi->svc.temporal_layer_id == 0)
        frame_sad_thresh = frame_sad_thresh << 1;
      if (force_split[5 + blk64_scale_idx + lvl1_idx] == PART_EVAL_ALL) {
        get_variance(&vt->split[blk64_idx].split[lvl1_idx].part_variances.none);
        var_32x32 =
            vt->split[blk64_idx].split[lvl1_idx].part_variances.none.variance;
        max_var_32x32[blk64_idx] = AOMMAX(var_32x32, max_var_32x32[blk64_idx]);
        min_var_32x32[blk64_idx] = AOMMIN(var_32x32, min_var_32x32[blk64_idx]);
        const int max_min_var_16X16_diff = (maxvar_16x16[blk64_idx][lvl1_idx] -
                                            minvar_16x16[blk64_idx][lvl1_idx]);

        if (var_32x32 > thresholds[2] ||
            (!is_key_frame && var_32x32 > (thresholds[2] >> 1) &&
             var_32x32 > (avg_16x16[blk64_idx][lvl1_idx] >> 1))) {
          force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ONLY_SPLIT;
          force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT;
          force_split[0] = PART_EVAL_ONLY_SPLIT;
        } else if (!is_key_frame && is_360p_or_smaller &&
                   ((max_min_var_16X16_diff > (thresholds[2] >> 1) &&
                     maxvar_16x16[blk64_idx][lvl1_idx] > thresholds[2]) ||
                    (cpi->sf.rt_sf.prefer_large_partition_blocks &&
                     x->content_state_sb.source_sad_nonrd > kLowSad &&
                     cpi->rc.frame_source_sad < frame_sad_thresh &&
                     maxvar_16x16[blk64_idx][lvl1_idx] > (thresholds[2] >> 4) &&
                     maxvar_16x16[blk64_idx][lvl1_idx] >
                         (minvar_16x16[blk64_idx][lvl1_idx] << 2)))) {
          force_split[5 + blk64_scale_idx + lvl1_idx] = PART_EVAL_ONLY_SPLIT;
          force_split[blk64_idx + 1] = PART_EVAL_ONLY_SPLIT;
          force_split[0] = PART_EVAL_ONLY_SPLIT;
        }
      }
    }
    if (force_split[1 + blk64_idx] == PART_EVAL_ALL) {
      fill_variance_tree(&vt->split[blk64_idx], BLOCK_64X64);
      get_variance(&vt->split[blk64_idx].part_variances.none);
      var_64x64 = vt->split[blk64_idx].part_variances.none.variance;
      max_var_64x64 = AOMMAX(var_64x64, max_var_64x64);
      min_var_64x64 = AOMMIN(var_64x64, min_var_64x64);
      // If the difference of the max-min variances of sub-blocks or max
      // variance of a sub-block is above some threshold of then force this
      // block to split. Only checking this for noise level >= medium, if
      // encoder is in SVC or if we already forced large blocks.
      const int max_min_var_32x32_diff =
          max_var_32x32[blk64_idx] - min_var_32x32[blk64_idx];
      const int check_max_var = max_var_32x32[blk64_idx] > thresholds[1] >> 1;
      const bool check_noise_lvl = noise_level >= kMedium ||
                                   cpi->ppi->use_svc ||
                                   cpi->sf.rt_sf.prefer_large_partition_blocks;
      const int64_t set_threshold = 3 * (thresholds[1] >> 3);

      if (!is_key_frame && max_min_var_32x32_diff > set_threshold &&
          check_max_var && check_noise_lvl) {
        force_split[1 + blk64_idx] = PART_EVAL_ONLY_SPLIT;
        force_split[0] = PART_EVAL_ONLY_SPLIT;
      }
      avg_64x64 += var_64x64;
    }
    // 64x64 superblocks cannot take a 128x128 partition.
    if (is_small_sb) force_split[0] = PART_EVAL_ONLY_SPLIT;
  }

  if (force_split[0] == PART_EVAL_ALL) {
    fill_variance_tree(vt, BLOCK_128X128);
    get_variance(&vt->part_variances.none);
    // Split the 128x128 if its variance exceeds ~(9/8) of the mean 64x64
    // variance (avg_64x64 sums four blocks, so >>5 is avg/8 * 9/4 — the
    // empirical scaling used here).
    const int set_avg_64x64 = (9 * avg_64x64) >> 5;
    if (!is_key_frame && vt->part_variances.none.variance > set_avg_64x64)
      force_split[0] = PART_EVAL_ONLY_SPLIT;

    if (!is_key_frame &&
        (max_var_64x64 - min_var_64x64) > 3 * (thresholds[0] >> 3) &&
        max_var_64x64 > thresholds[0] >> 1)
      force_split[0] = PART_EVAL_ONLY_SPLIT;
  }

  // Top-down partition assignment: try to take each level whole; on failure
  // descend and try the four children, bottoming out at forced 8x8.
  if (mi_col + 32 > tile->mi_col_end || mi_row + 32 > tile->mi_row_end ||
      !set_vt_partitioning(cpi, xd, tile, vt, BLOCK_128X128, mi_row, mi_col,
                           thresholds[0], BLOCK_16X16, force_split[0])) {
    for (int blk64_idx = 0; blk64_idx < num_64x64_blocks; ++blk64_idx) {
      const int x64_idx = GET_BLK_IDX_X(blk64_idx, 4);
      const int y64_idx = GET_BLK_IDX_Y(blk64_idx, 4);
      const int blk64_scale_idx = blk64_idx << 2;

      // Now go through the entire structure, splitting every block size until
      // we get to one that's got a variance lower than our threshold.
      if (set_vt_partitioning(cpi, xd, tile, &vt->split[blk64_idx], BLOCK_64X64,
                              mi_row + y64_idx, mi_col + x64_idx, thresholds[1],
                              BLOCK_16X16, force_split[1 + blk64_idx]))
        continue;
      for (int lvl1_idx = 0; lvl1_idx < 4; ++lvl1_idx) {
        const int x32_idx = GET_BLK_IDX_X(lvl1_idx, 3);
        const int y32_idx = GET_BLK_IDX_Y(lvl1_idx, 3);
        const int lvl1_scale_idx = (blk64_scale_idx + lvl1_idx) << 2;
        if (set_vt_partitioning(
                cpi, xd, tile, &vt->split[blk64_idx].split[lvl1_idx],
                BLOCK_32X32, (mi_row + y64_idx + y32_idx),
                (mi_col + x64_idx + x32_idx), thresholds[2], BLOCK_16X16,
                force_split[5 + blk64_scale_idx + lvl1_idx]))
          continue;
        for (int lvl2_idx = 0; lvl2_idx < 4; ++lvl2_idx) {
          const int x16_idx = GET_BLK_IDX_X(lvl2_idx, 2);
          const int y16_idx = GET_BLK_IDX_Y(lvl2_idx, 2);
          const int split_index = 21 + lvl1_scale_idx + lvl2_idx;
          VP16x16 *vtemp =
              &vt->split[blk64_idx].split[lvl1_idx].split[lvl2_idx];
          if (set_vt_partitioning(cpi, xd, tile, vtemp, BLOCK_16X16,
                                  mi_row + y64_idx + y32_idx + y16_idx,
                                  mi_col + x64_idx + x32_idx + x16_idx,
                                  thresholds[3], BLOCK_8X8,
                                  force_split[split_index]))
            continue;
          // 16x16 could not be taken whole: force the four 8x8 leaves.
          for (int lvl3_idx = 0; lvl3_idx < 4; ++lvl3_idx) {
            const int x8_idx = GET_BLK_IDX_X(lvl3_idx, 1);
            const int y8_idx = GET_BLK_IDX_Y(lvl3_idx, 1);
            set_block_size(cpi, (mi_row + y64_idx + y32_idx + y16_idx + y8_idx),
                           (mi_col + x64_idx + x32_idx + x16_idx + x8_idx),
                           BLOCK_8X8);
          }
        }
      }
    }
  }

  if (cpi->sf.rt_sf.short_circuit_low_temp_var) {
    set_low_temp_var_flag(cpi, &x->part_search_info, xd, vt, thresholds,
                          ref_frame_partition, mi_col, mi_row, is_small_sb);
  }

  aom_free(vt);
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, choose_var_based_partitioning_time);
#endif
  return 0;
}