Coverage Report

Created: 2026-01-20 07:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/aom/av1/common/reconinter.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
#include <stdio.h>
14
#include <limits.h>
15
16
#include "config/aom_config.h"
17
#include "config/aom_dsp_rtcd.h"
18
#include "config/aom_scale_rtcd.h"
19
20
#include "aom/aom_integer.h"
21
#include "aom_dsp/blend.h"
22
23
#include "av1/common/av1_common_int.h"
24
#include "av1/common/blockd.h"
25
#include "av1/common/mvref_common.h"
26
#include "av1/common/obmc.h"
27
#include "av1/common/reconinter.h"
28
#include "av1/common/reconintra.h"
29
30
// This function will determine whether or not to create a warped
31
// prediction.
32
int av1_allow_warp(const MB_MODE_INFO *const mbmi,
33
                   const WarpTypesAllowed *const warp_types,
34
                   const WarpedMotionParams *const gm_params,
35
                   int build_for_obmc, const struct scale_factors *const sf,
36
138k
                   WarpedMotionParams *final_warp_params) {
37
  // Note: As per the spec, we must test the fixed point scales here, which are
38
  // at a higher precision (1 << 14) than the xs and ys in subpel_params (that
39
  // have 1 << 10 precision).
40
138k
  if (av1_is_scaled(sf)) return 0;
41
42
131k
  if (final_warp_params != NULL) *final_warp_params = default_warp_params;
43
44
131k
  if (build_for_obmc) return 0;
45
46
131k
  if (warp_types->local_warp_allowed && !mbmi->wm_params.invalid) {
47
1.23k
    if (final_warp_params != NULL)
48
1.23k
      memcpy(final_warp_params, &mbmi->wm_params, sizeof(*final_warp_params));
49
1.23k
    return 1;
50
130k
  } else if (warp_types->global_warp_allowed && !gm_params->invalid) {
51
12.2k
    if (final_warp_params != NULL)
52
12.2k
      memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
53
12.2k
    return 1;
54
12.2k
  }
55
56
117k
  return 0;
57
131k
}
58
59
void av1_init_inter_params(InterPredParams *inter_pred_params, int block_width,
60
                           int block_height, int pix_row, int pix_col,
61
                           int subsampling_x, int subsampling_y, int bit_depth,
62
                           int use_hbd_buf, int is_intrabc,
63
                           const struct scale_factors *sf,
64
                           const struct buf_2d *ref_buf,
65
422k
                           int_interpfilters interp_filters) {
66
422k
  inter_pred_params->block_width = block_width;
67
422k
  inter_pred_params->block_height = block_height;
68
422k
  inter_pred_params->pix_row = pix_row;
69
422k
  inter_pred_params->pix_col = pix_col;
70
422k
  inter_pred_params->subsampling_x = subsampling_x;
71
422k
  inter_pred_params->subsampling_y = subsampling_y;
72
422k
  inter_pred_params->bit_depth = bit_depth;
73
422k
  inter_pred_params->use_hbd_buf = use_hbd_buf;
74
422k
  inter_pred_params->is_intrabc = is_intrabc;
75
422k
  inter_pred_params->scale_factors = sf;
76
422k
  inter_pred_params->ref_frame_buf = *ref_buf;
77
422k
  inter_pred_params->mode = TRANSLATION_PRED;
78
422k
  inter_pred_params->comp_mode = UNIFORM_SINGLE;
79
80
422k
  if (is_intrabc) {
81
235k
    inter_pred_params->interp_filter_params[0] = &av1_intrabc_filter_params;
82
235k
    inter_pred_params->interp_filter_params[1] = &av1_intrabc_filter_params;
83
235k
  } else {
84
186k
    inter_pred_params->interp_filter_params[0] =
85
186k
        av1_get_interp_filter_params_with_block_size(
86
186k
            interp_filters.as_filters.x_filter, block_width);
87
186k
    inter_pred_params->interp_filter_params[1] =
88
186k
        av1_get_interp_filter_params_with_block_size(
89
186k
            interp_filters.as_filters.y_filter, block_height);
90
186k
  }
91
422k
}
92
93
79.4k
// Switches an already initialised parameter set from the default
// UNIFORM_SINGLE to uniform compound prediction.
void av1_init_comp_mode(InterPredParams *inter_pred_params) {
  InterPredParams *const p = inter_pred_params;
  p->comp_mode = UNIFORM_COMP;
}
96
97
void av1_init_warp_params(InterPredParams *inter_pred_params,
98
                          const WarpTypesAllowed *warp_types, int ref,
99
397k
                          const MACROBLOCKD *xd, const MB_MODE_INFO *mi) {
100
397k
  if (inter_pred_params->block_height < 8 || inter_pred_params->block_width < 8)
101
197k
    return;
102
103
200k
  if (xd->cur_frame_force_integer_mv) return;
104
105
138k
  if (av1_allow_warp(mi, warp_types, &xd->global_motion[mi->ref_frame[ref]], 0,
106
138k
                     inter_pred_params->scale_factors,
107
138k
                     &inter_pred_params->warp_params)) {
108
#if CONFIG_REALTIME_ONLY
109
    aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_FEATURE,
110
                       "Warped motion is disabled in realtime only build.");
111
#endif
112
13.5k
    inter_pred_params->mode = WARP_PRED;
113
13.5k
  }
114
138k
}
115
116
void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
117
                              int dst_stride,
118
                              InterPredParams *inter_pred_params,
119
422k
                              const SubpelParams *subpel_params) {
120
422k
  assert(IMPLIES(inter_pred_params->conv_params.is_compound,
121
422k
                 inter_pred_params->conv_params.dst != NULL));
122
123
422k
  if (inter_pred_params->mode == TRANSLATION_PRED) {
124
408k
#if CONFIG_AV1_HIGHBITDEPTH
125
408k
    if (inter_pred_params->use_hbd_buf) {
126
264k
      highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
127
264k
                             inter_pred_params->block_width,
128
264k
                             inter_pred_params->block_height,
129
264k
                             &inter_pred_params->conv_params,
130
264k
                             inter_pred_params->interp_filter_params,
131
264k
                             inter_pred_params->bit_depth);
132
264k
    } else {
133
144k
      inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
134
144k
                      inter_pred_params->block_width,
135
144k
                      inter_pred_params->block_height,
136
144k
                      &inter_pred_params->conv_params,
137
144k
                      inter_pred_params->interp_filter_params);
138
144k
    }
139
#else
140
    inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
141
                    inter_pred_params->block_width,
142
                    inter_pred_params->block_height,
143
                    &inter_pred_params->conv_params,
144
                    inter_pred_params->interp_filter_params);
145
#endif
146
408k
  }
147
13.5k
#if !CONFIG_REALTIME_ONLY
148
  // TODO(jingning): av1_warp_plane() can be further cleaned up.
149
13.5k
  else if (inter_pred_params->mode == WARP_PRED) {
150
13.5k
    av1_warp_plane(
151
13.5k
        &inter_pred_params->warp_params, inter_pred_params->use_hbd_buf,
152
13.5k
        inter_pred_params->bit_depth, inter_pred_params->ref_frame_buf.buf0,
153
13.5k
        inter_pred_params->ref_frame_buf.width,
154
13.5k
        inter_pred_params->ref_frame_buf.height,
155
13.5k
        inter_pred_params->ref_frame_buf.stride, dst,
156
13.5k
        inter_pred_params->pix_col, inter_pred_params->pix_row,
157
13.5k
        inter_pred_params->block_width, inter_pred_params->block_height,
158
13.5k
        dst_stride, inter_pred_params->subsampling_x,
159
13.5k
        inter_pred_params->subsampling_y, &inter_pred_params->conv_params);
160
13.5k
  }
161
0
#endif
162
0
  else {
163
0
    assert(0 && "Unsupported inter_pred_params->mode");
164
0
  }
165
422k
}
166
167
// One-dimensional weight profile (values 0..64) used by
// init_wedge_master_masks() as the source for the odd rows of the
// WEDGE_OBLIQUE63 master mask; each row receives a shifted copy.
static const uint8_t wedge_master_oblique_odd[MASK_MASTER_SIZE] = {
168
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
169
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  6,  18,
170
  37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
171
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
172
};
173
// One-dimensional weight profile (values 0..64) used by
// init_wedge_master_masks() as the source for the even rows of the
// WEDGE_OBLIQUE63 master mask; each row receives a shifted copy.
static const uint8_t wedge_master_oblique_even[MASK_MASTER_SIZE] = {
174
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
175
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  4,  11, 27,
176
  46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
177
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
178
};
179
// One-dimensional weight profile (values 0..64) that
// init_wedge_master_masks() copies unshifted into every row of the
// WEDGE_VERTICAL master mask.
static const uint8_t wedge_master_vertical[MASK_MASTER_SIZE] = {
180
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
181
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  7,  21,
182
  43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
183
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
184
};
185
186
// Copies `width` bytes from src to dst displaced by `shift` positions.
//
// A non-negative shift moves the data towards higher indices and pads the
// first `shift` bytes of dst with src[0]; a negative shift moves it towards
// lower indices and pads the last |shift| bytes with src[width - 1].
static AOM_INLINE void shift_copy(const uint8_t *src, uint8_t *dst, int shift,
                                  int width) {
  if (shift < 0) {
    const int amount = -shift;
    memcpy(dst, src + amount, width - amount);
    memset(dst + width - amount, src[width - 1], amount);
    return;
  }

  memcpy(dst + shift, src, width - shift);
  memset(dst, src[0], shift);
}
197
198
/* clang-format off */
199
// Per block size, per wedge index sign-flip flag. get_wedge_mask_inplace()
// XORs the flag with the requested sign to choose between a master mask and
// its complement. Rows for block sizes without wedge support are all zero.
DECLARE_ALIGNED(16, static uint8_t,
200
                wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]) = {
201
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
202
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
203
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
204
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
205
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
206
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
207
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
208
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
209
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
210
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
211
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
212
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
213
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
214
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
215
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
216
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
217
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
218
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
219
  { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, },
220
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, },
221
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
222
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
223
};
224
/* clang-format on */
225
226
// Master wedge masks, indexed [negative][direction]. Index [0] holds the
// masters and [1] their complements; both are filled by
// init_wedge_master_masks().
227
DECLARE_ALIGNED(
228
    16, static uint8_t,
229
    wedge_mask_obl[2][WEDGE_DIRECTIONS][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);
230
231
// Backing store for the block-sized wedge masks that init_wedge_masks()
// copies out of the master masks.
// 4 * MAX_WEDGE_SQUARE is an easy to compute and fairly tight upper bound
232
// on the sum of all mask sizes up to and including MAX_WEDGE_SQUARE.
233
DECLARE_ALIGNED(16, static uint8_t,
234
                wedge_mask_buf[2 * MAX_WEDGE_TYPES * 4 * MAX_WEDGE_SQUARE]);
235
236
// Smooth inter-intra masks, one per (inter-intra mode, block size) pair.
// init_smooth_interintra_masks() fills the entries whose block dimensions do
// not exceed MAX_WEDGE_SIZE, using the block width as the row stride.
DECLARE_ALIGNED(16, static uint8_t,
237
                smooth_interintra_mask_buf[INTERINTRA_MODES][BLOCK_SIZES_ALL]
238
                                          [MAX_WEDGE_SQUARE]);
239
240
// Per block size and sign: the block-sized wedge masks. Zeroed and then
// populated by init_wedge_masks() with pointers into wedge_mask_buf; exposed
// through the `masks` member of av1_wedge_params_lookup.
static wedge_masks_type wedge_masks[BLOCK_SIZES_ALL][2];
241
242
// Wedge codebook referenced by av1_wedge_params_lookup for the 8X16, 16X32
// and 8X32 block sizes (presumably "height greater than width").
// get_wedge_mask_inplace() reads each entry's direction and its x/y offsets,
// the latter expressed in eighths of the block width/height.
// NOTE(review): the initializer order looks like { direction, x_offset,
// y_offset } -- confirm against the wedge_code_type declaration.
static const wedge_code_type wedge_codebook_16_hgtw[16] = {
243
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
244
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
245
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
246
  { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
247
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
248
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
249
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
250
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
251
};
252
253
// Wedge codebook referenced by av1_wedge_params_lookup for the 16X8, 32X16
// and 32X8 block sizes (presumably "height less than width").
// get_wedge_mask_inplace() reads each entry's direction and its x/y offsets,
// the latter expressed in eighths of the block width/height.
// NOTE(review): the initializer order looks like { direction, x_offset,
// y_offset } -- confirm against the wedge_code_type declaration.
static const wedge_code_type wedge_codebook_16_hltw[16] = {
254
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
255
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
256
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 4, 4 },
257
  { WEDGE_VERTICAL, 6, 4 },   { WEDGE_HORIZONTAL, 4, 4 },
258
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
259
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
260
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
261
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
262
};
263
264
// Wedge codebook referenced by av1_wedge_params_lookup for the square 8X8,
// 16X16 and 32X32 block sizes (presumably "height equal to width").
// get_wedge_mask_inplace() reads each entry's direction and its x/y offsets,
// the latter expressed in eighths of the block width/height.
// NOTE(review): the initializer order looks like { direction, x_offset,
// y_offset } -- confirm against the wedge_code_type declaration.
static const wedge_code_type wedge_codebook_16_heqw[16] = {
265
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
266
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
267
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
268
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
269
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
270
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
271
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
272
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
273
};
274
275
// Per block size wedge configuration: number of wedge types, the codebook,
// the sign-flip row and the mask table for that size. Block sizes without
// wedge support carry { 0, NULL, NULL, NULL }; init_wedge_masks() skips
// entries whose wedge-type count is 0.
const wedge_params_type av1_wedge_params_lookup[BLOCK_SIZES_ALL] = {
276
  { 0, NULL, NULL, NULL },
277
  { 0, NULL, NULL, NULL },
278
  { 0, NULL, NULL, NULL },
279
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8],
280
    wedge_masks[BLOCK_8X8] },
281
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16],
282
    wedge_masks[BLOCK_8X16] },
283
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8],
284
    wedge_masks[BLOCK_16X8] },
285
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16],
286
    wedge_masks[BLOCK_16X16] },
287
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32],
288
    wedge_masks[BLOCK_16X32] },
289
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16],
290
    wedge_masks[BLOCK_32X16] },
291
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32],
292
    wedge_masks[BLOCK_32X32] },
293
  { 0, NULL, NULL, NULL },
294
  { 0, NULL, NULL, NULL },
295
  { 0, NULL, NULL, NULL },
296
  { 0, NULL, NULL, NULL },
297
  { 0, NULL, NULL, NULL },
298
  { 0, NULL, NULL, NULL },
299
  { 0, NULL, NULL, NULL },
300
  { 0, NULL, NULL, NULL },
301
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32],
302
    wedge_masks[BLOCK_8X32] },
303
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8],
304
    wedge_masks[BLOCK_32X8] },
305
  { 0, NULL, NULL, NULL },
306
  { 0, NULL, NULL, NULL },
307
};
308
309
static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
310
1.44k
                                             BLOCK_SIZE sb_type) {
311
1.44k
  const uint8_t *master;
312
1.44k
  const int bh = block_size_high[sb_type];
313
1.44k
  const int bw = block_size_wide[sb_type];
314
1.44k
  const wedge_code_type *a =
315
1.44k
      av1_wedge_params_lookup[sb_type].codebook + wedge_index;
316
1.44k
  int woff, hoff;
317
1.44k
  const uint8_t wsignflip =
318
1.44k
      av1_wedge_params_lookup[sb_type].signflip[wedge_index];
319
320
1.44k
  assert(wedge_index >= 0 && wedge_index < get_wedge_types_lookup(sb_type));
321
1.44k
  woff = (a->x_offset * bw) >> 3;
322
1.44k
  hoff = (a->y_offset * bh) >> 3;
323
1.44k
  master = wedge_mask_obl[neg ^ wsignflip][a->direction] +
324
1.44k
           MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
325
1.44k
           MASK_MASTER_SIZE / 2 - woff;
326
1.44k
  return master;
327
1.44k
}
328
329
const uint8_t *av1_get_compound_type_mask(
330
8.49k
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) {
331
8.49k
  (void)sb_type;
332
8.49k
  switch (comp_data->type) {
333
6.15k
    case COMPOUND_WEDGE:
334
6.15k
      return av1_get_contiguous_soft_mask(comp_data->wedge_index,
335
6.15k
                                          comp_data->wedge_sign, sb_type);
336
2.33k
    default: return comp_data->seg_mask;
337
8.49k
  }
338
8.49k
}
339
340
static AOM_INLINE void diffwtd_mask_d16(
341
    uint8_t *mask, int which_inverse, int mask_base, const CONV_BUF_TYPE *src0,
342
    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
343
795
    ConvolveParams *conv_params, int bd) {
344
795
  int round =
345
795
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
346
795
  int i, j, m, diff;
347
8.49k
  for (i = 0; i < h; ++i) {
348
126k
    for (j = 0; j < w; ++j) {
349
118k
      diff = abs(src0[i * src0_stride + j] - src1[i * src1_stride + j]);
350
118k
      diff = ROUND_POWER_OF_TWO(diff, round);
351
118k
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
352
118k
      mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
353
118k
    }
354
7.70k
  }
355
795
}
356
357
void av1_build_compound_diffwtd_mask_d16_c(
358
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
359
    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
360
795
    ConvolveParams *conv_params, int bd) {
361
795
  switch (mask_type) {
362
483
    case DIFFWTD_38:
363
483
      diffwtd_mask_d16(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w,
364
483
                       conv_params, bd);
365
483
      break;
366
312
    case DIFFWTD_38_INV:
367
312
      diffwtd_mask_d16(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w,
368
312
                       conv_params, bd);
369
312
      break;
370
0
    default: assert(0);
371
795
  }
372
795
}
373
374
static AOM_INLINE void diffwtd_mask(uint8_t *mask, int which_inverse,
375
                                    int mask_base, const uint8_t *src0,
376
                                    int src0_stride, const uint8_t *src1,
377
0
                                    int src1_stride, int h, int w) {
378
0
  int i, j, m, diff;
379
0
  for (i = 0; i < h; ++i) {
380
0
    for (j = 0; j < w; ++j) {
381
0
      diff =
382
0
          abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]);
383
0
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
384
0
      mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
385
0
    }
386
0
  }
387
0
}
388
389
void av1_build_compound_diffwtd_mask_c(uint8_t *mask,
390
                                       DIFFWTD_MASK_TYPE mask_type,
391
                                       const uint8_t *src0, int src0_stride,
392
                                       const uint8_t *src1, int src1_stride,
393
0
                                       int h, int w) {
394
0
  switch (mask_type) {
395
0
    case DIFFWTD_38:
396
0
      diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w);
397
0
      break;
398
0
    case DIFFWTD_38_INV:
399
0
      diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w);
400
0
      break;
401
0
    default: assert(0);
402
0
  }
403
0
}
404
405
static AOM_FORCE_INLINE void diffwtd_mask_highbd(
406
    uint8_t *mask, int which_inverse, int mask_base, const uint16_t *src0,
407
    int src0_stride, const uint16_t *src1, int src1_stride, int h, int w,
408
0
    const unsigned int bd) {
409
0
  assert(bd >= 8);
410
0
  if (bd == 8) {
411
0
    if (which_inverse) {
412
0
      for (int i = 0; i < h; ++i) {
413
0
        for (int j = 0; j < w; ++j) {
414
0
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
415
0
          unsigned int m = negative_to_zero(mask_base + diff);
416
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
417
0
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
418
0
        }
419
0
        src0 += src0_stride;
420
0
        src1 += src1_stride;
421
0
        mask += w;
422
0
      }
423
0
    } else {
424
0
      for (int i = 0; i < h; ++i) {
425
0
        for (int j = 0; j < w; ++j) {
426
0
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
427
0
          unsigned int m = negative_to_zero(mask_base + diff);
428
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
429
0
          mask[j] = m;
430
0
        }
431
0
        src0 += src0_stride;
432
0
        src1 += src1_stride;
433
0
        mask += w;
434
0
      }
435
0
    }
436
0
  } else {
437
0
    const unsigned int bd_shift = bd - 8;
438
0
    if (which_inverse) {
439
0
      for (int i = 0; i < h; ++i) {
440
0
        for (int j = 0; j < w; ++j) {
441
0
          int diff =
442
0
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
443
0
          unsigned int m = negative_to_zero(mask_base + diff);
444
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
445
0
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
446
0
        }
447
0
        src0 += src0_stride;
448
0
        src1 += src1_stride;
449
0
        mask += w;
450
0
      }
451
0
    } else {
452
0
      for (int i = 0; i < h; ++i) {
453
0
        for (int j = 0; j < w; ++j) {
454
0
          int diff =
455
0
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
456
0
          unsigned int m = negative_to_zero(mask_base + diff);
457
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
458
0
          mask[j] = m;
459
0
        }
460
0
        src0 += src0_stride;
461
0
        src1 += src1_stride;
462
0
        mask += w;
463
0
      }
464
0
    }
465
0
  }
466
0
}
467
468
// C implementation of the difference-weighted mask builder for high bit
// depth predictions. src0/src1 are byte-pointer handles that are converted
// with CONVERT_TO_SHORTPTR before use. DIFFWTD_38 produces the mask with
// base weight 38, DIFFWTD_38_INV its complement; any other mask type trips
// an assertion and leaves `mask` untouched.
void av1_build_compound_diffwtd_mask_highbd_c(
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0,
    int src0_stride, const uint8_t *src1, int src1_stride, int h, int w,
    int bd) {
  if (mask_type == DIFFWTD_38) {
    diffwtd_mask_highbd(mask, 0, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                        CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
  } else if (mask_type == DIFFWTD_38_INV) {
    diffwtd_mask_highbd(mask, 1, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                        CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
  } else {
    assert(0);
  }
}
484
485
5
static AOM_INLINE void init_wedge_master_masks() {
486
5
  int i, j;
487
5
  const int w = MASK_MASTER_SIZE;
488
5
  const int h = MASK_MASTER_SIZE;
489
5
  const int stride = MASK_MASTER_STRIDE;
490
  // Note: index [0] stores the masters, and [1] its complement.
491
  // Generate prototype by shifting the masters
492
5
  int shift = h / 4;
493
165
  for (i = 0; i < h; i += 2) {
494
160
    shift_copy(wedge_master_oblique_even,
495
160
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride], shift,
496
160
               MASK_MASTER_SIZE);
497
160
    shift--;
498
160
    shift_copy(wedge_master_oblique_odd,
499
160
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][(i + 1) * stride], shift,
500
160
               MASK_MASTER_SIZE);
501
160
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][i * stride],
502
160
           wedge_master_vertical,
503
160
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
504
160
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][(i + 1) * stride],
505
160
           wedge_master_vertical,
506
160
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
507
160
  }
508
509
325
  for (i = 0; i < h; ++i) {
510
20.8k
    for (j = 0; j < w; ++j) {
511
20.4k
      const int msk = wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j];
512
20.4k
      wedge_mask_obl[0][WEDGE_OBLIQUE27][j * stride + i] = msk;
513
20.4k
      wedge_mask_obl[0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
514
20.4k
          wedge_mask_obl[0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
515
20.4k
              (1 << WEDGE_WEIGHT_BITS) - msk;
516
20.4k
      wedge_mask_obl[1][WEDGE_OBLIQUE63][i * stride + j] =
517
20.4k
          wedge_mask_obl[1][WEDGE_OBLIQUE27][j * stride + i] =
518
20.4k
              (1 << WEDGE_WEIGHT_BITS) - msk;
519
20.4k
      wedge_mask_obl[1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
520
20.4k
          wedge_mask_obl[1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = msk;
521
20.4k
      const int mskx = wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j];
522
20.4k
      wedge_mask_obl[0][WEDGE_HORIZONTAL][j * stride + i] = mskx;
523
20.4k
      wedge_mask_obl[1][WEDGE_VERTICAL][i * stride + j] =
524
20.4k
          wedge_mask_obl[1][WEDGE_HORIZONTAL][j * stride + i] =
525
20.4k
              (1 << WEDGE_WEIGHT_BITS) - mskx;
526
20.4k
    }
527
320
  }
528
5
}
529
530
5
static AOM_INLINE void init_wedge_masks() {
531
5
  uint8_t *dst = wedge_mask_buf;
532
5
  BLOCK_SIZE bsize;
533
5
  memset(wedge_masks, 0, sizeof(wedge_masks));
534
115
  for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; ++bsize) {
535
110
    const wedge_params_type *wedge_params = &av1_wedge_params_lookup[bsize];
536
110
    const int wtypes = wedge_params->wedge_types;
537
110
    if (wtypes == 0) continue;
538
45
    const uint8_t *mask;
539
45
    const int bw = block_size_wide[bsize];
540
45
    const int bh = block_size_high[bsize];
541
45
    int w;
542
765
    for (w = 0; w < wtypes; ++w) {
543
720
      mask = get_wedge_mask_inplace(w, 0, bsize);
544
720
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw,
545
720
                        bh);
546
720
      wedge_params->masks[0][w] = dst;
547
720
      dst += bw * bh;
548
549
720
      mask = get_wedge_mask_inplace(w, 1, bsize);
550
720
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw,
551
720
                        bh);
552
720
      wedge_params->masks[1][w] = dst;
553
720
      dst += bw * bh;
554
720
    }
555
45
    assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf));
556
45
  }
557
5
}
558
559
/* clang-format off */
560
// One-dimensional smooth inter-intra weights, decreasing from 60 to 1 over
// the 128 initializers listed. build_smooth_interintra_mask() indexes it with
// a row/column position multiplied by the block size's ii_size_scales entry.
static const uint8_t ii_weights1d[MAX_SB_SIZE] = {
561
  60, 58, 56, 54, 52, 50, 48, 47, 45, 44, 42, 41, 39, 38, 37, 35, 34, 33, 32,
562
  31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20, 19, 19, 18, 18, 17, 16,
563
  16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10,  9,  9,  9,  8,
564
  8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  4,  4,
565
  4,  4,  4,  4,  4,  4,  3,  3,  3,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,
566
  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1,  1,
567
  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1
568
};
569
// Per block size step applied to positions before indexing ii_weights1d in
// build_smooth_interintra_mask(). The table is only ever read, so it is
// declared const.
static const uint8_t ii_size_scales[BLOCK_SIZES_ALL] = {
    32, 16, 16, 16, 8, 8, 8, 4,
    4,  4,  2,  2,  2, 1, 1, 1,
    8,  8,  4,  4,  2, 2
};
574
/* clang-format on */
575
576
static AOM_INLINE void build_smooth_interintra_mask(uint8_t *mask, int stride,
577
                                                    BLOCK_SIZE plane_bsize,
578
2.54k
                                                    INTERINTRA_MODE mode) {
579
2.54k
  int i, j;
580
2.54k
  const int bw = block_size_wide[plane_bsize];
581
2.54k
  const int bh = block_size_high[plane_bsize];
582
2.54k
  const int size_scale = ii_size_scales[plane_bsize];
583
584
2.54k
  switch (mode) {
585
1.20k
    case II_V_PRED:
586
11.3k
      for (i = 0; i < bh; ++i) {
587
10.1k
        memset(mask, ii_weights1d[i * size_scale], bw * sizeof(mask[0]));
588
10.1k
        mask += stride;
589
10.1k
      }
590
1.20k
      break;
591
592
398
    case II_H_PRED:
593
7.53k
      for (i = 0; i < bh; ++i) {
594
108k
        for (j = 0; j < bw; ++j) mask[j] = ii_weights1d[j * size_scale];
595
7.13k
        mask += stride;
596
7.13k
      }
597
398
      break;
598
599
831
    case II_SMOOTH_PRED:
600
8.25k
      for (i = 0; i < bh; ++i) {
601
64.8k
        for (j = 0; j < bw; ++j)
602
57.4k
          mask[j] = ii_weights1d[(i < j ? i : j) * size_scale];
603
7.42k
        mask += stride;
604
7.42k
      }
605
831
      break;
606
607
111
    case II_DC_PRED:
608
111
    default:
609
1.62k
      for (i = 0; i < bh; ++i) {
610
1.51k
        memset(mask, 32, bw * sizeof(mask[0]));
611
1.51k
        mask += stride;
612
1.51k
      }
613
111
      break;
614
2.54k
  }
615
2.54k
}
616
617
5
static AOM_INLINE void init_smooth_interintra_masks() {
618
25
  for (int m = 0; m < INTERINTRA_MODES; ++m) {
619
460
    for (int bs = 0; bs < BLOCK_SIZES_ALL; ++bs) {
620
440
      const int bw = block_size_wide[bs];
621
440
      const int bh = block_size_high[bs];
622
440
      if (bw > MAX_WEDGE_SIZE || bh > MAX_WEDGE_SIZE) continue;
623
280
      build_smooth_interintra_mask(smooth_interintra_mask_buf[m][bs], bw, bs,
624
280
                                   m);
625
280
    }
626
20
  }
627
5
}
628
629
// Initialises every static mask table in this file: the master wedge masks,
// the block-sized wedge masks cut from them, and the smooth inter-intra
// masks. The master masks must be built first because the block-sized masks
// are copied out of them.
//
// Note kept from the original source (the a[] coefficients it mentions are
// not defined in the visible part of this file):
// Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
//
// The parameter list is spelled (void) so the definition is a proper
// prototype.
void av1_init_wedge_masks(void) {
  init_wedge_master_masks();
  init_wedge_masks();
  init_smooth_interintra_masks();
}
635
636
static AOM_INLINE void build_masked_compound_no_round(
637
    uint8_t *dst, int dst_stride, const CONV_BUF_TYPE *src0, int src0_stride,
638
    const CONV_BUF_TYPE *src1, int src1_stride,
639
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
640
8.49k
    int w, InterPredParams *inter_pred_params) {
641
8.49k
  const int ssy = inter_pred_params->subsampling_y;
642
8.49k
  const int ssx = inter_pred_params->subsampling_x;
643
8.49k
  const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
644
8.49k
  const int mask_stride = block_size_wide[sb_type];
645
8.49k
#if CONFIG_AV1_HIGHBITDEPTH
646
8.49k
  if (inter_pred_params->use_hbd_buf) {
647
4.37k
    aom_highbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
648
4.37k
                                  src1_stride, mask, mask_stride, w, h, ssx,
649
4.37k
                                  ssy, &inter_pred_params->conv_params,
650
4.37k
                                  inter_pred_params->bit_depth);
651
4.37k
  } else {
652
4.12k
    aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
653
4.12k
                                 src1_stride, mask, mask_stride, w, h, ssx, ssy,
654
4.12k
                                 &inter_pred_params->conv_params);
655
4.12k
  }
656
#else
657
  aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
658
                               src1_stride, mask, mask_stride, w, h, ssx, ssy,
659
                               &inter_pred_params->conv_params);
660
#endif
661
8.49k
}
662
663
static void make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
664
                                        uint8_t *dst, int dst_stride,
665
                                        InterPredParams *inter_pred_params,
666
8.49k
                                        const SubpelParams *subpel_params) {
667
8.49k
  const INTERINTER_COMPOUND_DATA *comp_data = &inter_pred_params->mask_comp;
668
8.49k
  BLOCK_SIZE sb_type = inter_pred_params->sb_type;
669
670
  // We're going to call av1_make_inter_predictor to generate a prediction into
671
  // a temporary buffer, then will blend that temporary buffer with that from
672
  // the other reference.
673
8.49k
  DECLARE_ALIGNED(32, uint8_t, tmp_buf[2 * MAX_SB_SQUARE]);
674
8.49k
  uint8_t *tmp_dst =
675
8.49k
      inter_pred_params->use_hbd_buf ? CONVERT_TO_BYTEPTR(tmp_buf) : tmp_buf;
676
677
8.49k
  const int tmp_buf_stride = MAX_SB_SIZE;
678
8.49k
  CONV_BUF_TYPE *org_dst = inter_pred_params->conv_params.dst;
679
8.49k
  int org_dst_stride = inter_pred_params->conv_params.dst_stride;
680
8.49k
  CONV_BUF_TYPE *tmp_buf16 = (CONV_BUF_TYPE *)tmp_buf;
681
8.49k
  inter_pred_params->conv_params.dst = tmp_buf16;
682
8.49k
  inter_pred_params->conv_params.dst_stride = tmp_buf_stride;
683
8.49k
  assert(inter_pred_params->conv_params.do_average == 0);
684
685
  // This will generate a prediction in tmp_buf for the second reference
686
8.49k
  av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE,
687
8.49k
                           inter_pred_params, subpel_params);
688
689
8.49k
  if (!inter_pred_params->conv_params.plane &&
690
2.85k
      comp_data->type == COMPOUND_DIFFWTD) {
691
795
    av1_build_compound_diffwtd_mask_d16(
692
795
        comp_data->seg_mask, comp_data->mask_type, org_dst, org_dst_stride,
693
795
        tmp_buf16, tmp_buf_stride, inter_pred_params->block_height,
694
795
        inter_pred_params->block_width, &inter_pred_params->conv_params,
695
795
        inter_pred_params->bit_depth);
696
795
  }
697
8.49k
  build_masked_compound_no_round(
698
8.49k
      dst, dst_stride, org_dst, org_dst_stride, tmp_buf16, tmp_buf_stride,
699
8.49k
      comp_data, sb_type, inter_pred_params->block_height,
700
8.49k
      inter_pred_params->block_width, inter_pred_params);
701
8.49k
}
702
703
void av1_build_one_inter_predictor(
704
    uint8_t *dst, int dst_stride, const MV *const src_mv,
705
    InterPredParams *inter_pred_params, MACROBLOCKD *xd, int mi_x, int mi_y,
706
422k
    int ref, uint8_t **mc_buf, CalcSubpelParamsFunc calc_subpel_params_func) {
707
422k
  SubpelParams subpel_params;
708
422k
  uint8_t *src;
709
422k
  int src_stride;
710
422k
  calc_subpel_params_func(src_mv, inter_pred_params, xd, mi_x, mi_y, ref,
711
422k
                          mc_buf, &src, &subpel_params, &src_stride);
712
713
422k
  if (inter_pred_params->comp_mode == UNIFORM_SINGLE ||
714
413k
      inter_pred_params->comp_mode == UNIFORM_COMP) {
715
413k
    av1_make_inter_predictor(src, src_stride, dst, dst_stride,
716
413k
                             inter_pred_params, &subpel_params);
717
413k
  } else {
718
8.49k
    make_masked_inter_predictor(src, src_stride, dst, dst_stride,
719
8.49k
                                inter_pred_params, &subpel_params);
720
8.49k
  }
721
422k
}
722
723
void av1_dist_wtd_comp_weight_assign(const AV1_COMMON *cm,
724
                                     const MB_MODE_INFO *mbmi, int *fwd_offset,
725
                                     int *bck_offset,
726
                                     int *use_dist_wtd_comp_avg,
727
414k
                                     int is_compound) {
728
414k
  assert(fwd_offset != NULL && bck_offset != NULL);
729
414k
  if (!is_compound || mbmi->compound_idx) {
730
399k
    *fwd_offset = 8;
731
399k
    *bck_offset = 8;
732
399k
    *use_dist_wtd_comp_avg = 0;
733
399k
    return;
734
399k
  }
735
736
14.9k
  *use_dist_wtd_comp_avg = 1;
737
14.9k
  const RefCntBuffer *const bck_buf = get_ref_frame_buf(cm, mbmi->ref_frame[0]);
738
14.9k
  const RefCntBuffer *const fwd_buf = get_ref_frame_buf(cm, mbmi->ref_frame[1]);
739
14.9k
  const int cur_frame_index = cm->cur_frame->order_hint;
740
14.9k
  int bck_frame_index = 0, fwd_frame_index = 0;
741
742
14.9k
  if (bck_buf != NULL) bck_frame_index = bck_buf->order_hint;
743
14.9k
  if (fwd_buf != NULL) fwd_frame_index = fwd_buf->order_hint;
744
745
14.9k
  int d0 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
746
14.9k
                                       fwd_frame_index, cur_frame_index)),
747
14.9k
                 0, MAX_FRAME_DISTANCE);
748
14.9k
  int d1 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
749
14.9k
                                       cur_frame_index, bck_frame_index)),
750
14.9k
                 0, MAX_FRAME_DISTANCE);
751
752
14.9k
  const int order = d0 <= d1;
753
754
14.9k
  if (d0 == 0 || d1 == 0) {
755
984
    *fwd_offset = quant_dist_lookup_table[3][order];
756
984
    *bck_offset = quant_dist_lookup_table[3][1 - order];
757
984
    return;
758
984
  }
759
760
13.9k
  int i;
761
21.0k
  for (i = 0; i < 3; ++i) {
762
19.1k
    int c0 = quant_dist_weight[i][order];
763
19.1k
    int c1 = quant_dist_weight[i][!order];
764
19.1k
    int d0_c0 = d0 * c0;
765
19.1k
    int d1_c1 = d1 * c1;
766
19.1k
    if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
767
19.1k
  }
768
769
13.9k
  *fwd_offset = quant_dist_lookup_table[i][order];
770
13.9k
  *bck_offset = quant_dist_lookup_table[i][1 - order];
771
13.9k
}
772
773
// True if the following hold:
774
//  1. Not intrabc and not build_for_obmc
775
//  2. At least one dimension is size 4 with subsampling
776
//  3. If sub-sampled, none of the previous blocks around the sub-sample
777
//     are intrabc or inter-blocks
778
static bool is_sub8x8_inter(const MACROBLOCKD *xd, int plane, BLOCK_SIZE bsize,
779
378k
                            int is_intrabc, int build_for_obmc) {
780
378k
  if (is_intrabc || build_for_obmc) {
781
252k
    return false;
782
252k
  }
783
784
125k
  const struct macroblockd_plane *const pd = &xd->plane[plane];
785
125k
  const int ss_x = pd->subsampling_x;
786
125k
  const int ss_y = pd->subsampling_y;
787
125k
  const int is_sub4_x = (block_size_wide[bsize] == 4) && ss_x;
788
125k
  const int is_sub4_y = (block_size_high[bsize] == 4) && ss_y;
789
125k
  if (!is_sub4_x && !is_sub4_y) {
790
122k
    return false;
791
122k
  }
792
793
  // For sub8x8 chroma blocks, we may be covering more than one luma block's
794
  // worth of pixels. Thus (mi_x, mi_y) may not be the correct coordinates for
795
  // the top-left corner of the prediction source - the correct top-left corner
796
  // is at (pre_x, pre_y).
797
3.89k
  const int row_start = is_sub4_y ? -1 : 0;
798
3.89k
  const int col_start = is_sub4_x ? -1 : 0;
799
800
7.96k
  for (int row = row_start; row <= 0; ++row) {
801
12.1k
    for (int col = col_start; col <= 0; ++col) {
802
8.09k
      const MB_MODE_INFO *this_mbmi = xd->mi[row * xd->mi_stride + col];
803
8.09k
      if (!is_inter_block(this_mbmi)) return false;
804
8.00k
      if (is_intrabc_block(this_mbmi)) return false;
805
8.00k
    }
806
4.16k
  }
807
3.79k
  return true;
808
3.89k
}
809
810
static void build_inter_predictors_sub8x8(
811
    const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, const MB_MODE_INFO *mi,
812
    int mi_x, int mi_y, uint8_t **mc_buf,
813
3.79k
    CalcSubpelParamsFunc calc_subpel_params_func) {
814
3.79k
  const BLOCK_SIZE bsize = mi->bsize;
815
3.79k
  struct macroblockd_plane *const pd = &xd->plane[plane];
816
3.79k
  const bool ss_x = pd->subsampling_x;
817
3.79k
  const bool ss_y = pd->subsampling_y;
818
3.79k
  const int b4_w = block_size_wide[bsize] >> ss_x;
819
3.79k
  const int b4_h = block_size_high[bsize] >> ss_y;
820
3.79k
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ss_x, ss_y);
821
3.79k
  const int b8_w = block_size_wide[plane_bsize];
822
3.79k
  const int b8_h = block_size_high[plane_bsize];
823
3.79k
  const int is_compound = has_second_ref(mi);
824
3.79k
  assert(!is_compound);
825
3.79k
  assert(!is_intrabc_block(mi));
826
827
  // For sub8x8 chroma blocks, we may be covering more than one luma block's
828
  // worth of pixels. Thus (mi_x, mi_y) may not be the correct coordinates for
829
  // the top-left corner of the prediction source - the correct top-left corner
830
  // is at (pre_x, pre_y).
831
3.79k
  const int row_start = (block_size_high[bsize] == 4) && ss_y ? -1 : 0;
832
3.79k
  const int col_start = (block_size_wide[bsize] == 4) && ss_x ? -1 : 0;
833
3.79k
  const int pre_x = (mi_x + MI_SIZE * col_start) >> ss_x;
834
3.79k
  const int pre_y = (mi_y + MI_SIZE * row_start) >> ss_y;
835
836
3.79k
  int row = row_start;
837
7.85k
  for (int y = 0; y < b8_h; y += b4_h) {
838
4.05k
    int col = col_start;
839
12.0k
    for (int x = 0; x < b8_w; x += b4_w) {
840
7.98k
      MB_MODE_INFO *this_mbmi = xd->mi[row * xd->mi_stride + col];
841
7.98k
      struct buf_2d *const dst_buf = &pd->dst;
842
7.98k
      uint8_t *dst = dst_buf->buf + dst_buf->stride * y + x;
843
7.98k
      int ref = 0;
844
7.98k
      const RefCntBuffer *ref_buf =
845
7.98k
          get_ref_frame_buf(cm, this_mbmi->ref_frame[ref]);
846
7.98k
      const struct scale_factors *ref_scale_factors =
847
7.98k
          get_ref_scale_factors_const(cm, this_mbmi->ref_frame[ref]);
848
7.98k
      const struct scale_factors *const sf = ref_scale_factors;
849
7.98k
      const struct buf_2d pre_buf = {
850
7.98k
        NULL,
851
7.98k
        (plane == 1) ? ref_buf->buf.u_buffer : ref_buf->buf.v_buffer,
852
7.98k
        ref_buf->buf.uv_crop_width,
853
7.98k
        ref_buf->buf.uv_crop_height,
854
7.98k
        ref_buf->buf.uv_stride,
855
7.98k
      };
856
857
7.98k
      const MV mv = this_mbmi->mv[ref].as_mv;
858
859
7.98k
      InterPredParams inter_pred_params;
860
7.98k
      av1_init_inter_params(&inter_pred_params, b4_w, b4_h, pre_y + y,
861
7.98k
                            pre_x + x, pd->subsampling_x, pd->subsampling_y,
862
7.98k
                            xd->bd, is_cur_buf_hbd(xd), mi->use_intrabc, sf,
863
7.98k
                            &pre_buf, this_mbmi->interp_filters);
864
7.98k
      inter_pred_params.conv_params =
865
7.98k
          get_conv_params_no_round(ref, plane, NULL, 0, is_compound, xd->bd);
866
867
7.98k
      av1_build_one_inter_predictor(dst, dst_buf->stride, &mv,
868
7.98k
                                    &inter_pred_params, xd, mi_x + x, mi_y + y,
869
7.98k
                                    ref, mc_buf, calc_subpel_params_func);
870
871
7.98k
      ++col;
872
7.98k
    }
873
4.05k
    ++row;
874
4.05k
  }
875
3.79k
}
876
877
static void build_inter_predictors_8x8_and_bigger(
878
    const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, const MB_MODE_INFO *mi,
879
    int build_for_obmc, int bw, int bh, int mi_x, int mi_y, uint8_t **mc_buf,
880
374k
    CalcSubpelParamsFunc calc_subpel_params_func) {
881
374k
  const int is_compound = has_second_ref(mi);
882
374k
  const int is_intrabc = is_intrabc_block(mi);
883
374k
  assert(IMPLIES(is_intrabc, !is_compound));
884
374k
  struct macroblockd_plane *const pd = &xd->plane[plane];
885
374k
  struct buf_2d *const dst_buf = &pd->dst;
886
374k
  uint8_t *const dst = dst_buf->buf;
887
888
374k
  int is_global[2] = { 0, 0 };
889
788k
  for (int ref = 0; ref < 1 + is_compound; ++ref) {
890
414k
    const WarpedMotionParams *const wm = &xd->global_motion[mi->ref_frame[ref]];
891
414k
    is_global[ref] = is_global_mv_block(mi, wm->wmtype);
892
414k
  }
893
894
374k
  const BLOCK_SIZE bsize = mi->bsize;
895
374k
  const int ss_x = pd->subsampling_x;
896
374k
  const int ss_y = pd->subsampling_y;
897
374k
  const int row_start =
898
374k
      (block_size_high[bsize] == 4) && ss_y && !build_for_obmc ? -1 : 0;
899
374k
  const int col_start =
900
374k
      (block_size_wide[bsize] == 4) && ss_x && !build_for_obmc ? -1 : 0;
901
374k
  const int pre_x = (mi_x + MI_SIZE * col_start) >> ss_x;
902
374k
  const int pre_y = (mi_y + MI_SIZE * row_start) >> ss_y;
903
904
788k
  for (int ref = 0; ref < 1 + is_compound; ++ref) {
905
414k
    const struct scale_factors *const sf =
906
414k
        is_intrabc ? &cm->sf_identity : xd->block_ref_scale_factors[ref];
907
414k
    struct buf_2d *const pre_buf = is_intrabc ? dst_buf : &pd->pre[ref];
908
414k
    const MV mv = mi->mv[ref].as_mv;
909
414k
    const WarpTypesAllowed warp_types = { is_global[ref],
910
414k
                                          mi->motion_mode == WARPED_CAUSAL };
911
912
414k
    InterPredParams inter_pred_params;
913
414k
    av1_init_inter_params(&inter_pred_params, bw, bh, pre_y, pre_x,
914
414k
                          pd->subsampling_x, pd->subsampling_y, xd->bd,
915
414k
                          is_cur_buf_hbd(xd), mi->use_intrabc, sf, pre_buf,
916
414k
                          mi->interp_filters);
917
414k
    if (is_compound) av1_init_comp_mode(&inter_pred_params);
918
414k
    inter_pred_params.conv_params = get_conv_params_no_round(
919
414k
        ref, plane, xd->tmp_conv_dst, MAX_SB_SIZE, is_compound, xd->bd);
920
921
414k
    av1_dist_wtd_comp_weight_assign(
922
414k
        cm, mi, &inter_pred_params.conv_params.fwd_offset,
923
414k
        &inter_pred_params.conv_params.bck_offset,
924
414k
        &inter_pred_params.conv_params.use_dist_wtd_comp_avg, is_compound);
925
926
414k
    if (!build_for_obmc)
927
397k
      av1_init_warp_params(&inter_pred_params, &warp_types, ref, xd, mi);
928
929
414k
    if (is_masked_compound_type(mi->interinter_comp.type)) {
930
16.9k
      inter_pred_params.sb_type = mi->bsize;
931
16.9k
      inter_pred_params.mask_comp = mi->interinter_comp;
932
16.9k
      if (ref == 1) {
933
8.49k
        inter_pred_params.conv_params.do_average = 0;
934
8.49k
        inter_pred_params.comp_mode = MASK_COMP;
935
8.49k
      }
936
      // Assign physical buffer.
937
16.9k
      inter_pred_params.mask_comp.seg_mask = xd->seg_mask;
938
16.9k
    }
939
940
414k
    av1_build_one_inter_predictor(dst, dst_buf->stride, &mv, &inter_pred_params,
941
414k
                                  xd, mi_x, mi_y, ref, mc_buf,
942
414k
                                  calc_subpel_params_func);
943
414k
  }
944
374k
}
945
946
void av1_build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd,
947
                                int plane, const MB_MODE_INFO *mi,
948
                                int build_for_obmc, int bw, int bh, int mi_x,
949
                                int mi_y, uint8_t **mc_buf,
950
378k
                                CalcSubpelParamsFunc calc_subpel_params_func) {
951
378k
  if (is_sub8x8_inter(xd, plane, mi->bsize, is_intrabc_block(mi),
952
378k
                      build_for_obmc)) {
953
3.79k
    assert(bw < 8 || bh < 8);
954
3.79k
    build_inter_predictors_sub8x8(cm, xd, plane, mi, mi_x, mi_y, mc_buf,
955
3.79k
                                  calc_subpel_params_func);
956
374k
  } else {
957
374k
    build_inter_predictors_8x8_and_bigger(cm, xd, plane, mi, build_for_obmc, bw,
958
374k
                                          bh, mi_x, mi_y, mc_buf,
959
374k
                                          calc_subpel_params_func);
960
374k
  }
961
378k
}
962
void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
963
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
964
9.98M
                          const int plane_start, const int plane_end) {
965
  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
966
  // the static analysis warnings.
967
39.1M
  for (int i = plane_start; i < AOMMIN(plane_end, MAX_MB_PLANE); ++i) {
968
29.1M
    struct macroblockd_plane *const pd = &planes[i];
969
29.1M
    const int is_uv = i > 0;
970
29.1M
    setup_pred_plane(&pd->dst, bsize, src->buffers[i], src->crop_widths[is_uv],
971
29.1M
                     src->crop_heights[is_uv], src->strides[is_uv], mi_row,
972
29.1M
                     mi_col, NULL, pd->subsampling_x, pd->subsampling_y);
973
29.1M
  }
974
9.98M
}
975
976
void av1_setup_pre_planes(MACROBLOCKD *xd, int idx,
977
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
978
                          const struct scale_factors *sf,
979
69.0k
                          const int num_planes) {
980
69.0k
  if (src != NULL) {
981
    // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
982
    // the static analysis warnings.
983
258k
    for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); ++i) {
984
189k
      struct macroblockd_plane *const pd = &xd->plane[i];
985
189k
      const int is_uv = i > 0;
986
189k
      setup_pred_plane(&pd->pre[idx], xd->mi[0]->bsize, src->buffers[i],
987
189k
                       src->crop_widths[is_uv], src->crop_heights[is_uv],
988
189k
                       src->strides[is_uv], mi_row, mi_col, sf,
989
189k
                       pd->subsampling_x, pd->subsampling_y);
990
189k
    }
991
69.0k
  }
992
69.0k
}
993
994
// obmc_mask_N[overlap_position]
995
static const uint8_t obmc_mask_1[1] = { 64 };
996
DECLARE_ALIGNED(2, static const uint8_t, obmc_mask_2[2]) = { 45, 64 };
997
998
DECLARE_ALIGNED(4, static const uint8_t, obmc_mask_4[4]) = { 39, 50, 59, 64 };
999
1000
static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 64, 64 };
1001
1002
static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54,
1003
                                          56, 58, 60, 61, 64, 64, 64, 64 };
1004
1005
static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44,
1006
                                          45, 47, 48, 50, 51, 52, 53, 55,
1007
                                          56, 57, 58, 59, 60, 60, 61, 62,
1008
                                          64, 64, 64, 64, 64, 64, 64, 64 };
1009
1010
static const uint8_t obmc_mask_64[64] = {
1011
  33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
1012
  45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
1013
  56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
1014
  62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
1015
};
1016
1017
16.6k
const uint8_t *av1_get_obmc_mask(int length) {
1018
16.6k
  switch (length) {
1019
0
    case 1: return obmc_mask_1;
1020
2.15k
    case 2: return obmc_mask_2;
1021
12.5k
    case 4: return obmc_mask_4;
1022
1.92k
    case 8: return obmc_mask_8;
1023
9
    case 16: return obmc_mask_16;
1024
0
    case 32: return obmc_mask_32;
1025
0
    case 64: return obmc_mask_64;
1026
0
    default: assert(0); return NULL;
1027
16.6k
  }
1028
16.6k
}
1029
1030
// Neighbour-visitor callback that simply counts invocations: fun_ctxt is
// treated as a pointer to int and the pointed-to value is incremented by
// one. All other parameters are ignored; they are present only because the
// function is passed as a callback to the foreach_overlappable_nb_* helpers.
static INLINE void increment_int_ptr(MACROBLOCKD *xd, int rel_mi_row,
                                     int rel_mi_col, uint8_t op_mi_size,
                                     int dir, MB_MODE_INFO *mi, void *fun_ctxt,
                                     const int num_planes) {
  (void)xd;
  (void)rel_mi_row;
  (void)rel_mi_col;
  (void)op_mi_size;
  (void)dir;
  (void)mi;
  (void)num_planes;

  int *const counter = (int *)fun_ctxt;
  *counter += 1;
}
1043
1044
47.9k
// Recomputes xd->mi[0]->overlappable_neighbors.
//
// The count is reset to 0 and stays 0 when motion variation is not allowed
// for the block size. Otherwise the above neighbours are counted; the left
// neighbours are counted only if no above neighbour was found.
// NOTE(review): the counter is updated through increment_int_ptr, which
// writes through an int pointer - confirm the field's type matches.
void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd) {
  MB_MODE_INFO *const cur_mbmi = xd->mi[0];

  cur_mbmi->overlappable_neighbors = 0;

  if (is_motion_variation_allowed_bsize(cur_mbmi->bsize)) {
    foreach_overlappable_nb_above(cm, xd, INT_MAX, increment_int_ptr,
                                  &cur_mbmi->overlappable_neighbors);
    if (cur_mbmi->overlappable_neighbors == 0) {
      foreach_overlappable_nb_left(cm, xd, INT_MAX, increment_int_ptr,
                                   &cur_mbmi->overlappable_neighbors);
    }
  }
}
1057
1058
// HW does not support < 4x4 prediction. To limit the bandwidth requirement, if
1059
// block-size of current plane is smaller than 8x8, always only blend with the
1060
// left neighbor(s) (skip blending with the above side).
1061
#define DISABLE_CHROMA_U8X8_OBMC 0  // 0: one-sided obmc; 1: disable
1062
1063
// Tells the OBMC code whether blending must be skipped for this plane.
//
// Only plane block sizes BLOCK_4X4, BLOCK_8X4 and BLOCK_4X8 are ever
// skipped; every other size returns 0. For those three sizes:
//   - with DISABLE_CHROMA_U8X8_OBMC set, the result is always 1;
//   - otherwise the result is 1 only for dir == 0 (the value passed by the
//     above-neighbour blend) and 0 for any other dir.
// bsize must satisfy is_motion_variation_allowed_bsize() (asserted).
int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,
                               const struct macroblockd_plane *pd, int dir) {
  assert(is_motion_variation_allowed_bsize(bsize));

  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
  const int is_small = plane_bsize == BLOCK_4X4 || plane_bsize == BLOCK_8X4 ||
                       plane_bsize == BLOCK_4X8;
  if (!is_small) return 0;

#if DISABLE_CHROMA_U8X8_OBMC
  (void)dir;
  return 1;
#else
  return dir == 0;
#endif
}
1082
1083
6.36k
// Rewrites a neighbour's mode info in place for use by OBMC: the second
// reference is cleared to NONE_FRAME and the inter-inter compound type is
// set to COMPOUND_AVERAGE. Note that this modifies the caller's mbmi.
void av1_modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
  mbmi->ref_frame[1] = NONE_FRAME;
}
1089
1090
// Context handed to the OBMC blend callbacks. Field order matters: the
// struct is filled with positional initialisers ({ buffers, strides }).
struct obmc_inter_pred_ctxt {
  // Per-plane prediction buffers built from the neighbouring blocks.
  uint8_t **adjacent;
  // Per-plane strides of the buffers in `adjacent`.
  int *adjacent_stride;
};
1094
1095
static INLINE void build_obmc_inter_pred_above(
1096
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
1097
3.57k
    int dir, MB_MODE_INFO *above_mi, void *fun_ctxt, const int num_planes) {
1098
3.57k
  (void)above_mi;
1099
3.57k
  (void)rel_mi_row;
1100
3.57k
  (void)dir;
1101
3.57k
  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
1102
3.57k
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
1103
3.57k
  const int overlap =
1104
3.57k
      AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
1105
1106
14.1k
  for (int plane = 0; plane < num_planes; ++plane) {
1107
10.5k
    const struct macroblockd_plane *pd = &xd->plane[plane];
1108
10.5k
    const int bw = (op_mi_size * MI_SIZE) >> pd->subsampling_x;
1109
10.5k
    const int bh = overlap >> pd->subsampling_y;
1110
10.5k
    const int plane_col = (rel_mi_col * MI_SIZE) >> pd->subsampling_x;
1111
1112
10.5k
    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
1113
1114
8.21k
    const int dst_stride = pd->dst.stride;
1115
8.21k
    uint8_t *const dst = &pd->dst.buf[plane_col];
1116
8.21k
    const int tmp_stride = ctxt->adjacent_stride[plane];
1117
8.21k
    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_col];
1118
8.21k
    const uint8_t *const mask = av1_get_obmc_mask(bh);
1119
8.21k
#if CONFIG_AV1_HIGHBITDEPTH
1120
8.21k
    const int is_hbd = is_cur_buf_hbd(xd);
1121
8.21k
    if (is_hbd)
1122
6.51k
      aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
1123
6.51k
                                 tmp_stride, mask, bw, bh, xd->bd);
1124
1.70k
    else
1125
1.70k
      aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
1126
1.70k
                          mask, bw, bh);
1127
#else
1128
    aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
1129
                        bw, bh);
1130
#endif
1131
8.21k
  }
1132
3.57k
}
1133
1134
static INLINE void build_obmc_inter_pred_left(
1135
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
1136
2.79k
    int dir, MB_MODE_INFO *left_mi, void *fun_ctxt, const int num_planes) {
1137
2.79k
  (void)left_mi;
1138
2.79k
  (void)rel_mi_col;
1139
2.79k
  (void)dir;
1140
2.79k
  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
1141
2.79k
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
1142
2.79k
  const int overlap =
1143
2.79k
      AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
1144
1145
11.1k
  for (int plane = 0; plane < num_planes; ++plane) {
1146
8.38k
    const struct macroblockd_plane *pd = &xd->plane[plane];
1147
8.38k
    const int bw = overlap >> pd->subsampling_x;
1148
8.38k
    const int bh = (op_mi_size * MI_SIZE) >> pd->subsampling_y;
1149
8.38k
    const int plane_row = (rel_mi_row * MI_SIZE) >> pd->subsampling_y;
1150
1151
8.38k
    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
1152
1153
8.38k
    const int dst_stride = pd->dst.stride;
1154
8.38k
    uint8_t *const dst = &pd->dst.buf[plane_row * dst_stride];
1155
8.38k
    const int tmp_stride = ctxt->adjacent_stride[plane];
1156
8.38k
    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_row * tmp_stride];
1157
8.38k
    const uint8_t *const mask = av1_get_obmc_mask(bw);
1158
1159
8.38k
#if CONFIG_AV1_HIGHBITDEPTH
1160
8.38k
    const int is_hbd = is_cur_buf_hbd(xd);
1161
8.38k
    if (is_hbd)
1162
5.73k
      aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
1163
5.73k
                                 tmp_stride, mask, bw, bh, xd->bd);
1164
2.65k
    else
1165
2.65k
      aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
1166
2.65k
                          mask, bw, bh);
1167
#else
1168
    aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
1169
                        bw, bh);
1170
#endif
1171
8.38k
  }
1172
2.79k
}
1173
1174
// This function combines motion compensated predictions that are generated by
1175
// top/left neighboring blocks' inter predictors with the regular inter
1176
// prediction. We assume the original prediction (bmc) is stored in
1177
// xd->plane[].dst.buf
1178
void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
1179
                                     uint8_t *above[MAX_MB_PLANE],
1180
                                     int above_stride[MAX_MB_PLANE],
1181
                                     uint8_t *left[MAX_MB_PLANE],
1182
4.69k
                                     int left_stride[MAX_MB_PLANE]) {
1183
4.69k
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
1184
1185
  // handle above row
1186
4.69k
  struct obmc_inter_pred_ctxt ctxt_above = { above, above_stride };
1187
4.69k
  foreach_overlappable_nb_above(cm, xd,
1188
4.69k
                                max_neighbor_obmc[mi_size_wide_log2[bsize]],
1189
4.69k
                                build_obmc_inter_pred_above, &ctxt_above);
1190
1191
  // handle left column
1192
4.69k
  struct obmc_inter_pred_ctxt ctxt_left = { left, left_stride };
1193
4.69k
  foreach_overlappable_nb_left(cm, xd,
1194
4.69k
                               max_neighbor_obmc[mi_size_high_log2[bsize]],
1195
4.69k
                               build_obmc_inter_pred_left, &ctxt_left);
1196
4.69k
}
1197
1198
// Fills dst_buf1[0..2] and dst_buf2[0..2] with pointers into
// xd->tmp_obmc_bufs[0] and xd->tmp_obmc_bufs[1] respectively. Consecutive
// entries are MAX_SB_SQUARE samples apart: bytes for a low bit-depth
// buffer, sizeof(uint16_t)-sized samples for a high bit-depth one. In the
// high bit-depth case each pointer is passed through CONVERT_TO_BYTEPTR.
void av1_setup_obmc_dst_bufs(MACROBLOCKD *xd, uint8_t **dst_buf1,
                             uint8_t **dst_buf2) {
  const int hbd = is_cur_buf_hbd(xd);
  const int bytes_per_sample = hbd ? (int)sizeof(uint16_t) : 1;
  const int slot_bytes = MAX_SB_SQUARE * bytes_per_sample;
  uint8_t **const outputs[2] = { dst_buf1, dst_buf2 };

  for (int buf = 0; buf < 2; ++buf) {
    // Three slots per buffer, matching the [0], [1], [2] entries.
    for (int slot = 0; slot < 3; ++slot) {
      uint8_t *const base = xd->tmp_obmc_bufs[buf] + slot * slot_bytes;
      outputs[buf][slot] = hbd ? CONVERT_TO_BYTEPTR(base) : base;
    }
  }
}
1221
1222
void av1_setup_build_prediction_by_above_pred(
1223
    MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width,
1224
    MB_MODE_INFO *above_mbmi, struct build_prediction_ctxt *ctxt,
1225
3.57k
    const int num_planes) {
1226
3.57k
  const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->bsize);
1227
3.57k
  const int above_mi_col = xd->mi_col + rel_mi_col;
1228
1229
3.57k
  av1_modify_neighbor_predictor_for_obmc(above_mbmi);
1230
1231
14.1k
  for (int j = 0; j < num_planes; ++j) {
1232
10.5k
    struct macroblockd_plane *const pd = &xd->plane[j];
1233
10.5k
    setup_pred_plane(&pd->dst, a_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
1234
10.5k
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], 0, rel_mi_col,
1235
10.5k
                     NULL, pd->subsampling_x, pd->subsampling_y);
1236
10.5k
  }
1237
1238
3.57k
  const int num_refs = 1 + has_second_ref(above_mbmi);
1239
1240
7.14k
  for (int ref = 0; ref < num_refs; ++ref) {
1241
3.57k
    const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];
1242
1243
3.57k
    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
1244
3.57k
    const struct scale_factors *const sf =
1245
3.57k
        get_ref_scale_factors_const(ctxt->cm, frame);
1246
3.57k
    xd->block_ref_scale_factors[ref] = sf;
1247
3.57k
    if ((!av1_is_valid_scale(sf)))
1248
0
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
1249
0
                         "Reference frame has invalid dimensions");
1250
3.57k
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, xd->mi_row, above_mi_col, sf,
1251
3.57k
                         num_planes);
1252
3.57k
  }
1253
1254
3.57k
  xd->mb_to_left_edge = 8 * MI_SIZE * (-above_mi_col);
1255
3.57k
  xd->mb_to_right_edge =
1256
3.57k
      ctxt->mb_to_far_edge +
1257
3.57k
      (xd->width - rel_mi_col - above_mi_width) * MI_SIZE * 8;
1258
3.57k
}
1259
1260
void av1_setup_build_prediction_by_left_pred(MACROBLOCKD *xd, int rel_mi_row,
1261
                                             uint8_t left_mi_height,
1262
                                             MB_MODE_INFO *left_mbmi,
1263
                                             struct build_prediction_ctxt *ctxt,
1264
2.79k
                                             const int num_planes) {
1265
2.79k
  const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->bsize);
1266
2.79k
  const int left_mi_row = xd->mi_row + rel_mi_row;
1267
1268
2.79k
  av1_modify_neighbor_predictor_for_obmc(left_mbmi);
1269
1270
11.1k
  for (int j = 0; j < num_planes; ++j) {
1271
8.38k
    struct macroblockd_plane *const pd = &xd->plane[j];
1272
8.38k
    setup_pred_plane(&pd->dst, l_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
1273
8.38k
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], rel_mi_row, 0,
1274
8.38k
                     NULL, pd->subsampling_x, pd->subsampling_y);
1275
8.38k
  }
1276
1277
2.79k
  const int num_refs = 1 + has_second_ref(left_mbmi);
1278
1279
5.59k
  for (int ref = 0; ref < num_refs; ++ref) {
1280
2.79k
    const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];
1281
1282
2.79k
    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
1283
2.79k
    const struct scale_factors *const ref_scale_factors =
1284
2.79k
        get_ref_scale_factors_const(ctxt->cm, frame);
1285
1286
2.79k
    xd->block_ref_scale_factors[ref] = ref_scale_factors;
1287
2.79k
    if ((!av1_is_valid_scale(ref_scale_factors)))
1288
0
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
1289
0
                         "Reference frame has invalid dimensions");
1290
2.79k
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, left_mi_row, xd->mi_col,
1291
2.79k
                         ref_scale_factors, num_planes);
1292
2.79k
  }
1293
1294
2.79k
  xd->mb_to_top_edge = GET_MV_SUBPEL(MI_SIZE * (-left_mi_row));
1295
2.79k
  xd->mb_to_bottom_edge =
1296
2.79k
      ctxt->mb_to_far_edge +
1297
2.79k
      GET_MV_SUBPEL((xd->height - rel_mi_row - left_mi_height) * MI_SIZE);
1298
2.79k
}
1299
1300
static AOM_INLINE void combine_interintra(
1301
    INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
1302
    int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
1303
    uint8_t *comppred, int compstride, const uint8_t *interpred,
1304
2.19k
    int interstride, const uint8_t *intrapred, int intrastride) {
1305
2.19k
  const int bw = block_size_wide[plane_bsize];
1306
2.19k
  const int bh = block_size_high[plane_bsize];
1307
1308
2.19k
  if (use_wedge_interintra) {
1309
775
    if (av1_is_wedge_used(bsize)) {
1310
775
      const uint8_t *mask =
1311
775
          av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
1312
775
      const int subw = 2 * mi_size_wide[bsize] == bw;
1313
775
      const int subh = 2 * mi_size_high[bsize] == bh;
1314
775
      aom_blend_a64_mask(comppred, compstride, intrapred, intrastride,
1315
775
                         interpred, interstride, mask, block_size_wide[bsize],
1316
775
                         bw, bh, subw, subh);
1317
775
    }
1318
775
    return;
1319
775
  }
1320
1321
1.42k
  const uint8_t *mask = smooth_interintra_mask_buf[mode][plane_bsize];
1322
1.42k
  aom_blend_a64_mask(comppred, compstride, intrapred, intrastride, interpred,
1323
1.42k
                     interstride, mask, bw, bw, bh, 0, 0);
1324
1.42k
}
1325
1326
#if CONFIG_AV1_HIGHBITDEPTH
1327
static AOM_INLINE void combine_interintra_highbd(
1328
    INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
1329
    int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
1330
    uint8_t *comppred8, int compstride, const uint8_t *interpred8,
1331
4.02k
    int interstride, const uint8_t *intrapred8, int intrastride, int bd) {
1332
4.02k
  const int bw = block_size_wide[plane_bsize];
1333
4.02k
  const int bh = block_size_high[plane_bsize];
1334
1335
4.02k
  if (use_wedge_interintra) {
1336
1.76k
    if (av1_is_wedge_used(bsize)) {
1337
1.76k
      const uint8_t *mask =
1338
1.76k
          av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
1339
1.76k
      const int subh = 2 * mi_size_high[bsize] == bh;
1340
1.76k
      const int subw = 2 * mi_size_wide[bsize] == bw;
1341
1.76k
      aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
1342
1.76k
                                interpred8, interstride, mask,
1343
1.76k
                                block_size_wide[bsize], bw, bh, subw, subh, bd);
1344
1.76k
    }
1345
1.76k
    return;
1346
1.76k
  }
1347
1348
2.26k
  uint8_t mask[MAX_SB_SQUARE];
1349
2.26k
  build_smooth_interintra_mask(mask, bw, plane_bsize, mode);
1350
2.26k
  aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
1351
2.26k
                            interpred8, interstride, mask, bw, bw, bh, 0, 0,
1352
2.26k
                            bd);
1353
2.26k
}
1354
#endif
1355
1356
void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm,
1357
                                               MACROBLOCKD *xd,
1358
                                               BLOCK_SIZE bsize, int plane,
1359
                                               const BUFFER_SET *ctx,
1360
6.21k
                                               uint8_t *dst, int dst_stride) {
1361
6.21k
  struct macroblockd_plane *const pd = &xd->plane[plane];
1362
6.21k
  const int ssx = xd->plane[plane].subsampling_x;
1363
6.21k
  const int ssy = xd->plane[plane].subsampling_y;
1364
6.21k
  BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
1365
6.21k
  PREDICTION_MODE mode = interintra_to_intra_mode[xd->mi[0]->interintra_mode];
1366
6.21k
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_Y] == 0);
1367
6.21k
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_UV] == 0);
1368
6.21k
  assert(xd->mi[0]->filter_intra_mode_info.use_filter_intra == 0);
1369
6.21k
  assert(xd->mi[0]->use_intrabc == 0);
1370
6.21k
  const SequenceHeader *seq_params = cm->seq_params;
1371
1372
6.21k
  av1_predict_intra_block(xd, seq_params->sb_size,
1373
6.21k
                          seq_params->enable_intra_edge_filter, pd->width,
1374
6.21k
                          pd->height, max_txsize_rect_lookup[plane_bsize], mode,
1375
6.21k
                          0, 0, FILTER_INTRA_MODES, ctx->plane[plane],
1376
6.21k
                          ctx->stride[plane], dst, dst_stride, 0, 0, plane);
1377
6.21k
}
1378
1379
// Blends an inter predictor and an intra predictor for one plane and writes
// the result to that plane's destination buffer (xd->plane[plane].dst).
//
// The blend parameters (interintra mode, wedge flag, wedge index) are read
// from xd->mi[0]; the wedge sign is always INTERINTRA_WEDGE_SIGN. When
// CONFIG_AV1_HIGHBITDEPTH is enabled and is_cur_buf_hbd(xd) is true the
// high-bitdepth helper is used with xd->bd; otherwise the 8-bit helper runs.
void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
                            const uint8_t *inter_pred, int inter_stride,
                            const uint8_t *intra_pred, int intra_stride) {
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(bsize, xd->plane[plane].subsampling_x,
                           xd->plane[plane].subsampling_y);
  // Output location shared by both bit-depth paths.
  uint8_t *const out_buf = xd->plane[plane].dst.buf;
  const int out_stride = xd->plane[plane].dst.stride;

#if CONFIG_AV1_HIGHBITDEPTH
  if (is_cur_buf_hbd(xd)) {
    combine_interintra_highbd(xd->mi[0]->interintra_mode,
                              xd->mi[0]->use_wedge_interintra,
                              xd->mi[0]->interintra_wedge_index,
                              INTERINTRA_WEDGE_SIGN, bsize, plane_bsize,
                              out_buf, out_stride, inter_pred, inter_stride,
                              intra_pred, intra_stride, xd->bd);
    return;
  }
#endif

  combine_interintra(xd->mi[0]->interintra_mode,
                     xd->mi[0]->use_wedge_interintra,
                     xd->mi[0]->interintra_wedge_index, INTERINTRA_WEDGE_SIGN,
                     bsize, plane_bsize, out_buf, out_stride, inter_pred,
                     inter_stride, intra_pred, intra_stride);
}
1401
1402
// build interintra_predictors for one plane
1403
void av1_build_interintra_predictor(const AV1_COMMON *cm, MACROBLOCKD *xd,
1404
                                    uint8_t *pred, int stride,
1405
                                    const BUFFER_SET *ctx, int plane,
1406
6.21k
                                    BLOCK_SIZE bsize) {
1407
6.21k
  assert(bsize < BLOCK_SIZES_ALL);
1408
6.21k
  if (is_cur_buf_hbd(xd)) {
1409
4.02k
    DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
1410
4.02k
    av1_build_intra_predictors_for_interintra(
1411
4.02k
        cm, xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(intrapredictor),
1412
4.02k
        MAX_SB_SIZE);
1413
4.02k
    av1_combine_interintra(xd, bsize, plane, pred, stride,
1414
4.02k
                           CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
1415
4.02k
  } else {
1416
2.19k
    DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]);
1417
2.19k
    av1_build_intra_predictors_for_interintra(cm, xd, bsize, plane, ctx,
1418
2.19k
                                              intrapredictor, MAX_SB_SIZE);
1419
2.19k
    av1_combine_interintra(xd, bsize, plane, pred, stride, intrapredictor,
1420
2.19k
                           MAX_SB_SIZE);
1421
2.19k
  }
1422
6.21k
}