Coverage Report

Created: 2025-12-03 07:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/aom/av1/common/reconinter.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
#include <stdio.h>
14
#include <limits.h>
15
16
#include "config/aom_config.h"
17
#include "config/aom_dsp_rtcd.h"
18
#include "config/aom_scale_rtcd.h"
19
20
#include "aom/aom_integer.h"
21
#include "aom_dsp/blend.h"
22
23
#include "av1/common/av1_common_int.h"
24
#include "av1/common/blockd.h"
25
#include "av1/common/mvref_common.h"
26
#include "av1/common/obmc.h"
27
#include "av1/common/reconinter.h"
28
#include "av1/common/reconintra.h"
29
30
// This function will determine whether or not to create a warped
31
// prediction.
32
int av1_allow_warp(const MB_MODE_INFO *const mbmi,
33
                   const WarpTypesAllowed *const warp_types,
34
                   const WarpedMotionParams *const gm_params,
35
                   int build_for_obmc, const struct scale_factors *const sf,
36
140k
                   WarpedMotionParams *final_warp_params) {
37
  // Note: As per the spec, we must test the fixed point scales here, which are
38
  // at a higher precision (1 << 14) than the xs and ys in subpel_params (that
39
  // have 1 << 10 precision).
40
140k
  if (av1_is_scaled(sf)) return 0;
41
42
134k
  if (final_warp_params != NULL) *final_warp_params = default_warp_params;
43
44
134k
  if (build_for_obmc) return 0;
45
46
134k
  if (warp_types->local_warp_allowed && !mbmi->wm_params.invalid) {
47
1.25k
    if (final_warp_params != NULL)
48
1.25k
      memcpy(final_warp_params, &mbmi->wm_params, sizeof(*final_warp_params));
49
1.25k
    return 1;
50
132k
  } else if (warp_types->global_warp_allowed && !gm_params->invalid) {
51
12.0k
    if (final_warp_params != NULL)
52
12.0k
      memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
53
12.0k
    return 1;
54
12.0k
  }
55
56
120k
  return 0;
57
134k
}
58
59
void av1_init_inter_params(InterPredParams *inter_pred_params, int block_width,
60
                           int block_height, int pix_row, int pix_col,
61
                           int subsampling_x, int subsampling_y, int bit_depth,
62
                           int use_hbd_buf, int is_intrabc,
63
                           const struct scale_factors *sf,
64
                           const struct buf_2d *ref_buf,
65
410k
                           int_interpfilters interp_filters) {
66
410k
  inter_pred_params->block_width = block_width;
67
410k
  inter_pred_params->block_height = block_height;
68
410k
  inter_pred_params->pix_row = pix_row;
69
410k
  inter_pred_params->pix_col = pix_col;
70
410k
  inter_pred_params->subsampling_x = subsampling_x;
71
410k
  inter_pred_params->subsampling_y = subsampling_y;
72
410k
  inter_pred_params->bit_depth = bit_depth;
73
410k
  inter_pred_params->use_hbd_buf = use_hbd_buf;
74
410k
  inter_pred_params->is_intrabc = is_intrabc;
75
410k
  inter_pred_params->scale_factors = sf;
76
410k
  inter_pred_params->ref_frame_buf = *ref_buf;
77
410k
  inter_pred_params->mode = TRANSLATION_PRED;
78
410k
  inter_pred_params->comp_mode = UNIFORM_SINGLE;
79
80
410k
  if (is_intrabc) {
81
242k
    inter_pred_params->interp_filter_params[0] = &av1_intrabc_filter_params;
82
242k
    inter_pred_params->interp_filter_params[1] = &av1_intrabc_filter_params;
83
242k
  } else {
84
168k
    inter_pred_params->interp_filter_params[0] =
85
168k
        av1_get_interp_filter_params_with_block_size(
86
168k
            interp_filters.as_filters.x_filter, block_width);
87
168k
    inter_pred_params->interp_filter_params[1] =
88
168k
        av1_get_interp_filter_params_with_block_size(
89
168k
            interp_filters.as_filters.y_filter, block_height);
90
168k
  }
91
410k
}
92
93
68.8k
void av1_init_comp_mode(InterPredParams *inter_pred_params) {
94
68.8k
  inter_pred_params->comp_mode = UNIFORM_COMP;
95
68.8k
}
96
97
void av1_init_warp_params(InterPredParams *inter_pred_params,
98
                          const WarpTypesAllowed *warp_types, int ref,
99
388k
                          const MACROBLOCKD *xd, const MB_MODE_INFO *mi) {
100
388k
  if (inter_pred_params->block_height < 8 || inter_pred_params->block_width < 8)
101
200k
    return;
102
103
188k
  if (xd->cur_frame_force_integer_mv) return;
104
105
140k
  if (av1_allow_warp(mi, warp_types, &xd->global_motion[mi->ref_frame[ref]], 0,
106
140k
                     inter_pred_params->scale_factors,
107
140k
                     &inter_pred_params->warp_params)) {
108
#if CONFIG_REALTIME_ONLY
109
    aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_FEATURE,
110
                       "Warped motion is disabled in realtime only build.");
111
#endif
112
13.2k
    inter_pred_params->mode = WARP_PRED;
113
13.2k
  }
114
140k
}
115
116
void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
117
                              int dst_stride,
118
                              InterPredParams *inter_pred_params,
119
410k
                              const SubpelParams *subpel_params) {
120
410k
  assert(IMPLIES(inter_pred_params->conv_params.is_compound,
121
410k
                 inter_pred_params->conv_params.dst != NULL));
122
123
410k
  if (inter_pred_params->mode == TRANSLATION_PRED) {
124
397k
#if CONFIG_AV1_HIGHBITDEPTH
125
397k
    if (inter_pred_params->use_hbd_buf) {
126
281k
      highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
127
281k
                             inter_pred_params->block_width,
128
281k
                             inter_pred_params->block_height,
129
281k
                             &inter_pred_params->conv_params,
130
281k
                             inter_pred_params->interp_filter_params,
131
281k
                             inter_pred_params->bit_depth);
132
281k
    } else {
133
115k
      inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
134
115k
                      inter_pred_params->block_width,
135
115k
                      inter_pred_params->block_height,
136
115k
                      &inter_pred_params->conv_params,
137
115k
                      inter_pred_params->interp_filter_params);
138
115k
    }
139
#else
140
    inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
141
                    inter_pred_params->block_width,
142
                    inter_pred_params->block_height,
143
                    &inter_pred_params->conv_params,
144
                    inter_pred_params->interp_filter_params);
145
#endif
146
397k
  }
147
13.2k
#if !CONFIG_REALTIME_ONLY
148
  // TODO(jingning): av1_warp_plane() can be further cleaned up.
149
13.2k
  else if (inter_pred_params->mode == WARP_PRED) {
150
13.2k
    av1_warp_plane(
151
13.2k
        &inter_pred_params->warp_params, inter_pred_params->use_hbd_buf,
152
13.2k
        inter_pred_params->bit_depth, inter_pred_params->ref_frame_buf.buf0,
153
13.2k
        inter_pred_params->ref_frame_buf.width,
154
13.2k
        inter_pred_params->ref_frame_buf.height,
155
13.2k
        inter_pred_params->ref_frame_buf.stride, dst,
156
13.2k
        inter_pred_params->pix_col, inter_pred_params->pix_row,
157
13.2k
        inter_pred_params->block_width, inter_pred_params->block_height,
158
13.2k
        dst_stride, inter_pred_params->subsampling_x,
159
13.2k
        inter_pred_params->subsampling_y, &inter_pred_params->conv_params);
160
13.2k
  }
161
0
#endif
162
0
  else {
163
0
    assert(0 && "Unsupported inter_pred_params->mode");
164
0
  }
165
410k
}
166
167
static const uint8_t wedge_master_oblique_odd[MASK_MASTER_SIZE] = {
168
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
169
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  6,  18,
170
  37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
171
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
172
};
173
static const uint8_t wedge_master_oblique_even[MASK_MASTER_SIZE] = {
174
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
175
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  4,  11, 27,
176
  46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
177
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
178
};
179
static const uint8_t wedge_master_vertical[MASK_MASTER_SIZE] = {
180
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
181
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  7,  21,
182
  43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
183
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
184
};
185
186
static AOM_INLINE void shift_copy(const uint8_t *src, uint8_t *dst, int shift,
187
320
                                  int width) {
188
320
  if (shift >= 0) {
189
165
    memcpy(dst + shift, src, width - shift);
190
165
    memset(dst, src[0], shift);
191
165
  } else {
192
155
    shift = -shift;
193
155
    memcpy(dst, src + shift, width - shift);
194
155
    memset(dst + width - shift, src[width - 1], shift);
195
155
  }
196
320
}
197
198
/* clang-format off */
199
DECLARE_ALIGNED(16, static uint8_t,
200
                wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]) = {
201
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
202
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
203
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
204
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
205
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
206
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
207
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
208
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
209
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
210
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
211
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
212
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
213
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
214
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
215
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
216
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
217
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
218
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
219
  { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, },
220
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, },
221
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
222
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
223
};
224
/* clang-format on */
225
226
// [negative][direction]
227
DECLARE_ALIGNED(
228
    16, static uint8_t,
229
    wedge_mask_obl[2][WEDGE_DIRECTIONS][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);
230
231
// 4 * MAX_WEDGE_SQUARE is an easy to compute and fairly tight upper bound
232
// on the sum of all mask sizes up to an including MAX_WEDGE_SQUARE.
233
DECLARE_ALIGNED(16, static uint8_t,
234
                wedge_mask_buf[2 * MAX_WEDGE_TYPES * 4 * MAX_WEDGE_SQUARE]);
235
236
DECLARE_ALIGNED(16, static uint8_t,
237
                smooth_interintra_mask_buf[INTERINTRA_MODES][BLOCK_SIZES_ALL]
238
                                          [MAX_WEDGE_SQUARE]);
239
240
static wedge_masks_type wedge_masks[BLOCK_SIZES_ALL][2];
241
242
static const wedge_code_type wedge_codebook_16_hgtw[16] = {
243
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
244
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
245
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
246
  { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
247
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
248
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
249
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
250
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
251
};
252
253
static const wedge_code_type wedge_codebook_16_hltw[16] = {
254
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
255
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
256
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 4, 4 },
257
  { WEDGE_VERTICAL, 6, 4 },   { WEDGE_HORIZONTAL, 4, 4 },
258
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
259
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
260
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
261
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
262
};
263
264
static const wedge_code_type wedge_codebook_16_heqw[16] = {
265
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
266
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
267
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
268
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
269
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
270
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
271
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
272
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
273
};
274
275
const wedge_params_type av1_wedge_params_lookup[BLOCK_SIZES_ALL] = {
276
  { 0, NULL, NULL, NULL },
277
  { 0, NULL, NULL, NULL },
278
  { 0, NULL, NULL, NULL },
279
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8],
280
    wedge_masks[BLOCK_8X8] },
281
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16],
282
    wedge_masks[BLOCK_8X16] },
283
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8],
284
    wedge_masks[BLOCK_16X8] },
285
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16],
286
    wedge_masks[BLOCK_16X16] },
287
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32],
288
    wedge_masks[BLOCK_16X32] },
289
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16],
290
    wedge_masks[BLOCK_32X16] },
291
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32],
292
    wedge_masks[BLOCK_32X32] },
293
  { 0, NULL, NULL, NULL },
294
  { 0, NULL, NULL, NULL },
295
  { 0, NULL, NULL, NULL },
296
  { 0, NULL, NULL, NULL },
297
  { 0, NULL, NULL, NULL },
298
  { 0, NULL, NULL, NULL },
299
  { 0, NULL, NULL, NULL },
300
  { 0, NULL, NULL, NULL },
301
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32],
302
    wedge_masks[BLOCK_8X32] },
303
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8],
304
    wedge_masks[BLOCK_32X8] },
305
  { 0, NULL, NULL, NULL },
306
  { 0, NULL, NULL, NULL },
307
};
308
309
static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
310
1.44k
                                             BLOCK_SIZE sb_type) {
311
1.44k
  const uint8_t *master;
312
1.44k
  const int bh = block_size_high[sb_type];
313
1.44k
  const int bw = block_size_wide[sb_type];
314
1.44k
  const wedge_code_type *a =
315
1.44k
      av1_wedge_params_lookup[sb_type].codebook + wedge_index;
316
1.44k
  int woff, hoff;
317
1.44k
  const uint8_t wsignflip =
318
1.44k
      av1_wedge_params_lookup[sb_type].signflip[wedge_index];
319
320
1.44k
  assert(wedge_index >= 0 && wedge_index < get_wedge_types_lookup(sb_type));
321
1.44k
  woff = (a->x_offset * bw) >> 3;
322
1.44k
  hoff = (a->y_offset * bh) >> 3;
323
1.44k
  master = wedge_mask_obl[neg ^ wsignflip][a->direction] +
324
1.44k
           MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
325
1.44k
           MASK_MASTER_SIZE / 2 - woff;
326
1.44k
  return master;
327
1.44k
}
328
329
const uint8_t *av1_get_compound_type_mask(
330
7.04k
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) {
331
7.04k
  (void)sb_type;
332
7.04k
  switch (comp_data->type) {
333
4.82k
    case COMPOUND_WEDGE:
334
4.82k
      return av1_get_contiguous_soft_mask(comp_data->wedge_index,
335
4.82k
                                          comp_data->wedge_sign, sb_type);
336
2.22k
    default: return comp_data->seg_mask;
337
7.04k
  }
338
7.04k
}
339
340
static AOM_INLINE void diffwtd_mask_d16(
341
    uint8_t *mask, int which_inverse, int mask_base, const CONV_BUF_TYPE *src0,
342
    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
343
752
    ConvolveParams *conv_params, int bd) {
344
752
  int round =
345
752
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
346
752
  int i, j, m, diff;
347
7.88k
  for (i = 0; i < h; ++i) {
348
113k
    for (j = 0; j < w; ++j) {
349
106k
      diff = abs(src0[i * src0_stride + j] - src1[i * src1_stride + j]);
350
106k
      diff = ROUND_POWER_OF_TWO(diff, round);
351
106k
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
352
106k
      mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
353
106k
    }
354
7.13k
  }
355
752
}
356
357
void av1_build_compound_diffwtd_mask_d16_c(
358
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
359
    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
360
752
    ConvolveParams *conv_params, int bd) {
361
752
  switch (mask_type) {
362
453
    case DIFFWTD_38:
363
453
      diffwtd_mask_d16(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w,
364
453
                       conv_params, bd);
365
453
      break;
366
299
    case DIFFWTD_38_INV:
367
299
      diffwtd_mask_d16(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w,
368
299
                       conv_params, bd);
369
299
      break;
370
0
    default: assert(0);
371
752
  }
372
752
}
373
374
static AOM_INLINE void diffwtd_mask(uint8_t *mask, int which_inverse,
375
                                    int mask_base, const uint8_t *src0,
376
                                    int src0_stride, const uint8_t *src1,
377
0
                                    int src1_stride, int h, int w) {
378
0
  int i, j, m, diff;
379
0
  for (i = 0; i < h; ++i) {
380
0
    for (j = 0; j < w; ++j) {
381
0
      diff =
382
0
          abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]);
383
0
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
384
0
      mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
385
0
    }
386
0
  }
387
0
}
388
389
void av1_build_compound_diffwtd_mask_c(uint8_t *mask,
390
                                       DIFFWTD_MASK_TYPE mask_type,
391
                                       const uint8_t *src0, int src0_stride,
392
                                       const uint8_t *src1, int src1_stride,
393
0
                                       int h, int w) {
394
0
  switch (mask_type) {
395
0
    case DIFFWTD_38:
396
0
      diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w);
397
0
      break;
398
0
    case DIFFWTD_38_INV:
399
0
      diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w);
400
0
      break;
401
0
    default: assert(0);
402
0
  }
403
0
}
404
405
static AOM_FORCE_INLINE void diffwtd_mask_highbd(
406
    uint8_t *mask, int which_inverse, int mask_base, const uint16_t *src0,
407
    int src0_stride, const uint16_t *src1, int src1_stride, int h, int w,
408
0
    const unsigned int bd) {
409
0
  assert(bd >= 8);
410
0
  if (bd == 8) {
411
0
    if (which_inverse) {
412
0
      for (int i = 0; i < h; ++i) {
413
0
        for (int j = 0; j < w; ++j) {
414
0
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
415
0
          unsigned int m = negative_to_zero(mask_base + diff);
416
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
417
0
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
418
0
        }
419
0
        src0 += src0_stride;
420
0
        src1 += src1_stride;
421
0
        mask += w;
422
0
      }
423
0
    } else {
424
0
      for (int i = 0; i < h; ++i) {
425
0
        for (int j = 0; j < w; ++j) {
426
0
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
427
0
          unsigned int m = negative_to_zero(mask_base + diff);
428
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
429
0
          mask[j] = m;
430
0
        }
431
0
        src0 += src0_stride;
432
0
        src1 += src1_stride;
433
0
        mask += w;
434
0
      }
435
0
    }
436
0
  } else {
437
0
    const unsigned int bd_shift = bd - 8;
438
0
    if (which_inverse) {
439
0
      for (int i = 0; i < h; ++i) {
440
0
        for (int j = 0; j < w; ++j) {
441
0
          int diff =
442
0
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
443
0
          unsigned int m = negative_to_zero(mask_base + diff);
444
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
445
0
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
446
0
        }
447
0
        src0 += src0_stride;
448
0
        src1 += src1_stride;
449
0
        mask += w;
450
0
      }
451
0
    } else {
452
0
      for (int i = 0; i < h; ++i) {
453
0
        for (int j = 0; j < w; ++j) {
454
0
          int diff =
455
0
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
456
0
          unsigned int m = negative_to_zero(mask_base + diff);
457
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
458
0
          mask[j] = m;
459
0
        }
460
0
        src0 += src0_stride;
461
0
        src1 += src1_stride;
462
0
        mask += w;
463
0
      }
464
0
    }
465
0
  }
466
0
}
467
468
void av1_build_compound_diffwtd_mask_highbd_c(
469
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0,
470
    int src0_stride, const uint8_t *src1, int src1_stride, int h, int w,
471
0
    int bd) {
472
0
  switch (mask_type) {
473
0
    case DIFFWTD_38:
474
0
      diffwtd_mask_highbd(mask, 0, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
475
0
                          CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
476
0
      break;
477
0
    case DIFFWTD_38_INV:
478
0
      diffwtd_mask_highbd(mask, 1, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
479
0
                          CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
480
0
      break;
481
0
    default: assert(0);
482
0
  }
483
0
}
484
485
5
static AOM_INLINE void init_wedge_master_masks() {
486
5
  int i, j;
487
5
  const int w = MASK_MASTER_SIZE;
488
5
  const int h = MASK_MASTER_SIZE;
489
5
  const int stride = MASK_MASTER_STRIDE;
490
  // Note: index [0] stores the masters, and [1] its complement.
491
  // Generate prototype by shifting the masters
492
5
  int shift = h / 4;
493
165
  for (i = 0; i < h; i += 2) {
494
160
    shift_copy(wedge_master_oblique_even,
495
160
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride], shift,
496
160
               MASK_MASTER_SIZE);
497
160
    shift--;
498
160
    shift_copy(wedge_master_oblique_odd,
499
160
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][(i + 1) * stride], shift,
500
160
               MASK_MASTER_SIZE);
501
160
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][i * stride],
502
160
           wedge_master_vertical,
503
160
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
504
160
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][(i + 1) * stride],
505
160
           wedge_master_vertical,
506
160
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
507
160
  }
508
509
325
  for (i = 0; i < h; ++i) {
510
20.8k
    for (j = 0; j < w; ++j) {
511
20.4k
      const int msk = wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j];
512
20.4k
      wedge_mask_obl[0][WEDGE_OBLIQUE27][j * stride + i] = msk;
513
20.4k
      wedge_mask_obl[0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
514
20.4k
          wedge_mask_obl[0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
515
20.4k
              (1 << WEDGE_WEIGHT_BITS) - msk;
516
20.4k
      wedge_mask_obl[1][WEDGE_OBLIQUE63][i * stride + j] =
517
20.4k
          wedge_mask_obl[1][WEDGE_OBLIQUE27][j * stride + i] =
518
20.4k
              (1 << WEDGE_WEIGHT_BITS) - msk;
519
20.4k
      wedge_mask_obl[1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
520
20.4k
          wedge_mask_obl[1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = msk;
521
20.4k
      const int mskx = wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j];
522
20.4k
      wedge_mask_obl[0][WEDGE_HORIZONTAL][j * stride + i] = mskx;
523
20.4k
      wedge_mask_obl[1][WEDGE_VERTICAL][i * stride + j] =
524
20.4k
          wedge_mask_obl[1][WEDGE_HORIZONTAL][j * stride + i] =
525
20.4k
              (1 << WEDGE_WEIGHT_BITS) - mskx;
526
20.4k
    }
527
320
  }
528
5
}
529
530
5
static AOM_INLINE void init_wedge_masks() {
531
5
  uint8_t *dst = wedge_mask_buf;
532
5
  BLOCK_SIZE bsize;
533
5
  memset(wedge_masks, 0, sizeof(wedge_masks));
534
115
  for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; ++bsize) {
535
110
    const wedge_params_type *wedge_params = &av1_wedge_params_lookup[bsize];
536
110
    const int wtypes = wedge_params->wedge_types;
537
110
    if (wtypes == 0) continue;
538
45
    const uint8_t *mask;
539
45
    const int bw = block_size_wide[bsize];
540
45
    const int bh = block_size_high[bsize];
541
45
    int w;
542
765
    for (w = 0; w < wtypes; ++w) {
543
720
      mask = get_wedge_mask_inplace(w, 0, bsize);
544
720
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw,
545
720
                        bh);
546
720
      wedge_params->masks[0][w] = dst;
547
720
      dst += bw * bh;
548
549
720
      mask = get_wedge_mask_inplace(w, 1, bsize);
550
720
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw,
551
720
                        bh);
552
720
      wedge_params->masks[1][w] = dst;
553
720
      dst += bw * bh;
554
720
    }
555
45
    assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf));
556
45
  }
557
5
}
558
559
/* clang-format off */
560
static const uint8_t ii_weights1d[MAX_SB_SIZE] = {
561
  60, 58, 56, 54, 52, 50, 48, 47, 45, 44, 42, 41, 39, 38, 37, 35, 34, 33, 32,
562
  31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20, 19, 19, 18, 18, 17, 16,
563
  16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10,  9,  9,  9,  8,
564
  8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  4,  4,
565
  4,  4,  4,  4,  4,  4,  3,  3,  3,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,
566
  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1,  1,
567
  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1
568
};
569
static uint8_t ii_size_scales[BLOCK_SIZES_ALL] = {
570
    32, 16, 16, 16, 8, 8, 8, 4,
571
    4,  4,  2,  2,  2, 1, 1, 1,
572
    8,  8,  4,  4,  2, 2
573
};
574
/* clang-format on */
575
576
static AOM_INLINE void build_smooth_interintra_mask(uint8_t *mask, int stride,
577
                                                    BLOCK_SIZE plane_bsize,
578
2.55k
                                                    INTERINTRA_MODE mode) {
579
2.55k
  int i, j;
580
2.55k
  const int bw = block_size_wide[plane_bsize];
581
2.55k
  const int bh = block_size_high[plane_bsize];
582
2.55k
  const int size_scale = ii_size_scales[plane_bsize];
583
584
2.55k
  switch (mode) {
585
1.29k
    case II_V_PRED:
586
12.0k
      for (i = 0; i < bh; ++i) {
587
10.7k
        memset(mask, ii_weights1d[i * size_scale], bw * sizeof(mask[0]));
588
10.7k
        mask += stride;
589
10.7k
      }
590
1.29k
      break;
591
592
348
    case II_H_PRED:
593
7.00k
      for (i = 0; i < bh; ++i) {
594
105k
        for (j = 0; j < bw; ++j) mask[j] = ii_weights1d[j * size_scale];
595
6.65k
        mask += stride;
596
6.65k
      }
597
348
      break;
598
599
805
    case II_SMOOTH_PRED:
600
7.88k
      for (i = 0; i < bh; ++i) {
601
59.8k
        for (j = 0; j < bw; ++j)
602
52.8k
          mask[j] = ii_weights1d[(i < j ? i : j) * size_scale];
603
7.07k
        mask += stride;
604
7.07k
      }
605
805
      break;
606
607
107
    case II_DC_PRED:
608
107
    default:
609
1.55k
      for (i = 0; i < bh; ++i) {
610
1.45k
        memset(mask, 32, bw * sizeof(mask[0]));
611
1.45k
        mask += stride;
612
1.45k
      }
613
107
      break;
614
2.55k
  }
615
2.55k
}
616
617
5
static AOM_INLINE void init_smooth_interintra_masks() {
618
25
  for (int m = 0; m < INTERINTRA_MODES; ++m) {
619
460
    for (int bs = 0; bs < BLOCK_SIZES_ALL; ++bs) {
620
440
      const int bw = block_size_wide[bs];
621
440
      const int bh = block_size_high[bs];
622
440
      if (bw > MAX_WEDGE_SIZE || bh > MAX_WEDGE_SIZE) continue;
623
280
      build_smooth_interintra_mask(smooth_interintra_mask_buf[m][bs], bw, bs,
624
280
                                   m);
625
280
    }
626
20
  }
627
5
}
628
629
// Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
630
5
void av1_init_wedge_masks() {
631
5
  init_wedge_master_masks();
632
5
  init_wedge_masks();
633
5
  init_smooth_interintra_masks();
634
5
}
635
636
static AOM_INLINE void build_masked_compound_no_round(
637
    uint8_t *dst, int dst_stride, const CONV_BUF_TYPE *src0, int src0_stride,
638
    const CONV_BUF_TYPE *src1, int src1_stride,
639
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
640
7.04k
    int w, InterPredParams *inter_pred_params) {
641
7.04k
  const int ssy = inter_pred_params->subsampling_y;
642
7.04k
  const int ssx = inter_pred_params->subsampling_x;
643
7.04k
  const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
644
7.04k
  const int mask_stride = block_size_wide[sb_type];
645
7.04k
#if CONFIG_AV1_HIGHBITDEPTH
646
7.04k
  if (inter_pred_params->use_hbd_buf) {
647
3.90k
    aom_highbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
648
3.90k
                                  src1_stride, mask, mask_stride, w, h, ssx,
649
3.90k
                                  ssy, &inter_pred_params->conv_params,
650
3.90k
                                  inter_pred_params->bit_depth);
651
3.90k
  } else {
652
3.14k
    aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
653
3.14k
                                 src1_stride, mask, mask_stride, w, h, ssx, ssy,
654
3.14k
                                 &inter_pred_params->conv_params);
655
3.14k
  }
656
#else
657
  aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
658
                               src1_stride, mask, mask_stride, w, h, ssx, ssy,
659
                               &inter_pred_params->conv_params);
660
#endif
661
7.04k
}
662
663
static void make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
664
                                        uint8_t *dst, int dst_stride,
665
                                        InterPredParams *inter_pred_params,
666
7.04k
                                        const SubpelParams *subpel_params) {
667
7.04k
  const INTERINTER_COMPOUND_DATA *comp_data = &inter_pred_params->mask_comp;
668
7.04k
  BLOCK_SIZE sb_type = inter_pred_params->sb_type;
669
670
  // We're going to call av1_make_inter_predictor to generate a prediction into
671
  // a temporary buffer, then will blend that temporary buffer with that from
672
  // the other reference.
673
7.04k
  DECLARE_ALIGNED(32, uint8_t, tmp_buf[2 * MAX_SB_SQUARE]);
674
7.04k
  uint8_t *tmp_dst =
675
7.04k
      inter_pred_params->use_hbd_buf ? CONVERT_TO_BYTEPTR(tmp_buf) : tmp_buf;
676
677
7.04k
  const int tmp_buf_stride = MAX_SB_SIZE;
678
7.04k
  CONV_BUF_TYPE *org_dst = inter_pred_params->conv_params.dst;
679
7.04k
  int org_dst_stride = inter_pred_params->conv_params.dst_stride;
680
7.04k
  CONV_BUF_TYPE *tmp_buf16 = (CONV_BUF_TYPE *)tmp_buf;
681
7.04k
  inter_pred_params->conv_params.dst = tmp_buf16;
682
7.04k
  inter_pred_params->conv_params.dst_stride = tmp_buf_stride;
683
7.04k
  assert(inter_pred_params->conv_params.do_average == 0);
684
685
  // This will generate a prediction in tmp_buf for the second reference
686
7.04k
  av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE,
687
7.04k
                           inter_pred_params, subpel_params);
688
689
7.04k
  if (!inter_pred_params->conv_params.plane &&
690
2.37k
      comp_data->type == COMPOUND_DIFFWTD) {
691
752
    av1_build_compound_diffwtd_mask_d16(
692
752
        comp_data->seg_mask, comp_data->mask_type, org_dst, org_dst_stride,
693
752
        tmp_buf16, tmp_buf_stride, inter_pred_params->block_height,
694
752
        inter_pred_params->block_width, &inter_pred_params->conv_params,
695
752
        inter_pred_params->bit_depth);
696
752
  }
697
7.04k
  build_masked_compound_no_round(
698
7.04k
      dst, dst_stride, org_dst, org_dst_stride, tmp_buf16, tmp_buf_stride,
699
7.04k
      comp_data, sb_type, inter_pred_params->block_height,
700
7.04k
      inter_pred_params->block_width, inter_pred_params);
701
7.04k
}
702
703
void av1_build_one_inter_predictor(
704
    uint8_t *dst, int dst_stride, const MV *const src_mv,
705
    InterPredParams *inter_pred_params, MACROBLOCKD *xd, int mi_x, int mi_y,
706
410k
    int ref, uint8_t **mc_buf, CalcSubpelParamsFunc calc_subpel_params_func) {
707
410k
  SubpelParams subpel_params;
708
410k
  uint8_t *src;
709
410k
  int src_stride;
710
410k
  calc_subpel_params_func(src_mv, inter_pred_params, xd, mi_x, mi_y, ref,
711
410k
                          mc_buf, &src, &subpel_params, &src_stride);
712
713
410k
  if (inter_pred_params->comp_mode == UNIFORM_SINGLE ||
714
403k
      inter_pred_params->comp_mode == UNIFORM_COMP) {
715
403k
    av1_make_inter_predictor(src, src_stride, dst, dst_stride,
716
403k
                             inter_pred_params, &subpel_params);
717
403k
  } else {
718
7.04k
    make_masked_inter_predictor(src, src_stride, dst, dst_stride,
719
7.04k
                                inter_pred_params, &subpel_params);
720
7.04k
  }
721
410k
}
722
723
void av1_dist_wtd_comp_weight_assign(const AV1_COMMON *cm,
724
                                     const MB_MODE_INFO *mbmi, int *fwd_offset,
725
                                     int *bck_offset,
726
                                     int *use_dist_wtd_comp_avg,
727
403k
                                     int is_compound) {
728
403k
  assert(fwd_offset != NULL && bck_offset != NULL);
729
403k
  if (!is_compound || mbmi->compound_idx) {
730
389k
    *fwd_offset = 8;
731
389k
    *bck_offset = 8;
732
389k
    *use_dist_wtd_comp_avg = 0;
733
389k
    return;
734
389k
  }
735
736
13.7k
  *use_dist_wtd_comp_avg = 1;
737
13.7k
  const RefCntBuffer *const bck_buf = get_ref_frame_buf(cm, mbmi->ref_frame[0]);
738
13.7k
  const RefCntBuffer *const fwd_buf = get_ref_frame_buf(cm, mbmi->ref_frame[1]);
739
13.7k
  const int cur_frame_index = cm->cur_frame->order_hint;
740
13.7k
  int bck_frame_index = 0, fwd_frame_index = 0;
741
742
13.7k
  if (bck_buf != NULL) bck_frame_index = bck_buf->order_hint;
743
13.7k
  if (fwd_buf != NULL) fwd_frame_index = fwd_buf->order_hint;
744
745
13.7k
  int d0 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
746
13.7k
                                       fwd_frame_index, cur_frame_index)),
747
13.7k
                 0, MAX_FRAME_DISTANCE);
748
13.7k
  int d1 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
749
13.7k
                                       cur_frame_index, bck_frame_index)),
750
13.7k
                 0, MAX_FRAME_DISTANCE);
751
752
13.7k
  const int order = d0 <= d1;
753
754
13.7k
  if (d0 == 0 || d1 == 0) {
755
872
    *fwd_offset = quant_dist_lookup_table[3][order];
756
872
    *bck_offset = quant_dist_lookup_table[3][1 - order];
757
872
    return;
758
872
  }
759
760
12.8k
  int i;
761
19.6k
  for (i = 0; i < 3; ++i) {
762
17.9k
    int c0 = quant_dist_weight[i][order];
763
17.9k
    int c1 = quant_dist_weight[i][!order];
764
17.9k
    int d0_c0 = d0 * c0;
765
17.9k
    int d1_c1 = d1 * c1;
766
17.9k
    if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
767
17.9k
  }
768
769
12.8k
  *fwd_offset = quant_dist_lookup_table[i][order];
770
12.8k
  *bck_offset = quant_dist_lookup_table[i][1 - order];
771
12.8k
}
772
773
// True if the following hold:
774
//  1. Not intrabc and not build_for_obmc
775
//  2. At least one dimension is size 4 with subsampling
776
//  3. If sub-sampled, none of the previous blocks around the sub-sample
777
//     are intrabc or inter-blocks
778
static bool is_sub8x8_inter(const MACROBLOCKD *xd, int plane, BLOCK_SIZE bsize,
779
372k
                            int is_intrabc, int build_for_obmc) {
780
372k
  if (is_intrabc || build_for_obmc) {
781
257k
    return false;
782
257k
  }
783
784
114k
  const struct macroblockd_plane *const pd = &xd->plane[plane];
785
114k
  const int ss_x = pd->subsampling_x;
786
114k
  const int ss_y = pd->subsampling_y;
787
114k
  const int is_sub4_x = (block_size_wide[bsize] == 4) && ss_x;
788
114k
  const int is_sub4_y = (block_size_high[bsize] == 4) && ss_y;
789
114k
  if (!is_sub4_x && !is_sub4_y) {
790
111k
    return false;
791
111k
  }
792
793
  // For sub8x8 chroma blocks, we may be covering more than one luma block's
794
  // worth of pixels. Thus (mi_x, mi_y) may not be the correct coordinates for
795
  // the top-left corner of the prediction source - the correct top-left corner
796
  // is at (pre_x, pre_y).
797
3.22k
  const int row_start = is_sub4_y ? -1 : 0;
798
3.22k
  const int col_start = is_sub4_x ? -1 : 0;
799
800
6.67k
  for (int row = row_start; row <= 0; ++row) {
801
10.2k
    for (int col = col_start; col <= 0; ++col) {
802
6.83k
      const MB_MODE_INFO *this_mbmi = xd->mi[row * xd->mi_stride + col];
803
6.83k
      if (!is_inter_block(this_mbmi)) return false;
804
6.74k
      if (is_intrabc_block(this_mbmi)) return false;
805
6.74k
    }
806
3.53k
  }
807
3.14k
  return true;
808
3.22k
}
809
810
static void build_inter_predictors_sub8x8(
811
    const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, const MB_MODE_INFO *mi,
812
    int mi_x, int mi_y, uint8_t **mc_buf,
813
3.14k
    CalcSubpelParamsFunc calc_subpel_params_func) {
814
3.14k
  const BLOCK_SIZE bsize = mi->bsize;
815
3.14k
  struct macroblockd_plane *const pd = &xd->plane[plane];
816
3.14k
  const bool ss_x = pd->subsampling_x;
817
3.14k
  const bool ss_y = pd->subsampling_y;
818
3.14k
  const int b4_w = block_size_wide[bsize] >> ss_x;
819
3.14k
  const int b4_h = block_size_high[bsize] >> ss_y;
820
3.14k
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ss_x, ss_y);
821
3.14k
  const int b8_w = block_size_wide[plane_bsize];
822
3.14k
  const int b8_h = block_size_high[plane_bsize];
823
3.14k
  const int is_compound = has_second_ref(mi);
824
3.14k
  assert(!is_compound);
825
3.14k
  assert(!is_intrabc_block(mi));
826
827
  // For sub8x8 chroma blocks, we may be covering more than one luma block's
828
  // worth of pixels. Thus (mi_x, mi_y) may not be the correct coordinates for
829
  // the top-left corner of the prediction source - the correct top-left corner
830
  // is at (pre_x, pre_y).
831
3.14k
  const int row_start = (block_size_high[bsize] == 4) && ss_y ? -1 : 0;
832
3.14k
  const int col_start = (block_size_wide[bsize] == 4) && ss_x ? -1 : 0;
833
3.14k
  const int pre_x = (mi_x + MI_SIZE * col_start) >> ss_x;
834
3.14k
  const int pre_y = (mi_y + MI_SIZE * row_start) >> ss_y;
835
836
3.14k
  int row = row_start;
837
6.57k
  for (int y = 0; y < b8_h; y += b4_h) {
838
3.43k
    int col = col_start;
839
10.1k
    for (int x = 0; x < b8_w; x += b4_w) {
840
6.72k
      MB_MODE_INFO *this_mbmi = xd->mi[row * xd->mi_stride + col];
841
6.72k
      struct buf_2d *const dst_buf = &pd->dst;
842
6.72k
      uint8_t *dst = dst_buf->buf + dst_buf->stride * y + x;
843
6.72k
      int ref = 0;
844
6.72k
      const RefCntBuffer *ref_buf =
845
6.72k
          get_ref_frame_buf(cm, this_mbmi->ref_frame[ref]);
846
6.72k
      const struct scale_factors *ref_scale_factors =
847
6.72k
          get_ref_scale_factors_const(cm, this_mbmi->ref_frame[ref]);
848
6.72k
      const struct scale_factors *const sf = ref_scale_factors;
849
6.72k
      const struct buf_2d pre_buf = {
850
6.72k
        NULL,
851
6.72k
        (plane == 1) ? ref_buf->buf.u_buffer : ref_buf->buf.v_buffer,
852
6.72k
        ref_buf->buf.uv_crop_width,
853
6.72k
        ref_buf->buf.uv_crop_height,
854
6.72k
        ref_buf->buf.uv_stride,
855
6.72k
      };
856
857
6.72k
      const MV mv = this_mbmi->mv[ref].as_mv;
858
859
6.72k
      InterPredParams inter_pred_params;
860
6.72k
      av1_init_inter_params(&inter_pred_params, b4_w, b4_h, pre_y + y,
861
6.72k
                            pre_x + x, pd->subsampling_x, pd->subsampling_y,
862
6.72k
                            xd->bd, is_cur_buf_hbd(xd), mi->use_intrabc, sf,
863
6.72k
                            &pre_buf, this_mbmi->interp_filters);
864
6.72k
      inter_pred_params.conv_params =
865
6.72k
          get_conv_params_no_round(ref, plane, NULL, 0, is_compound, xd->bd);
866
867
6.72k
      av1_build_one_inter_predictor(dst, dst_buf->stride, &mv,
868
6.72k
                                    &inter_pred_params, xd, mi_x + x, mi_y + y,
869
6.72k
                                    ref, mc_buf, calc_subpel_params_func);
870
871
6.72k
      ++col;
872
6.72k
    }
873
3.43k
    ++row;
874
3.43k
  }
875
3.14k
}
876
877
static void build_inter_predictors_8x8_and_bigger(
878
    const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, const MB_MODE_INFO *mi,
879
    int build_for_obmc, int bw, int bh, int mi_x, int mi_y, uint8_t **mc_buf,
880
369k
    CalcSubpelParamsFunc calc_subpel_params_func) {
881
369k
  const int is_compound = has_second_ref(mi);
882
369k
  const int is_intrabc = is_intrabc_block(mi);
883
369k
  assert(IMPLIES(is_intrabc, !is_compound));
884
369k
  struct macroblockd_plane *const pd = &xd->plane[plane];
885
369k
  struct buf_2d *const dst_buf = &pd->dst;
886
369k
  uint8_t *const dst = dst_buf->buf;
887
888
369k
  int is_global[2] = { 0, 0 };
889
773k
  for (int ref = 0; ref < 1 + is_compound; ++ref) {
890
403k
    const WarpedMotionParams *const wm = &xd->global_motion[mi->ref_frame[ref]];
891
403k
    is_global[ref] = is_global_mv_block(mi, wm->wmtype);
892
403k
  }
893
894
369k
  const BLOCK_SIZE bsize = mi->bsize;
895
369k
  const int ss_x = pd->subsampling_x;
896
369k
  const int ss_y = pd->subsampling_y;
897
369k
  const int row_start =
898
369k
      (block_size_high[bsize] == 4) && ss_y && !build_for_obmc ? -1 : 0;
899
369k
  const int col_start =
900
369k
      (block_size_wide[bsize] == 4) && ss_x && !build_for_obmc ? -1 : 0;
901
369k
  const int pre_x = (mi_x + MI_SIZE * col_start) >> ss_x;
902
369k
  const int pre_y = (mi_y + MI_SIZE * row_start) >> ss_y;
903
904
773k
  for (int ref = 0; ref < 1 + is_compound; ++ref) {
905
403k
    const struct scale_factors *const sf =
906
403k
        is_intrabc ? &cm->sf_identity : xd->block_ref_scale_factors[ref];
907
403k
    struct buf_2d *const pre_buf = is_intrabc ? dst_buf : &pd->pre[ref];
908
403k
    const MV mv = mi->mv[ref].as_mv;
909
403k
    const WarpTypesAllowed warp_types = { is_global[ref],
910
403k
                                          mi->motion_mode == WARPED_CAUSAL };
911
912
403k
    InterPredParams inter_pred_params;
913
403k
    av1_init_inter_params(&inter_pred_params, bw, bh, pre_y, pre_x,
914
403k
                          pd->subsampling_x, pd->subsampling_y, xd->bd,
915
403k
                          is_cur_buf_hbd(xd), mi->use_intrabc, sf, pre_buf,
916
403k
                          mi->interp_filters);
917
403k
    if (is_compound) av1_init_comp_mode(&inter_pred_params);
918
403k
    inter_pred_params.conv_params = get_conv_params_no_round(
919
403k
        ref, plane, xd->tmp_conv_dst, MAX_SB_SIZE, is_compound, xd->bd);
920
921
403k
    av1_dist_wtd_comp_weight_assign(
922
403k
        cm, mi, &inter_pred_params.conv_params.fwd_offset,
923
403k
        &inter_pred_params.conv_params.bck_offset,
924
403k
        &inter_pred_params.conv_params.use_dist_wtd_comp_avg, is_compound);
925
926
403k
    if (!build_for_obmc)
927
388k
      av1_init_warp_params(&inter_pred_params, &warp_types, ref, xd, mi);
928
929
403k
    if (is_masked_compound_type(mi->interinter_comp.type)) {
930
14.0k
      inter_pred_params.sb_type = mi->bsize;
931
14.0k
      inter_pred_params.mask_comp = mi->interinter_comp;
932
14.0k
      if (ref == 1) {
933
7.04k
        inter_pred_params.conv_params.do_average = 0;
934
7.04k
        inter_pred_params.comp_mode = MASK_COMP;
935
7.04k
      }
936
      // Assign physical buffer.
937
14.0k
      inter_pred_params.mask_comp.seg_mask = xd->seg_mask;
938
14.0k
    }
939
940
403k
    av1_build_one_inter_predictor(dst, dst_buf->stride, &mv, &inter_pred_params,
941
403k
                                  xd, mi_x, mi_y, ref, mc_buf,
942
403k
                                  calc_subpel_params_func);
943
403k
  }
944
369k
}
945
946
void av1_build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd,
947
                                int plane, const MB_MODE_INFO *mi,
948
                                int build_for_obmc, int bw, int bh, int mi_x,
949
                                int mi_y, uint8_t **mc_buf,
950
372k
                                CalcSubpelParamsFunc calc_subpel_params_func) {
951
372k
  if (is_sub8x8_inter(xd, plane, mi->bsize, is_intrabc_block(mi),
952
372k
                      build_for_obmc)) {
953
3.14k
    assert(bw < 8 || bh < 8);
954
3.14k
    build_inter_predictors_sub8x8(cm, xd, plane, mi, mi_x, mi_y, mc_buf,
955
3.14k
                                  calc_subpel_params_func);
956
369k
  } else {
957
369k
    build_inter_predictors_8x8_and_bigger(cm, xd, plane, mi, build_for_obmc, bw,
958
369k
                                          bh, mi_x, mi_y, mc_buf,
959
369k
                                          calc_subpel_params_func);
960
369k
  }
961
372k
}
962
void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
963
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
964
10.0M
                          const int plane_start, const int plane_end) {
965
  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
966
  // the static analysis warnings.
967
39.3M
  for (int i = plane_start; i < AOMMIN(plane_end, MAX_MB_PLANE); ++i) {
968
29.3M
    struct macroblockd_plane *const pd = &planes[i];
969
29.3M
    const int is_uv = i > 0;
970
29.3M
    setup_pred_plane(&pd->dst, bsize, src->buffers[i], src->crop_widths[is_uv],
971
29.3M
                     src->crop_heights[is_uv], src->strides[is_uv], mi_row,
972
29.3M
                     mi_col, NULL, pd->subsampling_x, pd->subsampling_y);
973
29.3M
  }
974
10.0M
}
975
976
void av1_setup_pre_planes(MACROBLOCKD *xd, int idx,
977
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
978
                          const struct scale_factors *sf,
979
62.8k
                          const int num_planes) {
980
62.8k
  if (src != NULL) {
981
    // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
982
    // the static analysis warnings.
983
233k
    for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); ++i) {
984
170k
      struct macroblockd_plane *const pd = &xd->plane[i];
985
170k
      const int is_uv = i > 0;
986
170k
      setup_pred_plane(&pd->pre[idx], xd->mi[0]->bsize, src->buffers[i],
987
170k
                       src->crop_widths[is_uv], src->crop_heights[is_uv],
988
170k
                       src->strides[is_uv], mi_row, mi_col, sf,
989
170k
                       pd->subsampling_x, pd->subsampling_y);
990
170k
    }
991
62.8k
  }
992
62.8k
}
993
994
// obmc_mask_N[overlap_position]
995
static const uint8_t obmc_mask_1[1] = { 64 };
996
DECLARE_ALIGNED(2, static const uint8_t, obmc_mask_2[2]) = { 45, 64 };
997
998
DECLARE_ALIGNED(4, static const uint8_t, obmc_mask_4[4]) = { 39, 50, 59, 64 };
999
1000
static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 64, 64 };
1001
1002
static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54,
1003
                                          56, 58, 60, 61, 64, 64, 64, 64 };
1004
1005
static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44,
1006
                                          45, 47, 48, 50, 51, 52, 53, 55,
1007
                                          56, 57, 58, 59, 60, 60, 61, 62,
1008
                                          64, 64, 64, 64, 64, 64, 64, 64 };
1009
1010
static const uint8_t obmc_mask_64[64] = {
1011
  33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
1012
  45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
1013
  56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
1014
  62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
1015
};
1016
1017
15.3k
const uint8_t *av1_get_obmc_mask(int length) {
1018
15.3k
  switch (length) {
1019
0
    case 1: return obmc_mask_1;
1020
1.77k
    case 2: return obmc_mask_2;
1021
11.7k
    case 4: return obmc_mask_4;
1022
1.85k
    case 8: return obmc_mask_8;
1023
0
    case 16: return obmc_mask_16;
1024
0
    case 32: return obmc_mask_32;
1025
0
    case 64: return obmc_mask_64;
1026
0
    default: assert(0); return NULL;
1027
15.3k
  }
1028
15.3k
}
1029
1030
static INLINE void increment_int_ptr(MACROBLOCKD *xd, int rel_mi_row,
1031
                                     int rel_mi_col, uint8_t op_mi_size,
1032
                                     int dir, MB_MODE_INFO *mi, void *fun_ctxt,
1033
20.0k
                                     const int num_planes) {
1034
20.0k
  (void)xd;
1035
20.0k
  (void)rel_mi_row;
1036
20.0k
  (void)rel_mi_col;
1037
20.0k
  (void)op_mi_size;
1038
20.0k
  (void)dir;
1039
20.0k
  (void)mi;
1040
20.0k
  ++*(int *)fun_ctxt;
1041
20.0k
  (void)num_planes;
1042
20.0k
}
1043
1044
44.3k
void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd) {
1045
44.3k
  MB_MODE_INFO *mbmi = xd->mi[0];
1046
1047
44.3k
  mbmi->overlappable_neighbors = 0;
1048
1049
44.3k
  if (!is_motion_variation_allowed_bsize(mbmi->bsize)) return;
1050
1051
27.4k
  foreach_overlappable_nb_above(cm, xd, INT_MAX, increment_int_ptr,
1052
27.4k
                                &mbmi->overlappable_neighbors);
1053
27.4k
  if (mbmi->overlappable_neighbors) return;
1054
14.8k
  foreach_overlappable_nb_left(cm, xd, INT_MAX, increment_int_ptr,
1055
14.8k
                               &mbmi->overlappable_neighbors);
1056
14.8k
}
1057
1058
// HW does not support < 4x4 prediction. To limit the bandwidth requirement, if
1059
// block-size of current plane is smaller than 8x8, always only blend with the
1060
// left neighbor(s) (skip blending with the above side).
1061
#define DISABLE_CHROMA_U8X8_OBMC 0  // 0: one-sided obmc; 1: disable
1062
1063
int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,
1064
34.6k
                               const struct macroblockd_plane *pd, int dir) {
1065
34.6k
  assert(is_motion_variation_allowed_bsize(bsize));
1066
1067
34.6k
  const BLOCK_SIZE bsize_plane =
1068
34.6k
      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
1069
34.6k
  switch (bsize_plane) {
1070
#if DISABLE_CHROMA_U8X8_OBMC
1071
    case BLOCK_4X4:
1072
    case BLOCK_8X4:
1073
    case BLOCK_4X8: return 1; break;
1074
#else
1075
0
    case BLOCK_4X4:
1076
0
    case BLOCK_8X4:
1077
7.37k
    case BLOCK_4X8: return dir == 0; break;
1078
0
#endif
1079
27.2k
    default: return 0;
1080
34.6k
  }
1081
34.6k
}
1082
1083
5.76k
void av1_modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
1084
5.76k
  mbmi->ref_frame[1] = NONE_FRAME;
1085
5.76k
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
1086
1087
5.76k
  return;
1088
5.76k
}
1089
1090
struct obmc_inter_pred_ctxt {
1091
  uint8_t **adjacent;
1092
  int *adjacent_stride;
1093
};
1094
1095
static INLINE void build_obmc_inter_pred_above(
1096
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
1097
3.19k
    int dir, MB_MODE_INFO *above_mi, void *fun_ctxt, const int num_planes) {
1098
3.19k
  (void)above_mi;
1099
3.19k
  (void)rel_mi_row;
1100
3.19k
  (void)dir;
1101
3.19k
  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
1102
3.19k
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
1103
3.19k
  const int overlap =
1104
3.19k
      AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
1105
1106
12.7k
  for (int plane = 0; plane < num_planes; ++plane) {
1107
9.57k
    const struct macroblockd_plane *pd = &xd->plane[plane];
1108
9.57k
    const int bw = (op_mi_size * MI_SIZE) >> pd->subsampling_x;
1109
9.57k
    const int bh = overlap >> pd->subsampling_y;
1110
9.57k
    const int plane_col = (rel_mi_col * MI_SIZE) >> pd->subsampling_x;
1111
1112
9.57k
    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
1113
1114
7.65k
    const int dst_stride = pd->dst.stride;
1115
7.65k
    uint8_t *const dst = &pd->dst.buf[plane_col];
1116
7.65k
    const int tmp_stride = ctxt->adjacent_stride[plane];
1117
7.65k
    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_col];
1118
7.65k
    const uint8_t *const mask = av1_get_obmc_mask(bh);
1119
7.65k
#if CONFIG_AV1_HIGHBITDEPTH
1120
7.65k
    const int is_hbd = is_cur_buf_hbd(xd);
1121
7.65k
    if (is_hbd)
1122
6.36k
      aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
1123
6.36k
                                 tmp_stride, mask, bw, bh, xd->bd);
1124
1.29k
    else
1125
1.29k
      aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
1126
1.29k
                          mask, bw, bh);
1127
#else
1128
    aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
1129
                        bw, bh);
1130
#endif
1131
7.65k
  }
1132
3.19k
}
1133
1134
static INLINE void build_obmc_inter_pred_left(
1135
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
1136
2.57k
    int dir, MB_MODE_INFO *left_mi, void *fun_ctxt, const int num_planes) {
1137
2.57k
  (void)left_mi;
1138
2.57k
  (void)rel_mi_col;
1139
2.57k
  (void)dir;
1140
2.57k
  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
1141
2.57k
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
1142
2.57k
  const int overlap =
1143
2.57k
      AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
1144
1145
10.3k
  for (int plane = 0; plane < num_planes; ++plane) {
1146
7.73k
    const struct macroblockd_plane *pd = &xd->plane[plane];
1147
7.73k
    const int bw = overlap >> pd->subsampling_x;
1148
7.73k
    const int bh = (op_mi_size * MI_SIZE) >> pd->subsampling_y;
1149
7.73k
    const int plane_row = (rel_mi_row * MI_SIZE) >> pd->subsampling_y;
1150
1151
7.73k
    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
1152
1153
7.73k
    const int dst_stride = pd->dst.stride;
1154
7.73k
    uint8_t *const dst = &pd->dst.buf[plane_row * dst_stride];
1155
7.73k
    const int tmp_stride = ctxt->adjacent_stride[plane];
1156
7.73k
    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_row * tmp_stride];
1157
7.73k
    const uint8_t *const mask = av1_get_obmc_mask(bw);
1158
1159
7.73k
#if CONFIG_AV1_HIGHBITDEPTH
1160
7.73k
    const int is_hbd = is_cur_buf_hbd(xd);
1161
7.73k
    if (is_hbd)
1162
5.44k
      aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
1163
5.44k
                                 tmp_stride, mask, bw, bh, xd->bd);
1164
2.28k
    else
1165
2.28k
      aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
1166
2.28k
                          mask, bw, bh);
1167
#else
1168
    aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
1169
                        bw, bh);
1170
#endif
1171
7.73k
  }
1172
2.57k
}
1173
1174
// This function combines motion compensated predictions that are generated by
1175
// top/left neighboring blocks' inter predictors with the regular inter
1176
// prediction. We assume the original prediction (bmc) is stored in
1177
// xd->plane[].dst.buf
1178
void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
1179
                                     uint8_t *above[MAX_MB_PLANE],
1180
                                     int above_stride[MAX_MB_PLANE],
1181
                                     uint8_t *left[MAX_MB_PLANE],
1182
4.30k
                                     int left_stride[MAX_MB_PLANE]) {
1183
4.30k
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
1184
1185
  // handle above row
1186
4.30k
  struct obmc_inter_pred_ctxt ctxt_above = { above, above_stride };
1187
4.30k
  foreach_overlappable_nb_above(cm, xd,
1188
4.30k
                                max_neighbor_obmc[mi_size_wide_log2[bsize]],
1189
4.30k
                                build_obmc_inter_pred_above, &ctxt_above);
1190
1191
  // handle left column
1192
4.30k
  struct obmc_inter_pred_ctxt ctxt_left = { left, left_stride };
1193
4.30k
  foreach_overlappable_nb_left(cm, xd,
1194
4.30k
                               max_neighbor_obmc[mi_size_high_log2[bsize]],
1195
4.30k
                               build_obmc_inter_pred_left, &ctxt_left);
1196
4.30k
}
1197
1198
void av1_setup_obmc_dst_bufs(MACROBLOCKD *xd, uint8_t **dst_buf1,
1199
4.30k
                             uint8_t **dst_buf2) {
1200
4.30k
  if (is_cur_buf_hbd(xd)) {
1201
3.23k
    int len = sizeof(uint16_t);
1202
3.23k
    dst_buf1[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0]);
1203
3.23k
    dst_buf1[1] =
1204
3.23k
        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * len);
1205
3.23k
    dst_buf1[2] =
1206
3.23k
        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * 2 * len);
1207
3.23k
    dst_buf2[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1]);
1208
3.23k
    dst_buf2[1] =
1209
3.23k
        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * len);
1210
3.23k
    dst_buf2[2] =
1211
3.23k
        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2 * len);
1212
3.23k
  } else {
1213
1.07k
    dst_buf1[0] = xd->tmp_obmc_bufs[0];
1214
1.07k
    dst_buf1[1] = xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE;
1215
1.07k
    dst_buf1[2] = xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * 2;
1216
1.07k
    dst_buf2[0] = xd->tmp_obmc_bufs[1];
1217
1.07k
    dst_buf2[1] = xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE;
1218
1.07k
    dst_buf2[2] = xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2;
1219
1.07k
  }
1220
4.30k
}
1221
1222
void av1_setup_build_prediction_by_above_pred(
1223
    MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width,
1224
    MB_MODE_INFO *above_mbmi, struct build_prediction_ctxt *ctxt,
1225
3.19k
    const int num_planes) {
1226
3.19k
  const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->bsize);
1227
3.19k
  const int above_mi_col = xd->mi_col + rel_mi_col;
1228
1229
3.19k
  av1_modify_neighbor_predictor_for_obmc(above_mbmi);
1230
1231
12.7k
  for (int j = 0; j < num_planes; ++j) {
1232
9.57k
    struct macroblockd_plane *const pd = &xd->plane[j];
1233
9.57k
    setup_pred_plane(&pd->dst, a_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
1234
9.57k
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], 0, rel_mi_col,
1235
9.57k
                     NULL, pd->subsampling_x, pd->subsampling_y);
1236
9.57k
  }
1237
1238
3.19k
  const int num_refs = 1 + has_second_ref(above_mbmi);
1239
1240
6.38k
  for (int ref = 0; ref < num_refs; ++ref) {
1241
3.19k
    const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];
1242
1243
3.19k
    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
1244
3.19k
    const struct scale_factors *const sf =
1245
3.19k
        get_ref_scale_factors_const(ctxt->cm, frame);
1246
3.19k
    xd->block_ref_scale_factors[ref] = sf;
1247
3.19k
    if ((!av1_is_valid_scale(sf)))
1248
0
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
1249
0
                         "Reference frame has invalid dimensions");
1250
3.19k
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, xd->mi_row, above_mi_col, sf,
1251
3.19k
                         num_planes);
1252
3.19k
  }
1253
1254
3.19k
  xd->mb_to_left_edge = 8 * MI_SIZE * (-above_mi_col);
1255
3.19k
  xd->mb_to_right_edge =
1256
3.19k
      ctxt->mb_to_far_edge +
1257
3.19k
      (xd->width - rel_mi_col - above_mi_width) * MI_SIZE * 8;
1258
3.19k
}
1259
1260
void av1_setup_build_prediction_by_left_pred(MACROBLOCKD *xd, int rel_mi_row,
1261
                                             uint8_t left_mi_height,
1262
                                             MB_MODE_INFO *left_mbmi,
1263
                                             struct build_prediction_ctxt *ctxt,
1264
2.57k
                                             const int num_planes) {
1265
2.57k
  const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->bsize);
1266
2.57k
  const int left_mi_row = xd->mi_row + rel_mi_row;
1267
1268
2.57k
  av1_modify_neighbor_predictor_for_obmc(left_mbmi);
1269
1270
10.3k
  for (int j = 0; j < num_planes; ++j) {
1271
7.73k
    struct macroblockd_plane *const pd = &xd->plane[j];
1272
7.73k
    setup_pred_plane(&pd->dst, l_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
1273
7.73k
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], rel_mi_row, 0,
1274
7.73k
                     NULL, pd->subsampling_x, pd->subsampling_y);
1275
7.73k
  }
1276
1277
2.57k
  const int num_refs = 1 + has_second_ref(left_mbmi);
1278
1279
5.15k
  for (int ref = 0; ref < num_refs; ++ref) {
1280
2.57k
    const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];
1281
1282
2.57k
    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
1283
2.57k
    const struct scale_factors *const ref_scale_factors =
1284
2.57k
        get_ref_scale_factors_const(ctxt->cm, frame);
1285
1286
2.57k
    xd->block_ref_scale_factors[ref] = ref_scale_factors;
1287
2.57k
    if ((!av1_is_valid_scale(ref_scale_factors)))
1288
0
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
1289
0
                         "Reference frame has invalid dimensions");
1290
2.57k
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, left_mi_row, xd->mi_col,
1291
2.57k
                         ref_scale_factors, num_planes);
1292
2.57k
  }
1293
1294
2.57k
  xd->mb_to_top_edge = GET_MV_SUBPEL(MI_SIZE * (-left_mi_row));
1295
2.57k
  xd->mb_to_bottom_edge =
1296
2.57k
      ctxt->mb_to_far_edge +
1297
2.57k
      GET_MV_SUBPEL((xd->height - rel_mi_row - left_mi_height) * MI_SIZE);
1298
2.57k
}
1299
1300
static AOM_INLINE void combine_interintra(
1301
    INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
1302
    int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
1303
    uint8_t *comppred, int compstride, const uint8_t *interpred,
1304
1.81k
    int interstride, const uint8_t *intrapred, int intrastride) {
1305
1.81k
  const int bw = block_size_wide[plane_bsize];
1306
1.81k
  const int bh = block_size_high[plane_bsize];
1307
1308
1.81k
  if (use_wedge_interintra) {
1309
630
    if (av1_is_wedge_used(bsize)) {
1310
630
      const uint8_t *mask =
1311
630
          av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
1312
630
      const int subw = 2 * mi_size_wide[bsize] == bw;
1313
630
      const int subh = 2 * mi_size_high[bsize] == bh;
1314
630
      aom_blend_a64_mask(comppred, compstride, intrapred, intrastride,
1315
630
                         interpred, interstride, mask, block_size_wide[bsize],
1316
630
                         bw, bh, subw, subh);
1317
630
    }
1318
630
    return;
1319
630
  }
1320
1321
1.18k
  const uint8_t *mask = smooth_interintra_mask_buf[mode][plane_bsize];
1322
1.18k
  aom_blend_a64_mask(comppred, compstride, intrapred, intrastride, interpred,
1323
1.18k
                     interstride, mask, bw, bw, bh, 0, 0);
1324
1.18k
}
1325
1326
#if CONFIG_AV1_HIGHBITDEPTH
// High-bitdepth counterpart of combine_interintra(): the *pred8 buffers are
// CONVERT_TO_BYTEPTR-style uint8_t aliases of 16-bit sample data.
static AOM_INLINE void combine_interintra_highbd(
    INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
    int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
    uint8_t *comppred8, int compstride, const uint8_t *interpred8,
    int interstride, const uint8_t *intrapred8, int intrastride, int bd) {
  const int plane_w = block_size_wide[plane_bsize];
  const int plane_h = block_size_high[plane_bsize];

  if (!use_wedge_interintra) {
    // Smooth interintra: build the per-pixel mask for this mode/size on the
    // stack, then blend intra against inter with it.
    uint8_t smooth_mask[MAX_SB_SQUARE];
    build_smooth_interintra_mask(smooth_mask, plane_w, plane_bsize, mode);
    aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
                              interpred8, interstride, smooth_mask, plane_w,
                              plane_w, plane_h, 0, 0, bd);
    return;
  }

  // Wedge interintra is only defined for wedge-capable block sizes; for other
  // sizes the compound buffer is intentionally left untouched.
  if (!av1_is_wedge_used(bsize)) return;

  const uint8_t *const wedge_mask =
      av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
  // The wedge mask is stored at luma block resolution; sub{w,h} ask the blend
  // kernel to read it 2x-subsampled when this plane is half-sized.
  const int subw = 2 * mi_size_wide[bsize] == plane_w;
  const int subh = 2 * mi_size_high[bsize] == plane_h;
  aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
                            interpred8, interstride, wedge_mask,
                            block_size_wide[bsize], plane_w, plane_h, subw,
                            subh, bd);
}
#endif  // CONFIG_AV1_HIGHBITDEPTH
1355
1356
void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm,
1357
                                               MACROBLOCKD *xd,
1358
                                               BLOCK_SIZE bsize, int plane,
1359
                                               const BUFFER_SET *ctx,
1360
5.97k
                                               uint8_t *dst, int dst_stride) {
1361
5.97k
  struct macroblockd_plane *const pd = &xd->plane[plane];
1362
5.97k
  const int ssx = xd->plane[plane].subsampling_x;
1363
5.97k
  const int ssy = xd->plane[plane].subsampling_y;
1364
5.97k
  BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
1365
5.97k
  PREDICTION_MODE mode = interintra_to_intra_mode[xd->mi[0]->interintra_mode];
1366
5.97k
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_Y] == 0);
1367
5.97k
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_UV] == 0);
1368
5.97k
  assert(xd->mi[0]->filter_intra_mode_info.use_filter_intra == 0);
1369
5.97k
  assert(xd->mi[0]->use_intrabc == 0);
1370
5.97k
  const SequenceHeader *seq_params = cm->seq_params;
1371
1372
5.97k
  av1_predict_intra_block(xd, seq_params->sb_size,
1373
5.97k
                          seq_params->enable_intra_edge_filter, pd->width,
1374
5.97k
                          pd->height, max_txsize_rect_lookup[plane_bsize], mode,
1375
5.97k
                          0, 0, FILTER_INTRA_MODES, ctx->plane[plane],
1376
5.97k
                          ctx->stride[plane], dst, dst_stride, 0, 0, plane);
1377
5.97k
}
1378
1379
// Blends a previously computed inter prediction with an intra prediction for
// one plane, writing the result into xd->plane[plane].dst. Dispatches to the
// high-bitdepth kernel when the current buffer is HBD.
void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
                            const uint8_t *inter_pred, int inter_stride,
                            const uint8_t *intra_pred, int intra_stride) {
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
  const MB_MODE_INFO *const mbmi = xd->mi[0];
#if CONFIG_AV1_HIGHBITDEPTH
  if (is_cur_buf_hbd(xd)) {
    combine_interintra_highbd(
        mbmi->interintra_mode, mbmi->use_wedge_interintra,
        mbmi->interintra_wedge_index, INTERINTRA_WEDGE_SIGN, bsize, plane_bsize,
        pd->dst.buf, pd->dst.stride, inter_pred, inter_stride, intra_pred,
        intra_stride, xd->bd);
    return;
  }
#endif
  combine_interintra(mbmi->interintra_mode, mbmi->use_wedge_interintra,
                     mbmi->interintra_wedge_index, INTERINTRA_WEDGE_SIGN, bsize,
                     plane_bsize, pd->dst.buf, pd->dst.stride, inter_pred,
                     inter_stride, intra_pred, intra_stride);
}
1401
1402
// build interintra_predictors for one plane
1403
void av1_build_interintra_predictor(const AV1_COMMON *cm, MACROBLOCKD *xd,
1404
                                    uint8_t *pred, int stride,
1405
                                    const BUFFER_SET *ctx, int plane,
1406
5.97k
                                    BLOCK_SIZE bsize) {
1407
5.97k
  assert(bsize < BLOCK_SIZES_ALL);
1408
5.97k
  if (is_cur_buf_hbd(xd)) {
1409
4.15k
    DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
1410
4.15k
    av1_build_intra_predictors_for_interintra(
1411
4.15k
        cm, xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(intrapredictor),
1412
4.15k
        MAX_SB_SIZE);
1413
4.15k
    av1_combine_interintra(xd, bsize, plane, pred, stride,
1414
4.15k
                           CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
1415
4.15k
  } else {
1416
1.81k
    DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]);
1417
1.81k
    av1_build_intra_predictors_for_interintra(cm, xd, bsize, plane, ctx,
1418
1.81k
                                              intrapredictor, MAX_SB_SIZE);
1419
1.81k
    av1_combine_interintra(xd, bsize, plane, pred, stride, intrapredictor,
1420
1.81k
                           MAX_SB_SIZE);
1421
1.81k
  }
1422
5.97k
}