Coverage Report

Created: 2025-11-16 07:22

/src/aom/av1/common/reconinter.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
#include <stdio.h>
14
#include <limits.h>
15
16
#include "config/aom_config.h"
17
#include "config/aom_dsp_rtcd.h"
18
#include "config/aom_scale_rtcd.h"
19
20
#include "aom/aom_integer.h"
21
#include "aom_dsp/blend.h"
22
23
#include "av1/common/av1_common_int.h"
24
#include "av1/common/blockd.h"
25
#include "av1/common/mvref_common.h"
26
#include "av1/common/obmc.h"
27
#include "av1/common/reconinter.h"
28
#include "av1/common/reconintra.h"
29
30
// This function will determine whether or not to create a warped
31
// prediction.
32
int av1_allow_warp(const MB_MODE_INFO *const mbmi,
33
                   const WarpTypesAllowed *const warp_types,
34
                   const WarpedMotionParams *const gm_params,
35
                   int build_for_obmc, const struct scale_factors *const sf,
36
137k
                   WarpedMotionParams *final_warp_params) {
37
  // Note: As per the spec, we must test the fixed point scales here, which are
38
  // at a higher precision (1 << 14) than the xs and ys in subpel_params (that
39
  // have 1 << 10 precision).
40
137k
  if (av1_is_scaled(sf)) return 0;
41
42
131k
  if (final_warp_params != NULL) *final_warp_params = default_warp_params;
43
44
131k
  if (build_for_obmc) return 0;
45
46
131k
  if (warp_types->local_warp_allowed && !mbmi->wm_params.invalid) {
47
1.04k
    if (final_warp_params != NULL)
48
1.04k
      memcpy(final_warp_params, &mbmi->wm_params, sizeof(*final_warp_params));
49
1.04k
    return 1;
50
130k
  } else if (warp_types->global_warp_allowed && !gm_params->invalid) {
51
10.8k
    if (final_warp_params != NULL)
52
10.8k
      memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
53
10.8k
    return 1;
54
10.8k
  }
55
56
119k
  return 0;
57
131k
}
58
59
void av1_init_inter_params(InterPredParams *inter_pred_params, int block_width,
60
                           int block_height, int pix_row, int pix_col,
61
                           int subsampling_x, int subsampling_y, int bit_depth,
62
                           int use_hbd_buf, int is_intrabc,
63
                           const struct scale_factors *sf,
64
                           const struct buf_2d *ref_buf,
65
406k
                           int_interpfilters interp_filters) {
66
406k
  inter_pred_params->block_width = block_width;
67
406k
  inter_pred_params->block_height = block_height;
68
406k
  inter_pred_params->pix_row = pix_row;
69
406k
  inter_pred_params->pix_col = pix_col;
70
406k
  inter_pred_params->subsampling_x = subsampling_x;
71
406k
  inter_pred_params->subsampling_y = subsampling_y;
72
406k
  inter_pred_params->bit_depth = bit_depth;
73
406k
  inter_pred_params->use_hbd_buf = use_hbd_buf;
74
406k
  inter_pred_params->is_intrabc = is_intrabc;
75
406k
  inter_pred_params->scale_factors = sf;
76
406k
  inter_pred_params->ref_frame_buf = *ref_buf;
77
406k
  inter_pred_params->mode = TRANSLATION_PRED;
78
406k
  inter_pred_params->comp_mode = UNIFORM_SINGLE;
79
80
406k
  if (is_intrabc) {
81
255k
    inter_pred_params->interp_filter_params[0] = &av1_intrabc_filter_params;
82
255k
    inter_pred_params->interp_filter_params[1] = &av1_intrabc_filter_params;
83
255k
  } else {
84
150k
    inter_pred_params->interp_filter_params[0] =
85
150k
        av1_get_interp_filter_params_with_block_size(
86
150k
            interp_filters.as_filters.x_filter, block_width);
87
150k
    inter_pred_params->interp_filter_params[1] =
88
150k
        av1_get_interp_filter_params_with_block_size(
89
150k
            interp_filters.as_filters.y_filter, block_height);
90
150k
  }
91
406k
}
92
93
61.8k
void av1_init_comp_mode(InterPredParams *inter_pred_params) {
94
61.8k
  inter_pred_params->comp_mode = UNIFORM_COMP;
95
61.8k
}
96
97
void av1_init_warp_params(InterPredParams *inter_pred_params,
98
                          const WarpTypesAllowed *warp_types, int ref,
99
386k
                          const MACROBLOCKD *xd, const MB_MODE_INFO *mi) {
100
386k
  if (inter_pred_params->block_height < 8 || inter_pred_params->block_width < 8)
101
200k
    return;
102
103
185k
  if (xd->cur_frame_force_integer_mv) return;
104
105
137k
  if (av1_allow_warp(mi, warp_types, &xd->global_motion[mi->ref_frame[ref]], 0,
106
137k
                     inter_pred_params->scale_factors,
107
137k
                     &inter_pred_params->warp_params)) {
108
#if CONFIG_REALTIME_ONLY
109
    aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_FEATURE,
110
                       "Warped motion is disabled in realtime only build.");
111
#endif
112
11.8k
    inter_pred_params->mode = WARP_PRED;
113
11.8k
  }
114
137k
}
115
116
void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
117
                              int dst_stride,
118
                              InterPredParams *inter_pred_params,
119
406k
                              const SubpelParams *subpel_params) {
120
406k
  assert(IMPLIES(inter_pred_params->conv_params.is_compound,
121
406k
                 inter_pred_params->conv_params.dst != NULL));
122
123
406k
  if (inter_pred_params->mode == TRANSLATION_PRED) {
124
394k
#if CONFIG_AV1_HIGHBITDEPTH
125
394k
    if (inter_pred_params->use_hbd_buf) {
126
283k
      highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
127
283k
                             inter_pred_params->block_width,
128
283k
                             inter_pred_params->block_height,
129
283k
                             &inter_pred_params->conv_params,
130
283k
                             inter_pred_params->interp_filter_params,
131
283k
                             inter_pred_params->bit_depth);
132
283k
    } else {
133
110k
      inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
134
110k
                      inter_pred_params->block_width,
135
110k
                      inter_pred_params->block_height,
136
110k
                      &inter_pred_params->conv_params,
137
110k
                      inter_pred_params->interp_filter_params);
138
110k
    }
139
#else
140
    inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
141
                    inter_pred_params->block_width,
142
                    inter_pred_params->block_height,
143
                    &inter_pred_params->conv_params,
144
                    inter_pred_params->interp_filter_params);
145
#endif
146
394k
  }
147
11.8k
#if !CONFIG_REALTIME_ONLY
148
  // TODO(jingning): av1_warp_plane() can be further cleaned up.
149
11.8k
  else if (inter_pred_params->mode == WARP_PRED) {
150
11.8k
    av1_warp_plane(
151
11.8k
        &inter_pred_params->warp_params, inter_pred_params->use_hbd_buf,
152
11.8k
        inter_pred_params->bit_depth, inter_pred_params->ref_frame_buf.buf0,
153
11.8k
        inter_pred_params->ref_frame_buf.width,
154
11.8k
        inter_pred_params->ref_frame_buf.height,
155
11.8k
        inter_pred_params->ref_frame_buf.stride, dst,
156
11.8k
        inter_pred_params->pix_col, inter_pred_params->pix_row,
157
11.8k
        inter_pred_params->block_width, inter_pred_params->block_height,
158
11.8k
        dst_stride, inter_pred_params->subsampling_x,
159
11.8k
        inter_pred_params->subsampling_y, &inter_pred_params->conv_params);
160
11.8k
  }
161
0
#endif
162
0
  else {
163
0
    assert(0 && "Unsupported inter_pred_params->mode");
164
0
  }
165
406k
}
166
167
static const uint8_t wedge_master_oblique_odd[MASK_MASTER_SIZE] = {
168
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
169
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  6,  18,
170
  37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
171
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
172
};
173
static const uint8_t wedge_master_oblique_even[MASK_MASTER_SIZE] = {
174
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
175
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  4,  11, 27,
176
  46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
177
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
178
};
179
static const uint8_t wedge_master_vertical[MASK_MASTER_SIZE] = {
180
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
181
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  7,  21,
182
  43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
183
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
184
};
185
186
static AOM_INLINE void shift_copy(const uint8_t *src, uint8_t *dst, int shift,
187
320
                                  int width) {
188
320
  if (shift >= 0) {
189
165
    memcpy(dst + shift, src, width - shift);
190
165
    memset(dst, src[0], shift);
191
165
  } else {
192
155
    shift = -shift;
193
155
    memcpy(dst, src + shift, width - shift);
194
155
    memset(dst + width - shift, src[width - 1], shift);
195
155
  }
196
320
}
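shift_copy builds each shifted row of the oblique master masks: a non-negative shift slides the ramp right and pads on the left with src[0]; a negative shift slides it left and pads on the right with src[width - 1]. A minimal standalone sketch of that behavior on a hypothetical 8-entry ramp (illustration only, not part of the library):

#include <stdint.h>
#include <string.h>

/* Same copy/pad rule as shift_copy above, on a tiny buffer. */
static void shift_copy_demo(const uint8_t *src, uint8_t *dst, int shift,
                            int width) {
  if (shift >= 0) {
    memcpy(dst + shift, src, width - shift);
    memset(dst, src[0], shift);
  } else {
    shift = -shift;
    memcpy(dst, src + shift, width - shift);
    memset(dst + width - shift, src[width - 1], shift);
  }
}

int main(void) {
  const uint8_t ramp[8] = { 0, 0, 0, 16, 48, 64, 64, 64 };
  uint8_t out[8];
  shift_copy_demo(ramp, out, 2, 8);   /* out: 0 0 0 0 0 16 48 64     */
  shift_copy_demo(ramp, out, -2, 8);  /* out: 0 16 48 64 64 64 64 64 */
  return 0;
}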
197
198
/* clang-format off */
199
DECLARE_ALIGNED(16, static uint8_t,
200
                wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]) = {
201
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
202
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
203
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
204
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
205
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
206
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
207
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
208
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
209
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
210
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
211
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
212
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
213
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
214
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
215
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
216
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
217
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
218
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
219
  { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, },
220
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, },
221
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
222
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
223
};
224
/* clang-format on */
225
226
// [negative][direction]
227
DECLARE_ALIGNED(
228
    16, static uint8_t,
229
    wedge_mask_obl[2][WEDGE_DIRECTIONS][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);
230
231
// 4 * MAX_WEDGE_SQUARE is an easy-to-compute and fairly tight upper bound
232
// on the sum of all mask sizes up to and including MAX_WEDGE_SQUARE.
233
DECLARE_ALIGNED(16, static uint8_t,
234
                wedge_mask_buf[2 * MAX_WEDGE_TYPES * 4 * MAX_WEDGE_SQUARE]);
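As a quick check of that bound: the wedge-enabled sizes in av1_wedge_params_lookup below are 8x8, 8x16, 16x8, 16x16, 16x32, 32x16, 32x32, 8x32 and 32x8, whose areas sum to 3136, while 4 * MAX_WEDGE_SQUARE is 4096 assuming MAX_WEDGE_SIZE == 32. A standalone sketch of the same arithmetic:

#include <assert.h>

int main(void) {
  /* Areas of the nine wedge-enabled block sizes listed in
   * av1_wedge_params_lookup; MAX_WEDGE_SQUARE == 32 * 32 is assumed. */
  const int dims[9][2] = { { 8, 8 },   { 8, 16 },  { 16, 8 },
                           { 16, 16 }, { 16, 32 }, { 32, 16 },
                           { 32, 32 }, { 8, 32 },  { 32, 8 } };
  int sum = 0;
  for (int i = 0; i < 9; ++i) sum += dims[i][0] * dims[i][1];
  assert(sum == 3136 && sum <= 4 * 32 * 32); /* 3136 <= 4096 */
  return 0;
}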
235
236
DECLARE_ALIGNED(16, static uint8_t,
237
                smooth_interintra_mask_buf[INTERINTRA_MODES][BLOCK_SIZES_ALL]
238
                                          [MAX_WEDGE_SQUARE]);
239
240
static wedge_masks_type wedge_masks[BLOCK_SIZES_ALL][2];
241
242
static const wedge_code_type wedge_codebook_16_hgtw[16] = {
243
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
244
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
245
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
246
  { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
247
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
248
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
249
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
250
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
251
};
252
253
static const wedge_code_type wedge_codebook_16_hltw[16] = {
254
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
255
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
256
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 4, 4 },
257
  { WEDGE_VERTICAL, 6, 4 },   { WEDGE_HORIZONTAL, 4, 4 },
258
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
259
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
260
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
261
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
262
};
263
264
static const wedge_code_type wedge_codebook_16_heqw[16] = {
265
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
266
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
267
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
268
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
269
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
270
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
271
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
272
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
273
};
274
275
const wedge_params_type av1_wedge_params_lookup[BLOCK_SIZES_ALL] = {
276
  { 0, NULL, NULL, NULL },
277
  { 0, NULL, NULL, NULL },
278
  { 0, NULL, NULL, NULL },
279
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8],
280
    wedge_masks[BLOCK_8X8] },
281
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16],
282
    wedge_masks[BLOCK_8X16] },
283
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8],
284
    wedge_masks[BLOCK_16X8] },
285
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16],
286
    wedge_masks[BLOCK_16X16] },
287
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32],
288
    wedge_masks[BLOCK_16X32] },
289
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16],
290
    wedge_masks[BLOCK_32X16] },
291
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32],
292
    wedge_masks[BLOCK_32X32] },
293
  { 0, NULL, NULL, NULL },
294
  { 0, NULL, NULL, NULL },
295
  { 0, NULL, NULL, NULL },
296
  { 0, NULL, NULL, NULL },
297
  { 0, NULL, NULL, NULL },
298
  { 0, NULL, NULL, NULL },
299
  { 0, NULL, NULL, NULL },
300
  { 0, NULL, NULL, NULL },
301
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32],
302
    wedge_masks[BLOCK_8X32] },
303
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8],
304
    wedge_masks[BLOCK_32X8] },
305
  { 0, NULL, NULL, NULL },
306
  { 0, NULL, NULL, NULL },
307
};
308
309
static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
310
1.44k
                                             BLOCK_SIZE sb_type) {
311
1.44k
  const uint8_t *master;
312
1.44k
  const int bh = block_size_high[sb_type];
313
1.44k
  const int bw = block_size_wide[sb_type];
314
1.44k
  const wedge_code_type *a =
315
1.44k
      av1_wedge_params_lookup[sb_type].codebook + wedge_index;
316
1.44k
  int woff, hoff;
317
1.44k
  const uint8_t wsignflip =
318
1.44k
      av1_wedge_params_lookup[sb_type].signflip[wedge_index];
319
320
1.44k
  assert(wedge_index >= 0 && wedge_index < get_wedge_types_lookup(sb_type));
321
1.44k
  woff = (a->x_offset * bw) >> 3;
322
1.44k
  hoff = (a->y_offset * bh) >> 3;
323
1.44k
  master = wedge_mask_obl[neg ^ wsignflip][a->direction] +
324
1.44k
           MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
325
1.44k
           MASK_MASTER_SIZE / 2 - woff;
326
1.44k
  return master;
327
1.44k
}
328
329
const uint8_t *av1_get_compound_type_mask(
330
6.27k
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) {
331
6.27k
  (void)sb_type;
332
6.27k
  switch (comp_data->type) {
333
4.20k
    case COMPOUND_WEDGE:
334
4.20k
      return av1_get_contiguous_soft_mask(comp_data->wedge_index,
335
4.20k
                                          comp_data->wedge_sign, sb_type);
336
2.06k
    default: return comp_data->seg_mask;
337
6.27k
  }
338
6.27k
}
339
340
static AOM_INLINE void diffwtd_mask_d16(
341
    uint8_t *mask, int which_inverse, int mask_base, const CONV_BUF_TYPE *src0,
342
    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
343
701
    ConvolveParams *conv_params, int bd) {
344
701
  int round =
345
701
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
346
701
  int i, j, m, diff;
347
7.31k
  for (i = 0; i < h; ++i) {
348
106k
    for (j = 0; j < w; ++j) {
349
99.5k
      diff = abs(src0[i * src0_stride + j] - src1[i * src1_stride + j]);
350
99.5k
      diff = ROUND_POWER_OF_TWO(diff, round);
351
99.5k
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
352
99.5k
      mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
353
99.5k
    }
354
6.61k
  }
355
701
}
356
357
void av1_build_compound_diffwtd_mask_d16_c(
358
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
359
    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
360
701
    ConvolveParams *conv_params, int bd) {
361
701
  switch (mask_type) {
362
431
    case DIFFWTD_38:
363
431
      diffwtd_mask_d16(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w,
364
431
                       conv_params, bd);
365
431
      break;
366
270
    case DIFFWTD_38_INV:
367
270
      diffwtd_mask_d16(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w,
368
270
                       conv_params, bd);
369
270
      break;
370
0
    default: assert(0);
371
701
  }
372
701
}
373
374
static AOM_INLINE void diffwtd_mask(uint8_t *mask, int which_inverse,
375
                                    int mask_base, const uint8_t *src0,
376
                                    int src0_stride, const uint8_t *src1,
377
0
                                    int src1_stride, int h, int w) {
378
0
  int i, j, m, diff;
379
0
  for (i = 0; i < h; ++i) {
380
0
    for (j = 0; j < w; ++j) {
381
0
      diff =
382
0
          abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]);
383
0
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
384
0
      mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
385
0
    }
386
0
  }
387
0
}
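The rule implemented by diffwtd_mask (and, after an extra rounding shift of the convolve-precision inputs, by diffwtd_mask_d16 above) is m = clamp(mask_base + |p0 - p1| / DIFF_FACTOR, 0, AOM_BLEND_A64_MAX_ALPHA), with the inverse variant storing the complement. A small standalone sketch, assuming DIFF_FACTOR == 16 and AOM_BLEND_A64_MAX_ALPHA == 64:

#include <stdio.h>
#include <stdlib.h>

static int clamp_int(int v, int lo, int hi) {
  return v < lo ? lo : (v > hi ? hi : v);
}

int main(void) {
  const int diff_factor = 16, mask_base = 38, max_alpha = 64; /* assumed */
  const int p0[3] = { 100, 100, 100 }, p1[3] = { 100, 180, 800 };
  for (int i = 0; i < 3; ++i) {
    const int diff = abs(p0[i] - p1[i]);
    const int m = clamp_int(mask_base + diff / diff_factor, 0, max_alpha);
    /* diff 0 -> m 38, diff 80 -> m 43, diff 700 -> m 64 (clamped) */
    printf("|diff| = %3d  m = %2d  inverse = %2d\n", diff, m, max_alpha - m);
  }
  return 0;
}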
388
389
void av1_build_compound_diffwtd_mask_c(uint8_t *mask,
390
                                       DIFFWTD_MASK_TYPE mask_type,
391
                                       const uint8_t *src0, int src0_stride,
392
                                       const uint8_t *src1, int src1_stride,
393
0
                                       int h, int w) {
394
0
  switch (mask_type) {
395
0
    case DIFFWTD_38:
396
0
      diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w);
397
0
      break;
398
0
    case DIFFWTD_38_INV:
399
0
      diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w);
400
0
      break;
401
0
    default: assert(0);
402
0
  }
403
0
}
404
405
static AOM_FORCE_INLINE void diffwtd_mask_highbd(
406
    uint8_t *mask, int which_inverse, int mask_base, const uint16_t *src0,
407
    int src0_stride, const uint16_t *src1, int src1_stride, int h, int w,
408
0
    const unsigned int bd) {
409
0
  assert(bd >= 8);
410
0
  if (bd == 8) {
411
0
    if (which_inverse) {
412
0
      for (int i = 0; i < h; ++i) {
413
0
        for (int j = 0; j < w; ++j) {
414
0
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
415
0
          unsigned int m = negative_to_zero(mask_base + diff);
416
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
417
0
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
418
0
        }
419
0
        src0 += src0_stride;
420
0
        src1 += src1_stride;
421
0
        mask += w;
422
0
      }
423
0
    } else {
424
0
      for (int i = 0; i < h; ++i) {
425
0
        for (int j = 0; j < w; ++j) {
426
0
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
427
0
          unsigned int m = negative_to_zero(mask_base + diff);
428
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
429
0
          mask[j] = m;
430
0
        }
431
0
        src0 += src0_stride;
432
0
        src1 += src1_stride;
433
0
        mask += w;
434
0
      }
435
0
    }
436
0
  } else {
437
0
    const unsigned int bd_shift = bd - 8;
438
0
    if (which_inverse) {
439
0
      for (int i = 0; i < h; ++i) {
440
0
        for (int j = 0; j < w; ++j) {
441
0
          int diff =
442
0
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
443
0
          unsigned int m = negative_to_zero(mask_base + diff);
444
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
445
0
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
446
0
        }
447
0
        src0 += src0_stride;
448
0
        src1 += src1_stride;
449
0
        mask += w;
450
0
      }
451
0
    } else {
452
0
      for (int i = 0; i < h; ++i) {
453
0
        for (int j = 0; j < w; ++j) {
454
0
          int diff =
455
0
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
456
0
          unsigned int m = negative_to_zero(mask_base + diff);
457
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
458
0
          mask[j] = m;
459
0
        }
460
0
        src0 += src0_stride;
461
0
        src1 += src1_stride;
462
0
        mask += w;
463
0
      }
464
0
    }
465
0
  }
466
0
}
467
468
void av1_build_compound_diffwtd_mask_highbd_c(
469
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0,
470
    int src0_stride, const uint8_t *src1, int src1_stride, int h, int w,
471
0
    int bd) {
472
0
  switch (mask_type) {
473
0
    case DIFFWTD_38:
474
0
      diffwtd_mask_highbd(mask, 0, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
475
0
                          CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
476
0
      break;
477
0
    case DIFFWTD_38_INV:
478
0
      diffwtd_mask_highbd(mask, 1, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
479
0
                          CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
480
0
      break;
481
0
    default: assert(0);
482
0
  }
483
0
}
484
485
5
static AOM_INLINE void init_wedge_master_masks() {
486
5
  int i, j;
487
5
  const int w = MASK_MASTER_SIZE;
488
5
  const int h = MASK_MASTER_SIZE;
489
5
  const int stride = MASK_MASTER_STRIDE;
490
  // Note: index [0] stores the masters, and [1] its complement.
491
  // Generate prototype by shifting the masters
492
5
  int shift = h / 4;
493
165
  for (i = 0; i < h; i += 2) {
494
160
    shift_copy(wedge_master_oblique_even,
495
160
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride], shift,
496
160
               MASK_MASTER_SIZE);
497
160
    shift--;
498
160
    shift_copy(wedge_master_oblique_odd,
499
160
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][(i + 1) * stride], shift,
500
160
               MASK_MASTER_SIZE);
501
160
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][i * stride],
502
160
           wedge_master_vertical,
503
160
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
504
160
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][(i + 1) * stride],
505
160
           wedge_master_vertical,
506
160
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
507
160
  }
508
509
325
  for (i = 0; i < h; ++i) {
510
20.8k
    for (j = 0; j < w; ++j) {
511
20.4k
      const int msk = wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j];
512
20.4k
      wedge_mask_obl[0][WEDGE_OBLIQUE27][j * stride + i] = msk;
513
20.4k
      wedge_mask_obl[0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
514
20.4k
          wedge_mask_obl[0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
515
20.4k
              (1 << WEDGE_WEIGHT_BITS) - msk;
516
20.4k
      wedge_mask_obl[1][WEDGE_OBLIQUE63][i * stride + j] =
517
20.4k
          wedge_mask_obl[1][WEDGE_OBLIQUE27][j * stride + i] =
518
20.4k
              (1 << WEDGE_WEIGHT_BITS) - msk;
519
20.4k
      wedge_mask_obl[1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
520
20.4k
          wedge_mask_obl[1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = msk;
521
20.4k
      const int mskx = wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j];
522
20.4k
      wedge_mask_obl[0][WEDGE_HORIZONTAL][j * stride + i] = mskx;
523
20.4k
      wedge_mask_obl[1][WEDGE_VERTICAL][i * stride + j] =
524
20.4k
          wedge_mask_obl[1][WEDGE_HORIZONTAL][j * stride + i] =
525
20.4k
              (1 << WEDGE_WEIGHT_BITS) - mskx;
526
20.4k
    }
527
320
  }
528
5
}
529
530
5
static AOM_INLINE void init_wedge_masks() {
531
5
  uint8_t *dst = wedge_mask_buf;
532
5
  BLOCK_SIZE bsize;
533
5
  memset(wedge_masks, 0, sizeof(wedge_masks));
534
115
  for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; ++bsize) {
535
110
    const wedge_params_type *wedge_params = &av1_wedge_params_lookup[bsize];
536
110
    const int wtypes = wedge_params->wedge_types;
537
110
    if (wtypes == 0) continue;
538
45
    const uint8_t *mask;
539
45
    const int bw = block_size_wide[bsize];
540
45
    const int bh = block_size_high[bsize];
541
45
    int w;
542
765
    for (w = 0; w < wtypes; ++w) {
543
720
      mask = get_wedge_mask_inplace(w, 0, bsize);
544
720
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw,
545
720
                        bh);
546
720
      wedge_params->masks[0][w] = dst;
547
720
      dst += bw * bh;
548
549
720
      mask = get_wedge_mask_inplace(w, 1, bsize);
550
720
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw,
551
720
                        bh);
552
720
      wedge_params->masks[1][w] = dst;
553
720
      dst += bw * bh;
554
720
    }
555
45
    assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf));
556
45
  }
557
5
}
558
559
/* clang-format off */
560
static const uint8_t ii_weights1d[MAX_SB_SIZE] = {
561
  60, 58, 56, 54, 52, 50, 48, 47, 45, 44, 42, 41, 39, 38, 37, 35, 34, 33, 32,
562
  31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20, 19, 19, 18, 18, 17, 16,
563
  16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10,  9,  9,  9,  8,
564
  8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  4,  4,
565
  4,  4,  4,  4,  4,  4,  3,  3,  3,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,
566
  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1,  1,
567
  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1
568
};
569
static uint8_t ii_size_scales[BLOCK_SIZES_ALL] = {
570
    32, 16, 16, 16, 8, 8, 8, 4,
571
    4,  4,  2,  2,  2, 1, 1, 1,
572
    8,  8,  4,  4,  2, 2
573
};
574
/* clang-format on */
575
576
static AOM_INLINE void build_smooth_interintra_mask(uint8_t *mask, int stride,
577
                                                    BLOCK_SIZE plane_bsize,
578
2.37k
                                                    INTERINTRA_MODE mode) {
579
2.37k
  int i, j;
580
2.37k
  const int bw = block_size_wide[plane_bsize];
581
2.37k
  const int bh = block_size_high[plane_bsize];
582
2.37k
  const int size_scale = ii_size_scales[plane_bsize];
583
584
2.37k
  switch (mode) {
585
1.17k
    case II_V_PRED:
586
11.0k
      for (i = 0; i < bh; ++i) {
587
9.84k
        memset(mask, ii_weights1d[i * size_scale], bw * sizeof(mask[0]));
588
9.84k
        mask += stride;
589
9.84k
      }
590
1.17k
      break;
591
592
327
    case II_H_PRED:
593
6.57k
      for (i = 0; i < bh; ++i) {
594
98.7k
        for (j = 0; j < bw; ++j) mask[j] = ii_weights1d[j * size_scale];
595
6.24k
        mask += stride;
596
6.24k
      }
597
327
      break;
598
599
763
    case II_SMOOTH_PRED:
600
7.43k
      for (i = 0; i < bh; ++i) {
601
55.4k
        for (j = 0; j < bw; ++j)
602
48.7k
          mask[j] = ii_weights1d[(i < j ? i : j) * size_scale];
603
6.66k
        mask += stride;
604
6.66k
      }
605
763
      break;
606
607
110
    case II_DC_PRED:
608
110
    default:
609
1.58k
      for (i = 0; i < bh; ++i) {
610
1.47k
        memset(mask, 32, bw * sizeof(mask[0]));
611
1.47k
        mask += stride;
612
1.47k
      }
613
110
      break;
614
2.37k
  }
615
2.37k
}
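To make the II_SMOOTH_PRED indexing above concrete: each mask entry is ii_weights1d[min(row, col) * size_scale], so the weights are largest near the top and left edges and decay toward the bottom-right. A standalone sketch of the 4x4 top-left corner for size_scale == 16 (the BLOCK_8X8 entry of ii_size_scales), with the four needed weights copied from the ii_weights1d table above:

#include <stdio.h>

int main(void) {
  /* ii_weights1d[0], [16], [32], [48] copied from the table above. */
  const int size_scale = 16;
  const unsigned char w[64] = { [0] = 60, [16] = 34, [32] = 19, [48] = 11 };
  for (int i = 0; i < 4; ++i) {
    for (int j = 0; j < 4; ++j)
      printf("%2d ", w[(i < j ? i : j) * size_scale]);
    printf("\n");
  }
  /* Prints: 60 60 60 60 / 60 34 34 34 / 60 34 19 19 / 60 34 19 11 */
  return 0;
}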
616
617
5
static AOM_INLINE void init_smooth_interintra_masks() {
618
25
  for (int m = 0; m < INTERINTRA_MODES; ++m) {
619
460
    for (int bs = 0; bs < BLOCK_SIZES_ALL; ++bs) {
620
440
      const int bw = block_size_wide[bs];
621
440
      const int bh = block_size_high[bs];
622
440
      if (bw > MAX_WEDGE_SIZE || bh > MAX_WEDGE_SIZE) continue;
623
280
      build_smooth_interintra_mask(smooth_interintra_mask_buf[m][bs], bw, bs,
624
280
                                   m);
625
280
    }
626
20
  }
627
5
}
628
629
// Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
630
5
void av1_init_wedge_masks() {
631
5
  init_wedge_master_masks();
632
5
  init_wedge_masks();
633
5
  init_smooth_interintra_masks();
634
5
}
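The line-equation comment above pins down where each codebook entry places the wedge boundary: whatever the direction's slope coefficients a[0] and a[1] are, f vanishes at the pivot (x_offset * w / 8, y_offset * h / 8), which is the point get_wedge_mask_inplace aligns with the center of the MASK_MASTER_SIZE x MASK_MASTER_SIZE master via woff and hoff. A small standalone sketch of the pivots for a hypothetical 32x16 block:

#include <stdio.h>

int main(void) {
  const int w = 32, h = 16; /* a hypothetical 32x16 wedge block */
  /* (x_offset, y_offset) pairs as they appear in the wedge codebooks above. */
  const int offsets[3][2] = { { 4, 4 }, { 4, 2 }, { 2, 4 } };
  for (int i = 0; i < 3; ++i)
    printf("pivot = (%2d, %2d)\n", offsets[i][0] * w / 8, offsets[i][1] * h / 8);
  /* Prints (16,  8), (16,  4) and ( 8,  8). */
  return 0;
}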
635
636
static AOM_INLINE void build_masked_compound_no_round(
637
    uint8_t *dst, int dst_stride, const CONV_BUF_TYPE *src0, int src0_stride,
638
    const CONV_BUF_TYPE *src1, int src1_stride,
639
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
640
6.27k
    int w, InterPredParams *inter_pred_params) {
641
6.27k
  const int ssy = inter_pred_params->subsampling_y;
642
6.27k
  const int ssx = inter_pred_params->subsampling_x;
643
6.27k
  const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
644
6.27k
  const int mask_stride = block_size_wide[sb_type];
645
6.27k
#if CONFIG_AV1_HIGHBITDEPTH
646
6.27k
  if (inter_pred_params->use_hbd_buf) {
647
3.60k
    aom_highbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
648
3.60k
                                  src1_stride, mask, mask_stride, w, h, ssx,
649
3.60k
                                  ssy, &inter_pred_params->conv_params,
650
3.60k
                                  inter_pred_params->bit_depth);
651
3.60k
  } else {
652
2.67k
    aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
653
2.67k
                                 src1_stride, mask, mask_stride, w, h, ssx, ssy,
654
2.67k
                                 &inter_pred_params->conv_params);
655
2.67k
  }
656
#else
657
  aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
658
                               src1_stride, mask, mask_stride, w, h, ssx, ssy,
659
                               &inter_pred_params->conv_params);
660
#endif
661
6.27k
}
662
663
static void make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
664
                                        uint8_t *dst, int dst_stride,
665
                                        InterPredParams *inter_pred_params,
666
6.27k
                                        const SubpelParams *subpel_params) {
667
6.27k
  const INTERINTER_COMPOUND_DATA *comp_data = &inter_pred_params->mask_comp;
668
6.27k
  BLOCK_SIZE sb_type = inter_pred_params->sb_type;
669
670
  // We're going to call av1_make_inter_predictor to generate a prediction into
671
  // a temporary buffer, then blend that temporary buffer with the one from
672
  // the other reference.
673
6.27k
  DECLARE_ALIGNED(32, uint8_t, tmp_buf[2 * MAX_SB_SQUARE]);
674
6.27k
  uint8_t *tmp_dst =
675
6.27k
      inter_pred_params->use_hbd_buf ? CONVERT_TO_BYTEPTR(tmp_buf) : tmp_buf;
676
677
6.27k
  const int tmp_buf_stride = MAX_SB_SIZE;
678
6.27k
  CONV_BUF_TYPE *org_dst = inter_pred_params->conv_params.dst;
679
6.27k
  int org_dst_stride = inter_pred_params->conv_params.dst_stride;
680
6.27k
  CONV_BUF_TYPE *tmp_buf16 = (CONV_BUF_TYPE *)tmp_buf;
681
6.27k
  inter_pred_params->conv_params.dst = tmp_buf16;
682
6.27k
  inter_pred_params->conv_params.dst_stride = tmp_buf_stride;
683
6.27k
  assert(inter_pred_params->conv_params.do_average == 0);
684
685
  // This will generate a prediction in tmp_buf for the second reference
686
6.27k
  av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE,
687
6.27k
                           inter_pred_params, subpel_params);
688
689
6.27k
  if (!inter_pred_params->conv_params.plane &&
690
2.12k
      comp_data->type == COMPOUND_DIFFWTD) {
691
701
    av1_build_compound_diffwtd_mask_d16(
692
701
        comp_data->seg_mask, comp_data->mask_type, org_dst, org_dst_stride,
693
701
        tmp_buf16, tmp_buf_stride, inter_pred_params->block_height,
694
701
        inter_pred_params->block_width, &inter_pred_params->conv_params,
695
701
        inter_pred_params->bit_depth);
696
701
  }
697
6.27k
  build_masked_compound_no_round(
698
6.27k
      dst, dst_stride, org_dst, org_dst_stride, tmp_buf16, tmp_buf_stride,
699
6.27k
      comp_data, sb_type, inter_pred_params->block_height,
700
6.27k
      inter_pred_params->block_width, inter_pred_params);
701
6.27k
}
702
703
void av1_build_one_inter_predictor(
704
    uint8_t *dst, int dst_stride, const MV *const src_mv,
705
    InterPredParams *inter_pred_params, MACROBLOCKD *xd, int mi_x, int mi_y,
706
406k
    int ref, uint8_t **mc_buf, CalcSubpelParamsFunc calc_subpel_params_func) {
707
406k
  SubpelParams subpel_params;
708
406k
  uint8_t *src;
709
406k
  int src_stride;
710
406k
  calc_subpel_params_func(src_mv, inter_pred_params, xd, mi_x, mi_y, ref,
711
406k
                          mc_buf, &src, &subpel_params, &src_stride);
712
713
406k
  if (inter_pred_params->comp_mode == UNIFORM_SINGLE ||
714
399k
      inter_pred_params->comp_mode == UNIFORM_COMP) {
715
399k
    av1_make_inter_predictor(src, src_stride, dst, dst_stride,
716
399k
                             inter_pred_params, &subpel_params);
717
399k
  } else {
718
6.27k
    make_masked_inter_predictor(src, src_stride, dst, dst_stride,
719
6.27k
                                inter_pred_params, &subpel_params);
720
6.27k
  }
721
406k
}
722
723
void av1_dist_wtd_comp_weight_assign(const AV1_COMMON *cm,
724
                                     const MB_MODE_INFO *mbmi, int *fwd_offset,
725
                                     int *bck_offset,
726
                                     int *use_dist_wtd_comp_avg,
727
400k
                                     int is_compound) {
728
400k
  assert(fwd_offset != NULL && bck_offset != NULL);
729
400k
  if (!is_compound || mbmi->compound_idx) {
730
387k
    *fwd_offset = 8;
731
387k
    *bck_offset = 8;
732
387k
    *use_dist_wtd_comp_avg = 0;
733
387k
    return;
734
387k
  }
735
736
13.0k
  *use_dist_wtd_comp_avg = 1;
737
13.0k
  const RefCntBuffer *const bck_buf = get_ref_frame_buf(cm, mbmi->ref_frame[0]);
738
13.0k
  const RefCntBuffer *const fwd_buf = get_ref_frame_buf(cm, mbmi->ref_frame[1]);
739
13.0k
  const int cur_frame_index = cm->cur_frame->order_hint;
740
13.0k
  int bck_frame_index = 0, fwd_frame_index = 0;
741
742
13.0k
  if (bck_buf != NULL) bck_frame_index = bck_buf->order_hint;
743
13.0k
  if (fwd_buf != NULL) fwd_frame_index = fwd_buf->order_hint;
744
745
13.0k
  int d0 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
746
13.0k
                                       fwd_frame_index, cur_frame_index)),
747
13.0k
                 0, MAX_FRAME_DISTANCE);
748
13.0k
  int d1 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
749
13.0k
                                       cur_frame_index, bck_frame_index)),
750
13.0k
                 0, MAX_FRAME_DISTANCE);
751
752
13.0k
  const int order = d0 <= d1;
753
754
13.0k
  if (d0 == 0 || d1 == 0) {
755
872
    *fwd_offset = quant_dist_lookup_table[3][order];
756
872
    *bck_offset = quant_dist_lookup_table[3][1 - order];
757
872
    return;
758
872
  }
759
760
12.2k
  int i;
761
18.4k
  for (i = 0; i < 3; ++i) {
762
16.8k
    int c0 = quant_dist_weight[i][order];
763
16.8k
    int c1 = quant_dist_weight[i][!order];
764
16.8k
    int d0_c0 = d0 * c0;
765
16.8k
    int d1_c1 = d1 * c1;
766
16.8k
    if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
767
16.8k
  }
768
769
12.2k
  *fwd_offset = quant_dist_lookup_table[i][order];
770
12.2k
  *bck_offset = quant_dist_lookup_table[i][1 - order];
771
12.2k
}
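The two offsets assigned here always total 16: the uniform (8, 8) pair, or a pair from quant_dist_lookup_table selected from the relative frame distances. Downstream, the compound sample is a weighted average of the two predictions right-shifted by DIST_PRECISION_BITS (assumed here to be 4; the rounding and intermediate-precision offsets of the real convolve path are omitted). A minimal sketch:

#include <stdio.h>

/* Weighted compound average; DIST_PRECISION_BITS == 4 is assumed. */
static int dist_wtd_avg(int p0, int p1, int fwd_offset, int bck_offset) {
  return (fwd_offset * p0 + bck_offset * p1) >> 4;
}

int main(void) {
  printf("%d\n", dist_wtd_avg(120, 80, 8, 8));   /* 100: uniform average    */
  printf("%d\n", dist_wtd_avg(120, 80, 13, 3));  /* 112: weighted toward p0 */
  return 0;
}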
772
773
// True if the following hold:
774
//  1. Not intrabc and not build_for_obmc
775
//  2. At least one dimension is size 4 with subsampling
776
//  3. If sub-sampled, all of the previous blocks around the sub-sample
778
//     are inter blocks and do not use intrabc
778
static bool is_sub8x8_inter(const MACROBLOCKD *xd, int plane, BLOCK_SIZE bsize,
779
372k
                            int is_intrabc, int build_for_obmc) {
780
372k
  if (is_intrabc || build_for_obmc) {
781
269k
    return false;
782
269k
  }
783
784
102k
  const struct macroblockd_plane *const pd = &xd->plane[plane];
785
102k
  const int ss_x = pd->subsampling_x;
786
102k
  const int ss_y = pd->subsampling_y;
787
102k
  const int is_sub4_x = (block_size_wide[bsize] == 4) && ss_x;
788
102k
  const int is_sub4_y = (block_size_high[bsize] == 4) && ss_y;
789
102k
  if (!is_sub4_x && !is_sub4_y) {
790
100k
    return false;
791
100k
  }
792
793
  // For sub8x8 chroma blocks, we may be covering more than one luma block's
794
  // worth of pixels. Thus (mi_x, mi_y) may not be the correct coordinates for
795
  // the top-left corner of the prediction source - the correct top-left corner
796
  // is at (pre_x, pre_y).
797
2.77k
  const int row_start = is_sub4_y ? -1 : 0;
798
2.77k
  const int col_start = is_sub4_x ? -1 : 0;
799
800
5.76k
  for (int row = row_start; row <= 0; ++row) {
801
8.94k
    for (int col = col_start; col <= 0; ++col) {
802
5.94k
      const MB_MODE_INFO *this_mbmi = xd->mi[row * xd->mi_stride + col];
803
5.94k
      if (!is_inter_block(this_mbmi)) return false;
804
5.86k
      if (is_intrabc_block(this_mbmi)) return false;
805
5.86k
    }
806
3.07k
  }
807
2.69k
  return true;
808
2.77k
}
809
810
static void build_inter_predictors_sub8x8(
811
    const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, const MB_MODE_INFO *mi,
812
    int mi_x, int mi_y, uint8_t **mc_buf,
813
2.69k
    CalcSubpelParamsFunc calc_subpel_params_func) {
814
2.69k
  const BLOCK_SIZE bsize = mi->bsize;
815
2.69k
  struct macroblockd_plane *const pd = &xd->plane[plane];
816
2.69k
  const bool ss_x = pd->subsampling_x;
817
2.69k
  const bool ss_y = pd->subsampling_y;
818
2.69k
  const int b4_w = block_size_wide[bsize] >> ss_x;
819
2.69k
  const int b4_h = block_size_high[bsize] >> ss_y;
820
2.69k
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ss_x, ss_y);
821
2.69k
  const int b8_w = block_size_wide[plane_bsize];
822
2.69k
  const int b8_h = block_size_high[plane_bsize];
823
2.69k
  const int is_compound = has_second_ref(mi);
824
2.69k
  assert(!is_compound);
825
2.69k
  assert(!is_intrabc_block(mi));
826
827
  // For sub8x8 chroma blocks, we may be covering more than one luma block's
828
  // worth of pixels. Thus (mi_x, mi_y) may not be the correct coordinates for
829
  // the top-left corner of the prediction source - the correct top-left corner
830
  // is at (pre_x, pre_y).
831
2.69k
  const int row_start = (block_size_high[bsize] == 4) && ss_y ? -1 : 0;
832
2.69k
  const int col_start = (block_size_wide[bsize] == 4) && ss_x ? -1 : 0;
833
2.69k
  const int pre_x = (mi_x + MI_SIZE * col_start) >> ss_x;
834
2.69k
  const int pre_y = (mi_y + MI_SIZE * row_start) >> ss_y;
835
836
2.69k
  int row = row_start;
837
5.68k
  for (int y = 0; y < b8_h; y += b4_h) {
838
2.98k
    int col = col_start;
839
8.83k
    for (int x = 0; x < b8_w; x += b4_w) {
840
5.84k
      MB_MODE_INFO *this_mbmi = xd->mi[row * xd->mi_stride + col];
841
5.84k
      struct buf_2d *const dst_buf = &pd->dst;
842
5.84k
      uint8_t *dst = dst_buf->buf + dst_buf->stride * y + x;
843
5.84k
      int ref = 0;
844
5.84k
      const RefCntBuffer *ref_buf =
845
5.84k
          get_ref_frame_buf(cm, this_mbmi->ref_frame[ref]);
846
5.84k
      const struct scale_factors *ref_scale_factors =
847
5.84k
          get_ref_scale_factors_const(cm, this_mbmi->ref_frame[ref]);
848
5.84k
      const struct scale_factors *const sf = ref_scale_factors;
849
5.84k
      const struct buf_2d pre_buf = {
850
5.84k
        NULL,
851
5.84k
        (plane == 1) ? ref_buf->buf.u_buffer : ref_buf->buf.v_buffer,
852
5.84k
        ref_buf->buf.uv_crop_width,
853
5.84k
        ref_buf->buf.uv_crop_height,
854
5.84k
        ref_buf->buf.uv_stride,
855
5.84k
      };
856
857
5.84k
      const MV mv = this_mbmi->mv[ref].as_mv;
858
859
5.84k
      InterPredParams inter_pred_params;
860
5.84k
      av1_init_inter_params(&inter_pred_params, b4_w, b4_h, pre_y + y,
861
5.84k
                            pre_x + x, pd->subsampling_x, pd->subsampling_y,
862
5.84k
                            xd->bd, is_cur_buf_hbd(xd), mi->use_intrabc, sf,
863
5.84k
                            &pre_buf, this_mbmi->interp_filters);
864
5.84k
      inter_pred_params.conv_params =
865
5.84k
          get_conv_params_no_round(ref, plane, NULL, 0, is_compound, xd->bd);
866
867
5.84k
      av1_build_one_inter_predictor(dst, dst_buf->stride, &mv,
868
5.84k
                                    &inter_pred_params, xd, mi_x + x, mi_y + y,
869
5.84k
                                    ref, mc_buf, calc_subpel_params_func);
870
871
5.84k
      ++col;
872
5.84k
    }
873
2.98k
    ++row;
874
2.98k
  }
875
2.69k
}
876
877
static void build_inter_predictors_8x8_and_bigger(
878
    const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, const MB_MODE_INFO *mi,
879
    int build_for_obmc, int bw, int bh, int mi_x, int mi_y, uint8_t **mc_buf,
880
369k
    CalcSubpelParamsFunc calc_subpel_params_func) {
881
369k
  const int is_compound = has_second_ref(mi);
882
369k
  const int is_intrabc = is_intrabc_block(mi);
883
369k
  assert(IMPLIES(is_intrabc, !is_compound));
884
369k
  struct macroblockd_plane *const pd = &xd->plane[plane];
885
369k
  struct buf_2d *const dst_buf = &pd->dst;
886
369k
  uint8_t *const dst = dst_buf->buf;
887
888
369k
  int is_global[2] = { 0, 0 };
889
769k
  for (int ref = 0; ref < 1 + is_compound; ++ref) {
890
400k
    const WarpedMotionParams *const wm = &xd->global_motion[mi->ref_frame[ref]];
891
400k
    is_global[ref] = is_global_mv_block(mi, wm->wmtype);
892
400k
  }
893
894
369k
  const BLOCK_SIZE bsize = mi->bsize;
895
369k
  const int ss_x = pd->subsampling_x;
896
369k
  const int ss_y = pd->subsampling_y;
897
369k
  const int row_start =
898
369k
      (block_size_high[bsize] == 4) && ss_y && !build_for_obmc ? -1 : 0;
899
369k
  const int col_start =
900
369k
      (block_size_wide[bsize] == 4) && ss_x && !build_for_obmc ? -1 : 0;
901
369k
  const int pre_x = (mi_x + MI_SIZE * col_start) >> ss_x;
902
369k
  const int pre_y = (mi_y + MI_SIZE * row_start) >> ss_y;
903
904
769k
  for (int ref = 0; ref < 1 + is_compound; ++ref) {
905
400k
    const struct scale_factors *const sf =
906
400k
        is_intrabc ? &cm->sf_identity : xd->block_ref_scale_factors[ref];
907
400k
    struct buf_2d *const pre_buf = is_intrabc ? dst_buf : &pd->pre[ref];
908
400k
    const MV mv = mi->mv[ref].as_mv;
909
400k
    const WarpTypesAllowed warp_types = { is_global[ref],
910
400k
                                          mi->motion_mode == WARPED_CAUSAL };
911
912
400k
    InterPredParams inter_pred_params;
913
400k
    av1_init_inter_params(&inter_pred_params, bw, bh, pre_y, pre_x,
914
400k
                          pd->subsampling_x, pd->subsampling_y, xd->bd,
915
400k
                          is_cur_buf_hbd(xd), mi->use_intrabc, sf, pre_buf,
916
400k
                          mi->interp_filters);
917
400k
    if (is_compound) av1_init_comp_mode(&inter_pred_params);
918
400k
    inter_pred_params.conv_params = get_conv_params_no_round(
919
400k
        ref, plane, xd->tmp_conv_dst, MAX_SB_SIZE, is_compound, xd->bd);
920
921
400k
    av1_dist_wtd_comp_weight_assign(
922
400k
        cm, mi, &inter_pred_params.conv_params.fwd_offset,
923
400k
        &inter_pred_params.conv_params.bck_offset,
924
400k
        &inter_pred_params.conv_params.use_dist_wtd_comp_avg, is_compound);
925
926
400k
    if (!build_for_obmc)
927
386k
      av1_init_warp_params(&inter_pred_params, &warp_types, ref, xd, mi);
928
929
400k
    if (is_masked_compound_type(mi->interinter_comp.type)) {
930
12.5k
      inter_pred_params.sb_type = mi->bsize;
931
12.5k
      inter_pred_params.mask_comp = mi->interinter_comp;
932
12.5k
      if (ref == 1) {
933
6.27k
        inter_pred_params.conv_params.do_average = 0;
934
6.27k
        inter_pred_params.comp_mode = MASK_COMP;
935
6.27k
      }
936
      // Assign physical buffer.
937
12.5k
      inter_pred_params.mask_comp.seg_mask = xd->seg_mask;
938
12.5k
    }
939
940
400k
    av1_build_one_inter_predictor(dst, dst_buf->stride, &mv, &inter_pred_params,
941
400k
                                  xd, mi_x, mi_y, ref, mc_buf,
942
400k
                                  calc_subpel_params_func);
943
400k
  }
944
369k
}
945
946
void av1_build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd,
947
                                int plane, const MB_MODE_INFO *mi,
948
                                int build_for_obmc, int bw, int bh, int mi_x,
949
                                int mi_y, uint8_t **mc_buf,
950
372k
                                CalcSubpelParamsFunc calc_subpel_params_func) {
951
372k
  if (is_sub8x8_inter(xd, plane, mi->bsize, is_intrabc_block(mi),
952
372k
                      build_for_obmc)) {
953
2.69k
    assert(bw < 8 || bh < 8);
954
2.69k
    build_inter_predictors_sub8x8(cm, xd, plane, mi, mi_x, mi_y, mc_buf,
955
2.69k
                                  calc_subpel_params_func);
956
369k
  } else {
957
369k
    build_inter_predictors_8x8_and_bigger(cm, xd, plane, mi, build_for_obmc, bw,
958
369k
                                          bh, mi_x, mi_y, mc_buf,
959
369k
                                          calc_subpel_params_func);
960
369k
  }
961
372k
}
962
void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
963
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
964
10.3M
                          const int plane_start, const int plane_end) {
965
  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
966
  // the static analysis warnings.
967
40.5M
  for (int i = plane_start; i < AOMMIN(plane_end, MAX_MB_PLANE); ++i) {
968
30.1M
    struct macroblockd_plane *const pd = &planes[i];
969
30.1M
    const int is_uv = i > 0;
970
30.1M
    setup_pred_plane(&pd->dst, bsize, src->buffers[i], src->crop_widths[is_uv],
971
30.1M
                     src->crop_heights[is_uv], src->strides[is_uv], mi_row,
972
30.1M
                     mi_col, NULL, pd->subsampling_x, pd->subsampling_y);
973
30.1M
  }
974
10.3M
}
975
976
void av1_setup_pre_planes(MACROBLOCKD *xd, int idx,
977
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
978
                          const struct scale_factors *sf,
979
56.9k
                          const int num_planes) {
980
56.9k
  if (src != NULL) {
981
    // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
982
    // the static analysis warnings.
983
209k
    for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); ++i) {
984
152k
      struct macroblockd_plane *const pd = &xd->plane[i];
985
152k
      const int is_uv = i > 0;
986
152k
      setup_pred_plane(&pd->pre[idx], xd->mi[0]->bsize, src->buffers[i],
987
152k
                       src->crop_widths[is_uv], src->crop_heights[is_uv],
988
152k
                       src->strides[is_uv], mi_row, mi_col, sf,
989
152k
                       pd->subsampling_x, pd->subsampling_y);
990
152k
    }
991
56.9k
  }
992
56.9k
}
993
994
// obmc_mask_N[overlap_position]
995
static const uint8_t obmc_mask_1[1] = { 64 };
996
DECLARE_ALIGNED(2, static const uint8_t, obmc_mask_2[2]) = { 45, 64 };
997
998
DECLARE_ALIGNED(4, static const uint8_t, obmc_mask_4[4]) = { 39, 50, 59, 64 };
999
1000
static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 64, 64 };
1001
1002
static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54,
1003
                                          56, 58, 60, 61, 64, 64, 64, 64 };
1004
1005
static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44,
1006
                                          45, 47, 48, 50, 51, 52, 53, 55,
1007
                                          56, 57, 58, 59, 60, 60, 61, 62,
1008
                                          64, 64, 64, 64, 64, 64, 64, 64 };
1009
1010
static const uint8_t obmc_mask_64[64] = {
1011
  33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
1012
  45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
1013
  56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
1014
  62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
1015
};
1016
1017
13.8k
const uint8_t *av1_get_obmc_mask(int length) {
1018
13.8k
  switch (length) {
1019
0
    case 1: return obmc_mask_1;
1020
1.58k
    case 2: return obmc_mask_2;
1021
10.5k
    case 4: return obmc_mask_4;
1022
1.67k
    case 8: return obmc_mask_8;
1023
0
    case 16: return obmc_mask_16;
1024
0
    case 32: return obmc_mask_32;
1025
0
    case 64: return obmc_mask_64;
1026
0
    default: assert(0); return NULL;
1027
13.8k
  }
1028
13.8k
}
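These masks are consumed by aom_blend_a64_vmask/aom_blend_a64_hmask in the OBMC functions below, with the current block's prediction as the first source and the neighbor's prediction as the second, so a weight of 64 leaves the current prediction untouched while the smallest weights (nearest the shared edge) mix in the most of the neighbor. A standalone sketch, assuming the usual A64 blend rounding ((m * a + (64 - m) * b + 32) >> 6):

#include <stdio.h>
#include <stdint.h>

/* A64 blend as assumed from aom_dsp/blend.h: weight m in [0, 64]. */
static uint8_t blend_a64(int m, uint8_t cur, uint8_t nbr) {
  return (uint8_t)((m * cur + (64 - m) * nbr + 32) >> 6);
}

int main(void) {
  const uint8_t mask4[4] = { 39, 50, 59, 64 }; /* obmc_mask_4 above */
  for (int i = 0; i < 4; ++i)
    printf("%d ", blend_a64(mask4[i], 200, 100));
  printf("\n"); /* 161 178 192 200: the neighbor's influence fades inward */
  return 0;
}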
1029
1030
static INLINE void increment_int_ptr(MACROBLOCKD *xd, int rel_mi_row,
1031
                                     int rel_mi_col, uint8_t op_mi_size,
1032
                                     int dir, MB_MODE_INFO *mi, void *fun_ctxt,
1033
17.9k
                                     const int num_planes) {
1034
17.9k
  (void)xd;
1035
17.9k
  (void)rel_mi_row;
1036
17.9k
  (void)rel_mi_col;
1037
17.9k
  (void)op_mi_size;
1038
17.9k
  (void)dir;
1039
17.9k
  (void)mi;
1040
17.9k
  ++*(int *)fun_ctxt;
1041
17.9k
  (void)num_planes;
1042
17.9k
}
1043
1044
40.2k
void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd) {
1045
40.2k
  MB_MODE_INFO *mbmi = xd->mi[0];
1046
1047
40.2k
  mbmi->overlappable_neighbors = 0;
1048
1049
40.2k
  if (!is_motion_variation_allowed_bsize(mbmi->bsize)) return;
1050
1051
25.0k
  foreach_overlappable_nb_above(cm, xd, INT_MAX, increment_int_ptr,
1052
25.0k
                                &mbmi->overlappable_neighbors);
1053
25.0k
  if (mbmi->overlappable_neighbors) return;
1054
13.7k
  foreach_overlappable_nb_left(cm, xd, INT_MAX, increment_int_ptr,
1055
13.7k
                               &mbmi->overlappable_neighbors);
1056
13.7k
}
1057
1058
// HW does not support < 4x4 prediction. To limit the bandwidth requirement, if
1059
// block-size of current plane is smaller than 8x8, always only blend with the
1060
// left neighbor(s) (skip blending with the above side).
1061
#define DISABLE_CHROMA_U8X8_OBMC 0  // 0: one-sided obmc; 1: disable
1062
1063
int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,
1064
31.0k
                               const struct macroblockd_plane *pd, int dir) {
1065
31.0k
  assert(is_motion_variation_allowed_bsize(bsize));
1066
1067
31.0k
  const BLOCK_SIZE bsize_plane =
1068
31.0k
      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
1069
31.0k
  switch (bsize_plane) {
1070
#if DISABLE_CHROMA_U8X8_OBMC
1071
    case BLOCK_4X4:
1072
    case BLOCK_8X4:
1073
    case BLOCK_4X8: return 1; break;
1074
#else
1075
0
    case BLOCK_4X4:
1076
0
    case BLOCK_8X4:
1077
6.60k
    case BLOCK_4X8: return dir == 0; break;
1078
0
#endif
1079
24.4k
    default: return 0;
1080
31.0k
  }
1081
31.0k
}
1082
1083
5.17k
void av1_modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
1084
5.17k
  mbmi->ref_frame[1] = NONE_FRAME;
1085
5.17k
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
1086
1087
5.17k
  return;
1088
5.17k
}
1089
1090
struct obmc_inter_pred_ctxt {
1091
  uint8_t **adjacent;
1092
  int *adjacent_stride;
1093
};
1094
1095
static INLINE void build_obmc_inter_pred_above(
1096
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
1097
2.89k
    int dir, MB_MODE_INFO *above_mi, void *fun_ctxt, const int num_planes) {
1098
2.89k
  (void)above_mi;
1099
2.89k
  (void)rel_mi_row;
1100
2.89k
  (void)dir;
1101
2.89k
  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
1102
2.89k
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
1103
2.89k
  const int overlap =
1104
2.89k
      AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
1105
1106
11.5k
  for (int plane = 0; plane < num_planes; ++plane) {
1107
8.69k
    const struct macroblockd_plane *pd = &xd->plane[plane];
1108
8.69k
    const int bw = (op_mi_size * MI_SIZE) >> pd->subsampling_x;
1109
8.69k
    const int bh = overlap >> pd->subsampling_y;
1110
8.69k
    const int plane_col = (rel_mi_col * MI_SIZE) >> pd->subsampling_x;
1111
1112
8.69k
    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
1113
1114
6.98k
    const int dst_stride = pd->dst.stride;
1115
6.98k
    uint8_t *const dst = &pd->dst.buf[plane_col];
1116
6.98k
    const int tmp_stride = ctxt->adjacent_stride[plane];
1117
6.98k
    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_col];
1118
6.98k
    const uint8_t *const mask = av1_get_obmc_mask(bh);
1119
6.98k
#if CONFIG_AV1_HIGHBITDEPTH
1120
6.98k
    const int is_hbd = is_cur_buf_hbd(xd);
1121
6.98k
    if (is_hbd)
1122
5.83k
      aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
1123
5.83k
                                 tmp_stride, mask, bw, bh, xd->bd);
1124
1.15k
    else
1125
1.15k
      aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
1126
1.15k
                          mask, bw, bh);
1127
#else
1128
    aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
1129
                        bw, bh);
1130
#endif
1131
6.98k
  }
1132
2.89k
}
1133
1134
static INLINE void build_obmc_inter_pred_left(
1135
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
1136
2.27k
    int dir, MB_MODE_INFO *left_mi, void *fun_ctxt, const int num_planes) {
1137
2.27k
  (void)left_mi;
1138
2.27k
  (void)rel_mi_col;
1139
2.27k
  (void)dir;
1140
2.27k
  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
1141
2.27k
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
1142
2.27k
  const int overlap =
1143
2.27k
      AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
1144
1145
9.10k
  for (int plane = 0; plane < num_planes; ++plane) {
1146
6.83k
    const struct macroblockd_plane *pd = &xd->plane[plane];
1147
6.83k
    const int bw = overlap >> pd->subsampling_x;
1148
6.83k
    const int bh = (op_mi_size * MI_SIZE) >> pd->subsampling_y;
1149
6.83k
    const int plane_row = (rel_mi_row * MI_SIZE) >> pd->subsampling_y;
1150
1151
6.83k
    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
1152
1153
6.83k
    const int dst_stride = pd->dst.stride;
1154
6.83k
    uint8_t *const dst = &pd->dst.buf[plane_row * dst_stride];
1155
6.83k
    const int tmp_stride = ctxt->adjacent_stride[plane];
1156
6.83k
    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_row * tmp_stride];
1157
6.83k
    const uint8_t *const mask = av1_get_obmc_mask(bw);
1158
1159
6.83k
#if CONFIG_AV1_HIGHBITDEPTH
1160
6.83k
    const int is_hbd = is_cur_buf_hbd(xd);
1161
6.83k
    if (is_hbd)
1162
4.87k
      aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
1163
4.87k
                                 tmp_stride, mask, bw, bh, xd->bd);
1164
1.95k
    else
1165
1.95k
      aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
1166
1.95k
                          mask, bw, bh);
1167
#else
1168
    aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
1169
                        bw, bh);
1170
#endif
1171
6.83k
  }
1172
2.27k
}
1173
1174
// This function combines motion compensated predictions that are generated by
1175
// top/left neighboring blocks' inter predictors with the regular inter
1176
// prediction. We assume the original prediction (bmc) is stored in
1177
// xd->plane[].dst.buf
1178
void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
1179
                                     uint8_t *above[MAX_MB_PLANE],
1180
                                     int above_stride[MAX_MB_PLANE],
1181
                                     uint8_t *left[MAX_MB_PLANE],
1182
3.85k
                                     int left_stride[MAX_MB_PLANE]) {
1183
3.85k
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
1184
1185
  // handle above row
1186
3.85k
  struct obmc_inter_pred_ctxt ctxt_above = { above, above_stride };
1187
3.85k
  foreach_overlappable_nb_above(cm, xd,
1188
3.85k
                                max_neighbor_obmc[mi_size_wide_log2[bsize]],
1189
3.85k
                                build_obmc_inter_pred_above, &ctxt_above);
1190
1191
  // handle left column
1192
3.85k
  struct obmc_inter_pred_ctxt ctxt_left = { left, left_stride };
1193
3.85k
  foreach_overlappable_nb_left(cm, xd,
1194
3.85k
                               max_neighbor_obmc[mi_size_high_log2[bsize]],
1195
3.85k
                               build_obmc_inter_pred_left, &ctxt_left);
1196
3.85k
}
1197
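The blend performed by aom_blend_a64_vmask()/aom_blend_a64_hmask() in the helpers above is a rounded weighted average with 6-bit weights (max alpha 64). The following is a minimal sketch of the vertical-mask case for the top-neighbor overlap, written only for illustration; the name obmc_vmask_blend_sketch and the plain-C loop are assumptions, not the library's optimized implementation.

#include <stdint.h>

// Hypothetical sketch: one 6-bit weight per row. dst is both the current
// block's prediction (src0) and the output; tmp is the neighbor's prediction
// (src1), mirroring the aom_blend_a64_vmask() call in the listing above.
static void obmc_vmask_blend_sketch(uint8_t *dst, int dst_stride,
                                    const uint8_t *tmp, int tmp_stride,
                                    const uint8_t *mask, int bw, int bh) {
  for (int r = 0; r < bh; ++r) {
    const int m = mask[r];  // weight for the current prediction on row r
    for (int c = 0; c < bw; ++c) {
      const int d = dst[r * dst_stride + c];
      const int t = tmp[r * tmp_stride + c];
      // Rounded weighted average: (m*d + (64-m)*t + 32) >> 6
      dst[r * dst_stride + c] = (uint8_t)((m * d + (64 - m) * t + 32) >> 6);
    }
  }
}

The horizontal-mask case used for the left-neighbor overlap is analogous, with one weight per column instead of per row.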
1198
void av1_setup_obmc_dst_bufs(MACROBLOCKD *xd, uint8_t **dst_buf1,
1199
3.85k
                             uint8_t **dst_buf2) {
1200
3.85k
  if (is_cur_buf_hbd(xd)) {
1201
2.91k
    int len = sizeof(uint16_t);
1202
2.91k
    dst_buf1[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0]);
1203
2.91k
    dst_buf1[1] =
1204
2.91k
        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * len);
1205
2.91k
    dst_buf1[2] =
1206
2.91k
        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * 2 * len);
1207
2.91k
    dst_buf2[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1]);
1208
2.91k
    dst_buf2[1] =
1209
2.91k
        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * len);
1210
2.91k
    dst_buf2[2] =
1211
2.91k
        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2 * len);
1212
2.91k
  } else {
1213
933
    dst_buf1[0] = xd->tmp_obmc_bufs[0];
1214
933
    dst_buf1[1] = xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE;
1215
933
    dst_buf1[2] = xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * 2;
1216
933
    dst_buf2[0] = xd->tmp_obmc_bufs[1];
1217
933
    dst_buf2[1] = xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE;
1218
933
    dst_buf2[2] = xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2;
1219
933
  }
1220
3.85k
}
1221
1222
void av1_setup_build_prediction_by_above_pred(
1223
    MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width,
1224
    MB_MODE_INFO *above_mbmi, struct build_prediction_ctxt *ctxt,
1225
2.89k
    const int num_planes) {
1226
2.89k
  const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->bsize);
1227
2.89k
  const int above_mi_col = xd->mi_col + rel_mi_col;
1228
1229
2.89k
  av1_modify_neighbor_predictor_for_obmc(above_mbmi);
1230
1231
11.5k
  for (int j = 0; j < num_planes; ++j) {
1232
8.69k
    struct macroblockd_plane *const pd = &xd->plane[j];
1233
8.69k
    setup_pred_plane(&pd->dst, a_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
1234
8.69k
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], 0, rel_mi_col,
1235
8.69k
                     NULL, pd->subsampling_x, pd->subsampling_y);
1236
8.69k
  }
1237
1238
2.89k
  const int num_refs = 1 + has_second_ref(above_mbmi);
1239
1240
5.79k
  for (int ref = 0; ref < num_refs; ++ref) {
1241
2.89k
    const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];
1242
1243
2.89k
    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
1244
2.89k
    const struct scale_factors *const sf =
1245
2.89k
        get_ref_scale_factors_const(ctxt->cm, frame);
1246
2.89k
    xd->block_ref_scale_factors[ref] = sf;
1247
2.89k
    if ((!av1_is_valid_scale(sf)))
1248
0
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
1249
0
                         "Reference frame has invalid dimensions");
1250
2.89k
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, xd->mi_row, above_mi_col, sf,
1251
2.89k
                         num_planes);
1252
2.89k
  }
1253
1254
2.89k
  xd->mb_to_left_edge = 8 * MI_SIZE * (-above_mi_col);
1255
2.89k
  xd->mb_to_right_edge =
1256
2.89k
      ctxt->mb_to_far_edge +
1257
2.89k
      (xd->width - rel_mi_col - above_mi_width) * MI_SIZE * 8;
1258
2.89k
}
1259
1260
void av1_setup_build_prediction_by_left_pred(MACROBLOCKD *xd, int rel_mi_row,
1261
                                             uint8_t left_mi_height,
1262
                                             MB_MODE_INFO *left_mbmi,
1263
                                             struct build_prediction_ctxt *ctxt,
1264
2.27k
                                             const int num_planes) {
1265
2.27k
  const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->bsize);
1266
2.27k
  const int left_mi_row = xd->mi_row + rel_mi_row;
1267
1268
2.27k
  av1_modify_neighbor_predictor_for_obmc(left_mbmi);
1269
1270
9.10k
  for (int j = 0; j < num_planes; ++j) {
1271
6.83k
    struct macroblockd_plane *const pd = &xd->plane[j];
1272
6.83k
    setup_pred_plane(&pd->dst, l_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
1273
6.83k
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], rel_mi_row, 0,
1274
6.83k
                     NULL, pd->subsampling_x, pd->subsampling_y);
1275
6.83k
  }
1276
1277
2.27k
  const int num_refs = 1 + has_second_ref(left_mbmi);
1278
1279
4.55k
  for (int ref = 0; ref < num_refs; ++ref) {
1280
2.27k
    const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];
1281
1282
2.27k
    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
1283
2.27k
    const struct scale_factors *const ref_scale_factors =
1284
2.27k
        get_ref_scale_factors_const(ctxt->cm, frame);
1285
1286
2.27k
    xd->block_ref_scale_factors[ref] = ref_scale_factors;
1287
2.27k
    if ((!av1_is_valid_scale(ref_scale_factors)))
1288
0
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
1289
0
                         "Reference frame has invalid dimensions");
1290
2.27k
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, left_mi_row, xd->mi_col,
1291
2.27k
                         ref_scale_factors, num_planes);
1292
2.27k
  }
1293
1294
2.27k
  xd->mb_to_top_edge = GET_MV_SUBPEL(MI_SIZE * (-left_mi_row));
1295
2.27k
  xd->mb_to_bottom_edge =
1296
2.27k
      ctxt->mb_to_far_edge +
1297
2.27k
      GET_MV_SUBPEL((xd->height - rel_mi_row - left_mi_height) * MI_SIZE);
1298
2.27k
}
1299
1300
static AOM_INLINE void combine_interintra(
1301
    INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
1302
    int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
1303
    uint8_t *comppred, int compstride, const uint8_t *interpred,
1304
1.71k
    int interstride, const uint8_t *intrapred, int intrastride) {
1305
1.71k
  const int bw = block_size_wide[plane_bsize];
1306
1.71k
  const int bh = block_size_high[plane_bsize];
1307
1308
1.71k
  if (use_wedge_interintra) {
1309
627
    if (av1_is_wedge_used(bsize)) {
1310
627
      const uint8_t *mask =
1311
627
          av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
1312
627
      const int subw = 2 * mi_size_wide[bsize] == bw;
1313
627
      const int subh = 2 * mi_size_high[bsize] == bh;
1314
627
      aom_blend_a64_mask(comppred, compstride, intrapred, intrastride,
1315
627
                         interpred, interstride, mask, block_size_wide[bsize],
1316
627
                         bw, bh, subw, subh);
1317
627
    }
1318
627
    return;
1319
627
  }
1320
1321
1.08k
  const uint8_t *mask = smooth_interintra_mask_buf[mode][plane_bsize];
1322
1.08k
  aom_blend_a64_mask(comppred, compstride, intrapred, intrastride, interpred,
1323
1.08k
                     interstride, mask, bw, bw, bh, 0, 0);
1324
1.08k
}
1325
1326
#if CONFIG_AV1_HIGHBITDEPTH
1327
static AOM_INLINE void combine_interintra_highbd(
1328
    INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
1329
    int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
1330
    uint8_t *comppred8, int compstride, const uint8_t *interpred8,
1331
3.80k
    int interstride, const uint8_t *intrapred8, int intrastride, int bd) {
1332
3.80k
  const int bw = block_size_wide[plane_bsize];
1333
3.80k
  const int bh = block_size_high[plane_bsize];
1334
1335
3.80k
  if (use_wedge_interintra) {
1336
1.70k
    if (av1_is_wedge_used(bsize)) {
1337
1.70k
      const uint8_t *mask =
1338
1.70k
          av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
1339
1.70k
      const int subh = 2 * mi_size_high[bsize] == bh;
1340
1.70k
      const int subw = 2 * mi_size_wide[bsize] == bw;
1341
1.70k
      aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
1342
1.70k
                                interpred8, interstride, mask,
1343
1.70k
                                block_size_wide[bsize], bw, bh, subw, subh, bd);
1344
1.70k
    }
1345
1.70k
    return;
1346
1.70k
  }
1347
1348
2.09k
  uint8_t mask[MAX_SB_SQUARE];
1349
2.09k
  build_smooth_interintra_mask(mask, bw, plane_bsize, mode);
1350
2.09k
  aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
1351
2.09k
                            interpred8, interstride, mask, bw, bw, bh, 0, 0,
1352
2.09k
                            bd);
1353
2.09k
}
1354
#endif
1355
1356
void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm,
1357
                                               MACROBLOCKD *xd,
1358
                                               BLOCK_SIZE bsize, int plane,
1359
                                               const BUFFER_SET *ctx,
1360
5.51k
                                               uint8_t *dst, int dst_stride) {
1361
5.51k
  struct macroblockd_plane *const pd = &xd->plane[plane];
1362
5.51k
  const int ssx = xd->plane[plane].subsampling_x;
1363
5.51k
  const int ssy = xd->plane[plane].subsampling_y;
1364
5.51k
  BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
1365
5.51k
  PREDICTION_MODE mode = interintra_to_intra_mode[xd->mi[0]->interintra_mode];
1366
5.51k
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_Y] == 0);
1367
5.51k
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_UV] == 0);
1368
5.51k
  assert(xd->mi[0]->filter_intra_mode_info.use_filter_intra == 0);
1369
5.51k
  assert(xd->mi[0]->use_intrabc == 0);
1370
5.51k
  const SequenceHeader *seq_params = cm->seq_params;
1371
1372
5.51k
  av1_predict_intra_block(xd, seq_params->sb_size,
1373
5.51k
                          seq_params->enable_intra_edge_filter, pd->width,
1374
5.51k
                          pd->height, max_txsize_rect_lookup[plane_bsize], mode,
1375
5.51k
                          0, 0, FILTER_INTRA_MODES, ctx->plane[plane],
1376
5.51k
                          ctx->stride[plane], dst, dst_stride, 0, 0, plane);
1377
5.51k
}
1378
1379
void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
1380
                            const uint8_t *inter_pred, int inter_stride,
1381
5.51k
                            const uint8_t *intra_pred, int intra_stride) {
1382
5.51k
  const int ssx = xd->plane[plane].subsampling_x;
1383
5.51k
  const int ssy = xd->plane[plane].subsampling_y;
1384
5.51k
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
1385
5.51k
#if CONFIG_AV1_HIGHBITDEPTH
1386
5.51k
  if (is_cur_buf_hbd(xd)) {
1387
3.80k
    combine_interintra_highbd(
1388
3.80k
        xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
1389
3.80k
        xd->mi[0]->interintra_wedge_index, INTERINTRA_WEDGE_SIGN, bsize,
1390
3.80k
        plane_bsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
1391
3.80k
        inter_pred, inter_stride, intra_pred, intra_stride, xd->bd);
1392
3.80k
    return;
1393
3.80k
  }
1394
1.71k
#endif
1395
1.71k
  combine_interintra(
1396
1.71k
      xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
1397
1.71k
      xd->mi[0]->interintra_wedge_index, INTERINTRA_WEDGE_SIGN, bsize,
1398
1.71k
      plane_bsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
1399
1.71k
      inter_pred, inter_stride, intra_pred, intra_stride);
1400
1.71k
}
1401
1402
// Build the interintra predictor for one plane; a blend sketch follows this function.
1403
void av1_build_interintra_predictor(const AV1_COMMON *cm, MACROBLOCKD *xd,
1404
                                    uint8_t *pred, int stride,
1405
                                    const BUFFER_SET *ctx, int plane,
1406
5.51k
                                    BLOCK_SIZE bsize) {
1407
5.51k
  assert(bsize < BLOCK_SIZES_ALL);
1408
5.51k
  if (is_cur_buf_hbd(xd)) {
1409
3.80k
    DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
1410
3.80k
    av1_build_intra_predictors_for_interintra(
1411
3.80k
        cm, xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(intrapredictor),
1412
3.80k
        MAX_SB_SIZE);
1413
3.80k
    av1_combine_interintra(xd, bsize, plane, pred, stride,
1414
3.80k
                           CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
1415
3.80k
  } else {
1416
1.71k
    DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]);
1417
1.71k
    av1_build_intra_predictors_for_interintra(cm, xd, bsize, plane, ctx,
1418
1.71k
                                              intrapredictor, MAX_SB_SIZE);
1419
1.71k
    av1_combine_interintra(xd, bsize, plane, pred, stride, intrapredictor,
1420
1.71k
                           MAX_SB_SIZE);
1421
1.71k
  }
1422
5.51k
}
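For reference, the smooth (non-wedge) path in combine_interintra() above reduces to a per-pixel weighted average in which the intra predictor is src0 of the A64 blend and the inter predictor is src1. The sketch below is an illustration only: interintra_smooth_blend_sketch and its constant weight are assumptions, since the real weights come from the mode-dependent smooth-interintra mask table.

#include <stdint.h>

// Hypothetical sketch of the non-wedge interintra blend with 6-bit weights,
// following the argument order of the aom_blend_a64_mask() call above
// (intra = src0, inter = src1). A constant weight of 32 (equal weighting)
// is assumed here; in libaom the weight varies with the interintra mode.
static void interintra_smooth_blend_sketch(uint8_t *comp, int comp_stride,
                                           const uint8_t *intra,
                                           int intra_stride,
                                           const uint8_t *inter,
                                           int inter_stride, int bw, int bh) {
  const int m = 32;  // assumed constant weight; mode-dependent in practice
  for (int r = 0; r < bh; ++r) {
    for (int c = 0; c < bw; ++c) {
      const int p0 = intra[r * intra_stride + c];
      const int p1 = inter[r * inter_stride + c];
      comp[r * comp_stride + c] = (uint8_t)((m * p0 + (64 - m) * p1 + 32) >> 6);
    }
  }
}

The wedge path replaces the constant weight with a precomputed 2-D wedge mask but uses the same rounded weighted average.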