Coverage Report

Created: 2025-12-31 07:53

/src/aom/av1/common/reconinter.c
Line | Count | Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
#include <stdio.h>
14
#include <limits.h>
15
16
#include "config/aom_config.h"
17
#include "config/aom_dsp_rtcd.h"
18
#include "config/aom_scale_rtcd.h"
19
20
#include "aom/aom_integer.h"
21
#include "aom_dsp/blend.h"
22
23
#include "av1/common/av1_common_int.h"
24
#include "av1/common/blockd.h"
25
#include "av1/common/mvref_common.h"
26
#include "av1/common/obmc.h"
27
#include "av1/common/reconinter.h"
28
#include "av1/common/reconintra.h"
29
30
// This function determines whether to create a warped
31
// prediction.
32
int av1_allow_warp(const MB_MODE_INFO *const mbmi,
33
                   const WarpTypesAllowed *const warp_types,
34
                   const WarpedMotionParams *const gm_params,
35
                   int build_for_obmc, const struct scale_factors *const sf,
36
157k
                   WarpedMotionParams *final_warp_params) {
37
  // Note: As per the spec, we must test the fixed point scales here, which are
38
  // at a higher precision (1 << 14) than the xs and ys in subpel_params (that
39
  // have 1 << 10 precision).
40
157k
  if (av1_is_scaled(sf)) return 0;
41
42
150k
  if (final_warp_params != NULL) *final_warp_params = default_warp_params;
43
44
150k
  if (build_for_obmc) return 0;
45
46
150k
  if (warp_types->local_warp_allowed && !mbmi->wm_params.invalid) {
47
1.24k
    if (final_warp_params != NULL)
48
1.24k
      memcpy(final_warp_params, &mbmi->wm_params, sizeof(*final_warp_params));
49
1.24k
    return 1;
50
149k
  } else if (warp_types->global_warp_allowed && !gm_params->invalid) {
51
12.8k
    if (final_warp_params != NULL)
52
12.8k
      memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
53
12.8k
    return 1;
54
12.8k
  }
55
56
136k
  return 0;
57
150k
}
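
The decision order above can be summarized in a standalone sketch: a scaled reference or an OBMC build rules warping out entirely, a valid local warp model takes precedence over a valid global model, and plain translation is the fallback. The helper and enum below are illustrative names, not the codec's types.

#include <stdbool.h>
#include <stdio.h>

typedef enum { USE_TRANSLATION, USE_LOCAL_WARP, USE_GLOBAL_WARP } warp_choice;

// Mirrors the branch order of av1_allow_warp above, with plain booleans.
static warp_choice choose_warp(bool is_scaled, bool build_for_obmc,
                               bool local_warp_valid, bool global_warp_valid) {
  if (is_scaled || build_for_obmc) return USE_TRANSLATION;
  if (local_warp_valid) return USE_LOCAL_WARP;
  if (global_warp_valid) return USE_GLOBAL_WARP;
  return USE_TRANSLATION;
}

int main(void) {
  printf("%d\n", choose_warp(false, false, true, true));   // 1: local warp wins
  printf("%d\n", choose_warp(false, false, false, true));  // 2: global warp
  printf("%d\n", choose_warp(true, false, true, true));    // 0: scaled ref disables warp
  return 0;
}
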
58
59
void av1_init_inter_params(InterPredParams *inter_pred_params, int block_width,
60
                           int block_height, int pix_row, int pix_col,
61
                           int subsampling_x, int subsampling_y, int bit_depth,
62
                           int use_hbd_buf, int is_intrabc,
63
                           const struct scale_factors *sf,
64
                           const struct buf_2d *ref_buf,
65
455k
                           int_interpfilters interp_filters) {
66
455k
  inter_pred_params->block_width = block_width;
67
455k
  inter_pred_params->block_height = block_height;
68
455k
  inter_pred_params->pix_row = pix_row;
69
455k
  inter_pred_params->pix_col = pix_col;
70
455k
  inter_pred_params->subsampling_x = subsampling_x;
71
455k
  inter_pred_params->subsampling_y = subsampling_y;
72
455k
  inter_pred_params->bit_depth = bit_depth;
73
455k
  inter_pred_params->use_hbd_buf = use_hbd_buf;
74
455k
  inter_pred_params->is_intrabc = is_intrabc;
75
455k
  inter_pred_params->scale_factors = sf;
76
455k
  inter_pred_params->ref_frame_buf = *ref_buf;
77
455k
  inter_pred_params->mode = TRANSLATION_PRED;
78
455k
  inter_pred_params->comp_mode = UNIFORM_SINGLE;
79
80
455k
  if (is_intrabc) {
81
272k
    inter_pred_params->interp_filter_params[0] = &av1_intrabc_filter_params;
82
272k
    inter_pred_params->interp_filter_params[1] = &av1_intrabc_filter_params;
83
272k
  } else {
84
182k
    inter_pred_params->interp_filter_params[0] =
85
182k
        av1_get_interp_filter_params_with_block_size(
86
182k
            interp_filters.as_filters.x_filter, block_width);
87
182k
    inter_pred_params->interp_filter_params[1] =
88
182k
        av1_get_interp_filter_params_with_block_size(
89
182k
            interp_filters.as_filters.y_filter, block_height);
90
182k
  }
91
455k
}
92
93
75.9k
void av1_init_comp_mode(InterPredParams *inter_pred_params) {
94
75.9k
  inter_pred_params->comp_mode = UNIFORM_COMP;
95
75.9k
}
96
97
void av1_init_warp_params(InterPredParams *inter_pred_params,
98
                          const WarpTypesAllowed *warp_types, int ref,
99
431k
                          const MACROBLOCKD *xd, const MB_MODE_INFO *mi) {
100
431k
  if (inter_pred_params->block_height < 8 || inter_pred_params->block_width < 8)
101
223k
    return;
102
103
208k
  if (xd->cur_frame_force_integer_mv) return;
104
105
157k
  if (av1_allow_warp(mi, warp_types, &xd->global_motion[mi->ref_frame[ref]], 0,
106
157k
                     inter_pred_params->scale_factors,
107
157k
                     &inter_pred_params->warp_params)) {
108
#if CONFIG_REALTIME_ONLY
109
    aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_FEATURE,
110
                       "Warped motion is disabled in realtime only build.");
111
#endif
112
14.0k
    inter_pred_params->mode = WARP_PRED;
113
14.0k
  }
114
157k
}
115
116
void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
117
                              int dst_stride,
118
                              InterPredParams *inter_pred_params,
119
455k
                              const SubpelParams *subpel_params) {
120
455k
  assert(IMPLIES(inter_pred_params->conv_params.is_compound,
121
455k
                 inter_pred_params->conv_params.dst != NULL));
122
123
455k
  if (inter_pred_params->mode == TRANSLATION_PRED) {
124
441k
#if CONFIG_AV1_HIGHBITDEPTH
125
441k
    if (inter_pred_params->use_hbd_buf) {
126
313k
      highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
127
313k
                             inter_pred_params->block_width,
128
313k
                             inter_pred_params->block_height,
129
313k
                             &inter_pred_params->conv_params,
130
313k
                             inter_pred_params->interp_filter_params,
131
313k
                             inter_pred_params->bit_depth);
132
313k
    } else {
133
127k
      inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
134
127k
                      inter_pred_params->block_width,
135
127k
                      inter_pred_params->block_height,
136
127k
                      &inter_pred_params->conv_params,
137
127k
                      inter_pred_params->interp_filter_params);
138
127k
    }
139
#else
140
    inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
141
                    inter_pred_params->block_width,
142
                    inter_pred_params->block_height,
143
                    &inter_pred_params->conv_params,
144
                    inter_pred_params->interp_filter_params);
145
#endif
146
441k
  }
147
14.0k
#if !CONFIG_REALTIME_ONLY
148
  // TODO(jingning): av1_warp_plane() can be further cleaned up.
149
14.0k
  else if (inter_pred_params->mode == WARP_PRED) {
150
14.0k
    av1_warp_plane(
151
14.0k
        &inter_pred_params->warp_params, inter_pred_params->use_hbd_buf,
152
14.0k
        inter_pred_params->bit_depth, inter_pred_params->ref_frame_buf.buf0,
153
14.0k
        inter_pred_params->ref_frame_buf.width,
154
14.0k
        inter_pred_params->ref_frame_buf.height,
155
14.0k
        inter_pred_params->ref_frame_buf.stride, dst,
156
14.0k
        inter_pred_params->pix_col, inter_pred_params->pix_row,
157
14.0k
        inter_pred_params->block_width, inter_pred_params->block_height,
158
14.0k
        dst_stride, inter_pred_params->subsampling_x,
159
14.0k
        inter_pred_params->subsampling_y, &inter_pred_params->conv_params);
160
14.0k
  }
161
0
#endif
162
0
  else {
163
0
    assert(0 && "Unsupported inter_pred_params->mode");
164
0
  }
165
455k
}
166
167
static const uint8_t wedge_master_oblique_odd[MASK_MASTER_SIZE] = {
168
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
169
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  6,  18,
170
  37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
171
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
172
};
173
static const uint8_t wedge_master_oblique_even[MASK_MASTER_SIZE] = {
174
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
175
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  4,  11, 27,
176
  46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
177
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
178
};
179
static const uint8_t wedge_master_vertical[MASK_MASTER_SIZE] = {
180
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
181
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  7,  21,
182
  43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
183
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
184
};
185
186
static AOM_INLINE void shift_copy(const uint8_t *src, uint8_t *dst, int shift,
187
320
                                  int width) {
188
320
  if (shift >= 0) {
189
165
    memcpy(dst + shift, src, width - shift);
190
165
    memset(dst, src[0], shift);
191
165
  } else {
192
155
    shift = -shift;
193
155
    memcpy(dst, src + shift, width - shift);
194
155
    memset(dst + width - shift, src[width - 1], shift);
195
155
  }
196
320
}
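
A small standalone demo of shift_copy's behaviour may help when reading init_wedge_master_masks below: a positive shift slides the row right and pads the start with the first sample, while a negative shift slides it left and pads the end with the last sample. The 8-sample ramp here is made up purely for illustration.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

// Same logic as shift_copy above, reproduced so the demo compiles on its own.
static void shift_copy_demo(const uint8_t *src, uint8_t *dst, int shift, int width) {
  if (shift >= 0) {
    memcpy(dst + shift, src, width - shift);
    memset(dst, src[0], shift);
  } else {
    shift = -shift;
    memcpy(dst, src + shift, width - shift);
    memset(dst + width - shift, src[width - 1], shift);
  }
}

int main(void) {
  const uint8_t ramp[8] = { 0, 0, 8, 32, 56, 64, 64, 64 };
  uint8_t out[8];
  shift_copy_demo(ramp, out, 2, 8);   // -> 0 0 0 0 8 32 56 64
  for (int i = 0; i < 8; ++i) printf("%d ", out[i]);
  printf("\n");
  shift_copy_demo(ramp, out, -2, 8);  // -> 8 32 56 64 64 64 64 64
  for (int i = 0; i < 8; ++i) printf("%d ", out[i]);
  printf("\n");
  return 0;
}
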
197
198
/* clang-format off */
199
DECLARE_ALIGNED(16, static uint8_t,
200
                wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]) = {
201
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
202
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
203
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
204
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
205
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
206
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
207
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
208
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
209
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
210
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
211
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
212
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
213
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
214
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
215
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
216
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
217
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
218
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
219
  { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, },
220
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, },
221
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
222
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
223
};
224
/* clang-format on */
225
226
// [negative][direction]
227
DECLARE_ALIGNED(
228
    16, static uint8_t,
229
    wedge_mask_obl[2][WEDGE_DIRECTIONS][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);
230
231
// 4 * MAX_WEDGE_SQUARE is an easy to compute and fairly tight upper bound
232
// on the sum of all mask sizes up to and including MAX_WEDGE_SQUARE.
233
DECLARE_ALIGNED(16, static uint8_t,
234
                wedge_mask_buf[2 * MAX_WEDGE_TYPES * 4 * MAX_WEDGE_SQUARE]);
235
236
DECLARE_ALIGNED(16, static uint8_t,
237
                smooth_interintra_mask_buf[INTERINTRA_MODES][BLOCK_SIZES_ALL]
238
                                          [MAX_WEDGE_SQUARE]);
239
240
static wedge_masks_type wedge_masks[BLOCK_SIZES_ALL][2];
241
242
static const wedge_code_type wedge_codebook_16_hgtw[16] = {
243
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
244
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
245
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
246
  { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
247
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
248
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
249
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
250
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
251
};
252
253
static const wedge_code_type wedge_codebook_16_hltw[16] = {
254
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
255
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
256
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 4, 4 },
257
  { WEDGE_VERTICAL, 6, 4 },   { WEDGE_HORIZONTAL, 4, 4 },
258
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
259
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
260
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
261
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
262
};
263
264
static const wedge_code_type wedge_codebook_16_heqw[16] = {
265
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
266
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
267
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
268
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
269
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
270
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
271
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
272
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
273
};
274
275
const wedge_params_type av1_wedge_params_lookup[BLOCK_SIZES_ALL] = {
276
  { 0, NULL, NULL, NULL },
277
  { 0, NULL, NULL, NULL },
278
  { 0, NULL, NULL, NULL },
279
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8],
280
    wedge_masks[BLOCK_8X8] },
281
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16],
282
    wedge_masks[BLOCK_8X16] },
283
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8],
284
    wedge_masks[BLOCK_16X8] },
285
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16],
286
    wedge_masks[BLOCK_16X16] },
287
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32],
288
    wedge_masks[BLOCK_16X32] },
289
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16],
290
    wedge_masks[BLOCK_32X16] },
291
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32],
292
    wedge_masks[BLOCK_32X32] },
293
  { 0, NULL, NULL, NULL },
294
  { 0, NULL, NULL, NULL },
295
  { 0, NULL, NULL, NULL },
296
  { 0, NULL, NULL, NULL },
297
  { 0, NULL, NULL, NULL },
298
  { 0, NULL, NULL, NULL },
299
  { 0, NULL, NULL, NULL },
300
  { 0, NULL, NULL, NULL },
301
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32],
302
    wedge_masks[BLOCK_8X32] },
303
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8],
304
    wedge_masks[BLOCK_32X8] },
305
  { 0, NULL, NULL, NULL },
306
  { 0, NULL, NULL, NULL },
307
};
308
309
static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
310
1.44k
                                             BLOCK_SIZE sb_type) {
311
1.44k
  const uint8_t *master;
312
1.44k
  const int bh = block_size_high[sb_type];
313
1.44k
  const int bw = block_size_wide[sb_type];
314
1.44k
  const wedge_code_type *a =
315
1.44k
      av1_wedge_params_lookup[sb_type].codebook + wedge_index;
316
1.44k
  int woff, hoff;
317
1.44k
  const uint8_t wsignflip =
318
1.44k
      av1_wedge_params_lookup[sb_type].signflip[wedge_index];
319
320
1.44k
  assert(wedge_index >= 0 && wedge_index < get_wedge_types_lookup(sb_type));
321
1.44k
  woff = (a->x_offset * bw) >> 3;
322
1.44k
  hoff = (a->y_offset * bh) >> 3;
323
1.44k
  master = wedge_mask_obl[neg ^ wsignflip][a->direction] +
324
1.44k
           MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
325
1.44k
           MASK_MASTER_SIZE / 2 - woff;
326
1.44k
  return master;
327
1.44k
}
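
A worked example of the pointer arithmetic above, assuming MASK_MASTER_SIZE and MASK_MASTER_STRIDE are both 64 (their values elsewhere in the tree): for BLOCK_32X16, the hltw codebook entry { WEDGE_OBLIQUE63, 2, 4 } gives offsets in eighths of the block dimensions, so the wedge boundary is re-centred at pixel (8, 8) of the block.

#include <stdio.h>

int main(void) {
  const int bw = 32, bh = 16;            // block_size_wide/high[BLOCK_32X16]
  const int x_offset = 2, y_offset = 4;  // codebook offsets, in eighths
  const int woff = (x_offset * bw) >> 3; // = 8 pixels from the left edge
  const int hoff = (y_offset * bh) >> 3; // = 8 pixels from the top edge
  // Start of the block inside the 64x64 master mask (assumed stride 64),
  // chosen so the master's centre lands at (woff, hoff) of the block.
  const int offset = 64 * (64 / 2 - hoff) + (64 / 2 - woff);
  printf("woff=%d hoff=%d master offset=%d\n", woff, hoff, offset);  // 8 8 1560
  return 0;
}
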
328
329
const uint8_t *av1_get_compound_type_mask(
330
8.00k
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) {
331
8.00k
  (void)sb_type;
332
8.00k
  switch (comp_data->type) {
333
5.49k
    case COMPOUND_WEDGE:
334
5.49k
      return av1_get_contiguous_soft_mask(comp_data->wedge_index,
335
5.49k
                                          comp_data->wedge_sign, sb_type);
336
2.51k
    default: return comp_data->seg_mask;
337
8.00k
  }
338
8.00k
}
339
340
static AOM_INLINE void diffwtd_mask_d16(
341
    uint8_t *mask, int which_inverse, int mask_base, const CONV_BUF_TYPE *src0,
342
    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
343
848
    ConvolveParams *conv_params, int bd) {
344
848
  int round =
345
848
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
346
848
  int i, j, m, diff;
347
8.93k
  for (i = 0; i < h; ++i) {
348
128k
    for (j = 0; j < w; ++j) {
349
120k
      diff = abs(src0[i * src0_stride + j] - src1[i * src1_stride + j]);
350
120k
      diff = ROUND_POWER_OF_TWO(diff, round);
351
120k
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
352
120k
      mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
353
120k
    }
354
8.08k
  }
355
848
}
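
A numeric sketch of the DIFFWTD_38 mapping above. DIFF_FACTOR and AOM_BLEND_A64_MAX_ALPHA are assumed to be 16 and 64 (their values elsewhere in the tree), and round is only an illustrative shift; the point is that the mask stays near the 38/64 base unless the two predictions differ strongly.

#include <stdio.h>

#define DIFF_FACTOR_DEMO 16  // assumed value of DIFF_FACTOR
#define MAX_ALPHA_DEMO 64    // assumed value of AOM_BLEND_A64_MAX_ALPHA

static int clamp_demo(int v, int lo, int hi) { return v < lo ? lo : (v > hi ? hi : v); }

int main(void) {
  const int mask_base = 38, round = 4;          // DIFFWTD_38, example rounding
  const int d16_diff[] = { 0, 64, 256, 1024 };  // |src0 - src1| in CONV_BUF units
  for (int k = 0; k < 4; ++k) {
    int diff = (d16_diff[k] + (1 << (round - 1))) >> round;  // ROUND_POWER_OF_TWO
    int m = clamp_demo(mask_base + diff / DIFF_FACTOR_DEMO, 0, MAX_ALPHA_DEMO);
    printf("diff=%4d -> mask=%d (inverse mask=%d)\n", d16_diff[k], m, MAX_ALPHA_DEMO - m);
  }
  return 0;
}
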
356
357
void av1_build_compound_diffwtd_mask_d16_c(
358
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
359
    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
360
848
    ConvolveParams *conv_params, int bd) {
361
848
  switch (mask_type) {
362
513
    case DIFFWTD_38:
363
513
      diffwtd_mask_d16(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w,
364
513
                       conv_params, bd);
365
513
      break;
366
335
    case DIFFWTD_38_INV:
367
335
      diffwtd_mask_d16(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w,
368
335
                       conv_params, bd);
369
335
      break;
370
0
    default: assert(0);
371
848
  }
372
848
}
373
374
static AOM_INLINE void diffwtd_mask(uint8_t *mask, int which_inverse,
375
                                    int mask_base, const uint8_t *src0,
376
                                    int src0_stride, const uint8_t *src1,
377
0
                                    int src1_stride, int h, int w) {
378
0
  int i, j, m, diff;
379
0
  for (i = 0; i < h; ++i) {
380
0
    for (j = 0; j < w; ++j) {
381
0
      diff =
382
0
          abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]);
383
0
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
384
0
      mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
385
0
    }
386
0
  }
387
0
}
388
389
void av1_build_compound_diffwtd_mask_c(uint8_t *mask,
390
                                       DIFFWTD_MASK_TYPE mask_type,
391
                                       const uint8_t *src0, int src0_stride,
392
                                       const uint8_t *src1, int src1_stride,
393
0
                                       int h, int w) {
394
0
  switch (mask_type) {
395
0
    case DIFFWTD_38:
396
0
      diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w);
397
0
      break;
398
0
    case DIFFWTD_38_INV:
399
0
      diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w);
400
0
      break;
401
0
    default: assert(0);
402
0
  }
403
0
}
404
405
static AOM_FORCE_INLINE void diffwtd_mask_highbd(
406
    uint8_t *mask, int which_inverse, int mask_base, const uint16_t *src0,
407
    int src0_stride, const uint16_t *src1, int src1_stride, int h, int w,
408
0
    const unsigned int bd) {
409
0
  assert(bd >= 8);
410
0
  if (bd == 8) {
411
0
    if (which_inverse) {
412
0
      for (int i = 0; i < h; ++i) {
413
0
        for (int j = 0; j < w; ++j) {
414
0
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
415
0
          unsigned int m = negative_to_zero(mask_base + diff);
416
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
417
0
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
418
0
        }
419
0
        src0 += src0_stride;
420
0
        src1 += src1_stride;
421
0
        mask += w;
422
0
      }
423
0
    } else {
424
0
      for (int i = 0; i < h; ++i) {
425
0
        for (int j = 0; j < w; ++j) {
426
0
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
427
0
          unsigned int m = negative_to_zero(mask_base + diff);
428
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
429
0
          mask[j] = m;
430
0
        }
431
0
        src0 += src0_stride;
432
0
        src1 += src1_stride;
433
0
        mask += w;
434
0
      }
435
0
    }
436
0
  } else {
437
0
    const unsigned int bd_shift = bd - 8;
438
0
    if (which_inverse) {
439
0
      for (int i = 0; i < h; ++i) {
440
0
        for (int j = 0; j < w; ++j) {
441
0
          int diff =
442
0
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
443
0
          unsigned int m = negative_to_zero(mask_base + diff);
444
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
445
0
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
446
0
        }
447
0
        src0 += src0_stride;
448
0
        src1 += src1_stride;
449
0
        mask += w;
450
0
      }
451
0
    } else {
452
0
      for (int i = 0; i < h; ++i) {
453
0
        for (int j = 0; j < w; ++j) {
454
0
          int diff =
455
0
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
456
0
          unsigned int m = negative_to_zero(mask_base + diff);
457
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
458
0
          mask[j] = m;
459
0
        }
460
0
        src0 += src0_stride;
461
0
        src1 += src1_stride;
462
0
        mask += w;
463
0
      }
464
0
    }
465
0
  }
466
0
}
467
468
void av1_build_compound_diffwtd_mask_highbd_c(
469
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0,
470
    int src0_stride, const uint8_t *src1, int src1_stride, int h, int w,
471
0
    int bd) {
472
0
  switch (mask_type) {
473
0
    case DIFFWTD_38:
474
0
      diffwtd_mask_highbd(mask, 0, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
475
0
                          CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
476
0
      break;
477
0
    case DIFFWTD_38_INV:
478
0
      diffwtd_mask_highbd(mask, 1, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
479
0
                          CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
480
0
      break;
481
0
    default: assert(0);
482
0
  }
483
0
}
484
485
5
static AOM_INLINE void init_wedge_master_masks() {
486
5
  int i, j;
487
5
  const int w = MASK_MASTER_SIZE;
488
5
  const int h = MASK_MASTER_SIZE;
489
5
  const int stride = MASK_MASTER_STRIDE;
490
  // Note: index [0] stores the masters, and [1] its complement.
491
  // Generate prototype by shifting the masters
492
5
  int shift = h / 4;
493
165
  for (i = 0; i < h; i += 2) {
494
160
    shift_copy(wedge_master_oblique_even,
495
160
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride], shift,
496
160
               MASK_MASTER_SIZE);
497
160
    shift--;
498
160
    shift_copy(wedge_master_oblique_odd,
499
160
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][(i + 1) * stride], shift,
500
160
               MASK_MASTER_SIZE);
501
160
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][i * stride],
502
160
           wedge_master_vertical,
503
160
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
504
160
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][(i + 1) * stride],
505
160
           wedge_master_vertical,
506
160
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
507
160
  }
508
509
325
  for (i = 0; i < h; ++i) {
510
20.8k
    for (j = 0; j < w; ++j) {
511
20.4k
      const int msk = wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j];
512
20.4k
      wedge_mask_obl[0][WEDGE_OBLIQUE27][j * stride + i] = msk;
513
20.4k
      wedge_mask_obl[0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
514
20.4k
          wedge_mask_obl[0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
515
20.4k
              (1 << WEDGE_WEIGHT_BITS) - msk;
516
20.4k
      wedge_mask_obl[1][WEDGE_OBLIQUE63][i * stride + j] =
517
20.4k
          wedge_mask_obl[1][WEDGE_OBLIQUE27][j * stride + i] =
518
20.4k
              (1 << WEDGE_WEIGHT_BITS) - msk;
519
20.4k
      wedge_mask_obl[1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
520
20.4k
          wedge_mask_obl[1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = msk;
521
20.4k
      const int mskx = wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j];
522
20.4k
      wedge_mask_obl[0][WEDGE_HORIZONTAL][j * stride + i] = mskx;
523
20.4k
      wedge_mask_obl[1][WEDGE_VERTICAL][i * stride + j] =
524
20.4k
          wedge_mask_obl[1][WEDGE_HORIZONTAL][j * stride + i] =
525
20.4k
              (1 << WEDGE_WEIGHT_BITS) - mskx;
526
20.4k
    }
527
320
  }
528
5
}
529
530
5
static AOM_INLINE void init_wedge_masks() {
531
5
  uint8_t *dst = wedge_mask_buf;
532
5
  BLOCK_SIZE bsize;
533
5
  memset(wedge_masks, 0, sizeof(wedge_masks));
534
115
  for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; ++bsize) {
535
110
    const wedge_params_type *wedge_params = &av1_wedge_params_lookup[bsize];
536
110
    const int wtypes = wedge_params->wedge_types;
537
110
    if (wtypes == 0) continue;
538
45
    const uint8_t *mask;
539
45
    const int bw = block_size_wide[bsize];
540
45
    const int bh = block_size_high[bsize];
541
45
    int w;
542
765
    for (w = 0; w < wtypes; ++w) {
543
720
      mask = get_wedge_mask_inplace(w, 0, bsize);
544
720
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw,
545
720
                        bh);
546
720
      wedge_params->masks[0][w] = dst;
547
720
      dst += bw * bh;
548
549
720
      mask = get_wedge_mask_inplace(w, 1, bsize);
550
720
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw,
551
720
                        bh);
552
720
      wedge_params->masks[1][w] = dst;
553
720
      dst += bw * bh;
554
720
    }
555
45
    assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf));
556
45
  }
557
5
}
558
559
/* clang-format off */
560
static const uint8_t ii_weights1d[MAX_SB_SIZE] = {
561
  60, 58, 56, 54, 52, 50, 48, 47, 45, 44, 42, 41, 39, 38, 37, 35, 34, 33, 32,
562
  31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20, 19, 19, 18, 18, 17, 16,
563
  16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10,  9,  9,  9,  8,
564
  8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  4,  4,
565
  4,  4,  4,  4,  4,  4,  3,  3,  3,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,
566
  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1,  1,
567
  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1
568
};
569
static uint8_t ii_size_scales[BLOCK_SIZES_ALL] = {
570
    32, 16, 16, 16, 8, 8, 8, 4,
571
    4,  4,  2,  2,  2, 1, 1, 1,
572
    8,  8,  4,  4,  2, 2
573
};
574
/* clang-format on */
575
576
static AOM_INLINE void build_smooth_interintra_mask(uint8_t *mask, int stride,
577
                                                    BLOCK_SIZE plane_bsize,
578
2.53k
                                                    INTERINTRA_MODE mode) {
579
2.53k
  int i, j;
580
2.53k
  const int bw = block_size_wide[plane_bsize];
581
2.53k
  const int bh = block_size_high[plane_bsize];
582
2.53k
  const int size_scale = ii_size_scales[plane_bsize];
583
584
2.53k
  switch (mode) {
585
1.25k
    case II_V_PRED:
586
11.7k
      for (i = 0; i < bh; ++i) {
587
10.4k
        memset(mask, ii_weights1d[i * size_scale], bw * sizeof(mask[0]));
588
10.4k
        mask += stride;
589
10.4k
      }
590
1.25k
      break;
591
592
365
    case II_H_PRED:
593
7.19k
      for (i = 0; i < bh; ++i) {
594
105k
        for (j = 0; j < bw; ++j) mask[j] = ii_weights1d[j * size_scale];
595
6.82k
        mask += stride;
596
6.82k
      }
597
365
      break;
598
599
802
    case II_SMOOTH_PRED:
600
7.94k
      for (i = 0; i < bh; ++i) {
601
62.0k
        for (j = 0; j < bw; ++j)
602
54.8k
          mask[j] = ii_weights1d[(i < j ? i : j) * size_scale];
603
7.14k
        mask += stride;
604
7.14k
      }
605
802
      break;
606
607
113
    case II_DC_PRED:
608
113
    default:
609
1.60k
      for (i = 0; i < bh; ++i) {
610
1.49k
        memset(mask, 32, bw * sizeof(mask[0]));
611
1.49k
        mask += stride;
612
1.49k
      }
613
113
      break;
614
2.53k
  }
615
2.53k
}
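
As a worked example of the vertical case above: for an 8x8 block, ii_size_scales[BLOCK_8X8] is 16, so row i of the II_V_PRED mask is filled with ii_weights1d[16 * i], i.e. 60, 34, 19, 11, 6, 4, 2, 1 out of 64. The 6-bit blend below, with the mask weighting the intra prediction, is an assumption matching the AOM_BLEND_A64 convention used by the codec's blend helpers.

#include <stdio.h>

int main(void) {
  const int w[8] = { 60, 34, 19, 11, 6, 4, 2, 1 };  // ii_weights1d[16 * i]
  const int intra = 200, inter = 80;                 // example pixel values
  for (int i = 0; i < 8; ++i) {
    // Intra dominates near the top edge and fades out towards the bottom.
    int blended = (w[i] * intra + (64 - w[i]) * inter + 32) >> 6;
    printf("row %d: weight %2d/64 -> %d\n", i, w[i], blended);
  }
  return 0;
}
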
616
617
5
static AOM_INLINE void init_smooth_interintra_masks() {
618
25
  for (int m = 0; m < INTERINTRA_MODES; ++m) {
619
460
    for (int bs = 0; bs < BLOCK_SIZES_ALL; ++bs) {
620
440
      const int bw = block_size_wide[bs];
621
440
      const int bh = block_size_high[bs];
622
440
      if (bw > MAX_WEDGE_SIZE || bh > MAX_WEDGE_SIZE) continue;
623
280
      build_smooth_interintra_mask(smooth_interintra_mask_buf[m][bs], bw, bs,
624
280
                                   m);
625
280
    }
626
20
  }
627
5
}
628
629
// Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
630
5
void av1_init_wedge_masks() {
631
5
  init_wedge_master_masks();
632
5
  init_wedge_masks();
633
5
  init_smooth_interintra_masks();
634
5
}
635
636
static AOM_INLINE void build_masked_compound_no_round(
637
    uint8_t *dst, int dst_stride, const CONV_BUF_TYPE *src0, int src0_stride,
638
    const CONV_BUF_TYPE *src1, int src1_stride,
639
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
640
8.00k
    int w, InterPredParams *inter_pred_params) {
641
8.00k
  const int ssy = inter_pred_params->subsampling_y;
642
8.00k
  const int ssx = inter_pred_params->subsampling_x;
643
8.00k
  const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
644
8.00k
  const int mask_stride = block_size_wide[sb_type];
645
8.00k
#if CONFIG_AV1_HIGHBITDEPTH
646
8.00k
  if (inter_pred_params->use_hbd_buf) {
647
4.28k
    aom_highbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
648
4.28k
                                  src1_stride, mask, mask_stride, w, h, ssx,
649
4.28k
                                  ssy, &inter_pred_params->conv_params,
650
4.28k
                                  inter_pred_params->bit_depth);
651
4.28k
  } else {
652
3.72k
    aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
653
3.72k
                                 src1_stride, mask, mask_stride, w, h, ssx, ssy,
654
3.72k
                                 &inter_pred_params->conv_params);
655
3.72k
  }
656
#else
657
  aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
658
                               src1_stride, mask, mask_stride, w, h, ssx, ssy,
659
                               &inter_pred_params->conv_params);
660
#endif
661
8.00k
}
662
663
static void make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
664
                                        uint8_t *dst, int dst_stride,
665
                                        InterPredParams *inter_pred_params,
666
8.00k
                                        const SubpelParams *subpel_params) {
667
8.00k
  const INTERINTER_COMPOUND_DATA *comp_data = &inter_pred_params->mask_comp;
668
8.00k
  BLOCK_SIZE sb_type = inter_pred_params->sb_type;
669
670
  // We're going to call av1_make_inter_predictor to generate a prediction into
671
  // a temporary buffer, then blend that temporary buffer with the prediction
  // from
672
  // the other reference.
673
8.00k
  DECLARE_ALIGNED(32, uint8_t, tmp_buf[2 * MAX_SB_SQUARE]);
674
8.00k
  uint8_t *tmp_dst =
675
8.00k
      inter_pred_params->use_hbd_buf ? CONVERT_TO_BYTEPTR(tmp_buf) : tmp_buf;
676
677
8.00k
  const int tmp_buf_stride = MAX_SB_SIZE;
678
8.00k
  CONV_BUF_TYPE *org_dst = inter_pred_params->conv_params.dst;
679
8.00k
  int org_dst_stride = inter_pred_params->conv_params.dst_stride;
680
8.00k
  CONV_BUF_TYPE *tmp_buf16 = (CONV_BUF_TYPE *)tmp_buf;
681
8.00k
  inter_pred_params->conv_params.dst = tmp_buf16;
682
8.00k
  inter_pred_params->conv_params.dst_stride = tmp_buf_stride;
683
8.00k
  assert(inter_pred_params->conv_params.do_average == 0);
684
685
  // This will generate a prediction in tmp_buf for the second reference
686
8.00k
  av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE,
687
8.00k
                           inter_pred_params, subpel_params);
688
689
8.00k
  if (!inter_pred_params->conv_params.plane &&
690
2.69k
      comp_data->type == COMPOUND_DIFFWTD) {
691
848
    av1_build_compound_diffwtd_mask_d16(
692
848
        comp_data->seg_mask, comp_data->mask_type, org_dst, org_dst_stride,
693
848
        tmp_buf16, tmp_buf_stride, inter_pred_params->block_height,
694
848
        inter_pred_params->block_width, &inter_pred_params->conv_params,
695
848
        inter_pred_params->bit_depth);
696
848
  }
697
8.00k
  build_masked_compound_no_round(
698
8.00k
      dst, dst_stride, org_dst, org_dst_stride, tmp_buf16, tmp_buf_stride,
699
8.00k
      comp_data, sb_type, inter_pred_params->block_height,
700
8.00k
      inter_pred_params->block_width, inter_pred_params);
701
8.00k
}
702
703
void av1_build_one_inter_predictor(
704
    uint8_t *dst, int dst_stride, const MV *const src_mv,
705
    InterPredParams *inter_pred_params, MACROBLOCKD *xd, int mi_x, int mi_y,
706
455k
    int ref, uint8_t **mc_buf, CalcSubpelParamsFunc calc_subpel_params_func) {
707
455k
  SubpelParams subpel_params;
708
455k
  uint8_t *src;
709
455k
  int src_stride;
710
455k
  calc_subpel_params_func(src_mv, inter_pred_params, xd, mi_x, mi_y, ref,
711
455k
                          mc_buf, &src, &subpel_params, &src_stride);
712
713
455k
  if (inter_pred_params->comp_mode == UNIFORM_SINGLE ||
714
447k
      inter_pred_params->comp_mode == UNIFORM_COMP) {
715
447k
    av1_make_inter_predictor(src, src_stride, dst, dst_stride,
716
447k
                             inter_pred_params, &subpel_params);
717
447k
  } else {
718
8.00k
    make_masked_inter_predictor(src, src_stride, dst, dst_stride,
719
8.00k
                                inter_pred_params, &subpel_params);
720
8.00k
  }
721
455k
}
722
723
void av1_dist_wtd_comp_weight_assign(const AV1_COMMON *cm,
724
                                     const MB_MODE_INFO *mbmi, int *fwd_offset,
725
                                     int *bck_offset,
726
                                     int *use_dist_wtd_comp_avg,
727
447k
                                     int is_compound) {
728
447k
  assert(fwd_offset != NULL && bck_offset != NULL);
729
447k
  if (!is_compound || mbmi->compound_idx) {
730
433k
    *fwd_offset = 8;
731
433k
    *bck_offset = 8;
732
433k
    *use_dist_wtd_comp_avg = 0;
733
433k
    return;
734
433k
  }
735
736
14.6k
  *use_dist_wtd_comp_avg = 1;
737
14.6k
  const RefCntBuffer *const bck_buf = get_ref_frame_buf(cm, mbmi->ref_frame[0]);
738
14.6k
  const RefCntBuffer *const fwd_buf = get_ref_frame_buf(cm, mbmi->ref_frame[1]);
739
14.6k
  const int cur_frame_index = cm->cur_frame->order_hint;
740
14.6k
  int bck_frame_index = 0, fwd_frame_index = 0;
741
742
14.6k
  if (bck_buf != NULL) bck_frame_index = bck_buf->order_hint;
743
14.6k
  if (fwd_buf != NULL) fwd_frame_index = fwd_buf->order_hint;
744
745
14.6k
  int d0 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
746
14.6k
                                       fwd_frame_index, cur_frame_index)),
747
14.6k
                 0, MAX_FRAME_DISTANCE);
748
14.6k
  int d1 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
749
14.6k
                                       cur_frame_index, bck_frame_index)),
750
14.6k
                 0, MAX_FRAME_DISTANCE);
751
752
14.6k
  const int order = d0 <= d1;
753
754
14.6k
  if (d0 == 0 || d1 == 0) {
755
924
    *fwd_offset = quant_dist_lookup_table[3][order];
756
924
    *bck_offset = quant_dist_lookup_table[3][1 - order];
757
924
    return;
758
924
  }
759
760
13.7k
  int i;
761
21.1k
  for (i = 0; i < 3; ++i) {
762
19.1k
    int c0 = quant_dist_weight[i][order];
763
19.1k
    int c1 = quant_dist_weight[i][!order];
764
19.1k
    int d0_c0 = d0 * c0;
765
19.1k
    int d1_c1 = d1 * c1;
766
19.1k
    if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
767
19.1k
  }
768
769
13.7k
  *fwd_offset = quant_dist_lookup_table[i][order];
770
13.7k
  *bck_offset = quant_dist_lookup_table[i][1 - order];
771
13.7k
}
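
A worked example of the weight selection above. The two tables here are assumptions copied from the AV1 specification (the authoritative copies live elsewhere in the tree); each lookup row sums to 16, and the two weights are later applied in the distance-weighted convolve with a 4-bit normalization.

#include <stdio.h>

static const int quant_dist_weight_demo[3][2] = { { 2, 3 }, { 2, 5 }, { 2, 7 } };
static const int quant_dist_lookup_demo[4][2] = {
  { 9, 7 }, { 11, 5 }, { 12, 4 }, { 13, 3 }
};

int main(void) {
  const int d0 = 2, d1 = 6;    // clamped relative frame distances, as above
  const int order = d0 <= d1;  // 1: d0 is the smaller distance
  int i;
  for (i = 0; i < 3; ++i) {
    const int d0_c0 = d0 * quant_dist_weight_demo[i][order];
    const int d1_c1 = d1 * quant_dist_weight_demo[i][!order];
    if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
  }
  // With d0 = 2 and d1 = 6 the loop stops at i = 2, giving fwd_offset = 4 and
  // bck_offset = 12; the pair always sums to 16.
  printf("fwd_offset=%d bck_offset=%d\n", quant_dist_lookup_demo[i][order],
         quant_dist_lookup_demo[i][1 - order]);
  return 0;
}
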
772
773
// True if the following hold:
774
//  1. Not intrabc and not build_for_obmc
775
//  2. At least one dimension is size 4 with subsampling
776
//  3. If sub-sampled, none of the previous blocks around the sub-sample
777
//     are intrabc or intra blocks (i.e., all of them are regular inter blocks)
778
static bool is_sub8x8_inter(const MACROBLOCKD *xd, int plane, BLOCK_SIZE bsize,
779
413k
                            int is_intrabc, int build_for_obmc) {
780
413k
  if (is_intrabc || build_for_obmc) {
781
289k
    return false;
782
289k
  }
783
784
124k
  const struct macroblockd_plane *const pd = &xd->plane[plane];
785
124k
  const int ss_x = pd->subsampling_x;
786
124k
  const int ss_y = pd->subsampling_y;
787
124k
  const int is_sub4_x = (block_size_wide[bsize] == 4) && ss_x;
788
124k
  const int is_sub4_y = (block_size_high[bsize] == 4) && ss_y;
789
124k
  if (!is_sub4_x && !is_sub4_y) {
790
120k
    return false;
791
120k
  }
792
793
  // For sub8x8 chroma blocks, we may be covering more than one luma block's
794
  // worth of pixels. Thus (mi_x, mi_y) may not be the correct coordinates for
795
  // the top-left corner of the prediction source - the correct top-left corner
796
  // is at (pre_x, pre_y).
797
3.83k
  const int row_start = is_sub4_y ? -1 : 0;
798
3.83k
  const int col_start = is_sub4_x ? -1 : 0;
799
800
7.91k
  for (int row = row_start; row <= 0; ++row) {
801
12.1k
    for (int col = col_start; col <= 0; ++col) {
802
8.10k
      const MB_MODE_INFO *this_mbmi = xd->mi[row * xd->mi_stride + col];
803
8.10k
      if (!is_inter_block(this_mbmi)) return false;
804
8.01k
      if (is_intrabc_block(this_mbmi)) return false;
805
8.01k
    }
806
4.17k
  }
807
3.73k
  return true;
808
3.83k
}
809
810
static void build_inter_predictors_sub8x8(
811
    const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, const MB_MODE_INFO *mi,
812
    int mi_x, int mi_y, uint8_t **mc_buf,
813
3.73k
    CalcSubpelParamsFunc calc_subpel_params_func) {
814
3.73k
  const BLOCK_SIZE bsize = mi->bsize;
815
3.73k
  struct macroblockd_plane *const pd = &xd->plane[plane];
816
3.73k
  const bool ss_x = pd->subsampling_x;
817
3.73k
  const bool ss_y = pd->subsampling_y;
818
3.73k
  const int b4_w = block_size_wide[bsize] >> ss_x;
819
3.73k
  const int b4_h = block_size_high[bsize] >> ss_y;
820
3.73k
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ss_x, ss_y);
821
3.73k
  const int b8_w = block_size_wide[plane_bsize];
822
3.73k
  const int b8_h = block_size_high[plane_bsize];
823
3.73k
  const int is_compound = has_second_ref(mi);
824
3.73k
  assert(!is_compound);
825
3.73k
  assert(!is_intrabc_block(mi));
826
827
  // For sub8x8 chroma blocks, we may be covering more than one luma block's
828
  // worth of pixels. Thus (mi_x, mi_y) may not be the correct coordinates for
829
  // the top-left corner of the prediction source - the correct top-left corner
830
  // is at (pre_x, pre_y).
831
3.73k
  const int row_start = (block_size_high[bsize] == 4) && ss_y ? -1 : 0;
832
3.73k
  const int col_start = (block_size_wide[bsize] == 4) && ss_x ? -1 : 0;
833
3.73k
  const int pre_x = (mi_x + MI_SIZE * col_start) >> ss_x;
834
3.73k
  const int pre_y = (mi_y + MI_SIZE * row_start) >> ss_y;
835
836
3.73k
  int row = row_start;
837
7.81k
  for (int y = 0; y < b8_h; y += b4_h) {
838
4.07k
    int col = col_start;
839
12.0k
    for (int x = 0; x < b8_w; x += b4_w) {
840
7.99k
      MB_MODE_INFO *this_mbmi = xd->mi[row * xd->mi_stride + col];
841
7.99k
      struct buf_2d *const dst_buf = &pd->dst;
842
7.99k
      uint8_t *dst = dst_buf->buf + dst_buf->stride * y + x;
843
7.99k
      int ref = 0;
844
7.99k
      const RefCntBuffer *ref_buf =
845
7.99k
          get_ref_frame_buf(cm, this_mbmi->ref_frame[ref]);
846
7.99k
      const struct scale_factors *ref_scale_factors =
847
7.99k
          get_ref_scale_factors_const(cm, this_mbmi->ref_frame[ref]);
848
7.99k
      const struct scale_factors *const sf = ref_scale_factors;
849
7.99k
      const struct buf_2d pre_buf = {
850
7.99k
        NULL,
851
7.99k
        (plane == 1) ? ref_buf->buf.u_buffer : ref_buf->buf.v_buffer,
852
7.99k
        ref_buf->buf.uv_crop_width,
853
7.99k
        ref_buf->buf.uv_crop_height,
854
7.99k
        ref_buf->buf.uv_stride,
855
7.99k
      };
856
857
7.99k
      const MV mv = this_mbmi->mv[ref].as_mv;
858
859
7.99k
      InterPredParams inter_pred_params;
860
7.99k
      av1_init_inter_params(&inter_pred_params, b4_w, b4_h, pre_y + y,
861
7.99k
                            pre_x + x, pd->subsampling_x, pd->subsampling_y,
862
7.99k
                            xd->bd, is_cur_buf_hbd(xd), mi->use_intrabc, sf,
863
7.99k
                            &pre_buf, this_mbmi->interp_filters);
864
7.99k
      inter_pred_params.conv_params =
865
7.99k
          get_conv_params_no_round(ref, plane, NULL, 0, is_compound, xd->bd);
866
867
7.99k
      av1_build_one_inter_predictor(dst, dst_buf->stride, &mv,
868
7.99k
                                    &inter_pred_params, xd, mi_x + x, mi_y + y,
869
7.99k
                                    ref, mc_buf, calc_subpel_params_func);
870
871
7.99k
      ++col;
872
7.99k
    }
873
4.07k
    ++row;
874
4.07k
  }
875
3.73k
}
876
877
static void build_inter_predictors_8x8_and_bigger(
878
    const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, const MB_MODE_INFO *mi,
879
    int build_for_obmc, int bw, int bh, int mi_x, int mi_y, uint8_t **mc_buf,
880
409k
    CalcSubpelParamsFunc calc_subpel_params_func) {
881
409k
  const int is_compound = has_second_ref(mi);
882
409k
  const int is_intrabc = is_intrabc_block(mi);
883
409k
  assert(IMPLIES(is_intrabc, !is_compound));
884
409k
  struct macroblockd_plane *const pd = &xd->plane[plane];
885
409k
  struct buf_2d *const dst_buf = &pd->dst;
886
409k
  uint8_t *const dst = dst_buf->buf;
887
888
409k
  int is_global[2] = { 0, 0 };
889
857k
  for (int ref = 0; ref < 1 + is_compound; ++ref) {
890
447k
    const WarpedMotionParams *const wm = &xd->global_motion[mi->ref_frame[ref]];
891
447k
    is_global[ref] = is_global_mv_block(mi, wm->wmtype);
892
447k
  }
893
894
409k
  const BLOCK_SIZE bsize = mi->bsize;
895
409k
  const int ss_x = pd->subsampling_x;
896
409k
  const int ss_y = pd->subsampling_y;
897
409k
  const int row_start =
898
409k
      (block_size_high[bsize] == 4) && ss_y && !build_for_obmc ? -1 : 0;
899
409k
  const int col_start =
900
409k
      (block_size_wide[bsize] == 4) && ss_x && !build_for_obmc ? -1 : 0;
901
409k
  const int pre_x = (mi_x + MI_SIZE * col_start) >> ss_x;
902
409k
  const int pre_y = (mi_y + MI_SIZE * row_start) >> ss_y;
903
904
857k
  for (int ref = 0; ref < 1 + is_compound; ++ref) {
905
447k
    const struct scale_factors *const sf =
906
447k
        is_intrabc ? &cm->sf_identity : xd->block_ref_scale_factors[ref];
907
447k
    struct buf_2d *const pre_buf = is_intrabc ? dst_buf : &pd->pre[ref];
908
447k
    const MV mv = mi->mv[ref].as_mv;
909
447k
    const WarpTypesAllowed warp_types = { is_global[ref],
910
447k
                                          mi->motion_mode == WARPED_CAUSAL };
911
912
447k
    InterPredParams inter_pred_params;
913
447k
    av1_init_inter_params(&inter_pred_params, bw, bh, pre_y, pre_x,
914
447k
                          pd->subsampling_x, pd->subsampling_y, xd->bd,
915
447k
                          is_cur_buf_hbd(xd), mi->use_intrabc, sf, pre_buf,
916
447k
                          mi->interp_filters);
917
447k
    if (is_compound) av1_init_comp_mode(&inter_pred_params);
918
447k
    inter_pred_params.conv_params = get_conv_params_no_round(
919
447k
        ref, plane, xd->tmp_conv_dst, MAX_SB_SIZE, is_compound, xd->bd);
920
921
447k
    av1_dist_wtd_comp_weight_assign(
922
447k
        cm, mi, &inter_pred_params.conv_params.fwd_offset,
923
447k
        &inter_pred_params.conv_params.bck_offset,
924
447k
        &inter_pred_params.conv_params.use_dist_wtd_comp_avg, is_compound);
925
926
447k
    if (!build_for_obmc)
927
431k
      av1_init_warp_params(&inter_pred_params, &warp_types, ref, xd, mi);
928
929
447k
    if (is_masked_compound_type(mi->interinter_comp.type)) {
930
16.0k
      inter_pred_params.sb_type = mi->bsize;
931
16.0k
      inter_pred_params.mask_comp = mi->interinter_comp;
932
16.0k
      if (ref == 1) {
933
8.00k
        inter_pred_params.conv_params.do_average = 0;
934
8.00k
        inter_pred_params.comp_mode = MASK_COMP;
935
8.00k
      }
936
      // Assign physical buffer.
937
16.0k
      inter_pred_params.mask_comp.seg_mask = xd->seg_mask;
938
16.0k
    }
939
940
447k
    av1_build_one_inter_predictor(dst, dst_buf->stride, &mv, &inter_pred_params,
941
447k
                                  xd, mi_x, mi_y, ref, mc_buf,
942
447k
                                  calc_subpel_params_func);
943
447k
  }
944
409k
}
945
946
void av1_build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd,
947
                                int plane, const MB_MODE_INFO *mi,
948
                                int build_for_obmc, int bw, int bh, int mi_x,
949
                                int mi_y, uint8_t **mc_buf,
950
413k
                                CalcSubpelParamsFunc calc_subpel_params_func) {
951
413k
  if (is_sub8x8_inter(xd, plane, mi->bsize, is_intrabc_block(mi),
952
413k
                      build_for_obmc)) {
953
3.73k
    assert(bw < 8 || bh < 8);
954
3.73k
    build_inter_predictors_sub8x8(cm, xd, plane, mi, mi_x, mi_y, mc_buf,
955
3.73k
                                  calc_subpel_params_func);
956
409k
  } else {
957
409k
    build_inter_predictors_8x8_and_bigger(cm, xd, plane, mi, build_for_obmc, bw,
958
409k
                                          bh, mi_x, mi_y, mc_buf,
959
409k
                                          calc_subpel_params_func);
960
409k
  }
961
413k
}
962
void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
963
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
964
10.0M
                          const int plane_start, const int plane_end) {
965
  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
966
  // the static analysis warnings.
967
39.3M
  for (int i = plane_start; i < AOMMIN(plane_end, MAX_MB_PLANE); ++i) {
968
29.2M
    struct macroblockd_plane *const pd = &planes[i];
969
29.2M
    const int is_uv = i > 0;
970
29.2M
    setup_pred_plane(&pd->dst, bsize, src->buffers[i], src->crop_widths[is_uv],
971
29.2M
                     src->crop_heights[is_uv], src->strides[is_uv], mi_row,
972
29.2M
                     mi_col, NULL, pd->subsampling_x, pd->subsampling_y);
973
29.2M
  }
974
10.0M
}
975
976
void av1_setup_pre_planes(MACROBLOCKD *xd, int idx,
977
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
978
                          const struct scale_factors *sf,
979
68.2k
                          const int num_planes) {
980
68.2k
  if (src != NULL) {
981
    // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
982
    // the static analysis warnings.
983
253k
    for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); ++i) {
984
185k
      struct macroblockd_plane *const pd = &xd->plane[i];
985
185k
      const int is_uv = i > 0;
986
185k
      setup_pred_plane(&pd->pre[idx], xd->mi[0]->bsize, src->buffers[i],
987
185k
                       src->crop_widths[is_uv], src->crop_heights[is_uv],
988
185k
                       src->strides[is_uv], mi_row, mi_col, sf,
989
185k
                       pd->subsampling_x, pd->subsampling_y);
990
185k
    }
991
68.2k
  }
992
68.2k
}
993
994
// obmc_mask_N[overlap_position]
995
static const uint8_t obmc_mask_1[1] = { 64 };
996
DECLARE_ALIGNED(2, static const uint8_t, obmc_mask_2[2]) = { 45, 64 };
997
998
DECLARE_ALIGNED(4, static const uint8_t, obmc_mask_4[4]) = { 39, 50, 59, 64 };
999
1000
static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 64, 64 };
1001
1002
static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54,
1003
                                          56, 58, 60, 61, 64, 64, 64, 64 };
1004
1005
static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44,
1006
                                          45, 47, 48, 50, 51, 52, 53, 55,
1007
                                          56, 57, 58, 59, 60, 60, 61, 62,
1008
                                          64, 64, 64, 64, 64, 64, 64, 64 };
1009
1010
static const uint8_t obmc_mask_64[64] = {
1011
  33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
1012
  45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
1013
  56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
1014
  62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
1015
};
1016
1017
16.5k
const uint8_t *av1_get_obmc_mask(int length) {
1018
16.5k
  switch (length) {
1019
0
    case 1: return obmc_mask_1;
1020
2.05k
    case 2: return obmc_mask_2;
1021
12.5k
    case 4: return obmc_mask_4;
1022
1.91k
    case 8: return obmc_mask_8;
1023
0
    case 16: return obmc_mask_16;
1024
0
    case 32: return obmc_mask_32;
1025
0
    case 64: return obmc_mask_64;
1026
0
    default: assert(0); return NULL;
1027
16.5k
  }
1028
16.5k
}
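
A sketch of how these masks are applied by the OBMC blend calls further down. aom_blend_a64_vmask/hmask are assumed to compute (m * src0 + (64 - m) * src1 + 32) >> 6 per pixel, with src0 the current prediction and src1 the neighbouring block's prediction, so the neighbour's influence is strongest right at the shared edge and disappears once the mask reaches 64.

#include <stdio.h>

int main(void) {
  // obmc_mask_8 as defined above.
  const unsigned char mask8[8] = { 36, 42, 48, 53, 57, 61, 64, 64 };
  const int cur = 120, nbr = 40;  // example pixel values along one column
  for (int i = 0; i < 8; ++i) {
    const int m = mask8[i];
    const int out = (m * cur + (64 - m) * nbr + 32) >> 6;  // assumed blend_a64 formula
    printf("offset %d from the edge: mask %2d -> %d\n", i, m, out);
  }
  return 0;
}
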
1029
1030
static INLINE void increment_int_ptr(MACROBLOCKD *xd, int rel_mi_row,
1031
                                     int rel_mi_col, uint8_t op_mi_size,
1032
                                     int dir, MB_MODE_INFO *mi, void *fun_ctxt,
1033
21.4k
                                     const int num_planes) {
1034
21.4k
  (void)xd;
1035
21.4k
  (void)rel_mi_row;
1036
21.4k
  (void)rel_mi_col;
1037
21.4k
  (void)op_mi_size;
1038
21.4k
  (void)dir;
1039
21.4k
  (void)mi;
1040
21.4k
  ++*(int *)fun_ctxt;
1041
21.4k
  (void)num_planes;
1042
21.4k
}
1043
1044
47.9k
void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd) {
1045
47.9k
  MB_MODE_INFO *mbmi = xd->mi[0];
1046
1047
47.9k
  mbmi->overlappable_neighbors = 0;
1048
1049
47.9k
  if (!is_motion_variation_allowed_bsize(mbmi->bsize)) return;
1050
1051
29.4k
  foreach_overlappable_nb_above(cm, xd, INT_MAX, increment_int_ptr,
1052
29.4k
                                &mbmi->overlappable_neighbors);
1053
29.4k
  if (mbmi->overlappable_neighbors) return;
1054
15.9k
  foreach_overlappable_nb_left(cm, xd, INT_MAX, increment_int_ptr,
1055
15.9k
                               &mbmi->overlappable_neighbors);
1056
15.9k
}
1057
1058
// HW does not support < 4x4 prediction. To limit the bandwidth requirement, if
1059
// the block size of the current plane is smaller than 8x8, only blend with the
1060
// left neighbor(s) (skip blending with the above side).
1061
#define DISABLE_CHROMA_U8X8_OBMC 0  // 0: one-sided obmc; 1: disable
1062
1063
int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,
1064
37.4k
                               const struct macroblockd_plane *pd, int dir) {
1065
37.4k
  assert(is_motion_variation_allowed_bsize(bsize));
1066
1067
37.4k
  const BLOCK_SIZE bsize_plane =
1068
37.4k
      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
1069
37.4k
  switch (bsize_plane) {
1070
#if DISABLE_CHROMA_U8X8_OBMC
1071
    case BLOCK_4X4:
1072
    case BLOCK_8X4:
1073
    case BLOCK_4X8: return 1; break;
1074
#else
1075
0
    case BLOCK_4X4:
1076
0
    case BLOCK_8X4:
1077
8.56k
    case BLOCK_4X8: return dir == 0; break;
1078
0
#endif
1079
28.9k
    default: return 0;
1080
37.4k
  }
1081
37.4k
}
1082
1083
6.24k
void av1_modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
1084
6.24k
  mbmi->ref_frame[1] = NONE_FRAME;
1085
6.24k
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
1086
1087
6.24k
  return;
1088
6.24k
}
1089
1090
struct obmc_inter_pred_ctxt {
1091
  uint8_t **adjacent;
1092
  int *adjacent_stride;
1093
};
1094
1095
static INLINE void build_obmc_inter_pred_above(
1096
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
1097
3.46k
    int dir, MB_MODE_INFO *above_mi, void *fun_ctxt, const int num_planes) {
1098
3.46k
  (void)above_mi;
1099
3.46k
  (void)rel_mi_row;
1100
3.46k
  (void)dir;
1101
3.46k
  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
1102
3.46k
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
1103
3.46k
  const int overlap =
1104
3.46k
      AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
1105
1106
13.8k
  for (int plane = 0; plane < num_planes; ++plane) {
1107
10.3k
    const struct macroblockd_plane *pd = &xd->plane[plane];
1108
10.3k
    const int bw = (op_mi_size * MI_SIZE) >> pd->subsampling_x;
1109
10.3k
    const int bh = overlap >> pd->subsampling_y;
1110
10.3k
    const int plane_col = (rel_mi_col * MI_SIZE) >> pd->subsampling_x;
1111
1112
10.3k
    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
1113
1114
8.16k
    const int dst_stride = pd->dst.stride;
1115
8.16k
    uint8_t *const dst = &pd->dst.buf[plane_col];
1116
8.16k
    const int tmp_stride = ctxt->adjacent_stride[plane];
1117
8.16k
    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_col];
1118
8.16k
    const uint8_t *const mask = av1_get_obmc_mask(bh);
1119
8.16k
#if CONFIG_AV1_HIGHBITDEPTH
1120
8.16k
    const int is_hbd = is_cur_buf_hbd(xd);
1121
8.16k
    if (is_hbd)
1122
6.58k
      aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
1123
6.58k
                                 tmp_stride, mask, bw, bh, xd->bd);
1124
1.58k
    else
1125
1.58k
      aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
1126
1.58k
                          mask, bw, bh);
1127
#else
1128
    aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
1129
                        bw, bh);
1130
#endif
1131
8.16k
  }
1132
3.46k
}
1133
1134
static INLINE void build_obmc_inter_pred_left(
1135
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
1136
2.78k
    int dir, MB_MODE_INFO *left_mi, void *fun_ctxt, const int num_planes) {
1137
2.78k
  (void)left_mi;
1138
2.78k
  (void)rel_mi_col;
1139
2.78k
  (void)dir;
1140
2.78k
  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
1141
2.78k
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
1142
2.78k
  const int overlap =
1143
2.78k
      AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
1144
1145
11.1k
  for (int plane = 0; plane < num_planes; ++plane) {
1146
8.35k
    const struct macroblockd_plane *pd = &xd->plane[plane];
1147
8.35k
    const int bw = overlap >> pd->subsampling_x;
1148
8.35k
    const int bh = (op_mi_size * MI_SIZE) >> pd->subsampling_y;
1149
8.35k
    const int plane_row = (rel_mi_row * MI_SIZE) >> pd->subsampling_y;
1150
1151
8.35k
    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
1152
1153
8.35k
    const int dst_stride = pd->dst.stride;
1154
8.35k
    uint8_t *const dst = &pd->dst.buf[plane_row * dst_stride];
1155
8.35k
    const int tmp_stride = ctxt->adjacent_stride[plane];
1156
8.35k
    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_row * tmp_stride];
1157
8.35k
    const uint8_t *const mask = av1_get_obmc_mask(bw);
1158
1159
8.35k
#if CONFIG_AV1_HIGHBITDEPTH
1160
8.35k
    const int is_hbd = is_cur_buf_hbd(xd);
1161
8.35k
    if (is_hbd)
1162
5.69k
      aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
1163
5.69k
                                 tmp_stride, mask, bw, bh, xd->bd);
1164
2.66k
    else
1165
2.66k
      aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
1166
2.66k
                          mask, bw, bh);
1167
#else
1168
    aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
1169
                        bw, bh);
1170
#endif
1171
8.35k
  }
1172
2.78k
}
1173
1174
// This function combines motion compensated predictions that are generated by
1175
// top/left neighboring blocks' inter predictors with the regular inter
1176
// prediction. We assume the original prediction (bmc) is stored in
1177
// xd->plane[].dst.buf
1178
void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
1179
                                     uint8_t *above[MAX_MB_PLANE],
1180
                                     int above_stride[MAX_MB_PLANE],
1181
                                     uint8_t *left[MAX_MB_PLANE],
1182
4.61k
                                     int left_stride[MAX_MB_PLANE]) {
1183
4.61k
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
1184
1185
  // handle above row
1186
4.61k
  struct obmc_inter_pred_ctxt ctxt_above = { above, above_stride };
1187
4.61k
  foreach_overlappable_nb_above(cm, xd,
1188
4.61k
                                max_neighbor_obmc[mi_size_wide_log2[bsize]],
1189
4.61k
                                build_obmc_inter_pred_above, &ctxt_above);
1190
1191
  // handle left column
1192
4.61k
  struct obmc_inter_pred_ctxt ctxt_left = { left, left_stride };
1193
4.61k
  foreach_overlappable_nb_left(cm, xd,
1194
4.61k
                               max_neighbor_obmc[mi_size_high_log2[bsize]],
1195
4.61k
                               build_obmc_inter_pred_left, &ctxt_left);
1196
4.61k
}
1197
1198
void av1_setup_obmc_dst_bufs(MACROBLOCKD *xd, uint8_t **dst_buf1,
1199
4.61k
                             uint8_t **dst_buf2) {
1200
4.61k
  if (is_cur_buf_hbd(xd)) {
1201
3.34k
    int len = sizeof(uint16_t);
1202
3.34k
    dst_buf1[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0]);
1203
3.34k
    dst_buf1[1] =
1204
3.34k
        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * len);
1205
3.34k
    dst_buf1[2] =
1206
3.34k
        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * 2 * len);
1207
3.34k
    dst_buf2[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1]);
1208
3.34k
    dst_buf2[1] =
1209
3.34k
        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * len);
1210
3.34k
    dst_buf2[2] =
1211
3.34k
        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2 * len);
1212
3.34k
  } else {
1213
1.26k
    dst_buf1[0] = xd->tmp_obmc_bufs[0];
1214
1.26k
    dst_buf1[1] = xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE;
1215
1.26k
    dst_buf1[2] = xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * 2;
1216
1.26k
    dst_buf2[0] = xd->tmp_obmc_bufs[1];
1217
1.26k
    dst_buf2[1] = xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE;
1218
1.26k
    dst_buf2[2] = xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2;
1219
1.26k
  }
1220
4.61k
}
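// A minimal usage sketch showing how the two scratch buffers are intended to
// be chained with the blend entry point above. The MAX_SB_SIZE strides are an
// assumption here, and the step that fills the buffers with the neighbours'
// predictions is handled by the build_prediction_by_* setup paths below.
static void obmc_usage_sketch(const AV1_COMMON *cm, MACROBLOCKD *xd) {
  uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
  int stride1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
  int stride2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };

  // Point the scratch pointers at xd->tmp_obmc_bufs[] (high bit depth aware).
  av1_setup_obmc_dst_bufs(xd, dst_buf1, dst_buf2);

  // ... build the above neighbours' predictions into dst_buf1 and the left
  // neighbours' predictions into dst_buf2 ...

  // Blend the neighbour predictions into xd->plane[].dst.buf.
  av1_build_obmc_inter_prediction(cm, xd, dst_buf1, stride1, dst_buf2,
                                  stride2);
}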
1221
1222
void av1_setup_build_prediction_by_above_pred(
1223
    MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width,
1224
    MB_MODE_INFO *above_mbmi, struct build_prediction_ctxt *ctxt,
1225
3.46k
    const int num_planes) {
1226
3.46k
  const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->bsize);
1227
3.46k
  const int above_mi_col = xd->mi_col + rel_mi_col;
1228
1229
3.46k
  av1_modify_neighbor_predictor_for_obmc(above_mbmi);
1230
1231
13.8k
  for (int j = 0; j < num_planes; ++j) {
1232
10.3k
    struct macroblockd_plane *const pd = &xd->plane[j];
1233
10.3k
    setup_pred_plane(&pd->dst, a_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
1234
10.3k
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], 0, rel_mi_col,
1235
10.3k
                     NULL, pd->subsampling_x, pd->subsampling_y);
1236
10.3k
  }
1237
1238
3.46k
  const int num_refs = 1 + has_second_ref(above_mbmi);
1239
1240
6.92k
  for (int ref = 0; ref < num_refs; ++ref) {
1241
3.46k
    const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];
1242
1243
3.46k
    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
1244
3.46k
    const struct scale_factors *const sf =
1245
3.46k
        get_ref_scale_factors_const(ctxt->cm, frame);
1246
3.46k
    xd->block_ref_scale_factors[ref] = sf;
1247
3.46k
    if (!av1_is_valid_scale(sf))
1248
0
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
1249
0
                         "Reference frame has invalid dimensions");
1250
3.46k
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, xd->mi_row, above_mi_col, sf,
1251
3.46k
                         num_planes);
1252
3.46k
  }
1253
1254
3.46k
  xd->mb_to_left_edge = 8 * MI_SIZE * (-above_mi_col);
1255
3.46k
  xd->mb_to_right_edge =
1256
3.46k
      ctxt->mb_to_far_edge +
1257
3.46k
      (xd->width - rel_mi_col - above_mi_width) * MI_SIZE * 8;
1258
3.46k
}
1259
1260
void av1_setup_build_prediction_by_left_pred(MACROBLOCKD *xd, int rel_mi_row,
1261
                                             uint8_t left_mi_height,
1262
                                             MB_MODE_INFO *left_mbmi,
1263
                                             struct build_prediction_ctxt *ctxt,
1264
2.78k
                                             const int num_planes) {
1265
2.78k
  const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->bsize);
1266
2.78k
  const int left_mi_row = xd->mi_row + rel_mi_row;
1267
1268
2.78k
  av1_modify_neighbor_predictor_for_obmc(left_mbmi);
1269
1270
11.1k
  for (int j = 0; j < num_planes; ++j) {
1271
8.35k
    struct macroblockd_plane *const pd = &xd->plane[j];
1272
8.35k
    setup_pred_plane(&pd->dst, l_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
1273
8.35k
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], rel_mi_row, 0,
1274
8.35k
                     NULL, pd->subsampling_x, pd->subsampling_y);
1275
8.35k
  }
1276
1277
2.78k
  const int num_refs = 1 + has_second_ref(left_mbmi);
1278
1279
5.56k
  for (int ref = 0; ref < num_refs; ++ref) {
1280
2.78k
    const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];
1281
1282
2.78k
    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
1283
2.78k
    const struct scale_factors *const ref_scale_factors =
1284
2.78k
        get_ref_scale_factors_const(ctxt->cm, frame);
1285
1286
2.78k
    xd->block_ref_scale_factors[ref] = ref_scale_factors;
1287
2.78k
    if (!av1_is_valid_scale(ref_scale_factors))
1288
0
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
1289
0
                         "Reference frame has invalid dimensions");
1290
2.78k
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, left_mi_row, xd->mi_col,
1291
2.78k
                         ref_scale_factors, num_planes);
1292
2.78k
  }
1293
1294
2.78k
  xd->mb_to_top_edge = GET_MV_SUBPEL(MI_SIZE * (-left_mi_row));
1295
2.78k
  xd->mb_to_bottom_edge =
1296
2.78k
      ctxt->mb_to_far_edge +
1297
2.78k
      GET_MV_SUBPEL((xd->height - rel_mi_row - left_mi_height) * MI_SIZE);
1298
2.78k
}
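// Both setup helpers express the mb_to_*_edge values in 1/8-pel units:
// GET_MV_SUBPEL(x) expands to (x) * 8, so the "8 * MI_SIZE * ..." form in the
// above variant is the same scaling written out. A small worked example,
// assuming MI_SIZE == 4 as in AV1:
//   above_mi_col = xd->mi_col + rel_mi_col = 12
//   block start in luma pixels: 12 * MI_SIZE = 48
//   xd->mb_to_left_edge = 8 * MI_SIZE * (-12) = -384   (== -(48 << 3))
// The left variant computes the vertical edges the same way, e.g.
// xd->mb_to_top_edge = GET_MV_SUBPEL(MI_SIZE * (-left_mi_row)).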
1299
1300
static AOM_INLINE void combine_interintra(
1301
    INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
1302
    int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
1303
    uint8_t *comppred, int compstride, const uint8_t *interpred,
1304
2.10k
    int interstride, const uint8_t *intrapred, int intrastride) {
1305
2.10k
  const int bw = block_size_wide[plane_bsize];
1306
2.10k
  const int bh = block_size_high[plane_bsize];
1307
1308
2.10k
  if (use_wedge_interintra) {
1309
743
    if (av1_is_wedge_used(bsize)) {
1310
743
      const uint8_t *mask =
1311
743
          av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
1312
743
      const int subw = 2 * mi_size_wide[bsize] == bw;
1313
743
      const int subh = 2 * mi_size_high[bsize] == bh;
1314
743
      aom_blend_a64_mask(comppred, compstride, intrapred, intrastride,
1315
743
                         interpred, interstride, mask, block_size_wide[bsize],
1316
743
                         bw, bh, subw, subh);
1317
743
    }
1318
743
    return;
1319
743
  }
1320
1321
1.36k
  const uint8_t *mask = smooth_interintra_mask_buf[mode][plane_bsize];
1322
1.36k
  aom_blend_a64_mask(comppred, compstride, intrapred, intrastride, interpred,
1323
1.36k
                     interstride, mask, bw, bw, bh, 0, 0);
1324
1.36k
}
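// The subw/subh flags tell aom_blend_a64_mask whether the wedge mask, which
// av1_get_contiguous_soft_mask returns at luma-block resolution, has to be
// read with 2:1 subsampling for this plane. A worked example, assuming 4:2:0
// chroma and the standard AV1 block-size tables:
//   bsize       = BLOCK_16X16           -> mi_size_wide[bsize] = 4
//   plane_bsize = BLOCK_8X8 (chroma)    -> bw = 8, bh = 8
//   subw = (2 * 4 == 8) = 1, subh = 1
// so the luma-resolution mask is sampled 2:1 in each direction when blending
// the 8x8 chroma plane; for the luma plane bw == 16 and both flags are 0.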
1325
1326
#if CONFIG_AV1_HIGHBITDEPTH
1327
static AOM_INLINE void combine_interintra_highbd(
1328
    INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
1329
    int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
1330
    uint8_t *comppred8, int compstride, const uint8_t *interpred8,
1331
4.11k
    int interstride, const uint8_t *intrapred8, int intrastride, int bd) {
1332
4.11k
  const int bw = block_size_wide[plane_bsize];
1333
4.11k
  const int bh = block_size_high[plane_bsize];
1334
1335
4.11k
  if (use_wedge_interintra) {
1336
1.86k
    if (av1_is_wedge_used(bsize)) {
1337
1.86k
      const uint8_t *mask =
1338
1.86k
          av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
1339
1.86k
      const int subh = 2 * mi_size_high[bsize] == bh;
1340
1.86k
      const int subw = 2 * mi_size_wide[bsize] == bw;
1341
1.86k
      aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
1342
1.86k
                                interpred8, interstride, mask,
1343
1.86k
                                block_size_wide[bsize], bw, bh, subw, subh, bd);
1344
1.86k
    }
1345
1.86k
    return;
1346
1.86k
  }
1347
1348
2.25k
  uint8_t mask[MAX_SB_SQUARE];
1349
2.25k
  build_smooth_interintra_mask(mask, bw, plane_bsize, mode);
1350
2.25k
  aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
1351
2.25k
                            interpred8, interstride, mask, bw, bw, bh, 0, 0,
1352
2.25k
                            bd);
1353
2.25k
}
1354
#endif
1355
1356
void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm,
1357
                                               MACROBLOCKD *xd,
1358
                                               BLOCK_SIZE bsize, int plane,
1359
                                               const BUFFER_SET *ctx,
1360
6.22k
                                               uint8_t *dst, int dst_stride) {
1361
6.22k
  struct macroblockd_plane *const pd = &xd->plane[plane];
1362
6.22k
  const int ssx = xd->plane[plane].subsampling_x;
1363
6.22k
  const int ssy = xd->plane[plane].subsampling_y;
1364
6.22k
  BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
1365
6.22k
  PREDICTION_MODE mode = interintra_to_intra_mode[xd->mi[0]->interintra_mode];
1366
6.22k
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_Y] == 0);
1367
6.22k
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_UV] == 0);
1368
6.22k
  assert(xd->mi[0]->filter_intra_mode_info.use_filter_intra == 0);
1369
6.22k
  assert(xd->mi[0]->use_intrabc == 0);
1370
6.22k
  const SequenceHeader *seq_params = cm->seq_params;
1371
1372
6.22k
  av1_predict_intra_block(xd, seq_params->sb_size,
1373
6.22k
                          seq_params->enable_intra_edge_filter, pd->width,
1374
6.22k
                          pd->height, max_txsize_rect_lookup[plane_bsize], mode,
1375
6.22k
                          0, 0, FILTER_INTRA_MODES, ctx->plane[plane],
1376
6.22k
                          ctx->stride[plane], dst, dst_stride, 0, 0, plane);
1377
6.22k
}
1378
1379
void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
1380
                            const uint8_t *inter_pred, int inter_stride,
1381
6.22k
                            const uint8_t *intra_pred, int intra_stride) {
1382
6.22k
  const int ssx = xd->plane[plane].subsampling_x;
1383
6.22k
  const int ssy = xd->plane[plane].subsampling_y;
1384
6.22k
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
1385
6.22k
#if CONFIG_AV1_HIGHBITDEPTH
1386
6.22k
  if (is_cur_buf_hbd(xd)) {
1387
4.11k
    combine_interintra_highbd(
1388
4.11k
        xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
1389
4.11k
        xd->mi[0]->interintra_wedge_index, INTERINTRA_WEDGE_SIGN, bsize,
1390
4.11k
        plane_bsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
1391
4.11k
        inter_pred, inter_stride, intra_pred, intra_stride, xd->bd);
1392
4.11k
    return;
1393
4.11k
  }
1394
2.10k
#endif
1395
2.10k
  combine_interintra(
1396
2.10k
      xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
1397
2.10k
      xd->mi[0]->interintra_wedge_index, INTERINTRA_WEDGE_SIGN, bsize,
1398
2.10k
      plane_bsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
1399
2.10k
      inter_pred, inter_stride, intra_pred, intra_stride);
1400
2.10k
}
1401
1402
// build interintra_predictors for one plane
1403
void av1_build_interintra_predictor(const AV1_COMMON *cm, MACROBLOCKD *xd,
1404
                                    uint8_t *pred, int stride,
1405
                                    const BUFFER_SET *ctx, int plane,
1406
6.22k
                                    BLOCK_SIZE bsize) {
1407
6.22k
  assert(bsize < BLOCK_SIZES_ALL);
1408
6.22k
  if (is_cur_buf_hbd(xd)) {
1409
4.11k
    DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
1410
4.11k
    av1_build_intra_predictors_for_interintra(
1411
4.11k
        cm, xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(intrapredictor),
1412
4.11k
        MAX_SB_SIZE);
1413
4.11k
    av1_combine_interintra(xd, bsize, plane, pred, stride,
1414
4.11k
                           CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
1415
4.11k
  } else {
1416
2.10k
    DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]);
1417
2.10k
    av1_build_intra_predictors_for_interintra(cm, xd, bsize, plane, ctx,
1418
2.10k
                                              intrapredictor, MAX_SB_SIZE);
1419
2.10k
    av1_combine_interintra(xd, bsize, plane, pred, stride, intrapredictor,
1420
2.10k
                           MAX_SB_SIZE);
1421
2.10k
  }
1422
6.22k
}