Coverage Report

Created: 2026-02-14 07:09

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/aom/av1/common/reconinter.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
#include <stdio.h>
14
#include <limits.h>
15
16
#include "config/aom_config.h"
17
#include "config/aom_dsp_rtcd.h"
18
#include "config/aom_scale_rtcd.h"
19
20
#include "aom/aom_integer.h"
21
#include "aom_dsp/blend.h"
22
#include "aom_ports/aom_once.h"
23
24
#include "av1/common/av1_common_int.h"
25
#include "av1/common/blockd.h"
26
#include "av1/common/mvref_common.h"
27
#include "av1/common/obmc.h"
28
#include "av1/common/reconinter.h"
29
#include "av1/common/reconintra.h"
30
31
// This function will determine whether or not to create a warped
32
// prediction.
33
static int allow_warp(const MB_MODE_INFO *const mbmi,
34
                      const WarpTypesAllowed *const warp_types,
35
                      const WarpedMotionParams *const gm_params,
36
                      int build_for_obmc, const struct scale_factors *const sf,
37
147k
                      WarpedMotionParams *final_warp_params) {
38
  // Note: As per the spec, we must test the fixed point scales here, which are
39
  // at a higher precision (1 << 14) than the xs and ys in subpel_params (that
40
  // have 1 << 10 precision).
41
147k
  if (av1_is_scaled(sf)) return 0;
42
43
142k
  if (final_warp_params != NULL) *final_warp_params = default_warp_params;
44
45
142k
  if (build_for_obmc) return 0;
46
47
142k
  if (warp_types->local_warp_allowed && !mbmi->wm_params.invalid) {
48
5.30k
    if (final_warp_params != NULL)
49
5.30k
      memcpy(final_warp_params, &mbmi->wm_params, sizeof(*final_warp_params));
50
5.30k
    return 1;
51
137k
  } else if (warp_types->global_warp_allowed && !gm_params->invalid) {
52
10.2k
    if (final_warp_params != NULL)
53
10.2k
      memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
54
10.2k
    return 1;
55
10.2k
  }
56
57
126k
  return 0;
58
142k
}
59
60
void av1_init_warp_params(InterPredParams *inter_pred_params,
61
                          const WarpTypesAllowed *warp_types, int ref,
62
310k
                          const MACROBLOCKD *xd, const MB_MODE_INFO *mi) {
63
310k
  if (inter_pred_params->block_height < 8 || inter_pred_params->block_width < 8)
64
149k
    return;
65
66
161k
  if (xd->cur_frame_force_integer_mv) return;
67
68
147k
  if (allow_warp(mi, warp_types, &xd->global_motion[mi->ref_frame[ref]], 0,
69
147k
                 inter_pred_params->scale_factors,
70
147k
                 &inter_pred_params->warp_params)) {
71
#if CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
72
    aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_FEATURE,
73
                       "Warped motion is disabled in realtime only build.");
74
#endif  // CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
75
15.5k
    inter_pred_params->mode = WARP_PRED;
76
15.5k
  }
77
147k
}
78
79
void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
80
                              int dst_stride,
81
                              InterPredParams *inter_pred_params,
82
324k
                              const SubpelParams *subpel_params) {
83
324k
  assert(IMPLIES(inter_pred_params->conv_params.is_compound,
84
324k
                 inter_pred_params->conv_params.dst != NULL));
85
86
324k
  if (inter_pred_params->mode == TRANSLATION_PRED) {
87
308k
#if CONFIG_AV1_HIGHBITDEPTH
88
308k
    if (inter_pred_params->use_hbd_buf) {
89
129k
      highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
90
129k
                             inter_pred_params->block_width,
91
129k
                             inter_pred_params->block_height,
92
129k
                             &inter_pred_params->conv_params,
93
129k
                             inter_pred_params->interp_filter_params,
94
129k
                             inter_pred_params->bit_depth);
95
178k
    } else {
96
178k
      inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
97
178k
                      inter_pred_params->block_width,
98
178k
                      inter_pred_params->block_height,
99
178k
                      &inter_pred_params->conv_params,
100
178k
                      inter_pred_params->interp_filter_params);
101
178k
    }
102
#else
103
    inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
104
                    inter_pred_params->block_width,
105
                    inter_pred_params->block_height,
106
                    &inter_pred_params->conv_params,
107
                    inter_pred_params->interp_filter_params);
108
#endif
109
308k
  }
110
15.5k
#if !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
111
  // TODO(jingning): av1_warp_plane() can be further cleaned up.
112
15.5k
  else if (inter_pred_params->mode == WARP_PRED) {
113
15.5k
    av1_warp_plane(
114
15.5k
        &inter_pred_params->warp_params, inter_pred_params->use_hbd_buf,
115
15.5k
        inter_pred_params->bit_depth, inter_pred_params->ref_frame_buf.buf0,
116
15.5k
        inter_pred_params->ref_frame_buf.width,
117
15.5k
        inter_pred_params->ref_frame_buf.height,
118
15.5k
        inter_pred_params->ref_frame_buf.stride, dst,
119
15.5k
        inter_pred_params->pix_col, inter_pred_params->pix_row,
120
15.5k
        inter_pred_params->block_width, inter_pred_params->block_height,
121
15.5k
        dst_stride, inter_pred_params->subsampling_x,
122
15.5k
        inter_pred_params->subsampling_y, &inter_pred_params->conv_params);
123
15.5k
  }
124
0
#endif  // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
125
0
  else {
126
0
    assert(0 && "Unsupported inter_pred_params->mode");
127
0
  }
128
324k
}
129
130
static const uint8_t wedge_master_oblique_odd[MASK_MASTER_SIZE] = {
131
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
132
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  6,  18,
133
  37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
134
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
135
};
136
static const uint8_t wedge_master_oblique_even[MASK_MASTER_SIZE] = {
137
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
138
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  4,  11, 27,
139
  46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
140
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
141
};
142
static const uint8_t wedge_master_vertical[MASK_MASTER_SIZE] = {
143
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
144
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  7,  21,
145
  43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
146
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
147
};
148
149
// Copies `width` bytes from `src` to `dst` displaced by `shift` positions.
//
// A non-negative `shift` moves the data towards higher indices and fills the
// vacated leading bytes with src[0]; a negative `shift` moves it towards
// lower indices and fills the vacated trailing bytes with src[width - 1].
// `src` and `dst` must not overlap, and |shift| must not exceed `width`.
static inline void shift_copy(const uint8_t *src, uint8_t *dst, int shift,
                              int width) {
  if (shift < 0) {
    const int left = -shift;
    memcpy(dst, src + left, width - left);
    memset(dst + width - left, src[width - 1], left);
    return;
  }

  memcpy(dst + shift, src, width - shift);
  memset(dst, src[0], shift);
}
160
161
/* clang-format off */
162
DECLARE_ALIGNED(16, static uint8_t,
163
                wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]) = {
164
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
165
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
166
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
167
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
168
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
169
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
170
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
171
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
172
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
173
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
174
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
175
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
176
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
177
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
178
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
179
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
180
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
181
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
182
  { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, },
183
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, },
184
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
185
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
186
};
187
/* clang-format on */
188
189
// [negative][direction]
190
DECLARE_ALIGNED(
191
    16, static uint8_t,
192
    wedge_mask_obl[2][WEDGE_DIRECTIONS][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);
193
194
// 4 * MAX_WEDGE_SQUARE is an easy to compute and fairly tight upper bound
195
// on the sum of all mask sizes up to and including MAX_WEDGE_SQUARE.
196
DECLARE_ALIGNED(16, static uint8_t,
197
                wedge_mask_buf[2 * MAX_WEDGE_TYPES * 4 * MAX_WEDGE_SQUARE]);
198
199
DECLARE_ALIGNED(16, static uint8_t,
200
                smooth_interintra_mask_buf[INTERINTRA_MODES][BLOCK_SIZES_ALL]
201
                                          [MAX_WEDGE_SQUARE]);
202
203
static wedge_masks_type wedge_masks[BLOCK_SIZES_ALL][2];
204
205
static const wedge_code_type wedge_codebook_16_hgtw[16] = {
206
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
207
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
208
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
209
  { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
210
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
211
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
212
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
213
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
214
};
215
216
static const wedge_code_type wedge_codebook_16_hltw[16] = {
217
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
218
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
219
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 4, 4 },
220
  { WEDGE_VERTICAL, 6, 4 },   { WEDGE_HORIZONTAL, 4, 4 },
221
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
222
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
223
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
224
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
225
};
226
227
static const wedge_code_type wedge_codebook_16_heqw[16] = {
228
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
229
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
230
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
231
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
232
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
233
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
234
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
235
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
236
};
237
238
const wedge_params_type av1_wedge_params_lookup[BLOCK_SIZES_ALL] = {
239
  { 0, NULL, NULL, NULL },
240
  { 0, NULL, NULL, NULL },
241
  { 0, NULL, NULL, NULL },
242
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8],
243
    wedge_masks[BLOCK_8X8] },
244
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16],
245
    wedge_masks[BLOCK_8X16] },
246
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8],
247
    wedge_masks[BLOCK_16X8] },
248
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16],
249
    wedge_masks[BLOCK_16X16] },
250
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32],
251
    wedge_masks[BLOCK_16X32] },
252
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16],
253
    wedge_masks[BLOCK_32X16] },
254
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32],
255
    wedge_masks[BLOCK_32X32] },
256
  { 0, NULL, NULL, NULL },
257
  { 0, NULL, NULL, NULL },
258
  { 0, NULL, NULL, NULL },
259
  { 0, NULL, NULL, NULL },
260
  { 0, NULL, NULL, NULL },
261
  { 0, NULL, NULL, NULL },
262
  { 0, NULL, NULL, NULL },
263
  { 0, NULL, NULL, NULL },
264
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32],
265
    wedge_masks[BLOCK_8X32] },
266
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8],
267
    wedge_masks[BLOCK_32X8] },
268
  { 0, NULL, NULL, NULL },
269
  { 0, NULL, NULL, NULL },
270
};
271
272
static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
273
576
                                             BLOCK_SIZE sb_type) {
274
576
  const uint8_t *master;
275
576
  const int bh = block_size_high[sb_type];
276
576
  const int bw = block_size_wide[sb_type];
277
576
  const wedge_code_type *a =
278
576
      av1_wedge_params_lookup[sb_type].codebook + wedge_index;
279
576
  int woff, hoff;
280
576
  const uint8_t wsignflip =
281
576
      av1_wedge_params_lookup[sb_type].signflip[wedge_index];
282
283
576
  assert(wedge_index >= 0 && wedge_index < get_wedge_types_lookup(sb_type));
284
576
  woff = (a->x_offset * bw) >> 3;
285
576
  hoff = (a->y_offset * bh) >> 3;
286
576
  master = wedge_mask_obl[neg ^ wsignflip][a->direction] +
287
576
           MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
288
576
           MASK_MASTER_SIZE / 2 - woff;
289
576
  return master;
290
576
}
291
292
const uint8_t *av1_get_compound_type_mask(
293
2.66k
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) {
294
2.66k
  (void)sb_type;
295
2.66k
  switch (comp_data->type) {
296
1.93k
    case COMPOUND_WEDGE:
297
1.93k
      return av1_get_contiguous_soft_mask(comp_data->wedge_index,
298
1.93k
                                          comp_data->wedge_sign, sb_type);
299
735
    default: return comp_data->seg_mask;
300
2.66k
  }
301
2.66k
}
302
303
static inline void diffwtd_mask_d16(uint8_t *mask, int which_inverse,
304
                                    int mask_base, const CONV_BUF_TYPE *src0,
305
                                    int src0_stride, const CONV_BUF_TYPE *src1,
306
                                    int src1_stride, int h, int w,
307
249
                                    ConvolveParams *conv_params, int bd) {
308
249
  int round =
309
249
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
310
249
  int i, j, m, diff;
311
3.48k
  for (i = 0; i < h; ++i) {
312
93.1k
    for (j = 0; j < w; ++j) {
313
89.9k
      diff = abs(src0[i * src0_stride + j] - src1[i * src1_stride + j]);
314
89.9k
      diff = ROUND_POWER_OF_TWO(diff, round);
315
89.9k
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
316
89.9k
      mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
317
89.9k
    }
318
3.23k
  }
319
249
}
320
321
void av1_build_compound_diffwtd_mask_d16_c(
322
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
323
    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
324
249
    ConvolveParams *conv_params, int bd) {
325
249
  switch (mask_type) {
326
194
    case DIFFWTD_38:
327
194
      diffwtd_mask_d16(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w,
328
194
                       conv_params, bd);
329
194
      break;
330
55
    case DIFFWTD_38_INV:
331
55
      diffwtd_mask_d16(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w,
332
55
                       conv_params, bd);
333
55
      break;
334
0
    default: assert(0);
335
249
  }
336
249
}
337
338
static inline void diffwtd_mask(uint8_t *mask, int which_inverse, int mask_base,
339
                                const uint8_t *src0, int src0_stride,
340
                                const uint8_t *src1, int src1_stride, int h,
341
0
                                int w) {
342
0
  int i, j, m, diff;
343
0
  for (i = 0; i < h; ++i) {
344
0
    for (j = 0; j < w; ++j) {
345
0
      diff =
346
0
          abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]);
347
0
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
348
0
      mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
349
0
    }
350
0
  }
351
0
}
352
353
void av1_build_compound_diffwtd_mask_c(uint8_t *mask,
354
                                       DIFFWTD_MASK_TYPE mask_type,
355
                                       const uint8_t *src0, int src0_stride,
356
                                       const uint8_t *src1, int src1_stride,
357
0
                                       int h, int w) {
358
0
  switch (mask_type) {
359
0
    case DIFFWTD_38:
360
0
      diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w);
361
0
      break;
362
0
    case DIFFWTD_38_INV:
363
0
      diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w);
364
0
      break;
365
0
    default: assert(0);
366
0
  }
367
0
}
368
369
#if CONFIG_AV1_HIGHBITDEPTH
370
static AOM_FORCE_INLINE void diffwtd_mask_highbd(
371
    uint8_t *mask, int which_inverse, int mask_base, const uint16_t *src0,
372
    int src0_stride, const uint16_t *src1, int src1_stride, int h, int w,
373
0
    const unsigned int bd) {
374
0
  assert(bd >= 8);
375
0
  if (bd == 8) {
376
0
    if (which_inverse) {
377
0
      for (int i = 0; i < h; ++i) {
378
0
        for (int j = 0; j < w; ++j) {
379
0
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
380
0
          unsigned int m = negative_to_zero(mask_base + diff);
381
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
382
0
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
383
0
        }
384
0
        src0 += src0_stride;
385
0
        src1 += src1_stride;
386
0
        mask += w;
387
0
      }
388
0
    } else {
389
0
      for (int i = 0; i < h; ++i) {
390
0
        for (int j = 0; j < w; ++j) {
391
0
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
392
0
          unsigned int m = negative_to_zero(mask_base + diff);
393
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
394
0
          mask[j] = m;
395
0
        }
396
0
        src0 += src0_stride;
397
0
        src1 += src1_stride;
398
0
        mask += w;
399
0
      }
400
0
    }
401
0
  } else {
402
0
    const unsigned int bd_shift = bd - 8;
403
0
    if (which_inverse) {
404
0
      for (int i = 0; i < h; ++i) {
405
0
        for (int j = 0; j < w; ++j) {
406
0
          int diff =
407
0
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
408
0
          unsigned int m = negative_to_zero(mask_base + diff);
409
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
410
0
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
411
0
        }
412
0
        src0 += src0_stride;
413
0
        src1 += src1_stride;
414
0
        mask += w;
415
0
      }
416
0
    } else {
417
0
      for (int i = 0; i < h; ++i) {
418
0
        for (int j = 0; j < w; ++j) {
419
0
          int diff =
420
0
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
421
0
          unsigned int m = negative_to_zero(mask_base + diff);
422
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
423
0
          mask[j] = m;
424
0
        }
425
0
        src0 += src0_stride;
426
0
        src1 += src1_stride;
427
0
        mask += w;
428
0
      }
429
0
    }
430
0
  }
431
0
}
432
433
// C implementation of the difference-weighted compound mask for high bit
// depth predictions. `src0` and `src1` arrive as uint8_t pointers and are
// converted with CONVERT_TO_SHORTPTR() before use.
//
// DIFFWTD_38 builds the mask with base weight 38; DIFFWTD_38_INV builds its
// complement. Any other `mask_type` trips an assertion and leaves `mask`
// untouched.
void av1_build_compound_diffwtd_mask_highbd_c(
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0,
    int src0_stride, const uint8_t *src1, int src1_stride, int h, int w,
    int bd) {
  const int is_plain = (mask_type == DIFFWTD_38);
  const int is_inverse = (mask_type == DIFFWTD_38_INV);

  if (!is_plain && !is_inverse) {
    assert(0);
    return;
  }

  if (is_inverse) {
    diffwtd_mask_highbd(mask, 1, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                        CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
  } else {
    diffwtd_mask_highbd(mask, 0, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                        CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
  }
}
449
#endif  // CONFIG_AV1_HIGHBITDEPTH
450
451
2
static inline void init_wedge_master_masks(void) {
452
2
  int i, j;
453
2
  const int w = MASK_MASTER_SIZE;
454
2
  const int h = MASK_MASTER_SIZE;
455
2
  const int stride = MASK_MASTER_STRIDE;
456
  // Note: index [0] stores the masters, and [1] its complement.
457
  // Generate prototype by shifting the masters
458
2
  int shift = h / 4;
459
66
  for (i = 0; i < h; i += 2) {
460
64
    shift_copy(wedge_master_oblique_even,
461
64
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride], shift,
462
64
               MASK_MASTER_SIZE);
463
64
    shift--;
464
64
    shift_copy(wedge_master_oblique_odd,
465
64
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][(i + 1) * stride], shift,
466
64
               MASK_MASTER_SIZE);
467
64
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][i * stride],
468
64
           wedge_master_vertical,
469
64
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
470
64
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][(i + 1) * stride],
471
64
           wedge_master_vertical,
472
64
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
473
64
  }
474
475
130
  for (i = 0; i < h; ++i) {
476
8.32k
    for (j = 0; j < w; ++j) {
477
8.19k
      const int msk = wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j];
478
8.19k
      wedge_mask_obl[0][WEDGE_OBLIQUE27][j * stride + i] = msk;
479
8.19k
      wedge_mask_obl[0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
480
8.19k
          wedge_mask_obl[0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
481
8.19k
              (1 << WEDGE_WEIGHT_BITS) - msk;
482
8.19k
      wedge_mask_obl[1][WEDGE_OBLIQUE63][i * stride + j] =
483
8.19k
          wedge_mask_obl[1][WEDGE_OBLIQUE27][j * stride + i] =
484
8.19k
              (1 << WEDGE_WEIGHT_BITS) - msk;
485
8.19k
      wedge_mask_obl[1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
486
8.19k
          wedge_mask_obl[1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = msk;
487
8.19k
      const int mskx = wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j];
488
8.19k
      wedge_mask_obl[0][WEDGE_HORIZONTAL][j * stride + i] = mskx;
489
8.19k
      wedge_mask_obl[1][WEDGE_VERTICAL][i * stride + j] =
490
8.19k
          wedge_mask_obl[1][WEDGE_HORIZONTAL][j * stride + i] =
491
8.19k
              (1 << WEDGE_WEIGHT_BITS) - mskx;
492
8.19k
    }
493
128
  }
494
2
}
495
496
2
static inline void init_wedge_masks(void) {
497
2
  uint8_t *dst = wedge_mask_buf;
498
2
  BLOCK_SIZE bsize;
499
2
  memset(wedge_masks, 0, sizeof(wedge_masks));
500
46
  for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; ++bsize) {
501
44
    const wedge_params_type *wedge_params = &av1_wedge_params_lookup[bsize];
502
44
    const int wtypes = wedge_params->wedge_types;
503
44
    if (wtypes == 0) continue;
504
18
    const uint8_t *mask;
505
18
    const int bw = block_size_wide[bsize];
506
18
    const int bh = block_size_high[bsize];
507
18
    int w;
508
306
    for (w = 0; w < wtypes; ++w) {
509
288
      mask = get_wedge_mask_inplace(w, 0, bsize);
510
288
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw,
511
288
                        bh);
512
288
      wedge_params->masks[0][w] = dst;
513
288
      dst += bw * bh;
514
515
288
      mask = get_wedge_mask_inplace(w, 1, bsize);
516
288
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw,
517
288
                        bh);
518
288
      wedge_params->masks[1][w] = dst;
519
288
      dst += bw * bh;
520
288
    }
521
18
    assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf));
522
18
  }
523
2
}
524
525
/* clang-format off */
526
static const uint8_t ii_weights1d[MAX_SB_SIZE] = {
527
  60, 58, 56, 54, 52, 50, 48, 47, 45, 44, 42, 41, 39, 38, 37, 35, 34, 33, 32,
528
  31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20, 19, 19, 18, 18, 17, 16,
529
  16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10,  9,  9,  9,  8,
530
  8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  4,  4,
531
  4,  4,  4,  4,  4,  4,  3,  3,  3,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,
532
  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1,  1,
533
  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1
534
};
535
// Per-block-size step into ii_weights1d: build_smooth_interintra_mask()
// multiplies the row/column index by this value before looking up the weight.
// One entry per BLOCK_SIZE, in enum order. The table is never written, so it
// is const-qualified.
static const uint8_t ii_size_scales[BLOCK_SIZES_ALL] = {
    32, 16, 16, 16, 8, 8, 8, 4,
    4,  4,  2,  2,  2, 1, 1, 1,
    8,  8,  4,  4,  2, 2
};
540
/* clang-format on */
541
542
static inline void build_smooth_interintra_mask(uint8_t *mask, int stride,
543
                                                BLOCK_SIZE plane_bsize,
544
1.35k
                                                INTERINTRA_MODE mode) {
545
1.35k
  int i, j;
546
1.35k
  const int bw = block_size_wide[plane_bsize];
547
1.35k
  const int bh = block_size_high[plane_bsize];
548
1.35k
  const int size_scale = ii_size_scales[plane_bsize];
549
550
1.35k
  switch (mode) {
551
364
    case II_V_PRED:
552
4.98k
      for (i = 0; i < bh; ++i) {
553
4.61k
        memset(mask, ii_weights1d[i * size_scale], bw * sizeof(mask[0]));
554
4.61k
        mask += stride;
555
4.61k
      }
556
364
      break;
557
558
139
    case II_H_PRED:
559
2.03k
      for (i = 0; i < bh; ++i) {
560
28.1k
        for (j = 0; j < bw; ++j) mask[j] = ii_weights1d[j * size_scale];
561
1.89k
        mask += stride;
562
1.89k
      }
563
139
      break;
564
565
785
    case II_SMOOTH_PRED:
566
7.24k
      for (i = 0; i < bh; ++i) {
567
45.4k
        for (j = 0; j < bw; ++j)
568
39.0k
          mask[j] = ii_weights1d[(i < j ? i : j) * size_scale];
569
6.45k
        mask += stride;
570
6.45k
      }
571
785
      break;
572
573
64
    case II_DC_PRED:
574
64
    default:
575
1.19k
      for (i = 0; i < bh; ++i) {
576
1.12k
        memset(mask, 32, bw * sizeof(mask[0]));
577
1.12k
        mask += stride;
578
1.12k
      }
579
64
      break;
580
1.35k
  }
581
1.35k
}
582
583
2
static inline void init_smooth_interintra_masks(void) {
584
10
  for (int m = 0; m < INTERINTRA_MODES; ++m) {
585
184
    for (int bs = 0; bs < BLOCK_SIZES_ALL; ++bs) {
586
176
      const int bw = block_size_wide[bs];
587
176
      const int bh = block_size_high[bs];
588
176
      if (bw > MAX_WEDGE_SIZE || bh > MAX_WEDGE_SIZE) continue;
589
112
      build_smooth_interintra_mask(smooth_interintra_mask_buf[m][bs], bw, bs,
590
112
                                   m);
591
112
    }
592
8
  }
593
2
}
594
595
// Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
596
2
static void init_all_wedge_masks(void) {
  // Order matters: the per-block-size wedge masks are copied out of the
  // master masks, so the masters have to be generated first.
  init_wedge_master_masks();
  init_wedge_masks();

  // The smooth inter-intra masks use their own tables and buffer.
  init_smooth_interintra_masks();
}
601
602
15.5k
// Public entry point for building the wedge and smooth inter-intra mask
// tables; the actual work is handed to aom_once() with init_all_wedge_masks
// as the callback.
void av1_init_wedge_masks(void) {
  aom_once(init_all_wedge_masks);
}
603
604
static inline void build_masked_compound_no_round(
605
    uint8_t *dst, int dst_stride, const CONV_BUF_TYPE *src0, int src0_stride,
606
    const CONV_BUF_TYPE *src1, int src1_stride,
607
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
608
2.66k
    int w, InterPredParams *inter_pred_params) {
609
2.66k
  const int ssy = inter_pred_params->subsampling_y;
610
2.66k
  const int ssx = inter_pred_params->subsampling_x;
611
2.66k
  const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
612
2.66k
  const int mask_stride = block_size_wide[sb_type];
613
2.66k
#if CONFIG_AV1_HIGHBITDEPTH
614
2.66k
  if (inter_pred_params->use_hbd_buf) {
615
1.67k
    aom_highbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
616
1.67k
                                  src1_stride, mask, mask_stride, w, h, ssx,
617
1.67k
                                  ssy, &inter_pred_params->conv_params,
618
1.67k
                                  inter_pred_params->bit_depth);
619
1.67k
  } else {
620
996
    aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
621
996
                                 src1_stride, mask, mask_stride, w, h, ssx, ssy,
622
996
                                 &inter_pred_params->conv_params);
623
996
  }
624
#else
625
  aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
626
                               src1_stride, mask, mask_stride, w, h, ssx, ssy,
627
                               &inter_pred_params->conv_params);
628
#endif
629
2.66k
}
630
631
void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
632
                                     uint8_t *dst, int dst_stride,
633
                                     InterPredParams *inter_pred_params,
634
2.66k
                                     const SubpelParams *subpel_params) {
635
2.66k
  const INTERINTER_COMPOUND_DATA *comp_data = &inter_pred_params->mask_comp;
636
2.66k
  BLOCK_SIZE sb_type = inter_pred_params->sb_type;
637
638
  // We're going to call av1_make_inter_predictor to generate a prediction into
639
  // a temporary buffer, then will blend that temporary buffer with that from
640
  // the other reference.
641
2.66k
  DECLARE_ALIGNED(32, uint8_t, tmp_buf[2 * MAX_SB_SQUARE]);
642
2.66k
  uint8_t *tmp_dst =
643
2.66k
      inter_pred_params->use_hbd_buf ? CONVERT_TO_BYTEPTR(tmp_buf) : tmp_buf;
644
645
2.66k
  const int tmp_buf_stride = MAX_SB_SIZE;
646
2.66k
  CONV_BUF_TYPE *org_dst = inter_pred_params->conv_params.dst;
647
2.66k
  int org_dst_stride = inter_pred_params->conv_params.dst_stride;
648
2.66k
  CONV_BUF_TYPE *tmp_buf16 = (CONV_BUF_TYPE *)tmp_buf;
649
2.66k
  inter_pred_params->conv_params.dst = tmp_buf16;
650
2.66k
  inter_pred_params->conv_params.dst_stride = tmp_buf_stride;
651
2.66k
  assert(inter_pred_params->conv_params.do_average == 0);
652
653
  // This will generate a prediction in tmp_buf for the second reference
654
2.66k
  av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE,
655
2.66k
                           inter_pred_params, subpel_params);
656
657
2.66k
  if (!inter_pred_params->conv_params.plane &&
658
893
      comp_data->type == COMPOUND_DIFFWTD) {
659
249
    av1_build_compound_diffwtd_mask_d16(
660
249
        comp_data->seg_mask, comp_data->mask_type, org_dst, org_dst_stride,
661
249
        tmp_buf16, tmp_buf_stride, inter_pred_params->block_height,
662
249
        inter_pred_params->block_width, &inter_pred_params->conv_params,
663
249
        inter_pred_params->bit_depth);
664
249
  }
665
2.66k
  build_masked_compound_no_round(
666
2.66k
      dst, dst_stride, org_dst, org_dst_stride, tmp_buf16, tmp_buf_stride,
667
2.66k
      comp_data, sb_type, inter_pred_params->block_height,
668
2.66k
      inter_pred_params->block_width, inter_pred_params);
669
2.66k
}
670
671
void av1_dist_wtd_comp_weight_assign(const AV1_COMMON *cm,
672
                                     const MB_MODE_INFO *mbmi, int *fwd_offset,
673
                                     int *bck_offset,
674
                                     int *use_dist_wtd_comp_avg,
675
317k
                                     int is_compound) {
676
317k
  assert(fwd_offset != NULL && bck_offset != NULL);
677
317k
  if (!is_compound || mbmi->compound_idx) {
678
306k
    *fwd_offset = 8;
679
306k
    *bck_offset = 8;
680
306k
    *use_dist_wtd_comp_avg = 0;
681
306k
    return;
682
306k
  }
683
684
11.3k
  *use_dist_wtd_comp_avg = 1;
685
11.3k
  const RefCntBuffer *const bck_buf = get_ref_frame_buf(cm, mbmi->ref_frame[0]);
686
11.3k
  const RefCntBuffer *const fwd_buf = get_ref_frame_buf(cm, mbmi->ref_frame[1]);
687
11.3k
  const int cur_frame_index = cm->cur_frame->order_hint;
688
11.3k
  int bck_frame_index = 0, fwd_frame_index = 0;
689
690
11.3k
  if (bck_buf != NULL) bck_frame_index = bck_buf->order_hint;
691
11.3k
  if (fwd_buf != NULL) fwd_frame_index = fwd_buf->order_hint;
692
693
11.3k
  int d0 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
694
11.3k
                                       fwd_frame_index, cur_frame_index)),
695
11.3k
                 0, MAX_FRAME_DISTANCE);
696
11.3k
  int d1 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
697
11.3k
                                       cur_frame_index, bck_frame_index)),
698
11.3k
                 0, MAX_FRAME_DISTANCE);
699
700
11.3k
  const int order = d0 <= d1;
701
702
11.3k
  if (d0 == 0 || d1 == 0) {
703
2.00k
    *fwd_offset = quant_dist_lookup_table[3][order];
704
2.00k
    *bck_offset = quant_dist_lookup_table[3][1 - order];
705
2.00k
    return;
706
2.00k
  }
707
708
9.33k
  int i;
709
15.8k
  for (i = 0; i < 3; ++i) {
710
14.4k
    int c0 = quant_dist_weight[i][order];
711
14.4k
    int c1 = quant_dist_weight[i][!order];
712
14.4k
    int d0_c0 = d0 * c0;
713
14.4k
    int d1_c1 = d1 * c1;
714
14.4k
    if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
715
14.4k
  }
716
717
9.33k
  *fwd_offset = quant_dist_lookup_table[i][order];
718
9.33k
  *bck_offset = quant_dist_lookup_table[i][1 - order];
719
9.33k
}
720
721
void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
722
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
723
7.56M
                          const int plane_start, const int plane_end) {
724
  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
725
  // the static analysis warnings.
726
29.5M
  for (int i = plane_start; i < AOMMIN(plane_end, MAX_MB_PLANE); ++i) {
727
21.9M
    struct macroblockd_plane *const pd = &planes[i];
728
21.9M
    const int is_uv = i > 0;
729
21.9M
    setup_pred_plane(&pd->dst, bsize, src->buffers[i], src->crop_widths[is_uv],
730
21.9M
                     src->crop_heights[is_uv], src->strides[is_uv], mi_row,
731
21.9M
                     mi_col, NULL, pd->subsampling_x, pd->subsampling_y);
732
21.9M
  }
733
7.56M
}
734
735
void av1_setup_pre_planes(MACROBLOCKD *xd, int idx,
736
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
737
                          const struct scale_factors *sf,
738
43.4k
                          const int num_planes) {
739
43.4k
  if (src != NULL) {
740
    // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
741
    // the static analysis warnings.
742
161k
    for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); ++i) {
743
117k
      struct macroblockd_plane *const pd = &xd->plane[i];
744
117k
      const int is_uv = i > 0;
745
117k
      setup_pred_plane(&pd->pre[idx], xd->mi[0]->bsize, src->buffers[i],
746
117k
                       src->crop_widths[is_uv], src->crop_heights[is_uv],
747
117k
                       src->strides[is_uv], mi_row, mi_col, sf,
748
117k
                       pd->subsampling_x, pd->subsampling_y);
749
117k
    }
750
43.4k
  }
751
43.4k
}
752
753
// obmc_mask_N[overlap_position]
754
static const uint8_t obmc_mask_1[1] = { 64 };
755
DECLARE_ALIGNED(2, static const uint8_t, obmc_mask_2[2]) = { 45, 64 };
756
757
DECLARE_ALIGNED(4, static const uint8_t, obmc_mask_4[4]) = { 39, 50, 59, 64 };
758
759
static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 64, 64 };
760
761
static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54,
762
                                          56, 58, 60, 61, 64, 64, 64, 64 };
763
764
static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44,
765
                                          45, 47, 48, 50, 51, 52, 53, 55,
766
                                          56, 57, 58, 59, 60, 60, 61, 62,
767
                                          64, 64, 64, 64, 64, 64, 64, 64 };
768
769
static const uint8_t obmc_mask_64[64] = {
770
  33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
771
  45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
772
  56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
773
  62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
774
};
775
776
6.91k
const uint8_t *av1_get_obmc_mask(int length) {
777
6.91k
  switch (length) {
778
0
    case 1: return obmc_mask_1;
779
422
    case 2: return obmc_mask_2;
780
5.00k
    case 4: return obmc_mask_4;
781
1.41k
    case 8: return obmc_mask_8;
782
61
    case 16: return obmc_mask_16;
783
15
    case 32: return obmc_mask_32;
784
0
    case 64: return obmc_mask_64;
785
0
    default: assert(0); return NULL;
786
6.91k
  }
787
6.91k
}
788
789
// Neighbor-visitor callback that only counts invocations: fun_ctxt is
// treated as a pointer to a uint8_t counter, which is incremented by one.
// All other arguments are ignored.
static inline void increment_int_ptr(MACROBLOCKD *xd, int rel_mi_row,
                                     int rel_mi_col, uint8_t op_mi_size,
                                     int dir, MB_MODE_INFO *mi, void *fun_ctxt,
                                     const int num_planes) {
  (void)xd;
  (void)rel_mi_row;
  (void)rel_mi_col;
  (void)op_mi_size;
  (void)dir;
  (void)mi;
  (void)num_planes;

  uint8_t *const counter = (uint8_t *)fun_ctxt;
  ++*counter;
}
802
803
48.5k
// Recomputes xd->mi[0]->overlappable_neighbors. The count is reset to zero
// and left there when is_motion_variation_allowed_bsize() rejects the block
// size. Otherwise the above row is visited first, and the left column is
// visited only if the above row left the count at zero.
void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd) {
  MB_MODE_INFO *const cur_mbmi = xd->mi[0];

  cur_mbmi->overlappable_neighbors = 0;

  if (is_motion_variation_allowed_bsize(cur_mbmi->bsize)) {
    foreach_overlappable_nb_above(cm, xd, INT_MAX, increment_int_ptr,
                                  &cur_mbmi->overlappable_neighbors);
    if (!cur_mbmi->overlappable_neighbors) {
      foreach_overlappable_nb_left(cm, xd, INT_MAX, increment_int_ptr,
                                   &cur_mbmi->overlappable_neighbors);
    }
  }
}
816
817
// HW does not support < 4x4 prediction. To limit the bandwidth requirement, if
818
// block-size of current plane is smaller than 8x8, always only blend with the
819
// left neighbor(s) (skip blending with the above side).
820
#define DISABLE_CHROMA_U8X8_OBMC 0  // 0: one-sided obmc; 1: disable
821
822
// Reports whether OBMC blending should be skipped for plane `pd` of a block
// of size `bsize` in direction `dir`. In this file the above-neighbor blend
// passes dir == 0 and the left-neighbor blend passes dir == 1.
//
// Only plane block sizes 4x4, 8x4 and 4x8 can be skipped: always when
// DISABLE_CHROMA_U8X8_OBMC is set, otherwise only for dir == 0. Returns 0 for
// every other plane block size.
int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,
                               const struct macroblockd_plane *pd, int dir) {
  assert(is_motion_variation_allowed_bsize(bsize));

  const BLOCK_SIZE bsize_plane =
      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);

  switch (bsize_plane) {
    case BLOCK_4X4:
    case BLOCK_8X4:
    case BLOCK_4X8:
#if DISABLE_CHROMA_U8X8_OBMC
      return 1;
#else
      return dir == 0;
#endif
    default: return 0;
  }
}
841
842
#if CONFIG_AV1_DECODER
843
2.43k
// Rewrites a neighbor's mode info in place before it is used for OBMC:
// the compound type becomes COMPOUND_AVERAGE and the second reference frame
// is set to NONE_FRAME.
static void modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
  mbmi->ref_frame[1] = NONE_FRAME;
}
847
#endif  // CONFIG_AV1_DECODER
848
849
// Context handed through the void *fun_ctxt argument to the OBMC blend
// callbacks (build_obmc_inter_pred_above / build_obmc_inter_pred_left).
struct obmc_inter_pred_ctxt {
  // Neighbor prediction buffers, indexed by plane.
  uint8_t **adjacent;
  // Stride of each buffer in `adjacent`, indexed by plane.
  int *adjacent_stride;
};
853
854
static inline void build_obmc_inter_pred_above(
855
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
856
811
    int dir, MB_MODE_INFO *above_mi, void *fun_ctxt, const int num_planes) {
857
811
  (void)above_mi;
858
811
  (void)rel_mi_row;
859
811
  (void)dir;
860
811
  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
861
811
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
862
811
  const int overlap =
863
811
      AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
864
865
3.24k
  for (int plane = 0; plane < num_planes; ++plane) {
866
2.43k
    const struct macroblockd_plane *pd = &xd->plane[plane];
867
2.43k
    const int bw = (op_mi_size * MI_SIZE) >> pd->subsampling_x;
868
2.43k
    const int bh = overlap >> pd->subsampling_y;
869
2.43k
    const int plane_col = (rel_mi_col * MI_SIZE) >> pd->subsampling_x;
870
871
2.43k
    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
872
873
2.04k
    const int dst_stride = pd->dst.stride;
874
2.04k
    uint8_t *const dst = &pd->dst.buf[plane_col];
875
2.04k
    const int tmp_stride = ctxt->adjacent_stride[plane];
876
2.04k
    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_col];
877
2.04k
    const uint8_t *const mask = av1_get_obmc_mask(bh);
878
2.04k
#if CONFIG_AV1_HIGHBITDEPTH
879
2.04k
    const int is_hbd = is_cur_buf_hbd(xd);
880
2.04k
    if (is_hbd)
881
1.62k
      aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
882
1.62k
                                 tmp_stride, mask, bw, bh, xd->bd);
883
424
    else
884
424
      aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
885
424
                          mask, bw, bh);
886
#else
887
    aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
888
                        bw, bh);
889
#endif
890
2.04k
  }
891
811
}
892
893
static inline void build_obmc_inter_pred_left(
894
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
895
1.62k
    int dir, MB_MODE_INFO *left_mi, void *fun_ctxt, const int num_planes) {
896
1.62k
  (void)left_mi;
897
1.62k
  (void)rel_mi_col;
898
1.62k
  (void)dir;
899
1.62k
  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
900
1.62k
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
901
1.62k
  const int overlap =
902
1.62k
      AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
903
904
6.49k
  for (int plane = 0; plane < num_planes; ++plane) {
905
4.86k
    const struct macroblockd_plane *pd = &xd->plane[plane];
906
4.86k
    const int bw = overlap >> pd->subsampling_x;
907
4.86k
    const int bh = (op_mi_size * MI_SIZE) >> pd->subsampling_y;
908
4.86k
    const int plane_row = (rel_mi_row * MI_SIZE) >> pd->subsampling_y;
909
910
4.86k
    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
911
912
4.86k
    const int dst_stride = pd->dst.stride;
913
4.86k
    uint8_t *const dst = &pd->dst.buf[plane_row * dst_stride];
914
4.86k
    const int tmp_stride = ctxt->adjacent_stride[plane];
915
4.86k
    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_row * tmp_stride];
916
4.86k
    const uint8_t *const mask = av1_get_obmc_mask(bw);
917
918
4.86k
#if CONFIG_AV1_HIGHBITDEPTH
919
4.86k
    const int is_hbd = is_cur_buf_hbd(xd);
920
4.86k
    if (is_hbd)
921
2.65k
      aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
922
2.65k
                                 tmp_stride, mask, bw, bh, xd->bd);
923
2.21k
    else
924
2.21k
      aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
925
2.21k
                          mask, bw, bh);
926
#else
927
    aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
928
                        bw, bh);
929
#endif
930
4.86k
  }
931
1.62k
}
932
933
// This function combines motion compensated predictions that are generated by
934
// top/left neighboring blocks' inter predictors with the regular inter
935
// prediction. We assume the original prediction (bmc) is stored in
936
// xd->plane[].dst.buf
937
void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
938
                                     uint8_t *above[MAX_MB_PLANE],
939
                                     int above_stride[MAX_MB_PLANE],
940
                                     uint8_t *left[MAX_MB_PLANE],
941
2.12k
                                     int left_stride[MAX_MB_PLANE]) {
942
2.12k
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
943
944
  // handle above row
945
2.12k
  struct obmc_inter_pred_ctxt ctxt_above = { above, above_stride };
946
2.12k
  foreach_overlappable_nb_above(cm, xd,
947
2.12k
                                max_neighbor_obmc[mi_size_wide_log2[bsize]],
948
2.12k
                                build_obmc_inter_pred_above, &ctxt_above);
949
950
  // handle left column
951
2.12k
  struct obmc_inter_pred_ctxt ctxt_left = { left, left_stride };
952
2.12k
  foreach_overlappable_nb_left(cm, xd,
953
2.12k
                               max_neighbor_obmc[mi_size_high_log2[bsize]],
954
2.12k
                               build_obmc_inter_pred_left, &ctxt_left);
955
2.12k
}
956
957
// Fills dst_buf1[0..2] and dst_buf2[0..2] with three plane pointers carved
// out of xd->tmp_obmc_bufs[0] and xd->tmp_obmc_bufs[1] respectively.
// Consecutive planes are MAX_SB_SQUARE bytes apart, or
// MAX_SB_SQUARE * sizeof(uint16_t) bytes apart for a high bit-depth buffer,
// in which case each pointer is also passed through CONVERT_TO_BYTEPTR.
void av1_setup_obmc_dst_bufs(MACROBLOCKD *xd, uint8_t **dst_buf1,
                             uint8_t **dst_buf2) {
  const int is_hbd = is_cur_buf_hbd(xd);
  const int plane_step =
      is_hbd ? MAX_SB_SQUARE * (int)sizeof(uint16_t) : MAX_SB_SQUARE;
  uint8_t **const outputs[2] = { dst_buf1, dst_buf2 };

  for (int buf = 0; buf < 2; ++buf) {
    for (int plane = 0; plane < 3; ++plane) {
      uint8_t *const plane_ptr = xd->tmp_obmc_bufs[buf] + plane * plane_step;
      outputs[buf][plane] = is_hbd ? CONVERT_TO_BYTEPTR(plane_ptr) : plane_ptr;
    }
  }
}
980
981
#if CONFIG_AV1_DECODER
982
void av1_setup_build_prediction_by_above_pred(
983
    MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width,
984
    MB_MODE_INFO *above_mbmi, struct build_prediction_ctxt *ctxt,
985
811
    const int num_planes) {
986
811
  const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->bsize);
987
811
  const int above_mi_col = xd->mi_col + rel_mi_col;
988
989
811
  modify_neighbor_predictor_for_obmc(above_mbmi);
990
991
3.24k
  for (int j = 0; j < num_planes; ++j) {
992
2.43k
    struct macroblockd_plane *const pd = &xd->plane[j];
993
2.43k
    setup_pred_plane(&pd->dst, a_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
994
2.43k
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], 0, rel_mi_col,
995
2.43k
                     NULL, pd->subsampling_x, pd->subsampling_y);
996
2.43k
  }
997
998
811
  const int num_refs = 1 + has_second_ref(above_mbmi);
999
1000
1.62k
  for (int ref = 0; ref < num_refs; ++ref) {
1001
811
    const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];
1002
1003
811
    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
1004
811
    const struct scale_factors *const sf =
1005
811
        get_ref_scale_factors_const(ctxt->cm, frame);
1006
811
    xd->block_ref_scale_factors[ref] = sf;
1007
811
    if ((!av1_is_valid_scale(sf)))
1008
0
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
1009
0
                         "Reference frame has invalid dimensions");
1010
811
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, xd->mi_row, above_mi_col, sf,
1011
811
                         num_planes);
1012
811
  }
1013
1014
811
  xd->mb_to_left_edge = 8 * MI_SIZE * (-above_mi_col);
1015
811
  xd->mb_to_right_edge =
1016
811
      ctxt->mb_to_far_edge +
1017
811
      (xd->width - rel_mi_col - above_mi_width) * MI_SIZE * 8;
1018
811
}
1019
1020
void av1_setup_build_prediction_by_left_pred(MACROBLOCKD *xd, int rel_mi_row,
1021
                                             uint8_t left_mi_height,
1022
                                             MB_MODE_INFO *left_mbmi,
1023
                                             struct build_prediction_ctxt *ctxt,
1024
1.62k
                                             const int num_planes) {
1025
1.62k
  const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->bsize);
1026
1.62k
  const int left_mi_row = xd->mi_row + rel_mi_row;
1027
1028
1.62k
  modify_neighbor_predictor_for_obmc(left_mbmi);
1029
1030
6.49k
  for (int j = 0; j < num_planes; ++j) {
1031
4.86k
    struct macroblockd_plane *const pd = &xd->plane[j];
1032
4.86k
    setup_pred_plane(&pd->dst, l_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
1033
4.86k
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], rel_mi_row, 0,
1034
4.86k
                     NULL, pd->subsampling_x, pd->subsampling_y);
1035
4.86k
  }
1036
1037
1.62k
  const int num_refs = 1 + has_second_ref(left_mbmi);
1038
1039
3.24k
  for (int ref = 0; ref < num_refs; ++ref) {
1040
1.62k
    const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];
1041
1042
1.62k
    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
1043
1.62k
    const struct scale_factors *const ref_scale_factors =
1044
1.62k
        get_ref_scale_factors_const(ctxt->cm, frame);
1045
1046
1.62k
    xd->block_ref_scale_factors[ref] = ref_scale_factors;
1047
1.62k
    if ((!av1_is_valid_scale(ref_scale_factors)))
1048
0
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
1049
0
                         "Reference frame has invalid dimensions");
1050
1.62k
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, left_mi_row, xd->mi_col,
1051
1.62k
                         ref_scale_factors, num_planes);
1052
1.62k
  }
1053
1054
1.62k
  xd->mb_to_top_edge = GET_MV_SUBPEL(MI_SIZE * (-left_mi_row));
1055
1.62k
  xd->mb_to_bottom_edge =
1056
1.62k
      ctxt->mb_to_far_edge +
1057
1.62k
      GET_MV_SUBPEL((xd->height - rel_mi_row - left_mi_height) * MI_SIZE);
1058
1.62k
}
1059
#endif  // CONFIG_AV1_DECODER
1060
1061
static inline void combine_interintra(
1062
    INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
1063
    int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
1064
    uint8_t *comppred, int compstride, const uint8_t *interpred,
1065
916
    int interstride, const uint8_t *intrapred, int intrastride) {
1066
916
  const int bw = block_size_wide[plane_bsize];
1067
916
  const int bh = block_size_high[plane_bsize];
1068
1069
916
  if (use_wedge_interintra) {
1070
423
    if (av1_is_wedge_used(bsize)) {
1071
423
      const uint8_t *mask =
1072
423
          av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
1073
423
      const int subw = 2 * mi_size_wide[bsize] == bw;
1074
423
      const int subh = 2 * mi_size_high[bsize] == bh;
1075
423
      aom_blend_a64_mask(comppred, compstride, intrapred, intrastride,
1076
423
                         interpred, interstride, mask, block_size_wide[bsize],
1077
423
                         bw, bh, subw, subh);
1078
423
    }
1079
423
    return;
1080
423
  }
1081
1082
493
  const uint8_t *mask = smooth_interintra_mask_buf[mode][plane_bsize];
1083
493
  aom_blend_a64_mask(comppred, compstride, intrapred, intrastride, interpred,
1084
493
                     interstride, mask, bw, bw, bh, 0, 0);
1085
493
}
1086
1087
#if CONFIG_AV1_HIGHBITDEPTH
1088
static inline void combine_interintra_highbd(
1089
    INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
1090
    int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
1091
    uint8_t *comppred8, int compstride, const uint8_t *interpred8,
1092
1.98k
    int interstride, const uint8_t *intrapred8, int intrastride, int bd) {
1093
1.98k
  const int bw = block_size_wide[plane_bsize];
1094
1.98k
  const int bh = block_size_high[plane_bsize];
1095
1096
1.98k
  if (use_wedge_interintra) {
1097
741
    if (av1_is_wedge_used(bsize)) {
1098
741
      const uint8_t *mask =
1099
741
          av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
1100
741
      const int subh = 2 * mi_size_high[bsize] == bh;
1101
741
      const int subw = 2 * mi_size_wide[bsize] == bw;
1102
741
      aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
1103
741
                                interpred8, interstride, mask,
1104
741
                                block_size_wide[bsize], bw, bh, subw, subh, bd);
1105
741
    }
1106
741
    return;
1107
741
  }
1108
1109
1.24k
  uint8_t mask[MAX_SB_SQUARE];
1110
1.24k
  build_smooth_interintra_mask(mask, bw, plane_bsize, mode);
1111
1.24k
  aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
1112
1.24k
                            interpred8, interstride, mask, bw, bw, bh, 0, 0,
1113
1.24k
                            bd);
1114
1.24k
}
1115
#endif
1116
1117
void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm,
1118
                                               MACROBLOCKD *xd,
1119
                                               BLOCK_SIZE bsize, int plane,
1120
                                               const BUFFER_SET *ctx,
1121
2.89k
                                               uint8_t *dst, int dst_stride) {
1122
2.89k
  struct macroblockd_plane *const pd = &xd->plane[plane];
1123
2.89k
  const int ssx = xd->plane[plane].subsampling_x;
1124
2.89k
  const int ssy = xd->plane[plane].subsampling_y;
1125
2.89k
  BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
1126
2.89k
  PREDICTION_MODE mode = interintra_to_intra_mode[xd->mi[0]->interintra_mode];
1127
2.89k
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_Y] == 0);
1128
2.89k
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_UV] == 0);
1129
2.89k
  assert(xd->mi[0]->filter_intra_mode_info.use_filter_intra == 0);
1130
2.89k
  assert(xd->mi[0]->use_intrabc == 0);
1131
2.89k
  const SequenceHeader *seq_params = cm->seq_params;
1132
1133
2.89k
  av1_predict_intra_block(xd, seq_params->sb_size,
1134
2.89k
                          seq_params->enable_intra_edge_filter, pd->width,
1135
2.89k
                          pd->height, max_txsize_rect_lookup[plane_bsize], mode,
1136
2.89k
                          0, 0, FILTER_INTRA_MODES, ctx->plane[plane],
1137
2.89k
                          ctx->stride[plane], dst, dst_stride, 0, 0, plane);
1138
2.89k
}
1139
1140
// Blends `inter_pred` and `intra_pred` into xd->plane[plane].dst for one
// plane, using the interintra settings stored in xd->mi[0]. Dispatches to
// the high bit-depth blend when that build option is enabled and the current
// buffer is high bit-depth, and to the 8-bit blend otherwise.
void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
                            const uint8_t *inter_pred, int inter_stride,
                            const uint8_t *intra_pred, int intra_stride) {
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);

#if CONFIG_AV1_HIGHBITDEPTH
  if (is_cur_buf_hbd(xd)) {
    combine_interintra_highbd(
        mbmi->interintra_mode, mbmi->use_wedge_interintra,
        mbmi->interintra_wedge_index, INTERINTRA_WEDGE_SIGN, bsize, plane_bsize,
        pd->dst.buf, pd->dst.stride, inter_pred, inter_stride, intra_pred,
        intra_stride, xd->bd);
    return;
  }
#endif

  combine_interintra(mbmi->interintra_mode, mbmi->use_wedge_interintra,
                     mbmi->interintra_wedge_index, INTERINTRA_WEDGE_SIGN, bsize,
                     plane_bsize, pd->dst.buf, pd->dst.stride, inter_pred,
                     inter_stride, intra_pred, intra_stride);
}
1162
1163
// build interintra_predictors for one plane
1164
void av1_build_interintra_predictor(const AV1_COMMON *cm, MACROBLOCKD *xd,
1165
                                    uint8_t *pred, int stride,
1166
                                    const BUFFER_SET *ctx, int plane,
1167
2.89k
                                    BLOCK_SIZE bsize) {
1168
2.89k
  assert(bsize < BLOCK_SIZES_ALL);
1169
2.89k
  if (is_cur_buf_hbd(xd)) {
1170
1.98k
    DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
1171
1.98k
    av1_build_intra_predictors_for_interintra(
1172
1.98k
        cm, xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(intrapredictor),
1173
1.98k
        MAX_SB_SIZE);
1174
1.98k
    av1_combine_interintra(xd, bsize, plane, pred, stride,
1175
1.98k
                           CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
1176
1.98k
  } else {
1177
916
    DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]);
1178
916
    av1_build_intra_predictors_for_interintra(cm, xd, bsize, plane, ctx,
1179
916
                                              intrapredictor, MAX_SB_SIZE);
1180
916
    av1_combine_interintra(xd, bsize, plane, pred, stride, intrapredictor,
1181
916
                           MAX_SB_SIZE);
1182
916
  }
1183
2.89k
}