Coverage Report

Created: 2025-12-31 06:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/aom/av1/common/reconinter.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
#include <stdio.h>
14
#include <limits.h>
15
16
#include "config/aom_config.h"
17
#include "config/aom_dsp_rtcd.h"
18
#include "config/aom_scale_rtcd.h"
19
20
#include "aom/aom_integer.h"
21
#include "aom_dsp/blend.h"
22
#include "aom_ports/aom_once.h"
23
24
#include "av1/common/av1_common_int.h"
25
#include "av1/common/blockd.h"
26
#include "av1/common/mvref_common.h"
27
#include "av1/common/obmc.h"
28
#include "av1/common/reconinter.h"
29
#include "av1/common/reconintra.h"
30
31
// This function will determine whether or not to create a warped
32
// prediction.
33
static int allow_warp(const MB_MODE_INFO *const mbmi,
34
                      const WarpTypesAllowed *const warp_types,
35
                      const WarpedMotionParams *const gm_params,
36
                      int build_for_obmc, const struct scale_factors *const sf,
37
3.48M
                      WarpedMotionParams *final_warp_params) {
38
  // Note: As per the spec, we must test the fixed point scales here, which are
39
  // at a higher precision (1 << 14) than the xs and ys in subpel_params (that
40
  // have 1 << 10 precision).
41
3.48M
  if (av1_is_scaled(sf)) return 0;
42
43
3.21M
  if (final_warp_params != NULL) *final_warp_params = default_warp_params;
44
45
3.21M
  if (build_for_obmc) return 0;
46
47
3.21M
  if (warp_types->local_warp_allowed && !mbmi->wm_params.invalid) {
48
281k
    if (final_warp_params != NULL) *final_warp_params = mbmi->wm_params;
49
281k
    return 1;
50
2.93M
  } else if (warp_types->global_warp_allowed && !gm_params->invalid) {
51
45.9k
    if (final_warp_params != NULL) *final_warp_params = *gm_params;
52
45.9k
    return 1;
53
45.9k
  }
54
55
2.89M
  return 0;
56
3.21M
}
57
58
void av1_init_warp_params(InterPredParams *inter_pred_params,
59
                          const WarpTypesAllowed *warp_types, int ref,
60
7.11M
                          const MACROBLOCKD *xd, const MB_MODE_INFO *mi) {
61
7.11M
  if (inter_pred_params->block_height < 8 || inter_pred_params->block_width < 8)
62
3.57M
    return;
63
64
3.54M
  if (xd->cur_frame_force_integer_mv) return;
65
66
3.47M
  if (allow_warp(mi, warp_types, &xd->global_motion[mi->ref_frame[ref]], 0,
67
3.47M
                 inter_pred_params->scale_factors,
68
3.47M
                 &inter_pred_params->warp_params)) {
69
#if CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
70
    aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_FEATURE,
71
                       "Warped motion is disabled in realtime only build.");
72
#endif  // CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
73
327k
    inter_pred_params->mode = WARP_PRED;
74
327k
  }
75
3.47M
}
76
77
void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
78
                              int dst_stride,
79
                              InterPredParams *inter_pred_params,
80
10.6M
                              const SubpelParams *subpel_params) {
81
10.6M
  assert(IMPLIES(inter_pred_params->conv_params.is_compound,
82
10.6M
                 inter_pred_params->conv_params.dst != NULL));
83
84
10.6M
  if (inter_pred_params->mode == TRANSLATION_PRED) {
85
10.2M
#if CONFIG_AV1_HIGHBITDEPTH
86
10.2M
    if (inter_pred_params->use_hbd_buf) {
87
6.01M
      highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
88
6.01M
                             inter_pred_params->block_width,
89
6.01M
                             inter_pred_params->block_height,
90
6.01M
                             &inter_pred_params->conv_params,
91
6.01M
                             inter_pred_params->interp_filter_params,
92
6.01M
                             inter_pred_params->bit_depth);
93
6.01M
    } else {
94
4.26M
      inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
95
4.26M
                      inter_pred_params->block_width,
96
4.26M
                      inter_pred_params->block_height,
97
4.26M
                      &inter_pred_params->conv_params,
98
4.26M
                      inter_pred_params->interp_filter_params);
99
4.26M
    }
100
#else
101
    inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
102
                    inter_pred_params->block_width,
103
                    inter_pred_params->block_height,
104
                    &inter_pred_params->conv_params,
105
                    inter_pred_params->interp_filter_params);
106
#endif
107
10.2M
  }
108
325k
#if !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
109
  // TODO(jingning): av1_warp_plane() can be further cleaned up.
110
327k
  else if (inter_pred_params->mode == WARP_PRED) {
111
327k
    av1_warp_plane(
112
327k
        &inter_pred_params->warp_params, inter_pred_params->use_hbd_buf,
113
327k
        inter_pred_params->bit_depth, inter_pred_params->ref_frame_buf.buf0,
114
327k
        inter_pred_params->ref_frame_buf.width,
115
327k
        inter_pred_params->ref_frame_buf.height,
116
327k
        inter_pred_params->ref_frame_buf.stride, dst,
117
327k
        inter_pred_params->pix_col, inter_pred_params->pix_row,
118
327k
        inter_pred_params->block_width, inter_pred_params->block_height,
119
327k
        dst_stride, inter_pred_params->subsampling_x,
120
327k
        inter_pred_params->subsampling_y, &inter_pred_params->conv_params);
121
327k
  }
122
18.4E
#endif  // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
123
18.4E
  else {
124
18.4E
    assert(0 && "Unsupported inter_pred_params->mode");
125
18.4E
  }
126
10.6M
}
127
128
// One-dimensional master transition profiles for the wedge masks. Each table
// lists 64 weights that ramp from 0 up to 64.
//
// Profile copied (with a per-row shift) into the odd rows of the
// WEDGE_OBLIQUE63 master mask by init_wedge_master_masks().
static const uint8_t wedge_master_oblique_odd[MASK_MASTER_SIZE] = {
129
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
130
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  6,  18,
131
  37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
132
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
133
};
134
// Profile copied (with a per-row shift) into the even rows of the
// WEDGE_OBLIQUE63 master mask by init_wedge_master_masks().
static const uint8_t wedge_master_oblique_even[MASK_MASTER_SIZE] = {
135
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
136
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  4,  11, 27,
137
  46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
138
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
139
};
140
// Profile copied unshifted into every row of the WEDGE_VERTICAL master mask
// by init_wedge_master_masks().
static const uint8_t wedge_master_vertical[MASK_MASTER_SIZE] = {
141
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
142
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  7,  21,
143
  43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
144
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
145
};
146
147
// Copies `width` bytes of `src` into `dst`, displaced by `shift` positions.
//
// A non-negative shift moves the data towards higher indices and fills the
// vacated leading bytes with src[0]; a negative shift moves it towards lower
// indices and fills the vacated trailing bytes with src[width - 1].
// The caller must keep |shift| <= width, and src/dst must not overlap.
static inline void shift_copy(const uint8_t *src, uint8_t *dst, int shift,
                              int width) {
  if (shift < 0) {
    const int pad = -shift;
    const int kept = width - pad;
    memcpy(dst, src + pad, kept);
    memset(dst + kept, src[width - 1], pad);
    return;
  }

  const int kept = width - shift;
  memcpy(dst + shift, src, kept);
  memset(dst, src[0], shift);
}
158
159
/* clang-format off */
160
// Per block size, per wedge index sign-flip flag. get_wedge_mask_inplace()
// XORs this flag with the requested polarity to choose between the two
// wedge_mask_obl master sets. Rows are indexed by BLOCK_SIZE; rows marked
// "not used" belong to block sizes without wedge support (their entry in
// av1_wedge_params_lookup has no codebook).
DECLARE_ALIGNED(16, static uint8_t,
161
                wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]) = {
162
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
163
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
164
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
165
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
166
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
167
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
168
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
169
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
170
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
171
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
172
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
173
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
174
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
175
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
176
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
177
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
178
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
179
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
180
  { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, },
181
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, },
182
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
183
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
184
};
185
/* clang-format on */
186
187
// Master wedge masks, indexed [negative][direction]. Index [0] holds the
// masters and [1] their complements; both are filled by
// init_wedge_master_masks().
DECLARE_ALIGNED(
189
    16, static uint8_t,
190
    wedge_mask_obl[2][WEDGE_DIRECTIONS][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);
191
192
// Backing storage for every per-block-size wedge mask; init_wedge_masks()
// carves it up sequentially.
// 4 * MAX_WEDGE_SQUARE is an easy to compute and fairly tight upper bound
193
// on the sum of all mask sizes up to and including MAX_WEDGE_SQUARE.
194
DECLARE_ALIGNED(16, static uint8_t,
195
                wedge_mask_buf[2 * MAX_WEDGE_TYPES * 4 * MAX_WEDGE_SQUARE]);
196
197
// Smooth inter-intra blending masks, one per (mode, block size) pair, filled
// by init_smooth_interintra_masks().
DECLARE_ALIGNED(16, static uint8_t,
198
                smooth_interintra_mask_buf[INTERINTRA_MODES][BLOCK_SIZES_ALL]
199
                                          [MAX_WEDGE_SQUARE]);
200
201
// Pointers into wedge_mask_buf, indexed [block size][polarity]; cleared and
// populated by init_wedge_masks().
static wedge_masks_type wedge_masks[BLOCK_SIZES_ALL][2];
202
203
// Wedge codebooks: 16 partition shapes per block-shape class. Each entry
// pairs a wedge direction with two offsets; get_wedge_mask_inplace() reads
// them as x_offset / y_offset in eighths of the block width / height.
// NOTE(review): the initializer order is presumably { direction, x_offset,
// y_offset } -- confirm against the wedge_code_type definition.
//
// Codebook used by av1_wedge_params_lookup for 8x16, 16x32 and 8x32 blocks
// (height greater than width).
static const wedge_code_type wedge_codebook_16_hgtw[16] = {
204
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
205
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
206
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
207
  { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
208
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
209
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
210
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
211
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
212
};
213
214
// Codebook used by av1_wedge_params_lookup for 16x8, 32x16 and 32x8 blocks
// (height less than width).
static const wedge_code_type wedge_codebook_16_hltw[16] = {
215
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
216
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
217
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 4, 4 },
218
  { WEDGE_VERTICAL, 6, 4 },   { WEDGE_HORIZONTAL, 4, 4 },
219
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
220
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
221
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
222
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
223
};
224
225
// Codebook used by av1_wedge_params_lookup for 8x8, 16x16 and 32x32 blocks
// (height equal to width).
static const wedge_code_type wedge_codebook_16_heqw[16] = {
226
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
227
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
228
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
229
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
230
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
231
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
232
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
233
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
234
};
235
236
// Wedge configuration per block size. Each entry gives the number of wedge
// types, the codebook, the sign-flip row and the mask pointer table for that
// block size. Entries of the form { 0, NULL, NULL, NULL } mark block sizes
// for which no wedge masks are built (init_wedge_masks() skips them).
const wedge_params_type av1_wedge_params_lookup[BLOCK_SIZES_ALL] = {
237
  { 0, NULL, NULL, NULL },
238
  { 0, NULL, NULL, NULL },
239
  { 0, NULL, NULL, NULL },
240
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8],
241
    wedge_masks[BLOCK_8X8] },
242
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16],
243
    wedge_masks[BLOCK_8X16] },
244
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8],
245
    wedge_masks[BLOCK_16X8] },
246
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16],
247
    wedge_masks[BLOCK_16X16] },
248
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32],
249
    wedge_masks[BLOCK_16X32] },
250
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16],
251
    wedge_masks[BLOCK_32X16] },
252
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32],
253
    wedge_masks[BLOCK_32X32] },
254
  { 0, NULL, NULL, NULL },
255
  { 0, NULL, NULL, NULL },
256
  { 0, NULL, NULL, NULL },
257
  { 0, NULL, NULL, NULL },
258
  { 0, NULL, NULL, NULL },
259
  { 0, NULL, NULL, NULL },
260
  { 0, NULL, NULL, NULL },
261
  { 0, NULL, NULL, NULL },
262
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32],
263
    wedge_masks[BLOCK_8X32] },
264
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8],
265
    wedge_masks[BLOCK_32X8] },
266
  { 0, NULL, NULL, NULL },
267
  { 0, NULL, NULL, NULL },
268
};
269
270
static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
271
288
                                             BLOCK_SIZE sb_type) {
272
288
  const uint8_t *master;
273
288
  const int bh = block_size_high[sb_type];
274
288
  const int bw = block_size_wide[sb_type];
275
288
  const wedge_code_type *a =
276
288
      av1_wedge_params_lookup[sb_type].codebook + wedge_index;
277
288
  int woff, hoff;
278
288
  const uint8_t wsignflip =
279
288
      av1_wedge_params_lookup[sb_type].signflip[wedge_index];
280
281
288
  assert(wedge_index >= 0 && wedge_index < get_wedge_types_lookup(sb_type));
282
288
  woff = (a->x_offset * bw) >> 3;
283
288
  hoff = (a->y_offset * bh) >> 3;
284
288
  master = wedge_mask_obl[neg ^ wsignflip][a->direction] +
285
288
           MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
286
288
           MASK_MASTER_SIZE / 2 - woff;
287
288
  return master;
288
288
}
289
290
const uint8_t *av1_get_compound_type_mask(
291
185k
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) {
292
185k
  (void)sb_type;
293
185k
  switch (comp_data->type) {
294
70.6k
    case COMPOUND_WEDGE:
295
70.6k
      return av1_get_contiguous_soft_mask(comp_data->wedge_index,
296
70.6k
                                          comp_data->wedge_sign, sb_type);
297
114k
    default: return comp_data->seg_mask;
298
185k
  }
299
185k
}
300
301
static inline void diffwtd_mask_d16(uint8_t *mask, int which_inverse,
302
                                    int mask_base, const CONV_BUF_TYPE *src0,
303
                                    int src0_stride, const CONV_BUF_TYPE *src1,
304
                                    int src1_stride, int h, int w,
305
0
                                    ConvolveParams *conv_params, int bd) {
306
0
  int round =
307
0
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
308
0
  int i, j, m, diff;
309
0
  for (i = 0; i < h; ++i) {
310
0
    for (j = 0; j < w; ++j) {
311
0
      diff = abs(src0[i * src0_stride + j] - src1[i * src1_stride + j]);
312
0
      diff = ROUND_POWER_OF_TWO(diff, round);
313
0
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
314
0
      mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
315
0
    }
316
0
  }
317
0
}
318
319
void av1_build_compound_diffwtd_mask_d16_c(
320
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
321
    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
322
0
    ConvolveParams *conv_params, int bd) {
323
0
  switch (mask_type) {
324
0
    case DIFFWTD_38:
325
0
      diffwtd_mask_d16(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w,
326
0
                       conv_params, bd);
327
0
      break;
328
0
    case DIFFWTD_38_INV:
329
0
      diffwtd_mask_d16(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w,
330
0
                       conv_params, bd);
331
0
      break;
332
0
    default: assert(0);
333
0
  }
334
0
}
335
336
static inline void diffwtd_mask(uint8_t *mask, int which_inverse, int mask_base,
337
                                const uint8_t *src0, int src0_stride,
338
                                const uint8_t *src1, int src1_stride, int h,
339
0
                                int w) {
340
0
  int i, j, m, diff;
341
0
  for (i = 0; i < h; ++i) {
342
0
    for (j = 0; j < w; ++j) {
343
0
      diff =
344
0
          abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]);
345
0
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
346
0
      mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
347
0
    }
348
0
  }
349
0
}
350
351
void av1_build_compound_diffwtd_mask_c(uint8_t *mask,
352
                                       DIFFWTD_MASK_TYPE mask_type,
353
                                       const uint8_t *src0, int src0_stride,
354
                                       const uint8_t *src1, int src1_stride,
355
0
                                       int h, int w) {
356
0
  switch (mask_type) {
357
0
    case DIFFWTD_38:
358
0
      diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w);
359
0
      break;
360
0
    case DIFFWTD_38_INV:
361
0
      diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w);
362
0
      break;
363
0
    default: assert(0);
364
0
  }
365
0
}
366
367
#if CONFIG_AV1_HIGHBITDEPTH
368
static AOM_FORCE_INLINE void diffwtd_mask_highbd(
369
    uint8_t *mask, int which_inverse, int mask_base, const uint16_t *src0,
370
    int src0_stride, const uint16_t *src1, int src1_stride, int h, int w,
371
0
    const unsigned int bd) {
372
0
  assert(bd >= 8);
373
0
  if (bd == 8) {
374
0
    if (which_inverse) {
375
0
      for (int i = 0; i < h; ++i) {
376
0
        for (int j = 0; j < w; ++j) {
377
0
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
378
0
          unsigned int m = negative_to_zero(mask_base + diff);
379
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
380
0
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
381
0
        }
382
0
        src0 += src0_stride;
383
0
        src1 += src1_stride;
384
0
        mask += w;
385
0
      }
386
0
    } else {
387
0
      for (int i = 0; i < h; ++i) {
388
0
        for (int j = 0; j < w; ++j) {
389
0
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
390
0
          unsigned int m = negative_to_zero(mask_base + diff);
391
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
392
0
          mask[j] = m;
393
0
        }
394
0
        src0 += src0_stride;
395
0
        src1 += src1_stride;
396
0
        mask += w;
397
0
      }
398
0
    }
399
0
  } else {
400
0
    const unsigned int bd_shift = bd - 8;
401
0
    if (which_inverse) {
402
0
      for (int i = 0; i < h; ++i) {
403
0
        for (int j = 0; j < w; ++j) {
404
0
          int diff =
405
0
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
406
0
          unsigned int m = negative_to_zero(mask_base + diff);
407
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
408
0
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
409
0
        }
410
0
        src0 += src0_stride;
411
0
        src1 += src1_stride;
412
0
        mask += w;
413
0
      }
414
0
    } else {
415
0
      for (int i = 0; i < h; ++i) {
416
0
        for (int j = 0; j < w; ++j) {
417
0
          int diff =
418
0
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
419
0
          unsigned int m = negative_to_zero(mask_base + diff);
420
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
421
0
          mask[j] = m;
422
0
        }
423
0
        src0 += src0_stride;
424
0
        src1 += src1_stride;
425
0
        mask += w;
426
0
      }
427
0
    }
428
0
  }
429
0
}
430
431
// C implementation of the difference-weighted compound mask for high
// bit-depth predictions. src0/src1 are passed as byte pointers and converted
// with CONVERT_TO_SHORTPTR before use. DIFFWTD_38 produces the direct mask and
// DIFFWTD_38_INV its complement, both with a base weight of 38; any other
// mask type trips an assert.
void av1_build_compound_diffwtd_mask_highbd_c(
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0,
    int src0_stride, const uint8_t *src1, int src1_stride, int h, int w,
    int bd) {
  if (mask_type == DIFFWTD_38) {
    diffwtd_mask_highbd(mask, 0, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                        CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
  } else if (mask_type == DIFFWTD_38_INV) {
    diffwtd_mask_highbd(mask, 1, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                        CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
  } else {
    assert(0);
  }
}
447
#endif  // CONFIG_AV1_HIGHBITDEPTH
448
449
1
static inline void init_wedge_master_masks(void) {
450
1
  int i, j;
451
1
  const int w = MASK_MASTER_SIZE;
452
1
  const int h = MASK_MASTER_SIZE;
453
1
  const int stride = MASK_MASTER_STRIDE;
454
  // Note: index [0] stores the masters, and [1] its complement.
455
  // Generate prototype by shifting the masters
456
1
  int shift = h / 4;
457
33
  for (i = 0; i < h; i += 2) {
458
32
    shift_copy(wedge_master_oblique_even,
459
32
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride], shift,
460
32
               MASK_MASTER_SIZE);
461
32
    shift--;
462
32
    shift_copy(wedge_master_oblique_odd,
463
32
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][(i + 1) * stride], shift,
464
32
               MASK_MASTER_SIZE);
465
32
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][i * stride],
466
32
           wedge_master_vertical,
467
32
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
468
32
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][(i + 1) * stride],
469
32
           wedge_master_vertical,
470
32
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
471
32
  }
472
473
65
  for (i = 0; i < h; ++i) {
474
4.16k
    for (j = 0; j < w; ++j) {
475
4.09k
      const int msk = wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j];
476
4.09k
      wedge_mask_obl[0][WEDGE_OBLIQUE27][j * stride + i] = msk;
477
4.09k
      wedge_mask_obl[0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
478
4.09k
          wedge_mask_obl[0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
479
4.09k
              (1 << WEDGE_WEIGHT_BITS) - msk;
480
4.09k
      wedge_mask_obl[1][WEDGE_OBLIQUE63][i * stride + j] =
481
4.09k
          wedge_mask_obl[1][WEDGE_OBLIQUE27][j * stride + i] =
482
4.09k
              (1 << WEDGE_WEIGHT_BITS) - msk;
483
4.09k
      wedge_mask_obl[1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
484
4.09k
          wedge_mask_obl[1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = msk;
485
4.09k
      const int mskx = wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j];
486
4.09k
      wedge_mask_obl[0][WEDGE_HORIZONTAL][j * stride + i] = mskx;
487
4.09k
      wedge_mask_obl[1][WEDGE_VERTICAL][i * stride + j] =
488
4.09k
          wedge_mask_obl[1][WEDGE_HORIZONTAL][j * stride + i] =
489
4.09k
              (1 << WEDGE_WEIGHT_BITS) - mskx;
490
4.09k
    }
491
64
  }
492
1
}
493
494
1
static inline void init_wedge_masks(void) {
495
1
  uint8_t *dst = wedge_mask_buf;
496
1
  BLOCK_SIZE bsize;
497
1
  memset(wedge_masks, 0, sizeof(wedge_masks));
498
23
  for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; ++bsize) {
499
22
    const wedge_params_type *wedge_params = &av1_wedge_params_lookup[bsize];
500
22
    const int wtypes = wedge_params->wedge_types;
501
22
    if (wtypes == 0) continue;
502
9
    const uint8_t *mask;
503
9
    const int bw = block_size_wide[bsize];
504
9
    const int bh = block_size_high[bsize];
505
9
    int w;
506
153
    for (w = 0; w < wtypes; ++w) {
507
144
      mask = get_wedge_mask_inplace(w, 0, bsize);
508
144
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw,
509
144
                        bh);
510
144
      wedge_params->masks[0][w] = dst;
511
144
      dst += bw * bh;
512
513
144
      mask = get_wedge_mask_inplace(w, 1, bsize);
514
144
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw,
515
144
                        bh);
516
144
      wedge_params->masks[1][w] = dst;
517
144
      dst += bw * bh;
518
144
    }
519
9
    assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf));
520
9
  }
521
1
}
522
523
/* clang-format off */
524
// One-dimensional, monotonically decreasing weight profile (60 down to 1)
// used by build_smooth_interintra_mask(). It is sampled at
// position * ii_size_scales[block size], with position being the row or
// column index inside the block.
static const uint8_t ii_weights1d[MAX_SB_SIZE] = {
525
  60, 58, 56, 54, 52, 50, 48, 47, 45, 44, 42, 41, 39, 38, 37, 35, 34, 33, 32,
526
  31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20, 19, 19, 18, 18, 17, 16,
527
  16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10,  9,  9,  9,  8,
528
  8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  4,  4,
529
  4,  4,  4,  4,  4,  4,  3,  3,  3,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,
530
  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1,  1,
531
  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1
532
};
533
// Step, per block size, with which build_smooth_interintra_mask() walks
// ii_weights1d: sample k of a block reads ii_weights1d[k * scale]. The table
// is indexed by BLOCK_SIZE and is read-only lookup data, hence const.
// NOTE(review): the values look like 128 / max(block width, block height) --
// confirm against the BLOCK_SIZE enumeration order.
static const uint8_t ii_size_scales[BLOCK_SIZES_ALL] = {
    32, 16, 16, 16, 8, 8, 8, 4,
    4,  4,  2,  2,  2, 1, 1, 1,
    8,  8,  4,  4,  2, 2
};
538
/* clang-format on */
539
540
static inline void build_smooth_interintra_mask(uint8_t *mask, int stride,
541
                                                BLOCK_SIZE plane_bsize,
542
245k
                                                INTERINTRA_MODE mode) {
543
245k
  int i, j;
544
245k
  const int bw = block_size_wide[plane_bsize];
545
245k
  const int bh = block_size_high[plane_bsize];
546
245k
  const int size_scale = ii_size_scales[plane_bsize];
547
548
245k
  switch (mode) {
549
49.0k
    case II_V_PRED:
550
562k
      for (i = 0; i < bh; ++i) {
551
513k
        memset(mask, ii_weights1d[i * size_scale], bw * sizeof(mask[0]));
552
513k
        mask += stride;
553
513k
      }
554
49.0k
      break;
555
556
114k
    case II_H_PRED:
557
1.29M
      for (i = 0; i < bh; ++i) {
558
16.0M
        for (j = 0; j < bw; ++j) mask[j] = ii_weights1d[j * size_scale];
559
1.17M
        mask += stride;
560
1.17M
      }
561
114k
      break;
562
563
38.6k
    case II_SMOOTH_PRED:
564
454k
      for (i = 0; i < bh; ++i) {
565
5.73M
        for (j = 0; j < bw; ++j)
566
5.32M
          mask[j] = ii_weights1d[(i < j ? i : j) * size_scale];
567
416k
        mask += stride;
568
416k
      }
569
38.6k
      break;
570
571
43.5k
    case II_DC_PRED:
572
43.5k
    default:
573
504k
      for (i = 0; i < bh; ++i) {
574
461k
        memset(mask, 32, bw * sizeof(mask[0]));
575
461k
        mask += stride;
576
461k
      }
577
43.5k
      break;
578
245k
  }
579
245k
}
580
581
1
static inline void init_smooth_interintra_masks(void) {
582
5
  for (int m = 0; m < INTERINTRA_MODES; ++m) {
583
92
    for (int bs = 0; bs < BLOCK_SIZES_ALL; ++bs) {
584
88
      const int bw = block_size_wide[bs];
585
88
      const int bh = block_size_high[bs];
586
88
      if (bw > MAX_WEDGE_SIZE || bh > MAX_WEDGE_SIZE) continue;
587
56
      build_smooth_interintra_mask(smooth_interintra_mask_buf[m][bs], bw, bs,
588
56
                                   m);
589
56
    }
590
4
  }
591
1
}
592
593
// Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
594
1
static void init_all_wedge_masks(void) {
  // The smooth inter-intra masks do not depend on the wedge tables.
  init_smooth_interintra_masks();
  // The master masks must exist before the per-block-size masks are cut out
  // of them.
  init_wedge_master_masks();
  init_wedge_masks();
}
599
600
5.81k
// Public entry point for building all wedge and smooth inter-intra mask
// tables. The work is routed through aom_once(), which is relied on to run
// the initializer a single time however often this function is called.
void av1_init_wedge_masks(void) {
  aom_once(init_all_wedge_masks);
}
601
602
static inline void build_masked_compound_no_round(
603
    uint8_t *dst, int dst_stride, const CONV_BUF_TYPE *src0, int src0_stride,
604
    const CONV_BUF_TYPE *src1, int src1_stride,
605
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
606
185k
    int w, InterPredParams *inter_pred_params) {
607
185k
  const int ssy = inter_pred_params->subsampling_y;
608
185k
  const int ssx = inter_pred_params->subsampling_x;
609
185k
  const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
610
185k
  const int mask_stride = block_size_wide[sb_type];
611
185k
#if CONFIG_AV1_HIGHBITDEPTH
612
185k
  if (inter_pred_params->use_hbd_buf) {
613
124k
    aom_highbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
614
124k
                                  src1_stride, mask, mask_stride, w, h, ssx,
615
124k
                                  ssy, &inter_pred_params->conv_params,
616
124k
                                  inter_pred_params->bit_depth);
617
124k
  } else {
618
60.9k
    aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
619
60.9k
                                 src1_stride, mask, mask_stride, w, h, ssx, ssy,
620
60.9k
                                 &inter_pred_params->conv_params);
621
60.9k
  }
622
#else
623
  aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
624
                               src1_stride, mask, mask_stride, w, h, ssx, ssy,
625
                               &inter_pred_params->conv_params);
626
#endif
627
185k
}
628
629
void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
630
                                     uint8_t *dst, int dst_stride,
631
                                     InterPredParams *inter_pred_params,
632
185k
                                     const SubpelParams *subpel_params) {
633
185k
  const INTERINTER_COMPOUND_DATA *comp_data = &inter_pred_params->mask_comp;
634
185k
  BLOCK_SIZE sb_type = inter_pred_params->sb_type;
635
636
  // We're going to call av1_make_inter_predictor to generate a prediction into
637
  // a temporary buffer, then will blend that temporary buffer with that from
638
  // the other reference.
639
185k
  DECLARE_ALIGNED(32, uint8_t, tmp_buf[2 * MAX_SB_SQUARE]);
640
185k
  uint8_t *tmp_dst =
641
185k
      inter_pred_params->use_hbd_buf ? CONVERT_TO_BYTEPTR(tmp_buf) : tmp_buf;
642
643
185k
  const int tmp_buf_stride = MAX_SB_SIZE;
644
185k
  CONV_BUF_TYPE *org_dst = inter_pred_params->conv_params.dst;
645
185k
  int org_dst_stride = inter_pred_params->conv_params.dst_stride;
646
185k
  CONV_BUF_TYPE *tmp_buf16 = (CONV_BUF_TYPE *)tmp_buf;
647
185k
  inter_pred_params->conv_params.dst = tmp_buf16;
648
185k
  inter_pred_params->conv_params.dst_stride = tmp_buf_stride;
649
185k
  assert(inter_pred_params->conv_params.do_average == 0);
650
651
  // This will generate a prediction in tmp_buf for the second reference
652
185k
  av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE,
653
185k
                           inter_pred_params, subpel_params);
654
655
185k
  if (!inter_pred_params->conv_params.plane &&
656
62.8k
      comp_data->type == COMPOUND_DIFFWTD) {
657
38.9k
    av1_build_compound_diffwtd_mask_d16(
658
38.9k
        comp_data->seg_mask, comp_data->mask_type, org_dst, org_dst_stride,
659
38.9k
        tmp_buf16, tmp_buf_stride, inter_pred_params->block_height,
660
38.9k
        inter_pred_params->block_width, &inter_pred_params->conv_params,
661
38.9k
        inter_pred_params->bit_depth);
662
38.9k
  }
663
185k
  build_masked_compound_no_round(
664
185k
      dst, dst_stride, org_dst, org_dst_stride, tmp_buf16, tmp_buf_stride,
665
185k
      comp_data, sb_type, inter_pred_params->block_height,
666
185k
      inter_pred_params->block_width, inter_pred_params);
667
185k
}
668
669
void av1_dist_wtd_comp_weight_assign(const AV1_COMMON *cm,
670
                                     const MB_MODE_INFO *mbmi, int *fwd_offset,
671
                                     int *bck_offset,
672
                                     int *use_dist_wtd_comp_avg,
673
8.90M
                                     int is_compound) {
674
8.90M
  assert(fwd_offset != NULL && bck_offset != NULL);
675
8.90M
  if (!is_compound || mbmi->compound_idx) {
676
8.58M
    *fwd_offset = 8;
677
8.58M
    *bck_offset = 8;
678
8.58M
    *use_dist_wtd_comp_avg = 0;
679
8.58M
    return;
680
8.58M
  }
681
682
324k
  *use_dist_wtd_comp_avg = 1;
683
324k
  const RefCntBuffer *const bck_buf = get_ref_frame_buf(cm, mbmi->ref_frame[0]);
684
324k
  const RefCntBuffer *const fwd_buf = get_ref_frame_buf(cm, mbmi->ref_frame[1]);
685
324k
  const int cur_frame_index = cm->cur_frame->order_hint;
686
324k
  int bck_frame_index = 0, fwd_frame_index = 0;
687
688
325k
  if (bck_buf != NULL) bck_frame_index = bck_buf->order_hint;
689
325k
  if (fwd_buf != NULL) fwd_frame_index = fwd_buf->order_hint;
690
691
324k
  int d0 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
692
324k
                                       fwd_frame_index, cur_frame_index)),
693
324k
                 0, MAX_FRAME_DISTANCE);
694
324k
  int d1 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
695
324k
                                       cur_frame_index, bck_frame_index)),
696
324k
                 0, MAX_FRAME_DISTANCE);
697
698
324k
  const int order = d0 <= d1;
699
700
324k
  if (d0 == 0 || d1 == 0) {
701
5.16k
    *fwd_offset = quant_dist_lookup_table[3][order];
702
5.16k
    *bck_offset = quant_dist_lookup_table[3][1 - order];
703
5.16k
    return;
704
5.16k
  }
705
706
319k
  int i;
707
400k
  for (i = 0; i < 3; ++i) {
708
378k
    int c0 = quant_dist_weight[i][order];
709
378k
    int c1 = quant_dist_weight[i][!order];
710
378k
    int d0_c0 = d0 * c0;
711
378k
    int d1_c1 = d1 * c1;
712
378k
    if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
713
378k
  }
714
715
319k
  *fwd_offset = quant_dist_lookup_table[i][order];
716
319k
  *bck_offset = quant_dist_lookup_table[i][1 - order];
717
319k
}
718
719
void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
720
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
721
15.4M
                          const int plane_start, const int plane_end) {
722
  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
723
  // the static analysis warnings.
724
58.7M
  for (int i = plane_start; i < AOMMIN(plane_end, MAX_MB_PLANE); ++i) {
725
43.2M
    struct macroblockd_plane *const pd = &planes[i];
726
43.2M
    const int is_uv = i > 0;
727
43.2M
    setup_pred_plane(&pd->dst, bsize, src->buffers[i], src->crop_widths[is_uv],
728
43.2M
                     src->crop_heights[is_uv], src->strides[is_uv], mi_row,
729
43.2M
                     mi_col, NULL, pd->subsampling_x, pd->subsampling_y);
730
43.2M
  }
731
15.4M
}
732
733
void av1_setup_pre_planes(MACROBLOCKD *xd, int idx,
734
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
735
                          const struct scale_factors *sf,
736
3.80M
                          const int num_planes) {
737
3.80M
  if (src != NULL) {
738
    // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
739
    // the static analysis warnings.
740
15.1M
    for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); ++i) {
741
11.3M
      struct macroblockd_plane *const pd = &xd->plane[i];
742
11.3M
      const int is_uv = i > 0;
743
11.3M
      setup_pred_plane(&pd->pre[idx], xd->mi[0]->bsize, src->buffers[i],
744
11.3M
                       src->crop_widths[is_uv], src->crop_heights[is_uv],
745
11.3M
                       src->strides[is_uv], mi_row, mi_col, sf,
746
11.3M
                       pd->subsampling_x, pd->subsampling_y);
747
11.3M
    }
748
3.80M
  }
749
3.80M
}
750
751
// obmc_mask_N[overlap_position]
//
// Blend-weight tables for overlapped block motion compensation, one table per
// supported overlap length N. av1_get_obmc_mask() returns the table matching
// a requested length, and the OBMC helpers in this file pass it as the mask
// argument of the aom_blend_a64_{v,h}mask functions. Within each table the
// weights never decrease with position and the last entry is 64.
752
static const uint8_t obmc_mask_1[1] = { 64 };
753
DECLARE_ALIGNED(2, static const uint8_t, obmc_mask_2[2]) = { 45, 64 };
754
755
DECLARE_ALIGNED(4, static const uint8_t, obmc_mask_4[4]) = { 39, 50, 59, 64 };
756
757
static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 64, 64 };
758
759
static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54,
760
                                          56, 58, 60, 61, 64, 64, 64, 64 };
761
762
static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44,
763
                                          45, 47, 48, 50, 51, 52, 53, 55,
764
                                          56, 57, 58, 59, 60, 60, 61, 62,
765
                                          64, 64, 64, 64, 64, 64, 64, 64 };
766
767
static const uint8_t obmc_mask_64[64] = {
768
  33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
769
  45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
770
  56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
771
  62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
772
};
773
774
1.80M
const uint8_t *av1_get_obmc_mask(int length) {
775
1.80M
  switch (length) {
776
0
    case 1: return obmc_mask_1;
777
388k
    case 2: return obmc_mask_2;
778
874k
    case 4: return obmc_mask_4;
779
428k
    case 8: return obmc_mask_8;
780
100k
    case 16: return obmc_mask_16;
781
12.9k
    case 32: return obmc_mask_32;
782
0
    case 64: return obmc_mask_64;
783
0
    default: assert(0); return NULL;
784
1.80M
  }
785
1.80M
}
786
787
// Neighbor-visitor callback that only counts invocations: `fun_ctxt` is
// treated as a pointer to a uint8_t counter, which is incremented by one.
// Every other argument is ignored.
static inline void increment_int_ptr(MACROBLOCKD *xd, int rel_mi_row,
                                     int rel_mi_col, uint8_t op_mi_size,
                                     int dir, MB_MODE_INFO *mi, void *fun_ctxt,
                                     const int num_planes) {
  (void)xd;
  (void)rel_mi_row;
  (void)rel_mi_col;
  (void)op_mi_size;
  (void)dir;
  (void)mi;
  (void)num_planes;

  uint8_t *const counter = (uint8_t *)fun_ctxt;
  ++(*counter);
}
800
801
3.05M
// Recomputes xd->mi[0]->overlappable_neighbors.
//
// The field is reset to 0 and stays 0 when the block size does not allow
// motion variation. Otherwise the overlappable neighbors above are counted;
// the left neighbors are counted only if none were found above.
void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd) {
  MB_MODE_INFO *const cur_mbmi = xd->mi[0];
  cur_mbmi->overlappable_neighbors = 0;

  if (!is_motion_variation_allowed_bsize(cur_mbmi->bsize)) return;

  foreach_overlappable_nb_above(cm, xd, INT_MAX, increment_int_ptr,
                                &cur_mbmi->overlappable_neighbors);
  if (cur_mbmi->overlappable_neighbors == 0) {
    foreach_overlappable_nb_left(cm, xd, INT_MAX, increment_int_ptr,
                                 &cur_mbmi->overlappable_neighbors);
  }
}
814
815
// HW does not support < 4x4 prediction. To limit the bandwidth requirement, if
816
// block-size of current plane is smaller than 8x8, always only blend with the
817
// left neighbor(s) (skip blending with the above side).
818
#define DISABLE_CHROMA_U8X8_OBMC 0  // 0: one-sided obmc; 1: disable
819
820
// Returns non-zero when OBMC blending must be skipped for plane `pd` of a
// block of size `bsize` in direction `dir`.
//
// Only plane block sizes 4x4, 8x4 and 4x8 are ever skipped. With
// DISABLE_CHROMA_U8X8_OBMC set they are skipped in every direction; otherwise
// they are skipped only for dir == 0 (the value the above-neighbor blend in
// this file passes).
int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,
                               const struct macroblockd_plane *pd, int dir) {
  assert(is_motion_variation_allowed_bsize(bsize));

  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
  const int is_small_plane_block = plane_bsize == BLOCK_4X4 ||
                                   plane_bsize == BLOCK_8X4 ||
                                   plane_bsize == BLOCK_4X8;
  if (!is_small_plane_block) return 0;

#if DISABLE_CHROMA_U8X8_OBMC
  (void)dir;
  return 1;
#else
  return dir == 0;
#endif
}
839
840
#if CONFIG_AV1_DECODER
841
795k
// Rewrites a neighbor's mode info in place before its prediction is built for
// OBMC: the second reference is cleared (NONE_FRAME) and the inter-inter
// compound type is set to COMPOUND_AVERAGE.
static void modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
  mbmi->ref_frame[1] = NONE_FRAME;
}
845
#endif  // CONFIG_AV1_DECODER
846
847
// Context handed (as void *) to the OBMC blend callbacks in this file. It
// carries the neighbor-side predictions to blend into the current block.
struct obmc_inter_pred_ctxt {
848
  // Per-plane pointers to the prediction buffers (indexed by plane).
  uint8_t **adjacent;
849
  // Per-plane strides of the buffers in `adjacent` (indexed by plane).
  int *adjacent_stride;
850
};
851
852
static inline void build_obmc_inter_pred_above(
853
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
854
400k
    int dir, MB_MODE_INFO *above_mi, void *fun_ctxt, const int num_planes) {
855
400k
  (void)above_mi;
856
400k
  (void)rel_mi_row;
857
400k
  (void)dir;
858
400k
  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
859
400k
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
860
400k
  const int overlap =
861
400k
      AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
862
863
1.59M
  for (int plane = 0; plane < num_planes; ++plane) {
864
1.19M
    const struct macroblockd_plane *pd = &xd->plane[plane];
865
1.19M
    const int bw = (op_mi_size * MI_SIZE) >> pd->subsampling_x;
866
1.19M
    const int bh = overlap >> pd->subsampling_y;
867
1.19M
    const int plane_col = (rel_mi_col * MI_SIZE) >> pd->subsampling_x;
868
869
1.19M
    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
870
871
623k
    const int dst_stride = pd->dst.stride;
872
623k
    uint8_t *const dst = &pd->dst.buf[plane_col];
873
623k
    const int tmp_stride = ctxt->adjacent_stride[plane];
874
623k
    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_col];
875
623k
    const uint8_t *const mask = av1_get_obmc_mask(bh);
876
623k
#if CONFIG_AV1_HIGHBITDEPTH
877
623k
    const int is_hbd = is_cur_buf_hbd(xd);
878
623k
    if (is_hbd)
879
355k
      aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
880
355k
                                 tmp_stride, mask, bw, bh, xd->bd);
881
268k
    else
882
268k
      aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
883
268k
                          mask, bw, bh);
884
#else
885
    aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
886
                        bw, bh);
887
#endif
888
623k
  }
889
400k
}
890
891
static inline void build_obmc_inter_pred_left(
892
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
893
395k
    int dir, MB_MODE_INFO *left_mi, void *fun_ctxt, const int num_planes) {
894
395k
  (void)left_mi;
895
395k
  (void)rel_mi_col;
896
395k
  (void)dir;
897
395k
  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
898
395k
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
899
395k
  const int overlap =
900
395k
      AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
901
902
1.57M
  for (int plane = 0; plane < num_planes; ++plane) {
903
1.18M
    const struct macroblockd_plane *pd = &xd->plane[plane];
904
1.18M
    const int bw = overlap >> pd->subsampling_x;
905
1.18M
    const int bh = (op_mi_size * MI_SIZE) >> pd->subsampling_y;
906
1.18M
    const int plane_row = (rel_mi_row * MI_SIZE) >> pd->subsampling_y;
907
908
1.18M
    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
909
910
1.18M
    const int dst_stride = pd->dst.stride;
911
1.18M
    uint8_t *const dst = &pd->dst.buf[plane_row * dst_stride];
912
1.18M
    const int tmp_stride = ctxt->adjacent_stride[plane];
913
1.18M
    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_row * tmp_stride];
914
1.18M
    const uint8_t *const mask = av1_get_obmc_mask(bw);
915
916
1.18M
#if CONFIG_AV1_HIGHBITDEPTH
917
1.18M
    const int is_hbd = is_cur_buf_hbd(xd);
918
1.18M
    if (is_hbd)
919
667k
      aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
920
667k
                                 tmp_stride, mask, bw, bh, xd->bd);
921
514k
    else
922
514k
      aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
923
514k
                          mask, bw, bh);
924
#else
925
    aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
926
                        bw, bh);
927
#endif
928
1.18M
  }
929
395k
}
930
931
// This function combines motion compensated predictions that are generated by
932
// top/left neighboring blocks' inter predictors with the regular inter
933
// prediction. We assume the original prediction (bmc) is stored in
934
// xd->plane[].dst.buf
935
void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
936
                                     uint8_t *above[MAX_MB_PLANE],
937
                                     int above_stride[MAX_MB_PLANE],
938
                                     uint8_t *left[MAX_MB_PLANE],
939
413k
                                     int left_stride[MAX_MB_PLANE]) {
940
413k
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
941
942
  // handle above row
943
413k
  struct obmc_inter_pred_ctxt ctxt_above = { above, above_stride };
944
413k
  foreach_overlappable_nb_above(cm, xd,
945
413k
                                max_neighbor_obmc[mi_size_wide_log2[bsize]],
946
413k
                                build_obmc_inter_pred_above, &ctxt_above);
947
948
  // handle left column
949
413k
  struct obmc_inter_pred_ctxt ctxt_left = { left, left_stride };
950
413k
  foreach_overlappable_nb_left(cm, xd,
951
413k
                               max_neighbor_obmc[mi_size_high_log2[bsize]],
952
413k
                               build_obmc_inter_pred_left, &ctxt_left);
953
413k
}
954
955
// Fills dst_buf1[0..2] and dst_buf2[0..2] with per-plane pointers into
// xd->tmp_obmc_bufs[0] and xd->tmp_obmc_bufs[1] respectively.
//
// Consecutive planes are MAX_SB_SQUARE bytes apart for regular buffers and
// MAX_SB_SQUARE * sizeof(uint16_t) bytes apart for high bit-depth buffers. In
// the high bit-depth case each pointer is additionally passed through
// CONVERT_TO_BYTEPTR.
void av1_setup_obmc_dst_bufs(MACROBLOCKD *xd, uint8_t **dst_buf1,
                             uint8_t **dst_buf2) {
  const int hbd = is_cur_buf_hbd(xd);
  const int plane_bytes =
      hbd ? MAX_SB_SQUARE * (int)sizeof(uint16_t) : MAX_SB_SQUARE;

  for (int plane = 0; plane < 3; ++plane) {
    uint8_t *const raw1 = xd->tmp_obmc_bufs[0] + plane * plane_bytes;
    uint8_t *const raw2 = xd->tmp_obmc_bufs[1] + plane * plane_bytes;
    if (hbd) {
      dst_buf1[plane] = CONVERT_TO_BYTEPTR(raw1);
      dst_buf2[plane] = CONVERT_TO_BYTEPTR(raw2);
    } else {
      dst_buf1[plane] = raw1;
      dst_buf2[plane] = raw2;
    }
  }
}
978
979
#if CONFIG_AV1_DECODER
980
void av1_setup_build_prediction_by_above_pred(
981
    MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width,
982
    MB_MODE_INFO *above_mbmi, struct build_prediction_ctxt *ctxt,
983
400k
    const int num_planes) {
984
400k
  const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->bsize);
985
400k
  const int above_mi_col = xd->mi_col + rel_mi_col;
986
987
400k
  modify_neighbor_predictor_for_obmc(above_mbmi);
988
989
1.59M
  for (int j = 0; j < num_planes; ++j) {
990
1.19M
    struct macroblockd_plane *const pd = &xd->plane[j];
991
1.19M
    setup_pred_plane(&pd->dst, a_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
992
1.19M
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], 0, rel_mi_col,
993
1.19M
                     NULL, pd->subsampling_x, pd->subsampling_y);
994
1.19M
  }
995
996
400k
  const int num_refs = 1 + has_second_ref(above_mbmi);
997
998
801k
  for (int ref = 0; ref < num_refs; ++ref) {
999
400k
    const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];
1000
1001
400k
    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
1002
400k
    const struct scale_factors *const sf =
1003
400k
        get_ref_scale_factors_const(ctxt->cm, frame);
1004
400k
    xd->block_ref_scale_factors[ref] = sf;
1005
400k
    if ((!av1_is_valid_scale(sf)))
1006
0
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
1007
0
                         "Reference frame has invalid dimensions");
1008
400k
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, xd->mi_row, above_mi_col, sf,
1009
400k
                         num_planes);
1010
400k
  }
1011
1012
400k
  xd->mb_to_left_edge = 8 * MI_SIZE * (-above_mi_col);
1013
400k
  xd->mb_to_right_edge =
1014
400k
      ctxt->mb_to_far_edge +
1015
400k
      (xd->width - rel_mi_col - above_mi_width) * MI_SIZE * 8;
1016
400k
}
1017
1018
void av1_setup_build_prediction_by_left_pred(MACROBLOCKD *xd, int rel_mi_row,
1019
                                             uint8_t left_mi_height,
1020
                                             MB_MODE_INFO *left_mbmi,
1021
                                             struct build_prediction_ctxt *ctxt,
1022
395k
                                             const int num_planes) {
1023
395k
  const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->bsize);
1024
395k
  const int left_mi_row = xd->mi_row + rel_mi_row;
1025
1026
395k
  modify_neighbor_predictor_for_obmc(left_mbmi);
1027
1028
1.57M
  for (int j = 0; j < num_planes; ++j) {
1029
1.18M
    struct macroblockd_plane *const pd = &xd->plane[j];
1030
1.18M
    setup_pred_plane(&pd->dst, l_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
1031
1.18M
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], rel_mi_row, 0,
1032
1.18M
                     NULL, pd->subsampling_x, pd->subsampling_y);
1033
1.18M
  }
1034
1035
395k
  const int num_refs = 1 + has_second_ref(left_mbmi);
1036
1037
790k
  for (int ref = 0; ref < num_refs; ++ref) {
1038
395k
    const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];
1039
1040
395k
    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
1041
395k
    const struct scale_factors *const ref_scale_factors =
1042
395k
        get_ref_scale_factors_const(ctxt->cm, frame);
1043
1044
395k
    xd->block_ref_scale_factors[ref] = ref_scale_factors;
1045
395k
    if ((!av1_is_valid_scale(ref_scale_factors)))
1046
0
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
1047
0
                         "Reference frame has invalid dimensions");
1048
395k
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, left_mi_row, xd->mi_col,
1049
395k
                         ref_scale_factors, num_planes);
1050
395k
  }
1051
1052
395k
  xd->mb_to_top_edge = GET_MV_SUBPEL(MI_SIZE * (-left_mi_row));
1053
395k
  xd->mb_to_bottom_edge =
1054
395k
      ctxt->mb_to_far_edge +
1055
395k
      GET_MV_SUBPEL((xd->height - rel_mi_row - left_mi_height) * MI_SIZE);
1056
395k
}
1057
#endif  // CONFIG_AV1_DECODER
1058
1059
static inline void combine_interintra(
1060
    INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
1061
    int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
1062
    uint8_t *comppred, int compstride, const uint8_t *interpred,
1063
292k
    int interstride, const uint8_t *intrapred, int intrastride) {
1064
292k
  const int bw = block_size_wide[plane_bsize];
1065
292k
  const int bh = block_size_high[plane_bsize];
1066
1067
292k
  if (use_wedge_interintra) {
1068
81.3k
    if (av1_is_wedge_used(bsize)) {
1069
81.3k
      const uint8_t *mask =
1070
81.3k
          av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
1071
81.3k
      const int subw = 2 * mi_size_wide[bsize] == bw;
1072
81.3k
      const int subh = 2 * mi_size_high[bsize] == bh;
1073
81.3k
      aom_blend_a64_mask(comppred, compstride, intrapred, intrastride,
1074
81.3k
                         interpred, interstride, mask, block_size_wide[bsize],
1075
81.3k
                         bw, bh, subw, subh);
1076
81.3k
    }
1077
81.3k
    return;
1078
81.3k
  }
1079
1080
210k
  const uint8_t *mask = smooth_interintra_mask_buf[mode][plane_bsize];
1081
210k
  aom_blend_a64_mask(comppred, compstride, intrapred, intrastride, interpred,
1082
210k
                     interstride, mask, bw, bw, bh, 0, 0);
1083
210k
}
1084
1085
#if CONFIG_AV1_HIGHBITDEPTH
1086
static inline void combine_interintra_highbd(
1087
    INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
1088
    int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
1089
    uint8_t *comppred8, int compstride, const uint8_t *interpred8,
1090
367k
    int interstride, const uint8_t *intrapred8, int intrastride, int bd) {
1091
367k
  const int bw = block_size_wide[plane_bsize];
1092
367k
  const int bh = block_size_high[plane_bsize];
1093
1094
367k
  if (use_wedge_interintra) {
1095
121k
    if (av1_is_wedge_used(bsize)) {
1096
121k
      const uint8_t *mask =
1097
121k
          av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
1098
121k
      const int subh = 2 * mi_size_high[bsize] == bh;
1099
121k
      const int subw = 2 * mi_size_wide[bsize] == bw;
1100
121k
      aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
1101
121k
                                interpred8, interstride, mask,
1102
121k
                                block_size_wide[bsize], bw, bh, subw, subh, bd);
1103
121k
    }
1104
121k
    return;
1105
121k
  }
1106
1107
245k
  uint8_t mask[MAX_SB_SQUARE];
1108
245k
  build_smooth_interintra_mask(mask, bw, plane_bsize, mode);
1109
245k
  aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
1110
245k
                            interpred8, interstride, mask, bw, bw, bh, 0, 0,
1111
245k
                            bd);
1112
245k
}
1113
#endif
1114
1115
void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm,
1116
                                               MACROBLOCKD *xd,
1117
                                               BLOCK_SIZE bsize, int plane,
1118
                                               const BUFFER_SET *ctx,
1119
659k
                                               uint8_t *dst, int dst_stride) {
1120
659k
  struct macroblockd_plane *const pd = &xd->plane[plane];
1121
659k
  const int ssx = xd->plane[plane].subsampling_x;
1122
659k
  const int ssy = xd->plane[plane].subsampling_y;
1123
659k
  BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
1124
659k
  PREDICTION_MODE mode = interintra_to_intra_mode[xd->mi[0]->interintra_mode];
1125
659k
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_Y] == 0);
1126
659k
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_UV] == 0);
1127
659k
  assert(xd->mi[0]->filter_intra_mode_info.use_filter_intra == 0);
1128
659k
  assert(xd->mi[0]->use_intrabc == 0);
1129
659k
  const SequenceHeader *seq_params = cm->seq_params;
1130
1131
659k
  av1_predict_intra_block(xd, seq_params->sb_size,
1132
659k
                          seq_params->enable_intra_edge_filter, pd->width,
1133
659k
                          pd->height, max_txsize_rect_lookup[plane_bsize], mode,
1134
659k
                          0, 0, FILTER_INTRA_MODES, ctx->plane[plane],
1135
659k
                          ctx->stride[plane], dst, dst_stride, 0, 0, plane);
1136
659k
}
1137
1138
// Combines an inter prediction and an intra prediction for one plane and
// writes the result to xd->plane[plane].dst. Blend mode, wedge flag and wedge
// index are taken from xd->mi[0]; the wedge sign is always
// INTERINTRA_WEDGE_SIGN. When high bit-depth support is compiled in and the
// current buffer is high bit-depth, the highbd blend is used.
void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
                            const uint8_t *inter_pred, int inter_stride,
                            const uint8_t *intra_pred, int intra_stride) {
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);

#if CONFIG_AV1_HIGHBITDEPTH
  if (is_cur_buf_hbd(xd)) {
    combine_interintra_highbd(mbmi->interintra_mode,
                              mbmi->use_wedge_interintra,
                              mbmi->interintra_wedge_index,
                              INTERINTRA_WEDGE_SIGN, bsize, plane_bsize,
                              pd->dst.buf, pd->dst.stride, inter_pred,
                              inter_stride, intra_pred, intra_stride, xd->bd);
    return;
  }
#endif
  combine_interintra(mbmi->interintra_mode, mbmi->use_wedge_interintra,
                     mbmi->interintra_wedge_index, INTERINTRA_WEDGE_SIGN, bsize,
                     plane_bsize, pd->dst.buf, pd->dst.stride, inter_pred,
                     inter_stride, intra_pred, intra_stride);
}
1160
1161
// build interintra_predictors for one plane
1162
void av1_build_interintra_predictor(const AV1_COMMON *cm, MACROBLOCKD *xd,
1163
                                    uint8_t *pred, int stride,
1164
                                    const BUFFER_SET *ctx, int plane,
1165
659k
                                    BLOCK_SIZE bsize) {
1166
659k
  assert(bsize < BLOCK_SIZES_ALL);
1167
659k
  if (is_cur_buf_hbd(xd)) {
1168
367k
    DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
1169
367k
    av1_build_intra_predictors_for_interintra(
1170
367k
        cm, xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(intrapredictor),
1171
367k
        MAX_SB_SIZE);
1172
367k
    av1_combine_interintra(xd, bsize, plane, pred, stride,
1173
367k
                           CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
1174
367k
  } else {
1175
292k
    DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]);
1176
292k
    av1_build_intra_predictors_for_interintra(cm, xd, bsize, plane, ctx,
1177
292k
                                              intrapredictor, MAX_SB_SIZE);
1178
292k
    av1_combine_interintra(xd, bsize, plane, pred, stride, intrapredictor,
1179
292k
                           MAX_SB_SIZE);
1180
292k
  }
1181
659k
}