Coverage Report

Created: 2023-06-07 06:31

/src/aom/av1/common/reconinter.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
#include <stdio.h>
14
#include <limits.h>
15
16
#include "config/aom_config.h"
17
#include "config/aom_dsp_rtcd.h"
18
#include "config/aom_scale_rtcd.h"
19
20
#include "aom/aom_integer.h"
21
#include "aom_dsp/blend.h"
22
#include "aom_ports/aom_once.h"
23
24
#include "av1/common/av1_common_int.h"
25
#include "av1/common/blockd.h"
26
#include "av1/common/mvref_common.h"
27
#include "av1/common/obmc.h"
28
#include "av1/common/reconinter.h"
29
#include "av1/common/reconintra.h"
30
31
// This function will determine whether or not to create a warped
32
// prediction.
33
static int allow_warp(const MB_MODE_INFO *const mbmi,
34
                      const WarpTypesAllowed *const warp_types,
35
                      const WarpedMotionParams *const gm_params,
36
                      int build_for_obmc, const struct scale_factors *const sf,
37
8.01M
                      WarpedMotionParams *final_warp_params) {
38
  // Note: As per the spec, we must test the fixed point scales here, which are
39
  // at a higher precision (1 << 14) than the xs and ys in subpel_params (that
40
  // have 1 << 10 precision).
41
8.01M
  if (av1_is_scaled(sf)) return 0;
42
43
6.29M
  if (final_warp_params != NULL) *final_warp_params = default_warp_params;
44
45
6.29M
  if (build_for_obmc) return 0;
46
47
6.29M
  if (warp_types->local_warp_allowed && !mbmi->wm_params.invalid) {
48
606k
    if (final_warp_params != NULL)
49
606k
      memcpy(final_warp_params, &mbmi->wm_params, sizeof(*final_warp_params));
50
606k
    return 1;
51
5.69M
  } else if (warp_types->global_warp_allowed && !gm_params->invalid) {
52
157k
    if (final_warp_params != NULL)
53
157k
      memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
54
157k
    return 1;
55
157k
  }
56
57
5.53M
  return 0;
58
6.29M
}
59
60
void av1_init_warp_params(InterPredParams *inter_pred_params,
61
                          const WarpTypesAllowed *warp_types, int ref,
62
15.9M
                          const MACROBLOCKD *xd, const MB_MODE_INFO *mi) {
63
15.9M
  if (inter_pred_params->block_height < 8 || inter_pred_params->block_width < 8)
64
7.91M
    return;
65
66
8.07M
  if (xd->cur_frame_force_integer_mv) return;
67
68
7.99M
  if (allow_warp(mi, warp_types, &xd->global_motion[mi->ref_frame[ref]], 0,
69
7.99M
                 inter_pred_params->scale_factors,
70
7.99M
                 &inter_pred_params->warp_params)) {
71
764k
    inter_pred_params->mode = WARP_PRED;
72
764k
  }
73
7.99M
}
74
75
void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
76
                              int dst_stride,
77
                              InterPredParams *inter_pred_params,
78
24.7M
                              const SubpelParams *subpel_params) {
79
24.7M
  assert(IMPLIES(inter_pred_params->conv_params.is_compound,
80
24.7M
                 inter_pred_params->conv_params.dst != NULL));
81
82
24.7M
  if (inter_pred_params->mode == TRANSLATION_PRED) {
83
23.9M
#if CONFIG_AV1_HIGHBITDEPTH
84
23.9M
    if (inter_pred_params->use_hbd_buf) {
85
5.71M
      highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
86
5.71M
                             inter_pred_params->block_width,
87
5.71M
                             inter_pred_params->block_height,
88
5.71M
                             &inter_pred_params->conv_params,
89
5.71M
                             inter_pred_params->interp_filter_params,
90
5.71M
                             inter_pred_params->bit_depth);
91
18.2M
    } else {
92
18.2M
      inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
93
18.2M
                      inter_pred_params->block_width,
94
18.2M
                      inter_pred_params->block_height,
95
18.2M
                      &inter_pred_params->conv_params,
96
18.2M
                      inter_pred_params->interp_filter_params);
97
18.2M
    }
98
#else
99
    inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
100
                    inter_pred_params->block_width,
101
                    inter_pred_params->block_height,
102
                    &inter_pred_params->conv_params,
103
                    inter_pred_params->interp_filter_params);
104
#endif
105
23.9M
  }
106
  // TODO(jingning): av1_warp_plane() can be further cleaned up.
107
763k
  else if (inter_pred_params->mode == WARP_PRED) {
108
763k
    av1_warp_plane(
109
763k
        &inter_pred_params->warp_params, inter_pred_params->use_hbd_buf,
110
763k
        inter_pred_params->bit_depth, inter_pred_params->ref_frame_buf.buf0,
111
763k
        inter_pred_params->ref_frame_buf.width,
112
763k
        inter_pred_params->ref_frame_buf.height,
113
763k
        inter_pred_params->ref_frame_buf.stride, dst,
114
763k
        inter_pred_params->pix_col, inter_pred_params->pix_row,
115
763k
        inter_pred_params->block_width, inter_pred_params->block_height,
116
763k
        dst_stride, inter_pred_params->subsampling_x,
117
763k
        inter_pred_params->subsampling_y, &inter_pred_params->conv_params);
118
18.4E
  } else {
119
18.4E
    assert(0 && "Unsupported inter_pred_params->mode");
120
18.4E
  }
121
24.7M
}
122
123
static const uint8_t wedge_master_oblique_odd[MASK_MASTER_SIZE] = {
124
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
125
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  6,  18,
126
  37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
127
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
128
};
129
static const uint8_t wedge_master_oblique_even[MASK_MASTER_SIZE] = {
130
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
131
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  4,  11, 27,
132
  46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
133
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
134
};
135
static const uint8_t wedge_master_vertical[MASK_MASTER_SIZE] = {
136
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
137
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  7,  21,
138
  43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
139
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
140
};
141
142
static AOM_INLINE void shift_copy(const uint8_t *src, uint8_t *dst, int shift,
143
64
                                  int width) {
144
64
  if (shift >= 0) {
145
33
    memcpy(dst + shift, src, width - shift);
146
33
    memset(dst, src[0], shift);
147
33
  } else {
148
31
    shift = -shift;
149
31
    memcpy(dst, src + shift, width - shift);
150
31
    memset(dst + width - shift, src[width - 1], shift);
151
31
  }
152
64
}
153
154
/* clang-format off */
155
DECLARE_ALIGNED(16, static uint8_t,
156
                wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]) = {
157
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
158
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
159
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
160
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
161
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
162
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
163
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
164
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
165
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
166
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
167
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
168
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
169
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
170
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
171
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
172
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
173
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
174
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
175
  { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, },
176
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, },
177
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
178
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
179
};
180
/* clang-format on */
181
182
// [negative][direction]
183
DECLARE_ALIGNED(
184
    16, static uint8_t,
185
    wedge_mask_obl[2][WEDGE_DIRECTIONS][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);
186
187
// 4 * MAX_WEDGE_SQUARE is an easy to compute and fairly tight upper bound
188
// on the sum of all mask sizes up to and including MAX_WEDGE_SQUARE.
189
DECLARE_ALIGNED(16, static uint8_t,
190
                wedge_mask_buf[2 * MAX_WEDGE_TYPES * 4 * MAX_WEDGE_SQUARE]);
191
192
DECLARE_ALIGNED(16, static uint8_t,
193
                smooth_interintra_mask_buf[INTERINTRA_MODES][BLOCK_SIZES_ALL]
194
                                          [MAX_WEDGE_SQUARE]);
195
196
static wedge_masks_type wedge_masks[BLOCK_SIZES_ALL][2];
197
198
static const wedge_code_type wedge_codebook_16_hgtw[16] = {
199
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
200
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
201
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
202
  { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
203
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
204
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
205
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
206
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
207
};
208
209
static const wedge_code_type wedge_codebook_16_hltw[16] = {
210
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
211
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
212
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 4, 4 },
213
  { WEDGE_VERTICAL, 6, 4 },   { WEDGE_HORIZONTAL, 4, 4 },
214
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
215
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
216
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
217
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
218
};
219
220
static const wedge_code_type wedge_codebook_16_heqw[16] = {
221
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
222
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
223
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
224
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
225
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
226
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
227
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
228
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
229
};
230
231
const wedge_params_type av1_wedge_params_lookup[BLOCK_SIZES_ALL] = {
232
  { 0, NULL, NULL, NULL },
233
  { 0, NULL, NULL, NULL },
234
  { 0, NULL, NULL, NULL },
235
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8],
236
    wedge_masks[BLOCK_8X8] },
237
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16],
238
    wedge_masks[BLOCK_8X16] },
239
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8],
240
    wedge_masks[BLOCK_16X8] },
241
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16],
242
    wedge_masks[BLOCK_16X16] },
243
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32],
244
    wedge_masks[BLOCK_16X32] },
245
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16],
246
    wedge_masks[BLOCK_32X16] },
247
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32],
248
    wedge_masks[BLOCK_32X32] },
249
  { 0, NULL, NULL, NULL },
250
  { 0, NULL, NULL, NULL },
251
  { 0, NULL, NULL, NULL },
252
  { 0, NULL, NULL, NULL },
253
  { 0, NULL, NULL, NULL },
254
  { 0, NULL, NULL, NULL },
255
  { 0, NULL, NULL, NULL },
256
  { 0, NULL, NULL, NULL },
257
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32],
258
    wedge_masks[BLOCK_8X32] },
259
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8],
260
    wedge_masks[BLOCK_32X8] },
261
  { 0, NULL, NULL, NULL },
262
  { 0, NULL, NULL, NULL },
263
};
264
265
static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
266
288
                                             BLOCK_SIZE sb_type) {
267
288
  const uint8_t *master;
268
288
  const int bh = block_size_high[sb_type];
269
288
  const int bw = block_size_wide[sb_type];
270
288
  const wedge_code_type *a =
271
288
      av1_wedge_params_lookup[sb_type].codebook + wedge_index;
272
288
  int woff, hoff;
273
288
  const uint8_t wsignflip =
274
288
      av1_wedge_params_lookup[sb_type].signflip[wedge_index];
275
276
288
  assert(wedge_index >= 0 && wedge_index < get_wedge_types_lookup(sb_type));
277
0
  woff = (a->x_offset * bw) >> 3;
278
288
  hoff = (a->y_offset * bh) >> 3;
279
288
  master = wedge_mask_obl[neg ^ wsignflip][a->direction] +
280
288
           MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
281
288
           MASK_MASTER_SIZE / 2 - woff;
282
288
  return master;
283
288
}
284
285
const uint8_t *av1_get_compound_type_mask(
286
564k
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) {
287
564k
  (void)sb_type;
288
564k
  switch (comp_data->type) {
289
219k
    case COMPOUND_WEDGE:
290
219k
      return av1_get_contiguous_soft_mask(comp_data->wedge_index,
291
219k
                                          comp_data->wedge_sign, sb_type);
292
345k
    default: return comp_data->seg_mask;
293
564k
  }
294
564k
}
295
296
static AOM_INLINE void diffwtd_mask_d16(
297
    uint8_t *mask, int which_inverse, int mask_base, const CONV_BUF_TYPE *src0,
298
    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
299
0
    ConvolveParams *conv_params, int bd) {
300
0
  int round =
301
0
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
302
0
  int i, j, m, diff;
303
0
  for (i = 0; i < h; ++i) {
304
0
    for (j = 0; j < w; ++j) {
305
0
      diff = abs(src0[i * src0_stride + j] - src1[i * src1_stride + j]);
306
0
      diff = ROUND_POWER_OF_TWO(diff, round);
307
0
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
308
0
      mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
309
0
    }
310
0
  }
311
0
}
312
313
void av1_build_compound_diffwtd_mask_d16_c(
314
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
315
    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
316
0
    ConvolveParams *conv_params, int bd) {
317
0
  switch (mask_type) {
318
0
    case DIFFWTD_38:
319
0
      diffwtd_mask_d16(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w,
320
0
                       conv_params, bd);
321
0
      break;
322
0
    case DIFFWTD_38_INV:
323
0
      diffwtd_mask_d16(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w,
324
0
                       conv_params, bd);
325
0
      break;
326
0
    default: assert(0);
327
0
  }
328
0
}
329
330
static AOM_INLINE void diffwtd_mask(uint8_t *mask, int which_inverse,
331
                                    int mask_base, const uint8_t *src0,
332
                                    int src0_stride, const uint8_t *src1,
333
0
                                    int src1_stride, int h, int w) {
334
0
  int i, j, m, diff;
335
0
  for (i = 0; i < h; ++i) {
336
0
    for (j = 0; j < w; ++j) {
337
0
      diff =
338
0
          abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]);
339
0
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
340
0
      mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
341
0
    }
342
0
  }
343
0
}
344
345
void av1_build_compound_diffwtd_mask_c(uint8_t *mask,
346
                                       DIFFWTD_MASK_TYPE mask_type,
347
                                       const uint8_t *src0, int src0_stride,
348
                                       const uint8_t *src1, int src1_stride,
349
0
                                       int h, int w) {
350
0
  switch (mask_type) {
351
0
    case DIFFWTD_38:
352
0
      diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w);
353
0
      break;
354
0
    case DIFFWTD_38_INV:
355
0
      diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w);
356
0
      break;
357
0
    default: assert(0);
358
0
  }
359
0
}
360
361
static AOM_FORCE_INLINE void diffwtd_mask_highbd(
362
    uint8_t *mask, int which_inverse, int mask_base, const uint16_t *src0,
363
    int src0_stride, const uint16_t *src1, int src1_stride, int h, int w,
364
0
    const unsigned int bd) {
365
0
  assert(bd >= 8);
366
0
  if (bd == 8) {
367
0
    if (which_inverse) {
368
0
      for (int i = 0; i < h; ++i) {
369
0
        for (int j = 0; j < w; ++j) {
370
0
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
371
0
          unsigned int m = negative_to_zero(mask_base + diff);
372
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
373
0
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
374
0
        }
375
0
        src0 += src0_stride;
376
0
        src1 += src1_stride;
377
0
        mask += w;
378
0
      }
379
0
    } else {
380
0
      for (int i = 0; i < h; ++i) {
381
0
        for (int j = 0; j < w; ++j) {
382
0
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
383
0
          unsigned int m = negative_to_zero(mask_base + diff);
384
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
385
0
          mask[j] = m;
386
0
        }
387
0
        src0 += src0_stride;
388
0
        src1 += src1_stride;
389
0
        mask += w;
390
0
      }
391
0
    }
392
0
  } else {
393
0
    const unsigned int bd_shift = bd - 8;
394
0
    if (which_inverse) {
395
0
      for (int i = 0; i < h; ++i) {
396
0
        for (int j = 0; j < w; ++j) {
397
0
          int diff =
398
0
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
399
0
          unsigned int m = negative_to_zero(mask_base + diff);
400
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
401
0
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
402
0
        }
403
0
        src0 += src0_stride;
404
0
        src1 += src1_stride;
405
0
        mask += w;
406
0
      }
407
0
    } else {
408
0
      for (int i = 0; i < h; ++i) {
409
0
        for (int j = 0; j < w; ++j) {
410
0
          int diff =
411
0
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
412
0
          unsigned int m = negative_to_zero(mask_base + diff);
413
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
414
0
          mask[j] = m;
415
0
        }
416
0
        src0 += src0_stride;
417
0
        src1 += src1_stride;
418
0
        mask += w;
419
0
      }
420
0
    }
421
0
  }
422
0
}
423
424
void av1_build_compound_diffwtd_mask_highbd_c(
425
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0,
426
    int src0_stride, const uint8_t *src1, int src1_stride, int h, int w,
427
0
    int bd) {
428
0
  switch (mask_type) {
429
0
    case DIFFWTD_38:
430
0
      diffwtd_mask_highbd(mask, 0, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
431
0
                          CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
432
0
      break;
433
0
    case DIFFWTD_38_INV:
434
0
      diffwtd_mask_highbd(mask, 1, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
435
0
                          CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
436
0
      break;
437
0
    default: assert(0);
438
0
  }
439
0
}
440
441
1
static AOM_INLINE void init_wedge_master_masks(void) {
442
1
  int i, j;
443
1
  const int w = MASK_MASTER_SIZE;
444
1
  const int h = MASK_MASTER_SIZE;
445
1
  const int stride = MASK_MASTER_STRIDE;
446
  // Note: index [0] stores the masters, and [1] its complement.
447
  // Generate prototype by shifting the masters
448
1
  int shift = h / 4;
449
33
  for (i = 0; i < h; i += 2) {
450
32
    shift_copy(wedge_master_oblique_even,
451
32
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride], shift,
452
32
               MASK_MASTER_SIZE);
453
32
    shift--;
454
32
    shift_copy(wedge_master_oblique_odd,
455
32
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][(i + 1) * stride], shift,
456
32
               MASK_MASTER_SIZE);
457
32
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][i * stride],
458
32
           wedge_master_vertical,
459
32
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
460
32
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][(i + 1) * stride],
461
32
           wedge_master_vertical,
462
32
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
463
32
  }
464
465
65
  for (i = 0; i < h; ++i) {
466
4.16k
    for (j = 0; j < w; ++j) {
467
4.09k
      const int msk = wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j];
468
4.09k
      wedge_mask_obl[0][WEDGE_OBLIQUE27][j * stride + i] = msk;
469
4.09k
      wedge_mask_obl[0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
470
4.09k
          wedge_mask_obl[0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
471
4.09k
              (1 << WEDGE_WEIGHT_BITS) - msk;
472
4.09k
      wedge_mask_obl[1][WEDGE_OBLIQUE63][i * stride + j] =
473
4.09k
          wedge_mask_obl[1][WEDGE_OBLIQUE27][j * stride + i] =
474
4.09k
              (1 << WEDGE_WEIGHT_BITS) - msk;
475
4.09k
      wedge_mask_obl[1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
476
4.09k
          wedge_mask_obl[1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = msk;
477
4.09k
      const int mskx = wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j];
478
4.09k
      wedge_mask_obl[0][WEDGE_HORIZONTAL][j * stride + i] = mskx;
479
4.09k
      wedge_mask_obl[1][WEDGE_VERTICAL][i * stride + j] =
480
4.09k
          wedge_mask_obl[1][WEDGE_HORIZONTAL][j * stride + i] =
481
4.09k
              (1 << WEDGE_WEIGHT_BITS) - mskx;
482
4.09k
    }
483
64
  }
484
1
}
485
486
1
static AOM_INLINE void init_wedge_masks(void) {
487
1
  uint8_t *dst = wedge_mask_buf;
488
1
  BLOCK_SIZE bsize;
489
1
  memset(wedge_masks, 0, sizeof(wedge_masks));
490
23
  for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; ++bsize) {
491
22
    const wedge_params_type *wedge_params = &av1_wedge_params_lookup[bsize];
492
22
    const int wtypes = wedge_params->wedge_types;
493
22
    if (wtypes == 0) continue;
494
9
    const uint8_t *mask;
495
9
    const int bw = block_size_wide[bsize];
496
9
    const int bh = block_size_high[bsize];
497
9
    int w;
498
153
    for (w = 0; w < wtypes; ++w) {
499
144
      mask = get_wedge_mask_inplace(w, 0, bsize);
500
144
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw,
501
144
                        bh);
502
144
      wedge_params->masks[0][w] = dst;
503
144
      dst += bw * bh;
504
505
144
      mask = get_wedge_mask_inplace(w, 1, bsize);
506
144
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw,
507
144
                        bh);
508
144
      wedge_params->masks[1][w] = dst;
509
144
      dst += bw * bh;
510
144
    }
511
9
    assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf));
512
9
  }
513
1
}
514
515
/* clang-format off */
516
static const uint8_t ii_weights1d[MAX_SB_SIZE] = {
517
  60, 58, 56, 54, 52, 50, 48, 47, 45, 44, 42, 41, 39, 38, 37, 35, 34, 33, 32,
518
  31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20, 19, 19, 18, 18, 17, 16,
519
  16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10,  9,  9,  9,  8,
520
  8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  4,  4,
521
  4,  4,  4,  4,  4,  4,  3,  3,  3,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,
522
  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1,  1,
523
  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1
524
};
525
static uint8_t ii_size_scales[BLOCK_SIZES_ALL] = {
526
    32, 16, 16, 16, 8, 8, 8, 4,
527
    4,  4,  2,  2,  2, 1, 1, 1,
528
    8,  8,  4,  4,  2, 2
529
};
530
/* clang-format on */
531
532
static AOM_INLINE void build_smooth_interintra_mask(uint8_t *mask, int stride,
533
                                                    BLOCK_SIZE plane_bsize,
534
251k
                                                    INTERINTRA_MODE mode) {
535
251k
  int i, j;
536
251k
  const int bw = block_size_wide[plane_bsize];
537
251k
  const int bh = block_size_high[plane_bsize];
538
251k
  const int size_scale = ii_size_scales[plane_bsize];
539
540
251k
  switch (mode) {
541
46.2k
    case II_V_PRED:
542
432k
      for (i = 0; i < bh; ++i) {
543
386k
        memset(mask, ii_weights1d[i * size_scale], bw * sizeof(mask[0]));
544
386k
        mask += stride;
545
386k
      }
546
46.2k
      break;
547
548
101k
    case II_H_PRED:
549
980k
      for (i = 0; i < bh; ++i) {
550
11.9M
        for (j = 0; j < bw; ++j) mask[j] = ii_weights1d[j * size_scale];
551
879k
        mask += stride;
552
879k
      }
553
101k
      break;
554
555
53.5k
    case II_SMOOTH_PRED:
556
485k
      for (i = 0; i < bh; ++i) {
557
5.73M
        for (j = 0; j < bw; ++j)
558
5.29M
          mask[j] = ii_weights1d[(i < j ? i : j) * size_scale];
559
431k
        mask += stride;
560
431k
      }
561
53.5k
      break;
562
563
50.6k
    case II_DC_PRED:
564
50.6k
    default:
565
447k
      for (i = 0; i < bh; ++i) {
566
396k
        memset(mask, 32, bw * sizeof(mask[0]));
567
396k
        mask += stride;
568
396k
      }
569
50.6k
      break;
570
251k
  }
571
251k
}
572
573
1
static AOM_INLINE void init_smooth_interintra_masks(void) {
574
5
  for (int m = 0; m < INTERINTRA_MODES; ++m) {
575
92
    for (int bs = 0; bs < BLOCK_SIZES_ALL; ++bs) {
576
88
      const int bw = block_size_wide[bs];
577
88
      const int bh = block_size_high[bs];
578
88
      if (bw > MAX_WEDGE_SIZE || bh > MAX_WEDGE_SIZE) continue;
579
56
      build_smooth_interintra_mask(smooth_interintra_mask_buf[m][bs], bw, bs,
580
56
                                   m);
581
56
    }
582
4
  }
583
1
}
584
585
// Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
586
1
static void init_all_wedge_masks(void) {
587
1
  init_wedge_master_masks();
588
1
  init_wedge_masks();
589
1
  init_smooth_interintra_masks();
590
1
}
591
592
14.2k
void av1_init_wedge_masks(void) { aom_once(init_all_wedge_masks); }
593
594
static AOM_INLINE void build_masked_compound_no_round(
595
    uint8_t *dst, int dst_stride, const CONV_BUF_TYPE *src0, int src0_stride,
596
    const CONV_BUF_TYPE *src1, int src1_stride,
597
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
598
564k
    int w, InterPredParams *inter_pred_params) {
599
564k
  const int ssy = inter_pred_params->subsampling_y;
600
564k
  const int ssx = inter_pred_params->subsampling_x;
601
564k
  const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
602
564k
  const int mask_stride = block_size_wide[sb_type];
603
564k
#if CONFIG_AV1_HIGHBITDEPTH
604
564k
  if (inter_pred_params->use_hbd_buf) {
605
110k
    aom_highbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
606
110k
                                  src1_stride, mask, mask_stride, w, h, ssx,
607
110k
                                  ssy, &inter_pred_params->conv_params,
608
110k
                                  inter_pred_params->bit_depth);
609
454k
  } else {
610
454k
    aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
611
454k
                                 src1_stride, mask, mask_stride, w, h, ssx, ssy,
612
454k
                                 &inter_pred_params->conv_params);
613
454k
  }
614
#else
615
  aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
616
                               src1_stride, mask, mask_stride, w, h, ssx, ssy,
617
                               &inter_pred_params->conv_params);
618
#endif
619
564k
}
620
621
void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
622
                                     uint8_t *dst, int dst_stride,
623
                                     InterPredParams *inter_pred_params,
624
564k
                                     const SubpelParams *subpel_params) {
625
564k
  const INTERINTER_COMPOUND_DATA *comp_data = &inter_pred_params->mask_comp;
626
564k
  BLOCK_SIZE sb_type = inter_pred_params->sb_type;
627
628
  // We're going to call av1_make_inter_predictor to generate a prediction into
629
  // a temporary buffer, then will blend that temporary buffer with that from
630
  // the other reference.
631
564k
  DECLARE_ALIGNED(32, uint8_t, tmp_buf[2 * MAX_SB_SQUARE]);
632
564k
  uint8_t *tmp_dst =
633
564k
      inter_pred_params->use_hbd_buf ? CONVERT_TO_BYTEPTR(tmp_buf) : tmp_buf;
634
635
564k
  const int tmp_buf_stride = MAX_SB_SIZE;
636
564k
  CONV_BUF_TYPE *org_dst = inter_pred_params->conv_params.dst;
637
564k
  int org_dst_stride = inter_pred_params->conv_params.dst_stride;
638
564k
  CONV_BUF_TYPE *tmp_buf16 = (CONV_BUF_TYPE *)tmp_buf;
639
564k
  inter_pred_params->conv_params.dst = tmp_buf16;
640
564k
  inter_pred_params->conv_params.dst_stride = tmp_buf_stride;
641
564k
  assert(inter_pred_params->conv_params.do_average == 0);
642
643
  // This will generate a prediction in tmp_buf for the second reference
644
564k
  av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE,
645
564k
                           inter_pred_params, subpel_params);
646
647
564k
  if (!inter_pred_params->conv_params.plane &&
648
564k
      comp_data->type == COMPOUND_DIFFWTD) {
649
115k
    av1_build_compound_diffwtd_mask_d16(
650
115k
        comp_data->seg_mask, comp_data->mask_type, org_dst, org_dst_stride,
651
115k
        tmp_buf16, tmp_buf_stride, inter_pred_params->block_height,
652
115k
        inter_pred_params->block_width, &inter_pred_params->conv_params,
653
115k
        inter_pred_params->bit_depth);
654
115k
  }
655
564k
  build_masked_compound_no_round(
656
564k
      dst, dst_stride, org_dst, org_dst_stride, tmp_buf16, tmp_buf_stride,
657
564k
      comp_data, sb_type, inter_pred_params->block_height,
658
564k
      inter_pred_params->block_width, inter_pred_params);
659
564k
}
660
661
// Derives the per-reference blending weights for distance-weighted compound
// averaging, as mandated by the AV1 spec.
//
// Outputs:
//   *fwd_offset, *bck_offset: blending weights for the forward/backward
//     references, read from quant_dist_lookup_table.
//   *use_dist_wtd_comp_avg: 1 iff distance weighting is active; when it is
//     0 the weights are the plain (8, 8) average.
void av1_dist_wtd_comp_weight_assign(const AV1_COMMON *cm,
                                     const MB_MODE_INFO *mbmi, int *fwd_offset,
                                     int *bck_offset,
                                     int *use_dist_wtd_comp_avg,
                                     int is_compound) {
  assert(fwd_offset != NULL && bck_offset != NULL);
  // Not compound, or compound_idx signals plain averaging.
  if (!is_compound || mbmi->compound_idx) {
    *fwd_offset = 8;
    *bck_offset = 8;
    *use_dist_wtd_comp_avg = 0;
    return;
  }

  *use_dist_wtd_comp_avg = 1;
  const RefCntBuffer *const bck_buf = get_ref_frame_buf(cm, mbmi->ref_frame[0]);
  const RefCntBuffer *const fwd_buf = get_ref_frame_buf(cm, mbmi->ref_frame[1]);
  const int cur_frame_index = cm->cur_frame->order_hint;
  int bck_frame_index = 0, fwd_frame_index = 0;

  // A missing reference buffer contributes an order hint of 0.
  if (bck_buf != NULL) bck_frame_index = bck_buf->order_hint;
  if (fwd_buf != NULL) fwd_frame_index = fwd_buf->order_hint;

  // Temporal distances of the two references from the current frame,
  // clamped to MAX_FRAME_DISTANCE.
  int d0 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
                                       fwd_frame_index, cur_frame_index)),
                 0, MAX_FRAME_DISTANCE);
  int d1 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
                                       cur_frame_index, bck_frame_index)),
                 0, MAX_FRAME_DISTANCE);

  // order selects which column of the weight tables goes with which
  // reference; it is 1 when the forward ref is at least as close.
  const int order = d0 <= d1;

  // Degenerate distance: use the final (index 3) table row.
  if (d0 == 0 || d1 == 0) {
    *fwd_offset = quant_dist_lookup_table[3][order];
    *bck_offset = quant_dist_lookup_table[3][1 - order];
    return;
  }

  // Find the first quantized-distance row at which the weighted distances
  // cross over.  Note i is deliberately used after the loop: it ends up as
  // 3 when no row triggered the break.
  int i;
  for (i = 0; i < 3; ++i) {
    int c0 = quant_dist_weight[i][order];
    int c1 = quant_dist_weight[i][!order];
    int d0_c0 = d0 * c0;
    int d1_c1 = d1 * c1;
    if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
  }

  *fwd_offset = quant_dist_lookup_table[i][order];
  *bck_offset = quant_dist_lookup_table[i][1 - order];
}
710
711
void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
712
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
713
27.8M
                          const int plane_start, const int plane_end) {
714
  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
715
  // the static analysis warnings.
716
107M
  for (int i = plane_start; i < AOMMIN(plane_end, MAX_MB_PLANE); ++i) {
717
80.0M
    struct macroblockd_plane *const pd = &planes[i];
718
80.0M
    const int is_uv = i > 0;
719
80.0M
    setup_pred_plane(&pd->dst, bsize, src->buffers[i], src->crop_widths[is_uv],
720
80.0M
                     src->crop_heights[is_uv], src->strides[is_uv], mi_row,
721
80.0M
                     mi_col, NULL, pd->subsampling_x, pd->subsampling_y);
722
80.0M
  }
723
27.8M
}
724
725
void av1_setup_pre_planes(MACROBLOCKD *xd, int idx,
726
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
727
                          const struct scale_factors *sf,
728
8.87M
                          const int num_planes) {
729
8.88M
  if (src != NULL) {
730
    // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
731
    // the static analysis warnings.
732
35.4M
    for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); ++i) {
733
26.5M
      struct macroblockd_plane *const pd = &xd->plane[i];
734
26.5M
      const int is_uv = i > 0;
735
26.5M
      setup_pred_plane(&pd->pre[idx], xd->mi[0]->bsize, src->buffers[i],
736
26.5M
                       src->crop_widths[is_uv], src->crop_heights[is_uv],
737
26.5M
                       src->strides[is_uv], mi_row, mi_col, sf,
738
26.5M
                       pd->subsampling_x, pd->subsampling_y);
739
26.5M
    }
740
8.88M
  }
741
8.87M
}
742
743
// obmc_mask_N[overlap_position]
// OBMC blending masks, one table per overlap length.  Entry i is the weight
// (out of 64) given to the first source of the a64 blend at position i of
// the overlap; the blend calls below pass the current block's prediction as
// that first source, so values rise toward 64 (full weight to the current
// block) away from the shared edge.
static const uint8_t obmc_mask_1[1] = { 64 };
DECLARE_ALIGNED(2, static const uint8_t, obmc_mask_2[2]) = { 45, 64 };

DECLARE_ALIGNED(4, static const uint8_t, obmc_mask_4[4]) = { 39, 50, 59, 64 };

static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 64, 64 };

static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54,
                                          56, 58, 60, 61, 64, 64, 64, 64 };

static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44,
                                          45, 47, 48, 50, 51, 52, 53, 55,
                                          56, 57, 58, 59, 60, 60, 61, 62,
                                          64, 64, 64, 64, 64, 64, 64, 64 };

static const uint8_t obmc_mask_64[64] = {
  33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
  45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
  56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
  62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
765
766
5.20M
const uint8_t *av1_get_obmc_mask(int length) {
767
5.20M
  switch (length) {
768
0
    case 1: return obmc_mask_1;
769
1.14M
    case 2: return obmc_mask_2;
770
2.49M
    case 4: return obmc_mask_4;
771
1.26M
    case 8: return obmc_mask_8;
772
279k
    case 16: return obmc_mask_16;
773
23.0k
    case 32: return obmc_mask_32;
774
0
    case 64: return obmc_mask_64;
775
0
    default: assert(0); return NULL;
776
5.20M
  }
777
5.20M
}
778
779
// foreach_overlappable_nb_* callback that simply bumps the uint8_t counter
// passed through fun_ctxt once per visited neighbor.  All other parameters
// exist only to satisfy the callback signature.
static INLINE void increment_int_ptr(MACROBLOCKD *xd, int rel_mi_row,
                                     int rel_mi_col, uint8_t op_mi_size,
                                     int dir, MB_MODE_INFO *mi, void *fun_ctxt,
                                     const int num_planes) {
  uint8_t *const counter = (uint8_t *)fun_ctxt;
  ++*counter;
  (void)xd;
  (void)rel_mi_row;
  (void)rel_mi_col;
  (void)op_mi_size;
  (void)dir;
  (void)mi;
  (void)num_planes;
}
792
793
6.47M
// Counts the overlappable neighbors above and to the left of the current
// block into mbmi->overlappable_neighbors.  If any above neighbor exists,
// the left column is not scanned — presumably callers only need to know
// whether the count is nonzero (TODO confirm against callers).
void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd) {
  MB_MODE_INFO *mbmi = xd->mi[0];

  mbmi->overlappable_neighbors = 0;

  // Motion variation (and hence OBMC) is disallowed for some block sizes.
  if (!is_motion_variation_allowed_bsize(mbmi->bsize)) return;

  foreach_overlappable_nb_above(cm, xd, INT_MAX, increment_int_ptr,
                                &mbmi->overlappable_neighbors);
  // Early out once the count is already nonzero.
  if (mbmi->overlappable_neighbors) return;
  foreach_overlappable_nb_left(cm, xd, INT_MAX, increment_int_ptr,
                               &mbmi->overlappable_neighbors);
}
806
807
// HW does not support < 4x4 prediction. To limit the bandwidth requirement, if
808
// block-size of current plane is smaller than 8x8, always only blend with the
809
// left neighbor(s) (skip blending with the above side).
810
#define DISABLE_CHROMA_U8X8_OBMC 0  // 0: one-sided obmc; 1: disable
811
812
// Returns 1 when OBMC blending should be skipped for this plane/direction.
// For plane blocks smaller than 8x8, only the left neighbors are blended
// (the above direction, dir == 0, is skipped) — or, when
// DISABLE_CHROMA_U8X8_OBMC is set, both directions are skipped.
int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,
                               const struct macroblockd_plane *pd, int dir) {
  assert(is_motion_variation_allowed_bsize(bsize));

  const BLOCK_SIZE bsize_plane =
      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
  const int is_sub8x8_plane = bsize_plane == BLOCK_4X4 ||
                              bsize_plane == BLOCK_8X4 ||
                              bsize_plane == BLOCK_4X8;
  if (!is_sub8x8_plane) return 0;
#if DISABLE_CHROMA_U8X8_OBMC
  return 1;
#else
  return dir == 0;
#endif
}
831
832
2.23M
// Forces a neighbor's mode info into single-reference, plain-average form
// before its inter predictor is re-run for OBMC.
void av1_modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
  mbmi->ref_frame[1] = NONE_FRAME;
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
}
836
837
// Context handed to the per-neighbor OBMC blend callbacks: per-plane
// pointers and strides of the neighbor predictions to blend in.
struct obmc_inter_pred_ctxt {
  uint8_t **adjacent;     // one prediction buffer per plane
  int *adjacent_stride;   // matching stride per plane
};
841
842
// foreach_overlappable_nb_above callback: blends one above neighbor's
// prediction (from ctxt->adjacent) into xd->plane[].dst over the top
// `overlap` rows of the current block, using the vertical OBMC mask.
static INLINE void build_obmc_inter_pred_above(
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
    int dir, MB_MODE_INFO *above_mi, void *fun_ctxt, const int num_planes) {
  (void)above_mi;
  (void)rel_mi_row;
  (void)dir;
  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
  // Overlap is half the block height, capped at half of 64 luma rows.
  const int overlap =
      AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;

  for (int plane = 0; plane < num_planes; ++plane) {
    const struct macroblockd_plane *pd = &xd->plane[plane];
    // Blend region for this plane, adjusted for chroma subsampling.
    const int bw = (op_mi_size * MI_SIZE) >> pd->subsampling_x;
    const int bh = overlap >> pd->subsampling_y;
    const int plane_col = (rel_mi_col * MI_SIZE) >> pd->subsampling_x;

    // Sub-8x8 plane blocks skip above-direction (dir 0) blending.
    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;

    const int dst_stride = pd->dst.stride;
    uint8_t *const dst = &pd->dst.buf[plane_col];
    const int tmp_stride = ctxt->adjacent_stride[plane];
    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_col];
    const uint8_t *const mask = av1_get_obmc_mask(bh);
#if CONFIG_AV1_HIGHBITDEPTH
    const int is_hbd = is_cur_buf_hbd(xd);
    // In-place blend: dst is both the mask-weighted first source and the
    // destination.
    if (is_hbd)
      aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
                                 tmp_stride, mask, bw, bh, xd->bd);
    else
      aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
                          mask, bw, bh);
#else
    aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
                        bw, bh);
#endif
  }
}
880
881
// foreach_overlappable_nb_left callback: blends one left neighbor's
// prediction (from ctxt->adjacent) into xd->plane[].dst over the leftmost
// `overlap` columns of the current block, using the horizontal OBMC mask.
static INLINE void build_obmc_inter_pred_left(
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
    int dir, MB_MODE_INFO *left_mi, void *fun_ctxt, const int num_planes) {
  (void)left_mi;
  (void)rel_mi_col;
  (void)dir;
  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
  // Overlap is half the block width, capped at half of 64 luma columns.
  const int overlap =
      AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;

  for (int plane = 0; plane < num_planes; ++plane) {
    const struct macroblockd_plane *pd = &xd->plane[plane];
    // Blend region for this plane, adjusted for chroma subsampling.
    const int bw = overlap >> pd->subsampling_x;
    const int bh = (op_mi_size * MI_SIZE) >> pd->subsampling_y;
    const int plane_row = (rel_mi_row * MI_SIZE) >> pd->subsampling_y;

    // Sub-8x8 plane blocks never skip left-direction (dir 1) blending in
    // the default configuration; see av1_skip_u4x4_pred_in_obmc.
    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;

    const int dst_stride = pd->dst.stride;
    uint8_t *const dst = &pd->dst.buf[plane_row * dst_stride];
    const int tmp_stride = ctxt->adjacent_stride[plane];
    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_row * tmp_stride];
    const uint8_t *const mask = av1_get_obmc_mask(bw);

#if CONFIG_AV1_HIGHBITDEPTH
    const int is_hbd = is_cur_buf_hbd(xd);
    // In-place blend: dst is both the mask-weighted first source and the
    // destination.
    if (is_hbd)
      aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
                                 tmp_stride, mask, bw, bh, xd->bd);
    else
      aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
                          mask, bw, bh);
#else
    aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
                        bw, bh);
#endif
  }
}
920
921
// This function combines motion compensated predictions that are generated by
922
// top/left neighboring blocks' inter predictors with the regular inter
923
// prediction. We assume the original prediction (bmc) is stored in
924
// xd->plane[].dst.buf
925
void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
926
                                     uint8_t *above[MAX_MB_PLANE],
927
                                     int above_stride[MAX_MB_PLANE],
928
                                     uint8_t *left[MAX_MB_PLANE],
929
1.09M
                                     int left_stride[MAX_MB_PLANE]) {
930
1.09M
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
931
932
  // handle above row
933
1.09M
  struct obmc_inter_pred_ctxt ctxt_above = { above, above_stride };
934
1.09M
  foreach_overlappable_nb_above(cm, xd,
935
1.09M
                                max_neighbor_obmc[mi_size_wide_log2[bsize]],
936
1.09M
                                build_obmc_inter_pred_above, &ctxt_above);
937
938
  // handle left column
939
1.09M
  struct obmc_inter_pred_ctxt ctxt_left = { left, left_stride };
940
1.09M
  foreach_overlappable_nb_left(cm, xd,
941
1.09M
                               max_neighbor_obmc[mi_size_high_log2[bsize]],
942
1.09M
                               build_obmc_inter_pred_left, &ctxt_left);
943
1.09M
}
944
945
// Fills in the per-plane pointers of the two temporary OBMC destination
// buffers (dst_buf1 for the above pass, dst_buf2 for the left pass), laid
// out as three consecutive MAX_SB_SQUARE plane slabs in xd->tmp_obmc_bufs.
void av1_setup_obmc_dst_bufs(MACROBLOCKD *xd, uint8_t **dst_buf1,
                             uint8_t **dst_buf2) {
  uint8_t **const bufs[2] = { dst_buf1, dst_buf2 };
  const int is_hbd = is_cur_buf_hbd(xd);
  // High bitdepth samples are uint16_t, doubling each plane's byte offset.
  const int len = is_hbd ? (int)sizeof(uint16_t) : 1;
  for (int i = 0; i < 2; ++i) {
    for (int plane = 0; plane < 3; ++plane) {
      uint8_t *const plane_buf =
          xd->tmp_obmc_bufs[i] + plane * MAX_SB_SQUARE * len;
      bufs[i][plane] = is_hbd ? CONVERT_TO_BYTEPTR(plane_buf) : plane_buf;
    }
  }
}
968
969
// Redirects xd's destination and reference planes so the inter predictor of
// the above neighbor at rel_mi_col can be re-run into ctxt->tmp_buf for
// OBMC.  Mutates xd (dst planes, ref scale factors, edge distances) and
// above_mbmi; callers are expected to restore xd afterwards.
void av1_setup_build_prediction_by_above_pred(
    MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width,
    MB_MODE_INFO *above_mbmi, struct build_prediction_ctxt *ctxt,
    const int num_planes) {
  // Neighbor predictions are built at no less than 8x8 granularity.
  const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->bsize);
  const int above_mi_col = xd->mi_col + rel_mi_col;

  // Force the neighbor into single-ref, plain-average form.
  av1_modify_neighbor_predictor_for_obmc(above_mbmi);

  // Point the destination planes at the temporary OBMC buffers.
  for (int j = 0; j < num_planes; ++j) {
    struct macroblockd_plane *const pd = &xd->plane[j];
    setup_pred_plane(&pd->dst, a_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], 0, rel_mi_col,
                     NULL, pd->subsampling_x, pd->subsampling_y);
  }

  const int num_refs = 1 + has_second_ref(above_mbmi);

  // Set up scale factors and pre planes for each of the neighbor's refs.
  for (int ref = 0; ref < num_refs; ++ref) {
    const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];

    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
    const struct scale_factors *const sf =
        get_ref_scale_factors_const(ctxt->cm, frame);
    xd->block_ref_scale_factors[ref] = sf;
    if ((!av1_is_valid_scale(sf)))
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
                         "Reference frame has invalid dimensions");
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, xd->mi_row, above_mi_col, sf,
                         num_planes);
  }

  // Horizontal edge distances, in 1/8th-pel units (hence the * 8).
  xd->mb_to_left_edge = 8 * MI_SIZE * (-above_mi_col);
  xd->mb_to_right_edge =
      ctxt->mb_to_far_edge +
      (xd->width - rel_mi_col - above_mi_width) * MI_SIZE * 8;
}
1006
1007
// Redirects xd's destination and reference planes so the inter predictor of
// the left neighbor at rel_mi_row can be re-run into ctxt->tmp_buf for
// OBMC.  Mutates xd (dst planes, ref scale factors, edge distances) and
// left_mbmi; callers are expected to restore xd afterwards.
void av1_setup_build_prediction_by_left_pred(MACROBLOCKD *xd, int rel_mi_row,
                                             uint8_t left_mi_height,
                                             MB_MODE_INFO *left_mbmi,
                                             struct build_prediction_ctxt *ctxt,
                                             const int num_planes) {
  // Neighbor predictions are built at no less than 8x8 granularity.
  const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->bsize);
  const int left_mi_row = xd->mi_row + rel_mi_row;

  // Force the neighbor into single-ref, plain-average form.
  av1_modify_neighbor_predictor_for_obmc(left_mbmi);

  // Point the destination planes at the temporary OBMC buffers.
  for (int j = 0; j < num_planes; ++j) {
    struct macroblockd_plane *const pd = &xd->plane[j];
    setup_pred_plane(&pd->dst, l_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], rel_mi_row, 0,
                     NULL, pd->subsampling_x, pd->subsampling_y);
  }

  const int num_refs = 1 + has_second_ref(left_mbmi);

  // Set up scale factors and pre planes for each of the neighbor's refs.
  for (int ref = 0; ref < num_refs; ++ref) {
    const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];

    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
    const struct scale_factors *const ref_scale_factors =
        get_ref_scale_factors_const(ctxt->cm, frame);

    xd->block_ref_scale_factors[ref] = ref_scale_factors;
    if ((!av1_is_valid_scale(ref_scale_factors)))
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
                         "Reference frame has invalid dimensions");
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, left_mi_row, xd->mi_col,
                         ref_scale_factors, num_planes);
  }

  // Vertical edge distances via GET_MV_SUBPEL — presumably the same *8
  // (1/8th-pel) scale written out explicitly in the above-pred variant.
  xd->mb_to_top_edge = GET_MV_SUBPEL(MI_SIZE * (-left_mi_row));
  xd->mb_to_bottom_edge =
      ctxt->mb_to_far_edge +
      GET_MV_SUBPEL((xd->height - rel_mi_row - left_mi_height) * MI_SIZE);
}
1046
1047
// Blends the inter and intra predictions of one plane into comppred,
// using either a wedge mask or the precomputed smooth interintra mask
// (low bitdepth path).
static AOM_INLINE void combine_interintra(
    INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
    int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
    uint8_t *comppred, int compstride, const uint8_t *interpred,
    int interstride, const uint8_t *intrapred, int intrastride) {
  const int bw = block_size_wide[plane_bsize];
  const int bh = block_size_high[plane_bsize];

  if (use_wedge_interintra) {
    if (av1_is_wedge_used(bsize)) {
      // Wedge masks are defined at luma resolution; subw/subh flag whether
      // this plane is at half that dimension (presumably chroma
      // subsampling — confirm against aom_blend_a64_mask).
      const uint8_t *mask =
          av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
      const int subw = 2 * mi_size_wide[bsize] == bw;
      const int subh = 2 * mi_size_high[bsize] == bh;
      aom_blend_a64_mask(comppred, compstride, intrapred, intrastride,
                         interpred, interstride, mask, block_size_wide[bsize],
                         bw, bh, subw, subh);
    }
    // NOTE(review): if wedge is flagged but unused for this bsize,
    // comppred is left untouched.
    return;
  }

  // Smooth interintra: mask weights the intra prediction (first source).
  const uint8_t *mask = smooth_interintra_mask_buf[mode][plane_bsize];
  aom_blend_a64_mask(comppred, compstride, intrapred, intrastride, interpred,
                     interstride, mask, bw, bw, bh, 0, 0);
}
1072
1073
#if CONFIG_AV1_HIGHBITDEPTH
1074
static AOM_INLINE void combine_interintra_highbd(
1075
    INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
1076
    int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
1077
    uint8_t *comppred8, int compstride, const uint8_t *interpred8,
1078
381k
    int interstride, const uint8_t *intrapred8, int intrastride, int bd) {
1079
381k
  const int bw = block_size_wide[plane_bsize];
1080
381k
  const int bh = block_size_high[plane_bsize];
1081
1082
381k
  if (use_wedge_interintra) {
1083
129k
    if (av1_is_wedge_used(bsize)) {
1084
129k
      const uint8_t *mask =
1085
129k
          av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
1086
129k
      const int subh = 2 * mi_size_high[bsize] == bh;
1087
129k
      const int subw = 2 * mi_size_wide[bsize] == bw;
1088
129k
      aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
1089
129k
                                interpred8, interstride, mask,
1090
129k
                                block_size_wide[bsize], bw, bh, subw, subh, bd);
1091
129k
    }
1092
129k
    return;
1093
129k
  }
1094
1095
251k
  uint8_t mask[MAX_SB_SQUARE];
1096
251k
  build_smooth_interintra_mask(mask, bw, plane_bsize, mode);
1097
251k
  aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
1098
251k
                            interpred8, interstride, mask, bw, bw, bh, 0, 0,
1099
251k
                            bd);
1100
251k
}
1101
#endif
1102
1103
// Generates the intra part of an interintra prediction for one plane into
// dst/dst_stride, reading neighbor context from ctx.  The asserts encode
// interintra restrictions: no angle deltas, no filter intra, no intrabc.
void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm,
                                               MACROBLOCKD *xd,
                                               BLOCK_SIZE bsize, int plane,
                                               const BUFFER_SET *ctx,
                                               uint8_t *dst, int dst_stride) {
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ssx = xd->plane[plane].subsampling_x;
  const int ssy = xd->plane[plane].subsampling_y;
  BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
  // Map the interintra mode to its underlying intra prediction mode.
  PREDICTION_MODE mode = interintra_to_intra_mode[xd->mi[0]->interintra_mode];
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_Y] == 0);
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_UV] == 0);
  assert(xd->mi[0]->filter_intra_mode_info.use_filter_intra == 0);
  assert(xd->mi[0]->use_intrabc == 0);
  const SequenceHeader *seq_params = cm->seq_params;

  // FILTER_INTRA_MODES here means "filter intra disabled".
  av1_predict_intra_block(xd, seq_params->sb_size,
                          seq_params->enable_intra_edge_filter, pd->width,
                          pd->height, max_txsize_rect_lookup[plane_bsize], mode,
                          0, 0, FILTER_INTRA_MODES, ctx->plane[plane],
                          ctx->stride[plane], dst, dst_stride, 0, 0, plane);
}
1125
1126
// Blends the given inter and intra predictions for one plane into that
// plane's dst buffer, dispatching to the high bitdepth path when the
// current buffer is high bitdepth.
void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
                            const uint8_t *inter_pred, int inter_stride,
                            const uint8_t *intra_pred, int intra_stride) {
  const int ssx = xd->plane[plane].subsampling_x;
  const int ssy = xd->plane[plane].subsampling_y;
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
#if CONFIG_AV1_HIGHBITDEPTH
  if (is_cur_buf_hbd(xd)) {
    combine_interintra_highbd(
        xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
        xd->mi[0]->interintra_wedge_index, INTERINTRA_WEDGE_SIGN, bsize,
        plane_bsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
        inter_pred, inter_stride, intra_pred, intra_stride, xd->bd);
    return;
  }
#endif
  combine_interintra(
      xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
      xd->mi[0]->interintra_wedge_index, INTERINTRA_WEDGE_SIGN, bsize,
      plane_bsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
      inter_pred, inter_stride, intra_pred, intra_stride);
}
1148
1149
// build interintra_predictors for one plane
1150
void av1_build_interintra_predictor(const AV1_COMMON *cm, MACROBLOCKD *xd,
1151
                                    uint8_t *pred, int stride,
1152
                                    const BUFFER_SET *ctx, int plane,
1153
1.26M
                                    BLOCK_SIZE bsize) {
1154
1.26M
  assert(bsize < BLOCK_SIZES_ALL);
1155
1.26M
  if (is_cur_buf_hbd(xd)) {
1156
381k
    DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
1157
381k
    av1_build_intra_predictors_for_interintra(
1158
381k
        cm, xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(intrapredictor),
1159
381k
        MAX_SB_SIZE);
1160
381k
    av1_combine_interintra(xd, bsize, plane, pred, stride,
1161
381k
                           CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
1162
879k
  } else {
1163
879k
    DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]);
1164
879k
    av1_build_intra_predictors_for_interintra(cm, xd, bsize, plane, ctx,
1165
879k
                                              intrapredictor, MAX_SB_SIZE);
1166
879k
    av1_combine_interintra(xd, bsize, plane, pred, stride, intrapredictor,
1167
879k
                           MAX_SB_SIZE);
1168
879k
  }
1169
1.26M
}