Coverage Report

Created: 2026-06-30 06:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/aom/av1/common/reconinter.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <assert.h>
13
#include <stdio.h>
14
#include <limits.h>
15
16
#include "config/aom_config.h"
17
#include "config/aom_dsp_rtcd.h"
18
#include "config/aom_scale_rtcd.h"
19
20
#include "aom/aom_integer.h"
21
#include "aom_dsp/blend.h"
22
23
#include "av1/common/av1_common_int.h"
24
#include "av1/common/blockd.h"
25
#include "av1/common/mvref_common.h"
26
#include "av1/common/obmc.h"
27
#include "av1/common/reconinter.h"
28
#include "av1/common/reconintra.h"
29
30
// This function will determine whether or not to create a warped
31
// prediction.
32
static int allow_warp(const MB_MODE_INFO *const mbmi,
33
                      const WarpTypesAllowed *const warp_types,
34
                      const WarpedMotionParams *const gm_params,
35
                      int build_for_obmc, const struct scale_factors *const sf,
36
5.24M
                      WarpedMotionParams *final_warp_params) {
37
  // Note: As per the spec, we must test the fixed point scales here, which are
38
  // at a higher precision (1 << 14) than the xs and ys in subpel_params (that
39
  // have 1 << 10 precision).
40
5.24M
  if (av1_is_scaled(sf)) return 0;
41
42
4.84M
  if (final_warp_params != NULL) *final_warp_params = default_warp_params;
43
44
4.84M
  if (build_for_obmc) return 0;
45
46
4.84M
  if (warp_types->local_warp_allowed && !mbmi->wm_params.invalid) {
47
348k
    if (final_warp_params != NULL) *final_warp_params = mbmi->wm_params;
48
348k
    return 1;
49
4.49M
  } else if (warp_types->global_warp_allowed && !gm_params->invalid) {
50
109k
    if (final_warp_params != NULL) *final_warp_params = *gm_params;
51
109k
    return 1;
52
109k
  }
53
54
4.38M
  return 0;
55
4.84M
}
56
57
void av1_init_warp_params(InterPredParams *inter_pred_params,
58
                          const WarpTypesAllowed *warp_types, int ref,
59
9.68M
                          const MACROBLOCKD *xd, const MB_MODE_INFO *mi) {
60
9.68M
  if (inter_pred_params->block_height < 8 || inter_pred_params->block_width < 8)
61
4.28M
    return;
62
63
5.39M
  if (xd->cur_frame_force_integer_mv) return;
64
65
5.23M
  if (allow_warp(mi, warp_types, &xd->global_motion[mi->ref_frame[ref]], 0,
66
5.23M
                 inter_pred_params->scale_factors,
67
5.23M
                 &inter_pred_params->warp_params)) {
68
#if CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
69
    aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_FEATURE,
70
                       "Warped motion is disabled in realtime only build.");
71
#endif  // CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
72
458k
    inter_pred_params->mode = WARP_PRED;
73
458k
  }
74
5.23M
}
75
76
void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
77
                              int dst_stride,
78
                              InterPredParams *inter_pred_params,
79
13.6M
                              const SubpelParams *subpel_params) {
80
13.6M
  assert(IMPLIES(inter_pred_params->conv_params.is_compound,
81
13.6M
                 inter_pred_params->conv_params.dst != NULL));
82
83
13.6M
  if (inter_pred_params->mode == TRANSLATION_PRED) {
84
13.2M
#if CONFIG_AV1_HIGHBITDEPTH
85
13.2M
    if (inter_pred_params->use_hbd_buf) {
86
7.24M
      highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
87
7.24M
                             inter_pred_params->block_width,
88
7.24M
                             inter_pred_params->block_height,
89
7.24M
                             &inter_pred_params->conv_params,
90
7.24M
                             inter_pred_params->interp_filter_params,
91
7.24M
                             inter_pred_params->bit_depth);
92
7.24M
    } else {
93
5.95M
      inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
94
5.95M
                      inter_pred_params->block_width,
95
5.95M
                      inter_pred_params->block_height,
96
5.95M
                      &inter_pred_params->conv_params,
97
5.95M
                      inter_pred_params->interp_filter_params);
98
5.95M
    }
99
#else
100
    inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
101
                    inter_pred_params->block_width,
102
                    inter_pred_params->block_height,
103
                    &inter_pred_params->conv_params,
104
                    inter_pred_params->interp_filter_params);
105
#endif
106
13.2M
  }
107
455k
#if !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
108
  // TODO(jingning): av1_warp_plane() can be further cleaned up.
109
458k
  else if (inter_pred_params->mode == WARP_PRED) {
110
458k
    av1_warp_plane(
111
458k
        &inter_pred_params->warp_params, inter_pred_params->use_hbd_buf,
112
458k
        inter_pred_params->bit_depth, inter_pred_params->ref_frame_buf.buf0,
113
458k
        inter_pred_params->ref_frame_buf.width,
114
458k
        inter_pred_params->ref_frame_buf.height,
115
458k
        inter_pred_params->ref_frame_buf.stride, dst,
116
458k
        inter_pred_params->pix_col, inter_pred_params->pix_row,
117
458k
        inter_pred_params->block_width, inter_pred_params->block_height,
118
458k
        dst_stride, inter_pred_params->subsampling_x,
119
458k
        inter_pred_params->subsampling_y, &inter_pred_params->conv_params);
120
458k
  }
121
18.4E
#endif  // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
122
18.4E
  else {
123
18.4E
    assert(0 && "Unsupported inter_pred_params->mode");
124
18.4E
  }
125
13.6M
}
126
127
/* clang-format off */
128
DECLARE_ALIGNED(16, static const uint8_t,
129
                wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]) = {
130
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
131
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
132
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
133
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
134
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
135
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
136
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
137
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
138
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
139
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
140
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
141
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
142
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
143
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
144
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
145
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
146
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
147
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
148
  { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, },
149
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, },
150
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
151
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
152
};
153
/* clang-format on */
154
155
// The wedge / inter-intra mask buffers used to be writable .data populated
156
// by init_all_wedge_masks() at runtime, which gave every process its own
157
// private COW copy. They are now precomputed at codegen time (see
158
// tools/gen_wedge_masks_data.py) and included here as const data so they
159
// live in .rdata and are shared across processes.
160
//
161
// `wedge_masks` stores byte offsets into `wedge_mask_buf` (not pointers) to
162
// avoid loader relocations that would otherwise dirty the .rdata pages.
163
#include "av1/common/wedge_masks_data.inc"
164
165
static const wedge_code_type wedge_codebook_16_hgtw[16] = {
166
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
167
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
168
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
169
  { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
170
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
171
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
172
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
173
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
174
};
175
176
static const wedge_code_type wedge_codebook_16_hltw[16] = {
177
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
178
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
179
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 4, 4 },
180
  { WEDGE_VERTICAL, 6, 4 },   { WEDGE_HORIZONTAL, 4, 4 },
181
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
182
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
183
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
184
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
185
};
186
187
static const wedge_code_type wedge_codebook_16_heqw[16] = {
188
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
189
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
190
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
191
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
192
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
193
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
194
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
195
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
196
};
197
198
const wedge_params_type av1_wedge_params_lookup[BLOCK_SIZES_ALL] = {
199
  { 0, NULL, NULL, NULL },
200
  { 0, NULL, NULL, NULL },
201
  { 0, NULL, NULL, NULL },
202
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8],
203
    wedge_masks[BLOCK_8X8] },
204
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16],
205
    wedge_masks[BLOCK_8X16] },
206
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8],
207
    wedge_masks[BLOCK_16X8] },
208
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16],
209
    wedge_masks[BLOCK_16X16] },
210
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32],
211
    wedge_masks[BLOCK_16X32] },
212
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16],
213
    wedge_masks[BLOCK_32X16] },
214
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32],
215
    wedge_masks[BLOCK_32X32] },
216
  { 0, NULL, NULL, NULL },
217
  { 0, NULL, NULL, NULL },
218
  { 0, NULL, NULL, NULL },
219
  { 0, NULL, NULL, NULL },
220
  { 0, NULL, NULL, NULL },
221
  { 0, NULL, NULL, NULL },
222
  { 0, NULL, NULL, NULL },
223
  { 0, NULL, NULL, NULL },
224
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32],
225
    wedge_masks[BLOCK_8X32] },
226
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8],
227
    wedge_masks[BLOCK_32X8] },
228
  { 0, NULL, NULL, NULL },
229
  { 0, NULL, NULL, NULL },
230
};
231
232
const uint8_t *av1_get_contiguous_soft_mask(int8_t wedge_index,
233
                                            int8_t wedge_sign,
234
350k
                                            BLOCK_SIZE sb_type) {
235
350k
  return wedge_mask_buf +
236
350k
         av1_wedge_params_lookup[sb_type].masks[wedge_sign][wedge_index];
237
350k
}
238
239
const uint8_t *av1_get_compound_type_mask(
240
345k
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) {
241
345k
  (void)sb_type;
242
345k
  switch (comp_data->type) {
243
131k
    case COMPOUND_WEDGE:
244
131k
      return av1_get_contiguous_soft_mask(comp_data->wedge_index,
245
131k
                                          comp_data->wedge_sign, sb_type);
246
214k
    default: return comp_data->seg_mask;
247
345k
  }
248
345k
}
249
250
static inline void diffwtd_mask_d16(uint8_t *mask, int which_inverse,
251
                                    int mask_base, const CONV_BUF_TYPE *src0,
252
                                    int src0_stride, const CONV_BUF_TYPE *src1,
253
                                    int src1_stride, int h, int w,
254
0
                                    ConvolveParams *conv_params, int bd) {
255
0
  int round =
256
0
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
257
0
  int i, j, m, diff;
258
0
  for (i = 0; i < h; ++i) {
259
0
    for (j = 0; j < w; ++j) {
260
0
      diff = abs(src0[i * src0_stride + j] - src1[i * src1_stride + j]);
261
0
      diff = ROUND_POWER_OF_TWO(diff, round);
262
0
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
263
0
      mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
264
0
    }
265
0
  }
266
0
}
267
268
void av1_build_compound_diffwtd_mask_d16_c(
269
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
270
    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
271
0
    ConvolveParams *conv_params, int bd) {
272
0
  switch (mask_type) {
273
0
    case DIFFWTD_38:
274
0
      diffwtd_mask_d16(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w,
275
0
                       conv_params, bd);
276
0
      break;
277
0
    case DIFFWTD_38_INV:
278
0
      diffwtd_mask_d16(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w,
279
0
                       conv_params, bd);
280
0
      break;
281
0
    default: assert(0);
282
0
  }
283
0
}
284
285
static inline void diffwtd_mask(uint8_t *mask, int which_inverse, int mask_base,
286
                                const uint8_t *src0, int src0_stride,
287
                                const uint8_t *src1, int src1_stride, int h,
288
0
                                int w) {
289
0
  int i, j, m, diff;
290
0
  for (i = 0; i < h; ++i) {
291
0
    for (j = 0; j < w; ++j) {
292
0
      diff =
293
0
          abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]);
294
0
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
295
0
      mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
296
0
    }
297
0
  }
298
0
}
299
300
void av1_build_compound_diffwtd_mask_c(uint8_t *mask,
301
                                       DIFFWTD_MASK_TYPE mask_type,
302
                                       const uint8_t *src0, int src0_stride,
303
                                       const uint8_t *src1, int src1_stride,
304
0
                                       int h, int w) {
305
0
  switch (mask_type) {
306
0
    case DIFFWTD_38:
307
0
      diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w);
308
0
      break;
309
0
    case DIFFWTD_38_INV:
310
0
      diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w);
311
0
      break;
312
0
    default: assert(0);
313
0
  }
314
0
}
315
316
#if CONFIG_AV1_HIGHBITDEPTH
317
static AOM_FORCE_INLINE void diffwtd_mask_highbd(
318
    uint8_t *mask, int which_inverse, int mask_base, const uint16_t *src0,
319
    int src0_stride, const uint16_t *src1, int src1_stride, int h, int w,
320
0
    const unsigned int bd) {
321
0
  assert(bd >= 8);
322
0
  if (bd == 8) {
323
0
    if (which_inverse) {
324
0
      for (int i = 0; i < h; ++i) {
325
0
        for (int j = 0; j < w; ++j) {
326
0
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
327
0
          unsigned int m = negative_to_zero(mask_base + diff);
328
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
329
0
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
330
0
        }
331
0
        src0 += src0_stride;
332
0
        src1 += src1_stride;
333
0
        mask += w;
334
0
      }
335
0
    } else {
336
0
      for (int i = 0; i < h; ++i) {
337
0
        for (int j = 0; j < w; ++j) {
338
0
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
339
0
          unsigned int m = negative_to_zero(mask_base + diff);
340
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
341
0
          mask[j] = m;
342
0
        }
343
0
        src0 += src0_stride;
344
0
        src1 += src1_stride;
345
0
        mask += w;
346
0
      }
347
0
    }
348
0
  } else {
349
0
    const unsigned int bd_shift = bd - 8;
350
0
    if (which_inverse) {
351
0
      for (int i = 0; i < h; ++i) {
352
0
        for (int j = 0; j < w; ++j) {
353
0
          int diff =
354
0
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
355
0
          unsigned int m = negative_to_zero(mask_base + diff);
356
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
357
0
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
358
0
        }
359
0
        src0 += src0_stride;
360
0
        src1 += src1_stride;
361
0
        mask += w;
362
0
      }
363
0
    } else {
364
0
      for (int i = 0; i < h; ++i) {
365
0
        for (int j = 0; j < w; ++j) {
366
0
          int diff =
367
0
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
368
0
          unsigned int m = negative_to_zero(mask_base + diff);
369
0
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
370
0
          mask[j] = m;
371
0
        }
372
0
        src0 += src0_stride;
373
0
        src1 += src1_stride;
374
0
        mask += w;
375
0
      }
376
0
    }
377
0
  }
378
0
}
379
380
// C implementation of the difference-weighted mask builder for high
// bit-depth inputs. `src0` and `src1` are converted with
// CONVERT_TO_SHORTPTR() before use. DIFFWTD_38 yields the regular mask and
// DIFFWTD_38_INV its complement, both with a base weight of 38; any other
// `mask_type` trips an assertion and leaves `mask` untouched.
void av1_build_compound_diffwtd_mask_highbd_c(
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0,
    int src0_stride, const uint8_t *src1, int src1_stride, int h, int w,
    int bd) {
  if (mask_type == DIFFWTD_38) {
    diffwtd_mask_highbd(mask, 0, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                        CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
  } else if (mask_type == DIFFWTD_38_INV) {
    diffwtd_mask_highbd(mask, 1, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                        CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
  } else {
    assert(0);
  }
}
396
#endif  // CONFIG_AV1_HIGHBITDEPTH
397
398
/* clang-format off */
399
#if CONFIG_AV1_HIGHBITDEPTH
400
// 1-D inter-intra weights, read by build_smooth_interintra_mask() at
// position * ii_size_scales[block size]. Values fall from 60 down to 1.
static const uint8_t ii_weights1d[MAX_SB_SIZE] = {
  60, 58, 56, 54, 52, 50, 48, 47, 45, 44, 42, 41, 39, 38, 37, 35,  // 0..15
  34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20,  // 16..31
  19, 19, 18, 18, 17, 16, 16, 15, 15, 14, 14, 13, 13, 12, 12, 12,  // 32..47
  11, 11, 10, 10, 10,  9,  9,  9,  8,  8,  8,  8,  7,  7,  7,  7,  // 48..63
   6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  4,  4,  4,  4,  4,  4,  // 64..79
   4,  4,  3,  3,  3,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,  2,  // 80..95
   2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1,  // 96..111
   1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1   // 112..127
};
409
// Per-block-size step used to index ii_weights1d in
// build_smooth_interintra_mask(); one entry per BLOCK_SIZE value.
static const uint8_t ii_size_scales[BLOCK_SIZES_ALL] = {
    32, 16, 16, 16,  // [0]..[3]
    8,  8,  8,  4,   // [4]..[7]
    4,  4,  2,  2,   // [8]..[11]
    2,  1,  1,  1,   // [12]..[15]
    8,  8,  4,  4,   // [16]..[19]
    2,  2            // [20]..[21]
};
414
#endif  // CONFIG_AV1_HIGHBITDEPTH
415
/* clang-format on */
416
417
#if CONFIG_AV1_HIGHBITDEPTH
418
// Used at runtime by combine_interintra_highbd() below for a per-call
419
// stack-allocated mask. The block-size-major precomputed
420
// `smooth_interintra_mask_buf` only covers block sizes with bw,bh <=
421
// MAX_WEDGE_SIZE; the highbd path may need larger sizes.
422
static inline void build_smooth_interintra_mask(uint8_t *mask, int stride,
423
                                                BLOCK_SIZE plane_bsize,
424
276k
                                                INTERINTRA_MODE mode) {
425
276k
  int i, j;
426
276k
  const int bw = block_size_wide[plane_bsize];
427
276k
  const int bh = block_size_high[plane_bsize];
428
276k
  const int size_scale = ii_size_scales[plane_bsize];
429
430
276k
  switch (mode) {
431
56.3k
    case II_V_PRED:
432
606k
      for (i = 0; i < bh; ++i) {
433
550k
        memset(mask, ii_weights1d[i * size_scale], bw * sizeof(mask[0]));
434
550k
        mask += stride;
435
550k
      }
436
56.3k
      break;
437
438
129k
    case II_H_PRED:
439
1.39M
      for (i = 0; i < bh; ++i) {
440
17.6M
        for (j = 0; j < bw; ++j) mask[j] = ii_weights1d[j * size_scale];
441
1.27M
        mask += stride;
442
1.27M
      }
443
129k
      break;
444
445
44.0k
    case II_SMOOTH_PRED:
446
506k
      for (i = 0; i < bh; ++i) {
447
6.53M
        for (j = 0; j < bw; ++j)
448
6.07M
          mask[j] = ii_weights1d[(i < j ? i : j) * size_scale];
449
462k
        mask += stride;
450
462k
      }
451
44.0k
      break;
452
453
46.4k
    case II_DC_PRED:
454
46.4k
    default:
455
506k
      for (i = 0; i < bh; ++i) {
456
459k
        memset(mask, 32, bw * sizeof(mask[0]));
457
459k
        mask += stride;
458
459k
      }
459
46.4k
      break;
460
276k
  }
461
276k
}
462
#endif  // CONFIG_AV1_HIGHBITDEPTH
463
464
// No-op now that the wedge / inter-intra mask buffers are precomputed at
465
// codegen time and stored in `.rdata`. The symbol is kept exported because
466
// decoder.c, encoder.c, and unit tests still call it.
467
10.8k
void av1_init_wedge_masks(void) {
  // Intentionally empty: there is nothing left to initialise at runtime.
}
468
469
static inline void build_masked_compound_no_round(
470
    uint8_t *dst, int dst_stride, const CONV_BUF_TYPE *src0, int src0_stride,
471
    const CONV_BUF_TYPE *src1, int src1_stride,
472
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
473
345k
    int w, InterPredParams *inter_pred_params) {
474
345k
  const int ssy = inter_pred_params->subsampling_y;
475
345k
  const int ssx = inter_pred_params->subsampling_x;
476
345k
  const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
477
345k
  const int mask_stride = block_size_wide[sb_type];
478
345k
#if CONFIG_AV1_HIGHBITDEPTH
479
345k
  if (inter_pred_params->use_hbd_buf) {
480
243k
    aom_highbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
481
243k
                                  src1_stride, mask, mask_stride, w, h, ssx,
482
243k
                                  ssy, &inter_pred_params->conv_params,
483
243k
                                  inter_pred_params->bit_depth);
484
243k
  } else {
485
102k
    aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
486
102k
                                 src1_stride, mask, mask_stride, w, h, ssx, ssy,
487
102k
                                 &inter_pred_params->conv_params);
488
102k
  }
489
#else
490
  aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
491
                               src1_stride, mask, mask_stride, w, h, ssx, ssy,
492
                               &inter_pred_params->conv_params);
493
#endif
494
345k
}
495
496
void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
497
                                     uint8_t *dst, int dst_stride,
498
                                     InterPredParams *inter_pred_params,
499
345k
                                     const SubpelParams *subpel_params) {
500
345k
  const INTERINTER_COMPOUND_DATA *comp_data = &inter_pred_params->mask_comp;
501
345k
  BLOCK_SIZE sb_type = inter_pred_params->sb_type;
502
503
  // We're going to call av1_make_inter_predictor to generate a prediction into
504
  // a temporary buffer, then will blend that temporary buffer with that from
505
  // the other reference.
506
345k
  DECLARE_ALIGNED(32, uint8_t, tmp_buf[2 * MAX_SB_SQUARE]);
507
345k
  uint8_t *tmp_dst =
508
345k
      inter_pred_params->use_hbd_buf ? CONVERT_TO_BYTEPTR(tmp_buf) : tmp_buf;
509
510
345k
  const int tmp_buf_stride = MAX_SB_SIZE;
511
345k
  CONV_BUF_TYPE *org_dst = inter_pred_params->conv_params.dst;
512
345k
  int org_dst_stride = inter_pred_params->conv_params.dst_stride;
513
345k
  CONV_BUF_TYPE *tmp_buf16 = (CONV_BUF_TYPE *)tmp_buf;
514
345k
  inter_pred_params->conv_params.dst = tmp_buf16;
515
345k
  inter_pred_params->conv_params.dst_stride = tmp_buf_stride;
516
345k
  assert(inter_pred_params->conv_params.do_average == 0);
517
518
  // This will generate a prediction in tmp_buf for the second reference
519
345k
  av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE,
520
345k
                           inter_pred_params, subpel_params);
521
522
345k
  if (!inter_pred_params->conv_params.plane &&
523
116k
      comp_data->type == COMPOUND_DIFFWTD) {
524
72.3k
    av1_build_compound_diffwtd_mask_d16(
525
72.3k
        comp_data->seg_mask, comp_data->mask_type, org_dst, org_dst_stride,
526
72.3k
        tmp_buf16, tmp_buf_stride, inter_pred_params->block_height,
527
72.3k
        inter_pred_params->block_width, &inter_pred_params->conv_params,
528
72.3k
        inter_pred_params->bit_depth);
529
72.3k
  }
530
345k
  build_masked_compound_no_round(
531
345k
      dst, dst_stride, org_dst, org_dst_stride, tmp_buf16, tmp_buf_stride,
532
345k
      comp_data, sb_type, inter_pred_params->block_height,
533
345k
      inter_pred_params->block_width, inter_pred_params);
534
345k
}
535
536
void av1_dist_wtd_comp_weight_assign(const AV1_COMMON *cm,
537
                                     const MB_MODE_INFO *mbmi, int *fwd_offset,
538
                                     int *bck_offset,
539
                                     int *use_dist_wtd_comp_avg,
540
11.7M
                                     int is_compound) {
541
11.7M
  assert(fwd_offset != NULL && bck_offset != NULL);
542
11.7M
  if (!is_compound || mbmi->compound_idx) {
543
11.2M
    *fwd_offset = 8;
544
11.2M
    *bck_offset = 8;
545
11.2M
    *use_dist_wtd_comp_avg = 0;
546
11.2M
    return;
547
11.2M
  }
548
549
443k
  *use_dist_wtd_comp_avg = 1;
550
443k
  const RefCntBuffer *const bck_buf = get_ref_frame_buf(cm, mbmi->ref_frame[0]);
551
443k
  const RefCntBuffer *const fwd_buf = get_ref_frame_buf(cm, mbmi->ref_frame[1]);
552
443k
  const int cur_frame_index = cm->cur_frame->order_hint;
553
443k
  int bck_frame_index = 0, fwd_frame_index = 0;
554
555
444k
  if (bck_buf != NULL) bck_frame_index = bck_buf->order_hint;
556
444k
  if (fwd_buf != NULL) fwd_frame_index = fwd_buf->order_hint;
557
558
443k
  int d0 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
559
443k
                                       fwd_frame_index, cur_frame_index)),
560
443k
                 0, MAX_FRAME_DISTANCE);
561
443k
  int d1 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
562
443k
                                       cur_frame_index, bck_frame_index)),
563
443k
                 0, MAX_FRAME_DISTANCE);
564
565
443k
  const int order = d0 <= d1;
566
567
443k
  if (d0 == 0 || d1 == 0) {
568
7.85k
    *fwd_offset = quant_dist_lookup_table[3][order];
569
7.85k
    *bck_offset = quant_dist_lookup_table[3][1 - order];
570
7.85k
    return;
571
7.85k
  }
572
573
436k
  int i;
574
546k
  for (i = 0; i < 3; ++i) {
575
517k
    int c0 = quant_dist_weight[i][order];
576
517k
    int c1 = quant_dist_weight[i][!order];
577
517k
    int d0_c0 = d0 * c0;
578
517k
    int d1_c1 = d1 * c1;
579
517k
    if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
580
517k
  }
581
582
436k
  *fwd_offset = quant_dist_lookup_table[i][order];
583
436k
  *bck_offset = quant_dist_lookup_table[i][1 - order];
584
436k
}
585
586
void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
587
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
588
21.4M
                          const int plane_start, const int plane_end) {
589
  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
590
  // the static analysis warnings.
591
81.6M
  for (int i = plane_start; i < AOMMIN(plane_end, MAX_MB_PLANE); ++i) {
592
60.1M
    struct macroblockd_plane *const pd = &planes[i];
593
60.1M
    const int is_uv = i > 0;
594
60.1M
    setup_pred_plane(&pd->dst, bsize, src->buffers[i], src->crop_widths[is_uv],
595
60.1M
                     src->crop_heights[is_uv], src->strides[is_uv], mi_row,
596
60.1M
                     mi_col, NULL, pd->subsampling_x, pd->subsampling_y);
597
60.1M
  }
598
21.4M
}
599
600
void av1_setup_pre_planes(MACROBLOCKD *xd, int idx,
601
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
602
                          const struct scale_factors *sf,
603
4.84M
                          const int num_planes) {
604
4.84M
  if (src != NULL) {
605
    // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
606
    // the static analysis warnings.
607
19.2M
    for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); ++i) {
608
14.4M
      struct macroblockd_plane *const pd = &xd->plane[i];
609
14.4M
      const int is_uv = i > 0;
610
14.4M
      setup_pred_plane(&pd->pre[idx], xd->mi[0]->bsize, src->buffers[i],
611
14.4M
                       src->crop_widths[is_uv], src->crop_heights[is_uv],
612
14.4M
                       src->strides[is_uv], mi_row, mi_col, sf,
613
14.4M
                       pd->subsampling_x, pd->subsampling_y);
614
14.4M
    }
615
4.84M
  }
616
4.84M
}
617
618
// obmc_mask_N[overlap_position]
619
static const uint8_t obmc_mask_1[1] = { 64 };
620
DECLARE_ALIGNED(2, static const uint8_t, obmc_mask_2[2]) = { 45, 64 };
621
622
DECLARE_ALIGNED(4, static const uint8_t, obmc_mask_4[4]) = { 39, 50, 59, 64 };
623
624
static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 64, 64 };
625
626
static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54,
627
                                          56, 58, 60, 61, 64, 64, 64, 64 };
628
629
static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44,
630
                                          45, 47, 48, 50, 51, 52, 53, 55,
631
                                          56, 57, 58, 59, 60, 60, 61, 62,
632
                                          64, 64, 64, 64, 64, 64, 64, 64 };
633
634
static const uint8_t obmc_mask_64[64] = {
635
  33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
636
  45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
637
  56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
638
  62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
639
};
640
641
2.06M
const uint8_t *av1_get_obmc_mask(int length) {
642
2.06M
  switch (length) {
643
0
    case 1: return obmc_mask_1;
644
399k
    case 2: return obmc_mask_2;
645
1.01M
    case 4: return obmc_mask_4;
646
512k
    case 8: return obmc_mask_8;
647
120k
    case 16: return obmc_mask_16;
648
16.6k
    case 32: return obmc_mask_32;
649
0
    case 64: return obmc_mask_64;
650
0
    default: assert(0); return NULL;
651
2.06M
  }
652
2.06M
}
653
654
// Neighbour-visitor callback that only counts: it increments the uint8_t
// counter that `fun_ctxt` points to and ignores every other argument.
static inline void increment_int_ptr(MACROBLOCKD *xd, int rel_mi_row,
                                     int rel_mi_col, uint8_t op_mi_size,
                                     int dir, MB_MODE_INFO *mi, void *fun_ctxt,
                                     const int num_planes) {
  // The signature is dictated by the visitor interface; silence the
  // unused-parameter warnings.
  (void)xd;
  (void)rel_mi_row;
  (void)rel_mi_col;
  (void)op_mi_size;
  (void)dir;
  (void)mi;
  (void)num_planes;

  uint8_t *const counter = (uint8_t *)fun_ctxt;
  ++*counter;
}
667
668
3.81M
// Resets and recomputes `overlappable_neighbors` for the current block
// (`xd->mi[0]`). The count stays 0 when motion variation is not allowed for
// the block size. The above neighbours are counted first; the left
// neighbours are only visited when none was found above, so the result is
// not a total over both sides.
void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd) {
  MB_MODE_INFO *const cur = xd->mi[0];
  cur->overlappable_neighbors = 0;

  if (!is_motion_variation_allowed_bsize(cur->bsize)) return;

  foreach_overlappable_nb_above(cm, xd, INT_MAX, increment_int_ptr,
                                &cur->overlappable_neighbors);
  if (cur->overlappable_neighbors == 0) {
    foreach_overlappable_nb_left(cm, xd, INT_MAX, increment_int_ptr,
                                 &cur->overlappable_neighbors);
  }
}
681
682
// HW does not support < 4x4 prediction. To limit the bandwidth requirement, if
683
// block-size of current plane is smaller than 8x8, always only blend with the
684
// left neighbor(s) (skip blending with the above side).
685
#define DISABLE_CHROMA_U8X8_OBMC 0  // 0: one-sided obmc; 1: disable
686
687
// Tells the caller whether OBMC blending should be skipped for one plane.
//
// The block size is first converted to the plane's block size using the
// plane's subsampling. For plane sizes other than 4x4, 8x4 and 4x8 the result
// is always 0. For those three sizes the result is 1 when
// DISABLE_CHROMA_U8X8_OBMC is set, otherwise it is 1 only for dir == 0 (the
// direction value the above-neighbor blend passes in).
int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,
                               const struct macroblockd_plane *pd, int dir) {
  assert(is_motion_variation_allowed_bsize(bsize));

  const BLOCK_SIZE plane_size =
      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
  const int is_small_plane_block = plane_size == BLOCK_4X4 ||
                                   plane_size == BLOCK_8X4 ||
                                   plane_size == BLOCK_4X8;

  if (!is_small_plane_block) return 0;

#if DISABLE_CHROMA_U8X8_OBMC
  (void)dir;
  return 1;
#else
  return dir == 0;
#endif
}
706
707
#if CONFIG_AV1_DECODER
708
903k
// Edits a neighbor's mode info in place: the inter-inter compound type becomes
// COMPOUND_AVERAGE and the second reference frame is cleared to NONE_FRAME.
static void modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
  mbmi->ref_frame[1] = NONE_FRAME;
}
712
#endif  // CONFIG_AV1_DECODER
713
714
// Context handed to the OBMC blend callbacks through their void *fun_ctxt
// argument. Both arrays are indexed by plane.
struct obmc_inter_pred_ctxt {
  uint8_t **adjacent;    // per-plane buffer holding the neighbor prediction
  int *adjacent_stride;  // per-plane stride of the matching adjacent buffer
};
718
719
static inline void build_obmc_inter_pred_above(
720
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
721
454k
    int dir, MB_MODE_INFO *above_mi, void *fun_ctxt, const int num_planes) {
722
454k
  (void)above_mi;
723
454k
  (void)rel_mi_row;
724
454k
  (void)dir;
725
454k
  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
726
454k
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
727
454k
  const int overlap =
728
454k
      AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
729
730
1.81M
  for (int plane = 0; plane < num_planes; ++plane) {
731
1.35M
    const struct macroblockd_plane *pd = &xd->plane[plane];
732
1.35M
    const int bw = (op_mi_size * MI_SIZE) >> pd->subsampling_x;
733
1.35M
    const int bh = overlap >> pd->subsampling_y;
734
1.35M
    const int plane_col = (rel_mi_col * MI_SIZE) >> pd->subsampling_x;
735
736
1.35M
    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
737
738
722k
    const int dst_stride = pd->dst.stride;
739
722k
    uint8_t *const dst = &pd->dst.buf[plane_col];
740
722k
    const int tmp_stride = ctxt->adjacent_stride[plane];
741
722k
    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_col];
742
722k
    const uint8_t *const mask = av1_get_obmc_mask(bh);
743
722k
#if CONFIG_AV1_HIGHBITDEPTH
744
722k
    const int is_hbd = is_cur_buf_hbd(xd);
745
722k
    if (is_hbd)
746
427k
      aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
747
427k
                                 tmp_stride, mask, bw, bh, xd->bd);
748
295k
    else
749
295k
      aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
750
295k
                          mask, bw, bh);
751
#else
752
    aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
753
                        bw, bh);
754
#endif
755
722k
  }
756
454k
}
757
758
static inline void build_obmc_inter_pred_left(
759
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
760
449k
    int dir, MB_MODE_INFO *left_mi, void *fun_ctxt, const int num_planes) {
761
449k
  (void)left_mi;
762
449k
  (void)rel_mi_col;
763
449k
  (void)dir;
764
449k
  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
765
449k
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
766
449k
  const int overlap =
767
449k
      AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
768
769
1.79M
  for (int plane = 0; plane < num_planes; ++plane) {
770
1.34M
    const struct macroblockd_plane *pd = &xd->plane[plane];
771
1.34M
    const int bw = overlap >> pd->subsampling_x;
772
1.34M
    const int bh = (op_mi_size * MI_SIZE) >> pd->subsampling_y;
773
1.34M
    const int plane_row = (rel_mi_row * MI_SIZE) >> pd->subsampling_y;
774
775
1.34M
    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
776
777
1.34M
    const int dst_stride = pd->dst.stride;
778
1.34M
    uint8_t *const dst = &pd->dst.buf[plane_row * dst_stride];
779
1.34M
    const int tmp_stride = ctxt->adjacent_stride[plane];
780
1.34M
    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_row * tmp_stride];
781
1.34M
    const uint8_t *const mask = av1_get_obmc_mask(bw);
782
783
1.34M
#if CONFIG_AV1_HIGHBITDEPTH
784
1.34M
    const int is_hbd = is_cur_buf_hbd(xd);
785
1.34M
    if (is_hbd)
786
767k
      aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
787
767k
                                 tmp_stride, mask, bw, bh, xd->bd);
788
576k
    else
789
576k
      aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
790
576k
                          mask, bw, bh);
791
#else
792
    aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
793
                        bw, bh);
794
#endif
795
1.34M
  }
796
449k
}
797
798
// This function combines motion compensated predictions that are generated by
799
// top/left neighboring blocks' inter predictors with the regular inter
800
// prediction. We assume the original prediction (bmc) is stored in
801
// xd->plane[].dst.buf
802
void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
803
                                     uint8_t *above[MAX_MB_PLANE],
804
                                     int above_stride[MAX_MB_PLANE],
805
                                     uint8_t *left[MAX_MB_PLANE],
806
468k
                                     int left_stride[MAX_MB_PLANE]) {
807
468k
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
808
809
  // handle above row
810
468k
  struct obmc_inter_pred_ctxt ctxt_above = { above, above_stride };
811
468k
  foreach_overlappable_nb_above(cm, xd,
812
468k
                                max_neighbor_obmc[mi_size_wide_log2[bsize]],
813
468k
                                build_obmc_inter_pred_above, &ctxt_above);
814
815
  // handle left column
816
468k
  struct obmc_inter_pred_ctxt ctxt_left = { left, left_stride };
817
468k
  foreach_overlappable_nb_left(cm, xd,
818
468k
                               max_neighbor_obmc[mi_size_high_log2[bsize]],
819
468k
                               build_obmc_inter_pred_left, &ctxt_left);
820
468k
}
821
822
// Fills dst_buf1[0..2] and dst_buf2[0..2] with per-plane pointers into
// xd->tmp_obmc_bufs[0] and xd->tmp_obmc_bufs[1] respectively.
//
// Consecutive planes are MAX_SB_SQUARE samples apart. When the current buffer
// is high bitdepth, a sample occupies sizeof(uint16_t) bytes and each pointer
// is passed through CONVERT_TO_BYTEPTR; otherwise the raw byte pointer is
// stored.
void av1_setup_obmc_dst_bufs(MACROBLOCKD *xd, uint8_t **dst_buf1,
                             uint8_t **dst_buf2) {
  uint8_t **const outputs[2] = { dst_buf1, dst_buf2 };
  const int use_hbd = is_cur_buf_hbd(xd);
  const int bytes_per_sample = use_hbd ? (int)sizeof(uint16_t) : 1;

  for (int set = 0; set < 2; ++set) {
    for (int plane = 0; plane < 3; ++plane) {
      uint8_t *const plane_base =
          xd->tmp_obmc_bufs[set] + MAX_SB_SQUARE * plane * bytes_per_sample;
      outputs[set][plane] =
          use_hbd ? CONVERT_TO_BYTEPTR(plane_base) : plane_base;
    }
  }
}
845
846
#if CONFIG_AV1_DECODER
847
void av1_setup_build_prediction_by_above_pred(
848
    MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width,
849
    MB_MODE_INFO *above_mbmi, struct build_prediction_ctxt *ctxt,
850
454k
    const int num_planes) {
851
454k
  const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->bsize);
852
454k
  const int above_mi_col = xd->mi_col + rel_mi_col;
853
854
454k
  modify_neighbor_predictor_for_obmc(above_mbmi);
855
856
1.81M
  for (int j = 0; j < num_planes; ++j) {
857
1.35M
    struct macroblockd_plane *const pd = &xd->plane[j];
858
1.35M
    setup_pred_plane(&pd->dst, a_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
859
1.35M
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], 0, rel_mi_col,
860
1.35M
                     NULL, pd->subsampling_x, pd->subsampling_y);
861
1.35M
  }
862
863
454k
  const int num_refs = 1 + has_second_ref(above_mbmi);
864
865
908k
  for (int ref = 0; ref < num_refs; ++ref) {
866
454k
    const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];
867
868
454k
    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
869
454k
    const struct scale_factors *const sf =
870
454k
        get_ref_scale_factors_const(ctxt->cm, frame);
871
454k
    xd->block_ref_scale_factors[ref] = sf;
872
454k
    if ((!av1_is_valid_scale(sf)))
873
0
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
874
0
                         "Reference frame has invalid dimensions");
875
454k
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, xd->mi_row, above_mi_col, sf,
876
454k
                         num_planes);
877
454k
  }
878
879
454k
  xd->mb_to_left_edge = 8 * MI_SIZE * (-above_mi_col);
880
454k
  xd->mb_to_right_edge =
881
454k
      ctxt->mb_to_far_edge +
882
454k
      (xd->width - rel_mi_col - above_mi_width) * MI_SIZE * 8;
883
454k
}
884
885
void av1_setup_build_prediction_by_left_pred(MACROBLOCKD *xd, int rel_mi_row,
886
                                             uint8_t left_mi_height,
887
                                             MB_MODE_INFO *left_mbmi,
888
                                             struct build_prediction_ctxt *ctxt,
889
449k
                                             const int num_planes) {
890
449k
  const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->bsize);
891
449k
  const int left_mi_row = xd->mi_row + rel_mi_row;
892
893
449k
  modify_neighbor_predictor_for_obmc(left_mbmi);
894
895
1.79M
  for (int j = 0; j < num_planes; ++j) {
896
1.34M
    struct macroblockd_plane *const pd = &xd->plane[j];
897
1.34M
    setup_pred_plane(&pd->dst, l_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
898
1.34M
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], rel_mi_row, 0,
899
1.34M
                     NULL, pd->subsampling_x, pd->subsampling_y);
900
1.34M
  }
901
902
449k
  const int num_refs = 1 + has_second_ref(left_mbmi);
903
904
899k
  for (int ref = 0; ref < num_refs; ++ref) {
905
449k
    const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];
906
907
449k
    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
908
449k
    const struct scale_factors *const ref_scale_factors =
909
449k
        get_ref_scale_factors_const(ctxt->cm, frame);
910
911
449k
    xd->block_ref_scale_factors[ref] = ref_scale_factors;
912
449k
    if ((!av1_is_valid_scale(ref_scale_factors)))
913
0
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
914
0
                         "Reference frame has invalid dimensions");
915
449k
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, left_mi_row, xd->mi_col,
916
449k
                         ref_scale_factors, num_planes);
917
449k
  }
918
919
449k
  xd->mb_to_top_edge = GET_MV_SUBPEL(MI_SIZE * (-left_mi_row));
920
449k
  xd->mb_to_bottom_edge =
921
449k
      ctxt->mb_to_far_edge +
922
449k
      GET_MV_SUBPEL((xd->height - rel_mi_row - left_mi_height) * MI_SIZE);
923
449k
}
924
#endif  // CONFIG_AV1_DECODER
925
926
static inline void combine_interintra(
927
    INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
928
    int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
929
    uint8_t *comppred, int compstride, const uint8_t *interpred,
930
316k
    int interstride, const uint8_t *intrapred, int intrastride) {
931
316k
  const int bw = block_size_wide[plane_bsize];
932
316k
  const int bh = block_size_high[plane_bsize];
933
934
316k
  if (use_wedge_interintra) {
935
85.3k
    if (av1_is_wedge_used(bsize)) {
936
85.3k
      const uint8_t *mask =
937
85.3k
          av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
938
85.3k
      const int subw = 2 * mi_size_wide[bsize] == bw;
939
85.3k
      const int subh = 2 * mi_size_high[bsize] == bh;
940
85.3k
      aom_blend_a64_mask(comppred, compstride, intrapred, intrastride,
941
85.3k
                         interpred, interstride, mask, block_size_wide[bsize],
942
85.3k
                         bw, bh, subw, subh);
943
85.3k
    }
944
85.3k
    return;
945
85.3k
  }
946
947
231k
  const uint8_t *mask = smooth_interintra_mask_buf[mode][plane_bsize];
948
231k
  aom_blend_a64_mask(comppred, compstride, intrapred, intrastride, interpred,
949
231k
                     interstride, mask, bw, bw, bh, 0, 0);
950
231k
}
951
952
#if CONFIG_AV1_HIGHBITDEPTH
953
static inline void combine_interintra_highbd(
954
    INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
955
    int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
956
    uint8_t *comppred8, int compstride, const uint8_t *interpred8,
957
410k
    int interstride, const uint8_t *intrapred8, int intrastride, int bd) {
958
410k
  const int bw = block_size_wide[plane_bsize];
959
410k
  const int bh = block_size_high[plane_bsize];
960
961
410k
  if (use_wedge_interintra) {
962
134k
    if (av1_is_wedge_used(bsize)) {
963
134k
      const uint8_t *mask =
964
134k
          av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
965
134k
      const int subh = 2 * mi_size_high[bsize] == bh;
966
134k
      const int subw = 2 * mi_size_wide[bsize] == bw;
967
134k
      aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
968
134k
                                interpred8, interstride, mask,
969
134k
                                block_size_wide[bsize], bw, bh, subw, subh, bd);
970
134k
    }
971
134k
    return;
972
134k
  }
973
974
276k
  uint8_t mask[MAX_SB_SQUARE];
975
276k
  build_smooth_interintra_mask(mask, bw, plane_bsize, mode);
976
276k
  aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
977
276k
                            interpred8, interstride, mask, bw, bw, bh, 0, 0,
978
276k
                            bd);
979
276k
}
980
#endif
981
982
void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm,
983
                                               MACROBLOCKD *xd,
984
                                               BLOCK_SIZE bsize, int plane,
985
                                               const BUFFER_SET *ctx,
986
727k
                                               uint8_t *dst, int dst_stride) {
987
727k
  struct macroblockd_plane *const pd = &xd->plane[plane];
988
727k
  const int ssx = xd->plane[plane].subsampling_x;
989
727k
  const int ssy = xd->plane[plane].subsampling_y;
990
727k
  BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
991
727k
  PREDICTION_MODE mode = interintra_to_intra_mode[xd->mi[0]->interintra_mode];
992
727k
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_Y] == 0);
993
727k
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_UV] == 0);
994
727k
  assert(xd->mi[0]->filter_intra_mode_info.use_filter_intra == 0);
995
727k
  assert(xd->mi[0]->use_intrabc == 0);
996
727k
  const SequenceHeader *seq_params = cm->seq_params;
997
998
727k
  av1_predict_intra_block(xd, seq_params->sb_size,
999
727k
                          seq_params->enable_intra_edge_filter, pd->width,
1000
727k
                          pd->height, max_txsize_rect_lookup[plane_bsize], mode,
1001
727k
                          0, 0, FILTER_INTRA_MODES, ctx->plane[plane],
1002
727k
                          ctx->stride[plane], dst, dst_stride, 0, 0, plane);
1003
727k
}
1004
1005
// Combines inter_pred and intra_pred for one plane and stores the result in
// that plane's dst buffer.
//
// The mode, wedge flag and wedge index are read from xd->mi[0]; the wedge sign
// is always INTERINTRA_WEDGE_SIGN. When high bitdepth support is compiled in
// and the current buffer is high bitdepth, the high-bitdepth combiner is used
// with xd->bd.
void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
                            const uint8_t *inter_pred, int inter_stride,
                            const uint8_t *intra_pred, int intra_stride) {
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const MB_MODE_INFO *const mi = xd->mi[0];
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);

#if CONFIG_AV1_HIGHBITDEPTH
  if (is_cur_buf_hbd(xd)) {
    combine_interintra_highbd(mi->interintra_mode, mi->use_wedge_interintra,
                              mi->interintra_wedge_index,
                              INTERINTRA_WEDGE_SIGN, bsize, plane_bsize,
                              pd->dst.buf, pd->dst.stride, inter_pred,
                              inter_stride, intra_pred, intra_stride, xd->bd);
    return;
  }
#endif

  combine_interintra(mi->interintra_mode, mi->use_wedge_interintra,
                     mi->interintra_wedge_index, INTERINTRA_WEDGE_SIGN, bsize,
                     plane_bsize, pd->dst.buf, pd->dst.stride, inter_pred,
                     inter_stride, intra_pred, intra_stride);
}
1027
1028
// build interintra_predictors for one plane
1029
void av1_build_interintra_predictor(const AV1_COMMON *cm, MACROBLOCKD *xd,
1030
                                    uint8_t *pred, int stride,
1031
                                    const BUFFER_SET *ctx, int plane,
1032
727k
                                    BLOCK_SIZE bsize) {
1033
727k
  assert(bsize < BLOCK_SIZES_ALL);
1034
727k
  if (is_cur_buf_hbd(xd)) {
1035
410k
    DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
1036
410k
    av1_build_intra_predictors_for_interintra(
1037
410k
        cm, xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(intrapredictor),
1038
410k
        MAX_SB_SIZE);
1039
410k
    av1_combine_interintra(xd, bsize, plane, pred, stride,
1040
410k
                           CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
1041
410k
  } else {
1042
316k
    DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]);
1043
316k
    av1_build_intra_predictors_for_interintra(cm, xd, bsize, plane, ctx,
1044
316k
                                              intrapredictor, MAX_SB_SIZE);
1045
316k
    av1_combine_interintra(xd, bsize, plane, pred, stride, intrapredictor,
1046
316k
                           MAX_SB_SIZE);
1047
316k
  }
1048
727k
}