Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/third_party/aom/av1/common/reconinter.c
Every instrumented line in this file reports an execution count of 0 (the file was not exercised in this run).

/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <assert.h>
#include <stdio.h>
#include <limits.h>

#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"
#include "config/aom_scale_rtcd.h"

#include "aom/aom_integer.h"
#include "aom_dsp/blend.h"

#include "av1/common/blockd.h"
#include "av1/common/mvref_common.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"
#include "av1/common/onyxc_int.h"
#include "av1/common/obmc.h"

#define USE_PRECOMPUTED_WEDGE_MASK 1
#define USE_PRECOMPUTED_WEDGE_SIGN 1

// This function determines whether or not to create a warped prediction.
int av1_allow_warp(const MB_MODE_INFO *const mbmi,
                   const WarpTypesAllowed *const warp_types,
                   const WarpedMotionParams *const gm_params,
                   int build_for_obmc, int x_scale, int y_scale,
                   WarpedMotionParams *final_warp_params) {
  if (x_scale != SCALE_SUBPEL_SHIFTS || y_scale != SCALE_SUBPEL_SHIFTS)
    return 0;

  if (final_warp_params != NULL) *final_warp_params = default_warp_params;

  if (build_for_obmc) return 0;

  if (warp_types->local_warp_allowed && !mbmi->wm_params.invalid) {
    if (final_warp_params != NULL)
      memcpy(final_warp_params, &mbmi->wm_params, sizeof(*final_warp_params));
    return 1;
  } else if (warp_types->global_warp_allowed && !gm_params->invalid) {
    if (final_warp_params != NULL)
      memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
    return 1;
  }

  return 0;
}
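
// Generate the inter prediction for one plane and reference: take the warped
// path when av1_allow_warp() permits it, otherwise fall back to the
// high-bit-depth or low-bit-depth sub-pixel convolution path.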
void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
                              int dst_stride, const SubpelParams *subpel_params,
                              const struct scale_factors *sf, int w, int h,
                              ConvolveParams *conv_params,
                              InterpFilters interp_filters,
                              const WarpTypesAllowed *warp_types, int p_col,
                              int p_row, int plane, int ref,
                              const MB_MODE_INFO *mi, int build_for_obmc,
                              const MACROBLOCKD *xd, int can_use_previous) {
  // Make sure the selected motion mode is valid for this configuration
  assert_motion_mode_valid(mi->motion_mode, xd->global_motion, xd, mi,
                           can_use_previous);
  assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));

  WarpedMotionParams final_warp_params;
  const int do_warp =
      (w >= 8 && h >= 8 &&
       av1_allow_warp(mi, warp_types, &xd->global_motion[mi->ref_frame[ref]],
                      build_for_obmc, subpel_params->xs, subpel_params->ys,
                      &final_warp_params));
  const int is_intrabc = mi->use_intrabc;
  assert(IMPLIES(is_intrabc, !do_warp));

  if (do_warp && xd->cur_frame_force_integer_mv == 0) {
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const struct buf_2d *const pre_buf = &pd->pre[ref];
    av1_warp_plane(&final_warp_params,
                   xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
                   pre_buf->buf0, pre_buf->width, pre_buf->height,
                   pre_buf->stride, dst, p_col, p_row, w, h, dst_stride,
                   pd->subsampling_x, pd->subsampling_y, conv_params);
  } else if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_params, sf,
                           w, h, conv_params, interp_filters, is_intrabc,
                           xd->bd);
  } else {
    inter_predictor(src, src_stride, dst, dst_stride, subpel_params, sf, w, h,
                    conv_params, interp_filters, is_intrabc);
  }
}
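
// One row of each "master" wedge ramp: the values rise from 0 to 64
// (AOM_BLEND_A64_MAX_ALPHA) around the midpoint. init_wedge_master_masks()
// below synthesizes the full 64x64 oblique master by alternating shifted
// copies of the even and odd rows.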
#if USE_PRECOMPUTED_WEDGE_MASK
static const uint8_t wedge_master_oblique_odd[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  6,  18,
  37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
static const uint8_t wedge_master_oblique_even[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  4,  11, 27,
  46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
static const uint8_t wedge_master_vertical[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  7,  21,
  43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
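
// Copy a row of |width| samples from src to dst, displaced by |shift|
// positions; the samples uncovered by the shift are filled by replicating
// the nearest edge value.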
static void shift_copy(const uint8_t *src, uint8_t *dst, int shift, int width) {
  if (shift >= 0) {
    memcpy(dst + shift, src, width - shift);
    memset(dst, src[0], shift);
  } else {
    shift = -shift;
    memcpy(dst, src + shift, width - shift);
    memset(dst + width - shift, src[width - 1], shift);
  }
}
#endif  // USE_PRECOMPUTED_WEDGE_MASK

#if USE_PRECOMPUTED_WEDGE_SIGN
/* clang-format off */
DECLARE_ALIGNED(16, static uint8_t,
                wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]) = {
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, },
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
};
/* clang-format on */
#else
DECLARE_ALIGNED(16, static uint8_t,
                wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]);
#endif  // USE_PRECOMPUTED_WEDGE_SIGN

// [negative][direction]
DECLARE_ALIGNED(
    16, static uint8_t,
    wedge_mask_obl[2][WEDGE_DIRECTIONS][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);

// 4 * MAX_WEDGE_SQUARE is an easy-to-compute and fairly tight upper bound
// on the sum of all mask sizes up to and including MAX_WEDGE_SQUARE.
DECLARE_ALIGNED(16, static uint8_t,
                wedge_mask_buf[2 * MAX_WEDGE_TYPES * 4 * MAX_WEDGE_SQUARE]);

static wedge_masks_type wedge_masks[BLOCK_SIZES_ALL][2];
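
// Each codebook entry is { direction, x_offset, y_offset }: the orientation
// of the wedge boundary plus its center position, in units of one eighth of
// the block width and height. For example, { WEDGE_OBLIQUE27, 4, 2 } centers
// the boundary at (bw/2, bh/4).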
static const wedge_code_type wedge_codebook_16_hgtw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

static const wedge_code_type wedge_codebook_16_hltw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_VERTICAL, 6, 4 },   { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

static const wedge_code_type wedge_codebook_16_heqw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

const wedge_params_type wedge_params_lookup[BLOCK_SIZES_ALL] = {
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8],
    wedge_masks[BLOCK_8X8] },
  { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16],
    wedge_masks[BLOCK_8X16] },
  { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8],
    wedge_masks[BLOCK_16X8] },
  { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16],
    wedge_masks[BLOCK_16X16] },
  { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32],
    wedge_masks[BLOCK_16X32] },
  { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16],
    wedge_masks[BLOCK_32X16] },
  { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32],
    wedge_masks[BLOCK_32X32] },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32],
    wedge_masks[BLOCK_8X32] },
  { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8],
    wedge_masks[BLOCK_32X8] },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
};
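
// Return a pointer into the 64x64 master mask (stride MASK_MASTER_STRIDE),
// positioned so that reading a bw x bh window yields the requested wedge for
// the given block size, sign, and codebook index.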
static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
                                             BLOCK_SIZE sb_type) {
  const uint8_t *master;
  const int bh = block_size_high[sb_type];
  const int bw = block_size_wide[sb_type];
  const wedge_code_type *a =
      wedge_params_lookup[sb_type].codebook + wedge_index;
  int woff, hoff;
  const uint8_t wsignflip = wedge_params_lookup[sb_type].signflip[wedge_index];

  assert(wedge_index >= 0 &&
         wedge_index < (1 << get_wedge_bits_lookup(sb_type)));
  woff = (a->x_offset * bw) >> 3;
  hoff = (a->y_offset * bh) >> 3;
  master = wedge_mask_obl[neg ^ wsignflip][a->direction] +
           MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
           MASK_MASTER_SIZE / 2 - woff;
  return master;
}

const uint8_t *av1_get_compound_type_mask(
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) {
  assert(is_masked_compound_type(comp_data->type));
  (void)sb_type;
  switch (comp_data->type) {
    case COMPOUND_WEDGE:
      return av1_get_contiguous_soft_mask(comp_data->wedge_index,
                                          comp_data->wedge_sign, sb_type);
    case COMPOUND_DIFFWTD: return comp_data->seg_mask;
    default: assert(0); return NULL;
  }
}
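
// Derive a DIFFWTD mask from two d16 (pre-rounding, high-precision)
// predictions: a larger per-pixel difference pushes the weight further above
// mask_base, clamped to [0, AOM_BLEND_A64_MAX_ALPHA]; which_inverse selects
// the complementary mask.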
static void diffwtd_mask_d16(uint8_t *mask, int which_inverse, int mask_base,
                             const CONV_BUF_TYPE *src0, int src0_stride,
                             const CONV_BUF_TYPE *src1, int src1_stride, int h,
                             int w, ConvolveParams *conv_params, int bd) {
  int round =
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
  int i, j, m, diff;
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      diff = abs(src0[i * src0_stride + j] - src1[i * src1_stride + j]);
      diff = ROUND_POWER_OF_TWO(diff, round);
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
      mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
    }
  }
}

void av1_build_compound_diffwtd_mask_d16_c(
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
    ConvolveParams *conv_params, int bd) {
  switch (mask_type) {
    case DIFFWTD_38:
      diffwtd_mask_d16(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w,
                       conv_params, bd);
      break;
    case DIFFWTD_38_INV:
      diffwtd_mask_d16(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w,
                       conv_params, bd);
      break;
    default: assert(0);
  }
}

static void diffwtd_mask(uint8_t *mask, int which_inverse, int mask_base,
                         const uint8_t *src0, int src0_stride,
                         const uint8_t *src1, int src1_stride, int h, int w) {
  int i, j, m, diff;
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      diff =
          abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]);
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
      mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
    }
  }
}

void av1_build_compound_diffwtd_mask_c(uint8_t *mask,
                                       DIFFWTD_MASK_TYPE mask_type,
                                       const uint8_t *src0, int src0_stride,
                                       const uint8_t *src1, int src1_stride,
                                       int h, int w) {
  switch (mask_type) {
    case DIFFWTD_38:
      diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w);
      break;
    case DIFFWTD_38_INV:
      diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w);
      break;
    default: assert(0);
  }
}

static AOM_FORCE_INLINE void diffwtd_mask_highbd(
    uint8_t *mask, int which_inverse, int mask_base, const uint16_t *src0,
    int src0_stride, const uint16_t *src1, int src1_stride, int h, int w,
    const unsigned int bd) {
  assert(bd >= 8);
  if (bd == 8) {
    if (which_inverse) {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    } else {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    }
  } else {
    const unsigned int bd_shift = bd - 8;
    if (which_inverse) {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff =
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    } else {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff =
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    }
  }
}

void av1_build_compound_diffwtd_mask_highbd_c(
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0,
    int src0_stride, const uint8_t *src1, int src1_stride, int h, int w,
    int bd) {
  switch (mask_type) {
    case DIFFWTD_38:
      diffwtd_mask_highbd(mask, 0, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                          CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
      break;
    case DIFFWTD_38_INV:
      diffwtd_mask_highbd(mask, 1, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                          CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
      break;
    default: assert(0);
  }
}

static void init_wedge_master_masks() {
  int i, j;
  const int w = MASK_MASTER_SIZE;
  const int h = MASK_MASTER_SIZE;
  const int stride = MASK_MASTER_STRIDE;
// Note: index [0] stores the master masks, and [1] their complements.
#if USE_PRECOMPUTED_WEDGE_MASK
  // Generate prototype by shifting the masters
  int shift = h / 4;
  for (i = 0; i < h; i += 2) {
    shift_copy(wedge_master_oblique_even,
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride], shift,
               MASK_MASTER_SIZE);
    shift--;
    shift_copy(wedge_master_oblique_odd,
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][(i + 1) * stride], shift,
               MASK_MASTER_SIZE);
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][i * stride],
           wedge_master_vertical,
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][(i + 1) * stride],
           wedge_master_vertical,
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
  }
#else
  static const double smoother_param = 2.85;
  const int a[2] = { 2, 1 };
  const double asqrt = sqrt(a[0] * a[0] + a[1] * a[1]);
  for (i = 0; i < h; i++) {
    for (j = 0; j < w; ++j) {
      int x = (2 * j + 1 - w);
      int y = (2 * i + 1 - h);
      double d = (a[0] * x + a[1] * y) / asqrt;
      const int msk = (int)rint((1.0 + tanh(d / smoother_param)) * 32);
      wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j] = msk;
      const int mskx = (int)rint((1.0 + tanh(x / smoother_param)) * 32);
      wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j] = mskx;
    }
  }
#endif  // USE_PRECOMPUTED_WEDGE_MASK
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int msk = wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j];
      wedge_mask_obl[0][WEDGE_OBLIQUE27][j * stride + i] = msk;
      wedge_mask_obl[0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
          wedge_mask_obl[0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
              (1 << WEDGE_WEIGHT_BITS) - msk;
      wedge_mask_obl[1][WEDGE_OBLIQUE63][i * stride + j] =
          wedge_mask_obl[1][WEDGE_OBLIQUE27][j * stride + i] =
              (1 << WEDGE_WEIGHT_BITS) - msk;
      wedge_mask_obl[1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
          wedge_mask_obl[1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = msk;
      const int mskx = wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j];
      wedge_mask_obl[0][WEDGE_HORIZONTAL][j * stride + i] = mskx;
      wedge_mask_obl[1][WEDGE_VERTICAL][i * stride + j] =
          wedge_mask_obl[1][WEDGE_HORIZONTAL][j * stride + i] =
              (1 << WEDGE_WEIGHT_BITS) - mskx;
    }
  }
}

#if !USE_PRECOMPUTED_WEDGE_SIGN
// If the signs for the wedges for various blocksizes are
// inconsistent, flip the sign flag. Do it only once for every
// wedge codebook.
static void init_wedge_signs() {
  BLOCK_SIZE sb_type;
  memset(wedge_signflip_lookup, 0, sizeof(wedge_signflip_lookup));
  for (sb_type = BLOCK_4X4; sb_type < BLOCK_SIZES_ALL; ++sb_type) {
    const int bw = block_size_wide[sb_type];
    const int bh = block_size_high[sb_type];
    const wedge_params_type wedge_params = wedge_params_lookup[sb_type];
    const int wbits = wedge_params.bits;
    const int wtypes = 1 << wbits;
    int i, w;
    if (wbits) {
      for (w = 0; w < wtypes; ++w) {
        // Get the mask master, i.e. index [0]
        const uint8_t *mask = get_wedge_mask_inplace(w, 0, sb_type);
        int avg = 0;
        for (i = 0; i < bw; ++i) avg += mask[i];
        for (i = 1; i < bh; ++i) avg += mask[i * MASK_MASTER_STRIDE];
        avg = (avg + (bw + bh - 1) / 2) / (bw + bh - 1);
        // Default sign of this wedge is 1 if the average < 32, 0 otherwise.
        // If default sign is 1:
        //   If sign requested is 0, we need to flip the sign and return
        //   the complement i.e. index [1] instead. If sign requested is 1
        //   we need to flip the sign and return index [0] instead.
        // If default sign is 0:
        //   If sign requested is 0, we need to return index [0] the master
        //   if sign requested is 1, we need to return the complement index [1]
        //   instead.
        wedge_params.signflip[w] = (avg < 32);
      }
    }
  }
}
#endif  // !USE_PRECOMPUTED_WEDGE_SIGN
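
// Materialize the per-block-size wedge masks: copy a bw x bh window of each
// master mask (and of its complement) into wedge_mask_buf and record the
// resulting pointers in wedge_masks[], which wedge_params_lookup references.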
static void init_wedge_masks() {
  uint8_t *dst = wedge_mask_buf;
  BLOCK_SIZE bsize;
  memset(wedge_masks, 0, sizeof(wedge_masks));
  for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; ++bsize) {
    const uint8_t *mask;
    const int bw = block_size_wide[bsize];
    const int bh = block_size_high[bsize];
    const wedge_params_type *wedge_params = &wedge_params_lookup[bsize];
    const int wbits = wedge_params->bits;
    const int wtypes = 1 << wbits;
    int w;
    if (wbits == 0) continue;
    for (w = 0; w < wtypes; ++w) {
      mask = get_wedge_mask_inplace(w, 0, bsize);
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw,
                        bh);
      wedge_params->masks[0][w] = dst;
      dst += bw * bh;

      mask = get_wedge_mask_inplace(w, 1, bsize);
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw,
                        bh);
      wedge_params->masks[1][w] = dst;
      dst += bw * bh;
    }
    assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf));
  }
}

// Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
void av1_init_wedge_masks() {
  init_wedge_master_masks();
#if !USE_PRECOMPUTED_WEDGE_SIGN
  init_wedge_signs();
#endif  // !USE_PRECOMPUTED_WEDGE_SIGN
  init_wedge_masks();
}
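
// Blend two compound predictions that are still in d16 (pre-rounding)
// precision using the compound-type mask; the blend performs the final
// rounding down to pixel values, hence "no round" in the name.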
static void build_masked_compound_no_round(
    uint8_t *dst, int dst_stride, const CONV_BUF_TYPE *src0, int src0_stride,
    const CONV_BUF_TYPE *src1, int src1_stride,
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
    int w, ConvolveParams *conv_params, MACROBLOCKD *xd) {
  // Derive subsampling from h and w passed in. May be refactored to
  // pass in subsampling factors directly.
  const int subh = (2 << mi_size_high_log2[sb_type]) == h;
  const int subw = (2 << mi_size_wide_log2[sb_type]) == w;
  const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
    aom_highbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
                                  src1_stride, mask, block_size_wide[sb_type],
                                  w, h, subw, subh, conv_params, xd->bd);
  else
    aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
                                 src1_stride, mask, block_size_wide[sb_type], w,
                                 h, subw, subh, conv_params);
}

void av1_make_masked_inter_predictor(
    const uint8_t *pre, int pre_stride, uint8_t *dst, int dst_stride,
    const SubpelParams *subpel_params, const struct scale_factors *sf, int w,
    int h, ConvolveParams *conv_params, InterpFilters interp_filters, int plane,
    const WarpTypesAllowed *warp_types, int p_col, int p_row, int ref,
    MACROBLOCKD *xd, int can_use_previous) {
  MB_MODE_INFO *mi = xd->mi[0];
  (void)dst;
  (void)dst_stride;
  mi->interinter_comp.seg_mask = xd->seg_mask;
  const INTERINTER_COMPOUND_DATA *comp_data = &mi->interinter_comp;

// We're going to call av1_make_inter_predictor to generate a prediction into
// a temporary buffer, then will blend that temporary buffer with that from
// the other reference.
//
#define INTER_PRED_BYTES_PER_PIXEL 2

  DECLARE_ALIGNED(32, uint8_t,
                  tmp_buf[INTER_PRED_BYTES_PER_PIXEL * MAX_SB_SQUARE]);
#undef INTER_PRED_BYTES_PER_PIXEL

  uint8_t *tmp_dst = get_buf_by_bd(xd, tmp_buf);

  const int tmp_buf_stride = MAX_SB_SIZE;
  CONV_BUF_TYPE *org_dst = conv_params->dst;
  int org_dst_stride = conv_params->dst_stride;
  CONV_BUF_TYPE *tmp_buf16 = (CONV_BUF_TYPE *)tmp_buf;
  conv_params->dst = tmp_buf16;
  conv_params->dst_stride = tmp_buf_stride;
  assert(conv_params->do_average == 0);

  // This will generate a prediction in tmp_buf for the second reference
  av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_params,
                           sf, w, h, conv_params, interp_filters, warp_types,
                           p_col, p_row, plane, ref, mi, 0, xd,
                           can_use_previous);

  if (!plane && comp_data->type == COMPOUND_DIFFWTD) {
    av1_build_compound_diffwtd_mask_d16(
        comp_data->seg_mask, comp_data->mask_type, org_dst, org_dst_stride,
        tmp_buf16, tmp_buf_stride, h, w, conv_params, xd->bd);
  }
  build_masked_compound_no_round(dst, dst_stride, org_dst, org_dst_stride,
                                 tmp_buf16, tmp_buf_stride, comp_data,
                                 mi->sb_type, h, w, conv_params, xd);
}
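
// Choose the forward/backward blending weights for distance-weighted (jnt)
// compound averaging from the relative temporal distances d0 and d1 between
// the current frame and its two references; non-compound blocks and blocks
// with compound_idx set use the plain average instead.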
void av1_jnt_comp_weight_assign(const AV1_COMMON *cm, const MB_MODE_INFO *mbmi,
                                int order_idx, int *fwd_offset, int *bck_offset,
                                int *use_jnt_comp_avg, int is_compound) {
  assert(fwd_offset != NULL && bck_offset != NULL);
  if (!is_compound || mbmi->compound_idx) {
    *use_jnt_comp_avg = 0;
    return;
  }

  *use_jnt_comp_avg = 1;
  const int bck_idx = cm->frame_refs[mbmi->ref_frame[0] - LAST_FRAME].idx;
  const int fwd_idx = cm->frame_refs[mbmi->ref_frame[1] - LAST_FRAME].idx;
  const int cur_frame_index = cm->cur_frame->cur_frame_offset;
  int bck_frame_index = 0, fwd_frame_index = 0;

  if (bck_idx >= 0) {
    bck_frame_index = cm->buffer_pool->frame_bufs[bck_idx].cur_frame_offset;
  }

  if (fwd_idx >= 0) {
    fwd_frame_index = cm->buffer_pool->frame_bufs[fwd_idx].cur_frame_offset;
  }

  int d0 = clamp(abs(get_relative_dist(cm, fwd_frame_index, cur_frame_index)),
                 0, MAX_FRAME_DISTANCE);
  int d1 = clamp(abs(get_relative_dist(cm, cur_frame_index, bck_frame_index)),
                 0, MAX_FRAME_DISTANCE);

  const int order = d0 <= d1;

  if (d0 == 0 || d1 == 0) {
    *fwd_offset = quant_dist_lookup_table[order_idx][3][order];
    *bck_offset = quant_dist_lookup_table[order_idx][3][1 - order];
    return;
  }

  int i;
  for (i = 0; i < 3; ++i) {
    int c0 = quant_dist_weight[i][order];
    int c1 = quant_dist_weight[i][!order];
    int d0_c0 = d0 * c0;
    int d1_c1 = d1 * c1;
    if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
  }

  *fwd_offset = quant_dist_lookup_table[order_idx][i][order];
  *bck_offset = quant_dist_lookup_table[order_idx][i][1 - order];
}

void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
                          const int plane_start, const int plane_end) {
  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
  // the static analysis warnings.
  for (int i = plane_start; i < AOMMIN(plane_end, MAX_MB_PLANE); ++i) {
    struct macroblockd_plane *const pd = &planes[i];
    const int is_uv = i > 0;
    setup_pred_plane(&pd->dst, bsize, src->buffers[i], src->crop_widths[is_uv],
                     src->crop_heights[is_uv], src->strides[is_uv], mi_row,
                     mi_col, NULL, pd->subsampling_x, pd->subsampling_y);
  }
}

void av1_setup_pre_planes(MACROBLOCKD *xd, int idx,
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
                          const struct scale_factors *sf,
                          const int num_planes) {
  if (src != NULL) {
    // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
    // the static analysis warnings.
    for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); ++i) {
      struct macroblockd_plane *const pd = &xd->plane[i];
      const int is_uv = i > 0;
      setup_pred_plane(&pd->pre[idx], xd->mi[0]->sb_type, src->buffers[i],
                       src->crop_widths[is_uv], src->crop_heights[is_uv],
                       src->strides[is_uv], mi_row, mi_col, sf,
                       pd->subsampling_x, pd->subsampling_y);
    }
  }
}

// obmc_mask_N[overlap_position]
static const uint8_t obmc_mask_1[1] = { 64 };

static const uint8_t obmc_mask_2[2] = { 45, 64 };

static const uint8_t obmc_mask_4[4] = { 39, 50, 59, 64 };

static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 64, 64 };

static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54,
                                          56, 58, 60, 61, 64, 64, 64, 64 };

static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44,
                                          45, 47, 48, 50, 51, 52, 53, 55,
                                          56, 57, 58, 59, 60, 60, 61, 62,
                                          64, 64, 64, 64, 64, 64, 64, 64 };

static const uint8_t obmc_mask_64[64] = {
  33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
  45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
  56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
  62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};

const uint8_t *av1_get_obmc_mask(int length) {
  switch (length) {
    case 1: return obmc_mask_1;
    case 2: return obmc_mask_2;
    case 4: return obmc_mask_4;
    case 8: return obmc_mask_8;
    case 16: return obmc_mask_16;
    case 32: return obmc_mask_32;
    case 64: return obmc_mask_64;
    default: assert(0); return NULL;
  }
}

static INLINE void increment_int_ptr(MACROBLOCKD *xd, int rel_mi_rc,
                                     uint8_t mi_hw, MB_MODE_INFO *mi,
                                     void *fun_ctxt, const int num_planes) {
  (void)xd;
  (void)rel_mi_rc;
  (void)mi_hw;
  (void)mi;
  ++*(int *)fun_ctxt;
  (void)num_planes;
}

void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                      int mi_row, int mi_col) {
  MB_MODE_INFO *mbmi = xd->mi[0];

  mbmi->overlappable_neighbors[0] = 0;
  mbmi->overlappable_neighbors[1] = 0;

  if (!is_motion_variation_allowed_bsize(mbmi->sb_type)) return;

  foreach_overlappable_nb_above(cm, xd, mi_col, INT_MAX, increment_int_ptr,
                                &mbmi->overlappable_neighbors[0]);
  foreach_overlappable_nb_left(cm, xd, mi_row, INT_MAX, increment_int_ptr,
                               &mbmi->overlappable_neighbors[1]);
}

// HW does not support < 4x4 prediction. To limit the bandwidth requirement,
// if the block size of the current plane is smaller than 8x8, only blend with
// the left neighbor(s) and skip blending with the above side.
#define DISABLE_CHROMA_U8X8_OBMC 0  // 0: one-sided obmc; 1: disable

int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,
                               const struct macroblockd_plane *pd, int dir) {
  assert(is_motion_variation_allowed_bsize(bsize));

  const BLOCK_SIZE bsize_plane =
      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
  switch (bsize_plane) {
#if DISABLE_CHROMA_U8X8_OBMC
    case BLOCK_4X4:
    case BLOCK_8X4:
    case BLOCK_4X8: return 1; break;
#else
    case BLOCK_4X4:
    case BLOCK_8X4:
    case BLOCK_4X8: return dir == 0; break;
#endif
    default: return 0;
  }
}

void av1_modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
  mbmi->ref_frame[1] = NONE_FRAME;
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;

  return;
}

struct obmc_inter_pred_ctxt {
  uint8_t **adjacent;
  int *adjacent_stride;
};
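
// Per-neighbor callbacks for av1_build_obmc_inter_prediction(): blend the
// above neighbor's prediction into the top overlap rows with a vertical
// mask, and the left neighbor's into the left overlap columns with a
// horizontal mask. The a64 blends compute
// (m * bmc + (64 - m) * neighbor + 32) >> 6 per pixel, with m taken from
// av1_get_obmc_mask().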
static INLINE void build_obmc_inter_pred_above(MACROBLOCKD *xd, int rel_mi_col,
                                               uint8_t above_mi_width,
                                               MB_MODE_INFO *above_mi,
                                               void *fun_ctxt,
                                               const int num_planes) {
  (void)above_mi;
  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
  const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
  const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
  const int overlap =
      AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;

  for (int plane = 0; plane < num_planes; ++plane) {
    const struct macroblockd_plane *pd = &xd->plane[plane];
    const int bw = (above_mi_width * MI_SIZE) >> pd->subsampling_x;
    const int bh = overlap >> pd->subsampling_y;
    const int plane_col = (rel_mi_col * MI_SIZE) >> pd->subsampling_x;

    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;

    const int dst_stride = pd->dst.stride;
    uint8_t *const dst = &pd->dst.buf[plane_col];
    const int tmp_stride = ctxt->adjacent_stride[plane];
    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_col];
    const uint8_t *const mask = av1_get_obmc_mask(bh);

    if (is_hbd)
      aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
                                 tmp_stride, mask, bw, bh, xd->bd);
    else
      aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
                          mask, bw, bh);
  }
}

static INLINE void build_obmc_inter_pred_left(MACROBLOCKD *xd, int rel_mi_row,
                                              uint8_t left_mi_height,
                                              MB_MODE_INFO *left_mi,
                                              void *fun_ctxt,
                                              const int num_planes) {
  (void)left_mi;
  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
  const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
  const int overlap =
      AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
  const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;

  for (int plane = 0; plane < num_planes; ++plane) {
    const struct macroblockd_plane *pd = &xd->plane[plane];
    const int bw = overlap >> pd->subsampling_x;
    const int bh = (left_mi_height * MI_SIZE) >> pd->subsampling_y;
    const int plane_row = (rel_mi_row * MI_SIZE) >> pd->subsampling_y;

    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;

    const int dst_stride = pd->dst.stride;
    uint8_t *const dst = &pd->dst.buf[plane_row * dst_stride];
    const int tmp_stride = ctxt->adjacent_stride[plane];
    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_row * tmp_stride];
    const uint8_t *const mask = av1_get_obmc_mask(bw);

    if (is_hbd)
      aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
                                 tmp_stride, mask, bw, bh, xd->bd);
    else
      aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
                          mask, bw, bh);
  }
}

// This function combines motion compensated predictions that are generated by
// top/left neighboring blocks' inter predictors with the regular inter
// prediction. We assume the original prediction (bmc) is stored in
// xd->plane[].dst.buf
void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                     int mi_row, int mi_col,
                                     uint8_t *above[MAX_MB_PLANE],
                                     int above_stride[MAX_MB_PLANE],
                                     uint8_t *left[MAX_MB_PLANE],
                                     int left_stride[MAX_MB_PLANE]) {
  const BLOCK_SIZE bsize = xd->mi[0]->sb_type;

  // handle above row
  struct obmc_inter_pred_ctxt ctxt_above = { above, above_stride };
  foreach_overlappable_nb_above(cm, xd, mi_col,
                                max_neighbor_obmc[mi_size_wide_log2[bsize]],
                                build_obmc_inter_pred_above, &ctxt_above);

  // handle left column
  struct obmc_inter_pred_ctxt ctxt_left = { left, left_stride };
  foreach_overlappable_nb_left(cm, xd, mi_row,
                               max_neighbor_obmc[mi_size_high_log2[bsize]],
                               build_obmc_inter_pred_left, &ctxt_left);
}

void av1_setup_build_prediction_by_above_pred(
    MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width,
    MB_MODE_INFO *above_mbmi, struct build_prediction_ctxt *ctxt,
    const int num_planes) {
  const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->sb_type);
  const int above_mi_col = ctxt->mi_col + rel_mi_col;

  av1_modify_neighbor_predictor_for_obmc(above_mbmi);

  for (int j = 0; j < num_planes; ++j) {
    struct macroblockd_plane *const pd = &xd->plane[j];
    setup_pred_plane(&pd->dst, a_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], 0, rel_mi_col,
                     NULL, pd->subsampling_x, pd->subsampling_y);
  }

  const int num_refs = 1 + has_second_ref(above_mbmi);

  for (int ref = 0; ref < num_refs; ++ref) {
    const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];

    const RefBuffer *const ref_buf = &ctxt->cm->frame_refs[frame - LAST_FRAME];

    xd->block_refs[ref] = ref_buf;
    if ((!av1_is_valid_scale(&ref_buf->sf)))
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
                         "Reference frame has invalid dimensions");
    av1_setup_pre_planes(xd, ref, ref_buf->buf, ctxt->mi_row, above_mi_col,
                         &ref_buf->sf, num_planes);
  }

  xd->mb_to_left_edge = 8 * MI_SIZE * (-above_mi_col);
  xd->mb_to_right_edge = ctxt->mb_to_far_edge +
                         (xd->n4_w - rel_mi_col - above_mi_width) * MI_SIZE * 8;
}

void av1_setup_build_prediction_by_left_pred(MACROBLOCKD *xd, int rel_mi_row,
                                             uint8_t left_mi_height,
                                             MB_MODE_INFO *left_mbmi,
                                             struct build_prediction_ctxt *ctxt,
                                             const int num_planes) {
  const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->sb_type);
  const int left_mi_row = ctxt->mi_row + rel_mi_row;

  av1_modify_neighbor_predictor_for_obmc(left_mbmi);

  for (int j = 0; j < num_planes; ++j) {
    struct macroblockd_plane *const pd = &xd->plane[j];
    setup_pred_plane(&pd->dst, l_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], rel_mi_row, 0,
                     NULL, pd->subsampling_x, pd->subsampling_y);
  }

  const int num_refs = 1 + has_second_ref(left_mbmi);

  for (int ref = 0; ref < num_refs; ++ref) {
    const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];

    const RefBuffer *const ref_buf = &ctxt->cm->frame_refs[frame - LAST_FRAME];

    xd->block_refs[ref] = ref_buf;
    if ((!av1_is_valid_scale(&ref_buf->sf)))
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
                         "Reference frame has invalid dimensions");
    av1_setup_pre_planes(xd, ref, ref_buf->buf, left_mi_row, ctxt->mi_col,
                         &ref_buf->sf, num_planes);
  }

  xd->mb_to_top_edge = 8 * MI_SIZE * (-left_mi_row);
  xd->mb_to_bottom_edge =
      ctxt->mb_to_far_edge +
      (xd->n4_h - rel_mi_row - left_mi_height) * MI_SIZE * 8;
}
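
// ii_weights1d is the 1D decay curve for smooth inter-intra blending;
// ii_size_scales gives the per-block-size step used when sampling it so the
// curve spans blocks of different dimensions.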
/* clang-format off */
static const uint8_t ii_weights1d[MAX_SB_SIZE] = {
  60, 58, 56, 54, 52, 50, 48, 47, 45, 44, 42, 41, 39, 38, 37, 35, 34, 33, 32,
  31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20, 19, 19, 18, 18, 17, 16,
  16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10,  9,  9,  9,  8,
  8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  4,  4,
  4,  4,  4,  4,  4,  4,  3,  3,  3,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,
  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1,  1,
  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1
};
static uint8_t ii_size_scales[BLOCK_SIZES_ALL] = {
    32, 16, 16, 16, 8, 8, 8, 4,
    4,  4,  2,  2,  2, 1, 1, 1,
    8,  8,  4,  4,  2, 2
};
/* clang-format on */

static void build_smooth_interintra_mask(uint8_t *mask, int stride,
                                         BLOCK_SIZE plane_bsize,
                                         INTERINTRA_MODE mode) {
  int i, j;
  const int bw = block_size_wide[plane_bsize];
  const int bh = block_size_high[plane_bsize];
  const int size_scale = ii_size_scales[plane_bsize];

  switch (mode) {
    case II_V_PRED:
      for (i = 0; i < bh; ++i) {
        memset(mask, ii_weights1d[i * size_scale], bw * sizeof(mask[0]));
        mask += stride;
      }
      break;

    case II_H_PRED:
      for (i = 0; i < bh; ++i) {
        for (j = 0; j < bw; ++j) mask[j] = ii_weights1d[j * size_scale];
        mask += stride;
      }
      break;

    case II_SMOOTH_PRED:
      for (i = 0; i < bh; ++i) {
        for (j = 0; j < bw; ++j)
          mask[j] = ii_weights1d[(i < j ? i : j) * size_scale];
        mask += stride;
      }
      break;

    case II_DC_PRED:
    default:
      for (i = 0; i < bh; ++i) {
        memset(mask, 32, bw * sizeof(mask[0]));
        mask += stride;
      }
      break;
  }
}

static void combine_interintra(INTERINTRA_MODE mode, int use_wedge_interintra,
                               int wedge_index, int wedge_sign,
                               BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
                               uint8_t *comppred, int compstride,
                               const uint8_t *interpred, int interstride,
                               const uint8_t *intrapred, int intrastride) {
  const int bw = block_size_wide[plane_bsize];
  const int bh = block_size_high[plane_bsize];

  if (use_wedge_interintra) {
    if (is_interintra_wedge_used(bsize)) {
      const uint8_t *mask =
          av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
      const int subw = 2 * mi_size_wide[bsize] == bw;
      const int subh = 2 * mi_size_high[bsize] == bh;
      aom_blend_a64_mask(comppred, compstride, intrapred, intrastride,
                         interpred, interstride, mask, block_size_wide[bsize],
                         bw, bh, subw, subh);
    }
    return;
  }

  uint8_t mask[MAX_SB_SQUARE];
  build_smooth_interintra_mask(mask, bw, plane_bsize, mode);
  aom_blend_a64_mask(comppred, compstride, intrapred, intrastride, interpred,
                     interstride, mask, bw, bw, bh, 0, 0);
}

static void combine_interintra_highbd(
    INTERINTRA_MODE mode, int use_wedge_interintra, int wedge_index,
    int wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
    uint8_t *comppred8, int compstride, const uint8_t *interpred8,
    int interstride, const uint8_t *intrapred8, int intrastride, int bd) {
  const int bw = block_size_wide[plane_bsize];
  const int bh = block_size_high[plane_bsize];

  if (use_wedge_interintra) {
    if (is_interintra_wedge_used(bsize)) {
      const uint8_t *mask =
          av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
      const int subh = 2 * mi_size_high[bsize] == bh;
      const int subw = 2 * mi_size_wide[bsize] == bw;
      aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
                                interpred8, interstride, mask,
                                block_size_wide[bsize], bw, bh, subw, subh, bd);
    }
    return;
  }

  uint8_t mask[MAX_SB_SQUARE];
  build_smooth_interintra_mask(mask, bw, plane_bsize, mode);
  aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
                            interpred8, interstride, mask, bw, bw, bh, 0, 0,
                            bd);
}

void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm,
                                               MACROBLOCKD *xd,
                                               BLOCK_SIZE bsize, int plane,
                                               BUFFER_SET *ctx, uint8_t *dst,
                                               int dst_stride) {
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ssx = xd->plane[plane].subsampling_x;
  const int ssy = xd->plane[plane].subsampling_y;
  BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
  PREDICTION_MODE mode = interintra_to_intra_mode[xd->mi[0]->interintra_mode];
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_Y] == 0);
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_UV] == 0);
  assert(xd->mi[0]->filter_intra_mode_info.use_filter_intra == 0);
  assert(xd->mi[0]->use_intrabc == 0);

  av1_predict_intra_block(cm, xd, pd->width, pd->height,
                          max_txsize_rect_lookup[plane_bsize], mode, 0, 0,
                          FILTER_INTRA_MODES, ctx->plane[plane],
                          ctx->stride[plane], dst, dst_stride, 0, 0, plane);
}

void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
                            const uint8_t *inter_pred, int inter_stride,
                            const uint8_t *intra_pred, int intra_stride) {
  const int ssx = xd->plane[plane].subsampling_x;
  const int ssy = xd->plane[plane].subsampling_y;
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    combine_interintra_highbd(
        xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
        xd->mi[0]->interintra_wedge_index, xd->mi[0]->interintra_wedge_sign,
        bsize, plane_bsize, xd->plane[plane].dst.buf,
        xd->plane[plane].dst.stride, inter_pred, inter_stride, intra_pred,
        intra_stride, xd->bd);
    return;
  }
  combine_interintra(
      xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
      xd->mi[0]->interintra_wedge_index, xd->mi[0]->interintra_wedge_sign,
      bsize, plane_bsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
      inter_pred, inter_stride, intra_pred, intra_stride);
}

// build interintra_predictors for one plane
void av1_build_interintra_predictors_sbp(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                         uint8_t *pred, int stride,
                                         BUFFER_SET *ctx, int plane,
                                         BLOCK_SIZE bsize) {
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
    av1_build_intra_predictors_for_interintra(
        cm, xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(intrapredictor),
        MAX_SB_SIZE);
    av1_combine_interintra(xd, bsize, plane, pred, stride,
                           CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
  } else {
    DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]);
    av1_build_intra_predictors_for_interintra(cm, xd, bsize, plane, ctx,
                                              intrapredictor, MAX_SB_SIZE);
    av1_combine_interintra(xd, bsize, plane, pred, stride, intrapredictor,
                           MAX_SB_SIZE);
  }
}

void av1_build_interintra_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                          uint8_t *upred, uint8_t *vpred,
                                          int ustride, int vstride,
                                          BUFFER_SET *ctx, BLOCK_SIZE bsize) {
  av1_build_interintra_predictors_sbp(cm, xd, upred, ustride, ctx, 1, bsize);
  av1_build_interintra_predictors_sbp(cm, xd, vpred, vstride, ctx, 2, bsize);
}